661ad12e0cc9d6a730e194edc82c64dac91dbf75
[~shefty/rdma-dev.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39
40 #define NEIGH_DEBUG 1
41
42 #define NEIGH_PRINTK(x...) printk(x)
43 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
44 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
45 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
46
47 #if NEIGH_DEBUG >= 1
48 #undef NEIGH_PRINTK1
49 #define NEIGH_PRINTK1 NEIGH_PRINTK
50 #endif
51 #if NEIGH_DEBUG >= 2
52 #undef NEIGH_PRINTK2
53 #define NEIGH_PRINTK2 NEIGH_PRINTK
54 #endif
55
56 #define PNEIGH_HASHMASK         0xF
57
58 static void neigh_timer_handler(unsigned long arg);
59 static void __neigh_notify(struct neighbour *n, int type, int flags);
60 static void neigh_update_notify(struct neighbour *neigh);
61 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
62
63 static struct neigh_table *neigh_tables;
64 #ifdef CONFIG_PROC_FS
65 static const struct file_operations neigh_stat_seq_fops;
66 #endif
67
68 /*
69    Neighbour hash table buckets are protected with rwlock tbl->lock.
70
71    - All the scans/updates to hash buckets MUST be made under this lock.
72    - NOTHING clever should be made under this lock: no callbacks
73      to protocol backends, no attempts to send something to network.
74      It will result in deadlocks, if backend/driver wants to use neighbour
75      cache.
76    - If the entry requires some non-trivial actions, increase
77      its reference count and release table lock.
78
79    Neighbour entries are protected:
80    - with reference count.
81    - with rwlock neigh->lock
82
83    Reference count prevents destruction.
84
85    neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
87     - timer
88     - resolution queue
89
90    Again, nothing clever shall be made under neigh->lock,
91    the most complicated procedure, which we allow is dev->hard_header.
92    It is supposed, that dev->hard_header is simplistic and does
93    not make callbacks to neighbour tables.
94
   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */
98
99 static DEFINE_RWLOCK(neigh_tbl_lock);
100
101 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
102 {
103         kfree_skb(skb);
104         return -ENETDOWN;
105 }
106
/*
 * Final teardown for an entry that has been unlinked from the table:
 * run the per-parms cleanup hook (if any), broadcast an RTM_DELNEIGH
 * netlink notification, then drop the table's reference.  The entry
 * itself is freed only when the last reference goes away.
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}
115
/*
 * Pick a pseudo-random reachability interval uniformly distributed in
 * [base/2, 3*base/2).  This matches the default IPv6 behaviour and is
 * deliberately not tunable.  A zero base yields zero.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	unsigned long half = base >> 1;

	if (!base)
		return 0;

	return half + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
127
128
/*
 * Synchronous garbage collection, run by neigh_alloc() when the table
 * is over its gc_thresh limits.  Walks every hash bucket under the
 * table write lock and unlinks entries that are both unreferenced
 * (refcnt == 1, i.e. only the hash table holds them) and not
 * NUD_PERMANENT.  Returns 1 if at least one entry was removed, 0
 * otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink with rcu_assign_pointer: lockless
				 * readers may still be traversing this chain.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk  = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	/* Record the flush time so neigh_alloc() can rate-limit us. */
	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
174
175 static void neigh_add_timer(struct neighbour *n, unsigned long when)
176 {
177         neigh_hold(n);
178         if (unlikely(mod_timer(&n->timer, when))) {
179                 printk("NEIGH: BUG, double timer add, state is %x\n",
180                        n->nud_state);
181                 dump_stack();
182         }
183 }
184
185 static int neigh_del_timer(struct neighbour *n)
186 {
187         if ((n->nud_state & NUD_IN_TIMER) &&
188             del_timer(&n->timer)) {
189                 neigh_release(n);
190                 return 1;
191         }
192         return 0;
193 }
194
195 static void pneigh_queue_purge(struct sk_buff_head *list)
196 {
197         struct sk_buff *skb;
198
199         while ((skb = skb_dequeue(list)) != NULL) {
200                 dev_put(skb->dev);
201                 kfree_skb(skb);
202         }
203 }
204
/*
 * Unlink every entry bound to @dev — or every entry at all when @dev
 * is NULL — from the hash table.  Caller must hold tbl->lock for
 * writing.  Entries still referenced elsewhere cannot be freed yet:
 * they are marked dead, their timers and queued skbs are dropped, and
 * their output hook is pointed at neigh_blackhole so any late user
 * just gets -ENETDOWN until the final reference is released.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			/* RCU-safe unlink; concurrent readers may still
			 * walk through this entry until a grace period.
			 */
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
254
/*
 * Flush all cached entries for @dev (e.g. after its hardware address
 * changed).  Thin wrapper taking the table write lock around
 * neigh_flush_dev().
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
262
/*
 * Device is going down: flush its neighbour and proxy entries, then —
 * after dropping the table lock — stop the proxy timer and free any
 * skbs still sitting in the proxy queue.  Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	/* del_timer_sync() may sleep, so it must run without the lock. */
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
275
/*
 * Allocate and minimally initialise a new cache entry for @tbl.
 * When the table is over gc_thresh3, or over gc_thresh2 with no flush
 * in the last 5 seconds, a synchronous GC pass is tried first; if that
 * frees nothing and we are still over gc_thresh3 the allocation is
 * refused.  Returns NULL on failure.  The entry starts with refcnt 1
 * and dead == 1 — it becomes live only once neigh_create() links it
 * into the hash table.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	n = kzalloc(tbl->entry_size, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated        = n->used = now;
	n->nud_state      = NUD_NONE;
	/* Packets are dropped until a real output method is installed. */
	n->output         = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms          = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl            = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead           = 1;
out:
	return n;

out_entries:
	/* Undo the optimistic entry count bump from above. */
	atomic_dec(&tbl->entries);
	goto out;
}
316
317 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
318 {
319         size_t size = (1 << shift) * sizeof(struct neighbour *);
320         struct neigh_hash_table *ret;
321         struct neighbour __rcu **buckets;
322
323         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
324         if (!ret)
325                 return NULL;
326         if (size <= PAGE_SIZE)
327                 buckets = kzalloc(size, GFP_ATOMIC);
328         else
329                 buckets = (struct neighbour __rcu **)
330                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
331                                            get_order(size));
332         if (!buckets) {
333                 kfree(ret);
334                 return NULL;
335         }
336         ret->hash_buckets = buckets;
337         ret->hash_shift = shift;
338         get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
339         ret->hash_rnd |= 1;
340         return ret;
341 }
342
343 static void neigh_hash_free_rcu(struct rcu_head *head)
344 {
345         struct neigh_hash_table *nht = container_of(head,
346                                                     struct neigh_hash_table,
347                                                     rcu);
348         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
349         struct neighbour __rcu **buckets = nht->hash_buckets;
350
351         if (size <= PAGE_SIZE)
352                 kfree(buckets);
353         else
354                 free_pages((unsigned long)buckets, get_order(size));
355         kfree(nht);
356 }
357
/*
 * Replace tbl->nht with a table of 2^new_shift buckets and rehash all
 * entries into it.  Caller holds tbl->lock for writing.  Entries are
 * pushed onto the new chains with rcu_assign_pointer so lockless RCU
 * readers always see a consistent list; the old table is freed only
 * after a grace period.  On allocation failure the old table is
 * returned and left in place.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			/* Recompute the bucket with the new table's seed. */
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* Head-insert into the new chain. */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
398
/*
 * Lockless lookup of the entry keyed by @pkey on @dev.  Runs under
 * rcu_read_lock_bh(); on a hit a reference is taken with
 * atomic_inc_not_zero(), so an entry already being torn down is
 * reported as NULL.  The caller owns the returned reference.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
428
/*
 * Like neigh_lookup() but matches @pkey on any device within @net
 * (the hash is computed with a NULL device).  Lockless under
 * rcu_read_lock_bh(); the caller owns the returned reference.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
459
/*
 * Create the entry for @pkey/@dev and link it into the table, or hand
 * back a duplicate created concurrently.  On success the entry is
 * returned with an extra reference; on failure an ERR_PTR() is
 * returned (-ENOBUFS when allocation fails, -EINVAL when the parms
 * block died, or the constructor/neigh_setup error code).
 */
struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Backdate the confirmation stamp so the entry starts out
	 * unconfirmed (two base_reachable_time periods in the past).
	 */
	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/* Grow the hash table once entries outnumber buckets. */
	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	/* parms went away while we were setting up without the lock. */
	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Re-check for a duplicate inserted while we dropped the lock;
	 * if found, return the existing entry instead of ours.
	 */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(neigh_create);
537
538 static u32 pneigh_hash(const void *pkey, int key_len)
539 {
540         u32 hash_val = *(u32 *)(pkey + key_len - 4);
541         hash_val ^= (hash_val >> 16);
542         hash_val ^= hash_val >> 8;
543         hash_val ^= hash_val >> 4;
544         hash_val &= PNEIGH_HASHMASK;
545         return hash_val;
546 }
547
548 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
549                                               struct net *net,
550                                               const void *pkey,
551                                               int key_len,
552                                               struct net_device *dev)
553 {
554         while (n) {
555                 if (!memcmp(n->key, pkey, key_len) &&
556                     net_eq(pneigh_net(n), net) &&
557                     (n->dev == dev || !n->dev))
558                         return n;
559                 n = n->next;
560         }
561         return NULL;
562 }
563
564 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
565                 struct net *net, const void *pkey, struct net_device *dev)
566 {
567         int key_len = tbl->key_len;
568         u32 hash_val = pneigh_hash(pkey, key_len);
569
570         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
571                                  net, pkey, key_len, dev);
572 }
573 EXPORT_SYMBOL_GPL(__pneigh_lookup);
574
/*
 * Find the proxy entry for @pkey/@dev in @net.  When @creat is
 * non-zero and no entry exists, allocate and insert a new one — this
 * path requires the RTNL (asserted) and may sleep (GFP_KERNEL).
 * Returns the entry, or NULL on miss (with !creat) or on failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* Constructor refused the entry: undo all references. */
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	/* Head-insert into the bucket under the write lock. */
	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
620
621
/*
 * Remove the proxy entry matching @pkey/@dev/@net exactly (no device
 * wildcarding here, unlike lookup).  The entry is unlinked under the
 * table write lock; the destructor and the free run after the lock is
 * dropped.  Returns 0 on success, -ENOENT when nothing matched.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			/* Drop the lock before calling out and freeing. */
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
648
/*
 * Remove every proxy entry attached to @dev, or all entries when @dev
 * is NULL.  Caller (neigh_ifdown()) already holds tbl->lock for
 * writing.  Always returns -ENOENT; the sole caller ignores the value.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
672
673 static void neigh_parms_destroy(struct neigh_parms *parms);
674
/* Drop one reference on @parms; destroy it when the count hits zero. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
680
/*
 *	neighbour must already be out of the table (dead == 1);
 *	releases everything the entry owns — pending timer, queued skbs,
 *	device and parms references — then frees the struct via RCU.
 */
void neigh_destroy(struct neighbour *neigh)
{
	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		/* Refcounting bug somewhere: refuse to free a live entry. */
		printk(KERN_WARNING
		       "Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* A dead entry should have had its timer killed at unlink time. */
	if (neigh_del_timer(neigh))
		printk(KERN_WARNING "Impossible event.\n");

	skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;

	dev_put(neigh->dev);
	neigh_parms_put(neigh->parms);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
711
/* Neighbour state is suspicious;
   disable fast path.

   Routes output through ops->output (the generic, validating path)
   instead of the connected fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	neigh->output = neigh->ops->output;
}
723
/* Neighbour state is OK;
   enable fast path.

   Routes output through ops->connected_output, the fast path for
   entries whose link-layer address is considered valid.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
735
736 static void neigh_periodic_work(struct work_struct *work)
737 {
738         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
739         struct neighbour *n;
740         struct neighbour __rcu **np;
741         unsigned int i;
742         struct neigh_hash_table *nht;
743
744         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
745
746         write_lock_bh(&tbl->lock);
747         nht = rcu_dereference_protected(tbl->nht,
748                                         lockdep_is_held(&tbl->lock));
749
750         /*
751          *      periodically recompute ReachableTime from random function
752          */
753
754         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
755                 struct neigh_parms *p;
756                 tbl->last_rand = jiffies;
757                 for (p = &tbl->parms; p; p = p->next)
758                         p->reachable_time =
759                                 neigh_rand_reach_time(p->base_reachable_time);
760         }
761
762         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
763                 np = &nht->hash_buckets[i];
764
765                 while ((n = rcu_dereference_protected(*np,
766                                 lockdep_is_held(&tbl->lock))) != NULL) {
767                         unsigned int state;
768
769                         write_lock(&n->lock);
770
771                         state = n->nud_state;
772                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
773                                 write_unlock(&n->lock);
774                                 goto next_elt;
775                         }
776
777                         if (time_before(n->used, n->confirmed))
778                                 n->used = n->confirmed;
779
780                         if (atomic_read(&n->refcnt) == 1 &&
781                             (state == NUD_FAILED ||
782                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
783                                 *np = n->next;
784                                 n->dead = 1;
785                                 write_unlock(&n->lock);
786                                 neigh_cleanup_and_release(n);
787                                 continue;
788                         }
789                         write_unlock(&n->lock);
790
791 next_elt:
792                         np = &n->next;
793                 }
794                 /*
795                  * It's fine to release lock here, even if hash table
796                  * grows while we are preempted.
797                  */
798                 write_unlock_bh(&tbl->lock);
799                 cond_resched();
800                 write_lock_bh(&tbl->lock);
801         }
802         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
803          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
804          * base_reachable_time.
805          */
806         schedule_delayed_work(&tbl->gc_work,
807                               tbl->parms.base_reachable_time >> 1);
808         write_unlock_bh(&tbl->lock);
809 }
810
811 static __inline__ int neigh_max_probes(struct neighbour *n)
812 {
813         struct neigh_parms *p = n->parms;
814         return (n->nud_state & NUD_PROBE) ?
815                 p->ucast_probes :
816                 p->ucast_probes + p->app_probes + p->mcast_probes;
817 }
818
/*
 * Entry has entered NUD_FAILED: report unreachability for every queued
 * skb, then flush the queue.  Called with neigh->lock write-held; the
 * lock is dropped around each error_report() call (see the comment
 * below), so both the state and the queue are re-checked on every
 * iteration.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
843
/*
 * Send one solicitation for @neigh and bump its probe counter.
 * Called with neigh->lock write-held; the lock is released before
 * calling into the protocol's solicit hook.  The head-of-queue skb is
 * copied while still under the lock so it stays usable even if the
 * arp_queue is flushed once the lock is dropped.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}
856
/* Called when a timer expires for a neighbour entry. */

/* Per-entry state-machine tick: walks the entry through
 * REACHABLE -> DELAY -> PROBE -> FAILED (or back to REACHABLE when a
 * confirmation arrived during the DELAY window), re-arms the timer
 * while the state remains in NUD_IN_TIMER, and emits a probe when in
 * INCOMPLETE/PROBE.  Runs in timer (softirq) context.
 */
static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	/* Entry left all timed states while this timer was in flight. */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			/* Confirmed recently enough: just re-arm. */
			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			/* Recently used but unconfirmed: wait a bit for
			 * passive confirmation before probing. */
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			/* Idle and unconfirmed: demote to STALE. */
			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			/* A confirmation arrived during the delay window. */
			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			/* No confirmation: start active probing. */
			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	/* Probe budget exhausted: declare failure and flush the queue. */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Clamp: fire at least HZ/2 in the future to bound the
		 * timer rate.  mod_timer() returning 0 means the timer
		 * was inactive, so take an extra reference for it. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* neigh_probe() drops neigh->lock for us. */
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}
940
/* Slow path of neigh_event_send(): decide whether @skb must wait for
 * address resolution.  Starts resolution (-> NUD_INCOMPLETE) or a
 * delayed re-validation (NUD_STALE -> NUD_DELAY) as needed.
 *
 * Returns 0 when the caller may transmit immediately, 1 when the
 * packet was queued for resolution or resolution failed outright
 * (skb freed in the failure case).
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Entry is usable (or already being re-validated): send now. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			unsigned long next, now = jiffies;

			/* Pre-charge the unicast-probe budget: only
			 * mcast/app probes are sent from INCOMPLETE. */
			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No probing mechanism configured at all. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Evict oldest queued packets until this one
			 * fits within the per-entry byte budget. */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       neigh->parms->queue_len_bytes) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* The lock was taken with write_lock_bh(); neigh_probe() drops
	 * the write lock itself, so only BHs remain to re-enable. */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
1006
1007 static void neigh_update_hhs(struct neighbour *neigh)
1008 {
1009         struct hh_cache *hh;
1010         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1011                 = NULL;
1012
1013         if (neigh->dev->header_ops)
1014                 update = neigh->dev->header_ops->cache_update;
1015
1016         if (update) {
1017                 hh = &neigh->hh;
1018                 if (hh->hh_len) {
1019                         write_seqlock_bh(&hh->hh_lock);
1020                         update(hh, neigh->dev, neigh->ha);
1021                         write_sequnlock_bh(&hh->hh_lock);
1022                 }
1023         }
1024 }
1025
1026
1027
/* Generic update routine.
   -- lladdr is new lladdr or NULL, if it is not supplied.
   -- new    is new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
				lladdr instead of overriding it
				if it is different.
				It also allows to retain current state
				if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN    means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
				a router.

   Caller MUST hold reference count on the entry.

   Returns 0 on success, -EPERM when a non-admin update touches a
   NOARP/PERMANENT entry, -EINVAL when no lladdr is available.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch static entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		/* Transition to an invalid state: stop the timer and,
		 * for INCOMPLETE/PROBE -> FAILED, flush the arp_queue. */
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Weak override: keep the old address
				 * but demote to STALE for re-validation. */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		/* Publish the new address under the ha seqlock and
		 * refresh any cached hardware headers. */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Backdate 'confirmed' so the entry is immediately
		 * considered overdue for confirmation. */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();
			/* On shaper/eql skb->dst->neighbour != neigh :( */
			if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
				n1 = n2;
			n1->output(n1, skb);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1196
1197 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1198                                  u8 *lladdr, void *saddr,
1199                                  struct net_device *dev)
1200 {
1201         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1202                                                  lladdr || !dev->addr_len);
1203         if (neigh)
1204                 neigh_update(neigh, lladdr, NUD_STALE,
1205                              NEIGH_UPDATE_F_OVERRIDE);
1206         return neigh;
1207 }
1208 EXPORT_SYMBOL(neigh_event_ns);
1209
1210 /* called with read_lock_bh(&n->lock); */
1211 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1212 {
1213         struct net_device *dev = dst->dev;
1214         __be16 prot = dst->ops->protocol;
1215         struct hh_cache *hh = &n->hh;
1216
1217         write_lock_bh(&n->lock);
1218
1219         /* Only one thread can come in here and initialize the
1220          * hh_cache entry.
1221          */
1222         if (!hh->hh_len)
1223                 dev->header_ops->cache(n, hh, prot);
1224
1225         write_unlock_bh(&n->lock);
1226 }
1227
1228 /* This function can be used in contexts, where only old dev_queue_xmit
1229  * worked, f.e. if you want to override normal output path (eql, shaper),
1230  * but resolution is not made yet.
1231  */
1232
1233 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1234 {
1235         struct net_device *dev = skb->dev;
1236
1237         __skb_pull(skb, skb_network_offset(skb));
1238
1239         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1240                             skb->len) < 0 &&
1241             dev->header_ops->rebuild(skb))
1242                 return 0;
1243
1244         return dev_queue_xmit(skb);
1245 }
1246 EXPORT_SYMBOL(neigh_compat_output);
1247
/* Slow and careful. */

/* Output path for entries that may still need resolution: queue the
 * packet via neigh_event_send() when necessary; otherwise lazily
 * initialize the hh cache, build the hard header under the ha_lock
 * seqlock and transmit.  Returns the qdisc return code, 0 when the
 * packet was queued for resolution, or -EINVAL on error (skb freed).
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	__skb_pull(skb, skb_network_offset(skb));

	/* neigh_event_send() returns 0 when the entry is usable now. */
	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		/* Retry if the lladdr changed during the header build. */
		do {
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
		      dst, neigh);
	/* Deliberate fall-through into out_kfree_skb. */
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1290
1291 /* As fast as possible without hh cache */
1292
1293 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1294 {
1295         struct net_device *dev = neigh->dev;
1296         unsigned int seq;
1297         int err;
1298
1299         __skb_pull(skb, skb_network_offset(skb));
1300
1301         do {
1302                 seq = read_seqbegin(&neigh->ha_lock);
1303                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1304                                       neigh->ha, NULL, skb->len);
1305         } while (read_seqretry(&neigh->ha_lock, seq));
1306
1307         if (err >= 0)
1308                 err = dev_queue_xmit(skb);
1309         else {
1310                 err = -EINVAL;
1311                 kfree_skb(skb);
1312         }
1313         return err;
1314 }
1315 EXPORT_SYMBOL(neigh_connected_output);
1316
/* Output op for entries that need no resolution at all: hand the skb
 * straight to the device queue (the neighbour argument is unused).
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1322
/* Proxy-queue timer handler: answer (via tbl->proxy_redo) every queued
 * request whose randomized deadline has passed, and re-arm the timer
 * for the earliest remaining one.  Runs in timer context; serialized
 * by the proxy_queue lock.
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;	/* smallest remaining delay; 0 = none left */
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			/* Due: answer it now (or drop if the device
			 * went down / no redo handler). */
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			/* Drop the reference taken in pneigh_enqueue(). */
			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1356
1357 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1358                     struct sk_buff *skb)
1359 {
1360         unsigned long now = jiffies;
1361         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1362
1363         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1364                 kfree_skb(skb);
1365                 return;
1366         }
1367
1368         NEIGH_CB(skb)->sched_next = sched_next;
1369         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1370
1371         spin_lock(&tbl->proxy_queue.lock);
1372         if (del_timer(&tbl->proxy_timer)) {
1373                 if (time_before(tbl->proxy_timer.expires, sched_next))
1374                         sched_next = tbl->proxy_timer.expires;
1375         }
1376         skb_dst_drop(skb);
1377         dev_hold(skb->dev);
1378         __skb_queue_tail(&tbl->proxy_queue, skb);
1379         mod_timer(&tbl->proxy_timer, sched_next);
1380         spin_unlock(&tbl->proxy_queue.lock);
1381 }
1382 EXPORT_SYMBOL(pneigh_enqueue);
1383
1384 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1385                                                       struct net *net, int ifindex)
1386 {
1387         struct neigh_parms *p;
1388
1389         for (p = &tbl->parms; p; p = p->next) {
1390                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1391                     (!p->dev && !ifindex))
1392                         return p;
1393         }
1394
1395         return NULL;
1396 }
1397
/* Clone the table's default parameters for @dev and link the copy into
 * tbl->parms list, giving the driver a chance to adjust them via
 * ndo_neigh_setup.  Returns the new parms (refcount 1) or NULL on
 * allocation/setup failure.  Holds references on @dev and its netns.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p, *ref;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	/* Template: the table's default (device-less) parms. */
	ref = lookup_neigh_parms(tbl, net, 0);
	if (!ref)
		return NULL;

	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl            = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);

		/* Let the driver veto or tune the new parms. */
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			kfree(p);
			return NULL;
		}

		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;
		write_lock_bh(&tbl->lock);
		p->next         = tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1433
1434 static void neigh_rcu_free_parms(struct rcu_head *head)
1435 {
1436         struct neigh_parms *parms =
1437                 container_of(head, struct neigh_parms, rcu_head);
1438
1439         neigh_parms_put(parms);
1440 }
1441
/* Unlink @parms from @tbl's list and schedule its release: the final
 * put happens after an RCU grace period (neigh_rcu_free_parms).  The
 * table's built-in default parms (&tbl->parms) are never released
 * this way.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
EXPORT_SYMBOL(neigh_parms_release);
1464
/* Final teardown once the parms refcount reaches zero: release the
 * netns reference taken at allocation time and free the memory.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}
1470
/* Lockdep class for per-table proxy_queue locks (used by
 * skb_queue_head_init_class() in neigh_table_init_no_netlink()). */
static struct lock_class_key neigh_table_proxy_queue_class;
1472
/* One-time initialization of a neighbour table: default parms, percpu
 * statistics, /proc stat entry, neighbour and proxy hash tables, the
 * periodic GC work and the proxy timer.  Panics on allocation failure
 * (only called from boot / module init paths).
 */
void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Start with a small (1 << 3 buckets) neighbour hash. */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	rwlock_init(&tbl->lock);
	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand  = now + tbl->parms.reachable_time * 20;
}
EXPORT_SYMBOL(neigh_table_init_no_netlink);
1512
/* Initialize @tbl and register it on the global neigh_tables list.
 * Warns (with a stack dump) if a table for the same address family
 * was already registered; the new table is linked in either way.
 */
void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	/* Scan for a duplicate family before linking ourselves in. */
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next       = neigh_tables;
	neigh_tables    = tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(tmp)) {
		printk(KERN_ERR "NEIGH: Registering multiple tables for "
		       "family %d\n", tbl->family);
		dump_stack();
	}
}
EXPORT_SYMBOL(neigh_table_init);
1534
1535 int neigh_table_clear(struct neigh_table *tbl)
1536 {
1537         struct neigh_table **tp;
1538
1539         /* It is not clean... Fix it to unload IPv6 module safely */
1540         cancel_delayed_work_sync(&tbl->gc_work);
1541         del_timer_sync(&tbl->proxy_timer);
1542         pneigh_queue_purge(&tbl->proxy_queue);
1543         neigh_ifdown(tbl, NULL);
1544         if (atomic_read(&tbl->entries))
1545                 printk(KERN_CRIT "neighbour leakage\n");
1546         write_lock(&neigh_tbl_lock);
1547         for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1548                 if (*tp == tbl) {
1549                         *tp = tbl->next;
1550                         break;
1551                 }
1552         }
1553         write_unlock(&neigh_tbl_lock);
1554
1555         call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1556                  neigh_hash_free_rcu);
1557         tbl->nht = NULL;
1558
1559         kfree(tbl->phash_buckets);
1560         tbl->phash_buckets = NULL;
1561
1562         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1563
1564         free_percpu(tbl->stats);
1565         tbl->stats = NULL;
1566
1567         return 0;
1568 }
1569 EXPORT_SYMBOL(neigh_table_clear);
1570
/* RTM_DELNEIGH netlink handler: remove the neighbour (or proxy) entry
 * described by the message.  Regular entries are not freed directly;
 * they are forced to NUD_FAILED via neigh_update() so the normal
 * teardown path applies.  Returns 0 or a negative errno.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	/* Find the table for the request's address family; the list
	 * lock is dropped once a match is found. */
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}
1634
/* RTM_NEWNEIGH netlink handler: create or update a neighbour (or
 * proxy) entry from the message, honouring NLM_F_CREATE / NLM_F_EXCL /
 * NLM_F_REPLACE in the usual way.  Returns 0 or a negative errno.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	/* Find the table for the request's address family; the list
	 * lock is dropped once a match is found. */
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			/* Create-if-missing lookup for proxy entries. */
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			/* Existing entry: only override the lladdr when
			 * NLM_F_REPLACE was requested. */
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			/* NTF_USE: just kick resolution, no update. */
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}
1732
/* Dump one struct neigh_parms as a nested NDTA_PARMS attribute.
 * Returns the nla_nest_end() result on success, -ENOBUFS if the nest
 * cannot be started, or -EMSGSIZE if an attribute does not fit (the
 * NLA_PUT* macros jump to nla_put_failure on overflow).
 */
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	/* Only device-bound parms carry an ifindex; default parms do not. */
	if (parms->dev)
		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);

	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
	NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes);
	/* approximative value for deprecated QUEUE_LEN (in packets) */
	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN,
		    DIV_ROUND_UP(parms->queue_len_bytes,
				 SKB_TRUESIZE(ETH_FRAME_LEN)));
	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
	/* Time values are exported to userspace in milliseconds. */
	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
		      parms->base_reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
1770
/* Fill a complete RTM_NEWNEIGHTBL message for one neighbour table:
 * table-level attributes, a runtime-config blob (NDTA_CONFIG), the
 * per-cpu statistics summed into NDTA_STATS, and the table's default
 * parms as a nested NDTA_PARMS attribute.
 * Returns the nlmsg_end() result on success or -EMSGSIZE on overflow.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	/* Hold tbl->lock so the dumped values form a consistent snapshot. */
	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);

	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* The hash parameters live in an RCU-managed struct. */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd;
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* Sum the per-cpu counters into a single ndt_stats blob. */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
	}

	/* The table's default parms must not be bound to a device. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1854
1855 static int neightbl_fill_param_info(struct sk_buff *skb,
1856                                     struct neigh_table *tbl,
1857                                     struct neigh_parms *parms,
1858                                     u32 pid, u32 seq, int type,
1859                                     unsigned int flags)
1860 {
1861         struct ndtmsg *ndtmsg;
1862         struct nlmsghdr *nlh;
1863
1864         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1865         if (nlh == NULL)
1866                 return -EMSGSIZE;
1867
1868         ndtmsg = nlmsg_data(nlh);
1869
1870         read_lock_bh(&tbl->lock);
1871         ndtmsg->ndtm_family = tbl->family;
1872         ndtmsg->ndtm_pad1   = 0;
1873         ndtmsg->ndtm_pad2   = 0;
1874
1875         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1876             neightbl_fill_parms(skb, parms) < 0)
1877                 goto errout;
1878
1879         read_unlock_bh(&tbl->lock);
1880         return nlmsg_end(skb, nlh);
1881 errout:
1882         read_unlock_bh(&tbl->lock);
1883         nlmsg_cancel(skb, nlh);
1884         return -EMSGSIZE;
1885 }
1886
/* Validation policy for table-level RTM_SETNEIGHTBL attributes. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1895
1896 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1897         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1898         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1899         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1900         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1901         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1902         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1903         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1904         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1905         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1906         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1907         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1908         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1909         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1910 };
1911
/* RTM_SETNEIGHTBL handler: update a neighbour table's gc thresholds and
 * interval and, optionally, one neigh_parms instance (selected by
 * NDTPA_IFINDEX inside the nested NDTA_PARMS attribute; ifindex 0 means
 * the table's default parms).
 * Returns 0 on success or a negative errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	/* The table is identified by name; NDTA_NAME is mandatory. */
	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	/* Find the table matching both family (if given) and name. */
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every supplied parameter attribute to p. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Deprecated packet count; convert to bytes. */
				p->queue_len_bytes = nla_get_u32(tbp[i]) *
						     SKB_TRUESIZE(ETH_FRAME_LEN);
				break;
			case NDTPA_QUEUE_LENBYTES:
				p->queue_len_bytes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	/* Table-wide garbage-collection tunables. */
	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
2040
/* RTM_GETNEIGHTBL dump handler: emit one message per table, then one
 * per device-specific parms instance belonging to this namespace.
 * Resume state across dump invocations lives in cb->args[0] (table
 * index to skip to) and cb->args[1] (parms index within that table).
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		/* tbl->parms itself was dumped above; walk only the
		 * per-device parms chained after it.
		 */
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		/* Skipping applies only to the table we resumed inside. */
		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2089
/* Fill an RTM_NEWNEIGH (or similar) message for one neighbour entry:
 * ndmsg header, destination key, link-layer address (when the entry is
 * in a NUD_VALID state), probe count and cache timing info.
 * Returns the nlmsg_end() result on success or -EMSGSIZE on overflow.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);

	/* Snapshot state, hardware address and timestamps under the
	 * entry's lock so they are mutually consistent.
	 */
	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			/* Must drop the lock before the cancel path. */
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	/* Exclude the reference held by this dump itself. */
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2139
/* Propagate a neighbour change: raise a netevent for in-kernel
 * listeners and send an RTM_NEWNEIGH notification to userspace.
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2145
/* Dump all neighbour entries of one table that belong to the caller's
 * namespace, resuming at the hash bucket / entry index saved in
 * cb->args[1] and cb->args[2]. The hash table is walked under
 * rcu_read_lock_bh. Returns skb->len on completion or -1 when the skb
 * filled up (the saved indices allow the dump to continue later).
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = 0; h < (1 << nht->hash_shift); h++) {
		if (h < s_h)
			continue;
		if (h > s_h)
			/* Entry skipping only applies to the resumed bucket. */
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2188
2189 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2190 {
2191         struct neigh_table *tbl;
2192         int t, family, s_t;
2193
2194         read_lock(&neigh_tbl_lock);
2195         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2196         s_t = cb->args[0];
2197
2198         for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
2199                 if (t < s_t || (family && tbl->family != family))
2200                         continue;
2201                 if (t > s_t)
2202                         memset(&cb->args[1], 0, sizeof(cb->args) -
2203                                                 sizeof(cb->args[0]));
2204                 if (neigh_dump_table(tbl, skb, cb) < 0)
2205                         break;
2206         }
2207         read_unlock(&neigh_tbl_lock);
2208
2209         cb->args[0] = t;
2210         return skb->len;
2211 }
2212
2213 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2214 {
2215         int chain;
2216         struct neigh_hash_table *nht;
2217
2218         rcu_read_lock_bh();
2219         nht = rcu_dereference_bh(tbl->nht);
2220
2221         read_lock(&tbl->lock); /* avoid resizes */
2222         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2223                 struct neighbour *n;
2224
2225                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2226                      n != NULL;
2227                      n = rcu_dereference_bh(n->next))
2228                         cb(n, cookie);
2229         }
2230         read_unlock(&tbl->lock);
2231         rcu_read_unlock_bh();
2232 }
2233 EXPORT_SYMBOL(neigh_for_each);
2234
/* The tbl->lock must be held as a writer and BH disabled.
 *
 * Walk every hash chain and call cb(n) on each entry; when cb returns
 * non-zero the entry is unlinked from its chain, marked dead and
 * released via neigh_cleanup_and_release().
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* Splice n out of the chain; np stays put so
				 * the next iteration examines n's successor.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			/* Release only after dropping n->lock. */
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2269
2270 #ifdef CONFIG_PROC_FS
2271
2272 static struct neighbour *neigh_get_first(struct seq_file *seq)
2273 {
2274         struct neigh_seq_state *state = seq->private;
2275         struct net *net = seq_file_net(seq);
2276         struct neigh_hash_table *nht = state->nht;
2277         struct neighbour *n = NULL;
2278         int bucket = state->bucket;
2279
2280         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2281         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2282                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2283
2284                 while (n) {
2285                         if (!net_eq(dev_net(n->dev), net))
2286                                 goto next;
2287                         if (state->neigh_sub_iter) {
2288                                 loff_t fakep = 0;
2289                                 void *v;
2290
2291                                 v = state->neigh_sub_iter(state, n, &fakep);
2292                                 if (!v)
2293                                         goto next;
2294                         }
2295                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2296                                 break;
2297                         if (n->nud_state & ~NUD_NOARP)
2298                                 break;
2299 next:
2300                         n = rcu_dereference_bh(n->next);
2301                 }
2302
2303                 if (n)
2304                         break;
2305         }
2306         state->bucket = bucket;
2307
2308         return n;
2309 }
2310
/* Advance the seq_file iteration from neighbour n to the next visible
 * entry, crossing hash buckets as needed. When pos is non-NULL it is
 * decremented for each entry found, which lets neigh_get_idx() use this
 * to seek. Returns NULL when the table is exhausted.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	/* Let the protocol's sub-iterator advance within n first. */
	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		/* Chain exhausted: move on to the next hash bucket. */
		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2358
2359 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2360 {
2361         struct neighbour *n = neigh_get_first(seq);
2362
2363         if (n) {
2364                 --(*pos);
2365                 while (*pos) {
2366                         n = neigh_get_next(seq, n, pos);
2367                         if (!n)
2368                                 break;
2369                 }
2370         }
2371         return *pos ? NULL : n;
2372 }
2373
2374 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2375 {
2376         struct neigh_seq_state *state = seq->private;
2377         struct net *net = seq_file_net(seq);
2378         struct neigh_table *tbl = state->tbl;
2379         struct pneigh_entry *pn = NULL;
2380         int bucket = state->bucket;
2381
2382         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2383         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2384                 pn = tbl->phash_buckets[bucket];
2385                 while (pn && !net_eq(pneigh_net(pn), net))
2386                         pn = pn->next;
2387                 if (pn)
2388                         break;
2389         }
2390         state->bucket = bucket;
2391
2392         return pn;
2393 }
2394
2395 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2396                                             struct pneigh_entry *pn,
2397                                             loff_t *pos)
2398 {
2399         struct neigh_seq_state *state = seq->private;
2400         struct net *net = seq_file_net(seq);
2401         struct neigh_table *tbl = state->tbl;
2402
2403         do {
2404                 pn = pn->next;
2405         } while (pn && !net_eq(pneigh_net(pn), net));
2406
2407         while (!pn) {
2408                 if (++state->bucket > PNEIGH_HASHMASK)
2409                         break;
2410                 pn = tbl->phash_buckets[state->bucket];
2411                 while (pn && !net_eq(pneigh_net(pn), net))
2412                         pn = pn->next;
2413                 if (pn)
2414                         break;
2415         }
2416
2417         if (pn && pos)
2418                 --(*pos);
2419
2420         return pn;
2421 }
2422
2423 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2424 {
2425         struct pneigh_entry *pn = pneigh_get_first(seq);
2426
2427         if (pn) {
2428                 --(*pos);
2429                 while (*pos) {
2430                         pn = pneigh_get_next(seq, pn, pos);
2431                         if (!pn)
2432                                 break;
2433                 }
2434         }
2435         return *pos ? NULL : pn;
2436 }
2437
2438 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2439 {
2440         struct neigh_seq_state *state = seq->private;
2441         void *rc;
2442         loff_t idxpos = *pos;
2443
2444         rc = neigh_get_idx(seq, &idxpos);
2445         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2446                 rc = pneigh_get_idx(seq, &idxpos);
2447
2448         return rc;
2449 }
2450
2451 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2452         __acquires(rcu_bh)
2453 {
2454         struct neigh_seq_state *state = seq->private;
2455
2456         state->tbl = tbl;
2457         state->bucket = 0;
2458         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2459
2460         rcu_read_lock_bh();
2461         state->nht = rcu_dereference_bh(tbl->nht);
2462
2463         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2464 }
2465 EXPORT_SYMBOL(neigh_seq_start);
2466
2467 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2468 {
2469         struct neigh_seq_state *state;
2470         void *rc;
2471
2472         if (v == SEQ_START_TOKEN) {
2473                 rc = neigh_get_first(seq);
2474                 goto out;
2475         }
2476
2477         state = seq->private;
2478         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2479                 rc = neigh_get_next(seq, v, NULL);
2480                 if (rc)
2481                         goto out;
2482                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2483                         rc = pneigh_get_first(seq);
2484         } else {
2485                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2486                 rc = pneigh_get_next(seq, v, NULL);
2487         }
2488 out:
2489         ++(*pos);
2490         return rc;
2491 }
2492 EXPORT_SYMBOL(neigh_seq_next);
2493
/* End a walk started by neigh_seq_start(): drop the RCU-bh read lock
 * taken there. */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2500
2501 /* statistics via seq_file */
2502
2503 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2504 {
2505         struct neigh_table *tbl = seq->private;
2506         int cpu;
2507
2508         if (*pos == 0)
2509                 return SEQ_START_TOKEN;
2510
2511         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2512                 if (!cpu_possible(cpu))
2513                         continue;
2514                 *pos = cpu+1;
2515                 return per_cpu_ptr(tbl->stats, cpu);
2516         }
2517         return NULL;
2518 }
2519
2520 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2521 {
2522         struct neigh_table *tbl = seq->private;
2523         int cpu;
2524
2525         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2526                 if (!cpu_possible(cpu))
2527                         continue;
2528                 *pos = cpu+1;
2529                 return per_cpu_ptr(tbl->stats, cpu);
2530         }
2531         return NULL;
2532 }
2533
/* Nothing to release: the stats iterator takes no locks. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2538
2539 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2540 {
2541         struct neigh_table *tbl = seq->private;
2542         struct neigh_statistics *st = v;
2543
2544         if (v == SEQ_START_TOKEN) {
2545                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2546                 return 0;
2547         }
2548
2549         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2550                         "%08lx %08lx  %08lx %08lx %08lx\n",
2551                    atomic_read(&tbl->entries),
2552
2553                    st->allocs,
2554                    st->destroys,
2555                    st->hash_grows,
2556
2557                    st->lookups,
2558                    st->hits,
2559
2560                    st->res_failed,
2561
2562                    st->rcv_probes_mcast,
2563                    st->rcv_probes_ucast,
2564
2565                    st->periodic_gc_runs,
2566                    st->forced_gc_runs,
2567                    st->unres_discards
2568                    );
2569
2570         return 0;
2571 }
2572
/* seq_file iterator for the per-table statistics file. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2579
2580 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2581 {
2582         int ret = seq_open(file, &neigh_stat_seq_ops);
2583
2584         if (!ret) {
2585                 struct seq_file *sf = file->private_data;
2586                 sf->private = PDE(inode)->data;
2587         }
2588         return ret;
2589 };
2590
/* File operations for the per-table statistics proc file. */
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
2598
2599 #endif /* CONFIG_PROC_FS */
2600
2601 static inline size_t neigh_nlmsg_size(void)
2602 {
2603         return NLMSG_ALIGN(sizeof(struct ndmsg))
2604                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2605                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2606                + nla_total_size(sizeof(struct nda_cacheinfo))
2607                + nla_total_size(4); /* NDA_PROBES */
2608 }
2609
/* Multicast a netlink notification of @type for neighbour @n to the
 * RTNLGRP_NEIGH group.  If the skb cannot be allocated or filled, the
 * error is recorded on the group socket via rtnl_set_sk_err() so
 * listeners learn that an event was dropped.
 */
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, 0, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2633
#ifdef CONFIG_ARPD
/* Ask a userspace ARP daemon to resolve neighbour @n by multicasting
 * an RTM_GETNEIGH request notification. */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
#endif /* CONFIG_ARPD */
2641
2642 #ifdef CONFIG_SYSCTL
2643
2644 static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2645                            size_t *lenp, loff_t *ppos)
2646 {
2647         int size, ret;
2648         ctl_table tmp = *ctl;
2649
2650         tmp.data = &size;
2651         size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
2652         ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
2653         if (write && !ret)
2654                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2655         return ret;
2656 }
2657
/* Indices into neigh_sysctl_template.neigh_vars[]; the entries from
 * NEIGH_VAR_GC_INTERVAL onward are table-wide and only exposed in the
 * "default" directory (see neigh_sysctl_register()). */
enum {
	NEIGH_VAR_MCAST_PROBE,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,
	NEIGH_VAR_MAX
};
2680
/* Template for a neighbour sysctl directory.  neigh_sysctl_register()
 * kmemdup()s this whole structure and then points each entry's .data
 * at the relevant neigh_parms field, so the template itself carries no
 * data pointers. */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
	char *dev_name;
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		[NEIGH_VAR_MCAST_PROBE] = {
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_UCAST_PROBE] = {
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_APP_PROBE] = {
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_RETRANS_TIME] = {
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_DELAY_PROBE_TIME] = {
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_STALETIME] = {
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_QUEUE_LEN] = {
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			/* legacy packet-count view of unres_qlen_bytes */
			.proc_handler	= proc_unres_qlen,
		},
		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
			.procname	= "unres_qlen_bytes",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_PROXY_QLEN] = {
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_ANYCAST_DELAY] = {
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_PROXY_DELAY] = {
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_LOCKTIME] = {
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_RETRANS_TIME_MS] = {
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		/* Entries below are table-wide; truncated away for
		 * per-device directories. */
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{},
	},
};
2804
/* Register a "neigh" sysctl directory for parms @p at
 * net/<p_name>/neigh/<dev name or "default">.  When @handler is
 * non-NULL it replaces the proc handler of the four time-valued knobs
 * so the caller can post-process writes (dev is passed via extra1).
 * Returns 0 on success, -ENOBUFS on any allocation or registration
 * failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	struct neigh_sysctl_table *t;
	const char *dev_name_source = NULL;

#define NEIGH_CTL_PATH_ROOT	0
#define NEIGH_CTL_PATH_PROTO	1
#define NEIGH_CTL_PATH_NEIGH	2
#define NEIGH_CTL_PATH_DEV	3

	struct ctl_path neigh_path[] = {
		{ .procname = "net",	 },
		{ .procname = "proto",	 },
		{ .procname = "neigh",	 },
		{ .procname = "default", },
		{ },
	};

	/* Private copy of the template; .data pointers are filled below. */
	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
	/* unres_qlen and unres_qlen_bytes share the same backing field;
	 * proc_unres_qlen converts between packets and bytes. */
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early: per-device directories do
		 * not expose the table-wide GC knobs. */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
		/* NOTE(review): assumes gc_interval and gc_thresh1..3
		 * are laid out as four consecutive ints immediately
		 * after the default neigh_parms inside the owning
		 * neigh_table — confirm against struct neigh_table. */
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
	}


	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
	}

	/* Own copy of the directory name; freed in neigh_sysctl_unregister(). */
	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
	if (!t->dev_name)
		goto free;

	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;

	t->sysctl_header =
		register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free_procname;

	p->sysctl_table = t;
	return 0;

free_procname:
	kfree(t->dev_name);
free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
2896
2897 void neigh_sysctl_unregister(struct neigh_parms *p)
2898 {
2899         if (p->sysctl_table) {
2900                 struct neigh_sysctl_table *t = p->sysctl_table;
2901                 p->sysctl_table = NULL;
2902                 unregister_sysctl_table(t->sysctl_header);
2903                 kfree(t->dev_name);
2904                 kfree(t);
2905         }
2906 }
2907 EXPORT_SYMBOL(neigh_sysctl_unregister);
2908
2909 #endif  /* CONFIG_SYSCTL */
2910
/* Register the rtnetlink handlers for neighbour entries (add/delete/
 * dump) and neighbour tables (dump/set).  Runs once at subsys initcall
 * time; always succeeds. */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);
2925