neigh: Add device constructor/destructor capability.
[~shefty/rdma-dev.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39
/*
 * Compile-time verbosity of the neighbour cache messages:
 *   0 - NEIGH_PRINTK1 and NEIGH_PRINTK2 both expand to nothing,
 *   1 - NEIGH_PRINTK1 prints, NEIGH_PRINTK2 expands to nothing,
 *   2 - both print.
 */
#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)

#if NEIGH_DEBUG >= 1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#else
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#endif

#if NEIGH_DEBUG >= 2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#else
#define NEIGH_PRINTK2 NEIGH_NOPRINTK
#endif

/* Mask applied to the proxy-neighbour hash value (bucket index 0..0xF). */
#define PNEIGH_HASHMASK		0xF
57
/* Head pointer for the neighbour tables known to this file. */
static struct neigh_table *neigh_tables;

/* Forward declarations for helpers that are used before they are defined. */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
static void neigh_update_notify(struct neighbour *neigh);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_timer_handler(unsigned long arg);

#ifdef CONFIG_PROC_FS
/* Only declared when procfs support is configured. */
static const struct file_operations neigh_stat_seq_fops;
#endif
67
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send something to the network.
     It will result in deadlocks if a backend/driver wants to use the
     neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with a reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure that we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */
98
/* rwlock guarding the list of neighbour tables (see the locking notes above). */
99 static DEFINE_RWLOCK(neigh_tbl_lock);
100
101 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
102 {
103         kfree_skb(skb);
104         return -ENETDOWN;
105 }
106
107 static void neigh_cleanup_and_release(struct neighbour *neigh)
108 {
109         if (neigh->parms->neigh_cleanup)
110                 neigh->parms->neigh_cleanup(neigh);
111
112         __neigh_notify(neigh, RTM_DELNEIGH, 0);
113         neigh_release(neigh);
114 }
115
/*
 * Draw a random value from the interval (1/2)*base ... (3/2)*base.
 * This corresponds to the default IPv6 settings and is intentionally
 * not overridable.  A @base of zero yields zero.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (net_random() % base) + (base >> 1);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
127
128
129 static int neigh_forced_gc(struct neigh_table *tbl)
130 {
131         int shrunk = 0;
132         int i;
133         struct neigh_hash_table *nht;
134
135         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
136
137         write_lock_bh(&tbl->lock);
138         nht = rcu_dereference_protected(tbl->nht,
139                                         lockdep_is_held(&tbl->lock));
140         for (i = 0; i < (1 << nht->hash_shift); i++) {
141                 struct neighbour *n;
142                 struct neighbour __rcu **np;
143
144                 np = &nht->hash_buckets[i];
145                 while ((n = rcu_dereference_protected(*np,
146                                         lockdep_is_held(&tbl->lock))) != NULL) {
147                         /* Neighbour record may be discarded if:
148                          * - nobody refers to it.
149                          * - it is not permanent
150                          */
151                         write_lock(&n->lock);
152                         if (atomic_read(&n->refcnt) == 1 &&
153                             !(n->nud_state & NUD_PERMANENT)) {
154                                 rcu_assign_pointer(*np,
155                                         rcu_dereference_protected(n->next,
156                                                   lockdep_is_held(&tbl->lock)));
157                                 n->dead = 1;
158                                 shrunk  = 1;
159                                 write_unlock(&n->lock);
160                                 neigh_cleanup_and_release(n);
161                                 continue;
162                         }
163                         write_unlock(&n->lock);
164                         np = &n->next;
165                 }
166         }
167
168         tbl->last_flush = jiffies;
169
170         write_unlock_bh(&tbl->lock);
171
172         return shrunk;
173 }
174
175 static void neigh_add_timer(struct neighbour *n, unsigned long when)
176 {
177         neigh_hold(n);
178         if (unlikely(mod_timer(&n->timer, when))) {
179                 printk("NEIGH: BUG, double timer add, state is %x\n",
180                        n->nud_state);
181                 dump_stack();
182         }
183 }
184
185 static int neigh_del_timer(struct neighbour *n)
186 {
187         if ((n->nud_state & NUD_IN_TIMER) &&
188             del_timer(&n->timer)) {
189                 neigh_release(n);
190                 return 1;
191         }
192         return 0;
193 }
194
195 static void pneigh_queue_purge(struct sk_buff_head *list)
196 {
197         struct sk_buff *skb;
198
199         while ((skb = skb_dequeue(list)) != NULL) {
200                 dev_put(skb->dev);
201                 kfree_skb(skb);
202         }
203 }
204
205 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
206 {
207         int i;
208         struct neigh_hash_table *nht;
209
210         nht = rcu_dereference_protected(tbl->nht,
211                                         lockdep_is_held(&tbl->lock));
212
213         for (i = 0; i < (1 << nht->hash_shift); i++) {
214                 struct neighbour *n;
215                 struct neighbour __rcu **np = &nht->hash_buckets[i];
216
217                 while ((n = rcu_dereference_protected(*np,
218                                         lockdep_is_held(&tbl->lock))) != NULL) {
219                         if (dev && n->dev != dev) {
220                                 np = &n->next;
221                                 continue;
222                         }
223                         rcu_assign_pointer(*np,
224                                    rcu_dereference_protected(n->next,
225                                                 lockdep_is_held(&tbl->lock)));
226                         write_lock(&n->lock);
227                         neigh_del_timer(n);
228                         n->dead = 1;
229
230                         if (atomic_read(&n->refcnt) != 1) {
231                                 /* The most unpleasant situation.
232                                    We must destroy neighbour entry,
233                                    but someone still uses it.
234
235                                    The destroy will be delayed until
236                                    the last user releases us, but
237                                    we must kill timers etc. and move
238                                    it to safe state.
239                                  */
240                                 skb_queue_purge(&n->arp_queue);
241                                 n->arp_queue_len_bytes = 0;
242                                 n->output = neigh_blackhole;
243                                 if (n->nud_state & NUD_VALID)
244                                         n->nud_state = NUD_NOARP;
245                                 else
246                                         n->nud_state = NUD_NONE;
247                                 NEIGH_PRINTK2("neigh %p is stray.\n", n);
248                         }
249                         write_unlock(&n->lock);
250                         neigh_cleanup_and_release(n);
251                 }
252         }
253 }
254
255 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
256 {
257         write_lock_bh(&tbl->lock);
258         neigh_flush_dev(tbl, dev);
259         write_unlock_bh(&tbl->lock);
260 }
261 EXPORT_SYMBOL(neigh_changeaddr);
262
263 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
264 {
265         write_lock_bh(&tbl->lock);
266         neigh_flush_dev(tbl, dev);
267         pneigh_ifdown(tbl, dev);
268         write_unlock_bh(&tbl->lock);
269
270         del_timer_sync(&tbl->proxy_timer);
271         pneigh_queue_purge(&tbl->proxy_queue);
272         return 0;
273 }
274 EXPORT_SYMBOL(neigh_ifdown);
275
276 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
277 {
278         struct neighbour *n = NULL;
279         unsigned long now = jiffies;
280         int entries;
281
282         entries = atomic_inc_return(&tbl->entries) - 1;
283         if (entries >= tbl->gc_thresh3 ||
284             (entries >= tbl->gc_thresh2 &&
285              time_after(now, tbl->last_flush + 5 * HZ))) {
286                 if (!neigh_forced_gc(tbl) &&
287                     entries >= tbl->gc_thresh3)
288                         goto out_entries;
289         }
290
291         if (tbl->entry_size)
292                 n = kzalloc(tbl->entry_size, GFP_ATOMIC);
293         else {
294                 int sz = sizeof(*n) + tbl->key_len;
295
296                 sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
297                 sz += dev->neigh_priv_len;
298                 n = kzalloc(sz, GFP_ATOMIC);
299         }
300         if (!n)
301                 goto out_entries;
302
303         skb_queue_head_init(&n->arp_queue);
304         rwlock_init(&n->lock);
305         seqlock_init(&n->ha_lock);
306         n->updated        = n->used = now;
307         n->nud_state      = NUD_NONE;
308         n->output         = neigh_blackhole;
309         seqlock_init(&n->hh.hh_lock);
310         n->parms          = neigh_parms_clone(&tbl->parms);
311         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
312
313         NEIGH_CACHE_STAT_INC(tbl, allocs);
314         n->tbl            = tbl;
315         atomic_set(&n->refcnt, 1);
316         n->dead           = 1;
317 out:
318         return n;
319
320 out_entries:
321         atomic_dec(&tbl->entries);
322         goto out;
323 }
324
325 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
326 {
327         size_t size = (1 << shift) * sizeof(struct neighbour *);
328         struct neigh_hash_table *ret;
329         struct neighbour __rcu **buckets;
330
331         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
332         if (!ret)
333                 return NULL;
334         if (size <= PAGE_SIZE)
335                 buckets = kzalloc(size, GFP_ATOMIC);
336         else
337                 buckets = (struct neighbour __rcu **)
338                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
339                                            get_order(size));
340         if (!buckets) {
341                 kfree(ret);
342                 return NULL;
343         }
344         ret->hash_buckets = buckets;
345         ret->hash_shift = shift;
346         get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
347         ret->hash_rnd |= 1;
348         return ret;
349 }
350
351 static void neigh_hash_free_rcu(struct rcu_head *head)
352 {
353         struct neigh_hash_table *nht = container_of(head,
354                                                     struct neigh_hash_table,
355                                                     rcu);
356         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
357         struct neighbour __rcu **buckets = nht->hash_buckets;
358
359         if (size <= PAGE_SIZE)
360                 kfree(buckets);
361         else
362                 free_pages((unsigned long)buckets, get_order(size));
363         kfree(nht);
364 }
365
366 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
367                                                 unsigned long new_shift)
368 {
369         unsigned int i, hash;
370         struct neigh_hash_table *new_nht, *old_nht;
371
372         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
373
374         old_nht = rcu_dereference_protected(tbl->nht,
375                                             lockdep_is_held(&tbl->lock));
376         new_nht = neigh_hash_alloc(new_shift);
377         if (!new_nht)
378                 return old_nht;
379
380         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
381                 struct neighbour *n, *next;
382
383                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
384                                                    lockdep_is_held(&tbl->lock));
385                      n != NULL;
386                      n = next) {
387                         hash = tbl->hash(n->primary_key, n->dev,
388                                          new_nht->hash_rnd);
389
390                         hash >>= (32 - new_nht->hash_shift);
391                         next = rcu_dereference_protected(n->next,
392                                                 lockdep_is_held(&tbl->lock));
393
394                         rcu_assign_pointer(n->next,
395                                            rcu_dereference_protected(
396                                                 new_nht->hash_buckets[hash],
397                                                 lockdep_is_held(&tbl->lock)));
398                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
399                 }
400         }
401
402         rcu_assign_pointer(tbl->nht, new_nht);
403         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
404         return new_nht;
405 }
406
407 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
408                                struct net_device *dev)
409 {
410         struct neighbour *n;
411         int key_len = tbl->key_len;
412         u32 hash_val;
413         struct neigh_hash_table *nht;
414
415         NEIGH_CACHE_STAT_INC(tbl, lookups);
416
417         rcu_read_lock_bh();
418         nht = rcu_dereference_bh(tbl->nht);
419         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
420
421         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
422              n != NULL;
423              n = rcu_dereference_bh(n->next)) {
424                 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
425                         if (!atomic_inc_not_zero(&n->refcnt))
426                                 n = NULL;
427                         NEIGH_CACHE_STAT_INC(tbl, hits);
428                         break;
429                 }
430         }
431
432         rcu_read_unlock_bh();
433         return n;
434 }
435 EXPORT_SYMBOL(neigh_lookup);
436
437 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
438                                      const void *pkey)
439 {
440         struct neighbour *n;
441         int key_len = tbl->key_len;
442         u32 hash_val;
443         struct neigh_hash_table *nht;
444
445         NEIGH_CACHE_STAT_INC(tbl, lookups);
446
447         rcu_read_lock_bh();
448         nht = rcu_dereference_bh(tbl->nht);
449         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
450
451         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
452              n != NULL;
453              n = rcu_dereference_bh(n->next)) {
454                 if (!memcmp(n->primary_key, pkey, key_len) &&
455                     net_eq(dev_net(n->dev), net)) {
456                         if (!atomic_inc_not_zero(&n->refcnt))
457                                 n = NULL;
458                         NEIGH_CACHE_STAT_INC(tbl, hits);
459                         break;
460                 }
461         }
462
463         rcu_read_unlock_bh();
464         return n;
465 }
466 EXPORT_SYMBOL(neigh_lookup_nodev);
467
468 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
469                                struct net_device *dev)
470 {
471         u32 hash_val;
472         int key_len = tbl->key_len;
473         int error;
474         struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
475         struct neigh_hash_table *nht;
476
477         if (!n) {
478                 rc = ERR_PTR(-ENOBUFS);
479                 goto out;
480         }
481
482         memcpy(n->primary_key, pkey, key_len);
483         n->dev = dev;
484         dev_hold(dev);
485
486         /* Protocol specific setup. */
487         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
488                 rc = ERR_PTR(error);
489                 goto out_neigh_release;
490         }
491
492         if (dev->netdev_ops->ndo_neigh_construct) {
493                 error = dev->netdev_ops->ndo_neigh_construct(n);
494                 if (error < 0) {
495                         rc = ERR_PTR(error);
496                         goto out_neigh_release;
497                 }
498         }
499
500         /* Device specific setup. */
501         if (n->parms->neigh_setup &&
502             (error = n->parms->neigh_setup(n)) < 0) {
503                 rc = ERR_PTR(error);
504                 goto out_neigh_release;
505         }
506
507         n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
508
509         write_lock_bh(&tbl->lock);
510         nht = rcu_dereference_protected(tbl->nht,
511                                         lockdep_is_held(&tbl->lock));
512
513         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
514                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
515
516         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
517
518         if (n->parms->dead) {
519                 rc = ERR_PTR(-EINVAL);
520                 goto out_tbl_unlock;
521         }
522
523         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
524                                             lockdep_is_held(&tbl->lock));
525              n1 != NULL;
526              n1 = rcu_dereference_protected(n1->next,
527                         lockdep_is_held(&tbl->lock))) {
528                 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
529                         neigh_hold(n1);
530                         rc = n1;
531                         goto out_tbl_unlock;
532                 }
533         }
534
535         n->dead = 0;
536         neigh_hold(n);
537         rcu_assign_pointer(n->next,
538                            rcu_dereference_protected(nht->hash_buckets[hash_val],
539                                                      lockdep_is_held(&tbl->lock)));
540         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
541         write_unlock_bh(&tbl->lock);
542         NEIGH_PRINTK2("neigh %p is created.\n", n);
543         rc = n;
544 out:
545         return rc;
546 out_tbl_unlock:
547         write_unlock_bh(&tbl->lock);
548 out_neigh_release:
549         neigh_release(n);
550         goto out;
551 }
552 EXPORT_SYMBOL(neigh_create);
553
554 static u32 pneigh_hash(const void *pkey, int key_len)
555 {
556         u32 hash_val = *(u32 *)(pkey + key_len - 4);
557         hash_val ^= (hash_val >> 16);
558         hash_val ^= hash_val >> 8;
559         hash_val ^= hash_val >> 4;
560         hash_val &= PNEIGH_HASHMASK;
561         return hash_val;
562 }
563
564 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
565                                               struct net *net,
566                                               const void *pkey,
567                                               int key_len,
568                                               struct net_device *dev)
569 {
570         while (n) {
571                 if (!memcmp(n->key, pkey, key_len) &&
572                     net_eq(pneigh_net(n), net) &&
573                     (n->dev == dev || !n->dev))
574                         return n;
575                 n = n->next;
576         }
577         return NULL;
578 }
579
580 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
581                 struct net *net, const void *pkey, struct net_device *dev)
582 {
583         int key_len = tbl->key_len;
584         u32 hash_val = pneigh_hash(pkey, key_len);
585
586         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
587                                  net, pkey, key_len, dev);
588 }
589 EXPORT_SYMBOL_GPL(__pneigh_lookup);
590
591 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
592                                     struct net *net, const void *pkey,
593                                     struct net_device *dev, int creat)
594 {
595         struct pneigh_entry *n;
596         int key_len = tbl->key_len;
597         u32 hash_val = pneigh_hash(pkey, key_len);
598
599         read_lock_bh(&tbl->lock);
600         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
601                               net, pkey, key_len, dev);
602         read_unlock_bh(&tbl->lock);
603
604         if (n || !creat)
605                 goto out;
606
607         ASSERT_RTNL();
608
609         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
610         if (!n)
611                 goto out;
612
613         write_pnet(&n->net, hold_net(net));
614         memcpy(n->key, pkey, key_len);
615         n->dev = dev;
616         if (dev)
617                 dev_hold(dev);
618
619         if (tbl->pconstructor && tbl->pconstructor(n)) {
620                 if (dev)
621                         dev_put(dev);
622                 release_net(net);
623                 kfree(n);
624                 n = NULL;
625                 goto out;
626         }
627
628         write_lock_bh(&tbl->lock);
629         n->next = tbl->phash_buckets[hash_val];
630         tbl->phash_buckets[hash_val] = n;
631         write_unlock_bh(&tbl->lock);
632 out:
633         return n;
634 }
635 EXPORT_SYMBOL(pneigh_lookup);
636
637
638 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
639                   struct net_device *dev)
640 {
641         struct pneigh_entry *n, **np;
642         int key_len = tbl->key_len;
643         u32 hash_val = pneigh_hash(pkey, key_len);
644
645         write_lock_bh(&tbl->lock);
646         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
647              np = &n->next) {
648                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
649                     net_eq(pneigh_net(n), net)) {
650                         *np = n->next;
651                         write_unlock_bh(&tbl->lock);
652                         if (tbl->pdestructor)
653                                 tbl->pdestructor(n);
654                         if (n->dev)
655                                 dev_put(n->dev);
656                         release_net(pneigh_net(n));
657                         kfree(n);
658                         return 0;
659                 }
660         }
661         write_unlock_bh(&tbl->lock);
662         return -ENOENT;
663 }
664
665 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
666 {
667         struct pneigh_entry *n, **np;
668         u32 h;
669
670         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
671                 np = &tbl->phash_buckets[h];
672                 while ((n = *np) != NULL) {
673                         if (!dev || n->dev == dev) {
674                                 *np = n->next;
675                                 if (tbl->pdestructor)
676                                         tbl->pdestructor(n);
677                                 if (n->dev)
678                                         dev_put(n->dev);
679                                 release_net(pneigh_net(n));
680                                 kfree(n);
681                                 continue;
682                         }
683                         np = &n->next;
684                 }
685         }
686         return -ENOENT;
687 }
688
689 static void neigh_parms_destroy(struct neigh_parms *parms);
690
691 static inline void neigh_parms_put(struct neigh_parms *parms)
692 {
693         if (atomic_dec_and_test(&parms->refcnt))
694                 neigh_parms_destroy(parms);
695 }
696
697 /*
698  *      neighbour must already be out of the table;
699  *
700  */
701 void neigh_destroy(struct neighbour *neigh)
702 {
703         struct net_device *dev = neigh->dev;
704
705         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
706
707         if (!neigh->dead) {
708                 printk(KERN_WARNING
709                        "Destroying alive neighbour %p\n", neigh);
710                 dump_stack();
711                 return;
712         }
713
714         if (neigh_del_timer(neigh))
715                 printk(KERN_WARNING "Impossible event.\n");
716
717         skb_queue_purge(&neigh->arp_queue);
718         neigh->arp_queue_len_bytes = 0;
719
720         if (dev->netdev_ops->ndo_neigh_destroy)
721                 dev->netdev_ops->ndo_neigh_destroy(neigh);
722
723         dev_put(dev);
724         neigh_parms_put(neigh->parms);
725
726         NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
727
728         atomic_dec(&neigh->tbl->entries);
729         kfree_rcu(neigh, rcu);
730 }
731 EXPORT_SYMBOL(neigh_destroy);
732
733 /* Neighbour state is suspicious;
734    disable fast path.
735
736    Called with write_locked neigh.
737  */
738 static void neigh_suspect(struct neighbour *neigh)
739 {
740         NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
741
742         neigh->output = neigh->ops->output;
743 }
744
745 /* Neighbour state is OK;
746    enable fast path.
747
748    Called with write_locked neigh.
749  */
750 static void neigh_connect(struct neighbour *neigh)
751 {
752         NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
753
754         neigh->output = neigh->ops->connected_output;
755 }
756
757 static void neigh_periodic_work(struct work_struct *work)
758 {
759         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
760         struct neighbour *n;
761         struct neighbour __rcu **np;
762         unsigned int i;
763         struct neigh_hash_table *nht;
764
765         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
766
767         write_lock_bh(&tbl->lock);
768         nht = rcu_dereference_protected(tbl->nht,
769                                         lockdep_is_held(&tbl->lock));
770
771         /*
772          *      periodically recompute ReachableTime from random function
773          */
774
775         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
776                 struct neigh_parms *p;
777                 tbl->last_rand = jiffies;
778                 for (p = &tbl->parms; p; p = p->next)
779                         p->reachable_time =
780                                 neigh_rand_reach_time(p->base_reachable_time);
781         }
782
783         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
784                 np = &nht->hash_buckets[i];
785
786                 while ((n = rcu_dereference_protected(*np,
787                                 lockdep_is_held(&tbl->lock))) != NULL) {
788                         unsigned int state;
789
790                         write_lock(&n->lock);
791
792                         state = n->nud_state;
793                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
794                                 write_unlock(&n->lock);
795                                 goto next_elt;
796                         }
797
798                         if (time_before(n->used, n->confirmed))
799                                 n->used = n->confirmed;
800
801                         if (atomic_read(&n->refcnt) == 1 &&
802                             (state == NUD_FAILED ||
803                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
804                                 *np = n->next;
805                                 n->dead = 1;
806                                 write_unlock(&n->lock);
807                                 neigh_cleanup_and_release(n);
808                                 continue;
809                         }
810                         write_unlock(&n->lock);
811
812 next_elt:
813                         np = &n->next;
814                 }
815                 /*
816                  * It's fine to release lock here, even if hash table
817                  * grows while we are preempted.
818                  */
819                 write_unlock_bh(&tbl->lock);
820                 cond_resched();
821                 write_lock_bh(&tbl->lock);
822         }
823         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
824          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
825          * base_reachable_time.
826          */
827         schedule_delayed_work(&tbl->gc_work,
828                               tbl->parms.base_reachable_time >> 1);
829         write_unlock_bh(&tbl->lock);
830 }
831
832 static __inline__ int neigh_max_probes(struct neighbour *n)
833 {
834         struct neigh_parms *p = n->parms;
835         return (n->nud_state & NUD_PROBE) ?
836                 p->ucast_probes :
837                 p->ucast_probes + p->app_probes + p->mcast_probes;
838 }
839
840 static void neigh_invalidate(struct neighbour *neigh)
841         __releases(neigh->lock)
842         __acquires(neigh->lock)
843 {
844         struct sk_buff *skb;
845
846         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
847         NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
848         neigh->updated = jiffies;
849
850         /* It is very thin place. report_unreachable is very complicated
851            routine. Particularly, it can hit the same neighbour entry!
852
853            So that, we try to be accurate and avoid dead loop. --ANK
854          */
855         while (neigh->nud_state == NUD_FAILED &&
856                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
857                 write_unlock(&neigh->lock);
858                 neigh->ops->error_report(neigh, skb);
859                 write_lock(&neigh->lock);
860         }
861         skb_queue_purge(&neigh->arp_queue);
862         neigh->arp_queue_len_bytes = 0;
863 }
864
865 static void neigh_probe(struct neighbour *neigh)
866         __releases(neigh->lock)
867 {
868         struct sk_buff *skb = skb_peek(&neigh->arp_queue);
869         /* keep skb alive even if arp_queue overflows */
870         if (skb)
871                 skb = skb_copy(skb, GFP_ATOMIC);
872         write_unlock(&neigh->lock);
873         neigh->ops->solicit(neigh, skb);
874         atomic_inc(&neigh->probes);
875         kfree_skb(skb);
876 }
877
/* Called when a timer expires for a neighbour entry.
 *
 * @arg is the struct neighbour pointer cast to unsigned long.
 *
 * Under neigh->lock (write) the handler advances the entry's NUD state:
 *
 *   REACHABLE -> stays REACHABLE while confirmed + reachable_time has not
 *                passed; otherwise DELAY if the entry was used within
 *                delay_probe_time, else STALE.
 *   DELAY     -> REACHABLE if it was confirmed within delay_probe_time,
 *                otherwise PROBE (probe counter reset to 0).
 *   PROBE / INCOMPLETE -> FAILED once neigh_max_probes() probes were sent.
 *
 * While the entry remains in a NUD_IN_TIMER state the timer is re-armed,
 * and for PROBE/INCOMPLETE another solicitation is sent.  The function ends
 * by dropping one reference on the entry (presumably the one held on behalf
 * of the timer that just fired -- confirm against neigh_add_timer()).
 */

static void neigh_timer_handler(unsigned long arg)
{
        unsigned long now, next;
        struct neighbour *neigh = (struct neighbour *)arg;
        unsigned state;
        int notify = 0;

        write_lock(&neigh->lock);

        state = neigh->nud_state;
        now = jiffies;
        next = now + HZ;        /* default; every timed branch below overrides it */

        /* Entry is no longer in a timed state: just unlock and release. */
        if (!(state & NUD_IN_TIMER))
                goto out;

        if (state & NUD_REACHABLE) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->reachable_time)) {
                        /* Confirmation is still fresh; wake up when it expires. */
                        NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else if (time_before_eq(now,
                                          neigh->used + neigh->parms->delay_probe_time)) {
                        /* Confirmation expired but the entry was used recently. */
                        NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
                        neigh->nud_state = NUD_DELAY;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        next = now + neigh->parms->delay_probe_time;
                } else {
                        /* Neither confirmed nor used recently; STALE is not a
                         * timed state, so the timer is not re-armed below.
                         */
                        NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
                        neigh->nud_state = NUD_STALE;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        notify = 1;
                }
        } else if (state & NUD_DELAY) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->delay_probe_time)) {
                        /* A confirmation arrived during the delay period. */
                        NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
                        neigh->nud_state = NUD_REACHABLE;
                        neigh->updated = jiffies;
                        neigh_connect(neigh);
                        notify = 1;
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else {
                        /* No confirmation: start probing from a zeroed counter. */
                        NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
                        neigh->nud_state = NUD_PROBE;
                        neigh->updated = jiffies;
                        atomic_set(&neigh->probes, 0);
                        next = now + neigh->parms->retrans_time;
                }
        } else {
                /* NUD_PROBE|NUD_INCOMPLETE */
                next = now + neigh->parms->retrans_time;
        }

        /* Probe budget exhausted: fail the entry and flush its queue.
         * neigh_invalidate() temporarily drops neigh->lock, hence nud_state
         * is re-read (not the cached 'state') in the checks that follow.
         */
        if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
            atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
                neigh->nud_state = NUD_FAILED;
                notify = 1;
                neigh_invalidate(neigh);
        }

        if (neigh->nud_state & NUD_IN_TIMER) {
                /* Never re-arm sooner than half a second from now. */
                if (time_before(next, jiffies + HZ/2))
                        next = jiffies + HZ/2;
                /* A zero return from mod_timer() is paired with taking a
                 * reference for the newly armed timer.
                 */
                if (!mod_timer(&neigh->timer, next))
                        neigh_hold(neigh);
        }
        if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
                /* neigh_probe() releases neigh->lock for us. */
                neigh_probe(neigh);
        } else {
out:
                write_unlock(&neigh->lock);
        }

        /* Notification is sent with the lock already released. */
        if (notify)
                neigh_update_notify(neigh);

        neigh_release(neigh);
}
961
/* Note that @neigh is about to be used for output and, if its address is
 * not resolved yet, queue @skb (which may be NULL) until it is.
 *
 * Returns 0 when the entry is in a CONNECTED, DELAY or PROBE state, or was
 * STALE (it is then moved to DELAY and its timer armed); @skb is left
 * untouched in that case.
 * Returns 1 when the entry is, or has just become, INCOMPLETE -- a non-NULL
 * @skb is then appended to neigh->arp_queue -- or when resolution cannot be
 * started at all, in which case the entry is marked FAILED and @skb is freed.
 *
 * Takes neigh->lock with BHs disabled; neither is held on return.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc;
        bool immediate_probe = false;

        write_lock_bh(&neigh->lock);

        rc = 0;
        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
                goto out_unlock_bh;

        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                /* Resolution has not been started for this entry yet. */
                if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
                        unsigned long next, now = jiffies;

                        /* The counter starts at ucast_probes; together with the
                         * limit computed by neigh_max_probes() this presumably
                         * leaves only the mcast + app budget for this phase.
                         */
                        atomic_set(&neigh->probes, neigh->parms->ucast_probes);
                        neigh->nud_state     = NUD_INCOMPLETE;
                        neigh->updated = now;
                        /* First retransmit no sooner than HZ/2 from now. */
                        next = now + max(neigh->parms->retrans_time, HZ/2);
                        neigh_add_timer(neigh, next);
                        immediate_probe = true;
                } else {
                        /* No multicast/application probes configured: fail
                         * right away and drop the packet.
                         */
                        neigh->nud_state = NUD_FAILED;
                        neigh->updated = jiffies;
                        write_unlock_bh(&neigh->lock);

                        kfree_skb(skb);
                        return 1;
                }
        } else if (neigh->nud_state & NUD_STALE) {
                /* rc stays 0 on this path. */
                NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
                neigh->nud_state = NUD_DELAY;
                neigh->updated = jiffies;
                neigh_add_timer(neigh,
                                jiffies + neigh->parms->delay_probe_time);
        }

        if (neigh->nud_state == NUD_INCOMPLETE) {
                if (skb) {
                        /* Make room by dropping the oldest queued packets
                         * until the new one fits within queue_len_bytes (or
                         * the queue is empty).
                         */
                        while (neigh->arp_queue_len_bytes + skb->truesize >
                               neigh->parms->queue_len_bytes) {
                                struct sk_buff *buff;

                                buff = __skb_dequeue(&neigh->arp_queue);
                                if (!buff)
                                        break;
                                neigh->arp_queue_len_bytes -= buff->truesize;
                                kfree_skb(buff);
                                NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
                        }
                        skb_dst_force(skb);
                        __skb_queue_tail(&neigh->arp_queue, skb);
                        neigh->arp_queue_len_bytes += skb->truesize;
                }
                rc = 1;
        }
out_unlock_bh:
        /* The lock and the BH-disable taken by write_lock_bh() are undone
         * separately because neigh_probe() releases only the lock itself.
         */
        if (immediate_probe)
                neigh_probe(neigh);
        else
                write_unlock(&neigh->lock);
        local_bh_enable();
        return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
1027
1028 static void neigh_update_hhs(struct neighbour *neigh)
1029 {
1030         struct hh_cache *hh;
1031         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1032                 = NULL;
1033
1034         if (neigh->dev->header_ops)
1035                 update = neigh->dev->header_ops->cache_update;
1036
1037         if (update) {
1038                 hh = &neigh->hh;
1039                 if (hh->hh_len) {
1040                         write_seqlock_bh(&hh->hh_lock);
1041                         update(hh, neigh->dev, neigh->ha);
1042                         write_sequnlock_bh(&hh->hh_lock);
1043                 }
1044         }
1045 }
1046
1047
1048
/* Generic update routine.
 *
 * @neigh:  entry to update.
 * @lladdr: new link-layer address, or NULL if none is supplied.
 * @new:    new NUD state.
 * @flags:  combination of
 *      NEIGH_UPDATE_F_OVERRIDE         allows an existing lladdr to be
 *                                      overridden if the new one differs.
 *      NEIGH_UPDATE_F_WEAK_OVERRIDE    instead of overriding a different
 *                                      lladdr of a "connected" entry, keep
 *                                      the old address and only suspect it.
 *                                      It also allows the current state to
 *                                      be retained if lladdr is unchanged.
 *      NEIGH_UPDATE_F_ADMIN            the change is administrative.
 *      NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows the existing NTF_ROUTER flag
 *                                      to be overridden.
 *      NEIGH_UPDATE_F_ISROUTER         the neighbour is known to be a
 *                                      router.
 *
 * Returns 0 on success, -EPERM when a non-administrative update targets a
 * NUD_NOARP or NUD_PERMANENT entry, and -EINVAL when no address is supplied
 * and the entry has no valid one cached.
 *
 * Caller MUST hold a reference count on the entry.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                 u32 flags)
{
        u8 old;
        int err;
        int notify = 0;
        struct net_device *dev;
        int update_isrouter = 0;

        write_lock_bh(&neigh->lock);

        dev    = neigh->dev;
        old    = neigh->nud_state;
        err    = -EPERM;

        /* Only administrative updates may touch NOARP/PERMANENT entries. */
        if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
            (old & (NUD_NOARP | NUD_PERMANENT)))
                goto out;

        /* Transition to a non-valid state: no address handling is needed. */
        if (!(new & NUD_VALID)) {
                neigh_del_timer(neigh);
                if (old & NUD_CONNECTED)
                        neigh_suspect(neigh);
                neigh->nud_state = new;
                err = 0;
                notify = old & NUD_VALID;
                /* Resolution was in progress and is now declared failed:
                 * report and flush the queued packets.
                 */
                if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
                    (new & NUD_FAILED)) {
                        neigh_invalidate(neigh);
                        notify = 1;
                }
                goto out;
        }

        /* Compare new lladdr with cached one.
         *
         * From here on "lladdr == neigh->ha" (pointer equality) encodes
         * "the address does not change".
         */
        if (!dev->addr_len) {
                /* First case: device needs no address. */
                lladdr = neigh->ha;
        } else if (lladdr) {
                /* The second case: if something is already cached
                   and a new address is proposed:
                   - compare new & old
                   - if they are different, check override flag
                 */
                if ((old & NUD_VALID) &&
                    !memcmp(lladdr, neigh->ha, dev->addr_len))
                        lladdr = neigh->ha;
        } else {
                /* No address is supplied; if we know something,
                   use it, otherwise discard the request.
                 */
                err = -EINVAL;
                if (!(old & NUD_VALID))
                        goto out;
                lladdr = neigh->ha;
        }

        if (new & NUD_CONNECTED)
                neigh->confirmed = jiffies;
        neigh->updated = jiffies;

        /* If entry was valid and address is not changed,
           do not change entry state, if new one is STALE.
         */
        err = 0;
        update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
        if (old & NUD_VALID) {
                if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                        /* Different address but no permission to override:
                         * the router flag is left alone too.
                         */
                        update_isrouter = 0;
                        if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
                            (old & NUD_CONNECTED)) {
                                /* Keep the old address, but stop trusting it. */
                                lladdr = neigh->ha;
                                new = NUD_STALE;
                        } else
                                goto out;
                } else {
                        if (lladdr == neigh->ha && new == NUD_STALE &&
                            ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
                             (old & NUD_CONNECTED))
                            )
                                new = old;
                }
        }

        if (new != old) {
                /* Re-arm the timer for timed states: REACHABLE expires after
                 * reachable_time, any other timed state fires immediately.
                 */
                neigh_del_timer(neigh);
                if (new & NUD_IN_TIMER)
                        neigh_add_timer(neigh, (jiffies +
                                                ((new & NUD_REACHABLE) ?
                                                 neigh->parms->reachable_time :
                                                 0)));
                neigh->nud_state = new;
        }

        if (lladdr != neigh->ha) {
                /* Install the new address under ha_lock so that readers using
                 * read_seqbegin()/read_seqretry() see a consistent copy, then
                 * refresh the cached hardware header.
                 */
                write_seqlock(&neigh->ha_lock);
                memcpy(&neigh->ha, lladdr, dev->addr_len);
                write_sequnlock(&neigh->ha_lock);
                neigh_update_hhs(neigh);
                /* Not connected: back-date the confirmation by twice
                 * base_reachable_time.
                 */
                if (!(new & NUD_CONNECTED))
                        neigh->confirmed = jiffies -
                                      (neigh->parms->base_reachable_time << 1);
                notify = 1;
        }
        if (new == old)
                goto out;
        if (new & NUD_CONNECTED)
                neigh_connect(neigh);
        else
                neigh_suspect(neigh);
        if (!(old & NUD_VALID)) {
                struct sk_buff *skb;

                /* The entry has just become valid: send out everything that
                 * was queued while waiting for resolution.  The lock is
                 * dropped around each transmit, so the state is re-checked on
                 * every iteration.
                 */

                /* Again: avoid dead loop if something went wrong */

                while (neigh->nud_state & NUD_VALID &&
                       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                        struct dst_entry *dst = skb_dst(skb);
                        struct neighbour *n2, *n1 = neigh;
                        write_unlock_bh(&neigh->lock);

                        rcu_read_lock();
                        /* On shaper/eql skb->dst->neighbour != neigh :( */
                        if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
                                n1 = n2;
                        n1->output(n1, skb);
                        rcu_read_unlock();

                        write_lock_bh(&neigh->lock);
                }
                /* Anything still queued (loop left early) is dropped. */
                skb_queue_purge(&neigh->arp_queue);
                neigh->arp_queue_len_bytes = 0;
        }
out:
        /* Runs on every exit path, including the error ones, as long as
         * update_isrouter is set.
         */
        if (update_isrouter) {
                neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
                        (neigh->flags | NTF_ROUTER) :
                        (neigh->flags & ~NTF_ROUTER);
        }
        write_unlock_bh(&neigh->lock);

        if (notify)
                neigh_update_notify(neigh);

        return err;
}
EXPORT_SYMBOL(neigh_update);
1217
1218 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1219                                  u8 *lladdr, void *saddr,
1220                                  struct net_device *dev)
1221 {
1222         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1223                                                  lladdr || !dev->addr_len);
1224         if (neigh)
1225                 neigh_update(neigh, lladdr, NUD_STALE,
1226                              NEIGH_UPDATE_F_OVERRIDE);
1227         return neigh;
1228 }
1229 EXPORT_SYMBOL(neigh_event_ns);
1230
1231 /* called with read_lock_bh(&n->lock); */
1232 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1233 {
1234         struct net_device *dev = dst->dev;
1235         __be16 prot = dst->ops->protocol;
1236         struct hh_cache *hh = &n->hh;
1237
1238         write_lock_bh(&n->lock);
1239
1240         /* Only one thread can come in here and initialize the
1241          * hh_cache entry.
1242          */
1243         if (!hh->hh_len)
1244                 dev->header_ops->cache(n, hh, prot);
1245
1246         write_unlock_bh(&n->lock);
1247 }
1248
1249 /* This function can be used in contexts, where only old dev_queue_xmit
1250  * worked, f.e. if you want to override normal output path (eql, shaper),
1251  * but resolution is not made yet.
1252  */
1253
1254 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1255 {
1256         struct net_device *dev = skb->dev;
1257
1258         __skb_pull(skb, skb_network_offset(skb));
1259
1260         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1261                             skb->len) < 0 &&
1262             dev->header_ops->rebuild(skb))
1263                 return 0;
1264
1265         return dev_queue_xmit(skb);
1266 }
1267 EXPORT_SYMBOL(neigh_compat_output);
1268
1269 /* Slow and careful. */
1270
1271 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1272 {
1273         struct dst_entry *dst = skb_dst(skb);
1274         int rc = 0;
1275
1276         if (!dst)
1277                 goto discard;
1278
1279         __skb_pull(skb, skb_network_offset(skb));
1280
1281         if (!neigh_event_send(neigh, skb)) {
1282                 int err;
1283                 struct net_device *dev = neigh->dev;
1284                 unsigned int seq;
1285
1286                 if (dev->header_ops->cache && !neigh->hh.hh_len)
1287                         neigh_hh_init(neigh, dst);
1288
1289                 do {
1290                         seq = read_seqbegin(&neigh->ha_lock);
1291                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1292                                               neigh->ha, NULL, skb->len);
1293                 } while (read_seqretry(&neigh->ha_lock, seq));
1294
1295                 if (err >= 0)
1296                         rc = dev_queue_xmit(skb);
1297                 else
1298                         goto out_kfree_skb;
1299         }
1300 out:
1301         return rc;
1302 discard:
1303         NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1304                       dst, neigh);
1305 out_kfree_skb:
1306         rc = -EINVAL;
1307         kfree_skb(skb);
1308         goto out;
1309 }
1310 EXPORT_SYMBOL(neigh_resolve_output);
1311
1312 /* As fast as possible without hh cache */
1313
1314 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1315 {
1316         struct net_device *dev = neigh->dev;
1317         unsigned int seq;
1318         int err;
1319
1320         __skb_pull(skb, skb_network_offset(skb));
1321
1322         do {
1323                 seq = read_seqbegin(&neigh->ha_lock);
1324                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1325                                       neigh->ha, NULL, skb->len);
1326         } while (read_seqretry(&neigh->ha_lock, seq));
1327
1328         if (err >= 0)
1329                 err = dev_queue_xmit(skb);
1330         else {
1331                 err = -EINVAL;
1332                 kfree_skb(skb);
1333         }
1334         return err;
1335 }
1336 EXPORT_SYMBOL(neigh_connected_output);
1337
/* Output routine that builds no link-layer header: @skb goes straight to
 * dev_queue_xmit() and @neigh is not looked at.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc;

        rc = dev_queue_xmit(skb);
        return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
1343
1344 static void neigh_proxy_process(unsigned long arg)
1345 {
1346         struct neigh_table *tbl = (struct neigh_table *)arg;
1347         long sched_next = 0;
1348         unsigned long now = jiffies;
1349         struct sk_buff *skb, *n;
1350
1351         spin_lock(&tbl->proxy_queue.lock);
1352
1353         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1354                 long tdif = NEIGH_CB(skb)->sched_next - now;
1355
1356                 if (tdif <= 0) {
1357                         struct net_device *dev = skb->dev;
1358
1359                         __skb_unlink(skb, &tbl->proxy_queue);
1360                         if (tbl->proxy_redo && netif_running(dev)) {
1361                                 rcu_read_lock();
1362                                 tbl->proxy_redo(skb);
1363                                 rcu_read_unlock();
1364                         } else {
1365                                 kfree_skb(skb);
1366                         }
1367
1368                         dev_put(dev);
1369                 } else if (!sched_next || tdif < sched_next)
1370                         sched_next = tdif;
1371         }
1372         del_timer(&tbl->proxy_timer);
1373         if (sched_next)
1374                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1375         spin_unlock(&tbl->proxy_queue.lock);
1376 }
1377
1378 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1379                     struct sk_buff *skb)
1380 {
1381         unsigned long now = jiffies;
1382         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1383
1384         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1385                 kfree_skb(skb);
1386                 return;
1387         }
1388
1389         NEIGH_CB(skb)->sched_next = sched_next;
1390         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1391
1392         spin_lock(&tbl->proxy_queue.lock);
1393         if (del_timer(&tbl->proxy_timer)) {
1394                 if (time_before(tbl->proxy_timer.expires, sched_next))
1395                         sched_next = tbl->proxy_timer.expires;
1396         }
1397         skb_dst_drop(skb);
1398         dev_hold(skb->dev);
1399         __skb_queue_tail(&tbl->proxy_queue, skb);
1400         mod_timer(&tbl->proxy_timer, sched_next);
1401         spin_unlock(&tbl->proxy_queue.lock);
1402 }
1403 EXPORT_SYMBOL(pneigh_enqueue);
1404
1405 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1406                                                       struct net *net, int ifindex)
1407 {
1408         struct neigh_parms *p;
1409
1410         for (p = &tbl->parms; p; p = p->next) {
1411                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1412                     (!p->dev && !ifindex))
1413                         return p;
1414         }
1415
1416         return NULL;
1417 }
1418
1419 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1420                                       struct neigh_table *tbl)
1421 {
1422         struct neigh_parms *p, *ref;
1423         struct net *net = dev_net(dev);
1424         const struct net_device_ops *ops = dev->netdev_ops;
1425
1426         ref = lookup_neigh_parms(tbl, net, 0);
1427         if (!ref)
1428                 return NULL;
1429
1430         p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1431         if (p) {
1432                 p->tbl            = tbl;
1433                 atomic_set(&p->refcnt, 1);
1434                 p->reachable_time =
1435                                 neigh_rand_reach_time(p->base_reachable_time);
1436
1437                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1438                         kfree(p);
1439                         return NULL;
1440                 }
1441
1442                 dev_hold(dev);
1443                 p->dev = dev;
1444                 write_pnet(&p->net, hold_net(net));
1445                 p->sysctl_table = NULL;
1446                 write_lock_bh(&tbl->lock);
1447                 p->next         = tbl->parms.next;
1448                 tbl->parms.next = p;
1449                 write_unlock_bh(&tbl->lock);
1450         }
1451         return p;
1452 }
1453 EXPORT_SYMBOL(neigh_parms_alloc);
1454
1455 static void neigh_rcu_free_parms(struct rcu_head *head)
1456 {
1457         struct neigh_parms *parms =
1458                 container_of(head, struct neigh_parms, rcu_head);
1459
1460         neigh_parms_put(parms);
1461 }
1462
1463 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1464 {
1465         struct neigh_parms **p;
1466
1467         if (!parms || parms == &tbl->parms)
1468                 return;
1469         write_lock_bh(&tbl->lock);
1470         for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1471                 if (*p == parms) {
1472                         *p = parms->next;
1473                         parms->dead = 1;
1474                         write_unlock_bh(&tbl->lock);
1475                         if (parms->dev)
1476                                 dev_put(parms->dev);
1477                         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1478                         return;
1479                 }
1480         }
1481         write_unlock_bh(&tbl->lock);
1482         NEIGH_PRINTK1("neigh_parms_release: not found\n");
1483 }
1484 EXPORT_SYMBOL(neigh_parms_release);
1485
/* Final teardown of @parms: drop its network-namespace reference and free
 * the structure.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        struct net *net = neigh_parms_net(parms);

        release_net(net);
        kfree(parms);
}
1491
1492 static struct lock_class_key neigh_table_proxy_queue_class;
1493
1494 void neigh_table_init_no_netlink(struct neigh_table *tbl)
1495 {
1496         unsigned long now = jiffies;
1497         unsigned long phsize;
1498
1499         write_pnet(&tbl->parms.net, &init_net);
1500         atomic_set(&tbl->parms.refcnt, 1);
1501         tbl->parms.reachable_time =
1502                           neigh_rand_reach_time(tbl->parms.base_reachable_time);
1503
1504         tbl->stats = alloc_percpu(struct neigh_statistics);
1505         if (!tbl->stats)
1506                 panic("cannot create neighbour cache statistics");
1507
1508 #ifdef CONFIG_PROC_FS
1509         if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1510                               &neigh_stat_seq_fops, tbl))
1511                 panic("cannot create neighbour proc dir entry");
1512 #endif
1513
1514         RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1515
1516         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1517         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1518
1519         if (!tbl->nht || !tbl->phash_buckets)
1520                 panic("cannot allocate neighbour cache hashes");
1521
1522         rwlock_init(&tbl->lock);
1523         INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
1524         schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1525         setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1526         skb_queue_head_init_class(&tbl->proxy_queue,
1527                         &neigh_table_proxy_queue_class);
1528
1529         tbl->last_flush = now;
1530         tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1531 }
1532 EXPORT_SYMBOL(neigh_table_init_no_netlink);
1533
1534 void neigh_table_init(struct neigh_table *tbl)
1535 {
1536         struct neigh_table *tmp;
1537
1538         neigh_table_init_no_netlink(tbl);
1539         write_lock(&neigh_tbl_lock);
1540         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1541                 if (tmp->family == tbl->family)
1542                         break;
1543         }
1544         tbl->next       = neigh_tables;
1545         neigh_tables    = tbl;
1546         write_unlock(&neigh_tbl_lock);
1547
1548         if (unlikely(tmp)) {
1549                 printk(KERN_ERR "NEIGH: Registering multiple tables for "
1550                        "family %d\n", tbl->family);
1551                 dump_stack();
1552         }
1553 }
1554 EXPORT_SYMBOL(neigh_table_init);
1555
1556 int neigh_table_clear(struct neigh_table *tbl)
1557 {
1558         struct neigh_table **tp;
1559
1560         /* It is not clean... Fix it to unload IPv6 module safely */
1561         cancel_delayed_work_sync(&tbl->gc_work);
1562         del_timer_sync(&tbl->proxy_timer);
1563         pneigh_queue_purge(&tbl->proxy_queue);
1564         neigh_ifdown(tbl, NULL);
1565         if (atomic_read(&tbl->entries))
1566                 printk(KERN_CRIT "neighbour leakage\n");
1567         write_lock(&neigh_tbl_lock);
1568         for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1569                 if (*tp == tbl) {
1570                         *tp = tbl->next;
1571                         break;
1572                 }
1573         }
1574         write_unlock(&neigh_tbl_lock);
1575
1576         call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1577                  neigh_hash_free_rcu);
1578         tbl->nht = NULL;
1579
1580         kfree(tbl->phash_buckets);
1581         tbl->phash_buckets = NULL;
1582
1583         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1584
1585         free_percpu(tbl->stats);
1586         tbl->stats = NULL;
1587
1588         return 0;
1589 }
1590 EXPORT_SYMBOL(neigh_table_clear);
1591
1592 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1593 {
1594         struct net *net = sock_net(skb->sk);
1595         struct ndmsg *ndm;
1596         struct nlattr *dst_attr;
1597         struct neigh_table *tbl;
1598         struct net_device *dev = NULL;
1599         int err = -EINVAL;
1600
1601         ASSERT_RTNL();
1602         if (nlmsg_len(nlh) < sizeof(*ndm))
1603                 goto out;
1604
1605         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1606         if (dst_attr == NULL)
1607                 goto out;
1608
1609         ndm = nlmsg_data(nlh);
1610         if (ndm->ndm_ifindex) {
1611                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1612                 if (dev == NULL) {
1613                         err = -ENODEV;
1614                         goto out;
1615                 }
1616         }
1617
1618         read_lock(&neigh_tbl_lock);
1619         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1620                 struct neighbour *neigh;
1621
1622                 if (tbl->family != ndm->ndm_family)
1623                         continue;
1624                 read_unlock(&neigh_tbl_lock);
1625
1626                 if (nla_len(dst_attr) < tbl->key_len)
1627                         goto out;
1628
1629                 if (ndm->ndm_flags & NTF_PROXY) {
1630                         err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1631                         goto out;
1632                 }
1633
1634                 if (dev == NULL)
1635                         goto out;
1636
1637                 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1638                 if (neigh == NULL) {
1639                         err = -ENOENT;
1640                         goto out;
1641                 }
1642
1643                 err = neigh_update(neigh, NULL, NUD_FAILED,
1644                                    NEIGH_UPDATE_F_OVERRIDE |
1645                                    NEIGH_UPDATE_F_ADMIN);
1646                 neigh_release(neigh);
1647                 goto out;
1648         }
1649         read_unlock(&neigh_tbl_lock);
1650         err = -EAFNOSUPPORT;
1651
1652 out:
1653         return err;
1654 }
1655
1656 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1657 {
1658         struct net *net = sock_net(skb->sk);
1659         struct ndmsg *ndm;
1660         struct nlattr *tb[NDA_MAX+1];
1661         struct neigh_table *tbl;
1662         struct net_device *dev = NULL;
1663         int err;
1664
1665         ASSERT_RTNL();
1666         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1667         if (err < 0)
1668                 goto out;
1669
1670         err = -EINVAL;
1671         if (tb[NDA_DST] == NULL)
1672                 goto out;
1673
1674         ndm = nlmsg_data(nlh);
1675         if (ndm->ndm_ifindex) {
1676                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1677                 if (dev == NULL) {
1678                         err = -ENODEV;
1679                         goto out;
1680                 }
1681
1682                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1683                         goto out;
1684         }
1685
1686         read_lock(&neigh_tbl_lock);
1687         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1688                 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1689                 struct neighbour *neigh;
1690                 void *dst, *lladdr;
1691
1692                 if (tbl->family != ndm->ndm_family)
1693                         continue;
1694                 read_unlock(&neigh_tbl_lock);
1695
1696                 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1697                         goto out;
1698                 dst = nla_data(tb[NDA_DST]);
1699                 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1700
1701                 if (ndm->ndm_flags & NTF_PROXY) {
1702                         struct pneigh_entry *pn;
1703
1704                         err = -ENOBUFS;
1705                         pn = pneigh_lookup(tbl, net, dst, dev, 1);
1706                         if (pn) {
1707                                 pn->flags = ndm->ndm_flags;
1708                                 err = 0;
1709                         }
1710                         goto out;
1711                 }
1712
1713                 if (dev == NULL)
1714                         goto out;
1715
1716                 neigh = neigh_lookup(tbl, dst, dev);
1717                 if (neigh == NULL) {
1718                         if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1719                                 err = -ENOENT;
1720                                 goto out;
1721                         }
1722
1723                         neigh = __neigh_lookup_errno(tbl, dst, dev);
1724                         if (IS_ERR(neigh)) {
1725                                 err = PTR_ERR(neigh);
1726                                 goto out;
1727                         }
1728                 } else {
1729                         if (nlh->nlmsg_flags & NLM_F_EXCL) {
1730                                 err = -EEXIST;
1731                                 neigh_release(neigh);
1732                                 goto out;
1733                         }
1734
1735                         if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1736                                 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1737                 }
1738
1739                 if (ndm->ndm_flags & NTF_USE) {
1740                         neigh_event_send(neigh, NULL);
1741                         err = 0;
1742                 } else
1743                         err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1744                 neigh_release(neigh);
1745                 goto out;
1746         }
1747
1748         read_unlock(&neigh_tbl_lock);
1749         err = -EAFNOSUPPORT;
1750 out:
1751         return err;
1752 }
1753
1754 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1755 {
1756         struct nlattr *nest;
1757
1758         nest = nla_nest_start(skb, NDTA_PARMS);
1759         if (nest == NULL)
1760                 return -ENOBUFS;
1761
1762         if (parms->dev)
1763                 NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1764
1765         NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1766         NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes);
1767         /* approximative value for deprecated QUEUE_LEN (in packets) */
1768         NLA_PUT_U32(skb, NDTPA_QUEUE_LEN,
1769                     DIV_ROUND_UP(parms->queue_len_bytes,
1770                                  SKB_TRUESIZE(ETH_FRAME_LEN)));
1771         NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1772         NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1773         NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1774         NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1775         NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1776         NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1777                       parms->base_reachable_time);
1778         NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1779         NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1780         NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1781         NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1782         NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1783         NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1784
1785         return nla_nest_end(skb, nest);
1786
1787 nla_put_failure:
1788         nla_nest_cancel(skb, nest);
1789         return -EMSGSIZE;
1790 }
1791
1792 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1793                               u32 pid, u32 seq, int type, int flags)
1794 {
1795         struct nlmsghdr *nlh;
1796         struct ndtmsg *ndtmsg;
1797
1798         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1799         if (nlh == NULL)
1800                 return -EMSGSIZE;
1801
1802         ndtmsg = nlmsg_data(nlh);
1803
1804         read_lock_bh(&tbl->lock);
1805         ndtmsg->ndtm_family = tbl->family;
1806         ndtmsg->ndtm_pad1   = 0;
1807         ndtmsg->ndtm_pad2   = 0;
1808
1809         NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1810         NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1811         NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1812         NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1813         NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1814
1815         {
1816                 unsigned long now = jiffies;
1817                 unsigned int flush_delta = now - tbl->last_flush;
1818                 unsigned int rand_delta = now - tbl->last_rand;
1819                 struct neigh_hash_table *nht;
1820                 struct ndt_config ndc = {
1821                         .ndtc_key_len           = tbl->key_len,
1822                         .ndtc_entry_size        = tbl->entry_size,
1823                         .ndtc_entries           = atomic_read(&tbl->entries),
1824                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1825                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1826                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1827                 };
1828
1829                 rcu_read_lock_bh();
1830                 nht = rcu_dereference_bh(tbl->nht);
1831                 ndc.ndtc_hash_rnd = nht->hash_rnd;
1832                 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1833                 rcu_read_unlock_bh();
1834
1835                 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1836         }
1837
1838         {
1839                 int cpu;
1840                 struct ndt_stats ndst;
1841
1842                 memset(&ndst, 0, sizeof(ndst));
1843
1844                 for_each_possible_cpu(cpu) {
1845                         struct neigh_statistics *st;
1846
1847                         st = per_cpu_ptr(tbl->stats, cpu);
1848                         ndst.ndts_allocs                += st->allocs;
1849                         ndst.ndts_destroys              += st->destroys;
1850                         ndst.ndts_hash_grows            += st->hash_grows;
1851                         ndst.ndts_res_failed            += st->res_failed;
1852                         ndst.ndts_lookups               += st->lookups;
1853                         ndst.ndts_hits                  += st->hits;
1854                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1855                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1856                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1857                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1858                 }
1859
1860                 NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1861         }
1862
1863         BUG_ON(tbl->parms.dev);
1864         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1865                 goto nla_put_failure;
1866
1867         read_unlock_bh(&tbl->lock);
1868         return nlmsg_end(skb, nlh);
1869
1870 nla_put_failure:
1871         read_unlock_bh(&tbl->lock);
1872         nlmsg_cancel(skb, nlh);
1873         return -EMSGSIZE;
1874 }
1875
1876 static int neightbl_fill_param_info(struct sk_buff *skb,
1877                                     struct neigh_table *tbl,
1878                                     struct neigh_parms *parms,
1879                                     u32 pid, u32 seq, int type,
1880                                     unsigned int flags)
1881 {
1882         struct ndtmsg *ndtmsg;
1883         struct nlmsghdr *nlh;
1884
1885         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1886         if (nlh == NULL)
1887                 return -EMSGSIZE;
1888
1889         ndtmsg = nlmsg_data(nlh);
1890
1891         read_lock_bh(&tbl->lock);
1892         ndtmsg->ndtm_family = tbl->family;
1893         ndtmsg->ndtm_pad1   = 0;
1894         ndtmsg->ndtm_pad2   = 0;
1895
1896         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1897             neightbl_fill_parms(skb, parms) < 0)
1898                 goto errout;
1899
1900         read_unlock_bh(&tbl->lock);
1901         return nlmsg_end(skb, nlh);
1902 errout:
1903         read_unlock_bh(&tbl->lock);
1904         nlmsg_cancel(skb, nlh);
1905         return -EMSGSIZE;
1906 }
1907
1908 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1909         [NDTA_NAME]             = { .type = NLA_STRING },
1910         [NDTA_THRESH1]          = { .type = NLA_U32 },
1911         [NDTA_THRESH2]          = { .type = NLA_U32 },
1912         [NDTA_THRESH3]          = { .type = NLA_U32 },
1913         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1914         [NDTA_PARMS]            = { .type = NLA_NESTED },
1915 };
1916
1917 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1918         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1919         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1920         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1921         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1922         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1923         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1924         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1925         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1926         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1927         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1928         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1929         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1930         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1931 };
1932
1933 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1934 {
1935         struct net *net = sock_net(skb->sk);
1936         struct neigh_table *tbl;
1937         struct ndtmsg *ndtmsg;
1938         struct nlattr *tb[NDTA_MAX+1];
1939         int err;
1940
1941         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1942                           nl_neightbl_policy);
1943         if (err < 0)
1944                 goto errout;
1945
1946         if (tb[NDTA_NAME] == NULL) {
1947                 err = -EINVAL;
1948                 goto errout;
1949         }
1950
1951         ndtmsg = nlmsg_data(nlh);
1952         read_lock(&neigh_tbl_lock);
1953         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1954                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1955                         continue;
1956
1957                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1958                         break;
1959         }
1960
1961         if (tbl == NULL) {
1962                 err = -ENOENT;
1963                 goto errout_locked;
1964         }
1965
1966         /*
1967          * We acquire tbl->lock to be nice to the periodic timers and
1968          * make sure they always see a consistent set of values.
1969          */
1970         write_lock_bh(&tbl->lock);
1971
1972         if (tb[NDTA_PARMS]) {
1973                 struct nlattr *tbp[NDTPA_MAX+1];
1974                 struct neigh_parms *p;
1975                 int i, ifindex = 0;
1976
1977                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1978                                        nl_ntbl_parm_policy);
1979                 if (err < 0)
1980                         goto errout_tbl_lock;
1981
1982                 if (tbp[NDTPA_IFINDEX])
1983                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1984
1985                 p = lookup_neigh_parms(tbl, net, ifindex);
1986                 if (p == NULL) {
1987                         err = -ENOENT;
1988                         goto errout_tbl_lock;
1989                 }
1990
1991                 for (i = 1; i <= NDTPA_MAX; i++) {
1992                         if (tbp[i] == NULL)
1993                                 continue;
1994
1995                         switch (i) {
1996                         case NDTPA_QUEUE_LEN:
1997                                 p->queue_len_bytes = nla_get_u32(tbp[i]) *
1998                                                      SKB_TRUESIZE(ETH_FRAME_LEN);
1999                                 break;
2000                         case NDTPA_QUEUE_LENBYTES:
2001                                 p->queue_len_bytes = nla_get_u32(tbp[i]);
2002                                 break;
2003                         case NDTPA_PROXY_QLEN:
2004                                 p->proxy_qlen = nla_get_u32(tbp[i]);
2005                                 break;
2006                         case NDTPA_APP_PROBES:
2007                                 p->app_probes = nla_get_u32(tbp[i]);
2008                                 break;
2009                         case NDTPA_UCAST_PROBES:
2010                                 p->ucast_probes = nla_get_u32(tbp[i]);
2011                                 break;
2012                         case NDTPA_MCAST_PROBES:
2013                                 p->mcast_probes = nla_get_u32(tbp[i]);
2014                                 break;
2015                         case NDTPA_BASE_REACHABLE_TIME:
2016                                 p->base_reachable_time = nla_get_msecs(tbp[i]);
2017                                 break;
2018                         case NDTPA_GC_STALETIME:
2019                                 p->gc_staletime = nla_get_msecs(tbp[i]);
2020                                 break;
2021                         case NDTPA_DELAY_PROBE_TIME:
2022                                 p->delay_probe_time = nla_get_msecs(tbp[i]);
2023                                 break;
2024                         case NDTPA_RETRANS_TIME:
2025                                 p->retrans_time = nla_get_msecs(tbp[i]);
2026                                 break;
2027                         case NDTPA_ANYCAST_DELAY:
2028                                 p->anycast_delay = nla_get_msecs(tbp[i]);
2029                                 break;
2030                         case NDTPA_PROXY_DELAY:
2031                                 p->proxy_delay = nla_get_msecs(tbp[i]);
2032                                 break;
2033                         case NDTPA_LOCKTIME:
2034                                 p->locktime = nla_get_msecs(tbp[i]);
2035                                 break;
2036                         }
2037                 }
2038         }
2039
2040         if (tb[NDTA_THRESH1])
2041                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2042
2043         if (tb[NDTA_THRESH2])
2044                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2045
2046         if (tb[NDTA_THRESH3])
2047                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2048
2049         if (tb[NDTA_GC_INTERVAL])
2050                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2051
2052         err = 0;
2053
2054 errout_tbl_lock:
2055         write_unlock_bh(&tbl->lock);
2056 errout_locked:
2057         read_unlock(&neigh_tbl_lock);
2058 errout:
2059         return err;
2060 }
2061
2062 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2063 {
2064         struct net *net = sock_net(skb->sk);
2065         int family, tidx, nidx = 0;
2066         int tbl_skip = cb->args[0];
2067         int neigh_skip = cb->args[1];
2068         struct neigh_table *tbl;
2069
2070         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2071
2072         read_lock(&neigh_tbl_lock);
2073         for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2074                 struct neigh_parms *p;
2075
2076                 if (tidx < tbl_skip || (family && tbl->family != family))
2077                         continue;
2078
2079                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
2080                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2081                                        NLM_F_MULTI) <= 0)
2082                         break;
2083
2084                 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2085                         if (!net_eq(neigh_parms_net(p), net))
2086                                 continue;
2087
2088                         if (nidx < neigh_skip)
2089                                 goto next;
2090
2091                         if (neightbl_fill_param_info(skb, tbl, p,
2092                                                      NETLINK_CB(cb->skb).pid,
2093                                                      cb->nlh->nlmsg_seq,
2094                                                      RTM_NEWNEIGHTBL,
2095                                                      NLM_F_MULTI) <= 0)
2096                                 goto out;
2097                 next:
2098                         nidx++;
2099                 }
2100
2101                 neigh_skip = 0;
2102         }
2103 out:
2104         read_unlock(&neigh_tbl_lock);
2105         cb->args[0] = tidx;
2106         cb->args[1] = nidx;
2107
2108         return skb->len;
2109 }
2110
2111 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2112                            u32 pid, u32 seq, int type, unsigned int flags)
2113 {
2114         unsigned long now = jiffies;
2115         struct nda_cacheinfo ci;
2116         struct nlmsghdr *nlh;
2117         struct ndmsg *ndm;
2118
2119         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2120         if (nlh == NULL)
2121                 return -EMSGSIZE;
2122
2123         ndm = nlmsg_data(nlh);
2124         ndm->ndm_family  = neigh->ops->family;
2125         ndm->ndm_pad1    = 0;
2126         ndm->ndm_pad2    = 0;
2127         ndm->ndm_flags   = neigh->flags;
2128         ndm->ndm_type    = neigh->type;
2129         ndm->ndm_ifindex = neigh->dev->ifindex;
2130
2131         NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
2132
2133         read_lock_bh(&neigh->lock);
2134         ndm->ndm_state   = neigh->nud_state;
2135         if (neigh->nud_state & NUD_VALID) {
2136                 char haddr[MAX_ADDR_LEN];
2137
2138                 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2139                 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2140                         read_unlock_bh(&neigh->lock);
2141                         goto nla_put_failure;
2142                 }
2143         }
2144
2145         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2146         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2147         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2148         ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2149         read_unlock_bh(&neigh->lock);
2150
2151         NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
2152         NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
2153
2154         return nlmsg_end(skb, nlh);
2155
2156 nla_put_failure:
2157         nlmsg_cancel(skb, nlh);
2158         return -EMSGSIZE;
2159 }
2160
/*
 * Announce a change to @neigh: first through the netevent notifier chain
 * (NETEVENT_NEIGH_UPDATE), then via __neigh_notify() with RTM_NEWNEIGH and
 * no extra flags.
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2166
2167 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2168                             struct netlink_callback *cb)
2169 {
2170         struct net *net = sock_net(skb->sk);
2171         struct neighbour *n;
2172         int rc, h, s_h = cb->args[1];
2173         int idx, s_idx = idx = cb->args[2];
2174         struct neigh_hash_table *nht;
2175
2176         rcu_read_lock_bh();
2177         nht = rcu_dereference_bh(tbl->nht);
2178
2179         for (h = 0; h < (1 << nht->hash_shift); h++) {
2180                 if (h < s_h)
2181                         continue;
2182                 if (h > s_h)
2183                         s_idx = 0;
2184                 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2185                      n != NULL;
2186                      n = rcu_dereference_bh(n->next)) {
2187                         if (!net_eq(dev_net(n->dev), net))
2188                                 continue;
2189                         if (idx < s_idx)
2190                                 goto next;
2191                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2192                                             cb->nlh->nlmsg_seq,
2193                                             RTM_NEWNEIGH,
2194                                             NLM_F_MULTI) <= 0) {
2195                                 rc = -1;
2196                                 goto out;
2197                         }
2198 next:
2199                         idx++;
2200                 }
2201         }
2202         rc = skb->len;
2203 out:
2204         rcu_read_unlock_bh();
2205         cb->args[1] = h;
2206         cb->args[2] = idx;
2207         return rc;
2208 }
2209
2210 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2211 {
2212         struct neigh_table *tbl;
2213         int t, family, s_t;
2214
2215         read_lock(&neigh_tbl_lock);
2216         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2217         s_t = cb->args[0];
2218
2219         for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
2220                 if (t < s_t || (family && tbl->family != family))
2221                         continue;
2222                 if (t > s_t)
2223                         memset(&cb->args[1], 0, sizeof(cb->args) -
2224                                                 sizeof(cb->args[0]));
2225                 if (neigh_dump_table(tbl, skb, cb) < 0)
2226                         break;
2227         }
2228         read_unlock(&neigh_tbl_lock);
2229
2230         cb->args[0] = t;
2231         return skb->len;
2232 }
2233
2234 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2235 {
2236         int chain;
2237         struct neigh_hash_table *nht;
2238
2239         rcu_read_lock_bh();
2240         nht = rcu_dereference_bh(tbl->nht);
2241
2242         read_lock(&tbl->lock); /* avoid resizes */
2243         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2244                 struct neighbour *n;
2245
2246                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2247                      n != NULL;
2248                      n = rcu_dereference_bh(n->next))
2249                         cb(n, cookie);
2250         }
2251         read_unlock(&tbl->lock);
2252         rcu_read_unlock_bh();
2253 }
2254 EXPORT_SYMBOL(neigh_for_each);
2255
2256 /* The tbl->lock must be held as a writer and BH disabled. */
2257 void __neigh_for_each_release(struct neigh_table *tbl,
2258                               int (*cb)(struct neighbour *))
2259 {
2260         int chain;
2261         struct neigh_hash_table *nht;
2262
2263         nht = rcu_dereference_protected(tbl->nht,
2264                                         lockdep_is_held(&tbl->lock));
2265         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2266                 struct neighbour *n;
2267                 struct neighbour __rcu **np;
2268
2269                 np = &nht->hash_buckets[chain];
2270                 while ((n = rcu_dereference_protected(*np,
2271                                         lockdep_is_held(&tbl->lock))) != NULL) {
2272                         int release;
2273
2274                         write_lock(&n->lock);
2275                         release = cb(n);
2276                         if (release) {
2277                                 rcu_assign_pointer(*np,
2278                                         rcu_dereference_protected(n->next,
2279                                                 lockdep_is_held(&tbl->lock)));
2280                                 n->dead = 1;
2281                         } else
2282                                 np = &n->next;
2283                         write_unlock(&n->lock);
2284                         if (release)
2285                                 neigh_cleanup_and_release(n);
2286                 }
2287         }
2288 }
2289 EXPORT_SYMBOL(__neigh_for_each_release);
2290
2291 #ifdef CONFIG_PROC_FS
2292
2293 static struct neighbour *neigh_get_first(struct seq_file *seq)
2294 {
2295         struct neigh_seq_state *state = seq->private;
2296         struct net *net = seq_file_net(seq);
2297         struct neigh_hash_table *nht = state->nht;
2298         struct neighbour *n = NULL;
2299         int bucket = state->bucket;
2300
2301         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2302         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2303                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2304
2305                 while (n) {
2306                         if (!net_eq(dev_net(n->dev), net))
2307                                 goto next;
2308                         if (state->neigh_sub_iter) {
2309                                 loff_t fakep = 0;
2310                                 void *v;
2311
2312                                 v = state->neigh_sub_iter(state, n, &fakep);
2313                                 if (!v)
2314                                         goto next;
2315                         }
2316                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2317                                 break;
2318                         if (n->nud_state & ~NUD_NOARP)
2319                                 break;
2320 next:
2321                         n = rcu_dereference_bh(n->next);
2322                 }
2323
2324                 if (n)
2325                         break;
2326         }
2327         state->bucket = bucket;
2328
2329         return n;
2330 }
2331
2332 static struct neighbour *neigh_get_next(struct seq_file *seq,
2333                                         struct neighbour *n,
2334                                         loff_t *pos)
2335 {
2336         struct neigh_seq_state *state = seq->private;
2337         struct net *net = seq_file_net(seq);
2338         struct neigh_hash_table *nht = state->nht;
2339
2340         if (state->neigh_sub_iter) {
2341                 void *v = state->neigh_sub_iter(state, n, pos);
2342                 if (v)
2343                         return n;
2344         }
2345         n = rcu_dereference_bh(n->next);
2346
2347         while (1) {
2348                 while (n) {
2349                         if (!net_eq(dev_net(n->dev), net))
2350                                 goto next;
2351                         if (state->neigh_sub_iter) {
2352                                 void *v = state->neigh_sub_iter(state, n, pos);
2353                                 if (v)
2354                                         return n;
2355                                 goto next;
2356                         }
2357                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2358                                 break;
2359
2360                         if (n->nud_state & ~NUD_NOARP)
2361                                 break;
2362 next:
2363                         n = rcu_dereference_bh(n->next);
2364                 }
2365
2366                 if (n)
2367                         break;
2368
2369                 if (++state->bucket >= (1 << nht->hash_shift))
2370                         break;
2371
2372                 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2373         }
2374
2375         if (n && pos)
2376                 --(*pos);
2377         return n;
2378 }
2379
2380 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2381 {
2382         struct neighbour *n = neigh_get_first(seq);
2383
2384         if (n) {
2385                 --(*pos);
2386                 while (*pos) {
2387                         n = neigh_get_next(seq, n, pos);
2388                         if (!n)
2389                                 break;
2390                 }
2391         }
2392         return *pos ? NULL : n;
2393 }
2394
2395 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2396 {
2397         struct neigh_seq_state *state = seq->private;
2398         struct net *net = seq_file_net(seq);
2399         struct neigh_table *tbl = state->tbl;
2400         struct pneigh_entry *pn = NULL;
2401         int bucket = state->bucket;
2402
2403         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2404         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2405                 pn = tbl->phash_buckets[bucket];
2406                 while (pn && !net_eq(pneigh_net(pn), net))
2407                         pn = pn->next;
2408                 if (pn)
2409                         break;
2410         }
2411         state->bucket = bucket;
2412
2413         return pn;
2414 }
2415
2416 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2417                                             struct pneigh_entry *pn,
2418                                             loff_t *pos)
2419 {
2420         struct neigh_seq_state *state = seq->private;
2421         struct net *net = seq_file_net(seq);
2422         struct neigh_table *tbl = state->tbl;
2423
2424         do {
2425                 pn = pn->next;
2426         } while (pn && !net_eq(pneigh_net(pn), net));
2427
2428         while (!pn) {
2429                 if (++state->bucket > PNEIGH_HASHMASK)
2430                         break;
2431                 pn = tbl->phash_buckets[state->bucket];
2432                 while (pn && !net_eq(pneigh_net(pn), net))
2433                         pn = pn->next;
2434                 if (pn)
2435                         break;
2436         }
2437
2438         if (pn && pos)
2439                 --(*pos);
2440
2441         return pn;
2442 }
2443
2444 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2445 {
2446         struct pneigh_entry *pn = pneigh_get_first(seq);
2447
2448         if (pn) {
2449                 --(*pos);
2450                 while (*pos) {
2451                         pn = pneigh_get_next(seq, pn, pos);
2452                         if (!pn)
2453                                 break;
2454                 }
2455         }
2456         return *pos ? NULL : pn;
2457 }
2458
2459 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2460 {
2461         struct neigh_seq_state *state = seq->private;
2462         void *rc;
2463         loff_t idxpos = *pos;
2464
2465         rc = neigh_get_idx(seq, &idxpos);
2466         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2467                 rc = pneigh_get_idx(seq, &idxpos);
2468
2469         return rc;
2470 }
2471
2472 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2473         __acquires(rcu_bh)
2474 {
2475         struct neigh_seq_state *state = seq->private;
2476
2477         state->tbl = tbl;
2478         state->bucket = 0;
2479         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2480
2481         rcu_read_lock_bh();
2482         state->nht = rcu_dereference_bh(tbl->nht);
2483
2484         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2485 }
2486 EXPORT_SYMBOL(neigh_seq_start);
2487
2488 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2489 {
2490         struct neigh_seq_state *state;
2491         void *rc;
2492
2493         if (v == SEQ_START_TOKEN) {
2494                 rc = neigh_get_first(seq);
2495                 goto out;
2496         }
2497
2498         state = seq->private;
2499         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2500                 rc = neigh_get_next(seq, v, NULL);
2501                 if (rc)
2502                         goto out;
2503                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2504                         rc = pneigh_get_first(seq);
2505         } else {
2506                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2507                 rc = pneigh_get_next(seq, v, NULL);
2508         }
2509 out:
2510         ++(*pos);
2511         return rc;
2512 }
2513 EXPORT_SYMBOL(neigh_seq_next);
2514
2515 void neigh_seq_stop(struct seq_file *seq, void *v)
2516         __releases(rcu_bh)
2517 {
2518         rcu_read_unlock_bh();
2519 }
2520 EXPORT_SYMBOL(neigh_seq_stop);
2521
2522 /* statistics via seq_file */
2523
2524 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2525 {
2526         struct neigh_table *tbl = seq->private;
2527         int cpu;
2528
2529         if (*pos == 0)
2530                 return SEQ_START_TOKEN;
2531
2532         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2533                 if (!cpu_possible(cpu))
2534                         continue;
2535                 *pos = cpu+1;
2536                 return per_cpu_ptr(tbl->stats, cpu);
2537         }
2538         return NULL;
2539 }
2540
2541 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2542 {
2543         struct neigh_table *tbl = seq->private;
2544         int cpu;
2545
2546         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2547                 if (!cpu_possible(cpu))
2548                         continue;
2549                 *pos = cpu+1;
2550                 return per_cpu_ptr(tbl->stats, cpu);
2551         }
2552         return NULL;
2553 }
2554
/*
 * ->stop for the per-CPU statistics file.  The start/next callbacks
 * take no locks and allocate nothing, so there is nothing to undo.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2559
2560 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2561 {
2562         struct neigh_table *tbl = seq->private;
2563         struct neigh_statistics *st = v;
2564
2565         if (v == SEQ_START_TOKEN) {
2566                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2567                 return 0;
2568         }
2569
2570         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2571                         "%08lx %08lx  %08lx %08lx %08lx\n",
2572                    atomic_read(&tbl->entries),
2573
2574                    st->allocs,
2575                    st->destroys,
2576                    st->hash_grows,
2577
2578                    st->lookups,
2579                    st->hits,
2580
2581                    st->res_failed,
2582
2583                    st->rcv_probes_mcast,
2584                    st->rcv_probes_ucast,
2585
2586                    st->periodic_gc_runs,
2587                    st->forced_gc_runs,
2588                    st->unres_discards
2589                    );
2590
2591         return 0;
2592 }
2593
/* seq_file iterator callbacks for the per-CPU neighbour statistics file. */
2594 static const struct seq_operations neigh_stat_seq_ops = {
2595         .start  = neigh_stat_seq_start,
2596         .next   = neigh_stat_seq_next,
2597         .stop   = neigh_stat_seq_stop,
2598         .show   = neigh_stat_seq_show,
2599 };
2600
2601 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2602 {
2603         int ret = seq_open(file, &neigh_stat_seq_ops);
2604
2605         if (!ret) {
2606                 struct seq_file *sf = file->private_data;
2607                 sf->private = PDE(inode)->data;
2608         }
2609         return ret;
2610 };
2611
/*
 * File operations for the per-CPU neighbour statistics file: a custom
 * open that attaches the seq_file, with read/llseek/release handled by
 * the generic seq_file helpers.
 */
2612 static const struct file_operations neigh_stat_seq_fops = {
2613         .owner   = THIS_MODULE,
2614         .open    = neigh_stat_seq_open,
2615         .read    = seq_read,
2616         .llseek  = seq_lseek,
2617         .release = seq_release,
2618 };
2619
2620 #endif /* CONFIG_PROC_FS */
2621
2622 static inline size_t neigh_nlmsg_size(void)
2623 {
2624         return NLMSG_ALIGN(sizeof(struct ndmsg))
2625                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2626                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2627                + nla_total_size(sizeof(struct nda_cacheinfo))
2628                + nla_total_size(4); /* NDA_PROBES */
2629 }
2630
2631 static void __neigh_notify(struct neighbour *n, int type, int flags)
2632 {
2633         struct net *net = dev_net(n->dev);
2634         struct sk_buff *skb;
2635         int err = -ENOBUFS;
2636
2637         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2638         if (skb == NULL)
2639                 goto errout;
2640
2641         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2642         if (err < 0) {
2643                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2644                 WARN_ON(err == -EMSGSIZE);
2645                 kfree_skb(skb);
2646                 goto errout;
2647         }
2648         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2649         return;
2650 errout:
2651         if (err < 0)
2652                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2653 }
2654
2655 #ifdef CONFIG_ARPD
2656 void neigh_app_ns(struct neighbour *n)
2657 {
2658         __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2659 }
2660 EXPORT_SYMBOL(neigh_app_ns);
2661 #endif /* CONFIG_ARPD */
2662
2663 #ifdef CONFIG_SYSCTL
2664
2665 static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2666                            size_t *lenp, loff_t *ppos)
2667 {
2668         int size, ret;
2669         ctl_table tmp = *ctl;
2670
2671         tmp.data = &size;
2672         size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
2673         ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
2674         if (write && !ret)
2675                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2676         return ret;
2677 }
2678
/*
 * Indices into neigh_sysctl_table.neigh_vars[].
 *
 * The order matters: neigh_sysctl_register() ends per-device tables by
 * zeroing the NEIGH_VAR_GC_INTERVAL slot, so NEIGH_VAR_GC_INTERVAL and
 * the gc_thresh entries after it are only visible in the table that is
 * registered without a device.  NEIGH_VAR_MAX is the entry count and
 * the index of the empty terminating slot.
 */
2679 enum {
2680         NEIGH_VAR_MCAST_PROBE,
2681         NEIGH_VAR_UCAST_PROBE,
2682         NEIGH_VAR_APP_PROBE,
2683         NEIGH_VAR_RETRANS_TIME,
2684         NEIGH_VAR_BASE_REACHABLE_TIME,
2685         NEIGH_VAR_DELAY_PROBE_TIME,
2686         NEIGH_VAR_GC_STALETIME,
2687         NEIGH_VAR_QUEUE_LEN,
2688         NEIGH_VAR_QUEUE_LEN_BYTES,
2689         NEIGH_VAR_PROXY_QLEN,
2690         NEIGH_VAR_ANYCAST_DELAY,
2691         NEIGH_VAR_PROXY_DELAY,
2692         NEIGH_VAR_LOCKTIME,
2693         NEIGH_VAR_RETRANS_TIME_MS,
2694         NEIGH_VAR_BASE_REACHABLE_TIME_MS,
2695         NEIGH_VAR_GC_INTERVAL,
2696         NEIGH_VAR_GC_THRESH1,
2697         NEIGH_VAR_GC_THRESH2,
2698         NEIGH_VAR_GC_THRESH3,
2699         NEIGH_VAR_MAX
2700 };
2701
/*
 * Template for a neighbour sysctl table.
 *
 * neigh_sysctl_register() duplicates this structure for every
 * registration and fills in the per-instance parts: the .data pointer
 * of each entry, sysctl_header and dev_name.  Only the file name, size,
 * mode and handler of each entry are fixed here.
 *
 * "unres_qlen" and "unres_qlen_bytes" are both pointed at the same
 * byte-limit variable by neigh_sysctl_register(); proc_unres_qlen
 * converts the former to and from a packet count.
 */
2702 static struct neigh_sysctl_table {
2703         struct ctl_table_header *sysctl_header;
2704         struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2705         char *dev_name;
2706 } neigh_sysctl_template __read_mostly = {
2707         .neigh_vars = {
2708                 [NEIGH_VAR_MCAST_PROBE] = {
2709                         .procname       = "mcast_solicit",
2710                         .maxlen         = sizeof(int),
2711                         .mode           = 0644,
2712                         .proc_handler   = proc_dointvec,
2713                 },
2714                 [NEIGH_VAR_UCAST_PROBE] = {
2715                         .procname       = "ucast_solicit",
2716                         .maxlen         = sizeof(int),
2717                         .mode           = 0644,
2718                         .proc_handler   = proc_dointvec,
2719                 },
2720                 [NEIGH_VAR_APP_PROBE] = {
2721                         .procname       = "app_solicit",
2722                         .maxlen         = sizeof(int),
2723                         .mode           = 0644,
2724                         .proc_handler   = proc_dointvec,
2725                 },
2726                 [NEIGH_VAR_RETRANS_TIME] = {
2727                         .procname       = "retrans_time",
2728                         .maxlen         = sizeof(int),
2729                         .mode           = 0644,
2730                         .proc_handler   = proc_dointvec_userhz_jiffies,
2731                 },
2732                 [NEIGH_VAR_BASE_REACHABLE_TIME] = {
2733                         .procname       = "base_reachable_time",
2734                         .maxlen         = sizeof(int),
2735                         .mode           = 0644,
2736                         .proc_handler   = proc_dointvec_jiffies,
2737                 },
2738                 [NEIGH_VAR_DELAY_PROBE_TIME] = {
2739                         .procname       = "delay_first_probe_time",
2740                         .maxlen         = sizeof(int),
2741                         .mode           = 0644,
2742                         .proc_handler   = proc_dointvec_jiffies,
2743                 },
2744                 [NEIGH_VAR_GC_STALETIME] = {
2745                         .procname       = "gc_stale_time",
2746                         .maxlen         = sizeof(int),
2747                         .mode           = 0644,
2748                         .proc_handler   = proc_dointvec_jiffies,
2749                 },
2750                 [NEIGH_VAR_QUEUE_LEN] = {
2751                         .procname       = "unres_qlen",
2752                         .maxlen         = sizeof(int),
2753                         .mode           = 0644,
2754                         .proc_handler   = proc_unres_qlen,
2755                 },
2756                 [NEIGH_VAR_QUEUE_LEN_BYTES] = {
2757                         .procname       = "unres_qlen_bytes",
2758                         .maxlen         = sizeof(int),
2759                         .mode           = 0644,
2760                         .proc_handler   = proc_dointvec,
2761                 },
2762                 [NEIGH_VAR_PROXY_QLEN] = {
2763                         .procname       = "proxy_qlen",
2764                         .maxlen         = sizeof(int),
2765                         .mode           = 0644,
2766                         .proc_handler   = proc_dointvec,
2767                 },
2768                 [NEIGH_VAR_ANYCAST_DELAY] = {
2769                         .procname       = "anycast_delay",
2770                         .maxlen         = sizeof(int),
2771                         .mode           = 0644,
2772                         .proc_handler   = proc_dointvec_userhz_jiffies,
2773                 },
2774                 [NEIGH_VAR_PROXY_DELAY] = {
2775                         .procname       = "proxy_delay",
2776                         .maxlen         = sizeof(int),
2777                         .mode           = 0644,
2778                         .proc_handler   = proc_dointvec_userhz_jiffies,
2779                 },
2780                 [NEIGH_VAR_LOCKTIME] = {
2781                         .procname       = "locktime",
2782                         .maxlen         = sizeof(int),
2783                         .mode           = 0644,
2784                         .proc_handler   = proc_dointvec_userhz_jiffies,
2785                 },
2786                 [NEIGH_VAR_RETRANS_TIME_MS] = {
2787                         .procname       = "retrans_time_ms",
2788                         .maxlen         = sizeof(int),
2789                         .mode           = 0644,
2790                         .proc_handler   = proc_dointvec_ms_jiffies,
2791                 },
2792                 [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2793                         .procname       = "base_reachable_time_ms",
2794                         .maxlen         = sizeof(int),
2795                         .mode           = 0644,
2796                         .proc_handler   = proc_dointvec_ms_jiffies,
2797                 },
2798                 [NEIGH_VAR_GC_INTERVAL] = {
2799                         .procname       = "gc_interval",
2800                         .maxlen         = sizeof(int),
2801                         .mode           = 0644,
2802                         .proc_handler   = proc_dointvec_jiffies,
2803                 },
2804                 [NEIGH_VAR_GC_THRESH1] = {
2805                         .procname       = "gc_thresh1",
2806                         .maxlen         = sizeof(int),
2807                         .mode           = 0644,
2808                         .proc_handler   = proc_dointvec,
2809                 },
2810                 [NEIGH_VAR_GC_THRESH2] = {
2811                         .procname       = "gc_thresh2",
2812                         .maxlen         = sizeof(int),
2813                         .mode           = 0644,
2814                         .proc_handler   = proc_dointvec,
2815                 },
2816                 [NEIGH_VAR_GC_THRESH3] = {
2817                         .procname       = "gc_thresh3",
2818                         .maxlen         = sizeof(int),
2819                         .mode           = 0644,
2820                         .proc_handler   = proc_dointvec,
2821                 },
/*
 * Positional initializer: it follows [NEIGH_VAR_GC_THRESH3] and so
 * initializes index NEIGH_VAR_MAX, the empty entry that ends the table.
 * Keep it last.
 */
2822                 {},
2823         },
2824 };
2825
2826 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2827                           char *p_name, proc_handler *handler)
2828 {
2829         struct neigh_sysctl_table *t;
2830         const char *dev_name_source = NULL;
2831
2832 #define NEIGH_CTL_PATH_ROOT     0
2833 #define NEIGH_CTL_PATH_PROTO    1
2834 #define NEIGH_CTL_PATH_NEIGH    2
2835 #define NEIGH_CTL_PATH_DEV      3
2836
2837         struct ctl_path neigh_path[] = {
2838                 { .procname = "net",     },
2839                 { .procname = "proto",   },
2840                 { .procname = "neigh",   },
2841                 { .procname = "default", },
2842                 { },
2843         };
2844
2845         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2846         if (!t)
2847                 goto err;
2848
2849         t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2850         t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2851         t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2852         t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2853         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2854         t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2855         t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2856         t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2857         t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2858         t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2859         t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2860         t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2861         t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2862         t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2863         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2864
2865         if (dev) {
2866                 dev_name_source = dev->name;
2867                 /* Terminate the table early */
2868                 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2869                        sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2870         } else {
2871                 dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2872                 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2873                 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2874                 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2875                 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2876         }
2877
2878
2879         if (handler) {
2880                 /* RetransTime */
2881                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2882                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2883                 /* ReachableTime */
2884                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2885                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
2886                 /* RetransTime (in milliseconds)*/
2887                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
2888                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
2889                 /* ReachableTime (in milliseconds) */
2890                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
2891                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
2892         }
2893
2894         t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2895         if (!t->dev_name)
2896                 goto free;
2897
2898         neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2899         neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2900
2901         t->sysctl_header =
2902                 register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
2903         if (!t->sysctl_header)
2904                 goto free_procname;
2905
2906         p->sysctl_table = t;
2907         return 0;
2908
2909 free_procname:
2910         kfree(t->dev_name);
2911 free:
2912         kfree(t);
2913 err:
2914         return -ENOBUFS;
2915 }
2916 EXPORT_SYMBOL(neigh_sysctl_register);
2917
2918 void neigh_sysctl_unregister(struct neigh_parms *p)
2919 {
2920         if (p->sysctl_table) {
2921                 struct neigh_sysctl_table *t = p->sysctl_table;
2922                 p->sysctl_table = NULL;
2923                 unregister_sysctl_table(t->sysctl_header);
2924                 kfree(t->dev_name);
2925                 kfree(t);
2926         }
2927 }
2928 EXPORT_SYMBOL(neigh_sysctl_unregister);
2929
2930 #endif  /* CONFIG_SYSCTL */
2931
2932 static int __init neigh_init(void)
2933 {
2934         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
2935         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
2936         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
2937
2938         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
2939                       NULL);
2940         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
2941
2942         return 0;
2943 }
2944
2945 subsys_initcall(neigh_init);
2946