neigh: Add infrastructure for allocating device neigh privates.
[~shefty/rdma-dev.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39
40 #define NEIGH_DEBUG 1
41
42 #define NEIGH_PRINTK(x...) printk(x)
43 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
44 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
45 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
46
47 #if NEIGH_DEBUG >= 1
48 #undef NEIGH_PRINTK1
49 #define NEIGH_PRINTK1 NEIGH_PRINTK
50 #endif
51 #if NEIGH_DEBUG >= 2
52 #undef NEIGH_PRINTK2
53 #define NEIGH_PRINTK2 NEIGH_PRINTK
54 #endif
55
56 #define PNEIGH_HASHMASK         0xF
57
58 static void neigh_timer_handler(unsigned long arg);
59 static void __neigh_notify(struct neighbour *n, int type, int flags);
60 static void neigh_update_notify(struct neighbour *neigh);
61 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
62
63 static struct neigh_table *neigh_tables;
64 #ifdef CONFIG_PROC_FS
65 static const struct file_operations neigh_stat_seq_fops;
66 #endif
67
68 /*
69    Neighbour hash table buckets are protected with rwlock tbl->lock.
70
71    - All the scans/updates to hash buckets MUST be made under this lock.
72    - NOTHING clever should be made under this lock: no callbacks
73      to protocol backends, no attempts to send something to network.
74      It will result in deadlocks, if backend/driver wants to use neighbour
75      cache.
76    - If the entry requires some non-trivial actions, increase
77      its reference count and release table lock.
78
79    Neighbour entries are protected:
80    - with reference count.
81    - with rwlock neigh->lock
82
83    Reference count prevents destruction.
84
85    neigh->lock mainly serializes ll address data and its validity state.
86    However, the same lock is used to protect another entry fields:
87     - timer
88     - resolution queue
89
90    Again, nothing clever shall be made under neigh->lock,
91    the most complicated procedure, which we allow is dev->hard_header.
92    It is supposed, that dev->hard_header is simplistic and does
93    not make callbacks to neighbour tables.
94
95    The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
96    list of neighbour tables. This list is used only in process context.
97  */
98
99 static DEFINE_RWLOCK(neigh_tbl_lock);
100
101 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
102 {
103         kfree_skb(skb);
104         return -ENETDOWN;
105 }
106
/* Final teardown for an entry already unlinked from its hash chain:
 * run the optional per-parms cleanup hook, broadcast an RTM_DELNEIGH
 * netlink notification, then drop the table's reference (which may
 * free the entry once all other holders release it). */
107 static void neigh_cleanup_and_release(struct neighbour *neigh)
108 {
109         if (neigh->parms->neigh_cleanup)
110                 neigh->parms->neigh_cleanup(neigh);
111
112         __neigh_notify(neigh, RTM_DELNEIGH, 0);
113         neigh_release(neigh);
114 }
115
/*
 * Pick a pseudo-random reachability time uniformly distributed over
 * the interval (1/2)*base ... (3/2)*base.  It corresponds to default
 * IPv6 settings and is not overridable, because it is really a
 * reasonable choice.  A base of zero yields zero.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (base >> 1) + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
127
128
/*
 * Synchronous "emergency" shrink of @tbl, invoked from neigh_alloc()
 * when the entry count crosses gc_thresh2/gc_thresh3.  Walks every
 * hash bucket under the table write lock and unlinks entries that are
 * unreferenced (refcnt == 1, i.e. only the table's own hold) and not
 * NUD_PERMANENT.  Returns 1 if at least one entry was reclaimed.
 */
129 static int neigh_forced_gc(struct neigh_table *tbl)
130 {
131         int shrunk = 0;
132         int i;
133         struct neigh_hash_table *nht;
134
135         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
136
137         write_lock_bh(&tbl->lock);
138         nht = rcu_dereference_protected(tbl->nht,
139                                         lockdep_is_held(&tbl->lock));
140         for (i = 0; i < (1 << nht->hash_shift); i++) {
141                 struct neighbour *n;
142                 struct neighbour __rcu **np;
143
144                 np = &nht->hash_buckets[i];
145                 while ((n = rcu_dereference_protected(*np,
146                                         lockdep_is_held(&tbl->lock))) != NULL) {
147                         /* Neighbour record may be discarded if:
148                          * - nobody refers to it.
149                          * - it is not permanent
150                          */
151                         write_lock(&n->lock);
152                         if (atomic_read(&n->refcnt) == 1 &&
153                             !(n->nud_state & NUD_PERMANENT)) {
                                /* Unlink with RCU publication semantics;
                                 * lockless readers may still traverse the
                                 * stale entry until a grace period ends. */
154                                 rcu_assign_pointer(*np,
155                                         rcu_dereference_protected(n->next,
156                                                   lockdep_is_held(&tbl->lock)));
157                                 n->dead = 1;
158                                 shrunk  = 1;
159                                 write_unlock(&n->lock);
160                                 neigh_cleanup_and_release(n);
161                                 continue;
162                         }
163                         write_unlock(&n->lock);
164                         np = &n->next;
165                 }
166         }
167
        /* Record the flush time so neigh_alloc() can rate-limit forced
         * GC to once per 5 seconds at the gc_thresh2 level. */
168         tbl->last_flush = jiffies;
169
170         write_unlock_bh(&tbl->lock);
171
172         return shrunk;
173 }
174
/* Arm the per-entry state-machine timer to fire at @when, taking a
 * reference that the timer handler is expected to drop.  mod_timer()
 * returning non-zero means a timer was already pending — that would
 * indicate state-machine corruption, hence the loud diagnostic.
 * NOTE(review): presumably called with neigh->lock held — confirm at
 * the call sites. */
175 static void neigh_add_timer(struct neighbour *n, unsigned long when)
176 {
177         neigh_hold(n);
178         if (unlikely(mod_timer(&n->timer, when))) {
179                 printk("NEIGH: BUG, double timer add, state is %x\n",
180                        n->nud_state);
181                 dump_stack();
182         }
183 }
184
185 static int neigh_del_timer(struct neighbour *n)
186 {
187         if ((n->nud_state & NUD_IN_TIMER) &&
188             del_timer(&n->timer)) {
189                 neigh_release(n);
190                 return 1;
191         }
192         return 0;
193 }
194
195 static void pneigh_queue_purge(struct sk_buff_head *list)
196 {
197         struct sk_buff *skb;
198
199         while ((skb = skb_dequeue(list)) != NULL) {
200                 dev_put(skb->dev);
201                 kfree_skb(skb);
202         }
203 }
204
/*
 * Unlink every entry belonging to @dev (all entries when @dev is NULL)
 * from the hash table.  Caller must hold tbl->lock for writing (see
 * the lockdep_is_held() assertions).  Entries still referenced
 * elsewhere cannot be freed yet: they are neutered in place — queue
 * purged, output redirected to neigh_blackhole, state forced to
 * NOARP/NONE — and freed when the last reference goes away.
 */
205 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
206 {
207         int i;
208         struct neigh_hash_table *nht;
209
210         nht = rcu_dereference_protected(tbl->nht,
211                                         lockdep_is_held(&tbl->lock));
212
213         for (i = 0; i < (1 << nht->hash_shift); i++) {
214                 struct neighbour *n;
215                 struct neighbour __rcu **np = &nht->hash_buckets[i];
216
217                 while ((n = rcu_dereference_protected(*np,
218                                         lockdep_is_held(&tbl->lock))) != NULL) {
219                         if (dev && n->dev != dev) {
220                                 np = &n->next;
221                                 continue;
222                         }
                        /* Unlink from the chain with RCU publication. */
223                         rcu_assign_pointer(*np,
224                                    rcu_dereference_protected(n->next,
225                                                 lockdep_is_held(&tbl->lock)));
226                         write_lock(&n->lock);
227                         neigh_del_timer(n);
228                         n->dead = 1;
229
230                         if (atomic_read(&n->refcnt) != 1) {
231                                 /* The most unpleasant situation.
232                                    We must destroy neighbour entry,
233                                    but someone still uses it.
234
235                                    The destroy will be delayed until
236                                    the last user releases us, but
237                                    we must kill timers etc. and move
238                                    it to safe state.
239                                  */
240                                 skb_queue_purge(&n->arp_queue);
241                                 n->arp_queue_len_bytes = 0;
242                                 n->output = neigh_blackhole;
243                                 if (n->nud_state & NUD_VALID)
244                                         n->nud_state = NUD_NOARP;
245                                 else
246                                         n->nud_state = NUD_NONE;
247                                 NEIGH_PRINTK2("neigh %p is stray.\n", n);
248                         }
249                         write_unlock(&n->lock);
250                         neigh_cleanup_and_release(n);
251                 }
252         }
253 }
254
/* Drop every cached neighbour entry for @dev (e.g. after the device's
 * link-layer address changed) so they get re-resolved.  Just a locked
 * wrapper around neigh_flush_dev(). */
255 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
256 {
257         write_lock_bh(&tbl->lock);
258         neigh_flush_dev(tbl, dev);
259         write_unlock_bh(&tbl->lock);
260 }
261 EXPORT_SYMBOL(neigh_changeaddr);
262
/*
 * Device is going down: flush its neighbour and proxy entries under
 * the table lock, then (outside the lock) stop the proxy timer and
 * drop any skbs still queued for proxy processing.  Always returns 0.
 */
263 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
264 {
265         write_lock_bh(&tbl->lock);
266         neigh_flush_dev(tbl, dev);
267         pneigh_ifdown(tbl, dev);
268         write_unlock_bh(&tbl->lock);
269
        /* del_timer_sync() must run without tbl->lock held. */
270         del_timer_sync(&tbl->proxy_timer);
271         pneigh_queue_purge(&tbl->proxy_queue);
272         return 0;
273 }
274 EXPORT_SYMBOL(neigh_ifdown);
275
/*
 * Allocate and minimally initialise a new neighbour entry for @tbl.
 *
 * GC pressure is applied first: past gc_thresh3 (or past gc_thresh2
 * with no flush in the last 5 seconds) a synchronous forced GC runs,
 * and the allocation fails if the table is still over gc_thresh3.
 *
 * When the table sets entry_size, that fixed size is used; otherwise
 * the size is computed from the struct plus key length, aligned to
 * NEIGH_PRIV_ALIGN, plus the device's neigh_priv_len so drivers get a
 * private area behind the key.
 *
 * The entry is returned with refcnt 1, state NUD_NONE and dead = 1 —
 * it is not yet visible in any hash chain; neigh_create() clears
 * ->dead when it links the entry in.  Returns NULL on failure.
 */
276 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
277 {
278         struct neighbour *n = NULL;
279         unsigned long now = jiffies;
280         int entries;
281
282         entries = atomic_inc_return(&tbl->entries) - 1;
283         if (entries >= tbl->gc_thresh3 ||
284             (entries >= tbl->gc_thresh2 &&
285              time_after(now, tbl->last_flush + 5 * HZ))) {
286                 if (!neigh_forced_gc(tbl) &&
287                     entries >= tbl->gc_thresh3)
288                         goto out_entries;
289         }
290
291         if (tbl->entry_size)
292                 n = kzalloc(tbl->entry_size, GFP_ATOMIC);
293         else {
294                 int sz = sizeof(*n) + tbl->key_len;
295
296                 sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
297                 sz += dev->neigh_priv_len;
298                 n = kzalloc(sz, GFP_ATOMIC);
299         }
300         if (!n)
301                 goto out_entries;
302
303         skb_queue_head_init(&n->arp_queue);
304         rwlock_init(&n->lock);
305         seqlock_init(&n->ha_lock);
306         n->updated        = n->used = now;
307         n->nud_state      = NUD_NONE;
308         n->output         = neigh_blackhole;
309         seqlock_init(&n->hh.hh_lock);
310         n->parms          = neigh_parms_clone(&tbl->parms);
311         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
312
313         NEIGH_CACHE_STAT_INC(tbl, allocs);
314         n->tbl            = tbl;
315         atomic_set(&n->refcnt, 1);
316         n->dead           = 1;
317 out:
318         return n;
319
320 out_entries:
321         atomic_dec(&tbl->entries);
322         goto out;
323 }
324
/*
 * Allocate a hash table descriptor plus a zeroed array of 2^shift
 * bucket heads.  Small arrays come from kzalloc; larger ones straight
 * from the page allocator.  GFP_ATOMIC throughout — callers may hold
 * spinlocks.  Returns NULL on allocation failure.
 */
325 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
326 {
327         size_t size = (1 << shift) * sizeof(struct neighbour *);
328         struct neigh_hash_table *ret;
329         struct neighbour __rcu **buckets;
330
331         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
332         if (!ret)
333                 return NULL;
334         if (size <= PAGE_SIZE)
335                 buckets = kzalloc(size, GFP_ATOMIC);
336         else
337                 buckets = (struct neighbour __rcu **)
338                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
339                                            get_order(size));
340         if (!buckets) {
341                 kfree(ret);
342                 return NULL;
343         }
344         ret->hash_buckets = buckets;
345         ret->hash_shift = shift;
346         get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
        /* Force the low bit on — presumably so hash_rnd is never zero
         * (and is odd) for the tbl->hash() mix; confirm against the
         * per-protocol hash implementations. */
347         ret->hash_rnd |= 1;
348         return ret;
349 }
350
/* RCU callback that frees a retired hash table once all readers are
 * done with it; mirrors the kzalloc-vs-pages split used by
 * neigh_hash_alloc() when choosing how to free the bucket array. */
351 static void neigh_hash_free_rcu(struct rcu_head *head)
352 {
353         struct neigh_hash_table *nht = container_of(head,
354                                                     struct neigh_hash_table,
355                                                     rcu);
356         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
357         struct neighbour __rcu **buckets = nht->hash_buckets;
358
359         if (size <= PAGE_SIZE)
360                 kfree(buckets);
361         else
362                 free_pages((unsigned long)buckets, get_order(size));
363         kfree(nht);
364 }
365
/*
 * Replace tbl->nht with a table of 2^new_shift buckets, rehashing
 * every existing entry into the new array.  Caller must hold
 * tbl->lock for writing (see the lockdep assertions).  Lockless
 * readers may still be walking the old table, so the new one is
 * published with rcu_assign_pointer() and the old one freed only
 * after a grace period.  On allocation failure the old table is kept
 * and returned.
 */
366 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
367                                                 unsigned long new_shift)
368 {
369         unsigned int i, hash;
370         struct neigh_hash_table *new_nht, *old_nht;
371
372         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
373
374         old_nht = rcu_dereference_protected(tbl->nht,
375                                             lockdep_is_held(&tbl->lock));
376         new_nht = neigh_hash_alloc(new_shift);
377         if (!new_nht)
378                 return old_nht;
379
380         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
381                 struct neighbour *n, *next;
382
383                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
384                                                    lockdep_is_held(&tbl->lock));
385                      n != NULL;
386                      n = next) {
                        /* Bucket index comes from the top bits of the
                         * hash, computed with the NEW table's hash_rnd. */
387                         hash = tbl->hash(n->primary_key, n->dev,
388                                          new_nht->hash_rnd);
389
390                         hash >>= (32 - new_nht->hash_shift);
391                         next = rcu_dereference_protected(n->next,
392                                                 lockdep_is_held(&tbl->lock));
393
394                         rcu_assign_pointer(n->next,
395                                            rcu_dereference_protected(
396                                                 new_nht->hash_buckets[hash],
397                                                 lockdep_is_held(&tbl->lock)));
398                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
399                 }
400         }
401
402         rcu_assign_pointer(tbl->nht, new_nht);
403         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
404         return new_nht;
405 }
406
/*
 * Find the entry for (@pkey, @dev) in @tbl and take a reference on it.
 * Lockless: walks the bucket chain under rcu_read_lock_bh().  An entry
 * whose refcount already dropped to zero is treated as absent
 * (atomic_inc_not_zero fails → NULL), though it still counts as a
 * cache hit in the stats.  Returns NULL when no entry matches.
 */
407 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
408                                struct net_device *dev)
409 {
410         struct neighbour *n;
411         int key_len = tbl->key_len;
412         u32 hash_val;
413         struct neigh_hash_table *nht;
414
415         NEIGH_CACHE_STAT_INC(tbl, lookups);
416
417         rcu_read_lock_bh();
418         nht = rcu_dereference_bh(tbl->nht);
419         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
420
421         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
422              n != NULL;
423              n = rcu_dereference_bh(n->next)) {
424                 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
425                         if (!atomic_inc_not_zero(&n->refcnt))
426                                 n = NULL;
427                         NEIGH_CACHE_STAT_INC(tbl, hits);
428                         break;
429                 }
430         }
431
432         rcu_read_unlock_bh();
433         return n;
434 }
435 EXPORT_SYMBOL(neigh_lookup);
436
/*
 * Like neigh_lookup(), but matches on key and network namespace only,
 * ignoring the device (the hash is computed with dev == NULL, so only
 * entries hashed the same way can be found).  Takes a reference on the
 * returned entry; NULL when not found.
 */
437 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
438                                      const void *pkey)
439 {
440         struct neighbour *n;
441         int key_len = tbl->key_len;
442         u32 hash_val;
443         struct neigh_hash_table *nht;
444
445         NEIGH_CACHE_STAT_INC(tbl, lookups);
446
447         rcu_read_lock_bh();
448         nht = rcu_dereference_bh(tbl->nht);
449         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
450
451         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
452              n != NULL;
453              n = rcu_dereference_bh(n->next)) {
454                 if (!memcmp(n->primary_key, pkey, key_len) &&
455                     net_eq(dev_net(n->dev), net)) {
456                         if (!atomic_inc_not_zero(&n->refcnt))
457                                 n = NULL;
458                         NEIGH_CACHE_STAT_INC(tbl, hits);
459                         break;
460                 }
461         }
462
463         rcu_read_unlock_bh();
464         return n;
465 }
466 EXPORT_SYMBOL(neigh_lookup_nodev);
467
/*
 * Allocate, construct and insert a neighbour entry for (@pkey, @dev).
 * Returns the entry with an extra reference for the caller, or an
 * ERR_PTR:
 *   -ENOBUFS  allocation failed (table full / forced GC insufficient),
 *   -EINVAL   the parms block died while the entry was being set up,
 *   other <0  table constructor or device neigh_setup() veto.
 *
 * The protocol constructor and device setup hook run before tbl->lock
 * is taken, so a concurrent creator may insert the same key meanwhile;
 * the bucket is therefore re-scanned under the lock and an existing
 * entry wins, the new one being released.  The hash table is grown
 * first if the entry count exceeds the bucket count.
 */
468 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
469                                struct net_device *dev)
470 {
471         u32 hash_val;
472         int key_len = tbl->key_len;
473         int error;
474         struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
475         struct neigh_hash_table *nht;
476
477         if (!n) {
478                 rc = ERR_PTR(-ENOBUFS);
479                 goto out;
480         }
481
482         memcpy(n->primary_key, pkey, key_len);
483         n->dev = dev;
484         dev_hold(dev);
485
486         /* Protocol specific setup. */
487         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
488                 rc = ERR_PTR(error);
489                 goto out_neigh_release;
490         }
491
492         /* Device specific setup. */
493         if (n->parms->neigh_setup &&
494             (error = n->parms->neigh_setup(n)) < 0) {
495                 rc = ERR_PTR(error);
496                 goto out_neigh_release;
497         }
498
        /* Backdate 'confirmed' so the fresh entry does not look
         * recently validated. */
499         n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
500
501         write_lock_bh(&tbl->lock);
502         nht = rcu_dereference_protected(tbl->nht,
503                                         lockdep_is_held(&tbl->lock));
504
505         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
506                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
507
508         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
509
510         if (n->parms->dead) {
511                 rc = ERR_PTR(-EINVAL);
512                 goto out_tbl_unlock;
513         }
514
        /* Re-check for a concurrent insert of the same key. */
515         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
516                                             lockdep_is_held(&tbl->lock));
517              n1 != NULL;
518              n1 = rcu_dereference_protected(n1->next,
519                         lockdep_is_held(&tbl->lock))) {
520                 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
521                         neigh_hold(n1);
522                         rc = n1;
523                         goto out_tbl_unlock;
524                 }
525         }
526
527         n->dead = 0;
528         neigh_hold(n);
529         rcu_assign_pointer(n->next,
530                            rcu_dereference_protected(nht->hash_buckets[hash_val],
531                                                      lockdep_is_held(&tbl->lock)));
532         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
533         write_unlock_bh(&tbl->lock);
534         NEIGH_PRINTK2("neigh %p is created.\n", n);
535         rc = n;
536 out:
537         return rc;
538 out_tbl_unlock:
539         write_unlock_bh(&tbl->lock);
540 out_neigh_release:
541         neigh_release(n);
542         goto out;
543 }
544 EXPORT_SYMBOL(neigh_create);
545
546 static u32 pneigh_hash(const void *pkey, int key_len)
547 {
548         u32 hash_val = *(u32 *)(pkey + key_len - 4);
549         hash_val ^= (hash_val >> 16);
550         hash_val ^= hash_val >> 8;
551         hash_val ^= hash_val >> 4;
552         hash_val &= PNEIGH_HASHMASK;
553         return hash_val;
554 }
555
556 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
557                                               struct net *net,
558                                               const void *pkey,
559                                               int key_len,
560                                               struct net_device *dev)
561 {
562         while (n) {
563                 if (!memcmp(n->key, pkey, key_len) &&
564                     net_eq(pneigh_net(n), net) &&
565                     (n->dev == dev || !n->dev))
566                         return n;
567                 n = n->next;
568         }
569         return NULL;
570 }
571
572 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
573                 struct net *net, const void *pkey, struct net_device *dev)
574 {
575         int key_len = tbl->key_len;
576         u32 hash_val = pneigh_hash(pkey, key_len);
577
578         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
579                                  net, pkey, key_len, dev);
580 }
581 EXPORT_SYMBOL_GPL(__pneigh_lookup);
582
/*
 * Look up the proxy entry for (@net, @pkey, @dev).  When @creat is
 * non-zero and no entry exists, allocate one (GFP_KERNEL, so this
 * path must be in process context) and link it into the bucket.
 * Creation requires the RTNL, which — presumably — is what serializes
 * concurrent creators across the unlocked window between the lookup
 * and the insert; confirm at the call sites.  Returns the entry or
 * NULL (not found, or allocation/pconstructor failure).
 */
583 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
584                                     struct net *net, const void *pkey,
585                                     struct net_device *dev, int creat)
586 {
587         struct pneigh_entry *n;
588         int key_len = tbl->key_len;
589         u32 hash_val = pneigh_hash(pkey, key_len);
590
591         read_lock_bh(&tbl->lock);
592         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
593                               net, pkey, key_len, dev);
594         read_unlock_bh(&tbl->lock);
595
596         if (n || !creat)
597                 goto out;
598
599         ASSERT_RTNL();
600
601         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
602         if (!n)
603                 goto out;
604
605         write_pnet(&n->net, hold_net(net));
606         memcpy(n->key, pkey, key_len);
607         n->dev = dev;
608         if (dev)
609                 dev_hold(dev);
610
        /* Protocol veto: undo all the references taken above. */
611         if (tbl->pconstructor && tbl->pconstructor(n)) {
612                 if (dev)
613                         dev_put(dev);
614                 release_net(net);
615                 kfree(n);
616                 n = NULL;
617                 goto out;
618         }
619
620         write_lock_bh(&tbl->lock);
621         n->next = tbl->phash_buckets[hash_val];
622         tbl->phash_buckets[hash_val] = n;
623         write_unlock_bh(&tbl->lock);
624 out:
625         return n;
626 }
627 EXPORT_SYMBOL(pneigh_lookup);
628
629
/*
 * Remove and free the proxy entry exactly matching (@net, @pkey,
 * @dev).  The entry is unlinked under tbl->lock, but the destructor,
 * device put and kfree run after the lock is dropped.  Returns 0 on
 * success, -ENOENT if no matching entry exists.
 */
630 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
631                   struct net_device *dev)
632 {
633         struct pneigh_entry *n, **np;
634         int key_len = tbl->key_len;
635         u32 hash_val = pneigh_hash(pkey, key_len);
636
637         write_lock_bh(&tbl->lock);
638         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
639              np = &n->next) {
640                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
641                     net_eq(pneigh_net(n), net)) {
642                         *np = n->next;
643                         write_unlock_bh(&tbl->lock);
644                         if (tbl->pdestructor)
645                                 tbl->pdestructor(n);
646                         if (n->dev)
647                                 dev_put(n->dev);
648                         release_net(pneigh_net(n));
649                         kfree(n);
650                         return 0;
651                 }
652         }
653         write_unlock_bh(&tbl->lock);
654         return -ENOENT;
655 }
656
/*
 * Remove every proxy entry for @dev (all entries when @dev is NULL).
 * Called from neigh_ifdown() with tbl->lock already write-held, which
 * is why no locking appears here.  Always returns -ENOENT; the only
 * caller in this file ignores the return value.
 */
657 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
658 {
659         struct pneigh_entry *n, **np;
660         u32 h;
661
662         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
663                 np = &tbl->phash_buckets[h];
664                 while ((n = *np) != NULL) {
665                         if (!dev || n->dev == dev) {
666                                 *np = n->next;
667                                 if (tbl->pdestructor)
668                                         tbl->pdestructor(n);
669                                 if (n->dev)
670                                         dev_put(n->dev);
671                                 release_net(pneigh_net(n));
672                                 kfree(n);
673                                 continue;
674                         }
675                         np = &n->next;
676                 }
677         }
678         return -ENOENT;
679 }
680
681 static void neigh_parms_destroy(struct neigh_parms *parms);

/* Drop one reference on a parms block; the last put destroys it. */
683 static inline void neigh_parms_put(struct neigh_parms *parms)
684 {
685         if (atomic_dec_and_test(&parms->refcnt))
686                 neigh_parms_destroy(parms);
687 }
688
/*
 *      neighbour must already be out of the table (n->dead set);
 *      destroying a live entry only logs a warning and bails out.
 *      Cancels any pending timer, purges the pending-packet queue,
 *      drops the device and parms references, and frees the entry
 *      after an RCU grace period (lockless readers may still hold it).
 */
693 void neigh_destroy(struct neighbour *neigh)
694 {
695         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
696
697         if (!neigh->dead) {
698                 printk(KERN_WARNING
699                        "Destroying alive neighbour %p\n", neigh);
700                 dump_stack();
701                 return;
702         }
703
        /* A dead entry should have had its timer stopped already. */
704         if (neigh_del_timer(neigh))
705                 printk(KERN_WARNING "Impossible event.\n");
706
707         skb_queue_purge(&neigh->arp_queue);
708         neigh->arp_queue_len_bytes = 0;
709
710         dev_put(neigh->dev);
711         neigh_parms_put(neigh->parms);
712
713         NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
714
715         atomic_dec(&neigh->tbl->entries);
716         kfree_rcu(neigh, rcu);
717 }
718 EXPORT_SYMBOL(neigh_destroy);
719
720 /* Neighbour state is suspicious;
721    disable fast path.
722
723    Called with write_locked neigh.
724  */
725 static void neigh_suspect(struct neighbour *neigh)
726 {
727         NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
728
729         neigh->output = neigh->ops->output;
730 }
731
732 /* Neighbour state is OK;
733    enable fast path.
734
735    Called with write_locked neigh.
736  */
737 static void neigh_connect(struct neighbour *neigh)
738 {
739         NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
740
741         neigh->output = neigh->ops->connected_output;
742 }
743
744 static void neigh_periodic_work(struct work_struct *work)
745 {
746         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
747         struct neighbour *n;
748         struct neighbour __rcu **np;
749         unsigned int i;
750         struct neigh_hash_table *nht;
751
752         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
753
754         write_lock_bh(&tbl->lock);
755         nht = rcu_dereference_protected(tbl->nht,
756                                         lockdep_is_held(&tbl->lock));
757
758         /*
759          *      periodically recompute ReachableTime from random function
760          */
761
762         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
763                 struct neigh_parms *p;
764                 tbl->last_rand = jiffies;
765                 for (p = &tbl->parms; p; p = p->next)
766                         p->reachable_time =
767                                 neigh_rand_reach_time(p->base_reachable_time);
768         }
769
770         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
771                 np = &nht->hash_buckets[i];
772
773                 while ((n = rcu_dereference_protected(*np,
774                                 lockdep_is_held(&tbl->lock))) != NULL) {
775                         unsigned int state;
776
777                         write_lock(&n->lock);
778
779                         state = n->nud_state;
780                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
781                                 write_unlock(&n->lock);
782                                 goto next_elt;
783                         }
784
785                         if (time_before(n->used, n->confirmed))
786                                 n->used = n->confirmed;
787
788                         if (atomic_read(&n->refcnt) == 1 &&
789                             (state == NUD_FAILED ||
790                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
791                                 *np = n->next;
792                                 n->dead = 1;
793                                 write_unlock(&n->lock);
794                                 neigh_cleanup_and_release(n);
795                                 continue;
796                         }
797                         write_unlock(&n->lock);
798
799 next_elt:
800                         np = &n->next;
801                 }
802                 /*
803                  * It's fine to release lock here, even if hash table
804                  * grows while we are preempted.
805                  */
806                 write_unlock_bh(&tbl->lock);
807                 cond_resched();
808                 write_lock_bh(&tbl->lock);
809         }
810         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
811          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
812          * base_reachable_time.
813          */
814         schedule_delayed_work(&tbl->gc_work,
815                               tbl->parms.base_reachable_time >> 1);
816         write_unlock_bh(&tbl->lock);
817 }
818
819 static __inline__ int neigh_max_probes(struct neighbour *n)
820 {
821         struct neigh_parms *p = n->parms;
822         return (n->nud_state & NUD_PROBE) ?
823                 p->ucast_probes :
824                 p->ucast_probes + p->app_probes + p->mcast_probes;
825 }
826
/*
 * Entry has entered NUD_FAILED: report every queued skb as
 * unreachable via ops->error_report() and flush the queue.
 * neigh->lock is dropped and re-taken around each callback (hence the
 * sparse annotations) because error_report is complex and may touch
 * this very neighbour entry again.
 */
827 static void neigh_invalidate(struct neighbour *neigh)
828         __releases(neigh->lock)
829         __acquires(neigh->lock)
830 {
831         struct sk_buff *skb;
832
833         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
834         NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
835         neigh->updated = jiffies;
836
837         /* It is very thin place. report_unreachable is very complicated
838            routine. Particularly, it can hit the same neighbour entry!
839
840            So that, we try to be accurate and avoid dead loop. --ANK
841          */
        /* Re-check the state each iteration: a callback may have
         * revived the entry out of NUD_FAILED. */
842         while (neigh->nud_state == NUD_FAILED &&
843                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
844                 write_unlock(&neigh->lock);
845                 neigh->ops->error_report(neigh, skb);
846                 write_lock(&neigh->lock);
847         }
848         skb_queue_purge(&neigh->arp_queue);
849         neigh->arp_queue_len_bytes = 0;
850 }
851
/*
 * Send one solicitation for @neigh and bump its probe counter.
 * The head-of-queue skb is copied so the probe survives even if the
 * arp_queue overflows meanwhile; neigh->lock (held on entry, see the
 * __releases annotation) is dropped before calling ops->solicit, and
 * is NOT re-taken — the caller gets the lock back unlocked.
 */
852 static void neigh_probe(struct neighbour *neigh)
853         __releases(neigh->lock)
854 {
855         struct sk_buff *skb = skb_peek(&neigh->arp_queue);
856         /* keep skb alive even if arp_queue overflows */
857         if (skb)
858                 skb = skb_copy(skb, GFP_ATOMIC);
859         write_unlock(&neigh->lock);
860         neigh->ops->solicit(neigh, skb);
861         atomic_inc(&neigh->probes);
862         kfree_skb(skb);
863 }
864
/* Called when a timer expires for a neighbour entry.
 *
 * Runs the NUD state machine for one entry: REACHABLE may decay to
 * DELAY or STALE, DELAY either confirms back to REACHABLE or moves to
 * PROBE, and INCOMPLETE/PROBE entries are re-probed until
 * neigh_max_probes() is exceeded, at which point the entry fails.
 * Runs in timer context holding a reference on @arg (the neighbour),
 * released at the end.
 */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	/* Entry left all timer-driven states while the timer was pending. */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			/* A confirmation arrived while we were delaying. */
			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Re-arm, but never fire more often than twice per second. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);	/* drops neigh->lock */
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}
948
/* Kick resolution for @neigh if it is not in a usable state.
 *
 * Returns 0 if the caller may transmit immediately (entry connected
 * or already being confirmed) and 1 if the packet was queued on
 * arp_queue pending resolution (or dropped because resolution cannot
 * even start).  On queue overflow the oldest packets are discarded.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			unsigned long next, now = jiffies;

			/* Start resolution; first retransmit no sooner
			 * than HZ/2 from now.
			 */
			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No probing configured: fail immediately. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Drop oldest queued packets until this one fits
			 * under the queue_len_bytes budget.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       neigh->parms->queue_len_bytes) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() drops the lock itself; either way BHs are
	 * re-enabled below, pairing with write_lock_bh() above.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
1014
1015 static void neigh_update_hhs(struct neighbour *neigh)
1016 {
1017         struct hh_cache *hh;
1018         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1019                 = NULL;
1020
1021         if (neigh->dev->header_ops)
1022                 update = neigh->dev->header_ops->cache_update;
1023
1024         if (update) {
1025                 hh = &neigh->hh;
1026                 if (hh->hh_len) {
1027                         write_seqlock_bh(&hh->hh_lock);
1028                         update(hh, neigh->dev, neigh->ha);
1029                         write_sequnlock_bh(&hh->hh_lock);
1030                 }
1031         }
1032 }
1033
1034
1035
1036 /* Generic update routine.
1037    -- lladdr is new lladdr or NULL, if it is not supplied.
1038    -- new    is new state.
1039    -- flags
1040         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1041                                 if it is different.
1042         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1043                                 lladdr instead of overriding it
1044                                 if it is different.
1045                                 It also allows to retain current state
1046                                 if lladdr is unchanged.
1047         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1048
1049         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1050                                 NTF_ROUTER flag.
1051         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1052                                 a router.
1053
1054    Caller MUST hold reference count on the entry.
1055  */
1056
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch NOARP/PERMANENT entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		/* Moving to an invalid state: stop the timer and, if
		 * resolution just failed, flush the pending queue.
		 */
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Keep the old address, demote to STALE. */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		/* ha_lock keeps lockless readers from seeing a torn
		 * address while we copy the new one in.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Backdate confirmation so a non-connected entry is
		 * re-verified promptly.
		 */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();
			/* On shaper/eql skb->dst->neighbour != neigh :( */
			if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
				n1 = n2;
			n1->output(n1, skb);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		/* Whatever could not be sent is discarded. */
		skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1204
1205 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1206                                  u8 *lladdr, void *saddr,
1207                                  struct net_device *dev)
1208 {
1209         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1210                                                  lladdr || !dev->addr_len);
1211         if (neigh)
1212                 neigh_update(neigh, lladdr, NUD_STALE,
1213                              NEIGH_UPDATE_F_OVERRIDE);
1214         return neigh;
1215 }
1216 EXPORT_SYMBOL(neigh_event_ns);
1217
1218 /* called with read_lock_bh(&n->lock); */
1219 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1220 {
1221         struct net_device *dev = dst->dev;
1222         __be16 prot = dst->ops->protocol;
1223         struct hh_cache *hh = &n->hh;
1224
1225         write_lock_bh(&n->lock);
1226
1227         /* Only one thread can come in here and initialize the
1228          * hh_cache entry.
1229          */
1230         if (!hh->hh_len)
1231                 dev->header_ops->cache(n, hh, prot);
1232
1233         write_unlock_bh(&n->lock);
1234 }
1235
1236 /* This function can be used in contexts, where only old dev_queue_xmit
1237  * worked, f.e. if you want to override normal output path (eql, shaper),
1238  * but resolution is not made yet.
1239  */
1240
1241 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1242 {
1243         struct net_device *dev = skb->dev;
1244
1245         __skb_pull(skb, skb_network_offset(skb));
1246
1247         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1248                             skb->len) < 0 &&
1249             dev->header_ops->rebuild(skb))
1250                 return 0;
1251
1252         return dev_queue_xmit(skb);
1253 }
1254 EXPORT_SYMBOL(neigh_compat_output);
1255
1256 /* Slow and careful. */
1257
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	__skb_pull(skb, skb_network_offset(skb));

	/* neigh_event_send() == 0 means the entry is usable now;
	 * otherwise the skb was queued (or dropped) for us.
	 */
	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		/* First transmit on this entry: fill the hh cache. */
		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		/* ha_lock seqlock: retry if the address changed mid-copy. */
		do {
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	/* Note: deliberately falls through into out_kfree_skb. */
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
		      dst, neigh);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1298
1299 /* As fast as possible without hh cache */
1300
1301 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1302 {
1303         struct net_device *dev = neigh->dev;
1304         unsigned int seq;
1305         int err;
1306
1307         __skb_pull(skb, skb_network_offset(skb));
1308
1309         do {
1310                 seq = read_seqbegin(&neigh->ha_lock);
1311                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1312                                       neigh->ha, NULL, skb->len);
1313         } while (read_seqretry(&neigh->ha_lock, seq));
1314
1315         if (err >= 0)
1316                 err = dev_queue_xmit(skb);
1317         else {
1318                 err = -EINVAL;
1319                 kfree_skb(skb);
1320         }
1321         return err;
1322 }
1323 EXPORT_SYMBOL(neigh_connected_output);
1324
/* Output path for neighbours that need no link-layer resolution:
 * hand the skb straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1330
/* Timer callback for the table's proxy_queue: hand every skb whose
 * scheduled time has arrived to tbl->proxy_redo and re-arm the timer
 * for the earliest remaining entry (if any).
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			/* Drop the ref taken in pneigh_enqueue(). */
			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1364
/* Queue @skb for delayed proxy processing, spreading handling over a
 * random interval below p->proxy_delay.  Dropped if the proxy queue is
 * already over p->proxy_qlen.
 * NOTE(review): net_random() % p->proxy_delay divides by zero when
 * proxy_delay is 0 — callers appear to guard against that; confirm.
 */
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (net_random() % p->proxy_delay);

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		/* Keep the earlier of the pending and new expiries. */
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);	/* released in neigh_proxy_process() */
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
1391
1392 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1393                                                       struct net *net, int ifindex)
1394 {
1395         struct neigh_parms *p;
1396
1397         for (p = &tbl->parms; p; p = p->next) {
1398                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1399                     (!p->dev && !ifindex))
1400                         return p;
1401         }
1402
1403         return NULL;
1404 }
1405
/* Clone the table's default parms for @dev, give the device's
 * ndo_neigh_setup() callback a chance to veto or adjust them, then
 * link the new parms into the table's list.  Returns NULL on
 * allocation failure, missing default parms, or callback failure.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p, *ref;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	ref = lookup_neigh_parms(tbl, net, 0);
	if (!ref)
		return NULL;

	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);

		/* The device may refuse neighbours on this table;
		 * nothing is held yet, so just free and bail.
		 */
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			kfree(p);
			return NULL;
		}

		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;
		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1441
/* RCU callback: drop the list's reference on a detached neigh_parms
 * once all readers of the old list are done.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1449
/* Unlink @parms from @tbl's list and release its reference after an
 * RCU grace period.  The table's built-in default parms are never
 * released here.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			/* Freed via neigh_parms_put() after readers drain. */
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
EXPORT_SYMBOL(neigh_parms_release);
1472
/* Final teardown once the last reference on @parms is gone. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}
1478
1479 static struct lock_class_key neigh_table_proxy_queue_class;
1480
/* One-time initialization of @tbl: default parms, per-CPU statistics,
 * /proc stats entry, neighbour and proxy hash tables, the periodic GC
 * work and the proxy timer.  Allocation failures are fatal (panic) —
 * tables are set up during subsystem init.
 */
void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Initial neighbour hash: 1 << 3 buckets, grown on demand. */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	rwlock_init(&tbl->lock);
	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
}
EXPORT_SYMBOL(neigh_table_init_no_netlink);
1520
/* Initialize @tbl and publish it on the global neigh_tables list,
 * warning (after publication) if a table for the same address family
 * is already registered.
 */
void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	/* Scan for a duplicate family before publishing the table. */
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(tmp)) {
		printk(KERN_ERR "NEIGH: Registering multiple tables for "
		       "family %d\n", tbl->family);
		dump_stack();
	}
}
EXPORT_SYMBOL(neigh_table_init);
1542
/* Unregister @tbl and free everything neigh_table_init() allocated:
 * stop GC work and timers, flush entries, unlink from the global list
 * and release the hashes, proc entry and statistics.  Always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		printk(KERN_CRIT "neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	/* Free the hash after a grace period — lockless lookups may
	 * still be traversing it.
	 */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1578
/* RTM_DELNEIGH handler: remove the neighbour (or proxy) entry named
 * by the netlink request.  Deletion is implemented by forcing the
 * entry into NUD_FAILED via an administrative neigh_update().
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Matching family found: the list lock is dropped here
		 * and the loop is always left via one of the gotos.
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}
1642
/* RTM_NEWNEIGH handler: create or update a neighbour (or proxy)
 * entry from a netlink request, honouring NLM_F_CREATE, NLM_F_EXCL
 * and NLM_F_REPLACE semantics.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Matching family found: the list lock is dropped here
		 * and the loop is always left via one of the gotos.
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			/* creat=1: lookup creates the proxy entry. */
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			/* Without REPLACE, do not clobber an existing
			 * link-layer address.
			 */
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			/* NTF_USE just kicks resolution, no update. */
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}
1740
/* Dump one neigh_parms instance as a nested NDTA_PARMS attribute.
 * The NLA_PUT* macros jump to nla_put_failure when the skb runs out of
 * tailroom.  Returns the total message length so far, or -EMSGSIZE after
 * cancelling the partially-built nest.
 */
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	/* parms->dev is NULL for a table's default parameter set */
	if (parms->dev)
		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);

	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
	NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes);
	/* approximative value for deprecated QUEUE_LEN (in packets) */
	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN,
		    DIV_ROUND_UP(parms->queue_len_bytes,
				 SKB_TRUESIZE(ETH_FRAME_LEN)));
	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
		      parms->base_reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
1778
/* Build an RTM_NEWNEIGHTBL message describing @tbl: GC thresholds and
 * interval, runtime configuration (NDTA_CONFIG), aggregated per-cpu
 * statistics (NDTA_STATS) and the table's default parms (NDTA_PARMS).
 * Returns the finished message length, or -EMSGSIZE on overflow.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	/* hold tbl->lock so the dumped values form a consistent snapshot */
	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);

	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* the hash table itself is RCU-managed: sample it under RCU */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd;
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* sum all per-cpu counters into a single ndt_stats blob */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
	}

	/* the default parameter set must never be bound to a device */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1862
/* Build an RTM_NEWNEIGHTBL message carrying only the table name plus one
 * specific (usually per-device) parms instance.  Used when dumping the
 * non-default parameter sets of a table.  Returns the message length or
 * -EMSGSIZE after cancelling the partial message.
 */
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	/* snapshot under tbl->lock, as in neightbl_fill_info() */
	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1894
/* Validation policy for top-level NDTA_* attributes of RTM_SETNEIGHTBL. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1903
1904 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1905         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1906         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1907         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1908         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1909         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1910         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1911         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1912         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1913         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1914         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1915         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1916         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1917         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1918 };
1919
/* RTM_SETNEIGHTBL handler: update GC thresholds/interval of the table
 * named by NDTA_NAME and, if NDTA_PARMS is present, individual fields of
 * one parms instance (selected by NDTPA_IFINDEX, 0 = table default).
 * Returns 0 or a negative errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	/* the table is identified by name; nothing to do without one */
	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		/* ifindex 0 selects the table's default parameter set */
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* apply every attribute that was supplied */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* deprecated packet count: convert to bytes */
				p->queue_len_bytes = nla_get_u32(tbp[i]) *
						     SKB_TRUESIZE(ETH_FRAME_LEN);
				break;
			case NDTPA_QUEUE_LENBYTES:
				p->queue_len_bytes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
2048
/* RTM_GETNEIGHTBL dump: one message per table plus one per non-default
 * parms instance.  cb->args[0]/[1] record table and parms progress so a
 * multi-skb dump resumes where the previous batch stopped.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		/* tbl->parms.next skips the default set, already dumped
		 * inside neightbl_fill_info() above */
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		/* later tables start from their first parms instance */
		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2097
/* Build an RTM_NEWNEIGH message for one neighbour entry: ndmsg header,
 * key (NDA_DST), optional hardware address (NDA_LLADDR, only when the
 * entry is NUD_VALID), cache timestamps and probe count.
 * Returns the message length or -EMSGSIZE after cancelling.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);

	/* state, lladdr and timestamps are sampled under neigh->lock so
	 * they form a consistent snapshot */
	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			/* must drop the lock before the cancel path */
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2147
/* Announce a neighbour change: first to in-kernel netevent listeners,
 * then to userspace via an RTM_NEWNEIGH netlink notification. */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2153
/* Dump all neighbours of one table that belong to the requesting netns.
 * Walks the RCU-protected hash table; cb->args[1]/[2] hold the bucket
 * and in-bucket index where the previous batch stopped.
 * Returns skb->len on completion or -1 when the skb filled up.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = 0; h < (1 << nht->hash_shift); h++) {
		if (h < s_h)
			continue;
		if (h > s_h)
			s_idx = 0;	/* restart counting in a new bucket */
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	/* remember where to resume on the next callback invocation */
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2196
/* RTM_GETNEIGH dump entry point: iterate all tables matching the
 * requested family and dump each via neigh_dump_table().  cb->args[0]
 * tracks the table index across callback invocations. */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* entering a fresh table: clear the per-table resume state
		 * (cb->args[1..]) left over from the previous one */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (neigh_dump_table(tbl, skb, cb) < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}
2220
/* Invoke @cb(neighbour, @cookie) for every entry in @tbl.  The walk runs
 * under rcu_read_lock_bh() plus tbl->lock (read) so the hash table cannot
 * be resized underneath it; @cb must not sleep or modify the table. */
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
2242
/* The tbl->lock must be held as a writer and BH disabled.
 *
 * Run @cb on every entry; when it returns non-zero the entry is unlinked
 * from its hash chain, marked dead and released.  Each entry's own lock
 * is taken around the callback so concurrent readers see a consistent
 * neighbour while the decision is made.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* unlink n; np stays pointing at the slot so
				 * the next iteration examines the successor */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			/* release only after dropping n->lock */
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2277
2278 #ifdef CONFIG_PROC_FS
2279
/* seq_file helper: find the first neighbour visible to this iteration
 * (right netns, optional NOARP filtering, optional per-entry sub-iterator)
 * starting from bucket 0.  Caller holds rcu_read_lock_bh() via
 * neigh_seq_start().  Updates state->bucket to where the entry was found.
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				/* protocol hook may reject this entry */
				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			/* with SKIP_NOARP, only entries that do ARP count */
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2318
/* seq_file helper: advance from @n to the next visible neighbour, moving
 * through later hash buckets as needed.  When @pos is non-NULL it is
 * decremented for each entry returned (used by neigh_get_idx to seek).
 * Returns NULL when the neighbour portion of the walk is exhausted.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		/* let the protocol hook continue within the current entry */
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		/* current bucket exhausted: move to the next one */
		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2366
2367 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2368 {
2369         struct neighbour *n = neigh_get_first(seq);
2370
2371         if (n) {
2372                 --(*pos);
2373                 while (*pos) {
2374                         n = neigh_get_next(seq, n, pos);
2375                         if (!n)
2376                                 break;
2377                 }
2378         }
2379         return *pos ? NULL : n;
2380 }
2381
2382 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2383 {
2384         struct neigh_seq_state *state = seq->private;
2385         struct net *net = seq_file_net(seq);
2386         struct neigh_table *tbl = state->tbl;
2387         struct pneigh_entry *pn = NULL;
2388         int bucket = state->bucket;
2389
2390         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2391         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2392                 pn = tbl->phash_buckets[bucket];
2393                 while (pn && !net_eq(pneigh_net(pn), net))
2394                         pn = pn->next;
2395                 if (pn)
2396                         break;
2397         }
2398         state->bucket = bucket;
2399
2400         return pn;
2401 }
2402
2403 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2404                                             struct pneigh_entry *pn,
2405                                             loff_t *pos)
2406 {
2407         struct neigh_seq_state *state = seq->private;
2408         struct net *net = seq_file_net(seq);
2409         struct neigh_table *tbl = state->tbl;
2410
2411         do {
2412                 pn = pn->next;
2413         } while (pn && !net_eq(pneigh_net(pn), net));
2414
2415         while (!pn) {
2416                 if (++state->bucket > PNEIGH_HASHMASK)
2417                         break;
2418                 pn = tbl->phash_buckets[state->bucket];
2419                 while (pn && !net_eq(pneigh_net(pn), net))
2420                         pn = pn->next;
2421                 if (pn)
2422                         break;
2423         }
2424
2425         if (pn && pos)
2426                 --(*pos);
2427
2428         return pn;
2429 }
2430
2431 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2432 {
2433         struct pneigh_entry *pn = pneigh_get_first(seq);
2434
2435         if (pn) {
2436                 --(*pos);
2437                 while (*pos) {
2438                         pn = pneigh_get_next(seq, pn, pos);
2439                         if (!pn)
2440                                 break;
2441                 }
2442         }
2443         return *pos ? NULL : pn;
2444 }
2445
2446 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2447 {
2448         struct neigh_seq_state *state = seq->private;
2449         void *rc;
2450         loff_t idxpos = *pos;
2451
2452         rc = neigh_get_idx(seq, &idxpos);
2453         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2454                 rc = pneigh_get_idx(seq, &idxpos);
2455
2456         return rc;
2457 }
2458
/* Common ->start for protocol /proc neighbour files (e.g. arp).  Primes
 * the iteration state and takes rcu_read_lock_bh(), which stays held for
 * the whole traversal until neigh_seq_stop() releases it.  Returns
 * SEQ_START_TOKEN for the header line at offset 0. */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	/* IS_PNEIGH is iteration-internal; callers may not pass it in */
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2474
/* Common ->next: step the combined walk, switching from the neighbour
 * phase to the proxy-neighbour phase once the former is exhausted
 * (tracked via NEIGH_SEQ_IS_PNEIGH in state->flags).  *pos is always
 * incremented, as the seq_file contract requires. */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		/* neighbours done; fall through to proxy entries if wanted */
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		/* already in pneigh phase, which NEIGH_ONLY never enters */
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2501
/* Common ->stop() helper: drop the RCU-bh read lock taken in
 * neigh_seq_start().
 */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2508
2509 /* statistics via seq_file */
2510
2511 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2512 {
2513         struct neigh_table *tbl = seq->private;
2514         int cpu;
2515
2516         if (*pos == 0)
2517                 return SEQ_START_TOKEN;
2518
2519         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2520                 if (!cpu_possible(cpu))
2521                         continue;
2522                 *pos = cpu+1;
2523                 return per_cpu_ptr(tbl->stats, cpu);
2524         }
2525         return NULL;
2526 }
2527
2528 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2529 {
2530         struct neigh_table *tbl = seq->private;
2531         int cpu;
2532
2533         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2534                 if (!cpu_possible(cpu))
2535                         continue;
2536                 *pos = cpu+1;
2537                 return per_cpu_ptr(tbl->stats, cpu);
2538         }
2539         return NULL;
2540 }
2541
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
	/* Intentionally empty: the start/next callbacks take no locks. */
}
2546
/* ->show() for the statistics file: emit the header for
 * SEQ_START_TOKEN, otherwise one row of per-CPU counters (all
 * rendered in hex).  The shared tbl->entries count is repeated on
 * every row.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = seq->private;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards
		   );

	return 0;
}
2580
/* seq_file iterator for /proc/net/stat/<table>. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2587
2588 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2589 {
2590         int ret = seq_open(file, &neigh_stat_seq_ops);
2591
2592         if (!ret) {
2593                 struct seq_file *sf = file->private_data;
2594                 sf->private = PDE(inode)->data;
2595         }
2596         return ret;
2597 };
2598
/* file_operations for /proc/net/stat/<table>; read-only seq_file. */
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
2606
2607 #endif /* CONFIG_PROC_FS */
2608
/* Worst-case netlink message size for one neighbour notification:
 * the ndmsg header plus the four attributes neigh_fill_info() may
 * emit.  Kept in sync with neigh_fill_info(); -EMSGSIZE from there
 * means this estimate is wrong (see WARN_ON in __neigh_notify()).
 */
static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4); /* NDA_PROBES */
}
2617
/* Broadcast a neighbour event of the given rtnetlink type/flags to the
 * RTNLGRP_NEIGH multicast group of n's network namespace.  On failure
 * the error is latched on the group via rtnl_set_sk_err() so listeners
 * observe the overrun.
 */
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, 0, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2641
#ifdef CONFIG_ARPD
/* Ask user space (the ARP daemon) to resolve this neighbour by
 * multicasting an RTM_GETNEIGH request event.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
#endif /* CONFIG_ARPD */
2649
2650 #ifdef CONFIG_SYSCTL
2651
/* sysctl handler for the legacy "unres_qlen" knob.  The backing field
 * (queue_len_bytes, wired up in neigh_sysctl_register()) counts bytes;
 * present it to user space as a packet count by scaling with the true
 * size of an ETH_FRAME_LEN skb in both directions.  Because of the
 * rounding, a value read back may differ from the value written.
 */
static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
			   size_t *lenp, loff_t *ppos)
{
	int size, ret;
	/* Work on a shadow ctl_table so proc_dointvec touches 'size',
	 * not the byte-counted backing field directly.
	 */
	ctl_table tmp = *ctl;

	tmp.data = &size;
	/* bytes -> packets for the user-visible value */
	size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
	if (write && !ret)
		/* packets -> bytes on a successful write */
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
2665
/* Indices into neigh_sysctl_template.neigh_vars[].  The entries from
 * NEIGH_VAR_GC_INTERVAL onward are table-wide knobs: for per-device
 * registrations neigh_sysctl_register() terminates the table just
 * before them.
 */
enum {
	NEIGH_VAR_MCAST_PROBE,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,
	NEIGH_VAR_MAX
};
2688
/* Template for the per-parms sysctl tree under
 * /proc/sys/net/<proto>/neigh/<dev|default>/.  neigh_sysctl_register()
 * kmemdup()s this, fills in the .data pointers from the neigh_parms
 * being registered, and owns the copy (freed in
 * neigh_sysctl_unregister()).  .data is deliberately left NULL here.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
	char *dev_name;
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		[NEIGH_VAR_MCAST_PROBE] = {
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_UCAST_PROBE] = {
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_APP_PROBE] = {
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_RETRANS_TIME] = {
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_DELAY_PROBE_TIME] = {
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_STALETIME] = {
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_QUEUE_LEN] = {
			/* legacy packet-count view of unres_qlen_bytes */
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_unres_qlen,
		},
		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
			.procname	= "unres_qlen_bytes",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_PROXY_QLEN] = {
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_ANYCAST_DELAY] = {
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_PROXY_DELAY] = {
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_LOCKTIME] = {
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_RETRANS_TIME_MS] = {
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		/* Table-wide knobs below: removed for per-device trees. */
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{},
	},
};
2812
/* Register the sysctl tree for one neigh_parms under
 * /proc/sys/net/<p_name>/neigh/<dev->name|default>/.
 *
 * @dev:     device the parms belong to, or NULL for the table default.
 * @p:       the neigh_parms whose fields back the sysctl entries.
 * @p_name:  protocol directory name (e.g. "ipv4").
 * @handler: optional override handler for the four *time knobs, given
 *           @dev via .extra1 so it can locate the device.
 *
 * Returns 0 on success, -ENOBUFS on any allocation/registration
 * failure.  The allocated table is stored in p->sysctl_table and torn
 * down by neigh_sysctl_unregister().
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	struct neigh_sysctl_table *t;
	const char *dev_name_source = NULL;

#define NEIGH_CTL_PATH_ROOT	0
#define NEIGH_CTL_PATH_PROTO	1
#define NEIGH_CTL_PATH_NEIGH	2
#define NEIGH_CTL_PATH_DEV	3

	struct ctl_path neigh_path[] = {
		{ .procname = "net",	 },
		{ .procname = "proto",	 },
		{ .procname = "neigh",	 },
		{ .procname = "default", },
		{ },
	};

	/* Private copy of the template; .data slots are filled in below. */
	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
	/* "unres_qlen" and "unres_qlen_bytes" share one backing field;
	 * proc_unres_qlen converts between packets and bytes.
	 */
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
		/* NOTE(review): assumes the table's gc_interval and
		 * gc_thresh1..3 ints are laid out contiguously right
		 * after the default neigh_parms — confirm against the
		 * neigh_table layout before touching this.
		 */
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
	}


	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
	}

	/* Own copy of the directory name; freed with the table. */
	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
	if (!t->dev_name)
		goto free;

	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;

	t->sysctl_header =
		register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free_procname;

	p->sysctl_table = t;
	return 0;

free_procname:
	kfree(t->dev_name);
free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
2904
2905 void neigh_sysctl_unregister(struct neigh_parms *p)
2906 {
2907         if (p->sysctl_table) {
2908                 struct neigh_sysctl_table *t = p->sysctl_table;
2909                 p->sysctl_table = NULL;
2910                 unregister_sysctl_table(t->sysctl_header);
2911                 kfree(t->dev_name);
2912                 kfree(t);
2913         }
2914 }
2915 EXPORT_SYMBOL(neigh_sysctl_unregister);
2916
2917 #endif  /* CONFIG_SYSCTL */
2918
/* Wire up the rtnetlink neighbour message handlers at boot.  A NULL
 * doit/dumpit slot means "no handler for that operation".
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);
2933