File: /usr/src/linux/net/ipv4/fib_hash.c

1     /*
2      * INET		An implementation of the TCP/IP protocol suite for the LINUX
3      *		operating system.  INET is implemented using the  BSD Socket
4      *		interface as the means of communication with the user level.
5      *
6      *		IPv4 FIB: lookup engine and maintenance routines.
7      *
8      * Version:	$Id: fib_hash.c,v 1.12 1999/08/31 07:03:27 davem Exp $
9      *
10      * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11      *
12      *		This program is free software; you can redistribute it and/or
13      *		modify it under the terms of the GNU General Public License
14      *		as published by the Free Software Foundation; either version
15      *		2 of the License, or (at your option) any later version.
16      */
17     
18     #include <linux/config.h>
19     #include <asm/uaccess.h>
20     #include <asm/system.h>
21     #include <asm/bitops.h>
22     #include <linux/types.h>
23     #include <linux/kernel.h>
24     #include <linux/sched.h>
25     #include <linux/mm.h>
26     #include <linux/string.h>
27     #include <linux/socket.h>
28     #include <linux/sockios.h>
29     #include <linux/errno.h>
30     #include <linux/in.h>
31     #include <linux/inet.h>
32     #include <linux/netdevice.h>
33     #include <linux/if_arp.h>
34     #include <linux/proc_fs.h>
35     #include <linux/skbuff.h>
36     #include <linux/netlink.h>
37     #include <linux/init.h>
38     
39     #include <net/ip.h>
40     #include <net/protocol.h>
41     #include <net/route.h>
42     #include <net/tcp.h>
43     #include <net/sock.h>
44     #include <net/ip_fib.h>
45     
/*
 * Debug trace hook.  It expands to nothing, so every FTprint() call in
 * this file compiles away.  The alternative definition kept here for
 * reference is:
 *	printk(KERN_DEBUG a)
 */
#define FTprint(a...)
50     
51     static kmem_cache_t * fn_hash_kmem;
52     
53     /*
54        These bizarre types are just to force strict type checking.
55        When I reversed order of bytes and changed to natural mask lengths,
56        I forgot to make fixes in several places. Now I am lazy to return
57        it back.
58      */
59     
60     typedef struct {
61     	u32	datum;
62     } fn_key_t;
63     
64     typedef struct {
65     	u32	datum;
66     } fn_hash_idx_t;
67     
68     struct fib_node
69     {
70     	struct fib_node		*fn_next;
71     	struct fib_info		*fn_info;
72     #define FIB_INFO(f)	((f)->fn_info)
73     	fn_key_t		fn_key;
74     	u8			fn_tos;
75     	u8			fn_type;
76     	u8			fn_scope;
77     	u8			fn_state;
78     };
79     
/* Bits of fib_node.fn_state. */
#define FN_S_ZOMBIE	1	/* deleted, but still linked into its chain */
#define FN_S_ACCESSED	2	/* visited by a lookup or default selection */

/* Zombies created by fn_hash_delete() since the last fn_hash_flush(). */
static int fib_hash_zombies;
84     
85     struct fn_zone
86     {
87     	struct fn_zone	*fz_next;	/* Next not empty zone	*/
88     	struct fib_node	**fz_hash;	/* Hash table pointer	*/
89     	int		fz_nent;	/* Number of entries	*/
90     
91     	int		fz_divisor;	/* Hash divisor		*/
92     	u32		fz_hashmask;	/* (1<<fz_divisor) - 1	*/
93     #define FZ_HASHMASK(fz)	((fz)->fz_hashmask)
94     
95     	int		fz_order;	/* Zone order		*/
96     	u32		fz_mask;
97     #define FZ_MASK(fz)	((fz)->fz_mask)
98     };
99     
100     /* NOTE. On fast computers evaluation of fz_hashmask and fz_mask
101        can be cheaper than memory lookup, so that FZ_* macros are used.
102      */
103     
/* Private data of one hash-based table; lives in fib_table.tb_data. */
struct fn_hash {
	/* Zone for each prefix length 0..32, NULL until first used. */
	struct fn_zone	*fn_zones[33];
	/* Existing zones chained via fz_next, most specific prefix first. */
	struct fn_zone	*fn_zone_list;
};
109     
110     static __inline__ fn_hash_idx_t fn_hash(fn_key_t key, struct fn_zone *fz)
111     {
112     	u32 h = ntohl(key.datum)>>(32 - fz->fz_order);
113     	h ^= (h>>20);
114     	h ^= (h>>10);
115     	h ^= (h>>5);
116     	h &= FZ_HASHMASK(fz);
117     	return *(fn_hash_idx_t*)&h;
118     }
119     
/* Reset a key to the all-zero prefix. */
#define fz_key_0(key)		((key).datum = 0)
/* Raw prefix value held in a key; the zone argument is not used. */
#define fz_prefix(key,fz)	((key).datum)
122     
123     static __inline__ fn_key_t fz_key(u32 dst, struct fn_zone *fz)
124     {
125     	fn_key_t k;
126     	k.datum = dst & FZ_MASK(fz);
127     	return k;
128     }
129     
130     static __inline__ struct fib_node ** fz_chain_p(fn_key_t key, struct fn_zone *fz)
131     {
132     	return &fz->fz_hash[fn_hash(key, fz).datum];
133     }
134     
135     static __inline__ struct fib_node * fz_chain(fn_key_t key, struct fn_zone *fz)
136     {
137     	return fz->fz_hash[fn_hash(key, fz).datum];
138     }
139     
140     extern __inline__ int fn_key_eq(fn_key_t a, fn_key_t b)
141     {
142     	return a.datum == b.datum;
143     }
144     
145     extern __inline__ int fn_key_leq(fn_key_t a, fn_key_t b)
146     {
147     	return a.datum <= b.datum;
148     }
149     
150     static rwlock_t fib_hash_lock = RW_LOCK_UNLOCKED;
151     
152     #define FZ_MAX_DIVISOR 1024
153     
154     #ifdef CONFIG_IP_ROUTE_LARGE_TABLES
155     
156     /* The fib hash lock must be held when this is called. */
157     static __inline__ void fn_rebuild_zone(struct fn_zone *fz,
158     				       struct fib_node **old_ht,
159     				       int old_divisor)
160     {
161     	int i;
162     	struct fib_node *f, **fp, *next;
163     
164     	for (i=0; i<old_divisor; i++) {
165     		for (f=old_ht[i]; f; f=next) {
166     			next = f->fn_next;
167     			for (fp = fz_chain_p(f->fn_key, fz);
168     			     *fp && fn_key_leq((*fp)->fn_key, f->fn_key);
169     			     fp = &(*fp)->fn_next)
170     				/* NONE */;
171     			f->fn_next = *fp;
172     			*fp = f;
173     		}
174     	}
175     }
176     
177     static void fn_rehash_zone(struct fn_zone *fz)
178     {
179     	struct fib_node **ht, **old_ht;
180     	int old_divisor, new_divisor;
181     	u32 new_hashmask;
182     		
183     	old_divisor = fz->fz_divisor;
184     
185     	switch (old_divisor) {
186     	case 16:
187     		new_divisor = 256;
188     		new_hashmask = 0xFF;
189     		break;
190     	case 256:
191     		new_divisor = 1024;
192     		new_hashmask = 0x3FF;
193     		break;
194     	default:
195     		printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
196     		return;
197     	}
198     #if RT_CACHE_DEBUG >= 2
199     	printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor);
200     #endif
201     
202     	ht = kmalloc(new_divisor*sizeof(struct fib_node*), GFP_KERNEL);
203     
204     	if (ht)	{
205     		memset(ht, 0, new_divisor*sizeof(struct fib_node*));
206     		write_lock_bh(&fib_hash_lock);
207     		old_ht = fz->fz_hash;
208     		fz->fz_hash = ht;
209     		fz->fz_hashmask = new_hashmask;
210     		fz->fz_divisor = new_divisor;
211     		fn_rebuild_zone(fz, old_ht, old_divisor);
212     		write_unlock_bh(&fib_hash_lock);
213     		kfree(old_ht);
214     	}
215     }
216     #endif /* CONFIG_IP_ROUTE_LARGE_TABLES */
217     
218     static void fn_free_node(struct fib_node * f)
219     {
220     	fib_release_info(FIB_INFO(f));
221     	kmem_cache_free(fn_hash_kmem, f);
222     }
223     
224     
225     static struct fn_zone *
226     fn_new_zone(struct fn_hash *table, int z)
227     {
228     	int i;
229     	struct fn_zone *fz = kmalloc(sizeof(struct fn_zone), GFP_KERNEL);
230     	if (!fz)
231     		return NULL;
232     
233     	memset(fz, 0, sizeof(struct fn_zone));
234     	if (z) {
235     		fz->fz_divisor = 16;
236     		fz->fz_hashmask = 0xF;
237     	} else {
238     		fz->fz_divisor = 1;
239     		fz->fz_hashmask = 0;
240     	}
241     	fz->fz_hash = kmalloc(fz->fz_divisor*sizeof(struct fib_node*), GFP_KERNEL);
242     	if (!fz->fz_hash) {
243     		kfree(fz);
244     		return NULL;
245     	}
246     	memset(fz->fz_hash, 0, fz->fz_divisor*sizeof(struct fib_node*));
247     	fz->fz_order = z;
248     	fz->fz_mask = inet_make_mask(z);
249     
250     	/* Find the first not empty zone with more specific mask */
251     	for (i=z+1; i<=32; i++)
252     		if (table->fn_zones[i])
253     			break;
254     	write_lock_bh(&fib_hash_lock);
255     	if (i>32) {
256     		/* No more specific masks, we are the first. */
257     		fz->fz_next = table->fn_zone_list;
258     		table->fn_zone_list = fz;
259     	} else {
260     		fz->fz_next = table->fn_zones[i]->fz_next;
261     		table->fn_zones[i]->fz_next = fz;
262     	}
263     	table->fn_zones[z] = fz;
264     	write_unlock_bh(&fib_hash_lock);
265     	return fz;
266     }
267     
268     static int
269     fn_hash_lookup(struct fib_table *tb, const struct rt_key *key, struct fib_result *res)
270     {
271     	int err;
272     	struct fn_zone *fz;
273     	struct fn_hash *t = (struct fn_hash*)tb->tb_data;
274     
275     	read_lock(&fib_hash_lock);
276     	for (fz = t->fn_zone_list; fz; fz = fz->fz_next) {
277     		struct fib_node *f;
278     		fn_key_t k = fz_key(key->dst, fz);
279     
280     		for (f = fz_chain(k, fz); f; f = f->fn_next) {
281     			if (!fn_key_eq(k, f->fn_key)) {
282     				if (fn_key_leq(k, f->fn_key))
283     					break;
284     				else
285     					continue;
286     			}
287     #ifdef CONFIG_IP_ROUTE_TOS
288     			if (f->fn_tos && f->fn_tos != key->tos)
289     				continue;
290     #endif
291     			f->fn_state |= FN_S_ACCESSED;
292     
293     			if (f->fn_state&FN_S_ZOMBIE)
294     				continue;
295     			if (f->fn_scope < key->scope)
296     				continue;
297     
298     			err = fib_semantic_match(f->fn_type, FIB_INFO(f), key, res);
299     			if (err == 0) {
300     				res->type = f->fn_type;
301     				res->scope = f->fn_scope;
302     				res->prefixlen = fz->fz_order;
303     				goto out;
304     			}
305     			if (err < 0)
306     				goto out;
307     		}
308     	}
309     	err = 1;
310     out:
311     	read_unlock(&fib_hash_lock);
312     	return err;
313     }
314     
/*
 * Position (among candidate default routes) of the route picked by the
 * last fn_hash_select_default() run; -1 when none was picked.
 */
static int fn_hash_last_dflt = -1;
316     
317     static int fib_detect_death(struct fib_info *fi, int order,
318     			    struct fib_info **last_resort, int *last_idx)
319     {
320     	struct neighbour *n;
321     	int state = NUD_NONE;
322     
323     	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
324     	if (n) {
325     		state = n->nud_state;
326     		neigh_release(n);
327     	}
328     	if (state==NUD_REACHABLE)
329     		return 0;
330     	if ((state&NUD_VALID) && order != fn_hash_last_dflt)
331     		return 0;
332     	if ((state&NUD_VALID) ||
333     	    (*last_idx<0 && order > fn_hash_last_dflt)) {
334     		*last_resort = fi;
335     		*last_idx = order;
336     	}
337     	return 1;
338     }
339     
340     static void
341     fn_hash_select_default(struct fib_table *tb, const struct rt_key *key, struct fib_result *res)
342     {
343     	int order, last_idx;
344     	struct fib_node *f;
345     	struct fib_info *fi = NULL;
346     	struct fib_info *last_resort;
347     	struct fn_hash *t = (struct fn_hash*)tb->tb_data;
348     	struct fn_zone *fz = t->fn_zones[0];
349     
350     	if (fz == NULL)
351     		return;
352     
353     	last_idx = -1;
354     	last_resort = NULL;
355     	order = -1;
356     
357     	read_lock(&fib_hash_lock);
358     	for (f = fz->fz_hash[0]; f; f = f->fn_next) {
359     		struct fib_info *next_fi = FIB_INFO(f);
360     
361     		if ((f->fn_state&FN_S_ZOMBIE) ||
362     		    f->fn_scope != res->scope ||
363     		    f->fn_type != RTN_UNICAST)
364     			continue;
365     
366     		if (next_fi->fib_priority > res->fi->fib_priority)
367     			break;
368     		if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
369     			continue;
370     		f->fn_state |= FN_S_ACCESSED;
371     
372     		if (fi == NULL) {
373     			if (next_fi != res->fi)
374     				break;
375     		} else if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
376     			if (res->fi)
377     				fib_info_put(res->fi);
378     			res->fi = fi;
379     			atomic_inc(&fi->fib_clntref);
380     			fn_hash_last_dflt = order;
381     			goto out;
382     		}
383     		fi = next_fi;
384     		order++;
385     	}
386     
387     	if (order<=0 || fi==NULL) {
388     		fn_hash_last_dflt = -1;
389     		goto out;
390     	}
391     
392     	if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
393     		if (res->fi)
394     			fib_info_put(res->fi);
395     		res->fi = fi;
396     		atomic_inc(&fi->fib_clntref);
397     		fn_hash_last_dflt = order;
398     		goto out;
399     	}
400     
401     	if (last_idx >= 0) {
402     		if (res->fi)
403     			fib_info_put(res->fi);
404     		res->fi = last_resort;
405     		if (last_resort)
406     			atomic_inc(&last_resort->fib_clntref);
407     	}
408     	fn_hash_last_dflt = last_idx;
409     out:
410     	read_unlock(&fib_hash_lock);
411     }
412     
/*
 * Chain walkers.  Each continues from the current value of fp (a pointer
 * to a link field) and keeps f == *fp while advancing, so that after the
 * loop fp addresses the link at which the walk stopped.
 *
 *   FIB_SCAN      walks to the end of the chain.
 *   FIB_SCAN_KEY  walks only while nodes carry the given key.
 *   FIB_SCAN_TOS  additionally requires an equal TOS when TOS routing is
 *                 configured; otherwise it is the same as FIB_SCAN_KEY.
 */
#define FIB_SCAN(f, fp) \
	for ( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next)

#define FIB_SCAN_KEY(f, fp, key) \
	for ( ; ((f) = *(fp)) != NULL && fn_key_eq((f)->fn_key, (key)); \
	      (fp) = &(f)->fn_next)

#ifdef CONFIG_IP_ROUTE_TOS
#define FIB_SCAN_TOS(f, fp, key, tos) \
	for ( ; ((f) = *(fp)) != NULL && fn_key_eq((f)->fn_key, (key)) && \
	      (f)->fn_tos == (tos); \
	      (fp) = &(f)->fn_next)
#else
#define FIB_SCAN_TOS(f, fp, key, tos) FIB_SCAN_KEY(f, fp, key)
#endif
426     
427     
#ifndef CONFIG_RTNETLINK
/* Without rtnetlink, route change notifications compile away. */
#define rtmsg_fib(a, b, c, d, e, f)
#else
/* Defined near the end of the file; announces a route change. */
static void rtmsg_fib(int, struct fib_node*, int, int,
		      struct nlmsghdr *n,
		      struct netlink_skb_parms *);
#endif
435     
436     
437     static int
438     fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
439     		struct nlmsghdr *n, struct netlink_skb_parms *req)
440     {
441     	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
442     	struct fib_node *new_f, *f, **fp, **del_fp;
443     	struct fn_zone *fz;
444     	struct fib_info *fi;
445     
446     	int z = r->rtm_dst_len;
447     	int type = r->rtm_type;
448     #ifdef CONFIG_IP_ROUTE_TOS
449     	u8 tos = r->rtm_tos;
450     #endif
451     	fn_key_t key;
452     	int err;
453     
454     FTprint("tb(%d)_insert: %d %08x/%d %d %08x\n", tb->tb_id, r->rtm_type, rta->rta_dst ?
455     *(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1,
456     rta->rta_prefsrc ? *(u32*)rta->rta_prefsrc : 0);
457     	if (z > 32)
458     		return -EINVAL;
459     	fz = table->fn_zones[z];
460     	if (!fz && !(fz = fn_new_zone(table, z)))
461     		return -ENOBUFS;
462     
463     	fz_key_0(key);
464     	if (rta->rta_dst) {
465     		u32 dst;
466     		memcpy(&dst, rta->rta_dst, 4);
467     		if (dst & ~FZ_MASK(fz))
468     			return -EINVAL;
469     		key = fz_key(dst, fz);
470     	}
471     
472     	if  ((fi = fib_create_info(r, rta, n, &err)) == NULL)
473     		return err;
474     
475     #ifdef CONFIG_IP_ROUTE_LARGE_TABLES
476     	if (fz->fz_nent > (fz->fz_divisor<<2) &&
477     	    fz->fz_divisor < FZ_MAX_DIVISOR &&
478     	    (z==32 || (1<<z) > fz->fz_divisor))
479     		fn_rehash_zone(fz);
480     #endif
481     
482     	fp = fz_chain_p(key, fz);
483     
484     
485     	/*
486     	 * Scan list to find the first route with the same destination
487     	 */
488     	FIB_SCAN(f, fp) {
489     		if (fn_key_leq(key,f->fn_key))
490     			break;
491     	}
492     
493     #ifdef CONFIG_IP_ROUTE_TOS
494     	/*
495     	 * Find route with the same destination and tos.
496     	 */
497     	FIB_SCAN_KEY(f, fp, key) {
498     		if (f->fn_tos <= tos)
499     			break;
500     	}
501     #endif
502     
503     	del_fp = NULL;
504     
505     	if (f && (f->fn_state&FN_S_ZOMBIE) &&
506     #ifdef CONFIG_IP_ROUTE_TOS
507     	    f->fn_tos == tos &&
508     #endif
509     	    fn_key_eq(f->fn_key, key)) {
510     		del_fp = fp;
511     		fp = &f->fn_next;
512     		f = *fp;
513     		goto create;
514     	}
515     
516     	FIB_SCAN_TOS(f, fp, key, tos) {
517     		if (fi->fib_priority <= FIB_INFO(f)->fib_priority)
518     			break;
519     	}
520     
521     	/* Now f==*fp points to the first node with the same
522     	   keys [prefix,tos,priority], if such key already
523     	   exists or to the node, before which we will insert new one.
524     	 */
525     
526     	if (f && 
527     #ifdef CONFIG_IP_ROUTE_TOS
528     	    f->fn_tos == tos &&
529     #endif
530     	    fn_key_eq(f->fn_key, key) &&
531     	    fi->fib_priority == FIB_INFO(f)->fib_priority) {
532     		struct fib_node **ins_fp;
533     
534     		err = -EEXIST;
535     		if (n->nlmsg_flags&NLM_F_EXCL)
536     			goto out;
537     
538     		if (n->nlmsg_flags&NLM_F_REPLACE) {
539     			del_fp = fp;
540     			fp = &f->fn_next;
541     			f = *fp;
542     			goto replace;
543     		}
544     
545     		ins_fp = fp;
546     		err = -EEXIST;
547     
548     		FIB_SCAN_TOS(f, fp, key, tos) {
549     			if (fi->fib_priority != FIB_INFO(f)->fib_priority)
550     				break;
551     			if (f->fn_type == type && f->fn_scope == r->rtm_scope
552     			    && FIB_INFO(f) == fi)
553     				goto out;
554     		}
555     
556     		if (!(n->nlmsg_flags&NLM_F_APPEND)) {
557     			fp = ins_fp;
558     			f = *fp;
559     		}
560     	}
561     
562     create:
563     	err = -ENOENT;
564     	if (!(n->nlmsg_flags&NLM_F_CREATE))
565     		goto out;
566     
567     replace:
568     	err = -ENOBUFS;
569     	new_f = kmem_cache_alloc(fn_hash_kmem, SLAB_KERNEL);
570     	if (new_f == NULL)
571     		goto out;
572     
573     	memset(new_f, 0, sizeof(struct fib_node));
574     
575     	new_f->fn_key = key;
576     #ifdef CONFIG_IP_ROUTE_TOS
577     	new_f->fn_tos = tos;
578     #endif
579     	new_f->fn_type = type;
580     	new_f->fn_scope = r->rtm_scope;
581     	FIB_INFO(new_f) = fi;
582     
583     	/*
584     	 * Insert new entry to the list.
585     	 */
586     
587     	new_f->fn_next = f;
588     	write_lock_bh(&fib_hash_lock);
589     	*fp = new_f;
590     	write_unlock_bh(&fib_hash_lock);
591     	fz->fz_nent++;
592     
593     	if (del_fp) {
594     		f = *del_fp;
595     		/* Unlink replaced node */
596     		write_lock_bh(&fib_hash_lock);
597     		*del_fp = f->fn_next;
598     		write_unlock_bh(&fib_hash_lock);
599     
600     		if (!(f->fn_state&FN_S_ZOMBIE))
601     			rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req);
602     		if (f->fn_state&FN_S_ACCESSED)
603     			rt_cache_flush(-1);
604     		fn_free_node(f);
605     		fz->fz_nent--;
606     	} else {
607     		rt_cache_flush(-1);
608     	}
609     	rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->tb_id, n, req);
610     	return 0;
611     
612     out:
613     	fib_release_info(fi);
614     	return err;
615     }
616     
617     
618     static int
619     fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
620     		struct nlmsghdr *n, struct netlink_skb_parms *req)
621     {
622     	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
623     	struct fib_node **fp, **del_fp, *f;
624     	int z = r->rtm_dst_len;
625     	struct fn_zone *fz;
626     	fn_key_t key;
627     	int matched;
628     #ifdef CONFIG_IP_ROUTE_TOS
629     	u8 tos = r->rtm_tos;
630     #endif
631     
632     FTprint("tb(%d)_delete: %d %08x/%d %d\n", tb->tb_id, r->rtm_type, rta->rta_dst ?
633            *(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1);
634     	if (z > 32)
635     		return -EINVAL;
636     	if ((fz  = table->fn_zones[z]) == NULL)
637     		return -ESRCH;
638     
639     	fz_key_0(key);
640     	if (rta->rta_dst) {
641     		u32 dst;
642     		memcpy(&dst, rta->rta_dst, 4);
643     		if (dst & ~FZ_MASK(fz))
644     			return -EINVAL;
645     		key = fz_key(dst, fz);
646     	}
647     
648     	fp = fz_chain_p(key, fz);
649     
650     
651     	FIB_SCAN(f, fp) {
652     		if (fn_key_eq(f->fn_key, key))
653     			break;
654     		if (fn_key_leq(key, f->fn_key)) {
655     			return -ESRCH;
656     		}
657     	}
658     #ifdef CONFIG_IP_ROUTE_TOS
659     	FIB_SCAN_KEY(f, fp, key) {
660     		if (f->fn_tos == tos)
661     			break;
662     	}
663     #endif
664     
665     	matched = 0;
666     	del_fp = NULL;
667     	FIB_SCAN_TOS(f, fp, key, tos) {
668     		struct fib_info * fi = FIB_INFO(f);
669     
670     		if (f->fn_state&FN_S_ZOMBIE) {
671     			return -ESRCH;
672     		}
673     		matched++;
674     
675     		if (del_fp == NULL &&
676     		    (!r->rtm_type || f->fn_type == r->rtm_type) &&
677     		    (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) &&
678     		    (!r->rtm_protocol || fi->fib_protocol == r->rtm_protocol) &&
679     		    fib_nh_match(r, n, rta, fi) == 0)
680     			del_fp = fp;
681     	}
682     
683     	if (del_fp) {
684     		f = *del_fp;
685     		rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req);
686     
687     		if (matched != 1) {
688     			write_lock_bh(&fib_hash_lock);
689     			*del_fp = f->fn_next;
690     			write_unlock_bh(&fib_hash_lock);
691     
692     			if (f->fn_state&FN_S_ACCESSED)
693     				rt_cache_flush(-1);
694     			fn_free_node(f);
695     			fz->fz_nent--;
696     		} else {
697     			f->fn_state |= FN_S_ZOMBIE;
698     			if (f->fn_state&FN_S_ACCESSED) {
699     				f->fn_state &= ~FN_S_ACCESSED;
700     				rt_cache_flush(-1);
701     			}
702     			if (++fib_hash_zombies > 128)
703     				fib_flush();
704     		}
705     
706     		return 0;
707     	}
708     	return -ESRCH;
709     }
710     
711     extern __inline__ int
712     fn_flush_list(struct fib_node ** fp, int z, struct fn_hash *table)
713     {
714     	int found = 0;
715     	struct fib_node *f;
716     
717     	while ((f = *fp) != NULL) {
718     		struct fib_info *fi = FIB_INFO(f);
719     
720     		if (fi && ((f->fn_state&FN_S_ZOMBIE) || (fi->fib_flags&RTNH_F_DEAD))) {
721     			write_lock_bh(&fib_hash_lock);
722     			*fp = f->fn_next;
723     			write_unlock_bh(&fib_hash_lock);
724     
725     			fn_free_node(f);
726     			found++;
727     			continue;
728     		}
729     		fp = &f->fn_next;
730     	}
731     	return found;
732     }
733     
734     static int fn_hash_flush(struct fib_table *tb)
735     {
736     	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
737     	struct fn_zone *fz;
738     	int found = 0;
739     
740     	fib_hash_zombies = 0;
741     	for (fz = table->fn_zone_list; fz; fz = fz->fz_next) {
742     		int i;
743     		int tmp = 0;
744     		for (i=fz->fz_divisor-1; i>=0; i--)
745     			tmp += fn_flush_list(&fz->fz_hash[i], fz->fz_order, table);
746     		fz->fz_nent -= tmp;
747     		found += tmp;
748     	}
749     	return found;
750     }
751     
752     
753     #ifdef CONFIG_PROC_FS
754     
755     static int fn_hash_get_info(struct fib_table *tb, char *buffer, int first, int count)
756     {
757     	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
758     	struct fn_zone *fz;
759     	int pos = 0;
760     	int n = 0;
761     
762     	read_lock(&fib_hash_lock);
763     	for (fz=table->fn_zone_list; fz; fz = fz->fz_next) {
764     		int i;
765     		struct fib_node *f;
766     		int maxslot = fz->fz_divisor;
767     		struct fib_node **fp = fz->fz_hash;
768     
769     		if (fz->fz_nent == 0)
770     			continue;
771     
772     		if (pos + fz->fz_nent <= first) {
773     			pos += fz->fz_nent;
774     			continue;
775     		}
776     
777     		for (i=0; i < maxslot; i++, fp++) {
778     			for (f = *fp; f; f = f->fn_next) {
779     				if (++pos <= first)
780     					continue;
781     				fib_node_get_info(f->fn_type,
782     						  f->fn_state&FN_S_ZOMBIE,
783     						  FIB_INFO(f),
784     						  fz_prefix(f->fn_key, fz),
785     						  FZ_MASK(fz), buffer);
786     				buffer += 128;
787     				if (++n >= count)
788     					goto out;
789     			}
790     		}
791     	}
792     out:
793     	read_unlock(&fib_hash_lock);
794       	return n;
795     }
796     #endif
797     
798     
799     #ifdef CONFIG_RTNETLINK
800     
801     extern __inline__ int
802     fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
803     		     struct fib_table *tb,
804     		     struct fn_zone *fz,
805     		     struct fib_node *f)
806     {
807     	int i, s_i;
808     
809     	s_i = cb->args[3];
810     	for (i=0; f; i++, f=f->fn_next) {
811     		if (i < s_i) continue;
812     		if (f->fn_state&FN_S_ZOMBIE) continue;
813     		if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
814     				  RTM_NEWROUTE,
815     				  tb->tb_id, (f->fn_state&FN_S_ZOMBIE) ? 0 : f->fn_type, f->fn_scope,
816     				  &f->fn_key, fz->fz_order, f->fn_tos,
817     				  f->fn_info) < 0) {
818     			cb->args[3] = i;
819     			return -1;
820     		}
821     	}
822     	cb->args[3] = i;
823     	return skb->len;
824     }
825     
826     extern __inline__ int
827     fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
828     		   struct fib_table *tb,
829     		   struct fn_zone *fz)
830     {
831     	int h, s_h;
832     
833     	s_h = cb->args[2];
834     	for (h=0; h < fz->fz_divisor; h++) {
835     		if (h < s_h) continue;
836     		if (h > s_h)
837     			memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
838     		if (fz->fz_hash == NULL || fz->fz_hash[h] == NULL)
839     			continue;
840     		if (fn_hash_dump_bucket(skb, cb, tb, fz, fz->fz_hash[h]) < 0) {
841     			cb->args[2] = h;
842     			return -1;
843     		}
844     	}
845     	cb->args[2] = h;
846     	return skb->len;
847     }
848     
849     static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
850     {
851     	int m, s_m;
852     	struct fn_zone *fz;
853     	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
854     
855     	s_m = cb->args[1];
856     	read_lock(&fib_hash_lock);
857     	for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
858     		if (m < s_m) continue;
859     		if (m > s_m)
860     			memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0]));
861     		if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
862     			cb->args[1] = m;
863     			read_unlock(&fib_hash_lock);
864     			return -1;
865     		}
866     	}
867     	read_unlock(&fib_hash_lock);
868     	cb->args[1] = m;
869     	return skb->len;
870     }
871     
872     static void rtmsg_fib(int event, struct fib_node* f, int z, int tb_id,
873     		      struct nlmsghdr *n, struct netlink_skb_parms *req)
874     {
875     	struct sk_buff *skb;
876     	u32 pid = req ? req->pid : 0;
877     	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
878     
879     	skb = alloc_skb(size, GFP_KERNEL);
880     	if (!skb)
881     		return;
882     
883     	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
884     			  f->fn_type, f->fn_scope, &f->fn_key, z, f->fn_tos,
885     			  FIB_INFO(f)) < 0) {
886     		kfree_skb(skb);
887     		return;
888     	}
889     	NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE;
890     	if (n->nlmsg_flags&NLM_F_ECHO)
891     		atomic_inc(&skb->users);
892     	netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL);
893     	if (n->nlmsg_flags&NLM_F_ECHO)
894     		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
895     }
896     
897     #endif /* CONFIG_RTNETLINK */
898     
899     #ifdef CONFIG_IP_MULTIPLE_TABLES
900     struct fib_table * fib_hash_init(int id)
901     #else
902     struct fib_table * __init fib_hash_init(int id)
903     #endif
904     {
905     	struct fib_table *tb;
906     
907     	if (fn_hash_kmem == NULL)
908     		fn_hash_kmem = kmem_cache_create("ip_fib_hash",
909     						 sizeof(struct fib_node),
910     						 0, SLAB_HWCACHE_ALIGN,
911     						 NULL, NULL);
912     
913     	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), GFP_KERNEL);
914     	if (tb == NULL)
915     		return NULL;
916     
917     	tb->tb_id = id;
918     	tb->tb_lookup = fn_hash_lookup;
919     	tb->tb_insert = fn_hash_insert;
920     	tb->tb_delete = fn_hash_delete;
921     	tb->tb_flush = fn_hash_flush;
922     	tb->tb_select_default = fn_hash_select_default;
923     #ifdef CONFIG_RTNETLINK
924     	tb->tb_dump = fn_hash_dump;
925     #endif
926     #ifdef CONFIG_PROC_FS
927     	tb->tb_get_info = fn_hash_get_info;
928     #endif
929     	memset(tb->tb_data, 0, sizeof(struct fn_hash));
930     	return tb;
931     }
932