File: /usr/src/linux/net/ipv4/fib_semantics.c

1     /*
2      * INET		An implementation of the TCP/IP protocol suite for the LINUX
3      *		operating system.  INET is implemented using the  BSD Socket
4      *		interface as the means of communication with the user level.
5      *
6      *		IPv4 Forwarding Information Base: semantics.
7      *
8      * Version:	$Id: fib_semantics.c,v 1.17 2000/08/19 23:22:56 davem Exp $
9      *
10      * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11      *
12      *		This program is free software; you can redistribute it and/or
13      *		modify it under the terms of the GNU General Public License
14      *		as published by the Free Software Foundation; either version
15      *		2 of the License, or (at your option) any later version.
16      */
17     
18     #include <linux/config.h>
19     #include <asm/uaccess.h>
20     #include <asm/system.h>
21     #include <asm/bitops.h>
22     #include <linux/types.h>
23     #include <linux/kernel.h>
24     #include <linux/sched.h>
25     #include <linux/mm.h>
26     #include <linux/string.h>
27     #include <linux/socket.h>
28     #include <linux/sockios.h>
29     #include <linux/errno.h>
30     #include <linux/in.h>
31     #include <linux/inet.h>
32     #include <linux/netdevice.h>
33     #include <linux/if_arp.h>
34     #include <linux/proc_fs.h>
35     #include <linux/skbuff.h>
36     #include <linux/netlink.h>
37     #include <linux/init.h>
38     
39     #include <net/ip.h>
40     #include <net/protocol.h>
41     #include <net/route.h>
42     #include <net/tcp.h>
43     #include <net/sock.h>
44     #include <net/ip_fib.h>
45     
46     #define FSprintk(a...)
47     
48     static struct fib_info 	*fib_info_list;
49     static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
50     int fib_info_cnt;
51     
52     #define for_fib_info() { struct fib_info *fi; \
53     	for (fi = fib_info_list; fi; fi = fi->fib_next)
54     
55     #define endfor_fib_info() }
56     
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a block declaring the
 * index "nhsel" and the cursor "nh" (const for the read-only variant)
 * and then start a loop over the nexthops of fi.  endfor_nexthops(fi)
 * closes that block.  "nhsel" and "nh" stay visible between the end of
 * the loop body and endfor_nexthops().
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

#define for_nexthops(fi)						\
	{								\
		int nhsel;						\
		const struct fib_nh * nh;				\
		for (nhsel = 0, nh = (fi)->fib_nh;			\
		     nhsel < (fi)->fib_nhs;				\
		     nh++, nhsel++)

#define change_nexthops(fi)						\
	{								\
		int nhsel;						\
		struct fib_nh * nh;					\
		for (nhsel = 0, nh = (struct fib_nh*)((fi)->fib_nh);	\
		     nhsel < (fi)->fib_nhs;				\
		     nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath only the first nexthop is visited; the loop runs
 * exactly once.  The hope is that gcc optimizes the dummy loop away.
 */

#define for_nexthops(fi)						\
	{								\
		int nhsel = 0;						\
		const struct fib_nh * nh = (fi)->fib_nh;		\
		for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi)						\
	{								\
		int nhsel = 0;						\
		struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh);	\
		for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi)	}
78     
79     
80     static struct 
81     {
82     	int	error;
83     	u8	scope;
84     } fib_props[RTA_MAX+1] = {
85             { 0, RT_SCOPE_NOWHERE},		/* RTN_UNSPEC */
86     	{ 0, RT_SCOPE_UNIVERSE},	/* RTN_UNICAST */
87     	{ 0, RT_SCOPE_HOST},		/* RTN_LOCAL */
88     	{ 0, RT_SCOPE_LINK},		/* RTN_BROADCAST */
89     	{ 0, RT_SCOPE_LINK},		/* RTN_ANYCAST */
90     	{ 0, RT_SCOPE_UNIVERSE},	/* RTN_MULTICAST */
91     	{ -EINVAL, RT_SCOPE_UNIVERSE},	/* RTN_BLACKHOLE */
92     	{ -EHOSTUNREACH, RT_SCOPE_UNIVERSE},/* RTN_UNREACHABLE */
93     	{ -EACCES, RT_SCOPE_UNIVERSE},	/* RTN_PROHIBIT */
94     	{ -EAGAIN, RT_SCOPE_UNIVERSE},	/* RTN_THROW */
95     #ifdef CONFIG_IP_ROUTE_NAT
96     	{ 0, RT_SCOPE_HOST},		/* RTN_NAT */
97     #else
98     	{ -EINVAL, RT_SCOPE_NOWHERE},	/* RTN_NAT */
99     #endif
100     	{ -EINVAL, RT_SCOPE_NOWHERE}	/* RTN_XRESOLVE */
101     };
102     
103     
104     /* Release a nexthop info record */
105     
106     void free_fib_info(struct fib_info *fi)
107     {
108     	if (fi->fib_dead == 0) {
109     		printk("Freeing alive fib_info %p\n", fi);
110     		return;
111     	}
112     	change_nexthops(fi) {
113     		if (nh->nh_dev)
114     			dev_put(nh->nh_dev);
115     		nh->nh_dev = NULL;
116     	} endfor_nexthops(fi);
117     	fib_info_cnt--;
118     	kfree(fi);
119     }
120     
121     void fib_release_info(struct fib_info *fi)
122     {
123     	write_lock(&fib_info_lock);
124     	if (fi && --fi->fib_treeref == 0) {
125     		if (fi->fib_next)
126     			fi->fib_next->fib_prev = fi->fib_prev;
127     		if (fi->fib_prev)
128     			fi->fib_prev->fib_next = fi->fib_next;
129     		if (fi == fib_info_list)
130     			fib_info_list = fi->fib_next;
131     		fi->fib_dead = 1;
132     		fib_info_put(fi);
133     	}
134     	write_unlock(&fib_info_lock);
135     }
136     
137     extern __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
138     {
139     	const struct fib_nh *onh = ofi->fib_nh;
140     
141     	for_nexthops(fi) {
142     		if (nh->nh_oif != onh->nh_oif ||
143     		    nh->nh_gw  != onh->nh_gw ||
144     		    nh->nh_scope != onh->nh_scope ||
145     #ifdef CONFIG_IP_ROUTE_MULTIPATH
146     		    nh->nh_weight != onh->nh_weight ||
147     #endif
148     #ifdef CONFIG_NET_CLS_ROUTE
149     		    nh->nh_tclassid != onh->nh_tclassid ||
150     #endif
151     		    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
152     			return -1;
153     		onh++;
154     	} endfor_nexthops(fi);
155     	return 0;
156     }
157     
158     extern __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi)
159     {
160     	for_fib_info() {
161     		if (fi->fib_nhs != nfi->fib_nhs)
162     			continue;
163     		if (nfi->fib_protocol == fi->fib_protocol &&
164     		    nfi->fib_prefsrc == fi->fib_prefsrc &&
165     		    nfi->fib_priority == fi->fib_priority &&
166     		    memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
167     		    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
168     		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
169     			return fi;
170     	} endfor_fib_info();
171     	return NULL;
172     }
173     
174     /* Check, that the gateway is already configured.
175        Used only by redirect accept routine.
176      */
177     
178     int ip_fib_check_default(u32 gw, struct net_device *dev)
179     {
180     	read_lock(&fib_info_lock);
181     	for_fib_info() {
182     		if (fi->fib_flags & RTNH_F_DEAD)
183     			continue;
184     		for_nexthops(fi) {
185     			if (nh->nh_dev == dev && nh->nh_gw == gw &&
186     			    !(nh->nh_flags&RTNH_F_DEAD)) {
187     				read_unlock(&fib_info_lock);
188     				return 0;
189     			}
190     		} endfor_nexthops(fi);
191     	} endfor_fib_info();
192     	read_unlock(&fib_info_lock);
193     	return -1;
194     }
195     
196     #ifdef CONFIG_IP_ROUTE_MULTIPATH
197     
198     static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
199     {
200     	while (RTA_OK(attr,attrlen)) {
201     		if (attr->rta_type == type)
202     			return *(u32*)RTA_DATA(attr);
203     		attr = RTA_NEXT(attr, attrlen);
204     	}
205     	return 0;
206     }
207     
/*
 * Count the rtnexthop records carried in the payload of rta.
 *
 * Returns the number of records, or 0 when a record is malformed:
 * its rtnh_len is smaller than the rtnexthop header (a zero length
 * would otherwise never advance the cursor and loop forever) or
 * larger than the bytes that remain.  Trailing bytes too short to
 * hold another header are ignored.
 */
static int
fib_count_nexthops(struct rtattr *rta)
{
	int nhs = 0;
	struct rtnexthop *nhp = RTA_DATA(rta);
	int nhlen = RTA_PAYLOAD(rta);

	while (nhlen >= (int)sizeof(struct rtnexthop)) {
		if (nhp->rtnh_len < sizeof(struct rtnexthop) ||
		    nhp->rtnh_len > nhlen)
			return 0;
		/* RTNH_NEXT() advances by the aligned length; account the same. */
		nhlen -= RTNH_ALIGN(nhp->rtnh_len);
		nhs++;
		nhp = RTNH_NEXT(nhp);
	}
	return nhs;
}
223     
224     static int
225     fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
226     {
227     	struct rtnexthop *nhp = RTA_DATA(rta);
228     	int nhlen = RTA_PAYLOAD(rta);
229     
230     	change_nexthops(fi) {
231     		int attrlen = nhlen - sizeof(struct rtnexthop);
232     		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
233     			return -EINVAL;
234     		nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
235     		nh->nh_oif = nhp->rtnh_ifindex;
236     		nh->nh_weight = nhp->rtnh_hops + 1;
237     		if (attrlen) {
238     			nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
239     #ifdef CONFIG_NET_CLS_ROUTE
240     			nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
241     #endif
242     		}
243     		nhp = RTNH_NEXT(nhp);
244     	} endfor_nexthops(fi);
245     	return 0;
246     }
247     
248     #endif
249     
250     int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
251     		 struct fib_info *fi)
252     {
253     #ifdef CONFIG_IP_ROUTE_MULTIPATH
254     	struct rtnexthop *nhp;
255     	int nhlen;
256     #endif
257     
258     	if (rta->rta_priority &&
259     	    *rta->rta_priority != fi->fib_priority)
260     		return 1;
261     
262     	if (rta->rta_oif || rta->rta_gw) {
263     		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
264     		    (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
265     			return 0;
266     		return 1;
267     	}
268     
269     #ifdef CONFIG_IP_ROUTE_MULTIPATH
270     	if (rta->rta_mp == NULL)
271     		return 0;
272     	nhp = RTA_DATA(rta->rta_mp);
273     	nhlen = RTA_PAYLOAD(rta->rta_mp);
274     	
275     	for_nexthops(fi) {
276     		int attrlen = nhlen - sizeof(struct rtnexthop);
277     		u32 gw;
278     
279     		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
280     			return -EINVAL;
281     		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
282     			return 1;
283     		if (attrlen) {
284     			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
285     			if (gw && gw != nh->nh_gw)
286     				return 1;
287     #ifdef CONFIG_NET_CLS_ROUTE
288     			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
289     			if (gw && gw != nh->nh_tclassid)
290     				return 1;
291     #endif
292     		}
293     		nhp = RTNH_NEXT(nhp);
294     	} endfor_nexthops(fi);
295     #endif
296     	return 0;
297     }
298     
299     
300     /*
301        Picture
302        -------
303     
304        The semantics of a nexthop are very messy for historical reasons.
305        We have to take into account, that:
306        a) gateway can be actually local interface address,
307           so that gatewayed route is direct.
308        b) gateway must be on-link address, possibly
309           described not by an ifaddr, but also by a direct route.
310        c) If both gateway and interface are specified, they should not
311           contradict.
312        d) If we use tunnel routes, gateway could be not on-link.
313     
314        Attempt to reconcile all of these (alas, self-contradictory) conditions
315        results in pretty ugly and hairy code with obscure logic.
316     
317        I chose to generalize it instead, so that the size
318        of the code practically does not increase, but it becomes
319        much more general.
320        Every prefix is assigned a "scope" value: "host" is local address,
321        "link" is direct route,
322        [ ... "site" ... "interior" ... ]
323        and "universe" is true gateway route with global meaning.
324     
325        Every prefix refers to a set of "nexthop"s (gw, oif),
326        where gw must have narrower scope. This recursion stops
327        when gw has LOCAL scope or if "nexthop" is declared ONLINK,
328        which means that gw is forced to be on link.
329     
330        The code is still hairy, but now it is apparently logically
331        consistent and very flexible. E.g., as a by-product it allows
332        independent exterior and interior routing processes
333        to co-exist in peace.
334     
335        Normally it looks as following.
336     
337        {universe prefix}  -> (gw, oif) [scope link]
338                               |
339     			  |-> {link prefix} -> (gw, oif) [scope local]
340     			                        |
341     						|-> {local prefix} (terminal node)
342      */
343     
344     static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
345     {
346     	int err;
347     
348     	if (nh->nh_gw) {
349     		struct rt_key key;
350     		struct fib_result res;
351     
352     #ifdef CONFIG_IP_ROUTE_PERVASIVE
353     		if (nh->nh_flags&RTNH_F_PERVASIVE)
354     			return 0;
355     #endif
356     		if (nh->nh_flags&RTNH_F_ONLINK) {
357     			struct net_device *dev;
358     
359     			if (r->rtm_scope >= RT_SCOPE_LINK)
360     				return -EINVAL;
361     			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
362     				return -EINVAL;
363     			if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
364     				return -ENODEV;
365     			if (!(dev->flags&IFF_UP))
366     				return -ENETDOWN;
367     			nh->nh_dev = dev;
368     			atomic_inc(&dev->refcnt);
369     			nh->nh_scope = RT_SCOPE_LINK;
370     			return 0;
371     		}
372     		memset(&key, 0, sizeof(key));
373     		key.dst = nh->nh_gw;
374     		key.oif = nh->nh_oif;
375     		key.scope = r->rtm_scope + 1;
376     
377     		/* It is not necessary, but requires a bit of thinking */
378     		if (key.scope < RT_SCOPE_LINK)
379     			key.scope = RT_SCOPE_LINK;
380     
381     		if ((err = fib_lookup(&key, &res)) != 0)
382     			return err;
383     		nh->nh_scope = res.scope;
384     		nh->nh_oif = FIB_RES_OIF(res);
385     		nh->nh_dev = FIB_RES_DEV(res);
386     		if (nh->nh_dev)
387     			atomic_inc(&nh->nh_dev->refcnt);
388     		fib_res_put(&res);
389     	} else {
390     		struct in_device *in_dev;
391     
392     		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
393     			return -EINVAL;
394     
395     		in_dev = inetdev_by_index(nh->nh_oif);
396     		if (in_dev == NULL)
397     			return -ENODEV;
398     		if (!(in_dev->dev->flags&IFF_UP)) {
399     			in_dev_put(in_dev);
400     			return -ENETDOWN;
401     		}
402     		nh->nh_dev = in_dev->dev;
403     		atomic_inc(&nh->nh_dev->refcnt);
404     		nh->nh_scope = RT_SCOPE_HOST;
405     		in_dev_put(in_dev);
406     	}
407     	return 0;
408     }
409     
410     struct fib_info *
411     fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
412     		const struct nlmsghdr *nlh, int *errp)
413     {
414     	int err;
415     	struct fib_info *fi = NULL;
416     	struct fib_info *ofi;
417     #ifdef CONFIG_IP_ROUTE_MULTIPATH
418     	int nhs = 1;
419     #else
420     	const int nhs = 1;
421     #endif
422     
423     	/* Fast check to catch the most weird cases */
424     	if (fib_props[r->rtm_type].scope > r->rtm_scope)
425     		goto err_inval;
426     
427     #ifdef CONFIG_IP_ROUTE_MULTIPATH
428     	if (rta->rta_mp) {
429     		nhs = fib_count_nexthops(rta->rta_mp);
430     		if (nhs == 0)
431     			goto err_inval;
432     	}
433     #endif
434     
435     	fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
436     	err = -ENOBUFS;
437     	if (fi == NULL)
438     		goto failure;
439     	fib_info_cnt++;
440     	memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
441     
442     	fi->fib_protocol = r->rtm_protocol;
443     	fi->fib_nhs = nhs;
444     	fi->fib_flags = r->rtm_flags;
445     	if (rta->rta_priority)
446     		fi->fib_priority = *rta->rta_priority;
447     	if (rta->rta_mx) {
448     		int attrlen = RTA_PAYLOAD(rta->rta_mx);
449     		struct rtattr *attr = RTA_DATA(rta->rta_mx);
450     
451     		while (RTA_OK(attr, attrlen)) {
452     			unsigned flavor = attr->rta_type;
453     			if (flavor) {
454     				if (flavor > RTAX_MAX)
455     					goto err_inval;
456     				fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
457     			}
458     			attr = RTA_NEXT(attr, attrlen);
459     		}
460     	}
461     	if (rta->rta_prefsrc)
462     		memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
463     
464     	if (rta->rta_mp) {
465     #ifdef CONFIG_IP_ROUTE_MULTIPATH
466     		if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
467     			goto failure;
468     		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
469     			goto err_inval;
470     		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
471     			goto err_inval;
472     #ifdef CONFIG_NET_CLS_ROUTE
473     		if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
474     			goto err_inval;
475     #endif
476     #else
477     		goto err_inval;
478     #endif
479     	} else {
480     		struct fib_nh *nh = fi->fib_nh;
481     		if (rta->rta_oif)
482     			nh->nh_oif = *rta->rta_oif;
483     		if (rta->rta_gw)
484     			memcpy(&nh->nh_gw, rta->rta_gw, 4);
485     #ifdef CONFIG_NET_CLS_ROUTE
486     		if (rta->rta_flow)
487     			memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
488     #endif
489     		nh->nh_flags = r->rtm_flags;
490     #ifdef CONFIG_IP_ROUTE_MULTIPATH
491     		nh->nh_weight = 1;
492     #endif
493     	}
494     
495     #ifdef CONFIG_IP_ROUTE_NAT
496     	if (r->rtm_type == RTN_NAT) {
497     		if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif)
498     			goto err_inval;
499     		memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4);
500     		goto link_it;
501     	}
502     #endif
503     
504     	if (fib_props[r->rtm_type].error) {
505     		if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
506     			goto err_inval;
507     		goto link_it;
508     	}
509     
510     	if (r->rtm_scope > RT_SCOPE_HOST)
511     		goto err_inval;
512     
513     	if (r->rtm_scope == RT_SCOPE_HOST) {
514     		struct fib_nh *nh = fi->fib_nh;
515     
516     		/* Local address is added. */
517     		if (nhs != 1 || nh->nh_gw)
518     			goto err_inval;
519     		nh->nh_scope = RT_SCOPE_NOWHERE;
520     		nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
521     		err = -ENODEV;
522     		if (nh->nh_dev == NULL)
523     			goto failure;
524     	} else {
525     		change_nexthops(fi) {
526     			if ((err = fib_check_nh(r, fi, nh)) != 0)
527     				goto failure;
528     		} endfor_nexthops(fi)
529     	}
530     
531     	if (fi->fib_prefsrc) {
532     		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
533     		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
534     			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
535     				goto err_inval;
536     	}
537     
538     link_it:
539     	if ((ofi = fib_find_info(fi)) != NULL) {
540     		fi->fib_dead = 1;
541     		free_fib_info(fi);
542     		ofi->fib_treeref++;
543     		return ofi;
544     	}
545     
546     	fi->fib_treeref++;
547     	atomic_inc(&fi->fib_clntref);
548     	write_lock(&fib_info_lock);
549     	fi->fib_next = fib_info_list;
550     	fi->fib_prev = NULL;
551     	if (fib_info_list)
552     		fib_info_list->fib_prev = fi;
553     	fib_info_list = fi;
554     	write_unlock(&fib_info_lock);
555     	return fi;
556     
557     err_inval:
558     	err = -EINVAL;
559     
560     failure:
561             *errp = err;
562             if (fi) {
563     		fi->fib_dead = 1;
564     		free_fib_info(fi);
565     	}
566     	return NULL;
567     }
568     
569     int 
570     fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, struct fib_result *res)
571     {
572     	int err = fib_props[type].error;
573     
574     	if (err == 0) {
575     		if (fi->fib_flags&RTNH_F_DEAD)
576     			return 1;
577     
578     		res->fi = fi;
579     
580     		switch (type) {
581     #ifdef CONFIG_IP_ROUTE_NAT
582     		case RTN_NAT:
583     			FIB_RES_RESET(*res);
584     			atomic_inc(&fi->fib_clntref);
585     			return 0;
586     #endif
587     		case RTN_UNICAST:
588     		case RTN_LOCAL:
589     		case RTN_BROADCAST:
590     		case RTN_ANYCAST:
591     		case RTN_MULTICAST:
592     			for_nexthops(fi) {
593     				if (nh->nh_flags&RTNH_F_DEAD)
594     					continue;
595     				if (!key->oif || key->oif == nh->nh_oif)
596     					break;
597     			}
598     #ifdef CONFIG_IP_ROUTE_MULTIPATH
599     			if (nhsel < fi->fib_nhs) {
600     				res->nh_sel = nhsel;
601     				atomic_inc(&fi->fib_clntref);
602     				return 0;
603     			}
604     #else
605     			if (nhsel < 1) {
606     				atomic_inc(&fi->fib_clntref);
607     				return 0;
608     			}
609     #endif
610     			endfor_nexthops(fi);
611     			res->fi = NULL;
612     			return 1;
613     		default:
614     			res->fi = NULL;
615     			printk(KERN_DEBUG "impossible 102\n");
616     			return -EINVAL;
617     		}
618     	}
619     	return err;
620     }
621     
622     /* Find appropriate source address to this destination */
623     
624     u32 __fib_res_prefsrc(struct fib_result *res)
625     {
626     	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
627     }
628     
629     #ifdef CONFIG_RTNETLINK
630     
631     int
632     fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
633     	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
634     	      struct fib_info *fi)
635     {
636     	struct rtmsg *rtm;
637     	struct nlmsghdr  *nlh;
638     	unsigned char	 *b = skb->tail;
639     
640     	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
641     	rtm = NLMSG_DATA(nlh);
642     	rtm->rtm_family = AF_INET;
643     	rtm->rtm_dst_len = dst_len;
644     	rtm->rtm_src_len = 0;
645     	rtm->rtm_tos = tos;
646     	rtm->rtm_table = tb_id;
647     	rtm->rtm_type = type;
648     	rtm->rtm_flags = fi->fib_flags;
649     	rtm->rtm_scope = scope;
650     	if (rtm->rtm_dst_len)
651     		RTA_PUT(skb, RTA_DST, 4, dst);
652     	rtm->rtm_protocol = fi->fib_protocol;
653     	if (fi->fib_priority)
654     		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
655     #ifdef CONFIG_NET_CLS_ROUTE
656     	if (fi->fib_nh[0].nh_tclassid)
657     		RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
658     #endif
659     	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
660     		goto rtattr_failure;
661     	if (fi->fib_prefsrc)
662     		RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
663     	if (fi->fib_nhs == 1) {
664     		if (fi->fib_nh->nh_gw)
665     			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
666     		if (fi->fib_nh->nh_oif)
667     			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
668     	}
669     #ifdef CONFIG_IP_ROUTE_MULTIPATH
670     	if (fi->fib_nhs > 1) {
671     		struct rtnexthop *nhp;
672     		struct rtattr *mp_head;
673     		if (skb_tailroom(skb) <= RTA_SPACE(0))
674     			goto rtattr_failure;
675     		mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
676     
677     		for_nexthops(fi) {
678     			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
679     				goto rtattr_failure;
680     			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
681     			nhp->rtnh_flags = nh->nh_flags & 0xFF;
682     			nhp->rtnh_hops = nh->nh_weight-1;
683     			nhp->rtnh_ifindex = nh->nh_oif;
684     			if (nh->nh_gw)
685     				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
686     			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
687     		} endfor_nexthops(fi);
688     		mp_head->rta_type = RTA_MULTIPATH;
689     		mp_head->rta_len = skb->tail - (u8*)mp_head;
690     	}
691     #endif
692     	nlh->nlmsg_len = skb->tail - b;
693     	return skb->len;
694     
695     nlmsg_failure:
696     rtattr_failure:
697     	skb_trim(skb, b - skb->data);
698     	return -1;
699     }
700     
701     #endif /* CONFIG_RTNETLINK */
702     
703     #ifndef CONFIG_IP_NOSIOCRT
704     
705     int
706     fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
707     		    struct kern_rta *rta, struct rtentry *r)
708     {
709     	int    plen;
710     	u32    *ptr;
711     
712     	memset(rtm, 0, sizeof(*rtm));
713     	memset(rta, 0, sizeof(*rta));
714     
715     	if (r->rt_dst.sa_family != AF_INET)
716     		return -EAFNOSUPPORT;
717     
718     	/* Check mask for validity:
719     	   a) it must be contiguous.
720     	   b) destination must have all host bits clear.
721     	   c) if application forgot to set correct family (AF_INET),
722     	      reject request unless it is absolutely clear i.e.
723     	      both family and mask are zero.
724     	 */
725     	plen = 32;
726     	ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
727     	if (!(r->rt_flags&RTF_HOST)) {
728     		u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
729     		if (r->rt_genmask.sa_family != AF_INET) {
730     			if (mask || r->rt_genmask.sa_family)
731     				return -EAFNOSUPPORT;
732     		}
733     		if (bad_mask(mask, *ptr))
734     			return -EINVAL;
735     		plen = inet_mask_len(mask);
736     	}
737     
738     	nl->nlmsg_flags = NLM_F_REQUEST;
739     	nl->nlmsg_pid = 0;
740     	nl->nlmsg_seq = 0;
741     	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
742     	if (cmd == SIOCDELRT) {
743     		nl->nlmsg_type = RTM_DELROUTE;
744     		nl->nlmsg_flags = 0;
745     	} else {
746     		nl->nlmsg_type = RTM_NEWROUTE;
747     		nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
748     		rtm->rtm_protocol = RTPROT_BOOT;
749     	}
750     
751     	rtm->rtm_dst_len = plen;
752     	rta->rta_dst = ptr;
753     
754     	if (r->rt_metric) {
755     		*(u32*)&r->rt_pad3 = r->rt_metric - 1;
756     		rta->rta_priority = (u32*)&r->rt_pad3;
757     	}
758     	if (r->rt_flags&RTF_REJECT) {
759     		rtm->rtm_scope = RT_SCOPE_HOST;
760     		rtm->rtm_type = RTN_UNREACHABLE;
761     		return 0;
762     	}
763     	rtm->rtm_scope = RT_SCOPE_NOWHERE;
764     	rtm->rtm_type = RTN_UNICAST;
765     
766     	if (r->rt_dev) {
767     		char *colon;
768     		struct net_device *dev;
769     		char   devname[IFNAMSIZ];
770     
771     		if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
772     			return -EFAULT;
773     		devname[IFNAMSIZ-1] = 0;
774     		colon = strchr(devname, ':');
775     		if (colon)
776     			*colon = 0;
777     		dev = __dev_get_by_name(devname);
778     		if (!dev)
779     			return -ENODEV;
780     		rta->rta_oif = &dev->ifindex;
781     		if (colon) {
782     			struct in_ifaddr *ifa;
783     			struct in_device *in_dev = __in_dev_get(dev);
784     			if (!in_dev)
785     				return -ENODEV;
786     			*colon = ':';
787     			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
788     				if (strcmp(ifa->ifa_label, devname) == 0)
789     					break;
790     			if (ifa == NULL)
791     				return -ENODEV;
792     			rta->rta_prefsrc = &ifa->ifa_local;
793     		}
794     	}
795     
796     	ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
797     	if (r->rt_gateway.sa_family == AF_INET && *ptr) {
798     		rta->rta_gw = ptr;
799     		if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
800     			rtm->rtm_scope = RT_SCOPE_UNIVERSE;
801     	}
802     
803     	if (cmd == SIOCDELRT)
804     		return 0;
805     
806     	if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
807     		return -EINVAL;
808     
809     	if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
810     		rtm->rtm_scope = RT_SCOPE_LINK;
811     
812     	if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
813     		struct rtattr *rec;
814     		struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
815     		if (mx == NULL)
816     			return -ENOMEM;
817     		rta->rta_mx = mx;
818     		mx->rta_type = RTA_METRICS;
819     		mx->rta_len  = RTA_LENGTH(0);
820     		if (r->rt_flags&RTF_MTU) {
821     			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
822     			rec->rta_type = RTAX_ADVMSS;
823     			rec->rta_len = RTA_LENGTH(4);
824     			mx->rta_len += RTA_LENGTH(4);
825     			*(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
826     		}
827     		if (r->rt_flags&RTF_WINDOW) {
828     			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
829     			rec->rta_type = RTAX_WINDOW;
830     			rec->rta_len = RTA_LENGTH(4);
831     			mx->rta_len += RTA_LENGTH(4);
832     			*(u32*)RTA_DATA(rec) = r->rt_window;
833     		}
834     		if (r->rt_flags&RTF_IRTT) {
835     			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
836     			rec->rta_type = RTAX_RTT;
837     			rec->rta_len = RTA_LENGTH(4);
838     			mx->rta_len += RTA_LENGTH(4);
839     			*(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
840     		}
841     	}
842     	return 0;
843     }
844     
845     #endif
846     
847     /*
848        Update FIB if:
849        - local address disappeared -> we must delete all the entries
850          referring to it.
851        - device went down -> we must shutdown all nexthops going via it.
852      */
853     
854     int fib_sync_down(u32 local, struct net_device *dev, int force)
855     {
856     	int ret = 0;
857     	int scope = RT_SCOPE_NOWHERE;
858     	
859     	if (force)
860     		scope = -1;
861     
862     	for_fib_info() {
863     		if (local && fi->fib_prefsrc == local) {
864     			fi->fib_flags |= RTNH_F_DEAD;
865     			ret++;
866     		} else if (dev && fi->fib_nhs) {
867     			int dead = 0;
868     
869     			change_nexthops(fi) {
870     				if (nh->nh_flags&RTNH_F_DEAD)
871     					dead++;
872     				else if (nh->nh_dev == dev &&
873     					 nh->nh_scope != scope) {
874     					nh->nh_flags |= RTNH_F_DEAD;
875     #ifdef CONFIG_IP_ROUTE_MULTIPATH
876     					fi->fib_power -= nh->nh_power;
877     					nh->nh_power = 0;
878     #endif
879     					dead++;
880     				}
881     			} endfor_nexthops(fi)
882     			if (dead == fi->fib_nhs) {
883     				fi->fib_flags |= RTNH_F_DEAD;
884     				ret++;
885     			}
886     		}
887     	} endfor_fib_info();
888     	return ret;
889     }
890     
891     #ifdef CONFIG_IP_ROUTE_MULTIPATH
892     
893     /*
894        Dead device goes up. We wake up dead nexthops.
895        It takes sense only on multipath routes.
896      */
897     
898     int fib_sync_up(struct net_device *dev)
899     {
900     	int ret = 0;
901     
902     	if (!(dev->flags&IFF_UP))
903     		return 0;
904     
905     	for_fib_info() {
906     		int alive = 0;
907     
908     		change_nexthops(fi) {
909     			if (!(nh->nh_flags&RTNH_F_DEAD)) {
910     				alive++;
911     				continue;
912     			}
913     			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
914     				continue;
915     			if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
916     				continue;
917     			alive++;
918     			nh->nh_power = 0;
919     			nh->nh_flags &= ~RTNH_F_DEAD;
920     		} endfor_nexthops(fi)
921     
922     		if (alive > 0) {
923     			fi->fib_flags &= ~RTNH_F_DEAD;
924     			ret++;
925     		}
926     	} endfor_fib_info();
927     	return ret;
928     }
929     
930     /*
931        The algorithm is suboptimal, but it provides really
932        fair weighted route distribution.
933      */
934     
935     void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
936     {
937     	struct fib_info *fi = res->fi;
938     	int w;
939     
940     	if (fi->fib_power <= 0) {
941     		int power = 0;
942     		change_nexthops(fi) {
943     			if (!(nh->nh_flags&RTNH_F_DEAD)) {
944     				power += nh->nh_weight;
945     				nh->nh_power = nh->nh_weight;
946     			}
947     		} endfor_nexthops(fi);
948     		fi->fib_power = power;
949     #if 1
950     		if (power <= 0) {
951     			printk(KERN_CRIT "impossible 777\n");
952     			return;
953     		}
954     #endif
955     	}
956     
957     
958     	/* w should be random number [0..fi->fib_power-1],
959     	   it is pretty bad approximation.
960     	 */
961     
962     	w = jiffies % fi->fib_power;
963     
964     	change_nexthops(fi) {
965     		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
966     			if ((w -= nh->nh_power) <= 0) {
967     				nh->nh_power--;
968     				fi->fib_power--;
969     				res->nh_sel = nhsel;
970     				return;
971     			}
972     		}
973     	} endfor_nexthops(fi);
974     
975     #if 1
976     	printk(KERN_CRIT "impossible 888\n");
977     #endif
978     	return;
979     }
980     #endif
981     
982     
983     #ifdef CONFIG_PROC_FS
984     
985     static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi)
986     {
987     	static unsigned type2flags[RTN_MAX+1] = {
988     		0, 0, 0, 0, 0, 0, 0, RTF_REJECT, RTF_REJECT, 0, 0, 0
989     	};
990     	unsigned flags = type2flags[type];
991     
992     	if (fi && fi->fib_nh->nh_gw)
993     		flags |= RTF_GATEWAY;
994     	if (mask == 0xFFFFFFFF)
995     		flags |= RTF_HOST;
996     	if (!dead)
997     		flags |= RTF_UP;
998     	return flags;
999     }
1000     
1001     void fib_node_get_info(int type, int dead, struct fib_info *fi, u32 prefix, u32 mask, char *buffer)
1002     {
1003     	int len;
1004     	unsigned flags = fib_flag_trans(type, dead, mask, fi);
1005     
1006     	if (fi) {
1007     		len = sprintf(buffer, "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
1008     			      fi->fib_dev ? fi->fib_dev->name : "*", prefix,
1009     			      fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
1010     			      mask, fi->fib_advmss+40, fi->fib_window, fi->fib_rtt>>3);
1011     	} else {
1012     		len = sprintf(buffer, "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
1013     			      prefix, 0,
1014     			      flags, 0, 0, 0,
1015     			      mask, 0, 0, 0);
1016     	}
1017     	memset(buffer+len, ' ', 127-len);
1018     	buffer[127] = '\n';
1019     }
1020     
1021     #endif
1022