File: /usr/src/linux/net/ipv4/fib_frontend.c

1     /*
2      * INET		An implementation of the TCP/IP protocol suite for the LINUX
3      *		operating system.  INET is implemented using the  BSD Socket
4      *		interface as the means of communication with the user level.
5      *
6      *		IPv4 Forwarding Information Base: FIB frontend.
7      *
8      * Version:	$Id: fib_frontend.c,v 1.25 2001/05/29 22:16:25 davem Exp $
9      *
10      * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11      *
12      *		This program is free software; you can redistribute it and/or
13      *		modify it under the terms of the GNU General Public License
14      *		as published by the Free Software Foundation; either version
15      *		2 of the License, or (at your option) any later version.
16      */
17     
18     #include <linux/config.h>
19     #include <asm/uaccess.h>
20     #include <asm/system.h>
21     #include <asm/bitops.h>
22     #include <linux/types.h>
23     #include <linux/kernel.h>
24     #include <linux/sched.h>
25     #include <linux/mm.h>
26     #include <linux/string.h>
27     #include <linux/socket.h>
28     #include <linux/sockios.h>
29     #include <linux/errno.h>
30     #include <linux/in.h>
31     #include <linux/inet.h>
32     #include <linux/netdevice.h>
33     #include <linux/if_arp.h>
34     #include <linux/proc_fs.h>
35     #include <linux/skbuff.h>
36     #include <linux/netlink.h>
37     #include <linux/init.h>
38     
39     #include <net/ip.h>
40     #include <net/protocol.h>
41     #include <net/route.h>
42     #include <net/tcp.h>
43     #include <net/sock.h>
44     #include <net/icmp.h>
45     #include <net/arp.h>
46     #include <net/ip_fib.h>
47     
/* Debug helper: hands its printf-style arguments to printk() at KERN_DEBUG. */
#define FFprint(a...) printk(KERN_DEBUG a)

#ifndef CONFIG_IP_MULTIPLE_TABLES

/* Without multiple tables, table walks start at the main table. */
#define RT_TABLE_MIN RT_TABLE_MAIN

/* The two tables of this configuration; both are created in ip_fib_init(). */
struct fib_table *local_table;
struct fib_table *main_table;

#else

/* With multiple tables, table walks start at id 1. */
#define RT_TABLE_MIN 1

/* Table pointers indexed by table id; slots start out NULL (static storage). */
struct fib_table *fib_tables[RT_TABLE_MAX+1];

/*
 * Create table @id with fib_hash_init() and record it in fib_tables[].
 *
 * Returns the new table, or NULL when fib_hash_init() fails (in which
 * case fib_tables[] is left untouched).  @id is used as an array index
 * without any range check in this function.
 */
struct fib_table *__fib_new_table(int id)
{
	struct fib_table *table = fib_hash_init(id);

	if (table != NULL)
		fib_tables[id] = table;
	return table;
}


#endif /* CONFIG_IP_MULTIPLE_TABLES */
76     
77     
78     void fib_flush(void)
79     {
80     	int flushed = 0;
81     #ifdef CONFIG_IP_MULTIPLE_TABLES
82     	struct fib_table *tb;
83     	int id;
84     
85     	for (id = RT_TABLE_MAX; id>0; id--) {
86     		if ((tb = fib_get_table(id))==NULL)
87     			continue;
88     		flushed += tb->tb_flush(tb);
89     	}
90     #else /* CONFIG_IP_MULTIPLE_TABLES */
91     	flushed += main_table->tb_flush(main_table);
92     	flushed += local_table->tb_flush(local_table);
93     #endif /* CONFIG_IP_MULTIPLE_TABLES */
94     
95     	if (flushed)
96     		rt_cache_flush(-1);
97     }
98     
99     
100     #ifdef CONFIG_PROC_FS
101     
102     /* 
103      *	Called from the PROCfs module. This outputs /proc/net/route.
104      *
105      *	It always works in backward compatibility mode.
106      *	The format of the file is not supposed to be changed.
107      */
108      
109     static int
110     fib_get_procinfo(char *buffer, char **start, off_t offset, int length)
111     {
112     	int first = offset/128;
113     	char *ptr = buffer;
114     	int count = (length+127)/128;
115     	int len;
116     
117     	*start = buffer + offset%128;
118     	
119     	if (--first < 0) {
120     		sprintf(buffer, "%-127s\n", "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
121     		--count;
122     		ptr += 128;
123     		first = 0;
124       	}
125     
126     	if (main_table && count > 0) {
127     		int n = main_table->tb_get_info(main_table, ptr, first, count);
128     		count -= n;
129     		ptr += n*128;
130     	}
131     	len = ptr - *start;
132     	if (len >= length)
133     		return length;
134     	if (len >= 0)
135     		return len;
136     	return 0;
137     }
138     
139     #endif /* CONFIG_PROC_FS */
140     
141     /*
142      *	Find the first device with a given source address.
143      */
144     
145     struct net_device * ip_dev_find(u32 addr)
146     {
147     	struct rt_key key;
148     	struct fib_result res;
149     	struct net_device *dev = NULL;
150     
151     	memset(&key, 0, sizeof(key));
152     	key.dst = addr;
153     #ifdef CONFIG_IP_MULTIPLE_TABLES
154     	res.r = NULL;
155     #endif
156     
157     	if (!local_table || local_table->tb_lookup(local_table, &key, &res)) {
158     		return NULL;
159     	}
160     	if (res.type != RTN_LOCAL)
161     		goto out;
162     	dev = FIB_RES_DEV(res);
163     	if (dev)
164     		atomic_inc(&dev->refcnt);
165     
166     out:
167     	fib_res_put(&res);
168     	return dev;
169     }
170     
171     unsigned inet_addr_type(u32 addr)
172     {
173     	struct rt_key		key;
174     	struct fib_result	res;
175     	unsigned ret = RTN_BROADCAST;
176     
177     	if (ZERONET(addr) || BADCLASS(addr))
178     		return RTN_BROADCAST;
179     	if (MULTICAST(addr))
180     		return RTN_MULTICAST;
181     
182     	memset(&key, 0, sizeof(key));
183     	key.dst = addr;
184     #ifdef CONFIG_IP_MULTIPLE_TABLES
185     	res.r = NULL;
186     #endif
187     	
188     	if (local_table) {
189     		ret = RTN_UNICAST;
190     		if (local_table->tb_lookup(local_table, &key, &res) == 0) {
191     			ret = res.type;
192     			fib_res_put(&res);
193     		}
194     	}
195     	return ret;
196     }
197     
198     /* Given (packet source, input interface) and optional (dst, oif, tos):
199        - (main) check, that source is valid i.e. not broadcast or our local
200          address.
201        - figure out what "logical" interface this packet arrived
202          and calculate "specific destination" address.
203        - check, that packet arrived from expected physical interface.
204      */
205     
206     int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
207     			struct net_device *dev, u32 *spec_dst, u32 *itag)
208     {
209     	struct in_device *in_dev;
210     	struct rt_key key;
211     	struct fib_result res;
212     	int no_addr, rpf;
213     	int ret;
214     
215     	key.dst = src;
216     	key.src = dst;
217     	key.tos = tos;
218     	key.oif = 0;
219     	key.iif = oif;
220     	key.scope = RT_SCOPE_UNIVERSE;
221     
222     	no_addr = rpf = 0;
223     	read_lock(&inetdev_lock);
224     	in_dev = __in_dev_get(dev);
225     	if (in_dev) {
226     		no_addr = in_dev->ifa_list == NULL;
227     		rpf = IN_DEV_RPFILTER(in_dev);
228     	}
229     	read_unlock(&inetdev_lock);
230     
231     	if (in_dev == NULL)
232     		goto e_inval;
233     
234     	if (fib_lookup(&key, &res))
235     		goto last_resort;
236     	if (res.type != RTN_UNICAST)
237     		goto e_inval_res;
238     	*spec_dst = FIB_RES_PREFSRC(res);
239     	fib_combine_itag(itag, &res);
240     #ifdef CONFIG_IP_ROUTE_MULTIPATH
241     	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
242     #else
243     	if (FIB_RES_DEV(res) == dev)
244     #endif
245     	{
246     		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
247     		fib_res_put(&res);
248     		return ret;
249     	}
250     	fib_res_put(&res);
251     	if (no_addr)
252     		goto last_resort;
253     	if (rpf)
254     		goto e_inval;
255     	key.oif = dev->ifindex;
256     
257     	ret = 0;
258     	if (fib_lookup(&key, &res) == 0) {
259     		if (res.type == RTN_UNICAST) {
260     			*spec_dst = FIB_RES_PREFSRC(res);
261     			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
262     		}
263     		fib_res_put(&res);
264     	}
265     	return ret;
266     
267     last_resort:
268     	if (rpf)
269     		goto e_inval;
270     	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
271     	*itag = 0;
272     	return 0;
273     
274     e_inval_res:
275     	fib_res_put(&res);
276     e_inval:
277     	return -EINVAL;
278     }
279     
280     #ifndef CONFIG_IP_NOSIOCRT
281     
282     /*
283      *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
284      */
285      
286     int ip_rt_ioctl(unsigned int cmd, void *arg)
287     {
288     	int err;
289     	struct kern_rta rta;
290     	struct rtentry  r;
291     	struct {
292     		struct nlmsghdr nlh;
293     		struct rtmsg	rtm;
294     	} req;
295     
296     	switch (cmd) {
297     	case SIOCADDRT:		/* Add a route */
298     	case SIOCDELRT:		/* Delete a route */
299     		if (!capable(CAP_NET_ADMIN))
300     			return -EPERM;
301     		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
302     			return -EFAULT;
303     		rtnl_lock();
304     		err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
305     		if (err == 0) {
306     			if (cmd == SIOCDELRT) {
307     				struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
308     				err = -ESRCH;
309     				if (tb)
310     					err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
311     			} else {
312     				struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
313     				err = -ENOBUFS;
314     				if (tb)
315     					err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
316     			}
317     			if (rta.rta_mx)
318     				kfree(rta.rta_mx);
319     		}
320     		rtnl_unlock();
321     		return err;
322     	}
323     	return -EINVAL;
324     }
325     
326     #else
327     
/* Built when CONFIG_IP_NOSIOCRT is defined: every routing ioctl is refused. */
int ip_rt_ioctl(unsigned int cmd, void *arg)
{
	(void)cmd;
	(void)arg;

	return -EINVAL;
}
332     
333     #endif
334     
335     #ifdef CONFIG_RTNETLINK
336     
337     static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
338     {
339     	int i;
340     
341     	for (i=1; i<=RTA_MAX; i++) {
342     		struct rtattr *attr = rta[i-1];
343     		if (attr) {
344     			if (RTA_PAYLOAD(attr) < 4)
345     				return -EINVAL;
346     			if (i != RTA_MULTIPATH && i != RTA_METRICS)
347     				rta[i-1] = (struct rtattr*)RTA_DATA(attr);
348     		}
349     	}
350     	return 0;
351     }
352     
353     int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
354     {
355     	struct fib_table * tb;
356     	struct rtattr **rta = arg;
357     	struct rtmsg *r = NLMSG_DATA(nlh);
358     
359     	if (inet_check_attr(r, rta))
360     		return -EINVAL;
361     
362     	tb = fib_get_table(r->rtm_table);
363     	if (tb)
364     		return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
365     	return -ESRCH;
366     }
367     
368     int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
369     {
370     	struct fib_table * tb;
371     	struct rtattr **rta = arg;
372     	struct rtmsg *r = NLMSG_DATA(nlh);
373     
374     	if (inet_check_attr(r, rta))
375     		return -EINVAL;
376     
377     	tb = fib_new_table(r->rtm_table);
378     	if (tb)
379     		return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
380     	return -ENOBUFS;
381     }
382     
383     int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
384     {
385     	int t;
386     	int s_t;
387     	struct fib_table *tb;
388     
389     	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
390     	    ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
391     		return ip_rt_dump(skb, cb);
392     
393     	s_t = cb->args[0];
394     	if (s_t == 0)
395     		s_t = cb->args[0] = RT_TABLE_MIN;
396     
397     	for (t=s_t; t<=RT_TABLE_MAX; t++) {
398     		if (t < s_t) continue;
399     		if (t > s_t)
400     			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
401     		if ((tb = fib_get_table(t))==NULL)
402     			continue;
403     		if (tb->tb_dump(tb, skb, cb) < 0) 
404     			break;
405     	}
406     
407     	cb->args[0] = t;
408     
409     	return skb->len;
410     }
411     
412     #endif
413     
414     /* Prepare and feed intra-kernel routing request.
415        Really, it should be netlink message, but :-( netlink
416        can be not configured, so that we feed it directly
417        to fib engine. It is legal, because all events occur
418        only when netlink is already locked.
419      */
420     
421     static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
422     {
423     	struct fib_table * tb;
424     	struct {
425     		struct nlmsghdr	nlh;
426     		struct rtmsg	rtm;
427     	} req;
428     	struct kern_rta rta;
429     
430     	memset(&req.rtm, 0, sizeof(req.rtm));
431     	memset(&rta, 0, sizeof(rta));
432     
433     	if (type == RTN_UNICAST)
434     		tb = fib_new_table(RT_TABLE_MAIN);
435     	else
436     		tb = fib_new_table(RT_TABLE_LOCAL);
437     
438     	if (tb == NULL)
439     		return;
440     
441     	req.nlh.nlmsg_len = sizeof(req);
442     	req.nlh.nlmsg_type = cmd;
443     	req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
444     	req.nlh.nlmsg_pid = 0;
445     	req.nlh.nlmsg_seq = 0;
446     
447     	req.rtm.rtm_dst_len = dst_len;
448     	req.rtm.rtm_table = tb->tb_id;
449     	req.rtm.rtm_protocol = RTPROT_KERNEL;
450     	req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
451     	req.rtm.rtm_type = type;
452     
453     	rta.rta_dst = &dst;
454     	rta.rta_prefsrc = &ifa->ifa_local;
455     	rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
456     
457     	if (cmd == RTM_NEWROUTE)
458     		tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
459     	else
460     		tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
461     }
462     
463     static void fib_add_ifaddr(struct in_ifaddr *ifa)
464     {
465     	struct in_device *in_dev = ifa->ifa_dev;
466     	struct net_device *dev = in_dev->dev;
467     	struct in_ifaddr *prim = ifa;
468     	u32 mask = ifa->ifa_mask;
469     	u32 addr = ifa->ifa_local;
470     	u32 prefix = ifa->ifa_address&mask;
471     
472     	if (ifa->ifa_flags&IFA_F_SECONDARY) {
473     		prim = inet_ifa_byprefix(in_dev, prefix, mask);
474     		if (prim == NULL) {
475     			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
476     			return;
477     		}
478     	}
479     
480     	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
481     
482     	if (!(dev->flags&IFF_UP))
483     		return;
484     
485     	/* Add broadcast address, if it is explicitly assigned. */
486     	if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
487     		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
488     
489     	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
490     	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
491     		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
492     			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
493     
494     		/* Add network specific broadcasts, when it takes a sense */
495     		if (ifa->ifa_prefixlen < 31) {
496     			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
497     			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
498     		}
499     	}
500     }
501     
502     static void fib_del_ifaddr(struct in_ifaddr *ifa)
503     {
504     	struct in_device *in_dev = ifa->ifa_dev;
505     	struct net_device *dev = in_dev->dev;
506     	struct in_ifaddr *ifa1;
507     	struct in_ifaddr *prim = ifa;
508     	u32 brd = ifa->ifa_address|~ifa->ifa_mask;
509     	u32 any = ifa->ifa_address&ifa->ifa_mask;
510     #define LOCAL_OK	1
511     #define BRD_OK		2
512     #define BRD0_OK		4
513     #define BRD1_OK		8
514     	unsigned ok = 0;
515     
516     	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
517     		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
518     			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
519     	else {
520     		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
521     		if (prim == NULL) {
522     			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
523     			return;
524     		}
525     	}
526     
527     	/* Deletion is more complicated than add.
528     	   We should take care of not to delete too much :-)
529     
530     	   Scan address list to be sure that addresses are really gone.
531     	 */
532     
533     	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
534     		if (ifa->ifa_local == ifa1->ifa_local)
535     			ok |= LOCAL_OK;
536     		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
537     			ok |= BRD_OK;
538     		if (brd == ifa1->ifa_broadcast)
539     			ok |= BRD1_OK;
540     		if (any == ifa1->ifa_broadcast)
541     			ok |= BRD0_OK;
542     	}
543     
544     	if (!(ok&BRD_OK))
545     		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
546     	if (!(ok&BRD1_OK))
547     		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
548     	if (!(ok&BRD0_OK))
549     		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
550     	if (!(ok&LOCAL_OK)) {
551     		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
552     
553     		/* Check, that this local address finally disappeared. */
554     		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
555     			/* And the last, but not the least thing.
556     			   We must flush stray FIB entries.
557     
558     			   First of all, we scan fib_info list searching
559     			   for stray nexthop entries, then ignite fib_flush.
560     			*/
561     			if (fib_sync_down(ifa->ifa_local, NULL, 0))
562     				fib_flush();
563     		}
564     	}
565     #undef LOCAL_OK
566     #undef BRD_OK
567     #undef BRD0_OK
568     #undef BRD1_OK
569     }
570     
/*
 * Take IP routing state for dev down: run fib_sync_down() for the
 * device (passing force through), flush the FIB if it returns non-zero,
 * then flush the routing cache and call arp_ifdown() for the device.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stray = fib_sync_down(0, dev, force);

	if (stray)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
578     
579     static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
580     {
581     	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
582     
583     	switch (event) {
584     	case NETDEV_UP:
585     		fib_add_ifaddr(ifa);
586     		rt_cache_flush(-1);
587     		break;
588     	case NETDEV_DOWN:
589     		fib_del_ifaddr(ifa);
590     		if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
591     			/* Last address was deleted from this interface.
592     			   Disable IP.
593     			 */
594     			fib_disable_ip(ifa->ifa_dev->dev, 1);
595     		} else {
596     			rt_cache_flush(-1);
597     		}
598     		break;
599     	}
600     	return NOTIFY_DONE;
601     }
602     
603     static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
604     {
605     	struct net_device *dev = ptr;
606     	struct in_device *in_dev = __in_dev_get(dev);
607     
608     	if (!in_dev)
609     		return NOTIFY_DONE;
610     
611     	switch (event) {
612     	case NETDEV_UP:
613     		for_ifa(in_dev) {
614     			fib_add_ifaddr(ifa);
615     		} endfor_ifa(in_dev);
616     #ifdef CONFIG_IP_ROUTE_MULTIPATH
617     		fib_sync_up(dev);
618     #endif
619     		rt_cache_flush(-1);
620     		break;
621     	case NETDEV_DOWN:
622     		fib_disable_ip(dev, 0);
623     		break;
624     	case NETDEV_UNREGISTER:
625     		fib_disable_ip(dev, 1);
626     		break;
627     	case NETDEV_CHANGEMTU:
628     	case NETDEV_CHANGE:
629     		rt_cache_flush(0);
630     		break;
631     	}
632     	return NOTIFY_DONE;
633     }
634     
635     struct notifier_block fib_inetaddr_notifier = {
636     	notifier_call:	fib_inetaddr_event,
637     };
638     
639     struct notifier_block fib_netdev_notifier = {
640     	notifier_call:	fib_netdev_event,
641     };
642     
643     void __init ip_fib_init(void)
644     {
645     #ifdef CONFIG_PROC_FS
646     	proc_net_create("route",0,fib_get_procinfo);
647     #endif		/* CONFIG_PROC_FS */
648     
649     #ifndef CONFIG_IP_MULTIPLE_TABLES
650     	local_table = fib_hash_init(RT_TABLE_LOCAL);
651     	main_table = fib_hash_init(RT_TABLE_MAIN);
652     #else
653     	fib_rules_init();
654     #endif
655     
656     	register_netdevice_notifier(&fib_netdev_notifier);
657     	register_inetaddr_notifier(&fib_inetaddr_notifier);
658     }
659     
660