File: /usr/src/linux/net/ipv4/fib_frontend.c
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.25 2001/05/29 22:16:25 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/icmp.h>
45 #include <net/arp.h>
46 #include <net/ip_fib.h>
47
/* Debug helper: forwards its arguments to printk() at KERN_DEBUG level. */
#define FFprint(args...) printk(KERN_DEBUG args)
49
#ifdef CONFIG_IP_MULTIPLE_TABLES

/* First table id visited by inet_dump_fib() when a dump starts. */
#define RT_TABLE_MIN 1

/* Routing tables indexed by table id; slots are filled by __fib_new_table(). */
struct fib_table *fib_tables[RT_TABLE_MAX+1];

/*
 * Create the FIB table with identifier 'id' through fib_hash_init() and
 * record it in fib_tables[id].
 *
 * Returns the new table, or NULL when fib_hash_init() fails; in that
 * case fib_tables[id] is left untouched.
 */
struct fib_table *__fib_new_table(int id)
{
	struct fib_table *table = fib_hash_init(id);

	if (table != NULL)
		fib_tables[id] = table;
	return table;
}

#else /* !CONFIG_IP_MULTIPLE_TABLES */

/* First table id visited by inet_dump_fib() when a dump starts. */
#define RT_TABLE_MIN RT_TABLE_MAIN

/* The two tables of a single-table build; both are set up in ip_fib_init(). */
struct fib_table *local_table;
struct fib_table *main_table;

#endif /* CONFIG_IP_MULTIPLE_TABLES */
76
77
78 void fib_flush(void)
79 {
80 int flushed = 0;
81 #ifdef CONFIG_IP_MULTIPLE_TABLES
82 struct fib_table *tb;
83 int id;
84
85 for (id = RT_TABLE_MAX; id>0; id--) {
86 if ((tb = fib_get_table(id))==NULL)
87 continue;
88 flushed += tb->tb_flush(tb);
89 }
90 #else /* CONFIG_IP_MULTIPLE_TABLES */
91 flushed += main_table->tb_flush(main_table);
92 flushed += local_table->tb_flush(local_table);
93 #endif /* CONFIG_IP_MULTIPLE_TABLES */
94
95 if (flushed)
96 rt_cache_flush(-1);
97 }
98
99
100 #ifdef CONFIG_PROC_FS
101
102 /*
103 * Called from the PROCfs module. This outputs /proc/net/route.
104 *
105 * It always works in backward compatibility mode.
106 * The format of the file is not supposed to be changed.
107 */
108
109 static int
110 fib_get_procinfo(char *buffer, char **start, off_t offset, int length)
111 {
112 int first = offset/128;
113 char *ptr = buffer;
114 int count = (length+127)/128;
115 int len;
116
117 *start = buffer + offset%128;
118
119 if (--first < 0) {
120 sprintf(buffer, "%-127s\n", "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
121 --count;
122 ptr += 128;
123 first = 0;
124 }
125
126 if (main_table && count > 0) {
127 int n = main_table->tb_get_info(main_table, ptr, first, count);
128 count -= n;
129 ptr += n*128;
130 }
131 len = ptr - *start;
132 if (len >= length)
133 return length;
134 if (len >= 0)
135 return len;
136 return 0;
137 }
138
139 #endif /* CONFIG_PROC_FS */
140
141 /*
142 * Find the first device with a given source address.
143 */
144
145 struct net_device * ip_dev_find(u32 addr)
146 {
147 struct rt_key key;
148 struct fib_result res;
149 struct net_device *dev = NULL;
150
151 memset(&key, 0, sizeof(key));
152 key.dst = addr;
153 #ifdef CONFIG_IP_MULTIPLE_TABLES
154 res.r = NULL;
155 #endif
156
157 if (!local_table || local_table->tb_lookup(local_table, &key, &res)) {
158 return NULL;
159 }
160 if (res.type != RTN_LOCAL)
161 goto out;
162 dev = FIB_RES_DEV(res);
163 if (dev)
164 atomic_inc(&dev->refcnt);
165
166 out:
167 fib_res_put(&res);
168 return dev;
169 }
170
171 unsigned inet_addr_type(u32 addr)
172 {
173 struct rt_key key;
174 struct fib_result res;
175 unsigned ret = RTN_BROADCAST;
176
177 if (ZERONET(addr) || BADCLASS(addr))
178 return RTN_BROADCAST;
179 if (MULTICAST(addr))
180 return RTN_MULTICAST;
181
182 memset(&key, 0, sizeof(key));
183 key.dst = addr;
184 #ifdef CONFIG_IP_MULTIPLE_TABLES
185 res.r = NULL;
186 #endif
187
188 if (local_table) {
189 ret = RTN_UNICAST;
190 if (local_table->tb_lookup(local_table, &key, &res) == 0) {
191 ret = res.type;
192 fib_res_put(&res);
193 }
194 }
195 return ret;
196 }
197
198 /* Given (packet source, input interface) and optional (dst, oif, tos):
199 - (main) check, that source is valid i.e. not broadcast or our local
200 address.
201 - figure out what "logical" interface this packet arrived
202 and calculate "specific destination" address.
203 - check, that packet arrived from expected physical interface.
204 */
205
206 int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
207 struct net_device *dev, u32 *spec_dst, u32 *itag)
208 {
209 struct in_device *in_dev;
210 struct rt_key key;
211 struct fib_result res;
212 int no_addr, rpf;
213 int ret;
214
215 key.dst = src;
216 key.src = dst;
217 key.tos = tos;
218 key.oif = 0;
219 key.iif = oif;
220 key.scope = RT_SCOPE_UNIVERSE;
221
222 no_addr = rpf = 0;
223 read_lock(&inetdev_lock);
224 in_dev = __in_dev_get(dev);
225 if (in_dev) {
226 no_addr = in_dev->ifa_list == NULL;
227 rpf = IN_DEV_RPFILTER(in_dev);
228 }
229 read_unlock(&inetdev_lock);
230
231 if (in_dev == NULL)
232 goto e_inval;
233
234 if (fib_lookup(&key, &res))
235 goto last_resort;
236 if (res.type != RTN_UNICAST)
237 goto e_inval_res;
238 *spec_dst = FIB_RES_PREFSRC(res);
239 fib_combine_itag(itag, &res);
240 #ifdef CONFIG_IP_ROUTE_MULTIPATH
241 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
242 #else
243 if (FIB_RES_DEV(res) == dev)
244 #endif
245 {
246 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
247 fib_res_put(&res);
248 return ret;
249 }
250 fib_res_put(&res);
251 if (no_addr)
252 goto last_resort;
253 if (rpf)
254 goto e_inval;
255 key.oif = dev->ifindex;
256
257 ret = 0;
258 if (fib_lookup(&key, &res) == 0) {
259 if (res.type == RTN_UNICAST) {
260 *spec_dst = FIB_RES_PREFSRC(res);
261 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
262 }
263 fib_res_put(&res);
264 }
265 return ret;
266
267 last_resort:
268 if (rpf)
269 goto e_inval;
270 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
271 *itag = 0;
272 return 0;
273
274 e_inval_res:
275 fib_res_put(&res);
276 e_inval:
277 return -EINVAL;
278 }
279
280 #ifndef CONFIG_IP_NOSIOCRT
281
282 /*
283 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
284 */
285
286 int ip_rt_ioctl(unsigned int cmd, void *arg)
287 {
288 int err;
289 struct kern_rta rta;
290 struct rtentry r;
291 struct {
292 struct nlmsghdr nlh;
293 struct rtmsg rtm;
294 } req;
295
296 switch (cmd) {
297 case SIOCADDRT: /* Add a route */
298 case SIOCDELRT: /* Delete a route */
299 if (!capable(CAP_NET_ADMIN))
300 return -EPERM;
301 if (copy_from_user(&r, arg, sizeof(struct rtentry)))
302 return -EFAULT;
303 rtnl_lock();
304 err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
305 if (err == 0) {
306 if (cmd == SIOCDELRT) {
307 struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
308 err = -ESRCH;
309 if (tb)
310 err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
311 } else {
312 struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
313 err = -ENOBUFS;
314 if (tb)
315 err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
316 }
317 if (rta.rta_mx)
318 kfree(rta.rta_mx);
319 }
320 rtnl_unlock();
321 return err;
322 }
323 return -EINVAL;
324 }
325
326 #else
327
328 int ip_rt_ioctl(unsigned int cmd, void *arg)
329 {
330 return -EINVAL;
331 }
332
333 #endif
334
335 #ifdef CONFIG_RTNETLINK
336
337 static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
338 {
339 int i;
340
341 for (i=1; i<=RTA_MAX; i++) {
342 struct rtattr *attr = rta[i-1];
343 if (attr) {
344 if (RTA_PAYLOAD(attr) < 4)
345 return -EINVAL;
346 if (i != RTA_MULTIPATH && i != RTA_METRICS)
347 rta[i-1] = (struct rtattr*)RTA_DATA(attr);
348 }
349 }
350 return 0;
351 }
352
353 int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
354 {
355 struct fib_table * tb;
356 struct rtattr **rta = arg;
357 struct rtmsg *r = NLMSG_DATA(nlh);
358
359 if (inet_check_attr(r, rta))
360 return -EINVAL;
361
362 tb = fib_get_table(r->rtm_table);
363 if (tb)
364 return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
365 return -ESRCH;
366 }
367
368 int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
369 {
370 struct fib_table * tb;
371 struct rtattr **rta = arg;
372 struct rtmsg *r = NLMSG_DATA(nlh);
373
374 if (inet_check_attr(r, rta))
375 return -EINVAL;
376
377 tb = fib_new_table(r->rtm_table);
378 if (tb)
379 return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
380 return -ENOBUFS;
381 }
382
383 int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
384 {
385 int t;
386 int s_t;
387 struct fib_table *tb;
388
389 if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
390 ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
391 return ip_rt_dump(skb, cb);
392
393 s_t = cb->args[0];
394 if (s_t == 0)
395 s_t = cb->args[0] = RT_TABLE_MIN;
396
397 for (t=s_t; t<=RT_TABLE_MAX; t++) {
398 if (t < s_t) continue;
399 if (t > s_t)
400 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
401 if ((tb = fib_get_table(t))==NULL)
402 continue;
403 if (tb->tb_dump(tb, skb, cb) < 0)
404 break;
405 }
406
407 cb->args[0] = t;
408
409 return skb->len;
410 }
411
412 #endif
413
414 /* Prepare and feed intra-kernel routing request.
415 Really, it should be netlink message, but :-( netlink
416 can be not configured, so that we feed it directly
417 to fib engine. It is legal, because all events occur
418 only when netlink is already locked.
419 */
420
421 static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
422 {
423 struct fib_table * tb;
424 struct {
425 struct nlmsghdr nlh;
426 struct rtmsg rtm;
427 } req;
428 struct kern_rta rta;
429
430 memset(&req.rtm, 0, sizeof(req.rtm));
431 memset(&rta, 0, sizeof(rta));
432
433 if (type == RTN_UNICAST)
434 tb = fib_new_table(RT_TABLE_MAIN);
435 else
436 tb = fib_new_table(RT_TABLE_LOCAL);
437
438 if (tb == NULL)
439 return;
440
441 req.nlh.nlmsg_len = sizeof(req);
442 req.nlh.nlmsg_type = cmd;
443 req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
444 req.nlh.nlmsg_pid = 0;
445 req.nlh.nlmsg_seq = 0;
446
447 req.rtm.rtm_dst_len = dst_len;
448 req.rtm.rtm_table = tb->tb_id;
449 req.rtm.rtm_protocol = RTPROT_KERNEL;
450 req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
451 req.rtm.rtm_type = type;
452
453 rta.rta_dst = &dst;
454 rta.rta_prefsrc = &ifa->ifa_local;
455 rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
456
457 if (cmd == RTM_NEWROUTE)
458 tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
459 else
460 tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
461 }
462
463 static void fib_add_ifaddr(struct in_ifaddr *ifa)
464 {
465 struct in_device *in_dev = ifa->ifa_dev;
466 struct net_device *dev = in_dev->dev;
467 struct in_ifaddr *prim = ifa;
468 u32 mask = ifa->ifa_mask;
469 u32 addr = ifa->ifa_local;
470 u32 prefix = ifa->ifa_address&mask;
471
472 if (ifa->ifa_flags&IFA_F_SECONDARY) {
473 prim = inet_ifa_byprefix(in_dev, prefix, mask);
474 if (prim == NULL) {
475 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
476 return;
477 }
478 }
479
480 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
481
482 if (!(dev->flags&IFF_UP))
483 return;
484
485 /* Add broadcast address, if it is explicitly assigned. */
486 if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
487 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
488
489 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
490 (prefix != addr || ifa->ifa_prefixlen < 32)) {
491 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
492 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
493
494 /* Add network specific broadcasts, when it takes a sense */
495 if (ifa->ifa_prefixlen < 31) {
496 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
497 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
498 }
499 }
500 }
501
502 static void fib_del_ifaddr(struct in_ifaddr *ifa)
503 {
504 struct in_device *in_dev = ifa->ifa_dev;
505 struct net_device *dev = in_dev->dev;
506 struct in_ifaddr *ifa1;
507 struct in_ifaddr *prim = ifa;
508 u32 brd = ifa->ifa_address|~ifa->ifa_mask;
509 u32 any = ifa->ifa_address&ifa->ifa_mask;
510 #define LOCAL_OK 1
511 #define BRD_OK 2
512 #define BRD0_OK 4
513 #define BRD1_OK 8
514 unsigned ok = 0;
515
516 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
517 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
518 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
519 else {
520 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
521 if (prim == NULL) {
522 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
523 return;
524 }
525 }
526
527 /* Deletion is more complicated than add.
528 We should take care of not to delete too much :-)
529
530 Scan address list to be sure that addresses are really gone.
531 */
532
533 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
534 if (ifa->ifa_local == ifa1->ifa_local)
535 ok |= LOCAL_OK;
536 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
537 ok |= BRD_OK;
538 if (brd == ifa1->ifa_broadcast)
539 ok |= BRD1_OK;
540 if (any == ifa1->ifa_broadcast)
541 ok |= BRD0_OK;
542 }
543
544 if (!(ok&BRD_OK))
545 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
546 if (!(ok&BRD1_OK))
547 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
548 if (!(ok&BRD0_OK))
549 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
550 if (!(ok&LOCAL_OK)) {
551 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
552
553 /* Check, that this local address finally disappeared. */
554 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
555 /* And the last, but not the least thing.
556 We must flush stray FIB entries.
557
558 First of all, we scan fib_info list searching
559 for stray nexthop entries, then ignite fib_flush.
560 */
561 if (fib_sync_down(ifa->ifa_local, NULL, 0))
562 fib_flush();
563 }
564 }
565 #undef LOCAL_OK
566 #undef BRD_OK
567 #undef BRD0_OK
568 #undef BRD1_OK
569 }
570
/*
 * Tear down IP routing state tied to 'dev': run fib_sync_down() for the
 * device (passing 'force' through), call fib_flush() if that reported
 * anything, then flush the routing cache and call arp_ifdown() for the
 * device.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stray = fib_sync_down(0, dev, force);

	if (stray != 0)
		fib_flush();
	rt_cache_flush(0);
	arp_ifdown(dev);
}
578
579 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
580 {
581 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
582
583 switch (event) {
584 case NETDEV_UP:
585 fib_add_ifaddr(ifa);
586 rt_cache_flush(-1);
587 break;
588 case NETDEV_DOWN:
589 fib_del_ifaddr(ifa);
590 if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
591 /* Last address was deleted from this interface.
592 Disable IP.
593 */
594 fib_disable_ip(ifa->ifa_dev->dev, 1);
595 } else {
596 rt_cache_flush(-1);
597 }
598 break;
599 }
600 return NOTIFY_DONE;
601 }
602
603 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
604 {
605 struct net_device *dev = ptr;
606 struct in_device *in_dev = __in_dev_get(dev);
607
608 if (!in_dev)
609 return NOTIFY_DONE;
610
611 switch (event) {
612 case NETDEV_UP:
613 for_ifa(in_dev) {
614 fib_add_ifaddr(ifa);
615 } endfor_ifa(in_dev);
616 #ifdef CONFIG_IP_ROUTE_MULTIPATH
617 fib_sync_up(dev);
618 #endif
619 rt_cache_flush(-1);
620 break;
621 case NETDEV_DOWN:
622 fib_disable_ip(dev, 0);
623 break;
624 case NETDEV_UNREGISTER:
625 fib_disable_ip(dev, 1);
626 break;
627 case NETDEV_CHANGEMTU:
628 case NETDEV_CHANGE:
629 rt_cache_flush(0);
630 break;
631 }
632 return NOTIFY_DONE;
633 }
634
635 struct notifier_block fib_inetaddr_notifier = {
636 notifier_call: fib_inetaddr_event,
637 };
638
639 struct notifier_block fib_netdev_notifier = {
640 notifier_call: fib_netdev_event,
641 };
642
643 void __init ip_fib_init(void)
644 {
645 #ifdef CONFIG_PROC_FS
646 proc_net_create("route",0,fib_get_procinfo);
647 #endif /* CONFIG_PROC_FS */
648
649 #ifndef CONFIG_IP_MULTIPLE_TABLES
650 local_table = fib_hash_init(RT_TABLE_LOCAL);
651 main_table = fib_hash_init(RT_TABLE_MAIN);
652 #else
653 fib_rules_init();
654 #endif
655
656 register_netdevice_notifier(&fib_netdev_notifier);
657 register_inetaddr_notifier(&fib_inetaddr_notifier);
658 }
659
660