File: /usr/src/linux/net/ipv6/route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.55 2001/09/18 22:29:10 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16 #include <linux/config.h>
17 #include <linux/errno.h>
18 #include <linux/types.h>
19 #include <linux/socket.h>
20 #include <linux/sockios.h>
21 #include <linux/net.h>
22 #include <linux/route.h>
23 #include <linux/netdevice.h>
24 #include <linux/in6.h>
25 #include <linux/init.h>
26 #include <linux/netlink.h>
27 #include <linux/if_arp.h>
28
29 #ifdef CONFIG_PROC_FS
30 #include <linux/proc_fs.h>
31 #endif
32
33 #include <net/snmp.h>
34 #include <net/ipv6.h>
35 #include <net/ip6_fib.h>
36 #include <net/ip6_route.h>
37 #include <net/ndisc.h>
38 #include <net/addrconf.h>
39 #include <net/tcp.h>
40 #include <linux/rtnetlink.h>
41
42 #include <asm/uaccess.h>
43
44 #ifdef CONFIG_SYSCTL
45 #include <linux/sysctl.h>
46 #endif
47
48 #undef CONFIG_RT6_POLICY
49
50 /* Set to 3 to get tracing. */
51 #define RT6_DEBUG 2
52
53 #if RT6_DEBUG >= 3
54 #define RDBG(x) printk x
55 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
56 #else
57 #define RDBG(x)
58 #define RT6_TRACE(x...) do { ; } while (0)
59 #endif
60
61
/*
 * Routing-cache tunables.  Values written as N*HZ are in jiffies.
 */
/* Entry-count limit: ip6_dst_gc() collects and reports pressure above it. */
62 int ip6_rt_max_size = 4096;
/* ip6_dst_gc() skips a pass if the last one was more recent than this
 * (unless the cache is over ip6_rt_max_size). */
63 int ip6_rt_gc_min_interval = 5*HZ;
/* ip6_dst_gc() resets its expiry value to half of this once the entry
 * count is back under ip6_dst_ops.gc_thresh. */
64 int ip6_rt_gc_timeout = 60*HZ;
/* NOTE(review): not referenced in the visible part of this file;
 * presumably the period of a GC timer elsewhere -- confirm. */
65 int ip6_rt_gc_interval = 30*HZ;
/* Shift count used by ip6_dst_gc() to decay its expiry value on each call. */
66 int ip6_rt_gc_elasticity = 9;
/* Lifetime handed to dst_set_expires() by rt6_pmtu_discovery(). */
67 int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound applied whenever advmss is derived from a path MTU. */
68 int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
69
/*
 * Forward declarations: ip6_rt_copy() is a local helper; the others are
 * the callbacks installed in ip6_dst_ops and ip6_null_entry below.
 */
70 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
71 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
72 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst,
73 struct sk_buff *skb);
74 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
75 static int ip6_dst_gc(void);
76
77 static int ip6_pkt_discard(struct sk_buff *skb);
78 static void ip6_link_failure(struct sk_buff *skb);
79
/*
 * Destination-cache operations for IPv6 routes.
 *
 * The initializer is positional, so the order must match struct dst_ops.
 * NOTE(review): the struct definition is not visible here.  This file
 * reads ip6_dst_ops.gc_thresh and ip6_dst_ops.entries; the literal 1024
 * is presumably gc_thresh and the NULL slot presumably an unused
 * callback -- confirm against the header.
 */
80 struct dst_ops ip6_dst_ops = {
81 AF_INET6,
82 __constant_htons(ETH_P_IPV6),
83 1024,
84
85 ip6_dst_gc,
86 ip6_dst_check,
87 ip6_dst_reroute,
88 NULL,
89 ip6_negative_advice,
90 ip6_link_failure,
91 sizeof(struct rt6_info),
92 };
93
/*
 * Sentinel "no route" entry.
 *
 * It is returned by rt6_device_match() when a strict device match fails
 * and by rt6_cow() when a clone cannot be allocated, and it is the leaf of
 * the routing table root.  The initializer carries &loopback_dev,
 * -ENETUNREACH, ip6_pkt_discard in two handler slots, &ip6_dst_ops and the
 * flags RTF_REJECT|RTF_NONEXTHOP.
 * NOTE(review): the initializer is positional; which struct field each
 * value lands in depends on struct dst_entry / struct rt6_info, which are
 * not visible here.
 */
94 struct rt6_info ip6_null_entry = {
95 {{NULL, ATOMIC_INIT(1), 1, &loopback_dev,
96 -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
97 -ENETUNREACH, NULL, NULL,
98 ip6_pkt_discard, ip6_pkt_discard,
99 #ifdef CONFIG_NET_CLS_ROUTE
100 0,
101 #endif
102 &ip6_dst_ops}},
103 NULL, {{{0}}}, RTF_REJECT|RTF_NONEXTHOP, ~0U,
104 255, ATOMIC_INIT(1), {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0}
105 };
106
/*
 * Root node of the IPv6 routing tree.  Every lookup in this file starts
 * here, and the default-router code walks ip6_routing_table.leaf directly.
 * The node is flagged RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO.
 * NOTE(review): positional initializer; &ip6_null_entry is presumably the
 * initial ->leaf -- confirm against struct fib6_node.
 */
107 struct fib6_node ip6_routing_table = {
108 NULL, NULL, NULL, NULL,
109 &ip6_null_entry,
110 0, RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO, 0
111 };
112
/*
 * Policy-routing hooks.  CONFIG_RT6_POLICY is #undef'd near the top of
 * this file, so the #else branch is the one in effect: ip6_rt_policy is
 * the constant 0 and every "ip6_rt_policy == 0" test below is always true.
 */
113 #ifdef CONFIG_RT6_POLICY
114 int ip6_rt_policy = 0;
115
116 struct pol_chain *rt6_pol_list = NULL;
117
118
119 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb);
120 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk);
121
122 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
123 struct in6_addr *daddr,
124 struct in6_addr *saddr,
125 struct fl_acc_args *args);
126
127 #else
128 #define ip6_rt_policy (0)
129 #endif
130
131 /* Protects the whole IPv6 FIB: lookups take it for reading; rt6_ins(), ip6_del_rt() and rt6_ifdown() take it for writing. */
132
133 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
134
135
136 /*
137 * Route lookup. Any rt6_lock is implied.
138 */
139
140 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
141 int oif,
142 int strict)
143 {
144 struct rt6_info *local = NULL;
145 struct rt6_info *sprt;
146
147 if (oif) {
148 for (sprt = rt; sprt; sprt = sprt->u.next) {
149 struct net_device *dev = sprt->rt6i_dev;
150 if (dev->ifindex == oif)
151 return sprt;
152 if (dev->flags&IFF_LOOPBACK)
153 local = sprt;
154 }
155
156 if (local)
157 return local;
158
159 if (strict)
160 return &ip6_null_entry;
161 }
162 return rt;
163 }
164
165 /*
166 * Pointer to the last default router chosen by rt6_best_dflt(); it is
166 * cleared by ip6_del_rt() and rt6_purge_dflt_routers().  Updates are
166 * made under rt6_dflt_lock.  BH is disabled locally.
167 */
168 static struct rt6_info *rt6_dflt_pointer = NULL;
169 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
170
171 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
172 {
173 struct rt6_info *match = NULL;
174 struct rt6_info *sprt;
175 int mpri = 0;
176
177 for (sprt = rt; sprt; sprt = sprt->u.next) {
178 struct neighbour *neigh;
179
180 if ((neigh = sprt->rt6i_nexthop) != NULL) {
181 int m = -1;
182
183 switch (neigh->nud_state) {
184 case NUD_REACHABLE:
185 if (sprt != rt6_dflt_pointer) {
186 rt = sprt;
187 goto out;
188 }
189 m = 2;
190 break;
191
192 case NUD_DELAY:
193 m = 1;
194 break;
195
196 case NUD_STALE:
197 m = 1;
198 break;
199 };
200
201 if (oif && sprt->rt6i_dev->ifindex == oif) {
202 m += 2;
203 }
204
205 if (m >= mpri) {
206 mpri = m;
207 match = sprt;
208 }
209 }
210 }
211
212 if (match) {
213 rt = match;
214 } else {
215 /*
216 * No default routers are known to be reachable.
217 * SHOULD round robin
218 */
219 spin_lock(&rt6_dflt_lock);
220 if (rt6_dflt_pointer) {
221 struct rt6_info *next;
222
223 if ((next = rt6_dflt_pointer->u.next) != NULL &&
224 next->u.dst.obsolete <= 0 &&
225 next->u.dst.error == 0)
226 rt = next;
227 }
228 spin_unlock(&rt6_dflt_lock);
229 }
230
231 out:
232 spin_lock(&rt6_dflt_lock);
233 rt6_dflt_pointer = rt;
234 spin_unlock(&rt6_dflt_lock);
235 return rt;
236 }
237
238 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
239 int oif, int strict)
240 {
241 struct fib6_node *fn;
242 struct rt6_info *rt;
243
244 read_lock_bh(&rt6_lock);
245 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
246 rt = rt6_device_match(fn->leaf, oif, strict);
247 dst_hold(&rt->u.dst);
248 rt->u.dst.__use++;
249 read_unlock_bh(&rt6_lock);
250
251 rt->u.dst.lastuse = jiffies;
252 if (rt->u.dst.error == 0)
253 return rt;
254 dst_release(&rt->u.dst);
255 return NULL;
256 }
257
258 /* rt6_ins is called with FREE rt6_lock.
259 It takes new route entry, the addition fails by any reason the
260 route is freed. In any case, if caller does not hold it, it may
261 be destroyed.
262 */
263
264 static int rt6_ins(struct rt6_info *rt)
265 {
266 int err;
267
268 write_lock_bh(&rt6_lock);
269 err = fib6_add(&ip6_routing_table, rt);
270 write_unlock_bh(&rt6_lock);
271
272 return err;
273 }
274
275 /* No rt6_lock! If COW faild, the function returns dead route entry
276 with dst->error set to errno value.
277 */
278
279 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
280 struct in6_addr *saddr)
281 {
282 int err;
283 struct rt6_info *rt;
284
285 /*
286 * Clone the route.
287 */
288
289 rt = ip6_rt_copy(ort);
290
291 if (rt) {
292 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
293
294 if (!(rt->rt6i_flags&RTF_GATEWAY))
295 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
296
297 rt->rt6i_dst.plen = 128;
298 rt->rt6i_flags |= RTF_CACHE;
299 rt->u.dst.flags |= DST_HOST;
300
301 #ifdef CONFIG_IPV6_SUBTREES
302 if (rt->rt6i_src.plen && saddr) {
303 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
304 rt->rt6i_src.plen = 128;
305 }
306 #endif
307
308 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
309
310 dst_clone(&rt->u.dst);
311
312 err = rt6_ins(rt);
313 if (err == 0)
314 return rt;
315
316 rt->u.dst.error = err;
317
318 return rt;
319 }
320 dst_clone(&ip6_null_entry.u.dst);
321 return &ip6_null_entry;
322 }
323
324 #ifdef CONFIG_RT6_POLICY
325 static __inline__ struct rt6_info *rt6_flow_lookup_in(struct rt6_info *rt,
326 struct sk_buff *skb)
327 {
328 struct in6_addr *daddr, *saddr;
329 struct fl_acc_args arg;
330
331 arg.type = FL_ARG_FORWARD;
332 arg.fl_u.skb = skb;
333
334 saddr = &skb->nh.ipv6h->saddr;
335 daddr = &skb->nh.ipv6h->daddr;
336
337 return rt6_flow_lookup(rt, daddr, saddr, &arg);
338 }
339
340 static __inline__ struct rt6_info *rt6_flow_lookup_out(struct rt6_info *rt,
341 struct sock *sk,
342 struct flowi *fl)
343 {
344 struct fl_acc_args arg;
345
346 arg.type = FL_ARG_ORIGIN;
347 arg.fl_u.fl_o.sk = sk;
348 arg.fl_u.fl_o.flow = fl;
349
350 return rt6_flow_lookup(rt, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr,
351 &arg);
352 }
353
354 #endif
355
/*
 * BACKTRACK() - retry a failed strict lookup further up the tree.
 *
 * Not a self-contained macro: it uses the caller's locals `rt`, `fn` and
 * `strict` and jumps to the caller's labels `out` and `restart`.
 *
 * It only acts when `rt` is the null entry and `strict` is set.  It then
 * climbs fn->parent: on reaching a node flagged RTN_ROOT it takes a
 * reference on `rt` (still the null entry) and jumps to `out`; on reaching
 * a node flagged RTN_RTINFO it jumps to `restart` so the caller re-reads
 * fn->leaf.  If the parent chain ends first, execution falls through with
 * fn == NULL.
 */
356 #define BACKTRACK() \
357 if (rt == &ip6_null_entry && strict) { \
358 while ((fn = fn->parent) != NULL) { \
359 if (fn->fn_flags & RTN_ROOT) { \
360 dst_clone(&rt->u.dst); \
361 goto out; \
362 } \
363 if (fn->fn_flags & RTN_RTINFO) \
364 goto restart; \
365 } \
366 }
367
368
/*
 * ip6_route_input - choose a route for a received packet.
 *
 * Looks up the packet's destination/source address pair, matches against
 * the receiving device (skb->dev->ifindex) and stores the result in
 * skb->dst.  Each path visible here takes a reference on the chosen entry
 * before it is stored (dst_clone(), or the reference rt6_cow() returns).
 * lastuse and __use of the chosen entry are updated after the lock is
 * dropped.
 */
369 void ip6_route_input(struct sk_buff *skb)
370 {
371 struct fib6_node *fn;
372 struct rt6_info *rt;
373 int strict;
/* Upper bound on relookups after losing an insertion race in rt6_cow(). */
374 int attempts = 3;
375
/* Non-zero for multicast or link-local destinations; passed to
 * rt6_device_match() and tested by BACKTRACK(). */
376 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
377
378 relookup:
379 read_lock_bh(&rt6_lock);
380
381 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
382 &skb->nh.ipv6h->saddr);
383
/* BACKTRACK() jumps here after moving fn to an ancestor node. */
384 restart:
385 rt = fn->leaf;
386
/* The leaf is already a cache entry: device-match it and use it as is. */
387 if ((rt->rt6i_flags & RTF_CACHE)) {
388 if (ip6_rt_policy == 0) {
389 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
390 BACKTRACK();
391 dst_clone(&rt->u.dst);
392 goto out;
393 }
394
395 #ifdef CONFIG_RT6_POLICY
396 if ((rt->rt6i_flags & RTF_FLOW)) {
397 struct rt6_info *sprt;
398
399 for (sprt = rt; sprt; sprt = sprt->u.next) {
400 if (rt6_flow_match_in(sprt, skb)) {
401 rt = sprt;
402 dst_clone(&rt->u.dst);
403 goto out;
404 }
405 }
406 }
407 #endif
408 }
409
/* Non-cache leaf.  The match is non-strict here (last argument 0), so
 * rt6_device_match() cannot itself return the null entry; BACKTRACK()
 * only fires if the leaf head already is the null entry. */
410 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
411 BACKTRACK();
412
413 if (ip6_rt_policy == 0) {
/* No next hop attached and the route is not marked RTF_NONEXTHOP: make a
 * per-destination clone.  rt6_cow() must run without rt6_lock, so the
 * read lock is dropped first. */
414 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
415 read_unlock_bh(&rt6_lock);
416
417 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
418 &skb->nh.ipv6h->saddr);
419
/* Anything other than -EEXIST (including success) is final; so is
 * running out of attempts.  The lock is not held here, hence out2. */
420 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
421 goto out2;
422 /* Race condition! In the gap, when rt6_lock was
423 released someone could insert this route. Relookup.
424 */
425 goto relookup;
426 }
427 dst_clone(&rt->u.dst);
428 } else {
429 #ifdef CONFIG_RT6_POLICY
430 rt = rt6_flow_lookup_in(rt, skb);
431 #else
432 /* NEVER REACHED */
433 #endif
434 }
435
436 out:
437 read_unlock_bh(&rt6_lock);
438 out2:
439 rt->u.dst.lastuse = jiffies;
440 rt->u.dst.__use++;
441 skb->dst = (struct dst_entry *) rt;
442 }
443
/*
 * ip6_route_output - choose a route for a locally originated flow.
 *
 * Looks up fl's destination/source address pair, constrained to fl->oif
 * when that is non-zero.  Returns the dst_entry of the chosen route; each
 * path visible here takes a reference on it for the caller (dst_clone(),
 * or the reference rt6_cow() returns).  The result may be ip6_null_entry
 * or a clone with dst.error set, so callers should check dst->error.
 * `sk` is only used by the policy-routing code.
 */
444 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
445 {
446 struct fib6_node *fn;
447 struct rt6_info *rt;
448 int strict;
/* Upper bound on relookups after losing an insertion race in rt6_cow(). */
449 int attempts = 3;
450
/* Non-zero for multicast or link-local destinations; passed to
 * rt6_device_match() and tested by BACKTRACK(). */
451 strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
452
453 relookup:
454 read_lock_bh(&rt6_lock);
455
456 fn = fib6_lookup(&ip6_routing_table, fl->nl_u.ip6_u.daddr,
457 fl->nl_u.ip6_u.saddr);
458
/* BACKTRACK() jumps here after moving fn to an ancestor node. */
459 restart:
460 rt = fn->leaf;
461
/* The leaf is already a cache entry: device-match it and use it as is. */
462 if ((rt->rt6i_flags & RTF_CACHE)) {
463 if (ip6_rt_policy == 0) {
464 rt = rt6_device_match(rt, fl->oif, strict);
465 BACKTRACK();
466 dst_clone(&rt->u.dst);
467 goto out;
468 }
469
470 #ifdef CONFIG_RT6_POLICY
471 if ((rt->rt6i_flags & RTF_FLOW)) {
472 struct rt6_info *sprt;
473
474 for (sprt = rt; sprt; sprt = sprt->u.next) {
475 if (rt6_flow_match_out(sprt, sk)) {
476 rt = sprt;
477 dst_clone(&rt->u.dst);
478 goto out;
479 }
480 }
481 }
482 #endif
483 }
/* Default routes with metric >= IP6_RT_PRIO_ADDRCONF go through default
 * router selection; a default route with a lower metric is used as found,
 * without device matching.  Everything else is device-matched. */
484 if (rt->rt6i_flags & RTF_DEFAULT) {
485 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
486 rt = rt6_best_dflt(rt, fl->oif);
487 } else {
488 rt = rt6_device_match(rt, fl->oif, strict);
489 BACKTRACK();
490 }
491
492 if (ip6_rt_policy == 0) {
/* No next hop attached and the route is not marked RTF_NONEXTHOP: make a
 * per-destination clone.  rt6_cow() must run without rt6_lock, so the
 * read lock is dropped first. */
493 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
494 read_unlock_bh(&rt6_lock);
495
496 rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr,
497 fl->nl_u.ip6_u.saddr);
498
/* Anything other than -EEXIST (including success) is final; so is
 * running out of attempts.  The lock is not held here, hence out2. */
499 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
500 goto out2;
501
502 /* Race condition! In the gap, when rt6_lock was
503 released someone could insert this route. Relookup.
504 */
505 goto relookup;
506 }
507 dst_clone(&rt->u.dst);
508 } else {
509 #ifdef CONFIG_RT6_POLICY
510 rt = rt6_flow_lookup_out(rt, sk, fl);
511 #else
512 /* NEVER REACHED */
513 #endif
514 }
515
516 out:
517 read_unlock_bh(&rt6_lock);
518 out2:
519 rt->u.dst.lastuse = jiffies;
520 rt->u.dst.__use++;
521 return &rt->u.dst;
522 }
523
524
525 /*
526 * Destination cache support functions
527 */
528
529 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
530 {
531 struct rt6_info *rt;
532
533 rt = (struct rt6_info *) dst;
534
535 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
536 return dst;
537
538 dst_release(dst);
539 return NULL;
540 }
541
542 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, struct sk_buff *skb)
543 {
544 /*
545 * FIXME
546 */
547 RDBG(("ip6_dst_reroute(%p,%p)[%p] (AIEEE)\n", dst, skb,
548 __builtin_return_address(0)));
549 return NULL;
550 }
551
552 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
553 {
554 struct rt6_info *rt = (struct rt6_info *) dst;
555
556 if (rt) {
557 if (rt->rt6i_flags & RTF_CACHE)
558 ip6_del_rt(rt);
559 else
560 dst_release(dst);
561 }
562 return NULL;
563 }
564
565 static void ip6_link_failure(struct sk_buff *skb)
566 {
567 struct rt6_info *rt;
568
569 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
570
571 rt = (struct rt6_info *) skb->dst;
572 if (rt) {
573 if (rt->rt6i_flags&RTF_CACHE) {
574 dst_set_expires(&rt->u.dst, 0);
575 rt->rt6i_flags |= RTF_EXPIRES;
576 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
577 rt->rt6i_node->fn_sernum = -1;
578 }
579 }
580
581 static int ip6_dst_gc()
582 {
583 static unsigned expire = 30*HZ;
584 static unsigned long last_gc;
585 unsigned long now = jiffies;
586
587 if ((long)(now - last_gc) < ip6_rt_gc_min_interval &&
588 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
589 goto out;
590
591 expire++;
592 fib6_run_gc(expire);
593 last_gc = now;
594 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
595 expire = ip6_rt_gc_timeout>>1;
596
597 out:
598 expire -= expire>>ip6_rt_gc_elasticity;
599 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
600 }
601
/* Zero the host part of a prefix, i.e. every bit of @pfx beyond the first
   @plen bits.  The radix tree does not need this, but it gives cleaner
   routing tables.

   Remove it only when all the things will work!

   @plen is expected to be in 0..128; callers validate it.
 */

static void ipv6_wash_prefix(struct in6_addr *pfx, int plen)
{
	int tail_bits = plen & 0x7;		/* bits used in the last, partial byte */
	int first_clear = (plen + 7) >> 3;	/* first byte wholly outside the prefix */

	/* Keep only the top tail_bits bits of the partially covered byte. */
	if (tail_bits != 0)
		pfx->s6_addr[plen >> 3] &= (0xFF << (8 - tail_bits));

	if (first_clear < 16)
		memset(pfx->s6_addr + first_clear, 0, 16 - first_clear);
}
618
619 static int ipv6_get_mtu(struct net_device *dev)
620 {
621 int mtu = IPV6_MIN_MTU;
622 struct inet6_dev *idev;
623
624 idev = in6_dev_get(dev);
625 if (idev) {
626 mtu = idev->cnf.mtu6;
627 in6_dev_put(idev);
628 }
629 return mtu;
630 }
631
632 static int ipv6_get_hoplimit(struct net_device *dev)
633 {
634 int hoplimit = ipv6_devconf.hop_limit;
635 struct inet6_dev *idev;
636
637 idev = in6_dev_get(dev);
638 if (idev) {
639 hoplimit = idev->cnf.hop_limit;
640 in6_dev_put(idev);
641 }
642 return hoplimit;
643 }
644
645 /*
646 *
647 */
648
/*
 * ip6_route_add - build a route from a user/kernel request and insert it.
 *
 * @rtmsg: route description.  rtmsg_metric is rewritten in place to
 *         IP6_RT_PRIO_USER when it is 0.
 *
 * Returns 0 / the result of rt6_ins() on the install path, or a negative
 * errno: -EINVAL (bad prefix length, source prefix without
 * CONFIG_IPV6_SUBTREES, bad gateway, gateway via loopback), -ENOMEM,
 * -ENODEV (no usable device), -EHOSTUNREACH (gateway not directly
 * reachable), or the error from the neighbour lookup.
 *
 * Device reference handling: `dev` is held from dev_get_by_index() or
 * dev_hold(); on success the reference is handed to rt->u.dst.dev, on
 * failure it is dropped at `out` before the half-built route is freed.
 */
649 int ip6_route_add(struct in6_rtmsg *rtmsg)
650 {
651 int err;
652 struct rt6_info *rt;
653 struct net_device *dev = NULL;
654 int addr_type;
655
656 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
657 return -EINVAL;
658 #ifndef CONFIG_IPV6_SUBTREES
659 if (rtmsg->rtmsg_src_len)
660 return -EINVAL;
661 #endif
662 if (rtmsg->rtmsg_metric == 0)
663 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
664
665 rt = dst_alloc(&ip6_dst_ops);
666
667 if (rt == NULL)
668 return -ENOMEM;
669
670 rt->u.dst.obsolete = -1;
671 rt->rt6i_expires = rtmsg->rtmsg_info;
672
673 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
674
/* Input handler depends on whether the destination prefix is multicast. */
675 if (addr_type & IPV6_ADDR_MULTICAST)
676 rt->u.dst.input = ip6_mc_input;
677 else
678 rt->u.dst.input = ip6_forward;
679
680 rt->u.dst.output = ip6_output;
681
/* An explicit interface index must resolve; this takes a device reference. */
682 if (rtmsg->rtmsg_ifindex) {
683 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
684 err = -ENODEV;
685 if (dev == NULL)
686 goto out;
687 }
688
689 ipv6_addr_copy(&rt->rt6i_dst.addr, &rtmsg->rtmsg_dst);
690 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
691 if (rt->rt6i_dst.plen == 128)
692 rt->u.dst.flags = DST_HOST;
/* Clear the bits beyond the prefix length. */
693 ipv6_wash_prefix(&rt->rt6i_dst.addr, rt->rt6i_dst.plen);
694
695 #ifdef CONFIG_IPV6_SUBTREES
696 ipv6_addr_copy(&rt->rt6i_src.addr, &rtmsg->rtmsg_src);
697 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
698 ipv6_wash_prefix(&rt->rt6i_src.addr, rt->rt6i_src.plen);
699 #endif
700
701 rt->rt6i_metric = rtmsg->rtmsg_metric;
702
703 /* We cannot add true routes via loopback here,
704 they would result in kernel looping; promote them to reject routes
705 */
706 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
707 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
/* Swap whatever device was requested for loopback_dev, moving the
 * reference along with it. */
708 if (dev)
709 dev_put(dev);
710 dev = &loopback_dev;
711 dev_hold(dev);
712 rt->u.dst.output = ip6_pkt_discard;
713 rt->u.dst.input = ip6_pkt_discard;
714 rt->u.dst.error = -ENETUNREACH;
715 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
716 goto install_route;
717 }
718
719 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
720 struct in6_addr *gw_addr;
721 int gwa_type;
722
723 gw_addr = &rtmsg->rtmsg_gateway;
724 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
725 gwa_type = ipv6_addr_type(gw_addr);
726
727 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
728 struct rt6_info *grt;
729
730 /* IPv6 strictly inhibits using not link-local
731 addresses as nexthop address.
732 Otherwise, router will not able to send redirects.
733 It is very good, but in some (rare!) curcumstances
734 (SIT, PtP, NBMA NOARP links) it is handy to allow
735 some exceptions. --ANK
736 */
737 err = -EINVAL;
738 if (!(gwa_type&IPV6_ADDR_UNICAST))
739 goto out;
740
/* The gateway itself must be reachable through an existing route
 * (strict device match); rt6_lookup() returns it with a reference. */
741 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
742
743 err = -EHOSTUNREACH;
744 if (grt == NULL)
745 goto out;
746 if (dev) {
747 if (dev != grt->rt6i_dev) {
748 dst_release(&grt->u.dst);
749 goto out;
750 }
751 } else {
/* No device requested: inherit the one the gateway is reached through. */
752 dev = grt->rt6i_dev;
753 dev_hold(dev);
754 }
/* The route to the gateway must not itself go through a gateway;
 * otherwise err stays -EHOSTUNREACH. */
755 if (!(grt->rt6i_flags&RTF_GATEWAY))
756 err = 0;
757 dst_release(&grt->u.dst);
758
759 if (err)
760 goto out;
761 }
762 err = -EINVAL;
763 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
764 goto out;
765 }
766
767 err = -ENODEV;
768 if (dev == NULL)
769 goto out;
770
/* Bind the next-hop neighbour entry now for gateway and NONEXTHOP routes. */
771 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
772 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
773 if (IS_ERR(rt->rt6i_nexthop)) {
774 err = PTR_ERR(rt->rt6i_nexthop);
/* Do not leave an ERR_PTR value in the route that is about to be freed. */
775 rt->rt6i_nexthop = NULL;
776 goto out;
777 }
778 }
779
780 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
781 rt->rt6i_hoplimit = IPV6_DEFAULT_MCASTHOPS;
782 else
783 rt->rt6i_hoplimit = ipv6_get_hoplimit(dev);
784 rt->rt6i_flags = rtmsg->rtmsg_flags;
785
786 install_route:
787 rt->u.dst.pmtu = ipv6_get_mtu(dev);
/* advmss = pmtu - 60, but never below ip6_rt_min_advmss. */
788 rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
789 /* Maximal non-jumbo IPv6 payload is 65535 and corresponding
790 MSS is 65535 - tcp_header_size. 65535 is also valid and
791 means: "any MSS, rely only on pmtu discovery"
792 */
793 if (rt->u.dst.advmss > 65535-20)
794 rt->u.dst.advmss = 65535;
/* The device reference held in `dev` now belongs to the route. */
795 rt->u.dst.dev = dev;
796 return rt6_ins(rt);
797
798 out:
799 if (dev)
800 dev_put(dev);
801 dst_free((struct dst_entry *) rt);
802 return err;
803 }
804
805 int ip6_del_rt(struct rt6_info *rt)
806 {
807 int err;
808
809 write_lock_bh(&rt6_lock);
810
811 spin_lock_bh(&rt6_dflt_lock);
812 rt6_dflt_pointer = NULL;
813 spin_unlock_bh(&rt6_dflt_lock);
814
815 dst_release(&rt->u.dst);
816
817 err = fib6_del(rt);
818 write_unlock_bh(&rt6_lock);
819
820 return err;
821 }
822
823 int ip6_route_del(struct in6_rtmsg *rtmsg)
824 {
825 struct fib6_node *fn;
826 struct rt6_info *rt;
827 int err = -ESRCH;
828
829 read_lock_bh(&rt6_lock);
830
831 fn = fib6_locate(&ip6_routing_table,
832 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
833 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
834
835 if (fn) {
836 for (rt = fn->leaf; rt; rt = rt->u.next) {
837 if (rtmsg->rtmsg_ifindex &&
838 (rt->rt6i_dev == NULL ||
839 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
840 continue;
841 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
842 ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
843 continue;
844 if (rtmsg->rtmsg_metric &&
845 rtmsg->rtmsg_metric != rt->rt6i_metric)
846 continue;
847 dst_clone(&rt->u.dst);
848 read_unlock_bh(&rt6_lock);
849
850 return ip6_del_rt(rt);
851 }
852 }
853 read_unlock_bh(&rt6_lock);
854
855 return err;
856 }
857
858 /*
859 * Handle redirects
860 */
861 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
862 struct neighbour *neigh, int on_link)
863 {
864 struct rt6_info *rt, *nrt;
865
866 /* Locate old route to this destination. */
867 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
868
869 if (rt == NULL)
870 return;
871
872 if (neigh->dev != rt->rt6i_dev)
873 goto out;
874
875 /* Redirect received -> path was valid.
876 Look, redirects are sent only in response to data packets,
877 so that this nexthop apparently is reachable. --ANK
878 */
879 dst_confirm(&rt->u.dst);
880
881 /* Duplicate redirect: silently ignore. */
882 if (neigh == rt->u.dst.neighbour)
883 goto out;
884
885 /* Current route is on-link; redirect is always invalid.
886
887 Seems, previous statement is not true. It could
888 be node, which looks for us as on-link (f.e. proxy ndisc)
889 But then router serving it might decide, that we should
890 know truth 8)8) --ANK (980726).
891 */
892 if (!(rt->rt6i_flags&RTF_GATEWAY))
893 goto out;
894
895 /*
896 * RFC 1970 specifies that redirects should only be
897 * accepted if they come from the nexthop to the target.
898 * Due to the way default routers are chosen, this notion
899 * is a bit fuzzy and one might need to check all default
900 * routers.
901 */
902
903 if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
904 if (rt->rt6i_flags & RTF_DEFAULT) {
905 struct rt6_info *rt1;
906
907 read_lock(&rt6_lock);
908 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
909 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
910 dst_clone(&rt1->u.dst);
911 dst_release(&rt->u.dst);
912 read_unlock(&rt6_lock);
913 rt = rt1;
914 goto source_ok;
915 }
916 }
917 read_unlock(&rt6_lock);
918 }
919 if (net_ratelimit())
920 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
921 "for redirect target\n");
922 goto out;
923 }
924
925 source_ok:
926
927 /*
928 * We have finally decided to accept it.
929 */
930
931 nrt = ip6_rt_copy(rt);
932 if (nrt == NULL)
933 goto out;
934
935 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
936 if (on_link)
937 nrt->rt6i_flags &= ~RTF_GATEWAY;
938
939 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
940 nrt->rt6i_dst.plen = 128;
941 nrt->u.dst.flags |= DST_HOST;
942
943 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
944 nrt->rt6i_nexthop = neigh_clone(neigh);
945 /* Reset pmtu, it may be better */
946 nrt->u.dst.pmtu = ipv6_get_mtu(neigh->dev);
947 nrt->u.dst.advmss = max_t(unsigned int, nrt->u.dst.pmtu - 60, ip6_rt_min_advmss);
948 if (rt->u.dst.advmss > 65535-20)
949 rt->u.dst.advmss = 65535;
950 nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev);
951
952 if (rt6_ins(nrt))
953 goto out;
954
955 if (rt->rt6i_flags&RTF_CACHE) {
956 ip6_del_rt(rt);
957 return;
958 }
959
960 out:
961 dst_release(&rt->u.dst);
962 return;
963 }
964
965 /*
966 * Handle ICMP "packet too big" messages
967 * i.e. Path MTU discovery
968 */
969
970 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
971 struct net_device *dev, u32 pmtu)
972 {
973 struct rt6_info *rt, *nrt;
974
975 if (pmtu < IPV6_MIN_MTU) {
976 if (net_ratelimit())
977 printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
978 pmtu);
979 return;
980 }
981
982 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
983
984 if (rt == NULL)
985 return;
986
987 if (pmtu >= rt->u.dst.pmtu)
988 goto out;
989
990 /* New mtu received -> path was valid.
991 They are sent only in response to data packets,
992 so that this nexthop apparently is reachable. --ANK
993 */
994 dst_confirm(&rt->u.dst);
995
996 /* Host route. If it is static, it would be better
997 not to override it, but add new one, so that
998 when cache entry will expire old pmtu
999 would return automatically.
1000 */
1001 if (rt->rt6i_flags & RTF_CACHE) {
1002 rt->u.dst.pmtu = pmtu;
1003 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1004 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1005 goto out;
1006 }
1007
1008 /* Network route.
1009 Two cases are possible:
1010 1. It is connected route. Action: COW
1011 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1012 */
1013 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1014 nrt = rt6_cow(rt, daddr, saddr);
1015 if (!nrt->u.dst.error) {
1016 nrt->u.dst.pmtu = pmtu;
1017 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1018 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1019 dst_release(&nrt->u.dst);
1020 }
1021 } else {
1022 nrt = ip6_rt_copy(rt);
1023 if (nrt == NULL)
1024 goto out;
1025 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1026 nrt->rt6i_dst.plen = 128;
1027 nrt->u.dst.flags |= DST_HOST;
1028 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1029 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1030 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1031 nrt->u.dst.pmtu = pmtu;
1032 rt6_ins(nrt);
1033 }
1034
1035 out:
1036 dst_release(&rt->u.dst);
1037 }
1038
1039 /*
1040 * Misc support functions
1041 */
1042
1043 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1044 {
1045 struct rt6_info *rt;
1046
1047 rt = dst_alloc(&ip6_dst_ops);
1048
1049 if (rt) {
1050 rt->u.dst.input = ort->u.dst.input;
1051 rt->u.dst.output = ort->u.dst.output;
1052
1053 memcpy(&rt->u.dst.mxlock, &ort->u.dst.mxlock, RTAX_MAX*sizeof(unsigned));
1054 rt->u.dst.dev = ort->u.dst.dev;
1055 if (rt->u.dst.dev)
1056 dev_hold(rt->u.dst.dev);
1057 rt->u.dst.lastuse = jiffies;
1058 rt->rt6i_hoplimit = ort->rt6i_hoplimit;
1059 rt->rt6i_expires = 0;
1060
1061 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1062 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1063 rt->rt6i_metric = 0;
1064
1065 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1066 #ifdef CONFIG_IPV6_SUBTREES
1067 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1068 #endif
1069 }
1070 return rt;
1071 }
1072
1073 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1074 {
1075 struct rt6_info *rt;
1076 struct fib6_node *fn;
1077
1078 fn = &ip6_routing_table;
1079
1080 write_lock_bh(&rt6_lock);
1081 for (rt = fn->leaf; rt; rt=rt->u.next) {
1082 if (dev == rt->rt6i_dev &&
1083 ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1084 break;
1085 }
1086 if (rt)
1087 dst_clone(&rt->u.dst);
1088 write_unlock_bh(&rt6_lock);
1089 return rt;
1090 }
1091
1092 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1093 struct net_device *dev)
1094 {
1095 struct in6_rtmsg rtmsg;
1096
1097 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1098 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1099 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1100 rtmsg.rtmsg_metric = 1024;
1101 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1102
1103 rtmsg.rtmsg_ifindex = dev->ifindex;
1104
1105 ip6_route_add(&rtmsg);
1106 return rt6_get_dflt_router(gwaddr, dev);
1107 }
1108
1109 void rt6_purge_dflt_routers(int last_resort)
1110 {
1111 struct rt6_info *rt;
1112 u32 flags;
1113
1114 if (last_resort)
1115 flags = RTF_ALLONLINK;
1116 else
1117 flags = RTF_DEFAULT | RTF_ADDRCONF;
1118
1119 restart:
1120 read_lock_bh(&rt6_lock);
1121 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1122 if (rt->rt6i_flags & flags) {
1123 dst_hold(&rt->u.dst);
1124
1125 spin_lock_bh(&rt6_dflt_lock);
1126 rt6_dflt_pointer = NULL;
1127 spin_unlock_bh(&rt6_dflt_lock);
1128
1129 read_unlock_bh(&rt6_lock);
1130
1131 ip6_del_rt(rt);
1132
1133 goto restart;
1134 }
1135 }
1136 read_unlock_bh(&rt6_lock);
1137 }
1138
1139 int ipv6_route_ioctl(unsigned int cmd, void *arg)
1140 {
1141 struct in6_rtmsg rtmsg;
1142 int err;
1143
1144 switch(cmd) {
1145 case SIOCADDRT: /* Add a route */
1146 case SIOCDELRT: /* Delete a route */
1147 if (!capable(CAP_NET_ADMIN))
1148 return -EPERM;
1149 err = copy_from_user(&rtmsg, arg,
1150 sizeof(struct in6_rtmsg));
1151 if (err)
1152 return -EFAULT;
1153
1154 rtnl_lock();
1155 switch (cmd) {
1156 case SIOCADDRT:
1157 err = ip6_route_add(&rtmsg);
1158 break;
1159 case SIOCDELRT:
1160 err = ip6_route_del(&rtmsg);
1161 break;
1162 default:
1163 err = -EINVAL;
1164 }
1165 rtnl_unlock();
1166
1167 return err;
1168 };
1169
1170 return -EINVAL;
1171 }
1172
1173 /*
1174 * Drop the packet on the floor
1175 */
1176
1177 int ip6_pkt_discard(struct sk_buff *skb)
1178 {
1179 IP6_INC_STATS(Ip6OutNoRoutes);
1180 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
1181 kfree_skb(skb);
1182 return 0;
1183 }
1184
1185 /*
1186 * Add address
1187 */
1188
1189 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev)
1190 {
1191 struct rt6_info *rt;
1192
1193 rt = dst_alloc(&ip6_dst_ops);
1194 if (rt == NULL)
1195 return -ENOMEM;
1196
1197 rt->u.dst.flags = DST_HOST;
1198 rt->u.dst.input = ip6_input;
1199 rt->u.dst.output = ip6_output;
1200 rt->rt6i_dev = dev_get_by_name("lo");
1201 rt->u.dst.pmtu = ipv6_get_mtu(rt->rt6i_dev);
1202 rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
1203 if (rt->u.dst.advmss > 65535-20)
1204 rt->u.dst.advmss = 65535;
1205 rt->rt6i_hoplimit = ipv6_get_hoplimit(rt->rt6i_dev);
1206 rt->u.dst.obsolete = -1;
1207
1208 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1209 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1210 if (rt->rt6i_nexthop == NULL) {
1211 dst_free((struct dst_entry *) rt);
1212 return -ENOMEM;
1213 }
1214
1215 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1216 rt->rt6i_dst.plen = 128;
1217 rt6_ins(rt);
1218
1219 return 0;
1220 }
1221
1222 /* Delete address. Warning: you should check that this address
1223 disappeared before calling this function.
1224 */
1225
1226 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1227 {
1228 struct rt6_info *rt;
1229 int err = -ENOENT;
1230
1231 rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1232 if (rt) {
1233 if (rt->rt6i_dst.plen == 128)
1234 err = ip6_del_rt(rt);
1235 else
1236 dst_release(&rt->u.dst);
1237 }
1238
1239 return err;
1240 }
1241
1242 #ifdef CONFIG_RT6_POLICY
1243
1244 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb)
1245 {
1246 struct flow_filter *frule;
1247 struct pkt_filter *filter;
1248 int res = 1;
1249
1250 if ((frule = rt->rt6i_filter) == NULL)
1251 goto out;
1252
1253 if (frule->type != FLR_INPUT) {
1254 res = 0;
1255 goto out;
1256 }
1257
1258 for (filter = frule->u.filter; filter; filter = filter->next) {
1259 __u32 *word;
1260
1261 word = (__u32 *) skb->h.raw;
1262 word += filter->offset;
1263
1264 if ((*word ^ filter->value) & filter->mask) {
1265 res = 0;
1266 break;
1267 }
1268 }
1269
1270 out:
1271 return res;
1272 }
1273
1274 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk)
1275 {
1276 struct flow_filter *frule;
1277 int res = 1;
1278
1279 if ((frule = rt->rt6i_filter) == NULL)
1280 goto out;
1281
1282 if (frule->type != FLR_INPUT) {
1283 res = 0;
1284 goto out;
1285 }
1286
1287 if (frule->u.sk != sk)
1288 res = 0;
1289 out:
1290 return res;
1291 }
1292
1293 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
1294 struct in6_addr *daddr,
1295 struct in6_addr *saddr,
1296 struct fl_acc_args *args)
1297 {
1298 struct flow_rule *frule;
1299 struct rt6_info *nrt = NULL;
1300 struct pol_chain *pol;
1301
1302 for (pol = rt6_pol_list; pol; pol = pol->next) {
1303 struct fib6_node *fn;
1304 struct rt6_info *sprt;
1305
1306 fn = fib6_lookup(pol->rules, daddr, saddr);
1307
1308 do {
1309 for (sprt = fn->leaf; sprt; sprt=sprt->u.next) {
1310 int res;
1311
1312 frule = sprt->rt6i_flowr;
1313 #if RT6_DEBUG >= 2
1314 if (frule == NULL) {
1315 printk(KERN_DEBUG "NULL flowr\n");
1316 goto error;
1317 }
1318 #endif
1319 res = frule->ops->accept(rt, sprt, args, &nrt);
1320
1321 switch (res) {
1322 case FLOWR_SELECT:
1323 goto found;
1324 case FLOWR_CLEAR:
1325 goto next_policy;
1326 case FLOWR_NODECISION:
1327 break;
1328 default:
1329 goto error;
1330 };
1331 }
1332
1333 fn = fn->parent;
1334
1335 } while ((fn->fn_flags & RTN_TL_ROOT) == 0);
1336
1337 next_policy:
1338 }
1339
1340 error:
1341 dst_clone(&ip6_null_entry.u.dst);
1342 return &ip6_null_entry;
1343
1344 found:
1345 if (nrt == NULL)
1346 goto error;
1347
1348 nrt->rt6i_flags |= RTF_CACHE;
1349 dst_clone(&nrt->u.dst);
1350 err = rt6_ins(nrt);
1351 if (err)
1352 nrt->u.dst.error = err;
1353 return nrt;
1354 }
1355 #endif
1356
1357 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1358 {
1359 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1360 rt != &ip6_null_entry) {
1361 RT6_TRACE("deleted by ifdown %p\n", rt);
1362 return -1;
1363 }
1364 return 0;
1365 }
1366
1367 void rt6_ifdown(struct net_device *dev)
1368 {
1369 write_lock_bh(&rt6_lock);
1370 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1371 write_unlock_bh(&rt6_lock);
1372 }
1373
/* Argument bundle passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU */
};
1379
1380 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1381 {
1382 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1383
1384 /* In IPv6 pmtu discovery is not optional,
1385 so that RTAX_MTU lock cannot disable it.
1386 We still use this lock to block changes
1387 caused by addrconf/ndisc.
1388 */
1389 if (rt->rt6i_dev == arg->dev &&
1390 rt->u.dst.pmtu > arg->mtu &&
1391 !(rt->u.dst.mxlock&(1<<RTAX_MTU)))
1392 rt->u.dst.pmtu = arg->mtu;
1393 rt->u.dst.advmss = max_t(unsigned int, arg->mtu - 60, ip6_rt_min_advmss);
1394 if (rt->u.dst.advmss > 65535-20)
1395 rt->u.dst.advmss = 65535;
1396 return 0;
1397 }
1398
1399 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1400 {
1401 struct rt6_mtu_change_arg arg;
1402
1403 arg.dev = dev;
1404 arg.mtu = mtu;
1405 read_lock_bh(&rt6_lock);
1406 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1407 read_unlock_bh(&rt6_lock);
1408 }
1409
1410 #ifdef CONFIG_RTNETLINK
1411
1412 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1413 struct in6_rtmsg *rtmsg)
1414 {
1415 memset(rtmsg, 0, sizeof(*rtmsg));
1416
1417 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1418 rtmsg->rtmsg_src_len = r->rtm_src_len;
1419 rtmsg->rtmsg_flags = RTF_UP;
1420 if (r->rtm_type == RTN_UNREACHABLE)
1421 rtmsg->rtmsg_flags |= RTF_REJECT;
1422
1423 if (rta[RTA_GATEWAY-1]) {
1424 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1425 return -EINVAL;
1426 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1427 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1428 }
1429 if (rta[RTA_DST-1]) {
1430 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1431 return -EINVAL;
1432 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1433 }
1434 if (rta[RTA_SRC-1]) {
1435 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1436 return -EINVAL;
1437 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1438 }
1439 if (rta[RTA_OIF-1]) {
1440 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1441 return -EINVAL;
1442 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1443 }
1444 if (rta[RTA_PRIORITY-1]) {
1445 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1446 return -EINVAL;
1447 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1448 }
1449 return 0;
1450 }
1451
1452 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1453 {
1454 struct rtmsg *r = NLMSG_DATA(nlh);
1455 struct in6_rtmsg rtmsg;
1456
1457 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1458 return -EINVAL;
1459 return ip6_route_del(&rtmsg);
1460 }
1461
1462 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1463 {
1464 struct rtmsg *r = NLMSG_DATA(nlh);
1465 struct in6_rtmsg rtmsg;
1466
1467 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1468 return -EINVAL;
1469 return ip6_route_add(&rtmsg);
1470 }
1471
/* Per-call context handed to rt6_dump_route() through the tree walker. */
struct rt6_rtnl_dump_arg {
	struct sk_buff		*skb;	/* buffer the dump is written into */
	struct netlink_callback	*cb;	/* netlink dump state of the request */
};
1477
1478 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1479 struct in6_addr *dst,
1480 struct in6_addr *src,
1481 int iif,
1482 int type, u32 pid, u32 seq)
1483 {
1484 struct rtmsg *rtm;
1485 struct nlmsghdr *nlh;
1486 unsigned char *b = skb->tail;
1487 struct rta_cacheinfo ci;
1488
1489 nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1490 rtm = NLMSG_DATA(nlh);
1491 rtm->rtm_family = AF_INET6;
1492 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1493 rtm->rtm_src_len = rt->rt6i_src.plen;
1494 rtm->rtm_tos = 0;
1495 rtm->rtm_table = RT_TABLE_MAIN;
1496 if (rt->rt6i_flags&RTF_REJECT)
1497 rtm->rtm_type = RTN_UNREACHABLE;
1498 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1499 rtm->rtm_type = RTN_LOCAL;
1500 else
1501 rtm->rtm_type = RTN_UNICAST;
1502 rtm->rtm_flags = 0;
1503 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1504 rtm->rtm_protocol = RTPROT_BOOT;
1505 if (rt->rt6i_flags&RTF_DYNAMIC)
1506 rtm->rtm_protocol = RTPROT_REDIRECT;
1507 else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1508 rtm->rtm_protocol = RTPROT_KERNEL;
1509 else if (rt->rt6i_flags&RTF_DEFAULT)
1510 rtm->rtm_protocol = RTPROT_RA;
1511
1512 if (rt->rt6i_flags&RTF_CACHE)
1513 rtm->rtm_flags |= RTM_F_CLONED;
1514
1515 if (dst) {
1516 RTA_PUT(skb, RTA_DST, 16, dst);
1517 rtm->rtm_dst_len = 128;
1518 } else if (rtm->rtm_dst_len)
1519 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1520 #ifdef CONFIG_IPV6_SUBTREES
1521 if (src) {
1522 RTA_PUT(skb, RTA_SRC, 16, src);
1523 rtm->rtm_src_len = 128;
1524 } else if (rtm->rtm_src_len)
1525 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1526 #endif
1527 if (iif)
1528 RTA_PUT(skb, RTA_IIF, 4, &iif);
1529 else if (dst) {
1530 struct in6_addr saddr_buf;
1531 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1532 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1533 }
1534 if (rtnetlink_put_metrics(skb, &rt->u.dst.mxlock) < 0)
1535 goto rtattr_failure;
1536 if (rt->u.dst.neighbour)
1537 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1538 if (rt->u.dst.dev)
1539 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1540 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1541 ci.rta_lastuse = jiffies - rt->u.dst.lastuse;
1542 if (rt->rt6i_expires)
1543 ci.rta_expires = rt->rt6i_expires - jiffies;
1544 else
1545 ci.rta_expires = 0;
1546 ci.rta_used = rt->u.dst.__use;
1547 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1548 ci.rta_error = rt->u.dst.error;
1549 ci.rta_id = 0;
1550 ci.rta_ts = 0;
1551 ci.rta_tsage = 0;
1552 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1553 nlh->nlmsg_len = skb->tail - b;
1554 return skb->len;
1555
1556 nlmsg_failure:
1557 rtattr_failure:
1558 skb_trim(skb, b - skb->data);
1559 return -1;
1560 }
1561
1562 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1563 {
1564 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1565
1566 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1567 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq);
1568 }
1569
1570 static int fib6_dump_node(struct fib6_walker_t *w)
1571 {
1572 int res;
1573 struct rt6_info *rt;
1574
1575 for (rt = w->leaf; rt; rt = rt->u.next) {
1576 res = rt6_dump_route(rt, w->args);
1577 if (res < 0) {
1578 /* Frame is full, suspend walking */
1579 w->leaf = rt;
1580 return 1;
1581 }
1582 BUG_TRAP(res!=0);
1583 }
1584 w->leaf = NULL;
1585 return 0;
1586 }
1587
1588 static void fib6_dump_end(struct netlink_callback *cb)
1589 {
1590 struct fib6_walker_t *w = (void*)cb->args[0];
1591
1592 if (w) {
1593 cb->args[0] = 0;
1594 fib6_walker_unlink(w);
1595 kfree(w);
1596 }
1597 if (cb->args[1]) {
1598 cb->done = (void*)cb->args[1];
1599 cb->args[1] = 0;
1600 }
1601 }
1602
1603 static int fib6_dump_done(struct netlink_callback *cb)
1604 {
1605 fib6_dump_end(cb);
1606 return cb->done(cb);
1607 }
1608
1609 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1610 {
1611 struct rt6_rtnl_dump_arg arg;
1612 struct fib6_walker_t *w;
1613 int res;
1614
1615 arg.skb = skb;
1616 arg.cb = cb;
1617
1618 w = (void*)cb->args[0];
1619 if (w == NULL) {
1620 /* New dump:
1621 *
1622 * 1. hook callback destructor.
1623 */
1624 cb->args[1] = (long)cb->done;
1625 cb->done = fib6_dump_done;
1626
1627 /*
1628 * 2. allocate and initialize walker.
1629 */
1630 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1631 if (w == NULL)
1632 return -ENOMEM;
1633 RT6_TRACE("dump<%p", w);
1634 memset(w, 0, sizeof(*w));
1635 w->root = &ip6_routing_table;
1636 w->func = fib6_dump_node;
1637 w->args = &arg;
1638 cb->args[0] = (long)w;
1639 read_lock_bh(&rt6_lock);
1640 res = fib6_walk(w);
1641 read_unlock_bh(&rt6_lock);
1642 } else {
1643 w->args = &arg;
1644 read_lock_bh(&rt6_lock);
1645 res = fib6_walk_continue(w);
1646 read_unlock_bh(&rt6_lock);
1647 }
1648 #if RT6_DEBUG >= 3
1649 if (res <= 0 && skb->len == 0)
1650 RT6_TRACE("%p>dump end\n", w);
1651 #endif
1652 res = res < 0 ? res : skb->len;
1653 /* res < 0 is an error. (really, impossible)
1654 res == 0 means that dump is complete, but skb still can contain data.
1655 res > 0 dump is not complete, but frame is full.
1656 */
1657 /* Destroy walker, if dump of this table is complete. */
1658 if (res <= 0)
1659 fib6_dump_end(cb);
1660 return res;
1661 }
1662
1663 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1664 {
1665 struct rtattr **rta = arg;
1666 int iif = 0;
1667 int err;
1668 struct sk_buff *skb;
1669 struct flowi fl;
1670 struct rt6_info *rt;
1671
1672 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1673 if (skb == NULL)
1674 return -ENOBUFS;
1675
1676 /* Reserve room for dummy headers, this skb can pass
1677 through good chunk of routing engine.
1678 */
1679 skb->mac.raw = skb->data;
1680 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1681
1682 fl.proto = 0;
1683 fl.nl_u.ip6_u.daddr = NULL;
1684 fl.nl_u.ip6_u.saddr = NULL;
1685 fl.uli_u.icmpt.type = 0;
1686 fl.uli_u.icmpt.code = 0;
1687 if (rta[RTA_SRC-1])
1688 fl.nl_u.ip6_u.saddr = (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]);
1689 if (rta[RTA_DST-1])
1690 fl.nl_u.ip6_u.daddr = (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]);
1691
1692 if (rta[RTA_IIF-1])
1693 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1694
1695 if (iif) {
1696 struct net_device *dev;
1697 dev = __dev_get_by_index(iif);
1698 if (!dev)
1699 return -ENODEV;
1700 }
1701
1702 fl.oif = 0;
1703 if (rta[RTA_OIF-1])
1704 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1705
1706 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1707
1708 skb->dst = &rt->u.dst;
1709
1710 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1711 err = rt6_fill_node(skb, rt,
1712 fl.nl_u.ip6_u.daddr,
1713 fl.nl_u.ip6_u.saddr,
1714 iif,
1715 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq);
1716 if (err < 0)
1717 return -EMSGSIZE;
1718
1719 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1720 if (err < 0)
1721 return err;
1722 return 0;
1723 }
1724
1725 void inet6_rt_notify(int event, struct rt6_info *rt)
1726 {
1727 struct sk_buff *skb;
1728 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1729
1730 skb = alloc_skb(size, gfp_any());
1731 if (!skb) {
1732 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1733 return;
1734 }
1735 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0) < 0) {
1736 kfree_skb(skb);
1737 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1738 return;
1739 }
1740 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1741 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1742 }
1743
1744 #endif
1745
1746 /*
1747 * /proc
1748 */
1749
1750 #ifdef CONFIG_PROC_FS
1751
/*
 * Length of one line of the ipv6_route proc file: three 32-digit hex
 * addresses, two 4-character prefix-length fields, and the trailing
 * numeric fields, device name and newline.
 */
#define RT6_INFO_LEN	(3 * 32 + 2 * 4 + 40 + 5 + 1)
1753
/* Cursor shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char	*buffer;	/* output buffer */
	int	offset;		/* file offset requested by the reader */
	int	length;		/* number of bytes the reader asked for */
	int	skip;		/* routes skipped so far to reach offset */
	int	len;		/* bytes written into buffer so far */
};
1762
1763 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1764 {
1765 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1766 int i;
1767
1768 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1769 arg->skip++;
1770 return 0;
1771 }
1772
1773 if (arg->len >= arg->length)
1774 return 0;
1775
1776 for (i=0; i<16; i++) {
1777 sprintf(arg->buffer + arg->len, "%02x",
1778 rt->rt6i_dst.addr.s6_addr[i]);
1779 arg->len += 2;
1780 }
1781 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1782 rt->rt6i_dst.plen);
1783
1784 #ifdef CONFIG_IPV6_SUBTREES
1785 for (i=0; i<16; i++) {
1786 sprintf(arg->buffer + arg->len, "%02x",
1787 rt->rt6i_src.addr.s6_addr[i]);
1788 arg->len += 2;
1789 }
1790 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1791 rt->rt6i_src.plen);
1792 #else
1793 sprintf(arg->buffer + arg->len,
1794 "00000000000000000000000000000000 00 ");
1795 arg->len += 36;
1796 #endif
1797
1798 if (rt->rt6i_nexthop) {
1799 for (i=0; i<16; i++) {
1800 sprintf(arg->buffer + arg->len, "%02x",
1801 rt->rt6i_nexthop->primary_key[i]);
1802 arg->len += 2;
1803 }
1804 } else {
1805 sprintf(arg->buffer + arg->len,
1806 "00000000000000000000000000000000");
1807 arg->len += 32;
1808 }
1809 arg->len += sprintf(arg->buffer + arg->len,
1810 " %08x %08x %08x %08x %8s\n",
1811 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1812 rt->u.dst.__use, rt->rt6i_flags,
1813 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1814 return 0;
1815 }
1816
1817 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1818 {
1819 struct rt6_proc_arg arg;
1820 arg.buffer = buffer;
1821 arg.offset = offset;
1822 arg.length = length;
1823 arg.skip = 0;
1824 arg.len = 0;
1825
1826 read_lock_bh(&rt6_lock);
1827 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1828 read_unlock_bh(&rt6_lock);
1829
1830 *start = buffer;
1831 if (offset)
1832 *start += offset % RT6_INFO_LEN;
1833
1834 arg.len -= offset % RT6_INFO_LEN;
1835
1836 if (arg.len > length)
1837 arg.len = length;
1838 if (arg.len < 0)
1839 arg.len = 0;
1840
1841 return arg.len;
1842 }
1843
1844 extern struct rt6_statistics rt6_stats;
1845
1846 static int rt6_proc_stats(char *buffer, char **start, off_t offset, int length)
1847 {
1848 int len;
1849
1850 len = sprintf(buffer, "%04x %04x %04x %04x %04x %04x\n",
1851 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1852 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1853 rt6_stats.fib_rt_cache,
1854 atomic_read(&ip6_dst_ops.entries));
1855
1856 len -= offset;
1857
1858 if (len > length)
1859 len = length;
1860 if(len < 0)
1861 len = 0;
1862
1863 *start = buffer + offset;
1864
1865 return len;
1866 }
1867 #endif /* CONFIG_PROC_FS */
1868
1869 #ifdef CONFIG_SYSCTL
1870
1871 static int flush_delay;
1872
1873 static
1874 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1875 void *buffer, size_t *lenp)
1876 {
1877 if (write) {
1878 proc_dointvec(ctl, write, filp, buffer, lenp);
1879 if (flush_delay < 0)
1880 flush_delay = 0;
1881 fib6_run_gc((unsigned long)flush_delay);
1882 return 0;
1883 } else
1884 return -EINVAL;
1885 }
1886
1887 ctl_table ipv6_route_table[] = {
1888 {NET_IPV6_ROUTE_FLUSH, "flush",
1889 &flush_delay, sizeof(int), 0644, NULL,
1890 &ipv6_sysctl_rtcache_flush},
1891 {NET_IPV6_ROUTE_GC_THRESH, "gc_thresh",
1892 &ip6_dst_ops.gc_thresh, sizeof(int), 0644, NULL,
1893 &proc_dointvec},
1894 {NET_IPV6_ROUTE_MAX_SIZE, "max_size",
1895 &ip6_rt_max_size, sizeof(int), 0644, NULL,
1896 &proc_dointvec},
1897 {NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval",
1898 &ip6_rt_gc_min_interval, sizeof(int), 0644, NULL,
1899 &proc_dointvec_jiffies, &sysctl_jiffies},
1900 {NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout",
1901 &ip6_rt_gc_timeout, sizeof(int), 0644, NULL,
1902 &proc_dointvec_jiffies, &sysctl_jiffies},
1903 {NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval",
1904 &ip6_rt_gc_interval, sizeof(int), 0644, NULL,
1905 &proc_dointvec_jiffies, &sysctl_jiffies},
1906 {NET_IPV6_ROUTE_GC_ELASTICITY, "gc_elasticity",
1907 &ip6_rt_gc_elasticity, sizeof(int), 0644, NULL,
1908 &proc_dointvec_jiffies, &sysctl_jiffies},
1909 {NET_IPV6_ROUTE_MTU_EXPIRES, "mtu_expires",
1910 &ip6_rt_mtu_expires, sizeof(int), 0644, NULL,
1911 &proc_dointvec_jiffies, &sysctl_jiffies},
1912 {NET_IPV6_ROUTE_MIN_ADVMSS, "min_adv_mss",
1913 &ip6_rt_min_advmss, sizeof(int), 0644, NULL,
1914 &proc_dointvec_jiffies, &sysctl_jiffies},
1915 {0}
1916 };
1917
1918 #endif
1919
1920
1921 void __init ip6_route_init(void)
1922 {
1923 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
1924 sizeof(struct rt6_info),
1925 0, SLAB_HWCACHE_ALIGN,
1926 NULL, NULL);
1927 fib6_init();
1928 #ifdef CONFIG_PROC_FS
1929 proc_net_create("ipv6_route", 0, rt6_proc_info);
1930 proc_net_create("rt6_stats", 0, rt6_proc_stats);
1931 #endif
1932 }
1933
1934 #ifdef MODULE
/*
 *	Module unload: remove the proc files (with CONFIG_PROC_FS), run
 *	rt6_ifdown() with a NULL device so that routes on every device are
 *	matched, and shut down FIB garbage collection.
 */
void ip6_route_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove("ipv6_route");
	proc_net_remove("rt6_stats");
#endif

	/* NULL matches every device */
	rt6_ifdown(NULL);

	fib6_gc_cleanup();
}
1945 #endif /* MODULE */
1946