File: /usr/src/linux/net/ipv4/ipmr.c
1 /*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
4 * (c) 1995 Alan Cox, <alan@redhat.com>
5 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Version: $Id: ipmr.c,v 1.64 2001/09/18 22:29:09 davem Exp $
13 *
14 * Fixes:
15 * Michael Chastain : Incorrect size of copying.
16 * Alan Cox : Added the cache manager code
17 * Alan Cox : Fixed the clone/copy bug and device race.
18 * Mike McLagan : Routing by source
19 * Malcolm Beattie : Buffer handling fixes.
20 * Alexey Kuznetsov : Double buffer free and other fixes.
21 * SVR Anand : Fixed several multicast bugs and problems.
22 * Alexey Kuznetsov : Status, optimisations and more.
23 * Brad Parker : Better behaviour on mrouted upcall
24 * overflow.
25 * Carlos Picoto : PIMv1 Support
26 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
27 * Relax this requirement to work with older peers.
28 *
29 */
30
31 #include <linux/config.h>
32 #include <asm/system.h>
33 #include <asm/uaccess.h>
34 #include <linux/types.h>
35 #include <linux/sched.h>
36 #include <linux/errno.h>
37 #include <linux/timer.h>
38 #include <linux/mm.h>
39 #include <linux/kernel.h>
40 #include <linux/fcntl.h>
41 #include <linux/stat.h>
42 #include <linux/socket.h>
43 #include <linux/in.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/inetdevice.h>
47 #include <linux/igmp.h>
48 #include <linux/proc_fs.h>
49 #include <linux/mroute.h>
50 #include <linux/init.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/sock.h>
55 #include <net/icmp.h>
56 #include <net/udp.h>
57 #include <net/raw.h>
58 #include <linux/notifier.h>
59 #include <linux/if_arp.h>
60 #include <linux/netfilter_ipv4.h>
61 #include <net/ipip.h>
62 #include <net/checksum.h>
63
/* Either PIM-SM version switches on the shared PIM code guarded by CONFIG_IP_PIMSM. */
64 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
65 #define CONFIG_IP_PIMSM 1
66 #endif
67
/* Socket of the routing daemon: set by MRT_INIT, reset to NULL in mrtsock_destruct(). */
68 static struct sock *mroute_socket;
69
70
71 /* Big lock, protecting vif table, mrt cache and mroute socket state.
72 Note that the changes are semaphored via rtnl_lock.
73 */
74
75 static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;
76
77 /*
78 * Multicast router control variables
79 */
80
81 static struct vif_device vif_table[MAXVIFS]; /* Devices */
/* One more than the highest vif index in use; maintained by vif_add()/vif_delete(). */
82 static int maxvif;
83
/* A vif slot is considered in use while it has a device attached. */
84 #define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
85
86 int mroute_do_assert; /* Set in PIM assert */
/* 0 or 1; toggled through the MRT_PIM socket option. */
87 int mroute_do_pim;
88
89 static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */
90
91 static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
92 atomic_t cache_resolve_queue_len; /* Size of unresolved */
93
94 /* Special spinlock for queue of unresolved entries */
95 static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;
96
97 /* We return to original Alan's scheme. Hash table of resolved
98 entries is changed only in process context and protected
99 with weak lock mrt_lock. Queue of unresolved entries is protected
100 with strong spinlock mfc_unres_lock.
101
102 In this case data path is free of exclusive locks at all.
103 */
104
/* Slab cache from which all mfc_cache entries are allocated and to which they are freed. */
105 kmem_cache_t *mrt_cachep;
106
/* Forward declarations for routines used before their definition. */
107 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
108 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
109 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
110
/* Registered/unregistered by the MRT_PIM option when CONFIG_IP_PIMSM_V2 is set. */
111 extern struct inet_protocol pim_protocol;
112
/*
 * Timer armed by ipmr_cache_unresolved() and ipmr_expire_process().
 * NOTE(review): presumably bound to ipmr_expire_process() at init time;
 * the initialisation is outside this part of the file - confirm.
 */
113 static struct timer_list ipmr_expire_timer;
114
115 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
116
117 static
118 struct net_device *ipmr_new_tunnel(struct vifctl *v)
119 {
120 struct net_device *dev;
121
122 dev = __dev_get_by_name("tunl0");
123
124 if (dev) {
125 int err;
126 struct ifreq ifr;
127 mm_segment_t oldfs;
128 struct ip_tunnel_parm p;
129 struct in_device *in_dev;
130
131 memset(&p, 0, sizeof(p));
132 p.iph.daddr = v->vifc_rmt_addr.s_addr;
133 p.iph.saddr = v->vifc_lcl_addr.s_addr;
134 p.iph.version = 4;
135 p.iph.ihl = 5;
136 p.iph.protocol = IPPROTO_IPIP;
137 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
138 ifr.ifr_ifru.ifru_data = (void*)&p;
139
140 oldfs = get_fs(); set_fs(KERNEL_DS);
141 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
142 set_fs(oldfs);
143
144 dev = NULL;
145
146 if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
147 dev->flags |= IFF_MULTICAST;
148
149 in_dev = __in_dev_get(dev);
150 if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
151 goto failure;
152 in_dev->cnf.rp_filter = 0;
153
154 if (dev_open(dev))
155 goto failure;
156 }
157 }
158 return dev;
159
160 failure:
161 unregister_netdevice(dev);
162 return NULL;
163 }
164
165 #ifdef CONFIG_IP_PIMSM
166
167 static int reg_vif_num = -1;
168
169 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
170 {
171 read_lock(&mrt_lock);
172 ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
173 ((struct net_device_stats*)dev->priv)->tx_packets++;
174 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
175 read_unlock(&mrt_lock);
176 kfree_skb(skb);
177 return 0;
178 }
179
180 static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
181 {
182 return (struct net_device_stats*)dev->priv;
183 }
184
185 static
186 struct net_device *ipmr_reg_vif(struct vifctl *v)
187 {
188 struct net_device *dev;
189 struct in_device *in_dev;
190 int size;
191
192 size = sizeof(*dev) + sizeof(struct net_device_stats);
193 dev = kmalloc(size, GFP_KERNEL);
194 if (!dev)
195 return NULL;
196
197 memset(dev, 0, size);
198
199 dev->priv = dev + 1;
200
201 strcpy(dev->name, "pimreg");
202
203 dev->type = ARPHRD_PIMREG;
204 dev->mtu = 1500 - sizeof(struct iphdr) - 8;
205 dev->flags = IFF_NOARP;
206 dev->hard_start_xmit = reg_vif_xmit;
207 dev->get_stats = reg_vif_get_stats;
208 dev->features |= NETIF_F_DYNALLOC;
209
210 if (register_netdevice(dev)) {
211 kfree(dev);
212 return NULL;
213 }
214 dev->iflink = 0;
215
216 if ((in_dev = inetdev_init(dev)) == NULL)
217 goto failure;
218
219 in_dev->cnf.rp_filter = 0;
220
221 if (dev_open(dev))
222 goto failure;
223
224 return dev;
225
226 failure:
227 unregister_netdevice(dev);
228 return NULL;
229 }
230 #endif
231
232 /*
233 * Delete a VIF entry
234 */
235
236 static int vif_delete(int vifi)
237 {
238 struct vif_device *v;
239 struct net_device *dev;
240 struct in_device *in_dev;
241
242 if (vifi < 0 || vifi >= maxvif)
243 return -EADDRNOTAVAIL;
244
245 v = &vif_table[vifi];
246
247 write_lock_bh(&mrt_lock);
248 dev = v->dev;
249 v->dev = NULL;
250
251 if (!dev) {
252 write_unlock_bh(&mrt_lock);
253 return -EADDRNOTAVAIL;
254 }
255
256 #ifdef CONFIG_IP_PIMSM
257 if (vifi == reg_vif_num)
258 reg_vif_num = -1;
259 #endif
260
261 if (vifi+1 == maxvif) {
262 int tmp;
263 for (tmp=vifi-1; tmp>=0; tmp--) {
264 if (VIF_EXISTS(tmp))
265 break;
266 }
267 maxvif = tmp+1;
268 }
269
270 write_unlock_bh(&mrt_lock);
271
272 dev_set_allmulti(dev, -1);
273
274 if ((in_dev = __in_dev_get(dev)) != NULL) {
275 in_dev->cnf.mc_forwarding--;
276 ip_rt_multicast_event(in_dev);
277 }
278
279 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
280 unregister_netdevice(dev);
281
282 dev_put(dev);
283 return 0;
284 }
285
286 /* Destroy an unresolved cache entry, killing queued skbs
287 and reporting error to netlink readers.
288 */
289
290 static void ipmr_destroy_unres(struct mfc_cache *c)
291 {
292 struct sk_buff *skb;
293
294 atomic_dec(&cache_resolve_queue_len);
295
296 while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
297 #ifdef CONFIG_RTNETLINK
298 if (skb->nh.iph->version == 0) {
299 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
300 nlh->nlmsg_type = NLMSG_ERROR;
301 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
302 skb_trim(skb, nlh->nlmsg_len);
303 ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
304 netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
305 } else
306 #endif
307 kfree_skb(skb);
308 }
309
310 kmem_cache_free(mrt_cachep, c);
311 }
312
313
314 /* Single timer process for all the unresolved queue. */
315
316 void ipmr_expire_process(unsigned long dummy)
317 {
318 unsigned long now;
319 unsigned long expires;
320 struct mfc_cache *c, **cp;
321
322 if (!spin_trylock(&mfc_unres_lock)) {
323 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
324 return;
325 }
326
327 if (atomic_read(&cache_resolve_queue_len) == 0)
328 goto out;
329
330 now = jiffies;
331 expires = 10*HZ;
332 cp = &mfc_unres_queue;
333
334 while ((c=*cp) != NULL) {
335 long interval = c->mfc_un.unres.expires - now;
336
337 if (interval > 0) {
338 if (interval < expires)
339 expires = interval;
340 cp = &c->next;
341 continue;
342 }
343
344 *cp = c->next;
345
346 ipmr_destroy_unres(c);
347 }
348
349 if (atomic_read(&cache_resolve_queue_len))
350 mod_timer(&ipmr_expire_timer, jiffies + expires);
351
352 out:
353 spin_unlock(&mfc_unres_lock);
354 }
355
356 /* Fill oifs list. It is called under write locked mrt_lock. */
357
358 static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
359 {
360 int vifi;
361
362 cache->mfc_un.res.minvif = MAXVIFS;
363 cache->mfc_un.res.maxvif = 0;
364 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
365
366 for (vifi=0; vifi<maxvif; vifi++) {
367 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
368 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
369 if (cache->mfc_un.res.minvif > vifi)
370 cache->mfc_un.res.minvif = vifi;
371 if (cache->mfc_un.res.maxvif <= vifi)
372 cache->mfc_un.res.maxvif = vifi + 1;
373 }
374 }
375 }
376
377 static int vif_add(struct vifctl *vifc, int mrtsock)
378 {
379 int vifi = vifc->vifc_vifi;
380 struct vif_device *v = &vif_table[vifi];
381 struct net_device *dev;
382 struct in_device *in_dev;
383
384 /* Is vif busy ? */
385 if (VIF_EXISTS(vifi))
386 return -EADDRINUSE;
387
388 switch (vifc->vifc_flags) {
389 #ifdef CONFIG_IP_PIMSM
390 case VIFF_REGISTER:
391 /*
392 * Special Purpose VIF in PIM
393 * All the packets will be sent to the daemon
394 */
395 if (reg_vif_num >= 0)
396 return -EADDRINUSE;
397 dev = ipmr_reg_vif(vifc);
398 if (!dev)
399 return -ENOBUFS;
400 break;
401 #endif
402 case VIFF_TUNNEL:
403 dev = ipmr_new_tunnel(vifc);
404 if (!dev)
405 return -ENOBUFS;
406 break;
407 case 0:
408 dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
409 if (!dev)
410 return -EADDRNOTAVAIL;
411 __dev_put(dev);
412 break;
413 default:
414 return -EINVAL;
415 }
416
417 if ((in_dev = __in_dev_get(dev)) == NULL)
418 return -EADDRNOTAVAIL;
419 in_dev->cnf.mc_forwarding++;
420 dev_set_allmulti(dev, +1);
421 ip_rt_multicast_event(in_dev);
422
423 /*
424 * Fill in the VIF structures
425 */
426 v->rate_limit=vifc->vifc_rate_limit;
427 v->local=vifc->vifc_lcl_addr.s_addr;
428 v->remote=vifc->vifc_rmt_addr.s_addr;
429 v->flags=vifc->vifc_flags;
430 if (!mrtsock)
431 v->flags |= VIFF_STATIC;
432 v->threshold=vifc->vifc_threshold;
433 v->bytes_in = 0;
434 v->bytes_out = 0;
435 v->pkt_in = 0;
436 v->pkt_out = 0;
437 v->link = dev->ifindex;
438 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
439 v->link = dev->iflink;
440
441 /* And finish update writing critical data */
442 write_lock_bh(&mrt_lock);
443 dev_hold(dev);
444 v->dev=dev;
445 #ifdef CONFIG_IP_PIMSM
446 if (v->flags&VIFF_REGISTER)
447 reg_vif_num = vifi;
448 #endif
449 if (vifi+1 > maxvif)
450 maxvif = vifi+1;
451 write_unlock_bh(&mrt_lock);
452 return 0;
453 }
454
455 static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
456 {
457 int line=MFC_HASH(mcastgrp,origin);
458 struct mfc_cache *c;
459
460 for (c=mfc_cache_array[line]; c; c = c->next) {
461 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
462 break;
463 }
464 return c;
465 }
466
467 /*
468 * Allocate a multicast cache entry
469 */
470 static struct mfc_cache *ipmr_cache_alloc(void)
471 {
472 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
473 if(c==NULL)
474 return NULL;
475 memset(c, 0, sizeof(*c));
476 c->mfc_un.res.minvif = MAXVIFS;
477 return c;
478 }
479
480 static struct mfc_cache *ipmr_cache_alloc_unres(void)
481 {
482 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
483 if(c==NULL)
484 return NULL;
485 memset(c, 0, sizeof(*c));
486 skb_queue_head_init(&c->mfc_un.unres.unresolved);
487 c->mfc_un.unres.expires = jiffies + 10*HZ;
488 return c;
489 }
490
491 /*
492 * A cache entry has gone into a resolved state from queued
493 */
494
495 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
496 {
497 struct sk_buff *skb;
498
499 /*
500 * Play the pending entries through our router
501 */
502
503 while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
504 #ifdef CONFIG_RTNETLINK
505 if (skb->nh.iph->version == 0) {
506 int err;
507 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
508
509 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
510 nlh->nlmsg_len = skb->tail - (u8*)nlh;
511 } else {
512 nlh->nlmsg_type = NLMSG_ERROR;
513 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
514 skb_trim(skb, nlh->nlmsg_len);
515 ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
516 }
517 err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
518 } else
519 #endif
520 ip_mr_forward(skb, c, 0);
521 }
522 }
523
524 /*
525 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
526 * expects the following bizarre scheme.
527 *
528 * Called under mrt_lock.
529 */
530
/*
 * Build a report message and queue it on the routing daemon's socket.
 *
 * pkt    - packet the report is about; this function never frees it.
 * vifi   - vif index stored in im_vif for non-WHOLEPKT reports.
 * assert - message type; callers in this file pass IGMPMSG_NOCACHE,
 *          IGMPMSG_WRONGVIF or IGMPMSG_WHOLEPKT.
 *
 * Returns -ENOBUFS when no skb could be obtained, -EINVAL when no
 * daemon socket is registered, otherwise the value returned by
 * sock_queue_rcv_skb() (the report skb is freed if that is negative).
 */
531 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
532 {
533 struct sk_buff *skb;
534 int ihl = pkt->nh.iph->ihl<<2;
535 struct igmphdr *igmp;
536 struct igmpmsg *msg;
537 int ret;
538
/* WHOLEPKT reports carry the packet itself behind an extra header; all
   other reports are built in a fresh 128 byte skb. */
539 #ifdef CONFIG_IP_PIMSM
540 if (assert == IGMPMSG_WHOLEPKT)
541 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
542 else
543 #endif
544 skb = alloc_skb(128, GFP_ATOMIC);
545
546 if(!skb)
547 return -ENOBUFS;
548
549 #ifdef CONFIG_IP_PIMSM
550 if (assert == IGMPMSG_WHOLEPKT) {
551 /* Ugly, but we have no choice with this interface.
552 Duplicate old header, fix ihl, length etc.
553 And all this only to mangle msg->im_msgtype and
554 to set msg->im_mbz to "mbz" :-)
555 */
556 msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
557 skb->nh.raw = skb->h.raw = (u8*)msg;
558 memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
559 msg->im_msgtype = IGMPMSG_WHOLEPKT;
560 msg->im_mbz = 0;
561 msg->im_vif = reg_vif_num;
562 skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
563 skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
564 } else
565 #endif
566 {
567
568 /*
569 * Copy the IP header
570 */
571
572 skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
573 memcpy(skb->data,pkt->data,ihl);
574 skb->nh.iph->protocol = 0; /* Flag to the kernel this is a route add */
/* struct igmpmsg overlays the copied IP header */
575 msg = (struct igmpmsg*)skb->nh.iph;
576 msg->im_vif = vifi;
577 skb->dst = dst_clone(pkt->dst);
578
579 /*
580 * Add our header
581 */
582
583 igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
/* the message type is stored both in the igmp header and in the overlay */
584 igmp->type =
585 msg->im_msgtype = assert;
586 igmp->code = 0;
587 skb->nh.iph->tot_len=htons(skb->len); /* Fix the length */
588 skb->h.raw = skb->nh.raw;
589 }
590
/* No daemon socket registered: drop the report that was just built. */
591 if (mroute_socket == NULL) {
592 kfree_skb(skb);
593 return -EINVAL;
594 }
595
596 /*
597 * Deliver to mrouted
598 */
599 if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
600 if (net_ratelimit())
601 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
602 kfree_skb(skb);
603 }
604
605 return ret;
606 }
607
608 /*
609 * Queue a packet for resolution. It gets locked cache entry!
610 */
611
612 static int
613 ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
614 {
615 int err;
616 struct mfc_cache *c;
617
618 spin_lock_bh(&mfc_unres_lock);
619 for (c=mfc_unres_queue; c; c=c->next) {
620 if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
621 c->mfc_origin == skb->nh.iph->saddr)
622 break;
623 }
624
625 if (c == NULL) {
626 /*
627 * Create a new entry if allowable
628 */
629
630 if (atomic_read(&cache_resolve_queue_len)>=10 ||
631 (c=ipmr_cache_alloc_unres())==NULL) {
632 spin_unlock_bh(&mfc_unres_lock);
633
634 kfree_skb(skb);
635 return -ENOBUFS;
636 }
637
638 /*
639 * Fill in the new cache entry
640 */
641 c->mfc_parent=-1;
642 c->mfc_origin=skb->nh.iph->saddr;
643 c->mfc_mcastgrp=skb->nh.iph->daddr;
644
645 /*
646 * Reflect first query at mrouted.
647 */
648 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
649 /* If the report failed throw the cache entry
650 out - Brad Parker
651 */
652 spin_unlock_bh(&mfc_unres_lock);
653
654 kmem_cache_free(mrt_cachep, c);
655 kfree_skb(skb);
656 return err;
657 }
658
659 atomic_inc(&cache_resolve_queue_len);
660 c->next = mfc_unres_queue;
661 mfc_unres_queue = c;
662
663 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
664 }
665
666 /*
667 * See if we can append the packet
668 */
669 if (c->mfc_un.unres.unresolved.qlen>3) {
670 kfree_skb(skb);
671 err = -ENOBUFS;
672 } else {
673 skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
674 err = 0;
675 }
676
677 spin_unlock_bh(&mfc_unres_lock);
678 return err;
679 }
680
681 /*
682 * MFC cache manipulation by user space mroute daemon
683 */
684
685 int ipmr_mfc_delete(struct mfcctl *mfc)
686 {
687 int line;
688 struct mfc_cache *c, **cp;
689
690 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
691
692 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
693 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
694 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
695 write_lock_bh(&mrt_lock);
696 *cp = c->next;
697 write_unlock_bh(&mrt_lock);
698
699 kmem_cache_free(mrt_cachep, c);
700 return 0;
701 }
702 }
703 return -ENOENT;
704 }
705
706 int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
707 {
708 int line;
709 struct mfc_cache *uc, *c, **cp;
710
711 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
712
713 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
714 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
715 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
716 break;
717 }
718
719 if (c != NULL) {
720 write_lock_bh(&mrt_lock);
721 c->mfc_parent = mfc->mfcc_parent;
722 ipmr_update_threshoulds(c, mfc->mfcc_ttls);
723 if (!mrtsock)
724 c->mfc_flags |= MFC_STATIC;
725 write_unlock_bh(&mrt_lock);
726 return 0;
727 }
728
729 if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
730 return -EINVAL;
731
732 c=ipmr_cache_alloc();
733 if (c==NULL)
734 return -ENOMEM;
735
736 c->mfc_origin=mfc->mfcc_origin.s_addr;
737 c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
738 c->mfc_parent=mfc->mfcc_parent;
739 ipmr_update_threshoulds(c, mfc->mfcc_ttls);
740 if (!mrtsock)
741 c->mfc_flags |= MFC_STATIC;
742
743 write_lock_bh(&mrt_lock);
744 c->next = mfc_cache_array[line];
745 mfc_cache_array[line] = c;
746 write_unlock_bh(&mrt_lock);
747
748 /*
749 * Check to see if we resolved a queued list. If so we
750 * need to send on the frames and tidy up.
751 */
752 spin_lock_bh(&mfc_unres_lock);
753 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
754 cp = &uc->next) {
755 if (uc->mfc_origin == c->mfc_origin &&
756 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
757 *cp = uc->next;
758 if (atomic_dec_and_test(&cache_resolve_queue_len))
759 del_timer(&ipmr_expire_timer);
760 break;
761 }
762 }
763 spin_unlock_bh(&mfc_unres_lock);
764
765 if (uc) {
766 ipmr_cache_resolve(uc, c);
767 kmem_cache_free(mrt_cachep, uc);
768 }
769 return 0;
770 }
771
772 /*
773 * Close the multicast socket, and clear the vif tables etc
774 */
775
776 static void mroute_clean_tables(struct sock *sk)
777 {
778 int i;
779
780 /*
781 * Shut down all active vif entries
782 */
783 for(i=0; i<maxvif; i++) {
784 if (!(vif_table[i].flags&VIFF_STATIC))
785 vif_delete(i);
786 }
787
788 /*
789 * Wipe the cache
790 */
791 for (i=0;i<MFC_LINES;i++) {
792 struct mfc_cache *c, **cp;
793
794 cp = &mfc_cache_array[i];
795 while ((c = *cp) != NULL) {
796 if (c->mfc_flags&MFC_STATIC) {
797 cp = &c->next;
798 continue;
799 }
800 write_lock_bh(&mrt_lock);
801 *cp = c->next;
802 write_unlock_bh(&mrt_lock);
803
804 kmem_cache_free(mrt_cachep, c);
805 }
806 }
807
808 if (atomic_read(&cache_resolve_queue_len) != 0) {
809 struct mfc_cache *c;
810
811 spin_lock_bh(&mfc_unres_lock);
812 while (mfc_unres_queue != NULL) {
813 c = mfc_unres_queue;
814 mfc_unres_queue = c->next;
815 spin_unlock_bh(&mfc_unres_lock);
816
817 ipmr_destroy_unres(c);
818
819 spin_lock_bh(&mfc_unres_lock);
820 }
821 spin_unlock_bh(&mfc_unres_lock);
822 }
823 }
824
825 static void mrtsock_destruct(struct sock *sk)
826 {
827 rtnl_lock();
828 if (sk == mroute_socket) {
829 ipv4_devconf.mc_forwarding--;
830
831 write_lock_bh(&mrt_lock);
832 mroute_socket=NULL;
833 write_unlock_bh(&mrt_lock);
834
835 mroute_clean_tables(sk);
836 }
837 rtnl_unlock();
838 }
839
840 /*
841 * Socket options and virtual interface manipulation. The whole
842 * virtual interface system is a complete heap, but unfortunately
843 * that's how BSD mrouted happens to think. Maybe one day with a proper
844 * MOSPF/PIM router set up we can clean this up.
845 */
846
847 int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
848 {
849 int ret;
850 struct vifctl vif;
851 struct mfcctl mfc;
852
853 if(optname!=MRT_INIT)
854 {
855 if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
856 return -EACCES;
857 }
858
859 switch(optname)
860 {
861 case MRT_INIT:
862 if(sk->type!=SOCK_RAW || sk->num!=IPPROTO_IGMP)
863 return -EOPNOTSUPP;
864 if(optlen!=sizeof(int))
865 return -ENOPROTOOPT;
866
867 rtnl_lock();
868 if (mroute_socket) {
869 rtnl_unlock();
870 return -EADDRINUSE;
871 }
872
873 ret = ip_ra_control(sk, 1, mrtsock_destruct);
874 if (ret == 0) {
875 write_lock_bh(&mrt_lock);
876 mroute_socket=sk;
877 write_unlock_bh(&mrt_lock);
878
879 ipv4_devconf.mc_forwarding++;
880 }
881 rtnl_unlock();
882 return ret;
883 case MRT_DONE:
884 if (sk!=mroute_socket)
885 return -EACCES;
886 return ip_ra_control(sk, 0, NULL);
887 case MRT_ADD_VIF:
888 case MRT_DEL_VIF:
889 if(optlen!=sizeof(vif))
890 return -EINVAL;
891 if (copy_from_user(&vif,optval,sizeof(vif)))
892 return -EFAULT;
893 if(vif.vifc_vifi >= MAXVIFS)
894 return -ENFILE;
895 rtnl_lock();
896 if (optname==MRT_ADD_VIF) {
897 ret = vif_add(&vif, sk==mroute_socket);
898 } else {
899 ret = vif_delete(vif.vifc_vifi);
900 }
901 rtnl_unlock();
902 return ret;
903
904 /*
905 * Manipulate the forwarding caches. These live
906 * in a sort of kernel/user symbiosis.
907 */
908 case MRT_ADD_MFC:
909 case MRT_DEL_MFC:
910 if(optlen!=sizeof(mfc))
911 return -EINVAL;
912 if (copy_from_user(&mfc,optval, sizeof(mfc)))
913 return -EFAULT;
914 rtnl_lock();
915 if (optname==MRT_DEL_MFC)
916 ret = ipmr_mfc_delete(&mfc);
917 else
918 ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
919 rtnl_unlock();
920 return ret;
921 /*
922 * Control PIM assert.
923 */
924 case MRT_ASSERT:
925 {
926 int v;
927 if(get_user(v,(int *)optval))
928 return -EFAULT;
929 mroute_do_assert=(v)?1:0;
930 return 0;
931 }
932 #ifdef CONFIG_IP_PIMSM
933 case MRT_PIM:
934 {
935 int v;
936 if(get_user(v,(int *)optval))
937 return -EFAULT;
938 v = (v)?1:0;
939 rtnl_lock();
940 if (v != mroute_do_pim) {
941 mroute_do_pim = v;
942 mroute_do_assert = v;
943 #ifdef CONFIG_IP_PIMSM_V2
944 if (mroute_do_pim)
945 inet_add_protocol(&pim_protocol);
946 else
947 inet_del_protocol(&pim_protocol);
948 #endif
949 }
950 rtnl_unlock();
951 return 0;
952 }
953 #endif
954 /*
955 * Spurious command, or MRT_VERSION which you cannot
956 * set.
957 */
958 default:
959 return -ENOPROTOOPT;
960 }
961 }
962
963 /*
964 * Getsock opt support for the multicast routing system.
965 */
966
967 int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
968 {
969 int olr;
970 int val;
971
972 if(optname!=MRT_VERSION &&
973 #ifdef CONFIG_IP_PIMSM
974 optname!=MRT_PIM &&
975 #endif
976 optname!=MRT_ASSERT)
977 return -ENOPROTOOPT;
978
979 if (get_user(olr, optlen))
980 return -EFAULT;
981
982 olr = min_t(unsigned int, olr, sizeof(int));
983 if (olr < 0)
984 return -EINVAL;
985
986 if(put_user(olr,optlen))
987 return -EFAULT;
988 if(optname==MRT_VERSION)
989 val=0x0305;
990 #ifdef CONFIG_IP_PIMSM
991 else if(optname==MRT_PIM)
992 val=mroute_do_pim;
993 #endif
994 else
995 val=mroute_do_assert;
996 if(copy_to_user(optval,&val,olr))
997 return -EFAULT;
998 return 0;
999 }
1000
1001 /*
1002 * The IP multicast ioctl support routines.
1003 */
1004
1005 int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
1006 {
1007 struct sioc_sg_req sr;
1008 struct sioc_vif_req vr;
1009 struct vif_device *vif;
1010 struct mfc_cache *c;
1011
1012 switch(cmd)
1013 {
1014 case SIOCGETVIFCNT:
1015 if (copy_from_user(&vr,(void *)arg,sizeof(vr)))
1016 return -EFAULT;
1017 if(vr.vifi>=maxvif)
1018 return -EINVAL;
1019 read_lock(&mrt_lock);
1020 vif=&vif_table[vr.vifi];
1021 if(VIF_EXISTS(vr.vifi)) {
1022 vr.icount=vif->pkt_in;
1023 vr.ocount=vif->pkt_out;
1024 vr.ibytes=vif->bytes_in;
1025 vr.obytes=vif->bytes_out;
1026 read_unlock(&mrt_lock);
1027
1028 if (copy_to_user((void *)arg,&vr,sizeof(vr)))
1029 return -EFAULT;
1030 return 0;
1031 }
1032 read_unlock(&mrt_lock);
1033 return -EADDRNOTAVAIL;
1034 case SIOCGETSGCNT:
1035 if (copy_from_user(&sr,(void *)arg,sizeof(sr)))
1036 return -EFAULT;
1037
1038 read_lock(&mrt_lock);
1039 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1040 if (c) {
1041 sr.pktcnt = c->mfc_un.res.pkt;
1042 sr.bytecnt = c->mfc_un.res.bytes;
1043 sr.wrong_if = c->mfc_un.res.wrong_if;
1044 read_unlock(&mrt_lock);
1045
1046 if (copy_to_user((void *)arg,&sr,sizeof(sr)))
1047 return -EFAULT;
1048 return 0;
1049 }
1050 read_unlock(&mrt_lock);
1051 return -EADDRNOTAVAIL;
1052 default:
1053 return -ENOIOCTLCMD;
1054 }
1055 }
1056
1057
1058 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1059 {
1060 struct vif_device *v;
1061 int ct;
1062 if (event != NETDEV_UNREGISTER)
1063 return NOTIFY_DONE;
1064 v=&vif_table[0];
1065 for(ct=0;ct<maxvif;ct++,v++) {
1066 if (v->dev==ptr)
1067 vif_delete(ct);
1068 }
1069 return NOTIFY_DONE;
1070 }
1071
1072
1073 static struct notifier_block ip_mr_notifier={
1074 ipmr_device_event,
1075 NULL,
1076 0
1077 };
1078
1079 /*
1080 * Encapsulate a packet by attaching a valid IPIP header to it.
1081 * This avoids tunnel drivers and other mess and gives us the speed so
1082 * important for multicast video.
1083 */
1084
1085 static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
1086 {
1087 struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
1088
1089 iph->version = 4;
1090 iph->tos = skb->nh.iph->tos;
1091 iph->ttl = skb->nh.iph->ttl;
1092 iph->frag_off = 0;
1093 iph->daddr = daddr;
1094 iph->saddr = saddr;
1095 iph->protocol = IPPROTO_IPIP;
1096 iph->ihl = 5;
1097 iph->tot_len = htons(skb->len);
1098 ip_select_ident(iph, skb->dst, NULL);
1099 ip_send_check(iph);
1100
1101 skb->h.ipiph = skb->nh.iph;
1102 skb->nh.iph = iph;
1103 #ifdef CONFIG_NETFILTER
1104 nf_conntrack_put(skb->nfct);
1105 skb->nfct = NULL;
1106 #endif
1107 }
1108
1109 static inline int ipmr_forward_finish(struct sk_buff *skb)
1110 {
1111 struct dst_entry *dst = skb->dst;
1112
1113 if (skb->len <= dst->pmtu)
1114 return dst->output(skb);
1115 else
1116 return ip_fragment(skb, dst->output);
1117 }
1118
1119 /*
1120 * Processing handlers for ipmr_forward
1121 */
1122
/*
 * Transmit one copy of skb on the vif with index vifi.
 *
 * skb  - packet being forwarded.
 * c    - cache entry the packet matched (not referenced in this function).
 * vifi - index into vif_table of the output vif.
 * last - non-zero when skb itself may be sent instead of a copy
 *        (see the skb2 selection below).
 *
 * Nothing is sent when the vif has no device, the route lookup fails,
 * the packet exceeds the route's pmtu with DF set, or no skb copy can
 * be obtained.
 */
1123 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
1124 int vifi, int last)
1125 {
1126 struct iphdr *iph = skb->nh.iph;
1127 struct vif_device *vif = &vif_table[vifi];
1128 struct net_device *dev;
1129 struct rtable *rt;
1130 int encap = 0;
1131 struct sk_buff *skb2;
1132
1133 if (vif->dev == NULL)
1134 return;
1135
/* Register vif: account the packet and hand it whole to the daemon. */
1136 #ifdef CONFIG_IP_PIMSM
1137 if (vif->flags & VIFF_REGISTER) {
1138 vif->pkt_out++;
1139 vif->bytes_out+=skb->len;
1140 ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
1141 ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
1142 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1143 return;
1144 }
1145 #endif
1146
/* Tunnel vifs route towards the remote endpoint and need room for an
   outer IP header; other vifs route on the packet's own destination. */
1147 if (vif->flags&VIFF_TUNNEL) {
1148 if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
1149 return;
1150 encap = sizeof(struct iphdr);
1151 } else {
1152 if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
1153 return;
1154 }
1155
1156 dev = rt->u.dst.dev;
1157
1158 if (skb->len+encap > rt->u.dst.pmtu && (ntohs(iph->frag_off) & IP_DF)) {
1159 /* Do not fragment multicasts. Alas, IPv4 does not
1160 allow to send ICMP, so that packets will disappear
1161 to blackhole.
1162 */
1163
1164 IP_INC_STATS_BH(IpFragFails);
1165 ip_rt_put(rt);
1166 return;
1167 }
1168
1169 encap += dev->hard_header_len;
1170
/* Pick the skb to send: a copy with new headroom when there is too little
   room, the skb is cloned or this is not the last transmit; a clone when
   others hold a reference; otherwise skb itself with an extra reference. */
1171 if (skb_headroom(skb) < encap || skb_cloned(skb) || !last)
1172 skb2 = skb_realloc_headroom(skb, (encap + 15)&~15);
1173 else if (atomic_read(&skb->users) != 1)
1174 skb2 = skb_clone(skb, GFP_ATOMIC);
1175 else {
1176 atomic_inc(&skb->users);
1177 skb2 = skb;
1178 }
1179
1180 if (skb2 == NULL) {
1181 ip_rt_put(rt);
1182 return;
1183 }
1184
1185 vif->pkt_out++;
1186 vif->bytes_out+=skb->len;
1187
/* Replace the route of the outgoing skb with the one looked up above. */
1188 dst_release(skb2->dst);
1189 skb2->dst = &rt->u.dst;
1190 iph = skb2->nh.iph;
1191 ip_decrease_ttl(iph);
1192
1193 /* FIXME: forward and output firewalls used to be called here.
1194 * What do we do with netfilter? -- RR */
1195 if (vif->flags & VIFF_TUNNEL) {
1196 ip_encap(skb2, vif->local, vif->remote);
1197 /* FIXME: extra output firewall step used to be here. --RR */
1198 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
1199 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len;
1200 }
1201
/* ip_mr_input() checks this flag so a looped-back copy is not forwarded again. */
1202 IPCB(skb2)->flags |= IPSKB_FORWARDED;
1203
1204 /*
1205 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1206 * not only before forwarding, but after forwarding on all output
1207 * interfaces. It is clear, if mrouter runs a multicasting
1208 * program, it should receive packets not depending to what interface
1209 * program is joined.
1210 * If we will not make it, the program will have to join on all
1211 * interfaces. On the other hand, multihoming host (or router, but
1212 * not mrouter) cannot join to more than one interface - it will
1213 * result in receiving multiple packets.
1214 */
1215 NF_HOOK(PF_INET, NF_IP_FORWARD, skb2, skb->dev, dev,
1216 ipmr_forward_finish);
1217 }
1218
1219 int ipmr_find_vif(struct net_device *dev)
1220 {
1221 int ct;
1222 for (ct=maxvif-1; ct>=0; ct--) {
1223 if (vif_table[ct].dev == dev)
1224 break;
1225 }
1226 return ct;
1227 }
1228
/*
 * Forward skb according to the resolved cache entry.
 *
 * skb   - packet to forward.
 * cache - resolved entry giving the parent (incoming) vif and the
 *         per-vif ttl thresholds.
 * local - see the comment below; when zero the skb is freed here.
 *
 * Updates the entry and vif counters, drops packets that arrived on the
 * wrong interface (optionally sending an IGMPMSG_WRONGVIF report) and
 * transmits on every vif in [minvif, maxvif) whose threshold is below
 * the packet's ttl. Always returns 0.
 */
1229 /* "local" means that we should preserve one skb (for local delivery) */
1230
1231 int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1232 {
1233 int psend = -1;
1234 int vif, ct;
1235
/* NOTE(review): vif indexes vif_table[] below without a range check in
   this function - confirm mfc_parent is validated where entries are set up. */
1236 vif = cache->mfc_parent;
1237 cache->mfc_un.res.pkt++;
1238 cache->mfc_un.res.bytes += skb->len;
1239
1240 /*
1241 * Wrong interface: drop packet and (maybe) send PIM assert.
1242 */
1243 if (vif_table[vif].dev != skb->dev) {
1244 int true_vifi;
1245
1246 if (((struct rtable*)skb->dst)->key.iif == 0) {
1247 /* It is our own packet, looped back.
1248 Very complicated situation...
1249
1250 The best workaround until routing daemons will be
1251 fixed is not to redistribute packet, if it was
1252 send through wrong interface. It means, that
1253 multicast applications WILL NOT work for
1254 (S,G), which have default multicast route pointing
1255 to wrong oif. In any case, it is not a good
1256 idea to use multicasting applications on router.
1257 */
1258 goto dont_forward;
1259 }
1260
1261 cache->mfc_un.res.wrong_if++;
1262 true_vifi = ipmr_find_vif(skb->dev);
1263
/* Report only if asserts are enabled, the arrival vif is known and the
   last report for this entry is older than MFC_ASSERT_THRESH. */
1264 if (true_vifi >= 0 && mroute_do_assert &&
1265 /* pimsm uses asserts, when switching from RPT to SPT,
1266 so that we cannot check that packet arrived on an oif.
1267 It is bad, but otherwise we would need to move pretty
1268 large chunk of pimd to kernel. Ough... --ANK
1269 */
1270 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1271 jiffies - cache->mfc_un.res.last_assert > MFC_ASSERT_THRESH) {
1272 cache->mfc_un.res.last_assert = jiffies;
1273 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1274 }
1275 goto dont_forward;
1276 }
1277
1278 vif_table[vif].pkt_in++;
1279 vif_table[vif].bytes_in+=skb->len;
1280
1281 /*
1282 * Forward the frame
1283 */
/* psend delays each transmit by one iteration, so every matching vif but
   the final one is sent with last = 0 and the final one with last = !local. */
1284 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1285 if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
1286 if (psend != -1)
1287 ipmr_queue_xmit(skb, cache, psend, 0);
1288 psend=ct;
1289 }
1290 }
1291 if (psend != -1)
1292 ipmr_queue_xmit(skb, cache, psend, !local);
1293
1294 dont_forward:
1295 if (!local)
1296 kfree_skb(skb);
1297 return 0;
1298 }
1299
1300
1301 /*
1302 * Multicast packets for forwarding arrive here
1303 */
1304
1305 int ip_mr_input(struct sk_buff *skb)
1306 {
1307 struct mfc_cache *cache;
1308 int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
1309
1310 /* Packet is looped back after forward, it should not be
1311 forwarded second time, but still can be delivered locally.
1312 */
1313 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1314 goto dont_forward;
1315
1316 if (!local) {
1317 if (IPCB(skb)->opt.router_alert) {
1318 if (ip_call_ra_chain(skb))
1319 return 0;
1320 } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
1321 /* IGMPv1 (and broken IGMPv2 implementations sort of
1322 Cisco IOS <= 11.2(8)) do not put router alert
1323 option to IGMP packets destined to routable
1324 groups. It is very bad, because it means
1325 that we can forward NO IGMP messages.
1326 */
1327 read_lock(&mrt_lock);
1328 if (mroute_socket) {
1329 raw_rcv(mroute_socket, skb);
1330 read_unlock(&mrt_lock);
1331 return 0;
1332 }
1333 read_unlock(&mrt_lock);
1334 }
1335 }
1336
1337 read_lock(&mrt_lock);
1338 cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
1339
1340 /*
1341 * No usable cache entry
1342 */
1343 if (cache==NULL) {
1344 int vif;
1345
1346 if (local) {
1347 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1348 ip_local_deliver(skb);
1349 if (skb2 == NULL) {
1350 read_unlock(&mrt_lock);
1351 return -ENOBUFS;
1352 }
1353 skb = skb2;
1354 }
1355
1356 vif = ipmr_find_vif(skb->dev);
1357 if (vif >= 0) {
1358 int err = ipmr_cache_unresolved(vif, skb);
1359 read_unlock(&mrt_lock);
1360
1361 return err;
1362 }
1363 read_unlock(&mrt_lock);
1364 kfree_skb(skb);
1365 return -ENODEV;
1366 }
1367
1368 ip_mr_forward(skb, cache, local);
1369
1370 read_unlock(&mrt_lock);
1371
1372 if (local)
1373 return ip_local_deliver(skb);
1374
1375 return 0;
1376
1377 dont_forward:
1378 if (local)
1379 return ip_local_deliver(skb);
1380 kfree_skb(skb);
1381 return 0;
1382 }
1383
#ifdef CONFIG_IP_PIMSM_V1
/*
 *	Handle IGMP messages of PIMv1.
 *
 *	Only REGISTER messages are accepted, and only while mroute_do_pim
 *	is set.  The encapsulated IP packet is unwrapped and fed back into
 *	the receive path through netif_rx() as if it had arrived on the
 *	register vif device.
 *
 *	Returns 0 on success, -ENOMEM if the skb could not be linearized
 *	and -EINVAL if the packet was rejected.  The skb is consumed in all
 *	cases.
 */

int pim_rcv_v1(struct sk_buff * skb)
{
	struct net_device *reg_dev = NULL;
	struct net_device_stats *stats;
	struct igmphdr *pim;
	struct iphdr *encap;

	if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
		kfree_skb(skb);
		return -ENOMEM;
	}
	pim = (struct igmphdr*)skb->h.raw;

	if (!mroute_do_pim)
		goto drop;
	if (skb->len < sizeof(*pim) + sizeof(*encap))
		goto drop;
	if (pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!MULTICAST(encap->daddr))
		goto drop;
	if (ntohs(encap->tot_len) == 0)
		goto drop;
	if (ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
		goto drop;

	/* Pin the register device while the vif table cannot change. */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	/* Strip the outer headers; the inner IP header becomes nh. */
	skb->mac.raw = skb->nh.raw;
	skb_pull(skb, (u8*)encap - skb->data);
	skb->nh.iph = (struct iphdr *)skb->data;
	skb->dev = reg_dev;
	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
	skb->protocol = __constant_htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;

	/* Drop the route attached to the outer packet. */
	dst_release(skb->dst);
	skb->dst = NULL;

	stats = (struct net_device_stats*)reg_dev->priv;
	stats->rx_bytes += skb->len;
	stats->rx_packets++;
#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
	skb->nfct = NULL;
#endif
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;

drop:
	kfree_skb(skb);
	return -EINVAL;
}
#endif
1457
#ifdef CONFIG_IP_PIMSM_V2
/*
 *	Handle PIMv2 REGISTER messages.
 *
 *	The encapsulated IP packet is unwrapped and fed back into the
 *	receive path through netif_rx() as if it had arrived on the
 *	register vif device.
 *
 *	Returns 0 on success, -ENOMEM if the skb could not be linearized
 *	and -EINVAL if the packet was rejected.  The skb is consumed in all
 *	cases.
 */
int pim_rcv(struct sk_buff * skb)
{
	struct net_device *reg_dev = NULL;
	struct net_device_stats *stats;
	struct pimreghdr *pim;
	struct iphdr *encap;

	if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
		kfree_skb(skb);
		return -ENOMEM;
	}
	pim = (struct pimreghdr*)skb->h.raw;

	if (skb->len < sizeof(*pim) + sizeof(*encap))
		goto drop;
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)))
		goto drop;
	if (pim->flags&PIM_NULL_REGISTER)
		goto drop;
	/*
	 * The checksum may cover either the PIM header alone or the whole
	 * packet; reject only when neither variant verifies.
	 */
	if (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	    ip_compute_csum((void *)pim, skb->len))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
	if (!MULTICAST(encap->daddr))
		goto drop;
	if (ntohs(encap->tot_len) == 0)
		goto drop;
	if (ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
		goto drop;

	/* Pin the register device while the vif table cannot change. */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	/* Strip the outer headers; the inner IP header becomes nh. */
	skb->mac.raw = skb->nh.raw;
	skb_pull(skb, (u8*)encap - skb->data);
	skb->nh.iph = (struct iphdr *)skb->data;
	skb->dev = reg_dev;
	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
	skb->protocol = __constant_htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;

	/* Drop the route attached to the outer packet. */
	dst_release(skb->dst);
	skb->dst = NULL;

	stats = (struct net_device_stats*)reg_dev->priv;
	stats->rx_bytes += skb->len;
	stats->rx_packets++;
#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
	skb->nfct = NULL;
#endif
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;

drop:
	kfree_skb(skb);
	return -EINVAL;
}
#endif
1524
1525 #ifdef CONFIG_RTNETLINK
1526
1527 static int
1528 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1529 {
1530 int ct;
1531 struct rtnexthop *nhp;
1532 struct net_device *dev = vif_table[c->mfc_parent].dev;
1533 u8 *b = skb->tail;
1534 struct rtattr *mp_head;
1535
1536 if (dev)
1537 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1538
1539 mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1540
1541 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1542 if (c->mfc_un.res.ttls[ct] < 255) {
1543 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1544 goto rtattr_failure;
1545 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1546 nhp->rtnh_flags = 0;
1547 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1548 nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1549 nhp->rtnh_len = sizeof(*nhp);
1550 }
1551 }
1552 mp_head->rta_type = RTA_MULTIPATH;
1553 mp_head->rta_len = skb->tail - (u8*)mp_head;
1554 rtm->rtm_type = RTN_MULTICAST;
1555 return 1;
1556
1557 rtattr_failure:
1558 skb_trim(skb, b - skb->data);
1559 return -EMSGSIZE;
1560 }
1561
1562 int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1563 {
1564 int err;
1565 struct mfc_cache *cache;
1566 struct rtable *rt = (struct rtable*)skb->dst;
1567
1568 read_lock(&mrt_lock);
1569 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1570
1571 if (cache==NULL) {
1572 struct net_device *dev;
1573 int vif;
1574
1575 if (nowait) {
1576 read_unlock(&mrt_lock);
1577 return -EAGAIN;
1578 }
1579
1580 dev = skb->dev;
1581 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1582 read_unlock(&mrt_lock);
1583 return -ENODEV;
1584 }
1585 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
1586 skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
1587 skb->nh.iph->saddr = rt->rt_src;
1588 skb->nh.iph->daddr = rt->rt_dst;
1589 skb->nh.iph->version = 0;
1590 err = ipmr_cache_unresolved(vif, skb);
1591 read_unlock(&mrt_lock);
1592 return err;
1593 }
1594
1595 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1596 cache->mfc_flags |= MFC_NOTIFY;
1597 err = ipmr_fill_mroute(skb, cache, rtm);
1598 read_unlock(&mrt_lock);
1599 return err;
1600 }
1601 #endif
1602
1603 #ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
1607
1608 static int ipmr_vif_info(char *buffer, char **start, off_t offset, int length)
1609 {
1610 struct vif_device *vif;
1611 int len=0;
1612 off_t pos=0;
1613 off_t begin=0;
1614 int size;
1615 int ct;
1616
1617 len += sprintf(buffer,
1618 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1619 pos=len;
1620
1621 read_lock(&mrt_lock);
1622 for (ct=0;ct<maxvif;ct++)
1623 {
1624 char *name = "none";
1625 vif=&vif_table[ct];
1626 if(!VIF_EXISTS(ct))
1627 continue;
1628 if (vif->dev)
1629 name = vif->dev->name;
1630 size = sprintf(buffer+len, "%2d %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1631 ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out,
1632 vif->flags, vif->local, vif->remote);
1633 len+=size;
1634 pos+=size;
1635 if(pos<offset)
1636 {
1637 len=0;
1638 begin=pos;
1639 }
1640 if(pos>offset+length)
1641 break;
1642 }
1643 read_unlock(&mrt_lock);
1644
1645 *start=buffer+(offset-begin);
1646 len-=(offset-begin);
1647 if(len>length)
1648 len=length;
1649 if (len<0)
1650 len = 0;
1651 return len;
1652 }
1653
1654 static int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length)
1655 {
1656 struct mfc_cache *mfc;
1657 int len=0;
1658 off_t pos=0;
1659 off_t begin=0;
1660 int size;
1661 int ct;
1662
1663 len += sprintf(buffer,
1664 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1665 pos=len;
1666
1667 read_lock(&mrt_lock);
1668 for (ct=0;ct<MFC_LINES;ct++)
1669 {
1670 for(mfc=mfc_cache_array[ct]; mfc; mfc=mfc->next)
1671 {
1672 int n;
1673
1674 /*
1675 * Interface forwarding map
1676 */
1677 size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld",
1678 (unsigned long)mfc->mfc_mcastgrp,
1679 (unsigned long)mfc->mfc_origin,
1680 mfc->mfc_parent,
1681 mfc->mfc_un.res.pkt,
1682 mfc->mfc_un.res.bytes,
1683 mfc->mfc_un.res.wrong_if);
1684 for(n=mfc->mfc_un.res.minvif;n<mfc->mfc_un.res.maxvif;n++)
1685 {
1686 if(VIF_EXISTS(n) && mfc->mfc_un.res.ttls[n] < 255)
1687 size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]);
1688 }
1689 size += sprintf(buffer+len+size, "\n");
1690 len+=size;
1691 pos+=size;
1692 if(pos<offset)
1693 {
1694 len=0;
1695 begin=pos;
1696 }
1697 if(pos>offset+length)
1698 goto done;
1699 }
1700 }
1701
1702 spin_lock_bh(&mfc_unres_lock);
1703 for(mfc=mfc_unres_queue; mfc; mfc=mfc->next) {
1704 size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld\n",
1705 (unsigned long)mfc->mfc_mcastgrp,
1706 (unsigned long)mfc->mfc_origin,
1707 -1,
1708 (long)mfc->mfc_un.unres.unresolved.qlen,
1709 0L, 0L);
1710 len+=size;
1711 pos+=size;
1712 if(pos<offset)
1713 {
1714 len=0;
1715 begin=pos;
1716 }
1717 if(pos>offset+length)
1718 break;
1719 }
1720 spin_unlock_bh(&mfc_unres_lock);
1721
1722 done:
1723 read_unlock(&mrt_lock);
1724 *start=buffer+(offset-begin);
1725 len-=(offset-begin);
1726 if(len>length)
1727 len=length;
1728 if (len < 0) {
1729 len = 0;
1730 }
1731 return len;
1732 }
1733
1734 #endif
1735
#ifdef CONFIG_IP_PIMSM_V2
/*
 *	Protocol descriptor for PIM; pim_rcv() is its receive handler.
 *	The initializer is positional, so the values must stay in the
 *	member order of struct inet_protocol.
 */
struct inet_protocol pim_protocol = {
	pim_rcv,		/* receive handler		*/
	NULL,			/* error handler: none		*/
	NULL,			/* next				*/
	IPPROTO_PIM,		/* protocol ID			*/
	0,			/* copy				*/
	NULL,			/* data				*/
	"PIM"			/* name				*/
};
#endif
1748
1749
1750 /*
1751 * Setup for IP multicast routing
1752 */
1753
1754 void __init ip_mr_init(void)
1755 {
1756 printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n");
1757 mrt_cachep = kmem_cache_create("ip_mrt_cache",
1758 sizeof(struct mfc_cache),
1759 0, SLAB_HWCACHE_ALIGN,
1760 NULL, NULL);
1761 init_timer(&ipmr_expire_timer);
1762 ipmr_expire_timer.function=ipmr_expire_process;
1763 register_netdevice_notifier(&ip_mr_notifier);
1764 #ifdef CONFIG_PROC_FS
1765 proc_net_create("ip_mr_vif",0,ipmr_vif_info);
1766 proc_net_create("ip_mr_cache",0,ipmr_mfc_info);
1767 #endif
1768 }
1769