File: /usr/src/linux/net/ipv4/ipmr.c

1     /*
2      *	IP multicast routing support for mrouted 3.6/3.8
3      *
4      *		(c) 1995 Alan Cox, <alan@redhat.com>
5      *	  Linux Consultancy and Custom Driver Development
6      *
7      *	This program is free software; you can redistribute it and/or
8      *	modify it under the terms of the GNU General Public License
9      *	as published by the Free Software Foundation; either version
10      *	2 of the License, or (at your option) any later version.
11      *
12      *	Version: $Id: ipmr.c,v 1.64 2001/09/18 22:29:09 davem Exp $
13      *
14      *	Fixes:
15      *	Michael Chastain	:	Incorrect size of copying.
16      *	Alan Cox		:	Added the cache manager code
17      *	Alan Cox		:	Fixed the clone/copy bug and device race.
18      *	Mike McLagan		:	Routing by source
19      *	Malcolm Beattie		:	Buffer handling fixes.
20      *	Alexey Kuznetsov	:	Double buffer free and other fixes.
21      *	SVR Anand		:	Fixed several multicast bugs and problems.
22      *	Alexey Kuznetsov	:	Status, optimisations and more.
23      *	Brad Parker		:	Better behaviour on mrouted upcall
24      *					overflow.
25      *      Carlos Picoto           :       PIMv1 Support
26      *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
27      *					Relax this requirement to work with older peers.
28      *
29      */
30     
31     #include <linux/config.h>
32     #include <asm/system.h>
33     #include <asm/uaccess.h>
34     #include <linux/types.h>
35     #include <linux/sched.h>
36     #include <linux/errno.h>
37     #include <linux/timer.h>
38     #include <linux/mm.h>
39     #include <linux/kernel.h>
40     #include <linux/fcntl.h>
41     #include <linux/stat.h>
42     #include <linux/socket.h>
43     #include <linux/in.h>
44     #include <linux/inet.h>
45     #include <linux/netdevice.h>
46     #include <linux/inetdevice.h>
47     #include <linux/igmp.h>
48     #include <linux/proc_fs.h>
49     #include <linux/mroute.h>
50     #include <linux/init.h>
51     #include <net/ip.h>
52     #include <net/protocol.h>
53     #include <linux/skbuff.h>
54     #include <net/sock.h>
55     #include <net/icmp.h>
56     #include <net/udp.h>
57     #include <net/raw.h>
58     #include <linux/notifier.h>
59     #include <linux/if_arp.h>
60     #include <linux/netfilter_ipv4.h>
61     #include <net/ipip.h>
62     #include <net/checksum.h>
63     
64     #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
65     #define CONFIG_IP_PIMSM	1
66     #endif
67     
68     static struct sock *mroute_socket;
69     
70     
71     /* Big lock, protecting vif table, mrt cache and mroute socket state.
72        Note that the changes are semaphored via rtnl_lock.
73      */
74     
75     static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;
76     
77     /*
78      *	Multicast router control variables
79      */
80     
81     static struct vif_device vif_table[MAXVIFS];		/* Devices 		*/
82     static int maxvif;
83     
84     #define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
85     
86     int mroute_do_assert;					/* Set in PIM assert	*/
87     int mroute_do_pim;
88     
89     static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/
90     
91     static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
92     atomic_t cache_resolve_queue_len;			/* Size of unresolved	*/
93     
94     /* Special spinlock for queue of unresolved entries */
95     static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;
96     
97     /* We return to original Alan's scheme. Hash table of resolved
98        entries is changed only in process context and protected
99        with weak lock mrt_lock. Queue of unresolved entries is protected
100        with strong spinlock mfc_unres_lock.
101     
102        In this case data path is free of exclusive locks at all.
103      */
104     
105     kmem_cache_t *mrt_cachep;
106     
107     static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
108     static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
109     static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
110     
111     extern struct inet_protocol pim_protocol;
112     
113     static struct timer_list ipmr_expire_timer;
114     
115     /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
116     
117     static
118     struct net_device *ipmr_new_tunnel(struct vifctl *v)
119     {
120     	struct net_device  *dev;
121     
122     	dev = __dev_get_by_name("tunl0");
123     
124     	if (dev) {
125     		int err;
126     		struct ifreq ifr;
127     		mm_segment_t	oldfs;
128     		struct ip_tunnel_parm p;
129     		struct in_device  *in_dev;
130     
131     		memset(&p, 0, sizeof(p));
132     		p.iph.daddr = v->vifc_rmt_addr.s_addr;
133     		p.iph.saddr = v->vifc_lcl_addr.s_addr;
134     		p.iph.version = 4;
135     		p.iph.ihl = 5;
136     		p.iph.protocol = IPPROTO_IPIP;
137     		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
138     		ifr.ifr_ifru.ifru_data = (void*)&p;
139     
140     		oldfs = get_fs(); set_fs(KERNEL_DS);
141     		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
142     		set_fs(oldfs);
143     
144     		dev = NULL;
145     
146     		if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
147     			dev->flags |= IFF_MULTICAST;
148     
149     			in_dev = __in_dev_get(dev);
150     			if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
151     				goto failure;
152     			in_dev->cnf.rp_filter = 0;
153     
154     			if (dev_open(dev))
155     				goto failure;
156     		}
157     	}
158     	return dev;
159     
160     failure:
161     	unregister_netdevice(dev);
162     	return NULL;
163     }
164     
165     #ifdef CONFIG_IP_PIMSM
166     
167     static int reg_vif_num = -1;
168     
169     static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
170     {
171     	read_lock(&mrt_lock);
172     	((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
173     	((struct net_device_stats*)dev->priv)->tx_packets++;
174     	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
175     	read_unlock(&mrt_lock);
176     	kfree_skb(skb);
177     	return 0;
178     }
179     
180     static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
181     {
182     	return (struct net_device_stats*)dev->priv;
183     }
184     
185     static
186     struct net_device *ipmr_reg_vif(struct vifctl *v)
187     {
188     	struct net_device  *dev;
189     	struct in_device *in_dev;
190     	int size;
191     
192     	size = sizeof(*dev) + sizeof(struct net_device_stats);
193     	dev = kmalloc(size, GFP_KERNEL);
194     	if (!dev)
195     		return NULL;
196     
197     	memset(dev, 0, size);
198     
199     	dev->priv = dev + 1;
200     
201     	strcpy(dev->name, "pimreg");
202     
203     	dev->type		= ARPHRD_PIMREG;
204     	dev->mtu		= 1500 - sizeof(struct iphdr) - 8;
205     	dev->flags		= IFF_NOARP;
206     	dev->hard_start_xmit	= reg_vif_xmit;
207     	dev->get_stats		= reg_vif_get_stats;
208     	dev->features		|= NETIF_F_DYNALLOC;
209     
210     	if (register_netdevice(dev)) {
211     		kfree(dev);
212     		return NULL;
213     	}
214     	dev->iflink = 0;
215     
216     	if ((in_dev = inetdev_init(dev)) == NULL)
217     		goto failure;
218     
219     	in_dev->cnf.rp_filter = 0;
220     
221     	if (dev_open(dev))
222     		goto failure;
223     
224     	return dev;
225     
226     failure:
227     	unregister_netdevice(dev);
228     	return NULL;
229     }
230     #endif
231     
232     /*
233      *	Delete a VIF entry
234      */
235      
236     static int vif_delete(int vifi)
237     {
238     	struct vif_device *v;
239     	struct net_device *dev;
240     	struct in_device *in_dev;
241     
242     	if (vifi < 0 || vifi >= maxvif)
243     		return -EADDRNOTAVAIL;
244     
245     	v = &vif_table[vifi];
246     
247     	write_lock_bh(&mrt_lock);
248     	dev = v->dev;
249     	v->dev = NULL;
250     
251     	if (!dev) {
252     		write_unlock_bh(&mrt_lock);
253     		return -EADDRNOTAVAIL;
254     	}
255     
256     #ifdef CONFIG_IP_PIMSM
257     	if (vifi == reg_vif_num)
258     		reg_vif_num = -1;
259     #endif
260     
261     	if (vifi+1 == maxvif) {
262     		int tmp;
263     		for (tmp=vifi-1; tmp>=0; tmp--) {
264     			if (VIF_EXISTS(tmp))
265     				break;
266     		}
267     		maxvif = tmp+1;
268     	}
269     
270     	write_unlock_bh(&mrt_lock);
271     
272     	dev_set_allmulti(dev, -1);
273     
274     	if ((in_dev = __in_dev_get(dev)) != NULL) {
275     		in_dev->cnf.mc_forwarding--;
276     		ip_rt_multicast_event(in_dev);
277     	}
278     
279     	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
280     		unregister_netdevice(dev);
281     
282     	dev_put(dev);
283     	return 0;
284     }
285     
286     /* Destroy an unresolved cache entry, killing queued skbs
287        and reporting error to netlink readers.
288      */
289     
290     static void ipmr_destroy_unres(struct mfc_cache *c)
291     {
292     	struct sk_buff *skb;
293     
294     	atomic_dec(&cache_resolve_queue_len);
295     
296     	while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
297     #ifdef CONFIG_RTNETLINK
298     		if (skb->nh.iph->version == 0) {
299     			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
300     			nlh->nlmsg_type = NLMSG_ERROR;
301     			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
302     			skb_trim(skb, nlh->nlmsg_len);
303     			((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
304     			netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
305     		} else
306     #endif
307     			kfree_skb(skb);
308     	}
309     
310     	kmem_cache_free(mrt_cachep, c);
311     }
312     
313     
314     /* Single timer process for all the unresolved queue. */
315     
316     void ipmr_expire_process(unsigned long dummy)
317     {
318     	unsigned long now;
319     	unsigned long expires;
320     	struct mfc_cache *c, **cp;
321     
322     	if (!spin_trylock(&mfc_unres_lock)) {
323     		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
324     		return;
325     	}
326     
327     	if (atomic_read(&cache_resolve_queue_len) == 0)
328     		goto out;
329     
330     	now = jiffies;
331     	expires = 10*HZ;
332     	cp = &mfc_unres_queue;
333     
334     	while ((c=*cp) != NULL) {
335     		long interval = c->mfc_un.unres.expires - now;
336     
337     		if (interval > 0) {
338     			if (interval < expires)
339     				expires = interval;
340     			cp = &c->next;
341     			continue;
342     		}
343     
344     		*cp = c->next;
345     
346     		ipmr_destroy_unres(c);
347     	}
348     
349     	if (atomic_read(&cache_resolve_queue_len))
350     		mod_timer(&ipmr_expire_timer, jiffies + expires);
351     
352     out:
353     	spin_unlock(&mfc_unres_lock);
354     }
355     
356     /* Fill oifs list. It is called under write locked mrt_lock. */
357     
358     static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
359     {
360     	int vifi;
361     
362     	cache->mfc_un.res.minvif = MAXVIFS;
363     	cache->mfc_un.res.maxvif = 0;
364     	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
365     
366     	for (vifi=0; vifi<maxvif; vifi++) {
367     		if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
368     			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
369     			if (cache->mfc_un.res.minvif > vifi)
370     				cache->mfc_un.res.minvif = vifi;
371     			if (cache->mfc_un.res.maxvif <= vifi)
372     				cache->mfc_un.res.maxvif = vifi + 1;
373     		}
374     	}
375     }
376     
377     static int vif_add(struct vifctl *vifc, int mrtsock)
378     {
379     	int vifi = vifc->vifc_vifi;
380     	struct vif_device *v = &vif_table[vifi];
381     	struct net_device *dev;
382     	struct in_device *in_dev;
383     
384     	/* Is vif busy ? */
385     	if (VIF_EXISTS(vifi))
386     		return -EADDRINUSE;
387     
388     	switch (vifc->vifc_flags) {
389     #ifdef CONFIG_IP_PIMSM
390     	case VIFF_REGISTER:
391     		/*
392     		 * Special Purpose VIF in PIM
393     		 * All the packets will be sent to the daemon
394     		 */
395     		if (reg_vif_num >= 0)
396     			return -EADDRINUSE;
397     		dev = ipmr_reg_vif(vifc);
398     		if (!dev)
399     			return -ENOBUFS;
400     		break;
401     #endif
402     	case VIFF_TUNNEL:	
403     		dev = ipmr_new_tunnel(vifc);
404     		if (!dev)
405     			return -ENOBUFS;
406     		break;
407     	case 0:
408     		dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
409     		if (!dev)
410     			return -EADDRNOTAVAIL;
411     		__dev_put(dev);
412     		break;
413     	default:
414     		return -EINVAL;
415     	}
416     
417     	if ((in_dev = __in_dev_get(dev)) == NULL)
418     		return -EADDRNOTAVAIL;
419     	in_dev->cnf.mc_forwarding++;
420     	dev_set_allmulti(dev, +1);
421     	ip_rt_multicast_event(in_dev);
422     
423     	/*
424     	 *	Fill in the VIF structures
425     	 */
426     	v->rate_limit=vifc->vifc_rate_limit;
427     	v->local=vifc->vifc_lcl_addr.s_addr;
428     	v->remote=vifc->vifc_rmt_addr.s_addr;
429     	v->flags=vifc->vifc_flags;
430     	if (!mrtsock)
431     		v->flags |= VIFF_STATIC;
432     	v->threshold=vifc->vifc_threshold;
433     	v->bytes_in = 0;
434     	v->bytes_out = 0;
435     	v->pkt_in = 0;
436     	v->pkt_out = 0;
437     	v->link = dev->ifindex;
438     	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
439     		v->link = dev->iflink;
440     
441     	/* And finish update writing critical data */
442     	write_lock_bh(&mrt_lock);
443     	dev_hold(dev);
444     	v->dev=dev;
445     #ifdef CONFIG_IP_PIMSM
446     	if (v->flags&VIFF_REGISTER)
447     		reg_vif_num = vifi;
448     #endif
449     	if (vifi+1 > maxvif)
450     		maxvif = vifi+1;
451     	write_unlock_bh(&mrt_lock);
452     	return 0;
453     }
454     
455     static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
456     {
457     	int line=MFC_HASH(mcastgrp,origin);
458     	struct mfc_cache *c;
459     
460     	for (c=mfc_cache_array[line]; c; c = c->next) {
461     		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
462     			break;
463     	}
464     	return c;
465     }
466     
467     /*
468      *	Allocate a multicast cache entry
469      */
470     static struct mfc_cache *ipmr_cache_alloc(void)
471     {
472     	struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
473     	if(c==NULL)
474     		return NULL;
475     	memset(c, 0, sizeof(*c));
476     	c->mfc_un.res.minvif = MAXVIFS;
477     	return c;
478     }
479     
480     static struct mfc_cache *ipmr_cache_alloc_unres(void)
481     {
482     	struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
483     	if(c==NULL)
484     		return NULL;
485     	memset(c, 0, sizeof(*c));
486     	skb_queue_head_init(&c->mfc_un.unres.unresolved);
487     	c->mfc_un.unres.expires = jiffies + 10*HZ;
488     	return c;
489     }
490     
491     /*
492      *	A cache entry has gone into a resolved state from queued
493      */
494      
495     static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
496     {
497     	struct sk_buff *skb;
498     
499     	/*
500     	 *	Play the pending entries through our router
501     	 */
502     
503     	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
504     #ifdef CONFIG_RTNETLINK
505     		if (skb->nh.iph->version == 0) {
506     			int err;
507     			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
508     
509     			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
510     				nlh->nlmsg_len = skb->tail - (u8*)nlh;
511     			} else {
512     				nlh->nlmsg_type = NLMSG_ERROR;
513     				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
514     				skb_trim(skb, nlh->nlmsg_len);
515     				((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
516     			}
517     			err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
518     		} else
519     #endif
520     			ip_mr_forward(skb, c, 0);
521     	}
522     }
523     
524     /*
525      *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
526      *	expects the following bizarre scheme.
527      *
528      *	Called under mrt_lock.
529      */
530      
531     static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
532     {
533     	struct sk_buff *skb;
534     	int ihl = pkt->nh.iph->ihl<<2;
535     	struct igmphdr *igmp;
536     	struct igmpmsg *msg;
537     	int ret;
538     
539     #ifdef CONFIG_IP_PIMSM
540     	if (assert == IGMPMSG_WHOLEPKT)
541     		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
542     	else
543     #endif
544     		skb = alloc_skb(128, GFP_ATOMIC);
545     
546     	if(!skb)
547     		return -ENOBUFS;
548     
549     #ifdef CONFIG_IP_PIMSM
550     	if (assert == IGMPMSG_WHOLEPKT) {
551     		/* Ugly, but we have no choice with this interface.
552     		   Duplicate old header, fix ihl, length etc.
553     		   And all this only to mangle msg->im_msgtype and
554     		   to set msg->im_mbz to "mbz" :-)
555     		 */
556     		msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
557     		skb->nh.raw = skb->h.raw = (u8*)msg;
558     		memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
559     		msg->im_msgtype = IGMPMSG_WHOLEPKT;
560     		msg->im_mbz = 0;
561      		msg->im_vif = reg_vif_num;
562     		skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
563     		skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
564     	} else 
565     #endif
566     	{	
567     		
568     	/*
569     	 *	Copy the IP header
570     	 */
571     
572     	skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
573     	memcpy(skb->data,pkt->data,ihl);
574     	skb->nh.iph->protocol = 0;			/* Flag to the kernel this is a route add */
575     	msg = (struct igmpmsg*)skb->nh.iph;
576     	msg->im_vif = vifi;
577     	skb->dst = dst_clone(pkt->dst);
578     
579     	/*
580     	 *	Add our header
581     	 */
582     
583     	igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
584     	igmp->type	=
585     	msg->im_msgtype = assert;
586     	igmp->code 	=	0;
587     	skb->nh.iph->tot_len=htons(skb->len);			/* Fix the length */
588     	skb->h.raw = skb->nh.raw;
589             }
590     
591     	if (mroute_socket == NULL) {
592     		kfree_skb(skb);
593     		return -EINVAL;
594     	}
595     
596     	/*
597     	 *	Deliver to mrouted
598     	 */
599     	if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
600     		if (net_ratelimit())
601     			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
602     		kfree_skb(skb);
603     	}
604     
605     	return ret;
606     }
607     
608     /*
609      *	Queue a packet for resolution. It gets locked cache entry!
610      */
611      
612     static int
613     ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
614     {
615     	int err;
616     	struct mfc_cache *c;
617     
618     	spin_lock_bh(&mfc_unres_lock);
619     	for (c=mfc_unres_queue; c; c=c->next) {
620     		if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
621     		    c->mfc_origin == skb->nh.iph->saddr)
622     			break;
623     	}
624     
625     	if (c == NULL) {
626     		/*
627     		 *	Create a new entry if allowable
628     		 */
629     
630     		if (atomic_read(&cache_resolve_queue_len)>=10 ||
631     		    (c=ipmr_cache_alloc_unres())==NULL) {
632     			spin_unlock_bh(&mfc_unres_lock);
633     
634     			kfree_skb(skb);
635     			return -ENOBUFS;
636     		}
637     
638     		/*
639     		 *	Fill in the new cache entry
640     		 */
641     		c->mfc_parent=-1;
642     		c->mfc_origin=skb->nh.iph->saddr;
643     		c->mfc_mcastgrp=skb->nh.iph->daddr;
644     
645     		/*
646     		 *	Reflect first query at mrouted.
647     		 */
648     		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
649     			/* If the report failed throw the cache entry 
650     			   out - Brad Parker
651     			 */
652     			spin_unlock_bh(&mfc_unres_lock);
653     
654     			kmem_cache_free(mrt_cachep, c);
655     			kfree_skb(skb);
656     			return err;
657     		}
658     
659     		atomic_inc(&cache_resolve_queue_len);
660     		c->next = mfc_unres_queue;
661     		mfc_unres_queue = c;
662     
663     		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
664     	}
665     
666     	/*
667     	 *	See if we can append the packet
668     	 */
669     	if (c->mfc_un.unres.unresolved.qlen>3) {
670     		kfree_skb(skb);
671     		err = -ENOBUFS;
672     	} else {
673     		skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
674     		err = 0;
675     	}
676     
677     	spin_unlock_bh(&mfc_unres_lock);
678     	return err;
679     }
680     
681     /*
682      *	MFC cache manipulation by user space mroute daemon
683      */
684     
685     int ipmr_mfc_delete(struct mfcctl *mfc)
686     {
687     	int line;
688     	struct mfc_cache *c, **cp;
689     
690     	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
691     
692     	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
693     		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
694     		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
695     			write_lock_bh(&mrt_lock);
696     			*cp = c->next;
697     			write_unlock_bh(&mrt_lock);
698     
699     			kmem_cache_free(mrt_cachep, c);
700     			return 0;
701     		}
702     	}
703     	return -ENOENT;
704     }
705     
706     int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
707     {
708     	int line;
709     	struct mfc_cache *uc, *c, **cp;
710     
711     	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
712     
713     	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
714     		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
715     		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
716     			break;
717     	}
718     
719     	if (c != NULL) {
720     		write_lock_bh(&mrt_lock);
721     		c->mfc_parent = mfc->mfcc_parent;
722     		ipmr_update_threshoulds(c, mfc->mfcc_ttls);
723     		if (!mrtsock)
724     			c->mfc_flags |= MFC_STATIC;
725     		write_unlock_bh(&mrt_lock);
726     		return 0;
727     	}
728     
729     	if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
730     		return -EINVAL;
731     
732     	c=ipmr_cache_alloc();
733     	if (c==NULL)
734     		return -ENOMEM;
735     
736     	c->mfc_origin=mfc->mfcc_origin.s_addr;
737     	c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
738     	c->mfc_parent=mfc->mfcc_parent;
739     	ipmr_update_threshoulds(c, mfc->mfcc_ttls);
740     	if (!mrtsock)
741     		c->mfc_flags |= MFC_STATIC;
742     
743     	write_lock_bh(&mrt_lock);
744     	c->next = mfc_cache_array[line];
745     	mfc_cache_array[line] = c;
746     	write_unlock_bh(&mrt_lock);
747     
748     	/*
749     	 *	Check to see if we resolved a queued list. If so we
750     	 *	need to send on the frames and tidy up.
751     	 */
752     	spin_lock_bh(&mfc_unres_lock);
753     	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
754     	     cp = &uc->next) {
755     		if (uc->mfc_origin == c->mfc_origin &&
756     		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
757     			*cp = uc->next;
758     			if (atomic_dec_and_test(&cache_resolve_queue_len))
759     				del_timer(&ipmr_expire_timer);
760     			break;
761     		}
762     	}
763     	spin_unlock_bh(&mfc_unres_lock);
764     
765     	if (uc) {
766     		ipmr_cache_resolve(uc, c);
767     		kmem_cache_free(mrt_cachep, uc);
768     	}
769     	return 0;
770     }
771     
772     /*
773      *	Close the multicast socket, and clear the vif tables etc
774      */
775      
776     static void mroute_clean_tables(struct sock *sk)
777     {
778     	int i;
779     		
780     	/*
781     	 *	Shut down all active vif entries
782     	 */
783     	for(i=0; i<maxvif; i++) {
784     		if (!(vif_table[i].flags&VIFF_STATIC))
785     			vif_delete(i);
786     	}
787     
788     	/*
789     	 *	Wipe the cache
790     	 */
791     	for (i=0;i<MFC_LINES;i++) {
792     		struct mfc_cache *c, **cp;
793     
794     		cp = &mfc_cache_array[i];
795     		while ((c = *cp) != NULL) {
796     			if (c->mfc_flags&MFC_STATIC) {
797     				cp = &c->next;
798     				continue;
799     			}
800     			write_lock_bh(&mrt_lock);
801     			*cp = c->next;
802     			write_unlock_bh(&mrt_lock);
803     
804     			kmem_cache_free(mrt_cachep, c);
805     		}
806     	}
807     
808     	if (atomic_read(&cache_resolve_queue_len) != 0) {
809     		struct mfc_cache *c;
810     
811     		spin_lock_bh(&mfc_unres_lock);
812     		while (mfc_unres_queue != NULL) {
813     			c = mfc_unres_queue;
814     			mfc_unres_queue = c->next;
815     			spin_unlock_bh(&mfc_unres_lock);
816     
817     			ipmr_destroy_unres(c);
818     
819     			spin_lock_bh(&mfc_unres_lock);
820     		}
821     		spin_unlock_bh(&mfc_unres_lock);
822     	}
823     }
824     
825     static void mrtsock_destruct(struct sock *sk)
826     {
827     	rtnl_lock();
828     	if (sk == mroute_socket) {
829     		ipv4_devconf.mc_forwarding--;
830     
831     		write_lock_bh(&mrt_lock);
832     		mroute_socket=NULL;
833     		write_unlock_bh(&mrt_lock);
834     
835     		mroute_clean_tables(sk);
836     	}
837     	rtnl_unlock();
838     }
839     
840     /*
841      *	Socket options and virtual interface manipulation. The whole
842      *	virtual interface system is a complete heap, but unfortunately
843      *	that's how BSD mrouted happens to think. Maybe one day with a proper
844      *	MOSPF/PIM router set up we can clean this up.
845      */
846      
847     int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
848     {
849     	int ret;
850     	struct vifctl vif;
851     	struct mfcctl mfc;
852     	
853     	if(optname!=MRT_INIT)
854     	{
855     		if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
856     			return -EACCES;
857     	}
858     
859     	switch(optname)
860     	{
861     		case MRT_INIT:
862     			if(sk->type!=SOCK_RAW || sk->num!=IPPROTO_IGMP)
863     				return -EOPNOTSUPP;
864     			if(optlen!=sizeof(int))
865     				return -ENOPROTOOPT;
866     
867     			rtnl_lock();
868     			if (mroute_socket) {
869     				rtnl_unlock();
870     				return -EADDRINUSE;
871     			}
872     
873     			ret = ip_ra_control(sk, 1, mrtsock_destruct);
874     			if (ret == 0) {
875     				write_lock_bh(&mrt_lock);
876     				mroute_socket=sk;
877     				write_unlock_bh(&mrt_lock);
878     
879     				ipv4_devconf.mc_forwarding++;
880     			}
881     			rtnl_unlock();
882     			return ret;
883     		case MRT_DONE:
884     			if (sk!=mroute_socket)
885     				return -EACCES;
886     			return ip_ra_control(sk, 0, NULL);
887     		case MRT_ADD_VIF:
888     		case MRT_DEL_VIF:
889     			if(optlen!=sizeof(vif))
890     				return -EINVAL;
891     			if (copy_from_user(&vif,optval,sizeof(vif)))
892     				return -EFAULT; 
893     			if(vif.vifc_vifi >= MAXVIFS)
894     				return -ENFILE;
895     			rtnl_lock();
896     			if (optname==MRT_ADD_VIF) {
897     				ret = vif_add(&vif, sk==mroute_socket);
898     			} else {
899     				ret = vif_delete(vif.vifc_vifi);
900     			}
901     			rtnl_unlock();
902     			return ret;
903     
904     		/*
905     		 *	Manipulate the forwarding caches. These live
906     		 *	in a sort of kernel/user symbiosis.
907     		 */
908     		case MRT_ADD_MFC:
909     		case MRT_DEL_MFC:
910     			if(optlen!=sizeof(mfc))
911     				return -EINVAL;
912     			if (copy_from_user(&mfc,optval, sizeof(mfc)))
913     				return -EFAULT;
914     			rtnl_lock();
915     			if (optname==MRT_DEL_MFC)
916     				ret = ipmr_mfc_delete(&mfc);
917     			else
918     				ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
919     			rtnl_unlock();
920     			return ret;
921     		/*
922     		 *	Control PIM assert.
923     		 */
924     		case MRT_ASSERT:
925     		{
926     			int v;
927     			if(get_user(v,(int *)optval))
928     				return -EFAULT;
929     			mroute_do_assert=(v)?1:0;
930     			return 0;
931     		}
932     #ifdef CONFIG_IP_PIMSM
933     		case MRT_PIM:
934     		{
935     			int v;
936     			if(get_user(v,(int *)optval))
937     				return -EFAULT;
938     			v = (v)?1:0;
939     			rtnl_lock();
940     			if (v != mroute_do_pim) {
941     				mroute_do_pim = v;
942     				mroute_do_assert = v;
943     #ifdef CONFIG_IP_PIMSM_V2
944     				if (mroute_do_pim)
945     					inet_add_protocol(&pim_protocol);
946     				else
947     					inet_del_protocol(&pim_protocol);
948     #endif
949     			}
950     			rtnl_unlock();
951     			return 0;
952     		}
953     #endif
954     		/*
955     		 *	Spurious command, or MRT_VERSION which you cannot
956     		 *	set.
957     		 */
958     		default:
959     			return -ENOPROTOOPT;
960     	}
961     }
962     
963     /*
964      *	Getsock opt support for the multicast routing system.
965      */
966      
967     int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
968     {
969     	int olr;
970     	int val;
971     
972     	if(optname!=MRT_VERSION && 
973     #ifdef CONFIG_IP_PIMSM
974     	   optname!=MRT_PIM &&
975     #endif
976     	   optname!=MRT_ASSERT)
977     		return -ENOPROTOOPT;
978     
979     	if (get_user(olr, optlen))
980     		return -EFAULT;
981     
982     	olr = min_t(unsigned int, olr, sizeof(int));
983     	if (olr < 0)
984     		return -EINVAL;
985     		
986     	if(put_user(olr,optlen))
987     		return -EFAULT;
988     	if(optname==MRT_VERSION)
989     		val=0x0305;
990     #ifdef CONFIG_IP_PIMSM
991     	else if(optname==MRT_PIM)
992     		val=mroute_do_pim;
993     #endif
994     	else
995     		val=mroute_do_assert;
996     	if(copy_to_user(optval,&val,olr))
997     		return -EFAULT;
998     	return 0;
999     }
1000     
1001     /*
1002      *	The IP multicast ioctl support routines.
1003      */
1004      
1005     int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
1006     {
1007     	struct sioc_sg_req sr;
1008     	struct sioc_vif_req vr;
1009     	struct vif_device *vif;
1010     	struct mfc_cache *c;
1011     	
1012     	switch(cmd)
1013     	{
1014     		case SIOCGETVIFCNT:
1015     			if (copy_from_user(&vr,(void *)arg,sizeof(vr)))
1016     				return -EFAULT; 
1017     			if(vr.vifi>=maxvif)
1018     				return -EINVAL;
1019     			read_lock(&mrt_lock);
1020     			vif=&vif_table[vr.vifi];
1021     			if(VIF_EXISTS(vr.vifi))	{
1022     				vr.icount=vif->pkt_in;
1023     				vr.ocount=vif->pkt_out;
1024     				vr.ibytes=vif->bytes_in;
1025     				vr.obytes=vif->bytes_out;
1026     				read_unlock(&mrt_lock);
1027     
1028     				if (copy_to_user((void *)arg,&vr,sizeof(vr)))
1029     					return -EFAULT;
1030     				return 0;
1031     			}
1032     			read_unlock(&mrt_lock);
1033     			return -EADDRNOTAVAIL;
1034     		case SIOCGETSGCNT:
1035     			if (copy_from_user(&sr,(void *)arg,sizeof(sr)))
1036     				return -EFAULT;
1037     
1038     			read_lock(&mrt_lock);
1039     			c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1040     			if (c) {
1041     				sr.pktcnt = c->mfc_un.res.pkt;
1042     				sr.bytecnt = c->mfc_un.res.bytes;
1043     				sr.wrong_if = c->mfc_un.res.wrong_if;
1044     				read_unlock(&mrt_lock);
1045     
1046     				if (copy_to_user((void *)arg,&sr,sizeof(sr)))
1047     					return -EFAULT;
1048     				return 0;
1049     			}
1050     			read_unlock(&mrt_lock);
1051     			return -EADDRNOTAVAIL;
1052     		default:
1053     			return -ENOIOCTLCMD;
1054     	}
1055     }
1056     
1057     
1058     static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1059     {
1060     	struct vif_device *v;
1061     	int ct;
1062     	if (event != NETDEV_UNREGISTER)
1063     		return NOTIFY_DONE;
1064     	v=&vif_table[0];
1065     	for(ct=0;ct<maxvif;ct++,v++) {
1066     		if (v->dev==ptr)
1067     			vif_delete(ct);
1068     	}
1069     	return NOTIFY_DONE;
1070     }
1071     
1072     
1073     static struct notifier_block ip_mr_notifier={
1074     	ipmr_device_event,
1075     	NULL,
1076     	0
1077     };
1078     
1079     /*
1080      * 	Encapsulate a packet by attaching a valid IPIP header to it.
1081      *	This avoids tunnel drivers and other mess and gives us the speed so
1082      *	important for multicast video.
1083      */
1084      
1085     static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
1086     {
1087     	struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
1088     
1089     	iph->version	= 	4;
1090     	iph->tos	=	skb->nh.iph->tos;
1091     	iph->ttl	=	skb->nh.iph->ttl;
1092     	iph->frag_off	=	0;
1093     	iph->daddr	=	daddr;
1094     	iph->saddr	=	saddr;
1095     	iph->protocol	=	IPPROTO_IPIP;
1096     	iph->ihl	=	5;
1097     	iph->tot_len	=	htons(skb->len);
1098     	ip_select_ident(iph, skb->dst, NULL);
1099     	ip_send_check(iph);
1100     
1101     	skb->h.ipiph = skb->nh.iph;
1102     	skb->nh.iph = iph;
1103     #ifdef CONFIG_NETFILTER
1104     	nf_conntrack_put(skb->nfct);
1105     	skb->nfct = NULL;
1106     #endif
1107     }
1108     
1109     static inline int ipmr_forward_finish(struct sk_buff *skb)
1110     {
1111     	struct dst_entry *dst = skb->dst;
1112     
1113     	if (skb->len <= dst->pmtu)
1114     		return dst->output(skb);
1115     	else
1116     		return ip_fragment(skb, dst->output);
1117     }
1118     
1119     /*
1120      *	Processing handlers for ipmr_forward
1121      */
1122     
1123     static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
1124     			   int vifi, int last)
1125     {
1126     	struct iphdr *iph = skb->nh.iph;
1127     	struct vif_device *vif = &vif_table[vifi];
1128     	struct net_device *dev;
1129     	struct rtable *rt;
1130     	int    encap = 0;
1131     	struct sk_buff *skb2;
1132     
1133     	if (vif->dev == NULL)
1134     		return;
1135     
1136     #ifdef CONFIG_IP_PIMSM
1137     	if (vif->flags & VIFF_REGISTER) {
1138     		vif->pkt_out++;
1139     		vif->bytes_out+=skb->len;
1140     		((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
1141     		((struct net_device_stats*)vif->dev->priv)->tx_packets++;
1142     		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1143     		return;
1144     	}
1145     #endif
1146     
1147     	if (vif->flags&VIFF_TUNNEL) {
1148     		if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
1149     			return;
1150     		encap = sizeof(struct iphdr);
1151     	} else {
1152     		if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
1153     			return;
1154     	}
1155     
1156     	dev = rt->u.dst.dev;
1157     
1158     	if (skb->len+encap > rt->u.dst.pmtu && (ntohs(iph->frag_off) & IP_DF)) {
1159     		/* Do not fragment multicasts. Alas, IPv4 does not
1160     		   allow to send ICMP, so that packets will disappear
1161     		   to blackhole.
1162     		 */
1163     
1164     		IP_INC_STATS_BH(IpFragFails);
1165     		ip_rt_put(rt);
1166     		return;
1167     	}
1168     
1169     	encap += dev->hard_header_len;
1170     
1171     	if (skb_headroom(skb) < encap || skb_cloned(skb) || !last)
1172     		skb2 = skb_realloc_headroom(skb, (encap + 15)&~15);
1173     	else if (atomic_read(&skb->users) != 1)
1174     		skb2 = skb_clone(skb, GFP_ATOMIC);
1175     	else {
1176     		atomic_inc(&skb->users);
1177     		skb2 = skb;
1178     	}
1179     
1180     	if (skb2 == NULL) {
1181     		ip_rt_put(rt);
1182     		return;
1183     	}
1184     
1185     	vif->pkt_out++;
1186     	vif->bytes_out+=skb->len;
1187     
1188     	dst_release(skb2->dst);
1189     	skb2->dst = &rt->u.dst;
1190     	iph = skb2->nh.iph;
1191     	ip_decrease_ttl(iph);
1192     
1193     	/* FIXME: forward and output firewalls used to be called here.
1194     	 * What do we do with netfilter? -- RR */
1195     	if (vif->flags & VIFF_TUNNEL) {
1196     		ip_encap(skb2, vif->local, vif->remote);
1197     		/* FIXME: extra output firewall step used to be here. --RR */
1198     		((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
1199     		((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len;
1200     	}
1201     
1202     	IPCB(skb2)->flags |= IPSKB_FORWARDED;
1203     
1204     	/*
1205     	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1206     	 * not only before forwarding, but after forwarding on all output
1207     	 * interfaces. It is clear, if mrouter runs a multicasting
1208     	 * program, it should receive packets not depending to what interface
1209     	 * program is joined.
1210     	 * If we will not make it, the program will have to join on all
1211     	 * interfaces. On the other hand, multihoming host (or router, but
1212     	 * not mrouter) cannot join to more than one interface - it will
1213     	 * result in receiving multiple packets.
1214     	 */
1215     	NF_HOOK(PF_INET, NF_IP_FORWARD, skb2, skb->dev, dev, 
1216     		ipmr_forward_finish);
1217     }
1218     
1219     int ipmr_find_vif(struct net_device *dev)
1220     {
1221     	int ct;
1222     	for (ct=maxvif-1; ct>=0; ct--) {
1223     		if (vif_table[ct].dev == dev)
1224     			break;
1225     	}
1226     	return ct;
1227     }
1228     
1229     /* "local" means that we should preserve one skb (for local delivery) */
1230     
1231     int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1232     {
1233     	int psend = -1;
1234     	int vif, ct;
1235     
1236     	vif = cache->mfc_parent;
1237     	cache->mfc_un.res.pkt++;
1238     	cache->mfc_un.res.bytes += skb->len;
1239     
1240     	/*
1241     	 * Wrong interface: drop packet and (maybe) send PIM assert.
1242     	 */
1243     	if (vif_table[vif].dev != skb->dev) {
1244     		int true_vifi;
1245     
1246     		if (((struct rtable*)skb->dst)->key.iif == 0) {
1247     			/* It is our own packet, looped back.
1248     			   Very complicated situation...
1249     
1250     			   The best workaround until routing daemons will be
1251     			   fixed is not to redistribute packet, if it was
1252     			   send through wrong interface. It means, that
1253     			   multicast applications WILL NOT work for
1254     			   (S,G), which have default multicast route pointing
1255     			   to wrong oif. In any case, it is not a good
1256     			   idea to use multicasting applications on router.
1257     			 */
1258     			goto dont_forward;
1259     		}
1260     
1261     		cache->mfc_un.res.wrong_if++;
1262     		true_vifi = ipmr_find_vif(skb->dev);
1263     
1264     		if (true_vifi >= 0 && mroute_do_assert &&
1265     		    /* pimsm uses asserts, when switching from RPT to SPT,
1266     		       so that we cannot check that packet arrived on an oif.
1267     		       It is bad, but otherwise we would need to move pretty
1268     		       large chunk of pimd to kernel. Ough... --ANK
1269     		     */
1270     		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1271     		    jiffies - cache->mfc_un.res.last_assert > MFC_ASSERT_THRESH) {
1272     			cache->mfc_un.res.last_assert = jiffies;
1273     			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1274     		}
1275     		goto dont_forward;
1276     	}
1277     
1278     	vif_table[vif].pkt_in++;
1279     	vif_table[vif].bytes_in+=skb->len;
1280     
1281     	/*
1282     	 *	Forward the frame
1283     	 */
1284     	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1285     		if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
1286     			if (psend != -1)
1287     				ipmr_queue_xmit(skb, cache, psend, 0);
1288     			psend=ct;
1289     		}
1290     	}
1291     	if (psend != -1)
1292     		ipmr_queue_xmit(skb, cache, psend, !local);
1293     
1294     dont_forward:
1295     	if (!local)
1296     		kfree_skb(skb);
1297     	return 0;
1298     }
1299     
1300     
1301     /*
1302      *	Multicast packets for forwarding arrive here
1303      */
1304     
1305     int ip_mr_input(struct sk_buff *skb)
1306     {
1307     	struct mfc_cache *cache;
1308     	int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
1309     
1310     	/* Packet is looped back after forward, it should not be
1311     	   forwarded second time, but still can be delivered locally.
1312     	 */
1313     	if (IPCB(skb)->flags&IPSKB_FORWARDED)
1314     		goto dont_forward;
1315     
1316     	if (!local) {
1317     		    if (IPCB(skb)->opt.router_alert) {
1318     			    if (ip_call_ra_chain(skb))
1319     				    return 0;
1320     		    } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
1321     			    /* IGMPv1 (and broken IGMPv2 implementations sort of
1322     			       Cisco IOS <= 11.2(8)) do not put router alert
1323     			       option to IGMP packets destined to routable
1324     			       groups. It is very bad, because it means
1325     			       that we can forward NO IGMP messages.
1326     			     */
1327     			    read_lock(&mrt_lock);
1328     			    if (mroute_socket) {
1329     				    raw_rcv(mroute_socket, skb);
1330     				    read_unlock(&mrt_lock);
1331     				    return 0;
1332     			    }
1333     			    read_unlock(&mrt_lock);
1334     		    }
1335     	}
1336     
1337     	read_lock(&mrt_lock);
1338     	cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
1339     
1340     	/*
1341     	 *	No usable cache entry
1342     	 */
1343     	if (cache==NULL) {
1344     		int vif;
1345     
1346     		if (local) {
1347     			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1348     			ip_local_deliver(skb);
1349     			if (skb2 == NULL) {
1350     				read_unlock(&mrt_lock);
1351     				return -ENOBUFS;
1352     			}
1353     			skb = skb2;
1354     		}
1355     
1356     		vif = ipmr_find_vif(skb->dev);
1357     		if (vif >= 0) {
1358     			int err = ipmr_cache_unresolved(vif, skb);
1359     			read_unlock(&mrt_lock);
1360     
1361     			return err;
1362     		}
1363     		read_unlock(&mrt_lock);
1364     		kfree_skb(skb);
1365     		return -ENODEV;
1366     	}
1367     
1368     	ip_mr_forward(skb, cache, local);
1369     
1370     	read_unlock(&mrt_lock);
1371     
1372     	if (local)
1373     		return ip_local_deliver(skb);
1374     
1375     	return 0;
1376     
1377     dont_forward:
1378     	if (local)
1379     		return ip_local_deliver(skb);
1380     	kfree_skb(skb);
1381     	return 0;
1382     }
1383     
1384     #ifdef CONFIG_IP_PIMSM_V1
1385     /*
1386      * Handle IGMP messages of PIMv1
1387      */
1388     
1389     int pim_rcv_v1(struct sk_buff * skb)
1390     {
1391     	struct igmphdr *pim = (struct igmphdr*)skb->h.raw;
1392     	struct iphdr   *encap;
1393     	struct net_device  *reg_dev = NULL;
1394     
1395     	if (skb_is_nonlinear(skb)) {
1396     		if (skb_linearize(skb, GFP_ATOMIC) != 0) {
1397     			kfree_skb(skb);
1398     			return -ENOMEM;
1399     		}
1400     		pim = (struct igmphdr*)skb->h.raw;
1401     	}
1402     
1403             if (!mroute_do_pim ||
1404     	    skb->len < sizeof(*pim) + sizeof(*encap) ||
1405     	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) {
1406     		kfree_skb(skb);
1407                     return -EINVAL;
1408             }
1409     
1410     	encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
1411     	/*
1412     	   Check that:
1413     	   a. packet is really destinted to a multicast group
1414     	   b. packet is not a NULL-REGISTER
1415     	   c. packet is not truncated
1416     	 */
1417     	if (!MULTICAST(encap->daddr) ||
1418     	    ntohs(encap->tot_len) == 0 ||
1419     	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
1420     		kfree_skb(skb);
1421     		return -EINVAL;
1422     	}
1423     
1424     	read_lock(&mrt_lock);
1425     	if (reg_vif_num >= 0)
1426     		reg_dev = vif_table[reg_vif_num].dev;
1427     	if (reg_dev)
1428     		dev_hold(reg_dev);
1429     	read_unlock(&mrt_lock);
1430     
1431     	if (reg_dev == NULL) {
1432     		kfree_skb(skb);
1433     		return -EINVAL;
1434     	}
1435     
1436     	skb->mac.raw = skb->nh.raw;
1437     	skb_pull(skb, (u8*)encap - skb->data);
1438     	skb->nh.iph = (struct iphdr *)skb->data;
1439     	skb->dev = reg_dev;
1440     	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1441     	skb->protocol = __constant_htons(ETH_P_IP);
1442     	skb->ip_summed = 0;
1443     	skb->pkt_type = PACKET_HOST;
1444     	dst_release(skb->dst);
1445     	skb->dst = NULL;
1446     	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1447     	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1448     #ifdef CONFIG_NETFILTER
1449     	nf_conntrack_put(skb->nfct);
1450     	skb->nfct = NULL;
1451     #endif
1452     	netif_rx(skb);
1453     	dev_put(reg_dev);
1454     	return 0;
1455     }
1456     #endif
1457     
1458     #ifdef CONFIG_IP_PIMSM_V2
1459     int pim_rcv(struct sk_buff * skb)
1460     {
1461     	struct pimreghdr *pim = (struct pimreghdr*)skb->h.raw;
1462     	struct iphdr   *encap;
1463     	struct net_device  *reg_dev = NULL;
1464     
1465     	if (skb_is_nonlinear(skb)) {
1466     		if (skb_linearize(skb, GFP_ATOMIC) != 0) {
1467     			kfree_skb(skb);
1468     			return -ENOMEM;
1469     		}
1470     		pim = (struct pimreghdr*)skb->h.raw;
1471     	}
1472     
1473             if (skb->len < sizeof(*pim) + sizeof(*encap) ||
1474     	    pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1475     	    (pim->flags&PIM_NULL_REGISTER) ||
1476     	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1477     	     ip_compute_csum((void *)pim, skb->len))) {
1478     		kfree_skb(skb);
1479                     return -EINVAL;
1480             }
1481     
1482     	/* check if the inner packet is destined to mcast group */
1483     	encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
1484     	if (!MULTICAST(encap->daddr) ||
1485     	    ntohs(encap->tot_len) == 0 ||
1486     	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
1487     		kfree_skb(skb);
1488     		return -EINVAL;
1489     	}
1490     
1491     	read_lock(&mrt_lock);
1492     	if (reg_vif_num >= 0)
1493     		reg_dev = vif_table[reg_vif_num].dev;
1494     	if (reg_dev)
1495     		dev_hold(reg_dev);
1496     	read_unlock(&mrt_lock);
1497     
1498     	if (reg_dev == NULL) {
1499     		kfree_skb(skb);
1500     		return -EINVAL;
1501     	}
1502     
1503     	skb->mac.raw = skb->nh.raw;
1504     	skb_pull(skb, (u8*)encap - skb->data);
1505     	skb->nh.iph = (struct iphdr *)skb->data;
1506     	skb->dev = reg_dev;
1507     	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1508     	skb->protocol = __constant_htons(ETH_P_IP);
1509     	skb->ip_summed = 0;
1510     	skb->pkt_type = PACKET_HOST;
1511     	dst_release(skb->dst);
1512     	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1513     	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1514     	skb->dst = NULL;
1515     #ifdef CONFIG_NETFILTER
1516     	nf_conntrack_put(skb->nfct);
1517     	skb->nfct = NULL;
1518     #endif
1519     	netif_rx(skb);
1520     	dev_put(reg_dev);
1521     	return 0;
1522     }
1523     #endif
1524     
1525     #ifdef CONFIG_RTNETLINK
1526     
1527     static int
1528     ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1529     {
1530     	int ct;
1531     	struct rtnexthop *nhp;
1532     	struct net_device *dev = vif_table[c->mfc_parent].dev;
1533     	u8 *b = skb->tail;
1534     	struct rtattr *mp_head;
1535     
1536     	if (dev)
1537     		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1538     
1539     	mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1540     
1541     	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1542     		if (c->mfc_un.res.ttls[ct] < 255) {
1543     			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1544     				goto rtattr_failure;
1545     			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1546     			nhp->rtnh_flags = 0;
1547     			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1548     			nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1549     			nhp->rtnh_len = sizeof(*nhp);
1550     		}
1551     	}
1552     	mp_head->rta_type = RTA_MULTIPATH;
1553     	mp_head->rta_len = skb->tail - (u8*)mp_head;
1554     	rtm->rtm_type = RTN_MULTICAST;
1555     	return 1;
1556     
1557     rtattr_failure:
1558     	skb_trim(skb, b - skb->data);
1559     	return -EMSGSIZE;
1560     }
1561     
1562     int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1563     {
1564     	int err;
1565     	struct mfc_cache *cache;
1566     	struct rtable *rt = (struct rtable*)skb->dst;
1567     
1568     	read_lock(&mrt_lock);
1569     	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1570     
1571     	if (cache==NULL) {
1572     		struct net_device *dev;
1573     		int vif;
1574     
1575     		if (nowait) {
1576     			read_unlock(&mrt_lock);
1577     			return -EAGAIN;
1578     		}
1579     
1580     		dev = skb->dev;
1581     		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1582     			read_unlock(&mrt_lock);
1583     			return -ENODEV;
1584     		}
1585     		skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
1586     		skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
1587     		skb->nh.iph->saddr = rt->rt_src;
1588     		skb->nh.iph->daddr = rt->rt_dst;
1589     		skb->nh.iph->version = 0;
1590     		err = ipmr_cache_unresolved(vif, skb);
1591     		read_unlock(&mrt_lock);
1592     		return err;
1593     	}
1594     
1595     	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1596     		cache->mfc_flags |= MFC_NOTIFY;
1597     	err = ipmr_fill_mroute(skb, cache, rtm);
1598     	read_unlock(&mrt_lock);
1599     	return err;
1600     }
1601     #endif
1602     
1603     #ifdef CONFIG_PROC_FS	
1604     /*
1605      *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1606      */
1607      
1608     static int ipmr_vif_info(char *buffer, char **start, off_t offset, int length)
1609     {
1610     	struct vif_device *vif;
1611     	int len=0;
1612     	off_t pos=0;
1613     	off_t begin=0;
1614     	int size;
1615     	int ct;
1616     
1617     	len += sprintf(buffer,
1618     		 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1619     	pos=len;
1620       
1621     	read_lock(&mrt_lock);
1622     	for (ct=0;ct<maxvif;ct++) 
1623     	{
1624     		char *name = "none";
1625     		vif=&vif_table[ct];
1626     		if(!VIF_EXISTS(ct))
1627     			continue;
1628     		if (vif->dev)
1629     			name = vif->dev->name;
1630             	size = sprintf(buffer+len, "%2d %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1631             		ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out,
1632             		vif->flags, vif->local, vif->remote);
1633     		len+=size;
1634     		pos+=size;
1635     		if(pos<offset)
1636     		{
1637     			len=0;
1638     			begin=pos;
1639     		}
1640     		if(pos>offset+length)
1641     			break;
1642       	}
1643     	read_unlock(&mrt_lock);
1644       	
1645       	*start=buffer+(offset-begin);
1646       	len-=(offset-begin);
1647       	if(len>length)
1648       		len=length;
1649     	if (len<0)
1650     		len = 0;
1651       	return len;
1652     }
1653     
1654     static int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length)
1655     {
1656     	struct mfc_cache *mfc;
1657     	int len=0;
1658     	off_t pos=0;
1659     	off_t begin=0;
1660     	int size;
1661     	int ct;
1662     
1663     	len += sprintf(buffer,
1664     		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1665     	pos=len;
1666     
1667     	read_lock(&mrt_lock);
1668     	for (ct=0;ct<MFC_LINES;ct++) 
1669     	{
1670     		for(mfc=mfc_cache_array[ct]; mfc; mfc=mfc->next)
1671     		{
1672     			int n;
1673     
1674     			/*
1675     			 *	Interface forwarding map
1676     			 */
1677     			size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld",
1678     				(unsigned long)mfc->mfc_mcastgrp,
1679     				(unsigned long)mfc->mfc_origin,
1680     				mfc->mfc_parent,
1681     				mfc->mfc_un.res.pkt,
1682     				mfc->mfc_un.res.bytes,
1683     				mfc->mfc_un.res.wrong_if);
1684     			for(n=mfc->mfc_un.res.minvif;n<mfc->mfc_un.res.maxvif;n++)
1685     			{
1686     				if(VIF_EXISTS(n) && mfc->mfc_un.res.ttls[n] < 255)
1687     					size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]);
1688     			}
1689     			size += sprintf(buffer+len+size, "\n");
1690     			len+=size;
1691     			pos+=size;
1692     			if(pos<offset)
1693     			{
1694     				len=0;
1695     				begin=pos;
1696     			}
1697     			if(pos>offset+length)
1698     				goto done;
1699     	  	}
1700       	}
1701     
1702     	spin_lock_bh(&mfc_unres_lock);
1703     	for(mfc=mfc_unres_queue; mfc; mfc=mfc->next) {
1704     		size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld\n",
1705     			       (unsigned long)mfc->mfc_mcastgrp,
1706     			       (unsigned long)mfc->mfc_origin,
1707     			       -1,
1708     				(long)mfc->mfc_un.unres.unresolved.qlen,
1709     				0L, 0L);
1710     		len+=size;
1711     		pos+=size;
1712     		if(pos<offset)
1713     		{
1714     			len=0;
1715     			begin=pos;
1716     		}
1717     		if(pos>offset+length)
1718     			break;
1719     	}
1720     	spin_unlock_bh(&mfc_unres_lock);
1721     
1722     done:
1723     	read_unlock(&mrt_lock);
1724       	*start=buffer+(offset-begin);
1725       	len-=(offset-begin);
1726       	if(len>length)
1727       		len=length;
1728     	if (len < 0) {
1729     		len = 0;
1730     	}
1731       	return len;
1732     }
1733     
1734     #endif	
1735     
1736     #ifdef CONFIG_IP_PIMSM_V2
1737     struct inet_protocol pim_protocol = 
1738     {
1739     	pim_rcv,		/* PIM handler		*/
1740     	NULL,			/* PIM error control	*/
1741     	NULL,			/* next			*/
1742     	IPPROTO_PIM,		/* protocol ID		*/
1743     	0,			/* copy			*/
1744     	NULL,			/* data			*/
1745     	"PIM"			/* name			*/
1746     };
1747     #endif
1748     
1749     
1750     /*
1751      *	Setup for IP multicast routing
1752      */
1753      
1754     void __init ip_mr_init(void)
1755     {
1756     	printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n");
1757     	mrt_cachep = kmem_cache_create("ip_mrt_cache",
1758     				       sizeof(struct mfc_cache),
1759     				       0, SLAB_HWCACHE_ALIGN,
1760     				       NULL, NULL);
1761     	init_timer(&ipmr_expire_timer);
1762     	ipmr_expire_timer.function=ipmr_expire_process;
1763     	register_netdevice_notifier(&ip_mr_notifier);
1764     #ifdef CONFIG_PROC_FS	
1765     	proc_net_create("ip_mr_vif",0,ipmr_vif_info);
1766     	proc_net_create("ip_mr_cache",0,ipmr_mfc_info);
1767     #endif	
1768     }
1769