File: /usr/src/linux/net/packet/af_packet.c

1     /*
2      * INET		An implementation of the TCP/IP protocol suite for the LINUX
3      *		operating system.  INET is implemented using the  BSD Socket
4      *		interface as the means of communication with the user level.
5      *
6      *		PACKET - implements raw packet sockets.
7      *
8      * Version:	$Id: af_packet.c,v 1.56 2001/08/06 13:21:16 davem Exp $
9      *
10      * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
11      *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12      *		Alan Cox, <gw4pts@gw4pts.ampr.org>
13      *
14      * Fixes:	
15      *		Alan Cox	:	verify_area() now used correctly
16      *		Alan Cox	:	new skbuff lists, look ma no backlogs!
17      *		Alan Cox	:	tidied skbuff lists.
18      *		Alan Cox	:	Now uses generic datagram routines I
19      *					added. Also fixed the peek/read crash
20      *					from all old Linux datagram code.
21      *		Alan Cox	:	Uses the improved datagram code.
22      *		Alan Cox	:	Added NULL's for socket options.
23      *		Alan Cox	:	Re-commented the code.
24      *		Alan Cox	:	Use new kernel side addressing
25      *		Rob Janssen	:	Correct MTU usage.
26      *		Dave Platt	:	Counter leaks caused by incorrect
27      *					interrupt locking and some slightly
28      *					dubious gcc output. Can you read
29      *					compiler: it said _VOLATILE_
30      *	Richard Kooijman	:	Timestamp fixes.
31      *		Alan Cox	:	New buffers. Use sk->mac.raw.
32      *		Alan Cox	:	sendmsg/recvmsg support.
33      *		Alan Cox	:	Protocol setting support
34      *	Alexey Kuznetsov	:	Untied from IPv4 stack.
35      *	Cyrus Durgin		:	Fixed kerneld for kmod.
36      *	Michal Ostrowski        :       Module initialization cleanup.
37      *
38      *		This program is free software; you can redistribute it and/or
39      *		modify it under the terms of the GNU General Public License
40      *		as published by the Free Software Foundation; either version
41      *		2 of the License, or (at your option) any later version.
42      *
43      */
44      
45     #include <linux/config.h>
46     #include <linux/types.h>
47     #include <linux/sched.h>
48     #include <linux/mm.h>
49     #include <linux/fcntl.h>
50     #include <linux/socket.h>
51     #include <linux/in.h>
52     #include <linux/inet.h>
53     #include <linux/netdevice.h>
54     #include <linux/if_packet.h>
55     #include <linux/wireless.h>
56     #include <linux/kmod.h>
57     #include <net/ip.h>
58     #include <net/protocol.h>
59     #include <linux/skbuff.h>
60     #include <net/sock.h>
61     #include <linux/errno.h>
62     #include <linux/timer.h>
63     #include <asm/system.h>
64     #include <asm/uaccess.h>
65     #include <asm/ioctls.h>
66     #include <linux/proc_fs.h>
67     #include <linux/poll.h>
68     #include <linux/module.h>
69     #include <linux/init.h>
70     #include <linux/if_bridge.h>
71     
72     #ifdef CONFIG_NET_DIVERT
73     #include <linux/divert.h>
74     #endif /* CONFIG_NET_DIVERT */
75     
76     #ifdef CONFIG_INET
77     #include <net/inet_common.h>
78     #endif
79     
80     #ifdef CONFIG_DLCI
81     extern int dlci_ioctl(unsigned int, void*);
82     #endif
83     
84     #define CONFIG_SOCK_PACKET	1
85     
86     /*
87        Proposed replacement for SIOC{ADD,DEL}MULTI and
88        IFF_PROMISC, IFF_ALLMULTI flags.
89     
90        It is more expensive, but I believe,
91        it is really correct solution: reentrant, safe and fault tolerant.
92     
93        IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping
94        reference count and global flag, so that real status is
95        (gflag|(count != 0)), so that we can use obsolete faulty interface
96        not harming clever users.
97      */
98     #define CONFIG_PACKET_MULTICAST	1
99     
100     /*
101        Assumptions:
102        - if device has no dev->hard_header routine, it adds and removes ll header
103          inside itself. In this case ll header is invisible outside of device,
104          but higher levels still should reserve dev->hard_header_len.
105          Some devices are enough clever to reallocate skb, when header
106          will not fit to reserved space (tunnel), another ones are silly
107          (PPP).
108        - packet socket receives packets with pulled ll header,
109          so that SOCK_RAW should push it back.
110     
111     On receive:
112     -----------
113     
114     Incoming, dev->hard_header!=NULL
115        mac.raw -> ll header
116        data    -> data
117     
118     Outgoing, dev->hard_header!=NULL
119        mac.raw -> ll header
120        data    -> ll header
121     
122     Incoming, dev->hard_header==NULL
123        mac.raw -> UNKNOWN position. It is very likely, that it points to ll header.
124                   PPP makes it, that is wrong, because it introduces asymmetry
125     	      between rx and tx paths.
126        data    -> data
127     
128     Outgoing, dev->hard_header==NULL
129        mac.raw -> data. ll header is still not built!
130        data    -> data
131     
132     Resume
133       If dev->hard_header==NULL we are unlikely to restore sensible ll header.
134     
135     
136     On transmit:
137     ------------
138     
139     dev->hard_header != NULL
140        mac.raw -> ll header
141        data    -> ll header
142     
143     dev->hard_header == NULL (ll header is added by device, we cannot control it)
144        mac.raw -> data
145        data -> data
146     
147        We should set nh.raw on output to correct position,
148        packet classifier depends on it.
149      */
150     
/*
 * List of all packet sockets.
 *
 * Singly linked through sk->next. packet_create() pushes new sockets on
 * the head and packet_release() unlinks them; both take
 * packet_sklist_lock for writing with bottom halves disabled. Each list
 * entry holds one reference on its socket.
 */
static struct sock * packet_sklist;
static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED;

/*
 * Number of packet sockets currently alive: incremented in
 * packet_create(), decremented in packet_sock_destruct().
 */
atomic_t packet_socks_nr;
156     
157     
158     /* Private packet socket structures. */
159     
#ifdef CONFIG_PACKET_MULTICAST
/*
 * One entry of a socket's membership list (packet_opt.mclist).
 *
 * NOTE(review): the code that fills and walks this list is not in the
 * visible part of the file; the field descriptions below are inferred
 * from the names and should be confirmed against that code.
 */
struct packet_mclist
{
	/* Next entry on the same socket's list. */
	struct packet_mclist	*next;
	/* Presumably the interface index the membership applies to. */
	int			ifindex;
	/* Presumably a reference count for repeated identical requests. */
	int			count;
	/* Presumably the membership kind requested by the user. */
	unsigned short		type;
	/* Presumably the number of valid bytes in addr[]. */
	unsigned short		alen;
	unsigned char		addr[8];
};
#endif
/*
 * Forward declarations for helpers that packet_release() calls before
 * their definitions appear.
 */
#ifdef CONFIG_PACKET_MMAP
/* packet_release() calls this with a zeroed request and closing == 1. */
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
#endif

/* Called from packet_release() when CONFIG_PACKET_MULTICAST is set. */
static void packet_flush_mclist(struct sock *sk);
176     
/*
 * Per-socket private state, allocated in packet_create() and reached
 * through sk->protinfo.af_packet.
 */
struct packet_opt
{
	/* Receive hook handed to dev_add_pack()/dev_remove_pack(). */
	struct packet_type	prot_hook;
	/* Taken by packet_do_bind() while the hook is swapped. */
	spinlock_t		bind_lock;
	char			running;	/* prot_hook is attached*/
	int			ifindex;	/* bound device		*/
	/* tp_packets / tp_drops counters updated by the receive hooks. */
	struct tpacket_stats	stats;
#ifdef CONFIG_PACKET_MULTICAST
	struct packet_mclist	*mclist;
#endif
#ifdef CONFIG_PACKET_MMAP
	/*
	 * Ring buffer state. pg_vec is non-NULL while a ring exists
	 * (packet_release() tears the ring down when it is set).
	 * NOTE(review): mapped and the pg_vec_* sizes are maintained by
	 * code outside the visible part of the file.
	 */
	atomic_t		mapped;
	unsigned long		*pg_vec;
	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;

	/* Table of frame headers; tpacket_rcv() fills iovec[head]. */
	struct tpacket_hdr	**iovec;
	/* Size of one ring frame; captured data is truncated to fit. */
	unsigned int		frame_size;
	/* Highest valid index into iovec[]; head wraps to 0 after it. */
	unsigned int		iovmax;
	/* Index of the next frame tpacket_rcv() will try to fill. */
	unsigned int		head;
	/* Non-zero: also queue a full copy of packets too big for a frame. */
	int			copy_thresh;
#endif
};
201     
202     void packet_sock_destruct(struct sock *sk)
203     {
204     	BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
205     	BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
206     
207     	if (!sk->dead) {
208     		printk("Attempt to release alive packet socket: %p\n", sk);
209     		return;
210     	}
211     
212     	if (sk->protinfo.destruct_hook)
213     		kfree(sk->protinfo.destruct_hook);
214     	atomic_dec(&packet_socks_nr);
215     #ifdef PACKET_REFCNT_DEBUG
216     	printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
217     #endif
218     	MOD_DEC_USE_COUNT;
219     }
220     
221     
222     extern struct proto_ops packet_ops;
223     
224     #ifdef CONFIG_SOCK_PACKET
225     extern struct proto_ops packet_ops_spkt;
226     
227     static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
228     {
229     	struct sock *sk;
230     	struct sockaddr_pkt *spkt;
231     
232     	/*
233     	 *	When we registered the protocol we saved the socket in the data
234     	 *	field for just this event.
235     	 */
236     
237     	sk = (struct sock *) pt->data;
238     	
239     	/*
240     	 *	Yank back the headers [hope the device set this
241     	 *	right or kerboom...]
242     	 *
243     	 *	Incoming packets have ll header pulled,
244     	 *	push it back.
245     	 *
246     	 *	For outgoing ones skb->data == skb->mac.raw
247     	 *	so that this procedure is noop.
248     	 */
249     
250     	if (skb->pkt_type == PACKET_LOOPBACK)
251     		goto out;
252     
253     	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
254     		goto oom;
255     
256     	spkt = (struct sockaddr_pkt*)skb->cb;
257     
258     	skb_push(skb, skb->data-skb->mac.raw);
259     
260     	/*
261     	 *	The SOCK_PACKET socket receives _all_ frames.
262     	 */
263     
264     	spkt->spkt_family = dev->type;
265     	strncpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
266     	spkt->spkt_protocol = skb->protocol;
267     
268     	/*
269     	 *	Charge the memory to the socket. This is done specifically
270     	 *	to prevent sockets using all the memory up.
271     	 */
272     
273     	if (sock_queue_rcv_skb(sk,skb) == 0)
274     		return 0;
275     
276     out:
277     	kfree_skb(skb);
278     oom:
279     	return 0;
280     }
281     
282     
283     /*
284      *	Output a raw packet to a device layer. This bypasses all the other
285      *	protocol layers and you must therefore supply it with a complete frame
286      */
287      
288     static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, int len,
289     			       struct scm_cookie *scm)
290     {
291     	struct sock *sk = sock->sk;
292     	struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
293     	struct sk_buff *skb;
294     	struct net_device *dev;
295     	unsigned short proto=0;
296     	int err;
297     	
298     	/*
299     	 *	Get and verify the address. 
300     	 */
301     
302     	if (saddr)
303     	{
304     		if (msg->msg_namelen < sizeof(struct sockaddr))
305     			return(-EINVAL);
306     		if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
307     			proto=saddr->spkt_protocol;
308     	}
309     	else
310     		return(-ENOTCONN);	/* SOCK_PACKET must be sent giving an address */
311     
312     	/*
313     	 *	Find the device first to size check it 
314     	 */
315     
316     	saddr->spkt_device[13] = 0;
317     	dev = dev_get_by_name(saddr->spkt_device);
318     	err = -ENODEV;
319     	if (dev == NULL)
320     		goto out_unlock;
321     	
322     	/*
323     	 *	You may not queue a frame bigger than the mtu. This is the lowest level
324     	 *	raw protocol and you must do your own fragmentation at this level.
325     	 */
326     	 
327     	err = -EMSGSIZE;
328      	if(len>dev->mtu+dev->hard_header_len)
329     		goto out_unlock;
330     
331     	err = -ENOBUFS;
332     	skb = sock_wmalloc(sk, len+dev->hard_header_len+15, 0, GFP_KERNEL);
333     
334     	/*
335     	 *	If the write buffer is full, then tough. At this level the user gets to
336     	 *	deal with the problem - do your own algorithmic backoffs. That's far
337     	 *	more flexible.
338     	 */
339     	 
340     	if (skb == NULL) 
341     		goto out_unlock;
342     
343     	/*
344     	 *	Fill it in 
345     	 */
346     	 
347     	/* FIXME: Save some space for broken drivers that write a
348     	 * hard header at transmission time by themselves. PPP is the
349     	 * notable one here. This should really be fixed at the driver level.
350     	 */
351     	skb_reserve(skb,(dev->hard_header_len+15)&~15);
352     	skb->nh.raw = skb->data;
353     
354     	/* Try to align data part correctly */
355     	if (dev->hard_header) {
356     		skb->data -= dev->hard_header_len;
357     		skb->tail -= dev->hard_header_len;
358     	}
359     
360     	/* Returns -EFAULT on error */
361     	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
362     	skb->protocol = proto;
363     	skb->dev = dev;
364     	skb->priority = sk->priority;
365     	if (err)
366     		goto out_free;
367     
368     	err = -ENETDOWN;
369     	if (!(dev->flags & IFF_UP))
370     		goto out_free;
371     
372     	/*
373     	 *	Now send it
374     	 */
375     
376     	dev_queue_xmit(skb);
377     	dev_put(dev);
378     	return(len);
379     
380     out_free:
381     	kfree_skb(skb);
382     out_unlock:
383     	if (dev)
384     		dev_put(dev);
385     	return err;
386     }
387     #endif
388     
389     /*
390        This function makes lazy skb cloning in hope that most of packets
391        are discarded by BPF.
392     
393        Note tricky part: we DO mangle shared skb! skb->data, skb->len
394        and skb->cb are mangled. It works because (and until) packets
395        falling here are owned by current CPU. Output packets are cloned
396        by dev_queue_xmit_nit(), input packets are processed by net_bh
397        sequentially, so that if we return skb to original state on exit,
398        we will not harm anyone.
399      */
400     
/*
 *	Default receive hook for packet sockets (packet_create() installs
 *	it as prot_hook.func unless the socket is SOCK_PACKET).
 *
 *	skb	- received buffer, possibly shared with other users.
 *	dev	- device the packet was seen on.
 *	pt	- the socket's packet_type; pt->data is the owning socket.
 *
 *	Always returns 0 and always gives up the caller's reference on
 *	skb: the buffer (or a clone of it) is either queued on the socket
 *	or released. If data/len of a shared skb were adjusted here they
 *	are put back before that reference is dropped.
 */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_opt *po;
	/* Remember the original view of the buffer so it can be restored. */
	u8 * skb_head = skb->data;
	int skb_len = skb->len;
#ifdef CONFIG_FILTER
	unsigned snaplen;
#endif

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = (struct sock *) pt->data;
	po = sk->protinfo.af_packet;

	skb->dev = dev;

	if (dev->hard_header) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides details of its frame
		   structure, so that corresponding packet head
		   never delivered to user.
		 */
		if (sk->type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb->mac.raw);
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb->nh.raw - skb->data);
		}
	}

#ifdef CONFIG_FILTER
	snaplen = skb->len;

	if (sk->filter) {
		unsigned res = snaplen;
		struct sk_filter *filter;

		/* Re-check the filter pointer under the socket lock. */
		bh_lock_sock(sk);
		if ((filter = sk->filter) != NULL)
			res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
		bh_unlock_sock(sk);

		/* A zero result rejects the packet; this is not counted as a drop. */
		if (res == 0)
			goto drop_n_restore;
		/* Otherwise the result caps how many bytes are kept. */
		if (snaplen > res)
			snaplen = res;
	}
#endif /* CONFIG_FILTER */

	/* Receive buffer exhausted: account the packet as dropped. */
	if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		/*
		 * Only now, when the packet is known to be wanted, take a
		 * private clone. The shared original gets its data/len back
		 * before our reference to it is dropped.
		 */
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	/* The source address lives in skb->cb until packet_recvmsg() copies it out. */
	sll = (struct sockaddr_ll*)skb->cb;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	sll->sll_ifindex = dev->ifindex;
	sll->sll_halen = 0;

	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);

#ifdef CONFIG_FILTER
	/* Cut the buffer down to the length the filter asked for. */
	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;
#endif

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	/* The queue lock also protects the statistics counters. */
	spin_lock(&sk->receive_queue.lock);
	po->stats.tp_packets++;
	__skb_queue_tail(&sk->receive_queue, skb);
	spin_unlock(&sk->receive_queue.lock);
	sk->data_ready(sk,skb->len);
	return 0;

drop_n_acct:
	spin_lock(&sk->receive_queue.lock);
	po->stats.tp_drops++;
	spin_unlock(&sk->receive_queue.lock);

#ifdef CONFIG_FILTER
drop_n_restore:
#endif
	/* Undo our data/len adjustment if others still reference this skb. */
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;
}
512     
513     #ifdef CONFIG_PACKET_MMAP
/*
 *	Receive hook that copies packets into the socket's frame ring
 *	(po->iovec) instead of queueing skbs.
 *	NOTE(review): the code that installs this hook is not in the
 *	visible part of the file - presumably packet_set_ring(); confirm.
 *
 *	skb	- received buffer, possibly shared with other users.
 *	dev	- device the packet was seen on.
 *	pt	- the socket's packet_type; pt->data is the owning socket.
 *
 *	Always returns 0 and always drops the caller's reference on skb.
 *	At most frame_size - macoff bytes of the linear part of the packet
 *	are copied into the next ring frame. When the frame is too small
 *	and po->copy_thresh is set, the complete packet is additionally
 *	queued on the receive queue if the receive buffer has room.
 */
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
	struct sock *sk;
	struct packet_opt *po;
	struct sockaddr_ll *sll;
	struct tpacket_hdr *h;
	/* Original view of the buffer, restored on exit if it is shared. */
	u8 * skb_head = skb->data;
	int skb_len = skb->len;
	unsigned snaplen;
	/* LOSING is cleared further down when no drop has been counted. */
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff;
	struct sk_buff *copy_skb = NULL;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = (struct sock *) pt->data;
	po = sk->protinfo.af_packet;

	if (dev->hard_header) {
		if (sk->type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb->mac.raw);
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb->nh.raw - skb->data);
			if (skb->ip_summed == CHECKSUM_HW)
				status |= TP_STATUS_CSUMNOTREADY;
		}
	}

	snaplen = skb->len;

#ifdef CONFIG_FILTER
	if (sk->filter) {
		unsigned res = snaplen;
		struct sk_filter *filter;

		/* Re-check the filter pointer under the socket lock. */
		bh_lock_sock(sk);
		if ((filter = sk->filter) != NULL)
			res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
		bh_unlock_sock(sk);

		/* Zero rejects the packet; otherwise the result caps snaplen. */
		if (res == 0)
			goto drop_n_restore;
		if (snaplen > res)
			snaplen = res;
	}
#endif

	/*
	 * Work out where inside the frame the mac header (macoff) and the
	 * network header (netoff) are placed. At least 16 bytes are left
	 * between the aligned frame header and the network header.
	 */
	if (sk->type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
	} else {
		unsigned maclen = skb->nh.raw - skb->data;
		netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->frame_size) {
		/* Packet does not fit into one frame. */
		if (po->copy_thresh &&
		    atomic_read(&sk->rmem_alloc) + skb->truesize < (unsigned)sk->rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				/*
				 * Take a second reference to the same skb.
				 * Updating skb_head makes the restore at
				 * drop_n_restore a no-op, so the queued
				 * buffer keeps the adjusted data pointer.
				 */
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->frame_size - macoff;
		/* macoff alone may exceed the frame: copy nothing then. */
		if ((int)snaplen < 0)
			snaplen = 0;
	}
	/* The memcpy below reads from skb->data, so stay within the linear part. */
	if (snaplen > skb->len-skb->data_len)
		snaplen = skb->len-skb->data_len;

	spin_lock(&sk->receive_queue.lock);
	h = po->iovec[po->head];

	/* A non-zero status means this frame has not been released yet. */
	if (h->tp_status)
		goto ring_is_full;
	po->head = po->head != po->iovmax ? po->head+1 : 0;
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->receive_queue.lock);

	memcpy((u8*)h + macoff, skb->data, snaplen);

	h->tp_len = skb->len;
	h->tp_snaplen = snaplen;
	h->tp_mac = macoff;
	h->tp_net = netoff;
	h->tp_sec = skb->stamp.tv_sec;
	h->tp_usec = skb->stamp.tv_usec;

	/* The link level address is stored right after the aligned frame header. */
	sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
	sll->sll_halen = 0;
	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	sll->sll_ifindex = dev->ifindex;

	/* The status word is written last, after the frame contents, followed by a barrier. */
	h->tp_status = status;
	mb();

	sk->data_ready(sk, 0);

drop_n_restore:
	/* Undo our data/len adjustment if others still reference this skb. */
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
        kfree_skb(skb);
	return 0;

ring_is_full:
	/* No free frame: count the drop, wake the reader, discard any copy. */
	po->stats.tp_drops++;
	spin_unlock(&sk->receive_queue.lock);

	sk->data_ready(sk, 0);
	if (copy_skb)
		kfree_skb(copy_skb);
	goto drop_n_restore;
}
647     
648     #endif
649     
650     
651     static int packet_sendmsg(struct socket *sock, struct msghdr *msg, int len,
652     			  struct scm_cookie *scm)
653     {
654     	struct sock *sk = sock->sk;
655     	struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
656     	struct sk_buff *skb;
657     	struct net_device *dev;
658     	unsigned short proto;
659     	unsigned char *addr;
660     	int ifindex, err, reserve = 0;
661     
662     	/*
663     	 *	Get and verify the address. 
664     	 */
665     	 
666     	if (saddr == NULL) {
667     		ifindex	= sk->protinfo.af_packet->ifindex;
668     		proto	= sk->num;
669     		addr	= NULL;
670     	} else {
671     		err = -EINVAL;
672     		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
673     			goto out;
674     		ifindex	= saddr->sll_ifindex;
675     		proto	= saddr->sll_protocol;
676     		addr	= saddr->sll_addr;
677     	}
678     
679     
680     	dev = dev_get_by_index(ifindex);
681     	err = -ENXIO;
682     	if (dev == NULL)
683     		goto out_unlock;
684     	if (sock->type == SOCK_RAW)
685     		reserve = dev->hard_header_len;
686     
687     	err = -EMSGSIZE;
688     	if (len > dev->mtu+reserve)
689     		goto out_unlock;
690     
691     	skb = sock_alloc_send_skb(sk, len+dev->hard_header_len+15, 
692     				msg->msg_flags & MSG_DONTWAIT, &err);
693     	if (skb==NULL)
694     		goto out_unlock;
695     
696     	skb_reserve(skb, (dev->hard_header_len+15)&~15);
697     	skb->nh.raw = skb->data;
698     
699     	if (dev->hard_header) {
700     		int res;
701     		err = -EINVAL;
702     		res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
703     		if (sock->type != SOCK_DGRAM) {
704     			skb->tail = skb->data;
705     			skb->len = 0;
706     		} else if (res < 0)
707     			goto out_free;
708     	}
709     
710     	/* Returns -EFAULT on error */
711     	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
712     	if (err)
713     		goto out_free;
714     
715     	skb->protocol = proto;
716     	skb->dev = dev;
717     	skb->priority = sk->priority;
718     
719     	err = -ENETDOWN;
720     	if (!(dev->flags & IFF_UP))
721     		goto out_free;
722     
723     	/*
724     	 *	Now send it
725     	 */
726     
727     	err = dev_queue_xmit(skb);
728     	if (err > 0 && (err = net_xmit_errno(err)) != 0)
729     		goto out_unlock;
730     
731     	dev_put(dev);
732     
733     	return(len);
734     
735     out_free:
736     	kfree_skb(skb);
737     out_unlock:
738     	if (dev)
739     		dev_put(dev);
740     out:
741     	return err;
742     }
743     
744     /*
745      *	Close a PACKET socket. This is fairly simple. We immediately go
746      *	to 'closed' state and remove our protocol entry in the device list.
747      */
748     
749     static int packet_release(struct socket *sock)
750     {
751     	struct sock *sk = sock->sk;
752     	struct sock **skp;
753     
754     	if (!sk)
755     		return 0;
756     
757     	write_lock_bh(&packet_sklist_lock);
758     	for (skp = &packet_sklist; *skp; skp = &(*skp)->next) {
759     		if (*skp == sk) {
760     			*skp = sk->next;
761     			__sock_put(sk);
762     			break;
763     		}
764     	}
765     	write_unlock_bh(&packet_sklist_lock);
766     
767     	/*
768     	 *	Unhook packet receive handler.
769     	 */
770     
771     	if (sk->protinfo.af_packet->running) {
772     		/*
773     		 *	Remove the protocol hook
774     		 */
775     		dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
776     		sk->protinfo.af_packet->running = 0;
777     		__sock_put(sk);
778     	}
779     
780     #ifdef CONFIG_PACKET_MULTICAST
781     	packet_flush_mclist(sk);
782     #endif
783     
784     #ifdef CONFIG_PACKET_MMAP
785     	if (sk->protinfo.af_packet->pg_vec) {
786     		struct tpacket_req req;
787     		memset(&req, 0, sizeof(req));
788     		packet_set_ring(sk, &req, 1);
789     	}
790     #endif
791     
792     	/*
793     	 *	Now the socket is dead. No more input will appear.
794     	 */
795     
796     	sock_orphan(sk);
797     	sock->sk = NULL;
798     
799     	/* Purge queues */
800     
801     	skb_queue_purge(&sk->receive_queue);
802     
803     	sock_put(sk);
804     	return 0;
805     }
806     
807     /*
808      *	Attach a packet hook.
809      */
810     
811     static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
812     {
813     	/*
814     	 *	Detach an existing hook if present.
815     	 */
816     
817     	lock_sock(sk);
818     
819     	spin_lock(&sk->protinfo.af_packet->bind_lock);
820     	if (sk->protinfo.af_packet->running) {
821     		dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
822     		__sock_put(sk);
823     		sk->protinfo.af_packet->running = 0;
824     	}
825     
826     	sk->num = protocol;
827     	sk->protinfo.af_packet->prot_hook.type = protocol;
828     	sk->protinfo.af_packet->prot_hook.dev = dev;
829     
830     	sk->protinfo.af_packet->ifindex = dev ? dev->ifindex : 0;
831     
832     	if (protocol == 0)
833     		goto out_unlock;
834     
835     	if (dev) {
836     		if (dev->flags&IFF_UP) {
837     			dev_add_pack(&sk->protinfo.af_packet->prot_hook);
838     			sock_hold(sk);
839     			sk->protinfo.af_packet->running = 1;
840     		} else {
841     			sk->err = ENETDOWN;
842     			if (!sk->dead)
843     				sk->error_report(sk);
844     		}
845     	} else {
846     		dev_add_pack(&sk->protinfo.af_packet->prot_hook);
847     		sock_hold(sk);
848     		sk->protinfo.af_packet->running = 1;
849     	}
850     
851     out_unlock:
852     	spin_unlock(&sk->protinfo.af_packet->bind_lock);
853     	release_sock(sk);
854     	return 0;
855     }
856     
857     /*
858      *	Bind a packet socket to a device
859      */
860     
861     #ifdef CONFIG_SOCK_PACKET
862     
863     static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
864     {
865     	struct sock *sk=sock->sk;
866     	char name[15];
867     	struct net_device *dev;
868     	int err = -ENODEV;
869     	
870     	/*
871     	 *	Check legality
872     	 */
873     	 
874     	if(addr_len!=sizeof(struct sockaddr))
875     		return -EINVAL;
876     	strncpy(name,uaddr->sa_data,14);
877     	name[14]=0;
878     
879     	dev = dev_get_by_name(name);
880     	if (dev) {
881     		err = packet_do_bind(sk, dev, sk->num);
882     		dev_put(dev);
883     	}
884     	return err;
885     }
886     #endif
887     
888     static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
889     {
890     	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
891     	struct sock *sk=sock->sk;
892     	struct net_device *dev = NULL;
893     	int err;
894     
895     
896     	/*
897     	 *	Check legality
898     	 */
899     	 
900     	if (addr_len < sizeof(struct sockaddr_ll))
901     		return -EINVAL;
902     	if (sll->sll_family != AF_PACKET)
903     		return -EINVAL;
904     
905     	if (sll->sll_ifindex) {
906     		err = -ENODEV;
907     		dev = dev_get_by_index(sll->sll_ifindex);
908     		if (dev == NULL)
909     			goto out;
910     	}
911     	err = packet_do_bind(sk, dev, sll->sll_protocol ? : sk->num);
912     	if (dev)
913     		dev_put(dev);
914     
915     out:
916     	return err;
917     }
918     
919     
920     /*
921      *	Create a packet of type SOCK_PACKET. 
922      */
923     
924     static int packet_create(struct socket *sock, int protocol)
925     {
926     	struct sock *sk;
927     	int err;
928     
929     	if (!capable(CAP_NET_RAW))
930     		return -EPERM;
931     	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
932     #ifdef CONFIG_SOCK_PACKET
933     	    && sock->type != SOCK_PACKET
934     #endif
935     	    )
936     		return -ESOCKTNOSUPPORT;
937     
938     	sock->state = SS_UNCONNECTED;
939     	MOD_INC_USE_COUNT;
940     
941     	err = -ENOBUFS;
942     	sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1);
943     	if (sk == NULL)
944     		goto out;
945     
946     	sock->ops = &packet_ops;
947     #ifdef CONFIG_SOCK_PACKET
948     	if (sock->type == SOCK_PACKET)
949     		sock->ops = &packet_ops_spkt;
950     #endif
951     	sock_init_data(sock,sk);
952     
953     	sk->protinfo.af_packet = kmalloc(sizeof(struct packet_opt), GFP_KERNEL);
954     	if (sk->protinfo.af_packet == NULL)
955     		goto out_free;
956     	memset(sk->protinfo.af_packet, 0, sizeof(struct packet_opt));
957     	sk->family = PF_PACKET;
958     	sk->num = protocol;
959     
960     	sk->destruct = packet_sock_destruct;
961     	atomic_inc(&packet_socks_nr);
962     
963     	/*
964     	 *	Attach a protocol block
965     	 */
966     
967     	spin_lock_init(&sk->protinfo.af_packet->bind_lock);
968     	sk->protinfo.af_packet->prot_hook.func = packet_rcv;
969     #ifdef CONFIG_SOCK_PACKET
970     	if (sock->type == SOCK_PACKET)
971     		sk->protinfo.af_packet->prot_hook.func = packet_rcv_spkt;
972     #endif
973     	sk->protinfo.af_packet->prot_hook.data = (void *)sk;
974     
975     	if (protocol) {
976     		sk->protinfo.af_packet->prot_hook.type = protocol;
977     		dev_add_pack(&sk->protinfo.af_packet->prot_hook);
978     		sock_hold(sk);
979     		sk->protinfo.af_packet->running = 1;
980     	}
981     
982     	write_lock_bh(&packet_sklist_lock);
983     	sk->next = packet_sklist;
984     	packet_sklist = sk;
985     	sock_hold(sk);
986     	write_unlock_bh(&packet_sklist_lock);
987     	return(0);
988     
989     out_free:
990     	sk_free(sk);
991     out:
992     	MOD_DEC_USE_COUNT;
993     	return err;
994     }
995     
996     /*
997      *	Pull a packet from our receive queue and hand it to the user.
998      *	If necessary we block.
999      */
1000     
1001     static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len,
1002     			  int flags, struct scm_cookie *scm)
1003     {
1004     	struct sock *sk = sock->sk;
1005     	struct sk_buff *skb;
1006     	int copied, err;
1007     
1008     	err = -EINVAL;
1009     	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC))
1010     		goto out;
1011     
1012     #if 0
1013     	/* What error should we return now? EUNATTACH? */
1014     	if (sk->protinfo.af_packet->ifindex < 0)
1015     		return -ENODEV;
1016     #endif
1017     
1018     	/*
1019     	 *	If the address length field is there to be filled in, we fill
1020     	 *	it in now.
1021     	 */
1022     
1023     	if (sock->type == SOCK_PACKET)
1024     		msg->msg_namelen = sizeof(struct sockaddr_pkt);
1025     	else
1026     		msg->msg_namelen = sizeof(struct sockaddr_ll);
1027     
1028     	/*
1029     	 *	Call the generic datagram receiver. This handles all sorts
1030     	 *	of horrible races and re-entrancy so we can forget about it
1031     	 *	in the protocol layers.
1032     	 *
1033     	 *	Now it will return ENETDOWN, if device have just gone down,
1034     	 *	but then it will block.
1035     	 */
1036     
1037     	skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1038     
1039     	/*
1040     	 *	An error occurred so return it. Because skb_recv_datagram() 
1041     	 *	handles the blocking we don't see and worry about blocking
1042     	 *	retries.
1043     	 */
1044     
1045     	if(skb==NULL)
1046     		goto out;
1047     
1048     	/*
1049     	 *	You lose any data beyond the buffer you gave. If it worries a
1050     	 *	user program they can ask the device for its MTU anyway.
1051     	 */
1052     
1053     	copied = skb->len;
1054     	if (copied > len)
1055     	{
1056     		copied=len;
1057     		msg->msg_flags|=MSG_TRUNC;
1058     	}
1059     
1060     	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1061     	if (err)
1062     		goto out_free;
1063     
1064     	sock_recv_timestamp(msg, sk, skb);
1065     
1066     	if (msg->msg_name)
1067     		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
1068     
1069     	/*
1070     	 *	Free or return the buffer as appropriate. Again this
1071     	 *	hides all the races and re-entrancy issues from us.
1072     	 */
1073     	err = (flags&MSG_TRUNC) ? skb->len : copied;
1074     
1075     out_free:
1076     	skb_free_datagram(sk, skb);
1077     out:
1078     	return err;
1079     }
1080     
1081     #ifdef CONFIG_SOCK_PACKET
1082     static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1083     			       int *uaddr_len, int peer)
1084     {
1085     	struct net_device *dev;
1086     	struct sock *sk	= sock->sk;
1087     
1088     	if (peer)
1089     		return -EOPNOTSUPP;
1090     
1091     	uaddr->sa_family = AF_PACKET;
1092     	dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
1093     	if (dev) {
1094     		strncpy(uaddr->sa_data, dev->name, 15);
1095     		dev_put(dev);
1096     	} else
1097     		memset(uaddr->sa_data, 0, 14);
1098     	*uaddr_len = sizeof(*uaddr);
1099     
1100     	return 0;
1101     }
1102     #endif
1103     
1104     static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1105     			  int *uaddr_len, int peer)
1106     {
1107     	struct net_device *dev;
1108     	struct sock *sk = sock->sk;
1109     	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1110     
1111     	if (peer)
1112     		return -EOPNOTSUPP;
1113     
1114     	sll->sll_family = AF_PACKET;
1115     	sll->sll_ifindex = sk->protinfo.af_packet->ifindex;
1116     	sll->sll_protocol = sk->num;
1117     	dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
1118     	if (dev) {
1119     		sll->sll_hatype = dev->type;
1120     		sll->sll_halen = dev->addr_len;
1121     		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1122     		dev_put(dev);
1123     	} else {
1124     		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
1125     		sll->sll_halen = 0;
1126     	}
1127     	*uaddr_len = sizeof(*sll);
1128     
1129     	return 0;
1130     }
1131     
1132     #ifdef CONFIG_PACKET_MULTICAST
1133     static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1134     {
1135     	switch (i->type) {
1136     	case PACKET_MR_MULTICAST:
1137     		if (what > 0)
1138     			dev_mc_add(dev, i->addr, i->alen, 0);
1139     		else
1140     			dev_mc_delete(dev, i->addr, i->alen, 0);
1141     		break;
1142     	case PACKET_MR_PROMISC:
1143     		dev_set_promiscuity(dev, what);
1144     		break;
1145     	case PACKET_MR_ALLMULTI:
1146     		dev_set_allmulti(dev, what);
1147     		break;
1148     	default:;
1149     	}
1150     }
1151     
1152     static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1153     {
1154     	for ( ; i; i=i->next) {
1155     		if (i->ifindex == dev->ifindex)
1156     			packet_dev_mc(dev, i, what);
1157     	}
1158     }
1159     
1160     static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
1161     {
1162     	struct packet_mclist *ml, *i;
1163     	struct net_device *dev;
1164     	int err;
1165     
1166     	rtnl_lock();
1167     
1168     	err = -ENODEV;
1169     	dev = __dev_get_by_index(mreq->mr_ifindex);
1170     	if (!dev)
1171     		goto done;
1172     
1173     	err = -EINVAL;
1174     	if (mreq->mr_alen > dev->addr_len)
1175     		goto done;
1176     
1177     	err = -ENOBUFS;
1178     	i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
1179     	if (i == NULL)
1180     		goto done;
1181     
1182     	err = 0;
1183     	for (ml=sk->protinfo.af_packet->mclist; ml; ml=ml->next) {
1184     		if (ml->ifindex == mreq->mr_ifindex &&
1185     		    ml->type == mreq->mr_type &&
1186     		    ml->alen == mreq->mr_alen &&
1187     		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1188     			ml->count++;
1189     			/* Free the new element ... */
1190     			kfree(i);
1191     			goto done;
1192     		}
1193     	}
1194     
1195     	i->type = mreq->mr_type;
1196     	i->ifindex = mreq->mr_ifindex;
1197     	i->alen = mreq->mr_alen;
1198     	memcpy(i->addr, mreq->mr_address, i->alen);
1199     	i->count = 1;
1200     	i->next = sk->protinfo.af_packet->mclist;
1201     	sk->protinfo.af_packet->mclist = i;
1202     	packet_dev_mc(dev, i, +1);
1203     
1204     done:
1205     	rtnl_unlock();
1206     	return err;
1207     }
1208     
1209     static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
1210     {
1211     	struct packet_mclist *ml, **mlp;
1212     
1213     	rtnl_lock();
1214     
1215     	for (mlp=&sk->protinfo.af_packet->mclist; (ml=*mlp)!=NULL; mlp=&ml->next) {
1216     		if (ml->ifindex == mreq->mr_ifindex &&
1217     		    ml->type == mreq->mr_type &&
1218     		    ml->alen == mreq->mr_alen &&
1219     		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1220     			if (--ml->count == 0) {
1221     				struct net_device *dev;
1222     				*mlp = ml->next;
1223     				dev = dev_get_by_index(ml->ifindex);
1224     				if (dev) {
1225     					packet_dev_mc(dev, ml, -1);
1226     					dev_put(dev);
1227     				}
1228     				kfree(ml);
1229     			}
1230     			rtnl_unlock();
1231     			return 0;
1232     		}
1233     	}
1234     	rtnl_unlock();
1235     	return -EADDRNOTAVAIL;
1236     }
1237     
1238     static void packet_flush_mclist(struct sock *sk)
1239     {
1240     	struct packet_mclist *ml;
1241     
1242     	if (sk->protinfo.af_packet->mclist == NULL)
1243     		return;
1244     
1245     	rtnl_lock();
1246     	while ((ml=sk->protinfo.af_packet->mclist) != NULL) {
1247     		struct net_device *dev;
1248     		sk->protinfo.af_packet->mclist = ml->next;
1249     		if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
1250     			packet_dev_mc(dev, ml, -1);
1251     			dev_put(dev);
1252     		}
1253     		kfree(ml);
1254     	}
1255     	rtnl_unlock();
1256     }
1257     #endif
1258     
1259     static int
1260     packet_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen)
1261     {
1262     	struct sock *sk = sock->sk;
1263     	int ret;
1264     
1265     	if (level != SOL_PACKET)
1266     		return -ENOPROTOOPT;
1267     
1268     	switch(optname)	{
1269     #ifdef CONFIG_PACKET_MULTICAST
1270     	case PACKET_ADD_MEMBERSHIP:	
1271     	case PACKET_DROP_MEMBERSHIP:
1272     	{
1273     		struct packet_mreq mreq;
1274     		if (optlen<sizeof(mreq))
1275     			return -EINVAL;
1276     		if (copy_from_user(&mreq,optval,sizeof(mreq)))
1277     			return -EFAULT;
1278     		if (optname == PACKET_ADD_MEMBERSHIP)
1279     			ret = packet_mc_add(sk, &mreq);
1280     		else
1281     			ret = packet_mc_drop(sk, &mreq);
1282     		return ret;
1283     	}
1284     #endif
1285     #ifdef CONFIG_PACKET_MMAP
1286     	case PACKET_RX_RING:
1287     	{
1288     		struct tpacket_req req;
1289     
1290     		if (optlen<sizeof(req))
1291     			return -EINVAL;
1292     		if (copy_from_user(&req,optval,sizeof(req)))
1293     			return -EFAULT;
1294     		return packet_set_ring(sk, &req, 0);
1295     	}
1296     	case PACKET_COPY_THRESH:
1297     	{
1298     		int val;
1299     
1300     		if (optlen!=sizeof(val))
1301     			return -EINVAL;
1302     		if (copy_from_user(&val,optval,sizeof(val)))
1303     			return -EFAULT;
1304     
1305     		sk->protinfo.af_packet->copy_thresh = val;
1306     		return 0;
1307     	}
1308     #endif
1309     	default:
1310     		return -ENOPROTOOPT;
1311     	}
1312     }
1313     
1314     int packet_getsockopt(struct socket *sock, int level, int optname,
1315     		      char *optval, int *optlen)
1316     {
1317     	int len;
1318     	struct sock *sk = sock->sk;
1319     
1320     	if (level != SOL_PACKET)
1321     		return -ENOPROTOOPT;
1322     
1323       	if (get_user(len,optlen))
1324       		return -EFAULT;
1325     
1326     	if (len < 0)
1327     		return -EINVAL;
1328     		
1329     	switch(optname)	{
1330     	case PACKET_STATISTICS:
1331     	{
1332     		struct tpacket_stats st;
1333     
1334     		if (len > sizeof(struct tpacket_stats))
1335     			len = sizeof(struct tpacket_stats);
1336     		spin_lock_bh(&sk->receive_queue.lock);
1337     		st = sk->protinfo.af_packet->stats;
1338     		memset(&sk->protinfo.af_packet->stats, 0, sizeof(st));
1339     		spin_unlock_bh(&sk->receive_queue.lock);
1340     		st.tp_packets += st.tp_drops;
1341     
1342     		if (copy_to_user(optval, &st, len))
1343     			return -EFAULT;
1344     		break;
1345     	}
1346     	default:
1347     		return -ENOPROTOOPT;
1348     	}
1349     
1350       	if (put_user(len, optlen))
1351       		return -EFAULT;
1352       	return 0;
1353     }
1354     
1355     
1356     static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1357     {
1358     	struct sock *sk;
1359     	struct packet_opt *po;
1360     	struct net_device *dev = (struct net_device*)data;
1361     
1362     	read_lock(&packet_sklist_lock);
1363     	for (sk = packet_sklist; sk; sk = sk->next) {
1364     		po = sk->protinfo.af_packet;
1365     
1366     		switch (msg) {
1367     		case NETDEV_DOWN:
1368     		case NETDEV_UNREGISTER:
1369     			if (dev->ifindex == po->ifindex) {
1370     				spin_lock(&po->bind_lock);
1371     				if (po->running) {
1372     					dev_remove_pack(&po->prot_hook);
1373     					__sock_put(sk);
1374     					po->running = 0;
1375     					sk->err = ENETDOWN;
1376     					if (!sk->dead)
1377     						sk->error_report(sk);
1378     				}
1379     				if (msg == NETDEV_UNREGISTER) {
1380     					po->ifindex = -1;
1381     					po->prot_hook.dev = NULL;
1382     				}
1383     				spin_unlock(&po->bind_lock);
1384     			}
1385     #ifdef CONFIG_PACKET_MULTICAST
1386     			if (po->mclist)
1387     				packet_dev_mclist(dev, po->mclist, -1);
1388     #endif
1389     			break;
1390     		case NETDEV_UP:
1391     			spin_lock(&po->bind_lock);
1392     			if (dev->ifindex == po->ifindex && sk->num && po->running==0) {
1393     				dev_add_pack(&po->prot_hook);
1394     				sock_hold(sk);
1395     				po->running = 1;
1396     			}
1397     			spin_unlock(&po->bind_lock);
1398     #ifdef CONFIG_PACKET_MULTICAST
1399     			if (po->mclist)
1400     				packet_dev_mclist(dev, po->mclist, +1);
1401     #endif
1402     			break;
1403     		}
1404     	}
1405     	read_unlock(&packet_sklist_lock);
1406     	return NOTIFY_DONE;
1407     }
1408     
1409     
1410     static int packet_ioctl(struct socket *sock, unsigned int cmd,
1411     			unsigned long arg)
1412     {
1413     	struct sock *sk = sock->sk;
1414     
1415     	switch(cmd) 
1416     	{
1417     		case SIOCOUTQ:
1418     		{
1419     			int amount = atomic_read(&sk->wmem_alloc);
1420     			return put_user(amount, (int *)arg);
1421     		}
1422     		case SIOCINQ:
1423     		{
1424     			struct sk_buff *skb;
1425     			int amount = 0;
1426     
1427     			spin_lock_bh(&sk->receive_queue.lock);
1428     			skb = skb_peek(&sk->receive_queue);
1429     			if (skb)
1430     				amount = skb->len;
1431     			spin_unlock_bh(&sk->receive_queue.lock);
1432     			return put_user(amount, (int *)arg);
1433     		}
1434     		case FIOSETOWN:
1435     		case SIOCSPGRP: {
1436     			int pid;
1437     			if (get_user(pid, (int *) arg))
1438     				return -EFAULT; 
1439     			if (current->pid != pid && current->pgrp != -pid && 
1440     			    !capable(CAP_NET_ADMIN))
1441     				return -EPERM;
1442     			sk->proc = pid;
1443     			break;
1444     		}
1445     		case FIOGETOWN:
1446     		case SIOCGPGRP:
1447     			return put_user(sk->proc, (int *)arg);
1448     		case SIOCGSTAMP:
1449     			if(sk->stamp.tv_sec==0)
1450     				return -ENOENT;
1451     			if (copy_to_user((void *)arg, &sk->stamp,
1452     					 sizeof(struct timeval)))
1453     				return -EFAULT;
1454     			break;
1455     		case SIOCGIFFLAGS:
1456     #ifndef CONFIG_INET
1457     		case SIOCSIFFLAGS:
1458     #endif
1459     		case SIOCGIFCONF:
1460     		case SIOCGIFMETRIC:
1461     		case SIOCSIFMETRIC:
1462     		case SIOCGIFMEM:
1463     		case SIOCSIFMEM:
1464     		case SIOCGIFMTU:
1465     		case SIOCSIFMTU:
1466     		case SIOCSIFLINK:
1467     		case SIOCGIFHWADDR:
1468     		case SIOCSIFHWADDR:
1469     		case SIOCSIFMAP:
1470     		case SIOCGIFMAP:
1471     		case SIOCSIFSLAVE:
1472     		case SIOCGIFSLAVE:
1473     		case SIOCGIFINDEX:
1474     		case SIOCGIFNAME:
1475     		case SIOCGIFCOUNT:
1476     		case SIOCSIFHWBROADCAST:
1477     			return(dev_ioctl(cmd,(void *) arg));
1478     
1479     		case SIOCGIFBR:
1480     		case SIOCSIFBR:
1481     #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
1482     #ifdef CONFIG_INET
1483     #ifdef CONFIG_KMOD
1484     			if (br_ioctl_hook == NULL)
1485     				request_module("bridge");
1486     #endif
1487     			if (br_ioctl_hook != NULL)
1488     				return br_ioctl_hook(arg);
1489     #endif
1490     #endif				
1491     			return -ENOPKG;
1492     
1493     		case SIOCGIFDIVERT:
1494     		case SIOCSIFDIVERT:
1495     #ifdef CONFIG_NET_DIVERT
1496     			return divert_ioctl(cmd, (struct divert_cf *) arg);
1497     #else
1498     			return -ENOPKG;
1499     #endif /* CONFIG_NET_DIVERT */
1500     			
1501     #ifdef CONFIG_INET
1502     		case SIOCADDRT:
1503     		case SIOCDELRT:
1504     		case SIOCDARP:
1505     		case SIOCGARP:
1506     		case SIOCSARP:
1507     		case SIOCGIFADDR:
1508     		case SIOCSIFADDR:
1509     		case SIOCGIFBRDADDR:
1510     		case SIOCSIFBRDADDR:
1511     		case SIOCGIFNETMASK:
1512     		case SIOCSIFNETMASK:
1513     		case SIOCGIFDSTADDR:
1514     		case SIOCSIFDSTADDR:
1515     		case SIOCSIFFLAGS:
1516     		case SIOCADDDLCI:
1517     		case SIOCDELDLCI:
1518     			return inet_dgram_ops.ioctl(sock, cmd, arg);
1519     #endif
1520     
1521     		default:
1522     			if ((cmd >= SIOCDEVPRIVATE) &&
1523     			    (cmd <= (SIOCDEVPRIVATE + 15)))
1524     				return(dev_ioctl(cmd,(void *) arg));
1525     
1526     #ifdef CONFIG_NET_RADIO
1527     			if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST))
1528     				return(dev_ioctl(cmd,(void *) arg));
1529     #endif
1530     			return -EOPNOTSUPP;
1531     	}
1532     	return 0;
1533     }
1534     
1535     #ifndef CONFIG_PACKET_MMAP
1536     #define packet_mmap sock_no_mmap
1537     #define packet_poll datagram_poll
1538     #else
1539     
1540     unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wait)
1541     {
1542     	struct sock *sk = sock->sk;
1543     	struct packet_opt *po = sk->protinfo.af_packet;
1544     	unsigned int mask = datagram_poll(file, sock, wait);
1545     
1546     	spin_lock_bh(&sk->receive_queue.lock);
1547     	if (po->iovec) {
1548     		unsigned last = po->head ? po->head-1 : po->iovmax;
1549     
1550     		if (po->iovec[last]->tp_status)
1551     			mask |= POLLIN | POLLRDNORM;
1552     	}
1553     	spin_unlock_bh(&sk->receive_queue.lock);
1554     	return mask;
1555     }
1556     
1557     
/* Dirty? Well, I still have not found a better way to account
 * for user mmaps.
 */
1561     
1562     static void packet_mm_open(struct vm_area_struct *vma)
1563     {
1564     	struct file *file = vma->vm_file;
1565     	struct inode *inode = file->f_dentry->d_inode;
1566     	struct socket * sock = &inode->u.socket_i;
1567     	struct sock *sk = sock->sk;
1568     	
1569     	if (sk)
1570     		atomic_inc(&sk->protinfo.af_packet->mapped);
1571     }
1572     
1573     static void packet_mm_close(struct vm_area_struct *vma)
1574     {
1575     	struct file *file = vma->vm_file;
1576     	struct inode *inode = file->f_dentry->d_inode;
1577     	struct socket * sock = &inode->u.socket_i;
1578     	struct sock *sk = sock->sk;
1579     	
1580     	if (sk)
1581     		atomic_dec(&sk->protinfo.af_packet->mapped);
1582     }
1583     
1584     static struct vm_operations_struct packet_mmap_ops = {
1585     	open:	packet_mm_open,
1586     	close:	packet_mm_close,
1587     };
1588     
1589     static void free_pg_vec(unsigned long *pg_vec, unsigned order, unsigned len)
1590     {
1591     	int i;
1592     
1593     	for (i=0; i<len; i++) {
1594     		if (pg_vec[i]) {
1595     			struct page *page, *pend;
1596     
1597     			pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
1598     			for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1599     				ClearPageReserved(page);
1600     			free_pages(pg_vec[i], order);
1601     		}
1602     	}
1603     	kfree(pg_vec);
1604     }
1605     
1606     
1607     static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1608     {
1609     	unsigned long *pg_vec = NULL;
1610     	struct tpacket_hdr **io_vec = NULL;
1611     	struct packet_opt *po = sk->protinfo.af_packet;
1612     	int order = 0;
1613     	int err = 0;
1614     
1615     	if (req->tp_block_nr) {
1616     		int i, l;
1617     		int frames_per_block;
1618     
1619     		/* Sanity tests and some calculations */
1620     		if ((int)req->tp_block_size <= 0)
1621     			return -EINVAL;
1622     		if (req->tp_block_size&(PAGE_SIZE-1))
1623     			return -EINVAL;
1624     		if (req->tp_frame_size < TPACKET_HDRLEN)
1625     			return -EINVAL;
1626     		if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
1627     			return -EINVAL;
1628     		frames_per_block = req->tp_block_size/req->tp_frame_size;
1629     		if (frames_per_block <= 0)
1630     			return -EINVAL;
1631     		if (frames_per_block*req->tp_block_nr != req->tp_frame_nr)
1632     			return -EINVAL;
1633     		/* OK! */
1634     
1635     		/* Allocate page vector */
1636     		while ((PAGE_SIZE<<order) < req->tp_block_size)
1637     			order++;
1638     
1639     		err = -ENOMEM;
1640     
1641     		pg_vec = kmalloc(req->tp_block_nr*sizeof(unsigned long*), GFP_KERNEL);
1642     		if (pg_vec == NULL)
1643     			goto out;
1644     		memset(pg_vec, 0, req->tp_block_nr*sizeof(unsigned long*));
1645     
1646     		for (i=0; i<req->tp_block_nr; i++) {
1647     			struct page *page, *pend;
1648     			pg_vec[i] = __get_free_pages(GFP_KERNEL, order);
1649     			if (!pg_vec[i])
1650     				goto out_free_pgvec;
1651     
1652     			pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
1653     			for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1654     				SetPageReserved(page);
1655     		}
1656     		/* Page vector is allocated */
1657     
1658     		/* Draw frames */
1659     		io_vec = kmalloc(req->tp_frame_nr*sizeof(struct tpacket_hdr*), GFP_KERNEL);
1660     		if (io_vec == NULL)
1661     			goto out_free_pgvec;
1662     		memset(io_vec, 0, req->tp_frame_nr*sizeof(struct tpacket_hdr*));
1663     
1664     		l = 0;
1665     		for (i=0; i<req->tp_block_nr; i++) {
1666     			unsigned long ptr = pg_vec[i];
1667     			int k;
1668     
1669     			for (k=0; k<frames_per_block; k++, l++) {
1670     				io_vec[l] = (struct tpacket_hdr*)ptr;
1671     				io_vec[l]->tp_status = TP_STATUS_KERNEL;
1672     				ptr += req->tp_frame_size;
1673     			}
1674     		}
1675     		/* Done */
1676     	} else {
1677     		if (req->tp_frame_nr)
1678     			return -EINVAL;
1679     	}
1680     
1681     	lock_sock(sk);
1682     
1683     	/* Detach socket from network */
1684     	spin_lock(&po->bind_lock);
1685     	if (po->running)
1686     		dev_remove_pack(&po->prot_hook);
1687     	spin_unlock(&po->bind_lock);
1688     
1689     	err = -EBUSY;
1690     	if (closing || atomic_read(&po->mapped) == 0) {
1691     		err = 0;
1692     #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1693     
1694     		spin_lock_bh(&sk->receive_queue.lock);
1695     		pg_vec = XC(po->pg_vec, pg_vec);
1696     		io_vec = XC(po->iovec, io_vec);
1697     		po->iovmax = req->tp_frame_nr-1;
1698     		po->head = 0;
1699     		po->frame_size = req->tp_frame_size;
1700     		spin_unlock_bh(&sk->receive_queue.lock);
1701     
1702     		order = XC(po->pg_vec_order, order);
1703     		req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1704     
1705     		po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1706     		po->prot_hook.func = po->iovec ? tpacket_rcv : packet_rcv;
1707     		skb_queue_purge(&sk->receive_queue);
1708     #undef XC
1709     		if (atomic_read(&po->mapped))
1710     			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1711     	}
1712     
1713     	spin_lock(&po->bind_lock);
1714     	if (po->running)
1715     		dev_add_pack(&po->prot_hook);
1716     	spin_unlock(&po->bind_lock);
1717     
1718     	release_sock(sk);
1719     
1720     	if (io_vec)
1721     		kfree(io_vec);
1722     
1723     out_free_pgvec:
1724     	if (pg_vec)
1725     		free_pg_vec(pg_vec, order, req->tp_block_nr);
1726     out:
1727     	return err;
1728     }
1729     
1730     static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1731     {
1732     	struct sock *sk = sock->sk;
1733     	struct packet_opt *po = sk->protinfo.af_packet;
1734     	unsigned long size;
1735     	unsigned long start;
1736     	int err = -EINVAL;
1737     	int i;
1738     
1739     	if (vma->vm_pgoff)
1740     		return -EINVAL;
1741     
1742     	size = vma->vm_end - vma->vm_start;
1743     
1744     	lock_sock(sk);
1745     	if (po->pg_vec == NULL)
1746     		goto out;
1747     	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1748     		goto out;
1749     
1750     	atomic_inc(&po->mapped);
1751     	start = vma->vm_start;
1752     	err = -EAGAIN;
1753     	for (i=0; i<po->pg_vec_len; i++) {
1754     		if (remap_page_range(start, __pa(po->pg_vec[i]),
1755     				     po->pg_vec_pages*PAGE_SIZE,
1756     				     vma->vm_page_prot))
1757     			goto out;
1758     		start += po->pg_vec_pages*PAGE_SIZE;
1759     	}
1760     	vma->vm_ops = &packet_mmap_ops;
1761     	err = 0;
1762     
1763     out:
1764     	release_sock(sk);
1765     	return err;
1766     }
1767     #endif
1768     
1769     
1770     #ifdef CONFIG_SOCK_PACKET
1771     struct proto_ops packet_ops_spkt = {
1772     	family:		PF_PACKET,
1773     
1774     	release:	packet_release,
1775     	bind:		packet_bind_spkt,
1776     	connect:	sock_no_connect,
1777     	socketpair:	sock_no_socketpair,
1778     	accept:		sock_no_accept,
1779     	getname:	packet_getname_spkt,
1780     	poll:		datagram_poll,
1781     	ioctl:		packet_ioctl,
1782     	listen:		sock_no_listen,
1783     	shutdown:	sock_no_shutdown,
1784     	setsockopt:	sock_no_setsockopt,
1785     	getsockopt:	sock_no_getsockopt,
1786     	sendmsg:	packet_sendmsg_spkt,
1787     	recvmsg:	packet_recvmsg,
1788     	mmap:		sock_no_mmap,
1789     	sendpage:	sock_no_sendpage,
1790     };
1791     #endif
1792     
1793     struct proto_ops packet_ops = {
1794     	family:		PF_PACKET,
1795     
1796     	release:	packet_release,
1797     	bind:		packet_bind,
1798     	connect:	sock_no_connect,
1799     	socketpair:	sock_no_socketpair,
1800     	accept:		sock_no_accept,
1801     	getname:	packet_getname, 
1802     	poll:		packet_poll,
1803     	ioctl:		packet_ioctl,
1804     	listen:		sock_no_listen,
1805     	shutdown:	sock_no_shutdown,
1806     	setsockopt:	packet_setsockopt,
1807     	getsockopt:	packet_getsockopt,
1808     	sendmsg:	packet_sendmsg,
1809     	recvmsg:	packet_recvmsg,
1810     	mmap:		packet_mmap,
1811     	sendpage:	sock_no_sendpage,
1812     };
1813     
1814     static struct net_proto_family packet_family_ops = {
1815     	family:		PF_PACKET,
1816     	create:		packet_create,
1817     };
1818     
1819     static struct notifier_block packet_netdev_notifier = {
1820     	notifier_call:	packet_notifier,
1821     };
1822     
1823     #ifdef CONFIG_PROC_FS
1824     static int packet_read_proc(char *buffer, char **start, off_t offset,
1825     			     int length, int *eof, void *data)
1826     {
1827     	off_t pos=0;
1828     	off_t begin=0;
1829     	int len=0;
1830     	struct sock *s;
1831     	
1832     	len+= sprintf(buffer,"sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
1833     
1834     	read_lock(&packet_sklist_lock);
1835     
1836     	for (s = packet_sklist; s; s = s->next) {
1837     		len+=sprintf(buffer+len,"%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu",
1838     			     s,
1839     			     atomic_read(&s->refcnt),
1840     			     s->type,
1841     			     ntohs(s->num),
1842     			     s->protinfo.af_packet->ifindex,
1843     			     s->protinfo.af_packet->running,
1844     			     atomic_read(&s->rmem_alloc),
1845     			     sock_i_uid(s),
1846     			     sock_i_ino(s)
1847     			     );
1848     
1849     		buffer[len++]='\n';
1850     		
1851     		pos=begin+len;
1852     		if(pos<offset) {
1853     			len=0;
1854     			begin=pos;
1855     		}
1856     		if(pos>offset+length)
1857     			goto done;
1858     	}
1859     	*eof = 1;
1860     
1861     done:
1862     	read_unlock(&packet_sklist_lock);
1863     	*start=buffer+(offset-begin);
1864     	len-=(offset-begin);
1865     	if(len>length)
1866     		len=length;
1867     	if(len<0)
1868     		len=0;
1869     	return len;
1870     }
1871     #endif
1872     
1873     static void __exit packet_exit(void)
1874     {
1875     	remove_proc_entry("net/packet", 0);
1876     	unregister_netdevice_notifier(&packet_netdev_notifier);
1877     	sock_unregister(PF_PACKET);
1878     	return;
1879     }
1880     
1881     static int __init packet_init(void)
1882     {
1883     	sock_register(&packet_family_ops);
1884     	register_netdevice_notifier(&packet_netdev_notifier);
1885     #ifdef CONFIG_PROC_FS
1886     	create_proc_read_entry("net/packet", 0, 0, packet_read_proc, NULL);
1887     #endif
1888     	return 0;
1889     }
1890     
1891     module_init(packet_init);
1892     module_exit(packet_exit);
1893