File: /usr/src/linux/net/packet/af_packet.c
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PACKET - implements raw packet sockets.
7 *
8 * Version: $Id: af_packet.c,v 1.56 2001/08/06 13:21:16 davem Exp $
9 *
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Alan Cox, <gw4pts@gw4pts.ampr.org>
13 *
14 * Fixes:
15 * Alan Cox : verify_area() now used correctly
16 * Alan Cox : new skbuff lists, look ma no backlogs!
17 * Alan Cox : tidied skbuff lists.
18 * Alan Cox : Now uses generic datagram routines I
19 * added. Also fixed the peek/read crash
20 * from all old Linux datagram code.
21 * Alan Cox : Uses the improved datagram code.
22 * Alan Cox : Added NULL's for socket options.
23 * Alan Cox : Re-commented the code.
24 * Alan Cox : Use new kernel side addressing
25 * Rob Janssen : Correct MTU usage.
26 * Dave Platt : Counter leaks caused by incorrect
27 * interrupt locking and some slightly
28 * dubious gcc output. Can you read
29 * compiler: it said _VOLATILE_
30 * Richard Kooijman : Timestamp fixes.
31 * Alan Cox : New buffers. Use sk->mac.raw.
32 * Alan Cox : sendmsg/recvmsg support.
33 * Alan Cox : Protocol setting support
34 * Alexey Kuznetsov : Untied from IPv4 stack.
35 * Cyrus Durgin : Fixed kerneld for kmod.
36 * Michal Ostrowski : Module initialization cleanup.
37 *
38 * This program is free software; you can redistribute it and/or
39 * modify it under the terms of the GNU General Public License
40 * as published by the Free Software Foundation; either version
41 * 2 of the License, or (at your option) any later version.
42 *
43 */
44
45 #include <linux/config.h>
46 #include <linux/types.h>
47 #include <linux/sched.h>
48 #include <linux/mm.h>
49 #include <linux/fcntl.h>
50 #include <linux/socket.h>
51 #include <linux/in.h>
52 #include <linux/inet.h>
53 #include <linux/netdevice.h>
54 #include <linux/if_packet.h>
55 #include <linux/wireless.h>
56 #include <linux/kmod.h>
57 #include <net/ip.h>
58 #include <net/protocol.h>
59 #include <linux/skbuff.h>
60 #include <net/sock.h>
61 #include <linux/errno.h>
62 #include <linux/timer.h>
63 #include <asm/system.h>
64 #include <asm/uaccess.h>
65 #include <asm/ioctls.h>
66 #include <linux/proc_fs.h>
67 #include <linux/poll.h>
68 #include <linux/module.h>
69 #include <linux/init.h>
70 #include <linux/if_bridge.h>
71
72 #ifdef CONFIG_NET_DIVERT
73 #include <linux/divert.h>
74 #endif /* CONFIG_NET_DIVERT */
75
76 #ifdef CONFIG_INET
77 #include <net/inet_common.h>
78 #endif
79
80 #ifdef CONFIG_DLCI
81 extern int dlci_ioctl(unsigned int, void*);
82 #endif
83
84 #define CONFIG_SOCK_PACKET 1
85
86 /*
87 Proposed replacement for SIOC{ADD,DEL}MULTI and
88 IFF_PROMISC, IFF_ALLMULTI flags.
89
90 It is more expensive, but I believe
91 it is the really correct solution: reentrant, safe and fault tolerant.
92
93 IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping
94 reference count and global flag, so that real status is
95 (gflag|(count != 0)), so that we can use obsolete faulty interface
96 not harming clever users.
97 */
98 #define CONFIG_PACKET_MULTICAST 1
99
100 /*
101 Assumptions:
102 - if device has no dev->hard_header routine, it adds and removes ll header
103 inside itself. In this case ll header is invisible outside of device,
104 but higher levels still should reserve dev->hard_header_len.
105 Some devices are clever enough to reallocate the skb when the header
106 will not fit into the reserved space (tunnel), others are silly
107 (PPP).
108 - packet socket receives packets with pulled ll header,
109 so that SOCK_RAW should push it back.
110
111 On receive:
112 -----------
113
114 Incoming, dev->hard_header!=NULL
115 mac.raw -> ll header
116 data -> data
117
118 Outgoing, dev->hard_header!=NULL
119 mac.raw -> ll header
120 data -> ll header
121
122 Incoming, dev->hard_header==NULL
123 mac.raw -> UNKNOWN position. It very likely points to the ll header.
124 PPP does this, which is wrong, because it introduces an asymmetry
125 between rx and tx paths.
126 data -> data
127
128 Outgoing, dev->hard_header==NULL
129 mac.raw -> data. ll header is still not built!
130 data -> data
131
132 Resume
133 If dev->hard_header==NULL we are unlikely to restore sensible ll header.
134
135
136 On transmit:
137 ------------
138
139 dev->hard_header != NULL
140 mac.raw -> ll header
141 data -> ll header
142
143 dev->hard_header == NULL (ll header is added by device, we cannot control it)
144 mac.raw -> data
145 data -> data
146
147 We should set nh.raw on output to the correct position;
148 the packet classifier depends on it.
149 */
150
151 /* List of all packet sockets. */
152 static struct sock * packet_sklist;
153 static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED;
154
155 atomic_t packet_socks_nr;
156
157
158 /* Private packet socket structures. */
159
160 #ifdef CONFIG_PACKET_MULTICAST
/*
 * One membership request (multicast address, promiscuous mode or
 * all-multicast mode) made by a socket.  Entries are kept on the
 * per-socket list headed by packet_opt.mclist.
 */
struct packet_mclist {
	struct packet_mclist	*next;		/* next entry on the socket's list */
	int			ifindex;	/* device the request applies to */
	int			count;		/* how many times this request was added */
	unsigned short		type;		/* PACKET_MR_* request kind */
	unsigned short		alen;		/* number of bytes used in addr[] */
	unsigned char		addr[8];	/* address for PACKET_MR_MULTICAST */
};
170 #endif
171 #ifdef CONFIG_PACKET_MMAP
172 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
173 #endif
174
175 static void packet_flush_mclist(struct sock *sk);
176
177 struct packet_opt
178 {
179 struct packet_type prot_hook;
180 spinlock_t bind_lock;
181 char running; /* prot_hook is attached*/
182 int ifindex; /* bound device */
183 struct tpacket_stats stats;
184 #ifdef CONFIG_PACKET_MULTICAST
185 struct packet_mclist *mclist;
186 #endif
187 #ifdef CONFIG_PACKET_MMAP
188 atomic_t mapped;
189 unsigned long *pg_vec;
190 unsigned int pg_vec_order;
191 unsigned int pg_vec_pages;
192 unsigned int pg_vec_len;
193
194 struct tpacket_hdr **iovec;
195 unsigned int frame_size;
196 unsigned int iovmax;
197 unsigned int head;
198 int copy_thresh;
199 #endif
200 };
201
202 void packet_sock_destruct(struct sock *sk)
203 {
204 BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
205 BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
206
207 if (!sk->dead) {
208 printk("Attempt to release alive packet socket: %p\n", sk);
209 return;
210 }
211
212 if (sk->protinfo.destruct_hook)
213 kfree(sk->protinfo.destruct_hook);
214 atomic_dec(&packet_socks_nr);
215 #ifdef PACKET_REFCNT_DEBUG
216 printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
217 #endif
218 MOD_DEC_USE_COUNT;
219 }
220
221
222 extern struct proto_ops packet_ops;
223
224 #ifdef CONFIG_SOCK_PACKET
225 extern struct proto_ops packet_ops_spkt;
226
227 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
228 {
229 struct sock *sk;
230 struct sockaddr_pkt *spkt;
231
232 /*
233 * When we registered the protocol we saved the socket in the data
234 * field for just this event.
235 */
236
237 sk = (struct sock *) pt->data;
238
239 /*
240 * Yank back the headers [hope the device set this
241 * right or kerboom...]
242 *
243 * Incoming packets have ll header pulled,
244 * push it back.
245 *
246 * For outgoing ones skb->data == skb->mac.raw
247 * so that this procedure is noop.
248 */
249
250 if (skb->pkt_type == PACKET_LOOPBACK)
251 goto out;
252
253 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
254 goto oom;
255
256 spkt = (struct sockaddr_pkt*)skb->cb;
257
258 skb_push(skb, skb->data-skb->mac.raw);
259
260 /*
261 * The SOCK_PACKET socket receives _all_ frames.
262 */
263
264 spkt->spkt_family = dev->type;
265 strncpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
266 spkt->spkt_protocol = skb->protocol;
267
268 /*
269 * Charge the memory to the socket. This is done specifically
270 * to prevent sockets using all the memory up.
271 */
272
273 if (sock_queue_rcv_skb(sk,skb) == 0)
274 return 0;
275
276 out:
277 kfree_skb(skb);
278 oom:
279 return 0;
280 }
281
282
283 /*
284 * Output a raw packet to a device layer. This bypasses all the other
285 * protocol layers and you must therefore supply it with a complete frame
286 */
287
288 static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, int len,
289 struct scm_cookie *scm)
290 {
291 struct sock *sk = sock->sk;
292 struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
293 struct sk_buff *skb;
294 struct net_device *dev;
295 unsigned short proto=0;
296 int err;
297
298 /*
299 * Get and verify the address.
300 */
301
302 if (saddr)
303 {
304 if (msg->msg_namelen < sizeof(struct sockaddr))
305 return(-EINVAL);
306 if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
307 proto=saddr->spkt_protocol;
308 }
309 else
310 return(-ENOTCONN); /* SOCK_PACKET must be sent giving an address */
311
312 /*
313 * Find the device first to size check it
314 */
315
316 saddr->spkt_device[13] = 0;
317 dev = dev_get_by_name(saddr->spkt_device);
318 err = -ENODEV;
319 if (dev == NULL)
320 goto out_unlock;
321
322 /*
323 * You may not queue a frame bigger than the mtu. This is the lowest level
324 * raw protocol and you must do your own fragmentation at this level.
325 */
326
327 err = -EMSGSIZE;
328 if(len>dev->mtu+dev->hard_header_len)
329 goto out_unlock;
330
331 err = -ENOBUFS;
332 skb = sock_wmalloc(sk, len+dev->hard_header_len+15, 0, GFP_KERNEL);
333
334 /*
335 * If the write buffer is full, then tough. At this level the user gets to
336 * deal with the problem - do your own algorithmic backoffs. That's far
337 * more flexible.
338 */
339
340 if (skb == NULL)
341 goto out_unlock;
342
343 /*
344 * Fill it in
345 */
346
347 /* FIXME: Save some space for broken drivers that write a
348 * hard header at transmission time by themselves. PPP is the
349 * notable one here. This should really be fixed at the driver level.
350 */
351 skb_reserve(skb,(dev->hard_header_len+15)&~15);
352 skb->nh.raw = skb->data;
353
354 /* Try to align data part correctly */
355 if (dev->hard_header) {
356 skb->data -= dev->hard_header_len;
357 skb->tail -= dev->hard_header_len;
358 }
359
360 /* Returns -EFAULT on error */
361 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
362 skb->protocol = proto;
363 skb->dev = dev;
364 skb->priority = sk->priority;
365 if (err)
366 goto out_free;
367
368 err = -ENETDOWN;
369 if (!(dev->flags & IFF_UP))
370 goto out_free;
371
372 /*
373 * Now send it
374 */
375
376 dev_queue_xmit(skb);
377 dev_put(dev);
378 return(len);
379
380 out_free:
381 kfree_skb(skb);
382 out_unlock:
383 if (dev)
384 dev_put(dev);
385 return err;
386 }
387 #endif
388
389 /*
390 This function does lazy skb cloning in the hope that most packets
391 are discarded by BPF.
392
393 Note the tricky part: we DO mangle a shared skb! skb->data, skb->len
394 and skb->cb are mangled. It works because (and as long as) packets
395 arriving here are owned by the current CPU. Output packets are cloned
396 by dev_queue_xmit_nit(), input packets are processed by net_bh
397 sequentially, so that if we return the skb to its original state on
398 exit, we will not harm anyone.
399 */
400
401 static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
402 {
403 struct sock *sk;
404 struct sockaddr_ll *sll;
405 struct packet_opt *po;
406 u8 * skb_head = skb->data;
407 int skb_len = skb->len;
408 #ifdef CONFIG_FILTER
409 unsigned snaplen;
410 #endif
411
412 if (skb->pkt_type == PACKET_LOOPBACK)
413 goto drop;
414
415 sk = (struct sock *) pt->data;
416 po = sk->protinfo.af_packet;
417
418 skb->dev = dev;
419
420 if (dev->hard_header) {
421 /* The device has an explicit notion of ll header,
422 exported to higher levels.
423
424 Otherwise, the device hides datails of it frame
425 structure, so that corresponding packet head
426 never delivered to user.
427 */
428 if (sk->type != SOCK_DGRAM)
429 skb_push(skb, skb->data - skb->mac.raw);
430 else if (skb->pkt_type == PACKET_OUTGOING) {
431 /* Special case: outgoing packets have ll header at head */
432 skb_pull(skb, skb->nh.raw - skb->data);
433 }
434 }
435
436 #ifdef CONFIG_FILTER
437 snaplen = skb->len;
438
439 if (sk->filter) {
440 unsigned res = snaplen;
441 struct sk_filter *filter;
442
443 bh_lock_sock(sk);
444 if ((filter = sk->filter) != NULL)
445 res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
446 bh_unlock_sock(sk);
447
448 if (res == 0)
449 goto drop_n_restore;
450 if (snaplen > res)
451 snaplen = res;
452 }
453 #endif /* CONFIG_FILTER */
454
455 if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
456 goto drop_n_acct;
457
458 if (skb_shared(skb)) {
459 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
460 if (nskb == NULL)
461 goto drop_n_acct;
462
463 if (skb_head != skb->data) {
464 skb->data = skb_head;
465 skb->len = skb_len;
466 }
467 kfree_skb(skb);
468 skb = nskb;
469 }
470
471 sll = (struct sockaddr_ll*)skb->cb;
472 sll->sll_family = AF_PACKET;
473 sll->sll_hatype = dev->type;
474 sll->sll_protocol = skb->protocol;
475 sll->sll_pkttype = skb->pkt_type;
476 sll->sll_ifindex = dev->ifindex;
477 sll->sll_halen = 0;
478
479 if (dev->hard_header_parse)
480 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
481
482 #ifdef CONFIG_FILTER
483 if (pskb_trim(skb, snaplen))
484 goto drop_n_acct;
485 #endif
486
487 skb_set_owner_r(skb, sk);
488 skb->dev = NULL;
489 spin_lock(&sk->receive_queue.lock);
490 po->stats.tp_packets++;
491 __skb_queue_tail(&sk->receive_queue, skb);
492 spin_unlock(&sk->receive_queue.lock);
493 sk->data_ready(sk,skb->len);
494 return 0;
495
496 drop_n_acct:
497 spin_lock(&sk->receive_queue.lock);
498 po->stats.tp_drops++;
499 spin_unlock(&sk->receive_queue.lock);
500
501 #ifdef CONFIG_FILTER
502 drop_n_restore:
503 #endif
504 if (skb_head != skb->data && skb_shared(skb)) {
505 skb->data = skb_head;
506 skb->len = skb_len;
507 }
508 drop:
509 kfree_skb(skb);
510 return 0;
511 }
512
513 #ifdef CONFIG_PACKET_MMAP
514 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
515 {
516 struct sock *sk;
517 struct packet_opt *po;
518 struct sockaddr_ll *sll;
519 struct tpacket_hdr *h;
520 u8 * skb_head = skb->data;
521 int skb_len = skb->len;
522 unsigned snaplen;
523 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
524 unsigned short macoff, netoff;
525 struct sk_buff *copy_skb = NULL;
526
527 if (skb->pkt_type == PACKET_LOOPBACK)
528 goto drop;
529
530 sk = (struct sock *) pt->data;
531 po = sk->protinfo.af_packet;
532
533 if (dev->hard_header) {
534 if (sk->type != SOCK_DGRAM)
535 skb_push(skb, skb->data - skb->mac.raw);
536 else if (skb->pkt_type == PACKET_OUTGOING) {
537 /* Special case: outgoing packets have ll header at head */
538 skb_pull(skb, skb->nh.raw - skb->data);
539 if (skb->ip_summed == CHECKSUM_HW)
540 status |= TP_STATUS_CSUMNOTREADY;
541 }
542 }
543
544 snaplen = skb->len;
545
546 #ifdef CONFIG_FILTER
547 if (sk->filter) {
548 unsigned res = snaplen;
549 struct sk_filter *filter;
550
551 bh_lock_sock(sk);
552 if ((filter = sk->filter) != NULL)
553 res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
554 bh_unlock_sock(sk);
555
556 if (res == 0)
557 goto drop_n_restore;
558 if (snaplen > res)
559 snaplen = res;
560 }
561 #endif
562
563 if (sk->type == SOCK_DGRAM) {
564 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
565 } else {
566 unsigned maclen = skb->nh.raw - skb->data;
567 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
568 macoff = netoff - maclen;
569 }
570
571 if (macoff + snaplen > po->frame_size) {
572 if (po->copy_thresh &&
573 atomic_read(&sk->rmem_alloc) + skb->truesize < (unsigned)sk->rcvbuf) {
574 if (skb_shared(skb)) {
575 copy_skb = skb_clone(skb, GFP_ATOMIC);
576 } else {
577 copy_skb = skb_get(skb);
578 skb_head = skb->data;
579 }
580 if (copy_skb)
581 skb_set_owner_r(copy_skb, sk);
582 }
583 snaplen = po->frame_size - macoff;
584 if ((int)snaplen < 0)
585 snaplen = 0;
586 }
587 if (snaplen > skb->len-skb->data_len)
588 snaplen = skb->len-skb->data_len;
589
590 spin_lock(&sk->receive_queue.lock);
591 h = po->iovec[po->head];
592
593 if (h->tp_status)
594 goto ring_is_full;
595 po->head = po->head != po->iovmax ? po->head+1 : 0;
596 po->stats.tp_packets++;
597 if (copy_skb) {
598 status |= TP_STATUS_COPY;
599 __skb_queue_tail(&sk->receive_queue, copy_skb);
600 }
601 if (!po->stats.tp_drops)
602 status &= ~TP_STATUS_LOSING;
603 spin_unlock(&sk->receive_queue.lock);
604
605 memcpy((u8*)h + macoff, skb->data, snaplen);
606
607 h->tp_len = skb->len;
608 h->tp_snaplen = snaplen;
609 h->tp_mac = macoff;
610 h->tp_net = netoff;
611 h->tp_sec = skb->stamp.tv_sec;
612 h->tp_usec = skb->stamp.tv_usec;
613
614 sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
615 sll->sll_halen = 0;
616 if (dev->hard_header_parse)
617 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
618 sll->sll_family = AF_PACKET;
619 sll->sll_hatype = dev->type;
620 sll->sll_protocol = skb->protocol;
621 sll->sll_pkttype = skb->pkt_type;
622 sll->sll_ifindex = dev->ifindex;
623
624 h->tp_status = status;
625 mb();
626
627 sk->data_ready(sk, 0);
628
629 drop_n_restore:
630 if (skb_head != skb->data && skb_shared(skb)) {
631 skb->data = skb_head;
632 skb->len = skb_len;
633 }
634 drop:
635 kfree_skb(skb);
636 return 0;
637
638 ring_is_full:
639 po->stats.tp_drops++;
640 spin_unlock(&sk->receive_queue.lock);
641
642 sk->data_ready(sk, 0);
643 if (copy_skb)
644 kfree_skb(copy_skb);
645 goto drop_n_restore;
646 }
647
648 #endif
649
650
651 static int packet_sendmsg(struct socket *sock, struct msghdr *msg, int len,
652 struct scm_cookie *scm)
653 {
654 struct sock *sk = sock->sk;
655 struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
656 struct sk_buff *skb;
657 struct net_device *dev;
658 unsigned short proto;
659 unsigned char *addr;
660 int ifindex, err, reserve = 0;
661
662 /*
663 * Get and verify the address.
664 */
665
666 if (saddr == NULL) {
667 ifindex = sk->protinfo.af_packet->ifindex;
668 proto = sk->num;
669 addr = NULL;
670 } else {
671 err = -EINVAL;
672 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
673 goto out;
674 ifindex = saddr->sll_ifindex;
675 proto = saddr->sll_protocol;
676 addr = saddr->sll_addr;
677 }
678
679
680 dev = dev_get_by_index(ifindex);
681 err = -ENXIO;
682 if (dev == NULL)
683 goto out_unlock;
684 if (sock->type == SOCK_RAW)
685 reserve = dev->hard_header_len;
686
687 err = -EMSGSIZE;
688 if (len > dev->mtu+reserve)
689 goto out_unlock;
690
691 skb = sock_alloc_send_skb(sk, len+dev->hard_header_len+15,
692 msg->msg_flags & MSG_DONTWAIT, &err);
693 if (skb==NULL)
694 goto out_unlock;
695
696 skb_reserve(skb, (dev->hard_header_len+15)&~15);
697 skb->nh.raw = skb->data;
698
699 if (dev->hard_header) {
700 int res;
701 err = -EINVAL;
702 res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
703 if (sock->type != SOCK_DGRAM) {
704 skb->tail = skb->data;
705 skb->len = 0;
706 } else if (res < 0)
707 goto out_free;
708 }
709
710 /* Returns -EFAULT on error */
711 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
712 if (err)
713 goto out_free;
714
715 skb->protocol = proto;
716 skb->dev = dev;
717 skb->priority = sk->priority;
718
719 err = -ENETDOWN;
720 if (!(dev->flags & IFF_UP))
721 goto out_free;
722
723 /*
724 * Now send it
725 */
726
727 err = dev_queue_xmit(skb);
728 if (err > 0 && (err = net_xmit_errno(err)) != 0)
729 goto out_unlock;
730
731 dev_put(dev);
732
733 return(len);
734
735 out_free:
736 kfree_skb(skb);
737 out_unlock:
738 if (dev)
739 dev_put(dev);
740 out:
741 return err;
742 }
743
744 /*
745 * Close a PACKET socket. This is fairly simple. We immediately go
746 * to 'closed' state and remove our protocol entry in the device list.
747 */
748
749 static int packet_release(struct socket *sock)
750 {
751 struct sock *sk = sock->sk;
752 struct sock **skp;
753
754 if (!sk)
755 return 0;
756
757 write_lock_bh(&packet_sklist_lock);
758 for (skp = &packet_sklist; *skp; skp = &(*skp)->next) {
759 if (*skp == sk) {
760 *skp = sk->next;
761 __sock_put(sk);
762 break;
763 }
764 }
765 write_unlock_bh(&packet_sklist_lock);
766
767 /*
768 * Unhook packet receive handler.
769 */
770
771 if (sk->protinfo.af_packet->running) {
772 /*
773 * Remove the protocol hook
774 */
775 dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
776 sk->protinfo.af_packet->running = 0;
777 __sock_put(sk);
778 }
779
780 #ifdef CONFIG_PACKET_MULTICAST
781 packet_flush_mclist(sk);
782 #endif
783
784 #ifdef CONFIG_PACKET_MMAP
785 if (sk->protinfo.af_packet->pg_vec) {
786 struct tpacket_req req;
787 memset(&req, 0, sizeof(req));
788 packet_set_ring(sk, &req, 1);
789 }
790 #endif
791
792 /*
793 * Now the socket is dead. No more input will appear.
794 */
795
796 sock_orphan(sk);
797 sock->sk = NULL;
798
799 /* Purge queues */
800
801 skb_queue_purge(&sk->receive_queue);
802
803 sock_put(sk);
804 return 0;
805 }
806
807 /*
808 * Attach a packet hook.
809 */
810
811 static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
812 {
813 /*
814 * Detach an existing hook if present.
815 */
816
817 lock_sock(sk);
818
819 spin_lock(&sk->protinfo.af_packet->bind_lock);
820 if (sk->protinfo.af_packet->running) {
821 dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
822 __sock_put(sk);
823 sk->protinfo.af_packet->running = 0;
824 }
825
826 sk->num = protocol;
827 sk->protinfo.af_packet->prot_hook.type = protocol;
828 sk->protinfo.af_packet->prot_hook.dev = dev;
829
830 sk->protinfo.af_packet->ifindex = dev ? dev->ifindex : 0;
831
832 if (protocol == 0)
833 goto out_unlock;
834
835 if (dev) {
836 if (dev->flags&IFF_UP) {
837 dev_add_pack(&sk->protinfo.af_packet->prot_hook);
838 sock_hold(sk);
839 sk->protinfo.af_packet->running = 1;
840 } else {
841 sk->err = ENETDOWN;
842 if (!sk->dead)
843 sk->error_report(sk);
844 }
845 } else {
846 dev_add_pack(&sk->protinfo.af_packet->prot_hook);
847 sock_hold(sk);
848 sk->protinfo.af_packet->running = 1;
849 }
850
851 out_unlock:
852 spin_unlock(&sk->protinfo.af_packet->bind_lock);
853 release_sock(sk);
854 return 0;
855 }
856
857 /*
858 * Bind a packet socket to a device
859 */
860
861 #ifdef CONFIG_SOCK_PACKET
862
863 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
864 {
865 struct sock *sk=sock->sk;
866 char name[15];
867 struct net_device *dev;
868 int err = -ENODEV;
869
870 /*
871 * Check legality
872 */
873
874 if(addr_len!=sizeof(struct sockaddr))
875 return -EINVAL;
876 strncpy(name,uaddr->sa_data,14);
877 name[14]=0;
878
879 dev = dev_get_by_name(name);
880 if (dev) {
881 err = packet_do_bind(sk, dev, sk->num);
882 dev_put(dev);
883 }
884 return err;
885 }
886 #endif
887
888 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
889 {
890 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
891 struct sock *sk=sock->sk;
892 struct net_device *dev = NULL;
893 int err;
894
895
896 /*
897 * Check legality
898 */
899
900 if (addr_len < sizeof(struct sockaddr_ll))
901 return -EINVAL;
902 if (sll->sll_family != AF_PACKET)
903 return -EINVAL;
904
905 if (sll->sll_ifindex) {
906 err = -ENODEV;
907 dev = dev_get_by_index(sll->sll_ifindex);
908 if (dev == NULL)
909 goto out;
910 }
911 err = packet_do_bind(sk, dev, sll->sll_protocol ? : sk->num);
912 if (dev)
913 dev_put(dev);
914
915 out:
916 return err;
917 }
918
919
920 /*
921 * Create a packet of type SOCK_PACKET.
922 */
923
924 static int packet_create(struct socket *sock, int protocol)
925 {
926 struct sock *sk;
927 int err;
928
929 if (!capable(CAP_NET_RAW))
930 return -EPERM;
931 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
932 #ifdef CONFIG_SOCK_PACKET
933 && sock->type != SOCK_PACKET
934 #endif
935 )
936 return -ESOCKTNOSUPPORT;
937
938 sock->state = SS_UNCONNECTED;
939 MOD_INC_USE_COUNT;
940
941 err = -ENOBUFS;
942 sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1);
943 if (sk == NULL)
944 goto out;
945
946 sock->ops = &packet_ops;
947 #ifdef CONFIG_SOCK_PACKET
948 if (sock->type == SOCK_PACKET)
949 sock->ops = &packet_ops_spkt;
950 #endif
951 sock_init_data(sock,sk);
952
953 sk->protinfo.af_packet = kmalloc(sizeof(struct packet_opt), GFP_KERNEL);
954 if (sk->protinfo.af_packet == NULL)
955 goto out_free;
956 memset(sk->protinfo.af_packet, 0, sizeof(struct packet_opt));
957 sk->family = PF_PACKET;
958 sk->num = protocol;
959
960 sk->destruct = packet_sock_destruct;
961 atomic_inc(&packet_socks_nr);
962
963 /*
964 * Attach a protocol block
965 */
966
967 spin_lock_init(&sk->protinfo.af_packet->bind_lock);
968 sk->protinfo.af_packet->prot_hook.func = packet_rcv;
969 #ifdef CONFIG_SOCK_PACKET
970 if (sock->type == SOCK_PACKET)
971 sk->protinfo.af_packet->prot_hook.func = packet_rcv_spkt;
972 #endif
973 sk->protinfo.af_packet->prot_hook.data = (void *)sk;
974
975 if (protocol) {
976 sk->protinfo.af_packet->prot_hook.type = protocol;
977 dev_add_pack(&sk->protinfo.af_packet->prot_hook);
978 sock_hold(sk);
979 sk->protinfo.af_packet->running = 1;
980 }
981
982 write_lock_bh(&packet_sklist_lock);
983 sk->next = packet_sklist;
984 packet_sklist = sk;
985 sock_hold(sk);
986 write_unlock_bh(&packet_sklist_lock);
987 return(0);
988
989 out_free:
990 sk_free(sk);
991 out:
992 MOD_DEC_USE_COUNT;
993 return err;
994 }
995
996 /*
997 * Pull a packet from our receive queue and hand it to the user.
998 * If necessary we block.
999 */
1000
1001 static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len,
1002 int flags, struct scm_cookie *scm)
1003 {
1004 struct sock *sk = sock->sk;
1005 struct sk_buff *skb;
1006 int copied, err;
1007
1008 err = -EINVAL;
1009 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC))
1010 goto out;
1011
1012 #if 0
1013 /* What error should we return now? EUNATTACH? */
1014 if (sk->protinfo.af_packet->ifindex < 0)
1015 return -ENODEV;
1016 #endif
1017
1018 /*
1019 * If the address length field is there to be filled in, we fill
1020 * it in now.
1021 */
1022
1023 if (sock->type == SOCK_PACKET)
1024 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1025 else
1026 msg->msg_namelen = sizeof(struct sockaddr_ll);
1027
1028 /*
1029 * Call the generic datagram receiver. This handles all sorts
1030 * of horrible races and re-entrancy so we can forget about it
1031 * in the protocol layers.
1032 *
1033 * Now it will return ENETDOWN, if device have just gone down,
1034 * but then it will block.
1035 */
1036
1037 skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1038
1039 /*
1040 * An error occurred so return it. Because skb_recv_datagram()
1041 * handles the blocking we don't see and worry about blocking
1042 * retries.
1043 */
1044
1045 if(skb==NULL)
1046 goto out;
1047
1048 /*
1049 * You lose any data beyond the buffer you gave. If it worries a
1050 * user program they can ask the device for its MTU anyway.
1051 */
1052
1053 copied = skb->len;
1054 if (copied > len)
1055 {
1056 copied=len;
1057 msg->msg_flags|=MSG_TRUNC;
1058 }
1059
1060 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1061 if (err)
1062 goto out_free;
1063
1064 sock_recv_timestamp(msg, sk, skb);
1065
1066 if (msg->msg_name)
1067 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
1068
1069 /*
1070 * Free or return the buffer as appropriate. Again this
1071 * hides all the races and re-entrancy issues from us.
1072 */
1073 err = (flags&MSG_TRUNC) ? skb->len : copied;
1074
1075 out_free:
1076 skb_free_datagram(sk, skb);
1077 out:
1078 return err;
1079 }
1080
1081 #ifdef CONFIG_SOCK_PACKET
1082 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1083 int *uaddr_len, int peer)
1084 {
1085 struct net_device *dev;
1086 struct sock *sk = sock->sk;
1087
1088 if (peer)
1089 return -EOPNOTSUPP;
1090
1091 uaddr->sa_family = AF_PACKET;
1092 dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
1093 if (dev) {
1094 strncpy(uaddr->sa_data, dev->name, 15);
1095 dev_put(dev);
1096 } else
1097 memset(uaddr->sa_data, 0, 14);
1098 *uaddr_len = sizeof(*uaddr);
1099
1100 return 0;
1101 }
1102 #endif
1103
1104 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1105 int *uaddr_len, int peer)
1106 {
1107 struct net_device *dev;
1108 struct sock *sk = sock->sk;
1109 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1110
1111 if (peer)
1112 return -EOPNOTSUPP;
1113
1114 sll->sll_family = AF_PACKET;
1115 sll->sll_ifindex = sk->protinfo.af_packet->ifindex;
1116 sll->sll_protocol = sk->num;
1117 dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
1118 if (dev) {
1119 sll->sll_hatype = dev->type;
1120 sll->sll_halen = dev->addr_len;
1121 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1122 dev_put(dev);
1123 } else {
1124 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
1125 sll->sll_halen = 0;
1126 }
1127 *uaddr_len = sizeof(*sll);
1128
1129 return 0;
1130 }
1131
1132 #ifdef CONFIG_PACKET_MULTICAST
1133 static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1134 {
1135 switch (i->type) {
1136 case PACKET_MR_MULTICAST:
1137 if (what > 0)
1138 dev_mc_add(dev, i->addr, i->alen, 0);
1139 else
1140 dev_mc_delete(dev, i->addr, i->alen, 0);
1141 break;
1142 case PACKET_MR_PROMISC:
1143 dev_set_promiscuity(dev, what);
1144 break;
1145 case PACKET_MR_ALLMULTI:
1146 dev_set_allmulti(dev, what);
1147 break;
1148 default:;
1149 }
1150 }
1151
1152 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1153 {
1154 for ( ; i; i=i->next) {
1155 if (i->ifindex == dev->ifindex)
1156 packet_dev_mc(dev, i, what);
1157 }
1158 }
1159
1160 static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
1161 {
1162 struct packet_mclist *ml, *i;
1163 struct net_device *dev;
1164 int err;
1165
1166 rtnl_lock();
1167
1168 err = -ENODEV;
1169 dev = __dev_get_by_index(mreq->mr_ifindex);
1170 if (!dev)
1171 goto done;
1172
1173 err = -EINVAL;
1174 if (mreq->mr_alen > dev->addr_len)
1175 goto done;
1176
1177 err = -ENOBUFS;
1178 i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
1179 if (i == NULL)
1180 goto done;
1181
1182 err = 0;
1183 for (ml=sk->protinfo.af_packet->mclist; ml; ml=ml->next) {
1184 if (ml->ifindex == mreq->mr_ifindex &&
1185 ml->type == mreq->mr_type &&
1186 ml->alen == mreq->mr_alen &&
1187 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1188 ml->count++;
1189 /* Free the new element ... */
1190 kfree(i);
1191 goto done;
1192 }
1193 }
1194
1195 i->type = mreq->mr_type;
1196 i->ifindex = mreq->mr_ifindex;
1197 i->alen = mreq->mr_alen;
1198 memcpy(i->addr, mreq->mr_address, i->alen);
1199 i->count = 1;
1200 i->next = sk->protinfo.af_packet->mclist;
1201 sk->protinfo.af_packet->mclist = i;
1202 packet_dev_mc(dev, i, +1);
1203
1204 done:
1205 rtnl_unlock();
1206 return err;
1207 }
1208
1209 static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
1210 {
1211 struct packet_mclist *ml, **mlp;
1212
1213 rtnl_lock();
1214
1215 for (mlp=&sk->protinfo.af_packet->mclist; (ml=*mlp)!=NULL; mlp=&ml->next) {
1216 if (ml->ifindex == mreq->mr_ifindex &&
1217 ml->type == mreq->mr_type &&
1218 ml->alen == mreq->mr_alen &&
1219 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1220 if (--ml->count == 0) {
1221 struct net_device *dev;
1222 *mlp = ml->next;
1223 dev = dev_get_by_index(ml->ifindex);
1224 if (dev) {
1225 packet_dev_mc(dev, ml, -1);
1226 dev_put(dev);
1227 }
1228 kfree(ml);
1229 }
1230 rtnl_unlock();
1231 return 0;
1232 }
1233 }
1234 rtnl_unlock();
1235 return -EADDRNOTAVAIL;
1236 }
1237
1238 static void packet_flush_mclist(struct sock *sk)
1239 {
1240 struct packet_mclist *ml;
1241
1242 if (sk->protinfo.af_packet->mclist == NULL)
1243 return;
1244
1245 rtnl_lock();
1246 while ((ml=sk->protinfo.af_packet->mclist) != NULL) {
1247 struct net_device *dev;
1248 sk->protinfo.af_packet->mclist = ml->next;
1249 if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
1250 packet_dev_mc(dev, ml, -1);
1251 dev_put(dev);
1252 }
1253 kfree(ml);
1254 }
1255 rtnl_unlock();
1256 }
1257 #endif
1258
1259 static int
1260 packet_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen)
1261 {
1262 struct sock *sk = sock->sk;
1263 int ret;
1264
1265 if (level != SOL_PACKET)
1266 return -ENOPROTOOPT;
1267
1268 switch(optname) {
1269 #ifdef CONFIG_PACKET_MULTICAST
1270 case PACKET_ADD_MEMBERSHIP:
1271 case PACKET_DROP_MEMBERSHIP:
1272 {
1273 struct packet_mreq mreq;
1274 if (optlen<sizeof(mreq))
1275 return -EINVAL;
1276 if (copy_from_user(&mreq,optval,sizeof(mreq)))
1277 return -EFAULT;
1278 if (optname == PACKET_ADD_MEMBERSHIP)
1279 ret = packet_mc_add(sk, &mreq);
1280 else
1281 ret = packet_mc_drop(sk, &mreq);
1282 return ret;
1283 }
1284 #endif
1285 #ifdef CONFIG_PACKET_MMAP
1286 case PACKET_RX_RING:
1287 {
1288 struct tpacket_req req;
1289
1290 if (optlen<sizeof(req))
1291 return -EINVAL;
1292 if (copy_from_user(&req,optval,sizeof(req)))
1293 return -EFAULT;
1294 return packet_set_ring(sk, &req, 0);
1295 }
1296 case PACKET_COPY_THRESH:
1297 {
1298 int val;
1299
1300 if (optlen!=sizeof(val))
1301 return -EINVAL;
1302 if (copy_from_user(&val,optval,sizeof(val)))
1303 return -EFAULT;
1304
1305 sk->protinfo.af_packet->copy_thresh = val;
1306 return 0;
1307 }
1308 #endif
1309 default:
1310 return -ENOPROTOOPT;
1311 }
1312 }
1313
1314 int packet_getsockopt(struct socket *sock, int level, int optname,
1315 char *optval, int *optlen)
1316 {
1317 int len;
1318 struct sock *sk = sock->sk;
1319
1320 if (level != SOL_PACKET)
1321 return -ENOPROTOOPT;
1322
1323 if (get_user(len,optlen))
1324 return -EFAULT;
1325
1326 if (len < 0)
1327 return -EINVAL;
1328
1329 switch(optname) {
1330 case PACKET_STATISTICS:
1331 {
1332 struct tpacket_stats st;
1333
1334 if (len > sizeof(struct tpacket_stats))
1335 len = sizeof(struct tpacket_stats);
1336 spin_lock_bh(&sk->receive_queue.lock);
1337 st = sk->protinfo.af_packet->stats;
1338 memset(&sk->protinfo.af_packet->stats, 0, sizeof(st));
1339 spin_unlock_bh(&sk->receive_queue.lock);
1340 st.tp_packets += st.tp_drops;
1341
1342 if (copy_to_user(optval, &st, len))
1343 return -EFAULT;
1344 break;
1345 }
1346 default:
1347 return -ENOPROTOOPT;
1348 }
1349
1350 if (put_user(len, optlen))
1351 return -EFAULT;
1352 return 0;
1353 }
1354
1355
1356 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1357 {
1358 struct sock *sk;
1359 struct packet_opt *po;
1360 struct net_device *dev = (struct net_device*)data;
1361
1362 read_lock(&packet_sklist_lock);
1363 for (sk = packet_sklist; sk; sk = sk->next) {
1364 po = sk->protinfo.af_packet;
1365
1366 switch (msg) {
1367 case NETDEV_DOWN:
1368 case NETDEV_UNREGISTER:
1369 if (dev->ifindex == po->ifindex) {
1370 spin_lock(&po->bind_lock);
1371 if (po->running) {
1372 dev_remove_pack(&po->prot_hook);
1373 __sock_put(sk);
1374 po->running = 0;
1375 sk->err = ENETDOWN;
1376 if (!sk->dead)
1377 sk->error_report(sk);
1378 }
1379 if (msg == NETDEV_UNREGISTER) {
1380 po->ifindex = -1;
1381 po->prot_hook.dev = NULL;
1382 }
1383 spin_unlock(&po->bind_lock);
1384 }
1385 #ifdef CONFIG_PACKET_MULTICAST
1386 if (po->mclist)
1387 packet_dev_mclist(dev, po->mclist, -1);
1388 #endif
1389 break;
1390 case NETDEV_UP:
1391 spin_lock(&po->bind_lock);
1392 if (dev->ifindex == po->ifindex && sk->num && po->running==0) {
1393 dev_add_pack(&po->prot_hook);
1394 sock_hold(sk);
1395 po->running = 1;
1396 }
1397 spin_unlock(&po->bind_lock);
1398 #ifdef CONFIG_PACKET_MULTICAST
1399 if (po->mclist)
1400 packet_dev_mclist(dev, po->mclist, +1);
1401 #endif
1402 break;
1403 }
1404 }
1405 read_unlock(&packet_sklist_lock);
1406 return NOTIFY_DONE;
1407 }
1408
1409
1410 static int packet_ioctl(struct socket *sock, unsigned int cmd,
1411 unsigned long arg)
1412 {
1413 struct sock *sk = sock->sk;
1414
1415 switch(cmd)
1416 {
1417 case SIOCOUTQ:
1418 {
1419 int amount = atomic_read(&sk->wmem_alloc);
1420 return put_user(amount, (int *)arg);
1421 }
1422 case SIOCINQ:
1423 {
1424 struct sk_buff *skb;
1425 int amount = 0;
1426
1427 spin_lock_bh(&sk->receive_queue.lock);
1428 skb = skb_peek(&sk->receive_queue);
1429 if (skb)
1430 amount = skb->len;
1431 spin_unlock_bh(&sk->receive_queue.lock);
1432 return put_user(amount, (int *)arg);
1433 }
1434 case FIOSETOWN:
1435 case SIOCSPGRP: {
1436 int pid;
1437 if (get_user(pid, (int *) arg))
1438 return -EFAULT;
1439 if (current->pid != pid && current->pgrp != -pid &&
1440 !capable(CAP_NET_ADMIN))
1441 return -EPERM;
1442 sk->proc = pid;
1443 break;
1444 }
1445 case FIOGETOWN:
1446 case SIOCGPGRP:
1447 return put_user(sk->proc, (int *)arg);
1448 case SIOCGSTAMP:
1449 if(sk->stamp.tv_sec==0)
1450 return -ENOENT;
1451 if (copy_to_user((void *)arg, &sk->stamp,
1452 sizeof(struct timeval)))
1453 return -EFAULT;
1454 break;
1455 case SIOCGIFFLAGS:
1456 #ifndef CONFIG_INET
1457 case SIOCSIFFLAGS:
1458 #endif
1459 case SIOCGIFCONF:
1460 case SIOCGIFMETRIC:
1461 case SIOCSIFMETRIC:
1462 case SIOCGIFMEM:
1463 case SIOCSIFMEM:
1464 case SIOCGIFMTU:
1465 case SIOCSIFMTU:
1466 case SIOCSIFLINK:
1467 case SIOCGIFHWADDR:
1468 case SIOCSIFHWADDR:
1469 case SIOCSIFMAP:
1470 case SIOCGIFMAP:
1471 case SIOCSIFSLAVE:
1472 case SIOCGIFSLAVE:
1473 case SIOCGIFINDEX:
1474 case SIOCGIFNAME:
1475 case SIOCGIFCOUNT:
1476 case SIOCSIFHWBROADCAST:
1477 return(dev_ioctl(cmd,(void *) arg));
1478
1479 case SIOCGIFBR:
1480 case SIOCSIFBR:
1481 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
1482 #ifdef CONFIG_INET
1483 #ifdef CONFIG_KMOD
1484 if (br_ioctl_hook == NULL)
1485 request_module("bridge");
1486 #endif
1487 if (br_ioctl_hook != NULL)
1488 return br_ioctl_hook(arg);
1489 #endif
1490 #endif
1491 return -ENOPKG;
1492
1493 case SIOCGIFDIVERT:
1494 case SIOCSIFDIVERT:
1495 #ifdef CONFIG_NET_DIVERT
1496 return divert_ioctl(cmd, (struct divert_cf *) arg);
1497 #else
1498 return -ENOPKG;
1499 #endif /* CONFIG_NET_DIVERT */
1500
1501 #ifdef CONFIG_INET
1502 case SIOCADDRT:
1503 case SIOCDELRT:
1504 case SIOCDARP:
1505 case SIOCGARP:
1506 case SIOCSARP:
1507 case SIOCGIFADDR:
1508 case SIOCSIFADDR:
1509 case SIOCGIFBRDADDR:
1510 case SIOCSIFBRDADDR:
1511 case SIOCGIFNETMASK:
1512 case SIOCSIFNETMASK:
1513 case SIOCGIFDSTADDR:
1514 case SIOCSIFDSTADDR:
1515 case SIOCSIFFLAGS:
1516 case SIOCADDDLCI:
1517 case SIOCDELDLCI:
1518 return inet_dgram_ops.ioctl(sock, cmd, arg);
1519 #endif
1520
1521 default:
1522 if ((cmd >= SIOCDEVPRIVATE) &&
1523 (cmd <= (SIOCDEVPRIVATE + 15)))
1524 return(dev_ioctl(cmd,(void *) arg));
1525
1526 #ifdef CONFIG_NET_RADIO
1527 if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST))
1528 return(dev_ioctl(cmd,(void *) arg));
1529 #endif
1530 return -EOPNOTSUPP;
1531 }
1532 return 0;
1533 }
1534
1535 #ifndef CONFIG_PACKET_MMAP
1536 #define packet_mmap sock_no_mmap
1537 #define packet_poll datagram_poll
1538 #else
1539
1540 unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wait)
1541 {
1542 struct sock *sk = sock->sk;
1543 struct packet_opt *po = sk->protinfo.af_packet;
1544 unsigned int mask = datagram_poll(file, sock, wait);
1545
1546 spin_lock_bh(&sk->receive_queue.lock);
1547 if (po->iovec) {
1548 unsigned last = po->head ? po->head-1 : po->iovmax;
1549
1550 if (po->iovec[last]->tp_status)
1551 mask |= POLLIN | POLLRDNORM;
1552 }
1553 spin_unlock_bh(&sk->receive_queue.lock);
1554 return mask;
1555 }
1556
1557
/* Dirty? Well, I still have not found a better way to account
 * for user mmaps.
 */
1561
1562 static void packet_mm_open(struct vm_area_struct *vma)
1563 {
1564 struct file *file = vma->vm_file;
1565 struct inode *inode = file->f_dentry->d_inode;
1566 struct socket * sock = &inode->u.socket_i;
1567 struct sock *sk = sock->sk;
1568
1569 if (sk)
1570 atomic_inc(&sk->protinfo.af_packet->mapped);
1571 }
1572
1573 static void packet_mm_close(struct vm_area_struct *vma)
1574 {
1575 struct file *file = vma->vm_file;
1576 struct inode *inode = file->f_dentry->d_inode;
1577 struct socket * sock = &inode->u.socket_i;
1578 struct sock *sk = sock->sk;
1579
1580 if (sk)
1581 atomic_dec(&sk->protinfo.af_packet->mapped);
1582 }
1583
1584 static struct vm_operations_struct packet_mmap_ops = {
1585 open: packet_mm_open,
1586 close: packet_mm_close,
1587 };
1588
1589 static void free_pg_vec(unsigned long *pg_vec, unsigned order, unsigned len)
1590 {
1591 int i;
1592
1593 for (i=0; i<len; i++) {
1594 if (pg_vec[i]) {
1595 struct page *page, *pend;
1596
1597 pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
1598 for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1599 ClearPageReserved(page);
1600 free_pages(pg_vec[i], order);
1601 }
1602 }
1603 kfree(pg_vec);
1604 }
1605
1606
1607 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1608 {
1609 unsigned long *pg_vec = NULL;
1610 struct tpacket_hdr **io_vec = NULL;
1611 struct packet_opt *po = sk->protinfo.af_packet;
1612 int order = 0;
1613 int err = 0;
1614
1615 if (req->tp_block_nr) {
1616 int i, l;
1617 int frames_per_block;
1618
1619 /* Sanity tests and some calculations */
1620 if ((int)req->tp_block_size <= 0)
1621 return -EINVAL;
1622 if (req->tp_block_size&(PAGE_SIZE-1))
1623 return -EINVAL;
1624 if (req->tp_frame_size < TPACKET_HDRLEN)
1625 return -EINVAL;
1626 if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
1627 return -EINVAL;
1628 frames_per_block = req->tp_block_size/req->tp_frame_size;
1629 if (frames_per_block <= 0)
1630 return -EINVAL;
1631 if (frames_per_block*req->tp_block_nr != req->tp_frame_nr)
1632 return -EINVAL;
1633 /* OK! */
1634
1635 /* Allocate page vector */
1636 while ((PAGE_SIZE<<order) < req->tp_block_size)
1637 order++;
1638
1639 err = -ENOMEM;
1640
1641 pg_vec = kmalloc(req->tp_block_nr*sizeof(unsigned long*), GFP_KERNEL);
1642 if (pg_vec == NULL)
1643 goto out;
1644 memset(pg_vec, 0, req->tp_block_nr*sizeof(unsigned long*));
1645
1646 for (i=0; i<req->tp_block_nr; i++) {
1647 struct page *page, *pend;
1648 pg_vec[i] = __get_free_pages(GFP_KERNEL, order);
1649 if (!pg_vec[i])
1650 goto out_free_pgvec;
1651
1652 pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
1653 for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1654 SetPageReserved(page);
1655 }
1656 /* Page vector is allocated */
1657
1658 /* Draw frames */
1659 io_vec = kmalloc(req->tp_frame_nr*sizeof(struct tpacket_hdr*), GFP_KERNEL);
1660 if (io_vec == NULL)
1661 goto out_free_pgvec;
1662 memset(io_vec, 0, req->tp_frame_nr*sizeof(struct tpacket_hdr*));
1663
1664 l = 0;
1665 for (i=0; i<req->tp_block_nr; i++) {
1666 unsigned long ptr = pg_vec[i];
1667 int k;
1668
1669 for (k=0; k<frames_per_block; k++, l++) {
1670 io_vec[l] = (struct tpacket_hdr*)ptr;
1671 io_vec[l]->tp_status = TP_STATUS_KERNEL;
1672 ptr += req->tp_frame_size;
1673 }
1674 }
1675 /* Done */
1676 } else {
1677 if (req->tp_frame_nr)
1678 return -EINVAL;
1679 }
1680
1681 lock_sock(sk);
1682
1683 /* Detach socket from network */
1684 spin_lock(&po->bind_lock);
1685 if (po->running)
1686 dev_remove_pack(&po->prot_hook);
1687 spin_unlock(&po->bind_lock);
1688
1689 err = -EBUSY;
1690 if (closing || atomic_read(&po->mapped) == 0) {
1691 err = 0;
1692 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1693
1694 spin_lock_bh(&sk->receive_queue.lock);
1695 pg_vec = XC(po->pg_vec, pg_vec);
1696 io_vec = XC(po->iovec, io_vec);
1697 po->iovmax = req->tp_frame_nr-1;
1698 po->head = 0;
1699 po->frame_size = req->tp_frame_size;
1700 spin_unlock_bh(&sk->receive_queue.lock);
1701
1702 order = XC(po->pg_vec_order, order);
1703 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1704
1705 po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1706 po->prot_hook.func = po->iovec ? tpacket_rcv : packet_rcv;
1707 skb_queue_purge(&sk->receive_queue);
1708 #undef XC
1709 if (atomic_read(&po->mapped))
1710 printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1711 }
1712
1713 spin_lock(&po->bind_lock);
1714 if (po->running)
1715 dev_add_pack(&po->prot_hook);
1716 spin_unlock(&po->bind_lock);
1717
1718 release_sock(sk);
1719
1720 if (io_vec)
1721 kfree(io_vec);
1722
1723 out_free_pgvec:
1724 if (pg_vec)
1725 free_pg_vec(pg_vec, order, req->tp_block_nr);
1726 out:
1727 return err;
1728 }
1729
1730 static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1731 {
1732 struct sock *sk = sock->sk;
1733 struct packet_opt *po = sk->protinfo.af_packet;
1734 unsigned long size;
1735 unsigned long start;
1736 int err = -EINVAL;
1737 int i;
1738
1739 if (vma->vm_pgoff)
1740 return -EINVAL;
1741
1742 size = vma->vm_end - vma->vm_start;
1743
1744 lock_sock(sk);
1745 if (po->pg_vec == NULL)
1746 goto out;
1747 if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1748 goto out;
1749
1750 atomic_inc(&po->mapped);
1751 start = vma->vm_start;
1752 err = -EAGAIN;
1753 for (i=0; i<po->pg_vec_len; i++) {
1754 if (remap_page_range(start, __pa(po->pg_vec[i]),
1755 po->pg_vec_pages*PAGE_SIZE,
1756 vma->vm_page_prot))
1757 goto out;
1758 start += po->pg_vec_pages*PAGE_SIZE;
1759 }
1760 vma->vm_ops = &packet_mmap_ops;
1761 err = 0;
1762
1763 out:
1764 release_sock(sk);
1765 return err;
1766 }
1767 #endif
1768
1769
1770 #ifdef CONFIG_SOCK_PACKET
1771 struct proto_ops packet_ops_spkt = {
1772 family: PF_PACKET,
1773
1774 release: packet_release,
1775 bind: packet_bind_spkt,
1776 connect: sock_no_connect,
1777 socketpair: sock_no_socketpair,
1778 accept: sock_no_accept,
1779 getname: packet_getname_spkt,
1780 poll: datagram_poll,
1781 ioctl: packet_ioctl,
1782 listen: sock_no_listen,
1783 shutdown: sock_no_shutdown,
1784 setsockopt: sock_no_setsockopt,
1785 getsockopt: sock_no_getsockopt,
1786 sendmsg: packet_sendmsg_spkt,
1787 recvmsg: packet_recvmsg,
1788 mmap: sock_no_mmap,
1789 sendpage: sock_no_sendpage,
1790 };
1791 #endif
1792
1793 struct proto_ops packet_ops = {
1794 family: PF_PACKET,
1795
1796 release: packet_release,
1797 bind: packet_bind,
1798 connect: sock_no_connect,
1799 socketpair: sock_no_socketpair,
1800 accept: sock_no_accept,
1801 getname: packet_getname,
1802 poll: packet_poll,
1803 ioctl: packet_ioctl,
1804 listen: sock_no_listen,
1805 shutdown: sock_no_shutdown,
1806 setsockopt: packet_setsockopt,
1807 getsockopt: packet_getsockopt,
1808 sendmsg: packet_sendmsg,
1809 recvmsg: packet_recvmsg,
1810 mmap: packet_mmap,
1811 sendpage: sock_no_sendpage,
1812 };
1813
1814 static struct net_proto_family packet_family_ops = {
1815 family: PF_PACKET,
1816 create: packet_create,
1817 };
1818
1819 static struct notifier_block packet_netdev_notifier = {
1820 notifier_call: packet_notifier,
1821 };
1822
1823 #ifdef CONFIG_PROC_FS
1824 static int packet_read_proc(char *buffer, char **start, off_t offset,
1825 int length, int *eof, void *data)
1826 {
1827 off_t pos=0;
1828 off_t begin=0;
1829 int len=0;
1830 struct sock *s;
1831
1832 len+= sprintf(buffer,"sk RefCnt Type Proto Iface R Rmem User Inode\n");
1833
1834 read_lock(&packet_sklist_lock);
1835
1836 for (s = packet_sklist; s; s = s->next) {
1837 len+=sprintf(buffer+len,"%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu",
1838 s,
1839 atomic_read(&s->refcnt),
1840 s->type,
1841 ntohs(s->num),
1842 s->protinfo.af_packet->ifindex,
1843 s->protinfo.af_packet->running,
1844 atomic_read(&s->rmem_alloc),
1845 sock_i_uid(s),
1846 sock_i_ino(s)
1847 );
1848
1849 buffer[len++]='\n';
1850
1851 pos=begin+len;
1852 if(pos<offset) {
1853 len=0;
1854 begin=pos;
1855 }
1856 if(pos>offset+length)
1857 goto done;
1858 }
1859 *eof = 1;
1860
1861 done:
1862 read_unlock(&packet_sklist_lock);
1863 *start=buffer+(offset-begin);
1864 len-=(offset-begin);
1865 if(len>length)
1866 len=length;
1867 if(len<0)
1868 len=0;
1869 return len;
1870 }
1871 #endif
1872
1873 static void __exit packet_exit(void)
1874 {
1875 remove_proc_entry("net/packet", 0);
1876 unregister_netdevice_notifier(&packet_netdev_notifier);
1877 sock_unregister(PF_PACKET);
1878 return;
1879 }
1880
1881 static int __init packet_init(void)
1882 {
1883 sock_register(&packet_family_ops);
1884 register_netdevice_notifier(&packet_netdev_notifier);
1885 #ifdef CONFIG_PROC_FS
1886 create_proc_read_entry("net/packet", 0, 0, packet_read_proc, NULL);
1887 #endif
1888 return 0;
1889 }
1890
1891 module_init(packet_init);
1892 module_exit(packet_exit);
1893