File: /usr/src/linux/net/core/dev.c
1 /*
2 * NET3 Protocol independent device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the non IP parts of dev.c 1.0.19
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dhinds@allegro.stanford.edu>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 *
22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set to 2
24 * if register_netdev gets called before
25 * net_dev_init & also removed a few lines
26 * of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant stunts to
29 * keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into drivers
34 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
35 * Alan Cox : 100 backlog just doesn't cut it when
36 * you start doing multicast video 8)
37 * Alan Cox : Rewrote net_bh and list manager.
38 * Alan Cox : Fix ETH_P_ALL echoback lengths.
39 * Alan Cox : Took out transmit every packet pass
40 * Saved a few bytes in the ioctl handler
41 * Alan Cox : Network driver sets packet type before calling netif_rx. Saves
42 * a function call a packet.
43 * Alan Cox : Hashed net_bh()
44 * Richard Kooijman: Timestamp fixes.
45 * Alan Cox : Wrong field in SIOCGIFDSTADDR
46 * Alan Cox : Device lock protection.
47 * Alan Cox : Fixed nasty side effect of device close changes.
48 * Rudi Cilibrasi : Pass the right thing to set_mac_address()
49 * Dave Miller : 32bit quantity for the device lock to make it work out
50 * on a Sparc.
51 * Bjorn Ekwall : Added KERNELD hack.
52 * Alan Cox : Cleaned up the backlog initialise.
53 * Craig Metz : SIOCGIFCONF fix if space for under
54 * 1 device.
55 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
56 * is no device open function.
57 * Andi Kleen : Fix error reporting for SIOCGIFCONF
58 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
59 * Cyrus Durgin : Cleaned for KMOD
60 * Adam Sulmicki : Bug Fix : Network Device Unload
61 * A network device unload needs to purge
62 * the backlog queue.
63 * Paul Rusty Russell : SIOCSIFNAME
64 * Pekka Riikonen : Netdev boot-time settings code
65 * Andrew Morton : Make unregister_netdevice wait indefinitely on dev->refcnt
66 * J Hadi Salim : - Backlog queue sampling
67 * - netif_rx() feedback
68 */
69
70 #include <asm/uaccess.h>
71 #include <asm/system.h>
72 #include <asm/bitops.h>
73 #include <linux/config.h>
74 #include <linux/types.h>
75 #include <linux/kernel.h>
76 #include <linux/sched.h>
77 #include <linux/string.h>
78 #include <linux/mm.h>
79 #include <linux/socket.h>
80 #include <linux/sockios.h>
81 #include <linux/errno.h>
82 #include <linux/interrupt.h>
83 #include <linux/if_ether.h>
84 #include <linux/netdevice.h>
85 #include <linux/etherdevice.h>
86 #include <linux/notifier.h>
87 #include <linux/skbuff.h>
88 #include <linux/brlock.h>
89 #include <net/sock.h>
90 #include <linux/rtnetlink.h>
91 #include <linux/proc_fs.h>
92 #include <linux/stat.h>
93 #include <linux/if_bridge.h>
94 #include <linux/divert.h>
95 #include <net/dst.h>
96 #include <net/pkt_sched.h>
97 #include <net/profile.h>
98 #include <net/checksum.h>
99 #include <linux/highmem.h>
100 #include <linux/init.h>
101 #include <linux/kmod.h>
102 #include <linux/module.h>
103 #if defined(CONFIG_NET_RADIO) || defined(CONFIG_NET_PCMCIA_RADIO)
104 #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
105 #endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
106 #ifdef CONFIG_PLIP
107 extern int plip_init(void);
108 #endif
109
110 /* This define, if set, will randomly drop a packet when congestion
111 * is more than moderate. It helps fairness in the multi-interface
112 * case when one of them is a hog, but it kills performance for the
113 * single interface case so it is off now by default.
114 */
115 #undef RAND_LIE
116
117 /* Setting this will sample the queue lengths and thus congestion
118 * via a timer instead of as each packet is received.
119 */
120 #undef OFFLINE_SAMPLE
121
122 NET_PROFILE_DEFINE(dev_queue_xmit)
123 NET_PROFILE_DEFINE(softnet_process)
124
/*
 * Text labels for media port selections.
 * NOTE(review): presumably indexed by the IF_PORT_* value -- confirm
 * against the users of this table.
 */
const char *if_port_text[] = {
	"unknown", "BNC", "10baseT", "AUI",
	"100baseT", "100baseTX", "100baseFX",
};
134
135 /*
136 * The list of packet types we will receive (as opposed to discard)
137 * and the routines to invoke.
138 *
139 * Why 16. Because with 16 the only overlap we get on a hash of the
140 * low nibble of the protocol value is RARP/SNAP/X.25.
141 *
142 * 0800 IP
143 * 0001 802.3
144 * 0002 AX.25
145 * 0004 802.2
146 * 8035 RARP
147 * 0005 SNAP
148 * 0805 X.25
149 * 0806 ARP
150 * 8137 IPX
151 * 0009 Localtalk
152 * 86DD IPv6
153 */
154
/* Handlers for specific protocols, hashed on the low nibble of the type. */
static struct packet_type *ptype_base[16];
/* Handlers registered for ETH_P_ALL (taps); statics start out NULL. */
static struct packet_type *ptype_all;
157
158 #ifdef OFFLINE_SAMPLE
159 static void sample_queue(unsigned long dummy);
160 static struct timer_list samp_timer = { function: sample_queue };
161 #endif
162
163 #ifdef CONFIG_HOTPLUG
164 static int net_run_sbin_hotplug(struct net_device *dev, char *action);
165 #else
166 #define net_run_sbin_hotplug(dev, action) ({ 0; })
167 #endif
168
169 /*
170 * Our notifier list
171 */
172
173 static struct notifier_block *netdev_chain=NULL;
174
175 /*
176 * Device drivers call our routines to queue packets here. We empty the
177 * queue in the local softnet handler.
178 */
179 struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned;
180
181 #ifdef CONFIG_NET_FASTROUTE
182 int netdev_fastroute;
183 int netdev_fastroute_obstacles;
184 #endif
185
186
187 /******************************************************************************************
188
189 Protocol management and registration routines
190
191 *******************************************************************************************/
192
193 /*
194 * For efficiency
195 */
196
197 int netdev_nit=0;
198
199 /*
200 * Add a protocol ID to the list. Now that the input handler is
201 * smarter we can dispense with all the messy stuff that used to be
202 * here.
203 *
204 * BEWARE!!! Protocol handlers, mangling input packets,
205 * MUST BE last in hash buckets and checking protocol handlers
206 * MUST start from promiscuous ptype_all chain in net_bh.
207 * It is true now, do not change it.
208 * Explanation follows: if protocol handler, mangling packet, will
209 * be the first on list, it is not able to sense, that packet
210 * is cloned and should be copied-on-write, so that it will
211 * change it and subsequent readers will get broken packet.
212 * --ANK (980803)
213 */
214
215 /**
216 * dev_add_pack - add packet handler
217 * @pt: packet type declaration
218 *
219 * Add a protocol handler to the networking stack. The passed &packet_type
220 * is linked into kernel lists and may not be freed until it has been
221 * removed from the kernel lists.
222 */
223
224 void dev_add_pack(struct packet_type *pt)
225 {
226 int hash;
227
228 br_write_lock_bh(BR_NETPROTO_LOCK);
229
230 #ifdef CONFIG_NET_FASTROUTE
231 /* Hack to detect packet socket */
232 if (pt->data) {
233 netdev_fastroute_obstacles++;
234 dev_clear_fastroute(pt->dev);
235 }
236 #endif
237 if (pt->type == htons(ETH_P_ALL)) {
238 netdev_nit++;
239 pt->next=ptype_all;
240 ptype_all=pt;
241 } else {
242 hash=ntohs(pt->type)&15;
243 pt->next = ptype_base[hash];
244 ptype_base[hash] = pt;
245 }
246 br_write_unlock_bh(BR_NETPROTO_LOCK);
247 }
248
249
250 /**
251 * dev_remove_pack - remove packet handler
252 * @pt: packet type declaration
253 *
254 * Remove a protocol handler that was previously added to the kernel
255 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
256 * from the kernel lists and can be freed or reused once this function
257 * returns.
258 */
259
260 void dev_remove_pack(struct packet_type *pt)
261 {
262 struct packet_type **pt1;
263
264 br_write_lock_bh(BR_NETPROTO_LOCK);
265
266 if (pt->type == htons(ETH_P_ALL)) {
267 netdev_nit--;
268 pt1=&ptype_all;
269 } else {
270 pt1=&ptype_base[ntohs(pt->type)&15];
271 }
272
273 for (; (*pt1) != NULL; pt1 = &((*pt1)->next)) {
274 if (pt == (*pt1)) {
275 *pt1 = pt->next;
276 #ifdef CONFIG_NET_FASTROUTE
277 if (pt->data)
278 netdev_fastroute_obstacles--;
279 #endif
280 br_write_unlock_bh(BR_NETPROTO_LOCK);
281 return;
282 }
283 }
284 br_write_unlock_bh(BR_NETPROTO_LOCK);
285 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
286 }
287
288 /******************************************************************************
289
290 Device Boot-time Settings Routines
291
292 *******************************************************************************/
293
294 /* Boot time configuration table */
295 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
296
297 /**
298 * netdev_boot_setup_add - add new setup entry
299 * @name: name of the device
300 * @map: configured settings for the device
301 *
302 * Adds new setup entry to the dev_boot_setup list. The function
303 * returns 0 on error and 1 on success. This is a generic routine to
304 * all netdevices.
305 */
306 int netdev_boot_setup_add(char *name, struct ifmap *map)
307 {
308 struct netdev_boot_setup *s;
309 int i;
310
311 s = dev_boot_setup;
312 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
313 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
314 memset(s[i].name, 0, sizeof(s[i].name));
315 strcpy(s[i].name, name);
316 memcpy(&s[i].map, map, sizeof(s[i].map));
317 break;
318 }
319 }
320
321 if (i >= NETDEV_BOOT_SETUP_MAX)
322 return 0;
323
324 return 1;
325 }
326
327 /**
328 * netdev_boot_setup_check - check boot time settings
329 * @dev: the netdevice
330 *
331 * Check boot time settings for the device.
332 * The found settings are set for the device to be used
333 * later in the device probing.
334 * Returns 0 if no settings found, 1 if they are.
335 */
336 int netdev_boot_setup_check(struct net_device *dev)
337 {
338 struct netdev_boot_setup *s;
339 int i;
340
341 s = dev_boot_setup;
342 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
343 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
344 !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
345 dev->irq = s[i].map.irq;
346 dev->base_addr = s[i].map.base_addr;
347 dev->mem_start = s[i].map.mem_start;
348 dev->mem_end = s[i].map.mem_end;
349 return 1;
350 }
351 }
352 return 0;
353 }
354
355 /*
356 * Saves at boot time configured settings for any netdevice.
357 */
358 int __init netdev_boot_setup(char *str)
359 {
360 int ints[5];
361 struct ifmap map;
362
363 str = get_options(str, ARRAY_SIZE(ints), ints);
364 if (!str || !*str)
365 return 0;
366
367 /* Save settings */
368 memset(&map, 0, sizeof(map));
369 if (ints[0] > 0)
370 map.irq = ints[1];
371 if (ints[0] > 1)
372 map.base_addr = ints[2];
373 if (ints[0] > 2)
374 map.mem_start = ints[3];
375 if (ints[0] > 3)
376 map.mem_end = ints[4];
377
378 /* Add new entry to the list */
379 return netdev_boot_setup_add(str, &map);
380 }
381
382 __setup("netdev=", netdev_boot_setup);
383
384 /*****************************************************************************************
385
386 Device Interface Subroutines
387
388 ******************************************************************************************/
389
390 /**
391 * __dev_get_by_name - find a device by its name
392 * @name: name to find
393 *
394 * Find an interface by name. Must be called under RTNL semaphore
395 * or @dev_base_lock. If the name is found a pointer to the device
396 * is returned. If the name is not found then %NULL is returned. The
397 * reference counters are not incremented so the caller must be
398 * careful with locks.
399 */
400
401
402 struct net_device *__dev_get_by_name(const char *name)
403 {
404 struct net_device *dev;
405
406 for (dev = dev_base; dev != NULL; dev = dev->next) {
407 if (strncmp(dev->name, name, IFNAMSIZ) == 0)
408 return dev;
409 }
410 return NULL;
411 }
412
413 /**
414 * dev_get_by_name - find a device by its name
415 * @name: name to find
416 *
417 * Find an interface by name. This can be called from any
418 * context and does its own locking. The returned handle has
419 * the usage count incremented and the caller must use dev_put() to
420 * release it when it is no longer needed. %NULL is returned if no
421 * matching device is found.
422 */
423
424 struct net_device *dev_get_by_name(const char *name)
425 {
426 struct net_device *dev;
427
428 read_lock(&dev_base_lock);
429 dev = __dev_get_by_name(name);
430 if (dev)
431 dev_hold(dev);
432 read_unlock(&dev_base_lock);
433 return dev;
434 }
435
436 /*
437 Return value is changed to int to prevent illegal usage in future.
438 It is still legal to use to check for device existence.
439
440 User should understand, that the result returned by this function
441 is meaningless, if it was not issued under rtnl semaphore.
442 */
443
444 /**
445 * dev_get - test if a device exists
446 * @name: name to test for
447 *
448 * Test if a name exists. Returns true if the name is found. In order
449 * to be sure the name is not allocated or removed during the test the
450 * caller must hold the rtnl semaphore.
451 *
452 * This function primarily exists for back compatibility with older
453 * drivers.
454 */
455
456 int dev_get(const char *name)
457 {
458 struct net_device *dev;
459
460 read_lock(&dev_base_lock);
461 dev = __dev_get_by_name(name);
462 read_unlock(&dev_base_lock);
463 return dev != NULL;
464 }
465
466 /**
467 * __dev_get_by_index - find a device by its ifindex
468 * @ifindex: index of device
469 *
470 * Search for an interface by index. Returns %NULL if the device
471 * is not found or a pointer to the device. The device has not
472 * had its reference counter increased so the caller must be careful
473 * about locking. The caller must hold either the RTNL semaphore
474 * or @dev_base_lock.
475 */
476
477 struct net_device * __dev_get_by_index(int ifindex)
478 {
479 struct net_device *dev;
480
481 for (dev = dev_base; dev != NULL; dev = dev->next) {
482 if (dev->ifindex == ifindex)
483 return dev;
484 }
485 return NULL;
486 }
487
488
489 /**
490 * dev_get_by_index - find a device by its ifindex
491 * @ifindex: index of device
492 *
493 * Search for an interface by index. Returns NULL if the device
494 * is not found or a pointer to the device. The device returned has
495 * had a reference added and the pointer is safe until the user calls
496 * dev_put to indicate they have finished with it.
497 */
498
499 struct net_device * dev_get_by_index(int ifindex)
500 {
501 struct net_device *dev;
502
503 read_lock(&dev_base_lock);
504 dev = __dev_get_by_index(ifindex);
505 if (dev)
506 dev_hold(dev);
507 read_unlock(&dev_base_lock);
508 return dev;
509 }
510
511 /**
512 * dev_getbyhwaddr - find a device by its hardware address
513 * @type: media type of device
514 * @ha: hardware address
515 *
516 * Search for an interface by MAC address. Returns NULL if the device
517 * is not found or a pointer to the device. The caller must hold the
518 * rtnl semaphore. The returned device has not had its ref count increased
519 * and the caller must therefore be careful about locking
520 *
521 * BUGS:
522 * If the API was consistent this would be __dev_get_by_hwaddr
523 */
524
525 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
526 {
527 struct net_device *dev;
528
529 ASSERT_RTNL();
530
531 for (dev = dev_base; dev != NULL; dev = dev->next) {
532 if (dev->type == type &&
533 memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
534 return dev;
535 }
536 return NULL;
537 }
538
539 /**
540 * dev_alloc_name - allocate a name for a device
541 * @dev: device
542 * @name: name format string
543 *
544 * Passed a format string - eg "lt%d" it will try and find a suitable
545 * id. Not efficient for many devices, not called a lot. The caller
546 * must hold the dev_base or rtnl lock while allocating the name and
547 * adding the device in order to avoid duplicates. Returns the number
548 * of the unit assigned or a negative errno code.
549 */
550
551 int dev_alloc_name(struct net_device *dev, const char *name)
552 {
553 int i;
554 char buf[32];
555
556 /*
557 * If you need over 100 please also fix the algorithm...
558 */
559 for (i = 0; i < 100; i++) {
560 sprintf(buf,name,i);
561 if (__dev_get_by_name(buf) == NULL) {
562 strcpy(dev->name, buf);
563 return i;
564 }
565 }
566 return -ENFILE; /* Over 100 of the things .. bail out! */
567 }
568
569 /**
570 * dev_alloc - allocate a network device and name
571 * @name: name format string
572 * @err: error return pointer
573 *
574 * Passed a format string, eg. "lt%d", it will allocate a network device
575 * and space for the name. %NULL is returned if no memory is available.
576 * If the allocation succeeds then the name is assigned and the
577 * device pointer returned. %NULL is returned if the name allocation
578 * failed. The cause of an error is returned as a negative errno code
579 * in the variable @err points to.
580 *
581 * The caller must hold the @dev_base or RTNL locks when doing this in
582 * order to avoid duplicate name allocations.
583 */
584
585 struct net_device *dev_alloc(const char *name, int *err)
586 {
587 struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL);
588 if (dev == NULL) {
589 *err = -ENOBUFS;
590 return NULL;
591 }
592 memset(dev, 0, sizeof(struct net_device));
593 *err = dev_alloc_name(dev, name);
594 if (*err < 0) {
595 kfree(dev);
596 return NULL;
597 }
598 return dev;
599 }
600
601 /**
602 * netdev_state_change - device changes state
603 * @dev: device to cause notification
604 *
605 * Called to indicate a device has changed state. This function calls
606 * the notifier chains for netdev_chain and sends a NEWLINK message
607 * to the routing socket.
608 */
609
610 void netdev_state_change(struct net_device *dev)
611 {
612 if (dev->flags&IFF_UP) {
613 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
614 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
615 }
616 }
617
618
619 #ifdef CONFIG_KMOD
620
621 /**
622 * dev_load - load a network module
623 * @name: name of interface
624 *
625 * If a network interface is not present and the process has suitable
626 * privileges this function loads the module. If module loading is not
627 * available in this kernel then it becomes a nop.
628 */
629
630 void dev_load(const char *name)
631 {
632 if (!dev_get(name) && capable(CAP_SYS_MODULE))
633 request_module(name);
634 }
635
636 #else
637
/* Without CONFIG_KMOD there is nothing to load: empty stub. */
extern inline void dev_load(const char *unused)
{
}
639
640 #endif
641
642 static int default_rebuild_header(struct sk_buff *skb)
643 {
644 printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!");
645 kfree_skb(skb);
646 return 1;
647 }
648
649 /**
650 * dev_open - prepare an interface for use.
651 * @dev: device to open
652 *
653 * Takes a device from down to up state. The device's private open
654 * function is invoked and then the multicast lists are loaded. Finally
655 * the device is moved into the up state and a %NETDEV_UP message is
656 * sent to the netdev notifier chain.
657 *
658 * Calling this function on an active interface is a nop. On a failure
659 * a negative errno code is returned.
660 */
661
662 int dev_open(struct net_device *dev)
663 {
664 int ret = 0;
665
666 /*
667 * Is it already up?
668 */
669
670 if (dev->flags&IFF_UP)
671 return 0;
672
673 /*
674 * Is it even present?
675 */
676 if (!netif_device_present(dev))
677 return -ENODEV;
678
679 /*
680 * Call device private open method
681 */
682 if (try_inc_mod_count(dev->owner)) {
683 if (dev->open) {
684 ret = dev->open(dev);
685 if (ret != 0 && dev->owner)
686 __MOD_DEC_USE_COUNT(dev->owner);
687 }
688 } else {
689 ret = -ENODEV;
690 }
691
692 /*
693 * If it went open OK then:
694 */
695
696 if (ret == 0)
697 {
698 /*
699 * Set the flags.
700 */
701 dev->flags |= IFF_UP;
702
703 set_bit(__LINK_STATE_START, &dev->state);
704
705 /*
706 * Initialize multicasting status
707 */
708 dev_mc_upload(dev);
709
710 /*
711 * Wakeup transmit queue engine
712 */
713 dev_activate(dev);
714
715 /*
716 * ... and announce new interface.
717 */
718 notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
719 }
720 return(ret);
721 }
722
723 #ifdef CONFIG_NET_FASTROUTE
724
725 static void dev_do_clear_fastroute(struct net_device *dev)
726 {
727 if (dev->accept_fastpath) {
728 int i;
729
730 for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) {
731 struct dst_entry *dst;
732
733 write_lock_irq(&dev->fastpath_lock);
734 dst = dev->fastpath[i];
735 dev->fastpath[i] = NULL;
736 write_unlock_irq(&dev->fastpath_lock);
737
738 dst_release(dst);
739 }
740 }
741 }
742
743 void dev_clear_fastroute(struct net_device *dev)
744 {
745 if (dev) {
746 dev_do_clear_fastroute(dev);
747 } else {
748 read_lock(&dev_base_lock);
749 for (dev = dev_base; dev; dev = dev->next)
750 dev_do_clear_fastroute(dev);
751 read_unlock(&dev_base_lock);
752 }
753 }
754 #endif
755
756 /**
757 * dev_close - shutdown an interface.
758 * @dev: device to shutdown
759 *
760 * This function moves an active device into down state. A
761 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
762 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
763 * chain.
764 */
765
766 int dev_close(struct net_device *dev)
767 {
768 if (!(dev->flags&IFF_UP))
769 return 0;
770
771 /*
772 * Tell people we are going down, so that they can
773 * prepare to death, when device is still operating.
774 */
775 notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
776
777 dev_deactivate(dev);
778
779 clear_bit(__LINK_STATE_START, &dev->state);
780
781 /*
782 * Call the device specific close. This cannot fail.
783 * Only if device is UP
784 *
785 * We allow it to be called even after a DETACH hot-plug
786 * event.
787 */
788
789 if (dev->stop)
790 dev->stop(dev);
791
792 /*
793 * Device is now down.
794 */
795
796 dev->flags &= ~IFF_UP;
797 #ifdef CONFIG_NET_FASTROUTE
798 dev_clear_fastroute(dev);
799 #endif
800
801 /*
802 * Tell people we are down
803 */
804 notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
805
806 /*
807 * Drop the module refcount
808 */
809 if (dev->owner)
810 __MOD_DEC_USE_COUNT(dev->owner);
811
812 return(0);
813 }
814
815
816 /*
817 * Device change register/unregister. These are not inline or static
818 * as we export them to the world.
819 */
820
821 /**
822 * register_netdevice_notifier - register a network notifier block
823 * @nb: notifier
824 *
825 * Register a notifier to be called when network device events occur.
826 * The notifier passed is linked into the kernel structures and must
827 * not be reused until it has been unregistered. A negative errno code
828 * is returned on a failure.
829 */
830
831 int register_netdevice_notifier(struct notifier_block *nb)
832 {
833 return notifier_chain_register(&netdev_chain, nb);
834 }
835
836 /**
837 * unregister_netdevice_notifier - unregister a network notifier block
838 * @nb: notifier
839 *
840 * Unregister a notifier previously registered by
841 * register_netdevice_notifier(). The notifier is unlinked from the
842 * kernel structures and may then be reused. A negative errno code
843 * is returned on a failure.
844 */
845
846 int unregister_netdevice_notifier(struct notifier_block *nb)
847 {
848 return notifier_chain_unregister(&netdev_chain,nb);
849 }
850
851 /*
852 * Support routine. Sends outgoing frames to any network
853 * taps currently in use.
854 */
855
856 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
857 {
858 struct packet_type *ptype;
859 get_fast_time(&skb->stamp);
860
861 br_read_lock(BR_NETPROTO_LOCK);
862 for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next)
863 {
864 /* Never send packets back to the socket
865 * they originated from - MvS (miquels@drinkel.ow.org)
866 */
867 if ((ptype->dev == dev || !ptype->dev) &&
868 ((struct sock *)ptype->data != skb->sk))
869 {
870 struct sk_buff *skb2;
871 if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL)
872 break;
873
874 /* skb->nh should be correctly
875 set by sender, so that the second statement is
876 just protection against buggy protocols.
877 */
878 skb2->mac.raw = skb2->data;
879
880 if (skb2->nh.raw < skb2->data || skb2->nh.raw > skb2->tail) {
881 if (net_ratelimit())
882 printk(KERN_DEBUG "protocol %04x is buggy, dev %s\n", skb2->protocol, dev->name);
883 skb2->nh.raw = skb2->data;
884 }
885
886 skb2->h.raw = skb2->nh.raw;
887 skb2->pkt_type = PACKET_OUTGOING;
888 ptype->func(skb2, skb->dev, ptype);
889 }
890 }
891 br_read_unlock(BR_NETPROTO_LOCK);
892 }
893
894 /* Calculate csum in the case, when packet is misrouted.
895 * If it failed by some reason, ignore and send skb with wrong
896 * checksum.
897 */
898 struct sk_buff * skb_checksum_help(struct sk_buff *skb)
899 {
900 int offset;
901 unsigned int csum;
902
903 offset = skb->h.raw - skb->data;
904 if (offset > (int)skb->len)
905 BUG();
906 csum = skb_checksum(skb, offset, skb->len-offset, 0);
907
908 offset = skb->tail - skb->h.raw;
909 if (offset <= 0)
910 BUG();
911 if (skb->csum+2 > offset)
912 BUG();
913
914 *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
915 skb->ip_summed = CHECKSUM_NONE;
916 return skb;
917 }
918
919 #ifdef CONFIG_HIGHMEM
920 /* Actually, we should eliminate this check as soon as we know, that:
921 * 1. IOMMU is present and allows to map all the memory.
922 * 2. No high memory really exists on this machine.
923 */
924
925 static inline int
926 illegal_highdma(struct net_device *dev, struct sk_buff *skb)
927 {
928 int i;
929
930 if (dev->features&NETIF_F_HIGHDMA)
931 return 0;
932
933 for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
934 if (skb_shinfo(skb)->frags[i].page >= highmem_start_page)
935 return 1;
936
937 return 0;
938 }
939 #else
940 #define illegal_highdma(dev, skb) (0)
941 #endif
942
943 /**
944 * dev_queue_xmit - transmit a buffer
945 * @skb: buffer to transmit
946 *
947 * Queue a buffer for transmission to a network device. The caller must
948 * have set the device and priority and built the buffer before calling this
949 * function. The function can be called from an interrupt.
950 *
951 * A negative errno code is returned on a failure. A success does not
952 * guarantee the frame will be transmitted as it may be dropped due
953 * to congestion or traffic shaping.
954 */
955
956 int dev_queue_xmit(struct sk_buff *skb)
957 {
958 struct net_device *dev = skb->dev;
959 struct Qdisc *q;
960
961 if (skb_shinfo(skb)->frag_list &&
962 !(dev->features&NETIF_F_FRAGLIST) &&
963 skb_linearize(skb, GFP_ATOMIC) != 0) {
964 kfree_skb(skb);
965 return -ENOMEM;
966 }
967
968 /* Fragmented skb is linearized if device does not support SG,
969 * or if at least one of fragments is in highmem and device
970 * does not support DMA from it.
971 */
972 if (skb_shinfo(skb)->nr_frags &&
973 (!(dev->features&NETIF_F_SG) || illegal_highdma(dev, skb)) &&
974 skb_linearize(skb, GFP_ATOMIC) != 0) {
975 kfree_skb(skb);
976 return -ENOMEM;
977 }
978
979 /* If packet is not checksummed and device does not support
980 * checksumming for this protocol, complete checksumming here.
981 */
982 if (skb->ip_summed == CHECKSUM_HW &&
983 (!(dev->features&(NETIF_F_HW_CSUM|NETIF_F_NO_CSUM)) &&
984 (!(dev->features&NETIF_F_IP_CSUM) ||
985 skb->protocol != htons(ETH_P_IP)))) {
986 if ((skb = skb_checksum_help(skb)) == NULL)
987 return -ENOMEM;
988 }
989
990 /* Grab device queue */
991 spin_lock_bh(&dev->queue_lock);
992 q = dev->qdisc;
993 if (q->enqueue) {
994 int ret = q->enqueue(skb, q);
995
996 qdisc_run(dev);
997
998 spin_unlock_bh(&dev->queue_lock);
999 return ret == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : ret;
1000 }
1001
1002 /* The device has no queue. Common case for software devices:
1003 loopback, all the sorts of tunnels...
1004
1005 Really, it is unlikely that xmit_lock protection is necessary here.
1006 (f.e. loopback and IP tunnels are clean ignoring statistics counters.)
1007 However, it is possible, that they rely on protection
1008 made by us here.
1009
1010 Check this and shot the lock. It is not prone from deadlocks.
1011 Either shot noqueue qdisc, it is even simpler 8)
1012 */
1013 if (dev->flags&IFF_UP) {
1014 int cpu = smp_processor_id();
1015
1016 if (dev->xmit_lock_owner != cpu) {
1017 spin_unlock(&dev->queue_lock);
1018 spin_lock(&dev->xmit_lock);
1019 dev->xmit_lock_owner = cpu;
1020
1021 if (!netif_queue_stopped(dev)) {
1022 if (netdev_nit)
1023 dev_queue_xmit_nit(skb,dev);
1024
1025 if (dev->hard_start_xmit(skb, dev) == 0) {
1026 dev->xmit_lock_owner = -1;
1027 spin_unlock_bh(&dev->xmit_lock);
1028 return 0;
1029 }
1030 }
1031 dev->xmit_lock_owner = -1;
1032 spin_unlock_bh(&dev->xmit_lock);
1033 if (net_ratelimit())
1034 printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name);
1035 kfree_skb(skb);
1036 return -ENETDOWN;
1037 } else {
1038 /* Recursion is detected! It is possible, unfortunately */
1039 if (net_ratelimit())
1040 printk(KERN_DEBUG "Dead loop on virtual device %s, fix it urgently!\n", dev->name);
1041 }
1042 }
1043 spin_unlock_bh(&dev->queue_lock);
1044
1045 kfree_skb(skb);
1046 return -ENETDOWN;
1047 }
1048
1049
1050 /*=======================================================================
1051 Receiver routines
1052 =======================================================================*/
1053
1054 int netdev_max_backlog = 300;
1055 /* These numbers are selected based on intuition and some
1056 * experimentation; if you have a more scientific way of doing this
1057 * please go ahead and fix things.
1058 */
1059 int no_cong_thresh = 10;
1060 int no_cong = 20;
1061 int lo_cong = 100;
1062 int mod_cong = 290;
1063
1064 struct netif_rx_stats netdev_rx_stat[NR_CPUS];
1065
1066
1067 #ifdef CONFIG_NET_HW_FLOWCONTROL
1068 atomic_t netdev_dropping = ATOMIC_INIT(0);
1069 static unsigned long netdev_fc_mask = 1;
1070 unsigned long netdev_fc_xoff = 0;
1071 spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;
1072
1073 static struct
1074 {
1075 void (*stimul)(struct net_device *);
1076 struct net_device *dev;
1077 } netdev_fc_slots[BITS_PER_LONG];
1078
1079 int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev))
1080 {
1081 int bit = 0;
1082 unsigned long flags;
1083
1084 spin_lock_irqsave(&netdev_fc_lock, flags);
1085 if (netdev_fc_mask != ~0UL) {
1086 bit = ffz(netdev_fc_mask);
1087 netdev_fc_slots[bit].stimul = stimul;
1088 netdev_fc_slots[bit].dev = dev;
1089 set_bit(bit, &netdev_fc_mask);
1090 clear_bit(bit, &netdev_fc_xoff);
1091 }
1092 spin_unlock_irqrestore(&netdev_fc_lock, flags);
1093 return bit;
1094 }
1095
1096 void netdev_unregister_fc(int bit)
1097 {
1098 unsigned long flags;
1099
1100 spin_lock_irqsave(&netdev_fc_lock, flags);
1101 if (bit > 0) {
1102 netdev_fc_slots[bit].stimul = NULL;
1103 netdev_fc_slots[bit].dev = NULL;
1104 clear_bit(bit, &netdev_fc_mask);
1105 clear_bit(bit, &netdev_fc_xoff);
1106 }
1107 spin_unlock_irqrestore(&netdev_fc_lock, flags);
1108 }
1109
1110 static void netdev_wakeup(void)
1111 {
1112 unsigned long xoff;
1113
1114 spin_lock(&netdev_fc_lock);
1115 xoff = netdev_fc_xoff;
1116 netdev_fc_xoff = 0;
1117 while (xoff) {
1118 int i = ffz(~xoff);
1119 xoff &= ~(1<<i);
1120 netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
1121 }
1122 spin_unlock(&netdev_fc_lock);
1123 }
1124 #endif
1125
1126 static void get_sample_stats(int cpu)
1127 {
1128 #ifdef RAND_LIE
1129 unsigned long rd;
1130 int rq;
1131 #endif
1132 int blog = softnet_data[cpu].input_pkt_queue.qlen;
1133 int avg_blog = softnet_data[cpu].avg_blog;
1134
1135 avg_blog = (avg_blog >> 1)+ (blog >> 1);
1136
1137 if (avg_blog > mod_cong) {
1138 /* Above moderate congestion levels. */
1139 softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
1140 #ifdef RAND_LIE
1141 rd = net_random();
1142 rq = rd % netdev_max_backlog;
1143 if (rq < avg_blog) /* unlucky bastard */
1144 softnet_data[cpu].cng_level = NET_RX_DROP;
1145 #endif
1146 } else if (avg_blog > lo_cong) {
1147 softnet_data[cpu].cng_level = NET_RX_CN_MOD;
1148 #ifdef RAND_LIE
1149 rd = net_random();
1150 rq = rd % netdev_max_backlog;
1151 if (rq < avg_blog) /* unlucky bastard */
1152 softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
1153 #endif
1154 } else if (avg_blog > no_cong)
1155 softnet_data[cpu].cng_level = NET_RX_CN_LOW;
1156 else /* no congestion */
1157 softnet_data[cpu].cng_level = NET_RX_SUCCESS;
1158
1159 softnet_data[cpu].avg_blog = avg_blog;
1160 }
1161
1162 #ifdef OFFLINE_SAMPLE
1163 static void sample_queue(unsigned long dummy)
1164 {
1165 /* 10 ms 0r 1ms -- i dont care -- JHS */
1166 int next_tick = 1;
1167 int cpu = smp_processor_id();
1168
1169 get_sample_stats(cpu);
1170 next_tick += jiffies;
1171 mod_timer(&samp_timer, next_tick);
1172 }
1173 #endif
1174
1175
1176 /**
1177 * netif_rx - post buffer to the network code
1178 * @skb: buffer to post
1179 *
1180 * This function receives a packet from a device driver and queues it for
1181 * the upper (protocol) levels to process. It always succeeds. The buffer
1182 * may be dropped during processing for congestion control or by the
1183 * protocol layers.
1184 *
1185 * return values:
1186 * NET_RX_SUCCESS (no congestion)
1187 * NET_RX_CN_LOW (low congestion)
1188 * NET_RX_CN_MOD (moderate congestion)
1189 * NET_RX_CN_HIGH (high congestion)
1190 * NET_RX_DROP (packet was dropped)
1191 *
1192 *
1193 */
1194
1195 int netif_rx(struct sk_buff *skb)
1196 {
1197 int this_cpu = smp_processor_id();
1198 struct softnet_data *queue;
1199 unsigned long flags;
1200
1201 if (skb->stamp.tv_sec == 0)
1202 get_fast_time(&skb->stamp);
1203
1204 /* The code is rearranged so that the path is the most
1205 short when CPU is congested, but is still operating.
1206 */
1207 queue = &softnet_data[this_cpu];
1208
1209 local_irq_save(flags);
1210
1211 netdev_rx_stat[this_cpu].total++;
1212 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1213 if (queue->input_pkt_queue.qlen) {
1214 if (queue->throttle)
1215 goto drop;
1216
1217 enqueue:
1218 dev_hold(skb->dev);
1219 __skb_queue_tail(&queue->input_pkt_queue,skb);
1220 /* Runs from irqs or BH's, no need to wake BH */
1221 cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
1222 local_irq_restore(flags);
1223 #ifndef OFFLINE_SAMPLE
1224 get_sample_stats(this_cpu);
1225 #endif
1226 return softnet_data[this_cpu].cng_level;
1227 }
1228
1229 if (queue->throttle) {
1230 queue->throttle = 0;
1231 #ifdef CONFIG_NET_HW_FLOWCONTROL
1232 if (atomic_dec_and_test(&netdev_dropping))
1233 netdev_wakeup();
1234 #endif
1235 }
1236 goto enqueue;
1237 }
1238
1239 if (queue->throttle == 0) {
1240 queue->throttle = 1;
1241 netdev_rx_stat[this_cpu].throttled++;
1242 #ifdef CONFIG_NET_HW_FLOWCONTROL
1243 atomic_inc(&netdev_dropping);
1244 #endif
1245 }
1246
1247 drop:
1248 netdev_rx_stat[this_cpu].dropped++;
1249 local_irq_restore(flags);
1250
1251 kfree_skb(skb);
1252 return NET_RX_DROP;
1253 }
1254
1255 /* Deliver skb to an old protocol, which is not threaded well
1256 or which do not understand shared skbs.
1257 */
1258 static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
1259 {
1260 static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
1261 int ret = NET_RX_DROP;
1262
1263
1264 if (!last) {
1265 skb = skb_clone(skb, GFP_ATOMIC);
1266 if (skb == NULL)
1267 return ret;
1268 }
1269 if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
1270 kfree_skb(skb);
1271 return ret;
1272 }
1273
1274 /* The assumption (correct one) is that old protocols
1275 did not depened on BHs different of NET_BH and TIMER_BH.
1276 */
1277
1278 /* Emulate NET_BH with special spinlock */
1279 spin_lock(&net_bh_lock);
1280
1281 /* Disable timers and wait for all timers completion */
1282 tasklet_disable(bh_task_vec+TIMER_BH);
1283
1284 ret = pt->func(skb, skb->dev, pt);
1285
1286 tasklet_hi_enable(bh_task_vec+TIMER_BH);
1287 spin_unlock(&net_bh_lock);
1288 return ret;
1289 }
1290
1291 /* Reparent skb to master device. This function is called
1292 * only from net_rx_action under BR_NETPROTO_LOCK. It is misuse
1293 * of BR_NETPROTO_LOCK, but it is OK for now.
1294 */
1295 static __inline__ void skb_bond(struct sk_buff *skb)
1296 {
1297 struct net_device *dev = skb->dev;
1298
1299 if (dev->master) {
1300 dev_hold(dev->master);
1301 skb->dev = dev->master;
1302 dev_put(dev);
1303 }
1304 }
1305
1306 static void net_tx_action(struct softirq_action *h)
1307 {
1308 int cpu = smp_processor_id();
1309
1310 if (softnet_data[cpu].completion_queue) {
1311 struct sk_buff *clist;
1312
1313 local_irq_disable();
1314 clist = softnet_data[cpu].completion_queue;
1315 softnet_data[cpu].completion_queue = NULL;
1316 local_irq_enable();
1317
1318 while (clist != NULL) {
1319 struct sk_buff *skb = clist;
1320 clist = clist->next;
1321
1322 BUG_TRAP(atomic_read(&skb->users) == 0);
1323 __kfree_skb(skb);
1324 }
1325 }
1326
1327 if (softnet_data[cpu].output_queue) {
1328 struct net_device *head;
1329
1330 local_irq_disable();
1331 head = softnet_data[cpu].output_queue;
1332 softnet_data[cpu].output_queue = NULL;
1333 local_irq_enable();
1334
1335 while (head != NULL) {
1336 struct net_device *dev = head;
1337 head = head->next_sched;
1338
1339 smp_mb__before_clear_bit();
1340 clear_bit(__LINK_STATE_SCHED, &dev->state);
1341
1342 if (spin_trylock(&dev->queue_lock)) {
1343 qdisc_run(dev);
1344 spin_unlock(&dev->queue_lock);
1345 } else {
1346 netif_schedule(dev);
1347 }
1348 }
1349 }
1350 }
1351
1352 /**
1353 * net_call_rx_atomic
1354 * @fn: function to call
1355 *
1356 * Make a function call that is atomic with respect to the protocol
1357 * layers.
1358 */
1359
1360 void net_call_rx_atomic(void (*fn)(void))
1361 {
1362 br_write_lock_bh(BR_NETPROTO_LOCK);
1363 fn();
1364 br_write_unlock_bh(BR_NETPROTO_LOCK);
1365 }
1366
1367 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
/* Bridge input hook; net_rx_action() hands frames to it only while it is non-NULL. */
1368 void (*br_handle_frame_hook)(struct sk_buff *skb) = NULL;
1369 #endif
1370
1371 static __inline__ int handle_bridge(struct sk_buff *skb,
1372 struct packet_type *pt_prev)
1373 {
1374 int ret = NET_RX_DROP;
1375
1376 if (pt_prev) {
1377 if (!pt_prev->data)
1378 ret = deliver_to_old_ones(pt_prev, skb, 0);
1379 else {
1380 atomic_inc(&skb->users);
1381 ret = pt_prev->func(skb, skb->dev, pt_prev);
1382 }
1383 }
1384
1385 br_handle_frame_hook(skb);
1386 return ret;
1387 }
1388
1389
1390 #ifdef CONFIG_NET_DIVERT
1391 static inline void handle_diverter(struct sk_buff *skb)
1392 {
1393 /* if diversion is supported on device, then divert */
1394 if (skb->dev->divert && skb->dev->divert->divert)
1395 divert_frame(skb);
1396 }
1397 #endif /* CONFIG_NET_DIVERT */
1398
1399
1400 static void net_rx_action(struct softirq_action *h)
1401 {
1402 int this_cpu = smp_processor_id();
1403 struct softnet_data *queue = &softnet_data[this_cpu];
1404 unsigned long start_time = jiffies;
1405 int bugdet = netdev_max_backlog;
1406
1407 br_read_lock(BR_NETPROTO_LOCK);
1408
1409 for (;;) {
1410 struct sk_buff *skb;
1411 struct net_device *rx_dev;
1412
1413 local_irq_disable();
1414 skb = __skb_dequeue(&queue->input_pkt_queue);
1415 local_irq_enable();
1416
1417 if (skb == NULL)
1418 break;
1419
1420 skb_bond(skb);
1421
1422 rx_dev = skb->dev;
1423
1424 #ifdef CONFIG_NET_FASTROUTE
1425 if (skb->pkt_type == PACKET_FASTROUTE) {
1426 netdev_rx_stat[this_cpu].fastroute_deferred_out++;
1427 dev_queue_xmit(skb);
1428 dev_put(rx_dev);
1429 continue;
1430 }
1431 #endif
1432 skb->h.raw = skb->nh.raw = skb->data;
1433 {
1434 struct packet_type *ptype, *pt_prev;
1435 unsigned short type = skb->protocol;
1436
1437 pt_prev = NULL;
1438 for (ptype = ptype_all; ptype; ptype = ptype->next) {
1439 if (!ptype->dev || ptype->dev == skb->dev) {
1440 if (pt_prev) {
1441 if (!pt_prev->data) {
1442 deliver_to_old_ones(pt_prev, skb, 0);
1443 } else {
1444 atomic_inc(&skb->users);
1445 pt_prev->func(skb,
1446 skb->dev,
1447 pt_prev);
1448 }
1449 }
1450 pt_prev = ptype;
1451 }
1452 }
1453
1454 #ifdef CONFIG_NET_DIVERT
1455 if (skb->dev->divert && skb->dev->divert->divert)
1456 handle_diverter(skb);
1457 #endif /* CONFIG_NET_DIVERT */
1458
1459
1460 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
1461 if (skb->dev->br_port != NULL &&
1462 br_handle_frame_hook != NULL) {
1463 handle_bridge(skb, pt_prev);
1464 dev_put(rx_dev);
1465 continue;
1466 }
1467 #endif
1468
1469 for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) {
1470 if (ptype->type == type &&
1471 (!ptype->dev || ptype->dev == skb->dev)) {
1472 if (pt_prev) {
1473 if (!pt_prev->data)
1474 deliver_to_old_ones(pt_prev, skb, 0);
1475 else {
1476 atomic_inc(&skb->users);
1477 pt_prev->func(skb,
1478 skb->dev,
1479 pt_prev);
1480 }
1481 }
1482 pt_prev = ptype;
1483 }
1484 }
1485
1486 if (pt_prev) {
1487 if (!pt_prev->data)
1488 deliver_to_old_ones(pt_prev, skb, 1);
1489 else
1490 pt_prev->func(skb, skb->dev, pt_prev);
1491 } else
1492 kfree_skb(skb);
1493 }
1494
1495 dev_put(rx_dev);
1496
1497 if (bugdet-- < 0 || jiffies - start_time > 1)
1498 goto softnet_break;
1499
1500 #ifdef CONFIG_NET_HW_FLOWCONTROL
1501 if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) {
1502 if (atomic_dec_and_test(&netdev_dropping)) {
1503 queue->throttle = 0;
1504 netdev_wakeup();
1505 goto softnet_break;
1506 }
1507 }
1508 #endif
1509
1510 }
1511 br_read_unlock(BR_NETPROTO_LOCK);
1512
1513 local_irq_disable();
1514 if (queue->throttle) {
1515 queue->throttle = 0;
1516 #ifdef CONFIG_NET_HW_FLOWCONTROL
1517 if (atomic_dec_and_test(&netdev_dropping))
1518 netdev_wakeup();
1519 #endif
1520 }
1521 local_irq_enable();
1522
1523 NET_PROFILE_LEAVE(softnet_process);
1524 return;
1525
1526 softnet_break:
1527 br_read_unlock(BR_NETPROTO_LOCK);
1528
1529 local_irq_disable();
1530 netdev_rx_stat[this_cpu].time_squeeze++;
1531 /* This already runs in BH context, no need to wake up BH's */
1532 cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
1533 local_irq_enable();
1534
1535 NET_PROFILE_LEAVE(softnet_process);
1536 return;
1537 }
1538
1539 static gifconf_func_t * gifconf_list [NPROTO];
1540
1541 /**
1542 * register_gifconf - register a SIOCGIF handler
1543 * @family: Address family
1544 * @gifconf: Function handler
1545 *
1546 * Register protocol dependent address dumping routines. The handler
1547 * that is passed must not be freed or reused until it has been replaced
1548 * by another handler.
1549 */
1550
1551 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1552 {
1553 if (family>=NPROTO)
1554 return -EINVAL;
1555 gifconf_list[family] = gifconf;
1556 return 0;
1557 }
1558
1559
1560 /*
1561 * Map an interface index to its name (SIOCGIFNAME)
1562 */
1563
1564 /*
1565 * We need this ioctl for efficient implementation of the
1566 * if_indextoname() function required by the IPv6 API. Without
1567 * it, we would have to search all the interfaces to find a
1568 * match. --pb
1569 */
1570
1571 static int dev_ifname(struct ifreq *arg)
1572 {
1573 struct net_device *dev;
1574 struct ifreq ifr;
1575
1576 /*
1577 * Fetch the caller's info block.
1578 */
1579
1580 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1581 return -EFAULT;
1582
1583 read_lock(&dev_base_lock);
1584 dev = __dev_get_by_index(ifr.ifr_ifindex);
1585 if (!dev) {
1586 read_unlock(&dev_base_lock);
1587 return -ENODEV;
1588 }
1589
1590 strcpy(ifr.ifr_name, dev->name);
1591 read_unlock(&dev_base_lock);
1592
1593 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1594 return -EFAULT;
1595 return 0;
1596 }
1597
1598 /*
1599 * Perform a SIOCGIFCONF call. This structure will change
1600 * size eventually, and there is nothing I can do about it.
1601 * Thus we will need a 'compatibility mode'.
1602 */
1603
1604 static int dev_ifconf(char *arg)
1605 {
1606 struct ifconf ifc;
1607 struct net_device *dev;
1608 char *pos;
1609 int len;
1610 int total;
1611 int i;
1612
1613 /*
1614 * Fetch the caller's info block.
1615 */
1616
1617 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1618 return -EFAULT;
1619
1620 pos = ifc.ifc_buf;
1621 len = ifc.ifc_len;
1622
1623 /*
1624 * Loop over the interfaces, and write an info block for each.
1625 */
1626
1627 total = 0;
1628 for (dev = dev_base; dev != NULL; dev = dev->next) {
1629 for (i=0; i<NPROTO; i++) {
1630 if (gifconf_list[i]) {
1631 int done;
1632 if (pos==NULL) {
1633 done = gifconf_list[i](dev, NULL, 0);
1634 } else {
1635 done = gifconf_list[i](dev, pos+total, len-total);
1636 }
1637 if (done<0) {
1638 return -EFAULT;
1639 }
1640 total += done;
1641 }
1642 }
1643 }
1644
1645 /*
1646 * All done. Write the updated control block back to the caller.
1647 */
1648 ifc.ifc_len = total;
1649
1650 if (copy_to_user(arg, &ifc, sizeof(struct ifconf)))
1651 return -EFAULT;
1652
1653 /*
1654 * Both BSD and Solaris return 0 here, so we do too.
1655 */
1656 return 0;
1657 }
1658
1659 /*
1660 * This is invoked by the /proc filesystem handler to display a device
1661 * in detail.
1662 */
1663
1664 #ifdef CONFIG_PROC_FS
1665
1666 static int sprintf_stats(char *buffer, struct net_device *dev)
1667 {
1668 struct net_device_stats *stats = (dev->get_stats ? dev->get_stats(dev): NULL);
1669 int size;
1670
1671 if (stats)
1672 size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1673 dev->name,
1674 stats->rx_bytes,
1675 stats->rx_packets, stats->rx_errors,
1676 stats->rx_dropped + stats->rx_missed_errors,
1677 stats->rx_fifo_errors,
1678 stats->rx_length_errors + stats->rx_over_errors
1679 + stats->rx_crc_errors + stats->rx_frame_errors,
1680 stats->rx_compressed, stats->multicast,
1681 stats->tx_bytes,
1682 stats->tx_packets, stats->tx_errors, stats->tx_dropped,
1683 stats->tx_fifo_errors, stats->collisions,
1684 stats->tx_carrier_errors + stats->tx_aborted_errors
1685 + stats->tx_window_errors + stats->tx_heartbeat_errors,
1686 stats->tx_compressed);
1687 else
1688 size = sprintf(buffer, "%6s: No statistics available.\n", dev->name);
1689
1690 return size;
1691 }
1692
1693 /*
1694 * Called from the PROCfs module. This now uses the new arbitrary sized /proc/net interface
1695 * to create /proc/net/dev
1696 */
1697
1698 static int dev_get_info(char *buffer, char **start, off_t offset, int length)
1699 {
1700 int len = 0;
1701 off_t begin = 0;
1702 off_t pos = 0;
1703 int size;
1704 struct net_device *dev;
1705
1706
1707 size = sprintf(buffer,
1708 "Inter-| Receive | Transmit\n"
1709 " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n");
1710
1711 pos += size;
1712 len += size;
1713
1714
1715 read_lock(&dev_base_lock);
1716 for (dev = dev_base; dev != NULL; dev = dev->next) {
1717 size = sprintf_stats(buffer+len, dev);
1718 len += size;
1719 pos = begin + len;
1720
1721 if (pos < offset) {
1722 len = 0;
1723 begin = pos;
1724 }
1725 if (pos > offset + length)
1726 break;
1727 }
1728 read_unlock(&dev_base_lock);
1729
1730 *start = buffer + (offset - begin); /* Start of wanted data */
1731 len -= (offset - begin); /* Start slop */
1732 if (len > length)
1733 len = length; /* Ending slop */
1734 if (len < 0)
1735 len = 0;
1736 return len;
1737 }
1738
1739 static int dev_proc_stats(char *buffer, char **start, off_t offset,
1740 int length, int *eof, void *data)
1741 {
1742 int i, lcpu;
1743 int len=0;
1744
1745 for (lcpu=0; lcpu<smp_num_cpus; lcpu++) {
1746 i = cpu_logical_map(lcpu);
1747 len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
1748 netdev_rx_stat[i].total,
1749 netdev_rx_stat[i].dropped,
1750 netdev_rx_stat[i].time_squeeze,
1751 netdev_rx_stat[i].throttled,
1752 netdev_rx_stat[i].fastroute_hit,
1753 netdev_rx_stat[i].fastroute_success,
1754 netdev_rx_stat[i].fastroute_defer,
1755 netdev_rx_stat[i].fastroute_deferred_out,
1756 #if 0
1757 netdev_rx_stat[i].fastroute_latency_reduction
1758 #else
1759 netdev_rx_stat[i].cpu_collision
1760 #endif
1761 );
1762 }
1763
1764 len -= offset;
1765
1766 if (len > length)
1767 len = length;
1768 if (len < 0)
1769 len = 0;
1770
1771 *start = buffer + offset;
1772 *eof = 1;
1773
1774 return len;
1775 }
1776
1777 #endif /* CONFIG_PROC_FS */
1778
1779
1780 #ifdef WIRELESS_EXT
1781 #ifdef CONFIG_PROC_FS
1782
1783 /*
1784 * Print one entry of /proc/net/wireless
1785 * This is a clone of /proc/net/dev (just above)
1786 */
1787 static int sprintf_wireless_stats(char *buffer, struct net_device *dev)
1788 {
1789 /* Get stats from the driver */
1790 struct iw_statistics *stats = (dev->get_wireless_stats ?
1791 dev->get_wireless_stats(dev) :
1792 (struct iw_statistics *) NULL);
1793 int size;
1794
1795 if (stats != (struct iw_statistics *) NULL) {
1796 size = sprintf(buffer,
1797 "%6s: %04x %3d%c %3d%c %3d%c %6d %6d %6d\n",
1798 dev->name,
1799 stats->status,
1800 stats->qual.qual,
1801 stats->qual.updated & 1 ? '.' : ' ',
1802 stats->qual.level,
1803 stats->qual.updated & 2 ? '.' : ' ',
1804 stats->qual.noise,
1805 stats->qual.updated & 4 ? '.' : ' ',
1806 stats->discard.nwid,
1807 stats->discard.code,
1808 stats->discard.misc);
1809 stats->qual.updated = 0;
1810 }
1811 else
1812 size = 0;
1813
1814 return size;
1815 }
1816
1817 /*
1818 * Print info for /proc/net/wireless (print all entries)
1819 * This is a clone of /proc/net/dev (just above)
1820 */
1821 static int dev_get_wireless_info(char * buffer, char **start, off_t offset,
1822 int length)
1823 {
1824 int len = 0;
1825 off_t begin = 0;
1826 off_t pos = 0;
1827 int size;
1828
1829 struct net_device * dev;
1830
1831 size = sprintf(buffer,
1832 "Inter-| sta-| Quality | Discarded packets\n"
1833 " face | tus | link level noise | nwid crypt misc\n"
1834 );
1835
1836 pos += size;
1837 len += size;
1838
1839 read_lock(&dev_base_lock);
1840 for (dev = dev_base; dev != NULL; dev = dev->next) {
1841 size = sprintf_wireless_stats(buffer + len, dev);
1842 len += size;
1843 pos = begin + len;
1844
1845 if (pos < offset) {
1846 len = 0;
1847 begin = pos;
1848 }
1849 if (pos > offset + length)
1850 break;
1851 }
1852 read_unlock(&dev_base_lock);
1853
1854 *start = buffer + (offset - begin); /* Start of wanted data */
1855 len -= (offset - begin); /* Start slop */
1856 if (len > length)
1857 len = length; /* Ending slop */
1858 if (len < 0)
1859 len = 0;
1860
1861 return len;
1862 }
1863 #endif /* CONFIG_PROC_FS */
1864 #endif /* WIRELESS_EXT */
1865
1866 /**
1867 * netdev_set_master - set up master/slave pair
1868 * @slave: slave device
1869 * @master: new master device
1870 *
1871 * Changes the master device of the slave. Pass %NULL to break the
1872 * bonding. The caller must hold the RTNL semaphore. On a failure
1873 * a negative errno code is returned. On success the reference counts
1874 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
1875 * function returns zero.
1876 */
1877
1878 int netdev_set_master(struct net_device *slave, struct net_device *master)
1879 {
1880 struct net_device *old = slave->master;
1881
1882 ASSERT_RTNL();
1883
1884 if (master) {
1885 if (old)
1886 return -EBUSY;
1887 dev_hold(master);
1888 }
1889
1890 br_write_lock_bh(BR_NETPROTO_LOCK);
1891 slave->master = master;
1892 br_write_unlock_bh(BR_NETPROTO_LOCK);
1893
1894 if (old)
1895 dev_put(old);
1896
1897 if (master)
1898 slave->flags |= IFF_SLAVE;
1899 else
1900 slave->flags &= ~IFF_SLAVE;
1901
1902 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
1903 return 0;
1904 }
1905
1906 /**
1907 * dev_set_promiscuity - update promiscuity count on a device
1908 * @dev: device
1909 * @inc: modifier
1910 *
1911 * Add or remove promsicuity from a device. While the count in the device
1912 * remains above zero the interface remains promiscuous. Once it hits zero
1913 * the device reverts back to normal filtering operation. A negative inc
1914 * value is used to drop promiscuity on the device.
1915 */
1916
1917 void dev_set_promiscuity(struct net_device *dev, int inc)
1918 {
1919 unsigned short old_flags = dev->flags;
1920
1921 dev->flags |= IFF_PROMISC;
1922 if ((dev->promiscuity += inc) == 0)
1923 dev->flags &= ~IFF_PROMISC;
1924 if (dev->flags^old_flags) {
1925 #ifdef CONFIG_NET_FASTROUTE
1926 if (dev->flags&IFF_PROMISC) {
1927 netdev_fastroute_obstacles++;
1928 dev_clear_fastroute(dev);
1929 } else
1930 netdev_fastroute_obstacles--;
1931 #endif
1932 dev_mc_upload(dev);
1933 printk(KERN_INFO "device %s %s promiscuous mode\n",
1934 dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
1935 }
1936 }
1937
1938 /**
1939 * dev_set_allmulti - update allmulti count on a device
1940 * @dev: device
1941 * @inc: modifier
1942 *
1943 * Add or remove reception of all multicast frames to a device. While the
1944 * count in the device remains above zero the interface remains listening
1945 * to all interfaces. Once it hits zero the device reverts back to normal
1946 * filtering operation. A negative @inc value is used to drop the counter
1947 * when releasing a resource needing all multicasts.
1948 */
1949
1950 void dev_set_allmulti(struct net_device *dev, int inc)
1951 {
1952 unsigned short old_flags = dev->flags;
1953
1954 dev->flags |= IFF_ALLMULTI;
1955 if ((dev->allmulti += inc) == 0)
1956 dev->flags &= ~IFF_ALLMULTI;
1957 if (dev->flags^old_flags)
1958 dev_mc_upload(dev);
1959 }
1960
1961 int dev_change_flags(struct net_device *dev, unsigned flags)
1962 {
1963 int ret;
1964 int old_flags = dev->flags;
1965
1966 /*
1967 * Set the flags on our device.
1968 */
1969
1970 dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC|
1971 IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) |
1972 (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI));
1973
1974 /*
1975 * Load in the correct multicast list now the flags have changed.
1976 */
1977
1978 dev_mc_upload(dev);
1979
1980 /*
1981 * Have we downed the interface. We handle IFF_UP ourselves
1982 * according to user attempts to set it, rather than blindly
1983 * setting it.
1984 */
1985
1986 ret = 0;
1987 if ((old_flags^flags)&IFF_UP) /* Bit is different ? */
1988 {
1989 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
1990
1991 if (ret == 0)
1992 dev_mc_upload(dev);
1993 }
1994
1995 if (dev->flags&IFF_UP &&
1996 ((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE)))
1997 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
1998
1999 if ((flags^dev->gflags)&IFF_PROMISC) {
2000 int inc = (flags&IFF_PROMISC) ? +1 : -1;
2001 dev->gflags ^= IFF_PROMISC;
2002 dev_set_promiscuity(dev, inc);
2003 }
2004
2005 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2006 is important. Some (broken) drivers set IFF_PROMISC, when
2007 IFF_ALLMULTI is requested not asking us and not reporting.
2008 */
2009 if ((flags^dev->gflags)&IFF_ALLMULTI) {
2010 int inc = (flags&IFF_ALLMULTI) ? +1 : -1;
2011 dev->gflags ^= IFF_ALLMULTI;
2012 dev_set_allmulti(dev, inc);
2013 }
2014
2015 if (old_flags^dev->flags)
2016 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags);
2017
2018 return ret;
2019 }
2020
2021 /*
2022 * Perform the SIOCxIFxxx calls.
2023 */
2024
2025 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2026 {
2027 struct net_device *dev;
2028 int err;
2029
2030 if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
2031 return -ENODEV;
2032
2033 switch(cmd)
2034 {
2035 case SIOCGIFFLAGS: /* Get interface flags */
2036 ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING))
2037 |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI));
2038 if (netif_running(dev) && netif_carrier_ok(dev))
2039 ifr->ifr_flags |= IFF_RUNNING;
2040 return 0;
2041
2042 case SIOCSIFFLAGS: /* Set interface flags */
2043 return dev_change_flags(dev, ifr->ifr_flags);
2044
2045 case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */
2046 ifr->ifr_metric = 0;
2047 return 0;
2048
2049 case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */
2050 return -EOPNOTSUPP;
2051
2052 case SIOCGIFMTU: /* Get the MTU of a device */
2053 ifr->ifr_mtu = dev->mtu;
2054 return 0;
2055
2056 case SIOCSIFMTU: /* Set the MTU of a device */
2057 if (ifr->ifr_mtu == dev->mtu)
2058 return 0;
2059
2060 /*
2061 * MTU must be positive.
2062 */
2063
2064 if (ifr->ifr_mtu<0)
2065 return -EINVAL;
2066
2067 if (!netif_device_present(dev))
2068 return -ENODEV;
2069
2070 if (dev->change_mtu)
2071 err = dev->change_mtu(dev, ifr->ifr_mtu);
2072 else {
2073 dev->mtu = ifr->ifr_mtu;
2074 err = 0;
2075 }
2076 if (!err && dev->flags&IFF_UP)
2077 notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev);
2078 return err;
2079
2080 case SIOCGIFHWADDR:
2081 memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN);
2082 ifr->ifr_hwaddr.sa_family=dev->type;
2083 return 0;
2084
2085 case SIOCSIFHWADDR:
2086 if (dev->set_mac_address == NULL)
2087 return -EOPNOTSUPP;
2088 if (ifr->ifr_hwaddr.sa_family!=dev->type)
2089 return -EINVAL;
2090 if (!netif_device_present(dev))
2091 return -ENODEV;
2092 err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
2093 if (!err)
2094 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2095 return err;
2096
2097 case SIOCSIFHWBROADCAST:
2098 if (ifr->ifr_hwaddr.sa_family!=dev->type)
2099 return -EINVAL;
2100 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN);
2101 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2102 return 0;
2103
2104 case SIOCGIFMAP:
2105 ifr->ifr_map.mem_start=dev->mem_start;
2106 ifr->ifr_map.mem_end=dev->mem_end;
2107 ifr->ifr_map.base_addr=dev->base_addr;
2108 ifr->ifr_map.irq=dev->irq;
2109 ifr->ifr_map.dma=dev->dma;
2110 ifr->ifr_map.port=dev->if_port;
2111 return 0;
2112
2113 case SIOCSIFMAP:
2114 if (dev->set_config) {
2115 if (!netif_device_present(dev))
2116 return -ENODEV;
2117 return dev->set_config(dev,&ifr->ifr_map);
2118 }
2119 return -EOPNOTSUPP;
2120
2121 case SIOCADDMULTI:
2122 if (dev->set_multicast_list == NULL ||
2123 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2124 return -EINVAL;
2125 if (!netif_device_present(dev))
2126 return -ENODEV;
2127 dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1);
2128 return 0;
2129
2130 case SIOCDELMULTI:
2131 if (dev->set_multicast_list == NULL ||
2132 ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
2133 return -EINVAL;
2134 if (!netif_device_present(dev))
2135 return -ENODEV;
2136 dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1);
2137 return 0;
2138
2139 case SIOCGIFINDEX:
2140 ifr->ifr_ifindex = dev->ifindex;
2141 return 0;
2142
2143 case SIOCGIFTXQLEN:
2144 ifr->ifr_qlen = dev->tx_queue_len;
2145 return 0;
2146
2147 case SIOCSIFTXQLEN:
2148 if (ifr->ifr_qlen<0)
2149 return -EINVAL;
2150 dev->tx_queue_len = ifr->ifr_qlen;
2151 return 0;
2152
2153 case SIOCSIFNAME:
2154 if (dev->flags&IFF_UP)
2155 return -EBUSY;
2156 if (__dev_get_by_name(ifr->ifr_newname))
2157 return -EEXIST;
2158 memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
2159 dev->name[IFNAMSIZ-1] = 0;
2160 notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
2161 return 0;
2162
2163 /*
2164 * Unknown or private ioctl
2165 */
2166
2167 default:
2168 if ((cmd >= SIOCDEVPRIVATE &&
2169 cmd <= SIOCDEVPRIVATE + 15) ||
2170 cmd == SIOCETHTOOL ||
2171 cmd == SIOCGMIIPHY ||
2172 cmd == SIOCGMIIREG ||
2173 cmd == SIOCSMIIREG) {
2174 if (dev->do_ioctl) {
2175 if (!netif_device_present(dev))
2176 return -ENODEV;
2177 return dev->do_ioctl(dev, ifr, cmd);
2178 }
2179 return -EOPNOTSUPP;
2180 }
2181
2182 #ifdef WIRELESS_EXT
2183 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2184 if (dev->do_ioctl) {
2185 if (!netif_device_present(dev))
2186 return -ENODEV;
2187 return dev->do_ioctl(dev, ifr, cmd);
2188 }
2189 return -EOPNOTSUPP;
2190 }
2191 #endif /* WIRELESS_EXT */
2192
2193 }
2194 return -EINVAL;
2195 }
2196
2197 /*
2198 * This function handles all "interface"-type I/O control requests. The actual
2199 * 'doing' part of this is dev_ifsioc above.
2200 */
2201
2202 /**
2203 * dev_ioctl - network device ioctl
2204 * @cmd: command to issue
2205 * @arg: pointer to a struct ifreq in user space
2206 *
2207 * Issue ioctl functions to devices. This is normally called by the
2208 * user space syscall interfaces but can sometimes be useful for
2209 * other purposes. The return value is the return from the syscall if
2210 * positive or a negative errno code on error.
2211 */
2212
2213 int dev_ioctl(unsigned int cmd, void *arg)
2214 {
2215 struct ifreq ifr;
2216 int ret;
2217 char *colon;
2218
2219 /* One special case: SIOCGIFCONF takes ifconf argument
2220 and requires shared lock, because it sleeps writing
2221 to user space.
2222 */
2223
2224 if (cmd == SIOCGIFCONF) {
2225 rtnl_shlock();
2226 ret = dev_ifconf((char *) arg);
2227 rtnl_shunlock();
2228 return ret;
2229 }
2230 if (cmd == SIOCGIFNAME) {
2231 return dev_ifname((struct ifreq *)arg);
2232 }
2233
2234 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2235 return -EFAULT;
2236
2237 ifr.ifr_name[IFNAMSIZ-1] = 0;
2238
2239 colon = strchr(ifr.ifr_name, ':');
2240 if (colon)
2241 *colon = 0;
2242
2243 /*
2244 * See which interface the caller is talking about.
2245 */
2246
2247 switch(cmd)
2248 {
2249 /*
2250 * These ioctl calls:
2251 * - can be done by all.
2252 * - atomic and do not require locking.
2253 * - return a value
2254 */
2255
2256 case SIOCGIFFLAGS:
2257 case SIOCGIFMETRIC:
2258 case SIOCGIFMTU:
2259 case SIOCGIFHWADDR:
2260 case SIOCGIFSLAVE:
2261 case SIOCGIFMAP:
2262 case SIOCGIFINDEX:
2263 case SIOCGIFTXQLEN:
2264 dev_load(ifr.ifr_name);
2265 read_lock(&dev_base_lock);
2266 ret = dev_ifsioc(&ifr, cmd);
2267 read_unlock(&dev_base_lock);
2268 if (!ret) {
2269 if (colon)
2270 *colon = ':';
2271 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2272 return -EFAULT;
2273 }
2274 return ret;
2275
2276 /*
2277 * These ioctl calls:
2278 * - require superuser power.
2279 * - require strict serialization.
2280 * - return a value
2281 */
2282
2283 case SIOCETHTOOL:
2284 case SIOCGMIIPHY:
2285 case SIOCGMIIREG:
2286 if (!capable(CAP_NET_ADMIN))
2287 return -EPERM;
2288 dev_load(ifr.ifr_name);
2289 dev_probe_lock();
2290 rtnl_lock();
2291 ret = dev_ifsioc(&ifr, cmd);
2292 rtnl_unlock();
2293 dev_probe_unlock();
2294 if (!ret) {
2295 if (colon)
2296 *colon = ':';
2297 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2298 return -EFAULT;
2299 }
2300 return ret;
2301
2302 /*
2303 * These ioctl calls:
2304 * - require superuser power.
2305 * - require strict serialization.
2306 * - do not return a value
2307 */
2308
2309 case SIOCSIFFLAGS:
2310 case SIOCSIFMETRIC:
2311 case SIOCSIFMTU:
2312 case SIOCSIFMAP:
2313 case SIOCSIFHWADDR:
2314 case SIOCSIFSLAVE:
2315 case SIOCADDMULTI:
2316 case SIOCDELMULTI:
2317 case SIOCSIFHWBROADCAST:
2318 case SIOCSIFTXQLEN:
2319 case SIOCSIFNAME:
2320 case SIOCSMIIREG:
2321 if (!capable(CAP_NET_ADMIN))
2322 return -EPERM;
2323 dev_load(ifr.ifr_name);
2324 dev_probe_lock();
2325 rtnl_lock();
2326 ret = dev_ifsioc(&ifr, cmd);
2327 rtnl_unlock();
2328 dev_probe_unlock();
2329 return ret;
2330
2331 case SIOCGIFMEM:
2332 /* Get the per device memory space. We can add this but currently
2333 do not support it */
2334 case SIOCSIFMEM:
2335 /* Set the per device memory buffer space. Not applicable in our case */
2336 case SIOCSIFLINK:
2337 return -EINVAL;
2338
2339 /*
2340 * Unknown or private ioctl.
2341 */
2342
2343 default:
2344 if (cmd >= SIOCDEVPRIVATE &&
2345 cmd <= SIOCDEVPRIVATE + 15) {
2346 dev_load(ifr.ifr_name);
2347 dev_probe_lock();
2348 rtnl_lock();
2349 ret = dev_ifsioc(&ifr, cmd);
2350 rtnl_unlock();
2351 dev_probe_unlock();
2352 if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2353 return -EFAULT;
2354 return ret;
2355 }
2356 #ifdef WIRELESS_EXT
2357 /* Take care of Wireless Extensions */
2358 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2359 /* If command is `set a parameter', or
2360 * `get the encoding parameters', check if
2361 * the user has the right to do it */
2362 if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) {
2363 if(!capable(CAP_NET_ADMIN))
2364 return -EPERM;
2365 }
2366 dev_load(ifr.ifr_name);
2367 rtnl_lock();
2368 ret = dev_ifsioc(&ifr, cmd);
2369 rtnl_unlock();
2370 if (!ret && IW_IS_GET(cmd) &&
2371 copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2372 return -EFAULT;
2373 return ret;
2374 }
2375 #endif /* WIRELESS_EXT */
2376 return -EINVAL;
2377 }
2378 }
2379
2380
2381 /**
2382 * dev_new_index - allocate an ifindex
2383 *
2384 * Returns a suitable unique value for a new device interface
2385 * number. The caller must hold the rtnl semaphore or the
2386 * dev_base_lock to be sure it remains unique.
2387 */
2388
2389 int dev_new_index(void)
2390 {
2391 static int ifindex;
2392 for (;;) {
2393 if (++ifindex <= 0)
2394 ifindex=1;
2395 if (__dev_get_by_index(ifindex) == NULL)
2396 return ifindex;
2397 }
2398 }
2399
2400 static int dev_boot_phase = 1;
2401
2402 /**
2403 * register_netdevice - register a network device
2404 * @dev: device to register
2405 *
2406 * Take a completed network device structure and add it to the kernel
2407 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2408 * chain. 0 is returned on success. A negative errno code is returned
2409 * on a failure to set up the device, or if the name is a duplicate.
2410 *
2411 * Callers must hold the rtnl semaphore. See the comment at the
2412 * end of Space.c for details about the locking. You may want
2413 * register_netdev() instead of this.
2414 *
2415 * BUGS:
2416 * The locking appears insufficient to guarantee two parallel registers
2417 * will not get the same name.
2418 */
2419
2420 int net_dev_init(void);
2421
2422 int register_netdevice(struct net_device *dev)
2423 {
2424 struct net_device *d, **dp;
2425 #ifdef CONFIG_NET_DIVERT
2426 int ret;
2427 #endif
2428
2429 spin_lock_init(&dev->queue_lock);
2430 spin_lock_init(&dev->xmit_lock);
2431 dev->xmit_lock_owner = -1;
2432 #ifdef CONFIG_NET_FASTROUTE
2433 dev->fastpath_lock=RW_LOCK_UNLOCKED;
2434 #endif
2435
2436 if (dev_boot_phase)
2437 net_dev_init();
2438
2439 #ifdef CONFIG_NET_DIVERT
2440 ret = alloc_divert_blk(dev);
2441 if (ret)
2442 return ret;
2443 #endif /* CONFIG_NET_DIVERT */
2444
2445 dev->iflink = -1;
2446
2447 /* Init, if this function is available */
2448 if (dev->init && dev->init(dev) != 0) {
2449 #ifdef CONFIG_NET_DIVERT
2450 free_divert_blk(dev);
2451 #endif
2452 return -EIO;
2453 }
2454
2455 dev->ifindex = dev_new_index();
2456 if (dev->iflink == -1)
2457 dev->iflink = dev->ifindex;
2458
2459 /* Check for existence, and append to tail of chain */
2460 for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
2461 if (d == dev || strcmp(d->name, dev->name) == 0) {
2462 #ifdef CONFIG_NET_DIVERT
2463 free_divert_blk(dev);
2464 #endif
2465 return -EEXIST;
2466 }
2467 }
2468 /*
2469 * nil rebuild_header routine,
2470 * that should be never called and used as just bug trap.
2471 */
2472
2473 if (dev->rebuild_header == NULL)
2474 dev->rebuild_header = default_rebuild_header;
2475
2476 /*
2477 * Default initial state at registry is that the
2478 * device is present.
2479 */
2480
2481 set_bit(__LINK_STATE_PRESENT, &dev->state);
2482
2483 dev->next = NULL;
2484 dev_init_scheduler(dev);
2485 write_lock_bh(&dev_base_lock);
2486 *dp = dev;
2487 dev_hold(dev);
2488 dev->deadbeaf = 0;
2489 write_unlock_bh(&dev_base_lock);
2490
2491 /* Notify protocols, that a new device appeared. */
2492 notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2493
2494 net_run_sbin_hotplug(dev, "register");
2495
2496 return 0;
2497 }
2498
2499 /**
2500 * netdev_finish_unregister - complete unregistration
2501 * @dev: device
2502 *
2503 * Destroy and free a dead device. A value of zero is returned on
2504 * success.
2505 */
2506
2507 int netdev_finish_unregister(struct net_device *dev)
2508 {
2509 BUG_TRAP(dev->ip_ptr==NULL);
2510 BUG_TRAP(dev->ip6_ptr==NULL);
2511 BUG_TRAP(dev->dn_ptr==NULL);
2512
2513 if (!dev->deadbeaf) {
2514 printk(KERN_ERR "Freeing alive device %p, %s\n", dev, dev->name);
2515 return 0;
2516 }
2517 #ifdef NET_REFCNT_DEBUG
2518 printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name,
2519 (dev->features & NETIF_F_DYNALLOC)?"":", old style");
2520 #endif
2521 if (dev->destructor)
2522 dev->destructor(dev);
2523 if (dev->features & NETIF_F_DYNALLOC)
2524 kfree(dev);
2525 return 0;
2526 }
2527
2528 /**
2529 * unregister_netdevice - remove device from the kernel
2530 * @dev: device
2531 *
2532 * This function shuts down a device interface and removes it
2533 * from the kernel tables. On success 0 is returned, on a failure
2534 * a negative errno code is returned.
2535 *
2536 * Callers must hold the rtnl semaphore. See the comment at the
2537 * end of Space.c for details about the locking. You may want
2538 * unregister_netdev() instead of this.
2539 */
2540
2541 int unregister_netdevice(struct net_device *dev)
2542 {
2543 unsigned long now, warning_time;
2544 struct net_device *d, **dp;
2545
2546 /* If device is running, close it first. */
2547 if (dev->flags & IFF_UP)
2548 dev_close(dev);
2549
2550 BUG_TRAP(dev->deadbeaf==0);
2551 dev->deadbeaf = 1;
2552
2553 /* And unlink it from device chain. */
2554 for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
2555 if (d == dev) {
2556 write_lock_bh(&dev_base_lock);
2557 *dp = d->next;
2558 write_unlock_bh(&dev_base_lock);
2559 break;
2560 }
2561 }
2562 if (d == NULL) {
2563 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never was registered\n", dev->name, dev);
2564 return -ENODEV;
2565 }
2566
2567 /* Synchronize to net_rx_action. */
2568 br_write_lock_bh(BR_NETPROTO_LOCK);
2569 br_write_unlock_bh(BR_NETPROTO_LOCK);
2570
2571 if (dev_boot_phase == 0) {
2572 #ifdef CONFIG_NET_FASTROUTE
2573 dev_clear_fastroute(dev);
2574 #endif
2575
2576 /* Shutdown queueing discipline. */
2577 dev_shutdown(dev);
2578
2579 net_run_sbin_hotplug(dev, "unregister");
2580
2581 /* Notify protocols, that we are about to destroy
2582 this device. They should clean all the things.
2583 */
2584 notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
2585
2586 /*
2587 * Flush the multicast chain
2588 */
2589 dev_mc_discard(dev);
2590 }
2591
2592 if (dev->uninit)
2593 dev->uninit(dev);
2594
2595 /* Notifier chain MUST detach us from master device. */
2596 BUG_TRAP(dev->master==NULL);
2597
2598 #ifdef CONFIG_NET_DIVERT
2599 free_divert_blk(dev);
2600 #endif
2601
2602 if (dev->features & NETIF_F_DYNALLOC) {
2603 #ifdef NET_REFCNT_DEBUG
2604 if (atomic_read(&dev->refcnt) != 1)
2605 printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1);
2606 #endif
2607 dev_put(dev);
2608 return 0;
2609 }
2610
2611 /* Last reference is our one */
2612 if (atomic_read(&dev->refcnt) == 1) {
2613 dev_put(dev);
2614 return 0;
2615 }
2616
2617 #ifdef NET_REFCNT_DEBUG
2618 printk("unregister_netdevice: waiting %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt));
2619 #endif
2620
2621 /* EXPLANATION. If dev->refcnt is not now 1 (our own reference)
2622 it means that someone in the kernel still has a reference
2623 to this device and we cannot release it.
2624
2625 "New style" devices have destructors, hence we can return from this
2626 function and destructor will do all the work later. As of kernel 2.4.0
2627 there are very few "New Style" devices.
2628
2629 "Old style" devices expect that the device is free of any references
2630 upon exit from this function.
2631 We cannot return from this function until all such references have
2632 fallen away. This is because the caller of this function will probably
2633 immediately kfree(*dev) and then be unloaded via sys_delete_module.
2634
2635 So, we linger until all references fall away. The duration of the
2636 linger is basically unbounded! It is driven by, for example, the
2637 current setting of sysctl_ipfrag_time.
2638
2639 After 1 second, we start to rebroadcast unregister notifications
2640 in hope that careless clients will release the device.
2641
2642 */
2643
2644 now = warning_time = jiffies;
2645 while (atomic_read(&dev->refcnt) != 1) {
2646 if ((jiffies - now) > 1*HZ) {
2647 /* Rebroadcast unregister notification */
2648 notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
2649 }
2650 current->state = TASK_INTERRUPTIBLE;
2651 schedule_timeout(HZ/4);
2652 current->state = TASK_RUNNING;
2653 if ((jiffies - warning_time) > 10*HZ) {
2654 printk(KERN_EMERG "unregister_netdevice: waiting for %s to "
2655 "become free. Usage count = %d\n",
2656 dev->name, atomic_read(&dev->refcnt));
2657 warning_time = jiffies;
2658 }
2659 }
2660 dev_put(dev);
2661 return 0;
2662 }
2663
2664
2665 /*
2666 * Initialize the DEV module. At boot time this walks the device list and
2667 * unhooks any devices that fail to initialise (normally hardware not
2668 * present) and leaves us with a valid list of present and active devices.
2669 *
2670 */
2671
2672 extern void net_device_init(void);
2673 extern void ip_auto_config(void);
2674 #ifdef CONFIG_NET_DIVERT
2675 extern void dv_init(void);
2676 #endif /* CONFIG_NET_DIVERT */
2677
2678
2679 /*
2680 * Callers must hold the rtnl semaphore. See the comment at the
2681 * end of Space.c for details about the locking.
2682 */
2683 int __init net_dev_init(void)
2684 {
2685 struct net_device *dev, **dp;
2686 int i;
2687
2688 if (!dev_boot_phase)
2689 return 0;
2690
2691 #ifdef CONFIG_NET_DIVERT
2692 dv_init();
2693 #endif /* CONFIG_NET_DIVERT */
2694
2695 /*
2696 * Initialise the packet receive queues.
2697 */
2698
2699 for (i = 0; i < NR_CPUS; i++) {
2700 struct softnet_data *queue;
2701
2702 queue = &softnet_data[i];
2703 skb_queue_head_init(&queue->input_pkt_queue);
2704 queue->throttle = 0;
2705 queue->cng_level = 0;
2706 queue->avg_blog = 10; /* arbitrary non-zero */
2707 queue->completion_queue = NULL;
2708 }
2709
2710 #ifdef CONFIG_NET_PROFILE
2711 net_profile_init();
2712 NET_PROFILE_REGISTER(dev_queue_xmit);
2713 NET_PROFILE_REGISTER(softnet_process);
2714 #endif
2715
2716 #ifdef OFFLINE_SAMPLE
2717 samp_timer.expires = jiffies + (10 * HZ);
2718 add_timer(&samp_timer);
2719 #endif
2720
2721 /*
2722 * Add the devices.
2723 * If the call to dev->init fails, the dev is removed
2724 * from the chain disconnecting the device until the
2725 * next reboot.
2726 *
2727 * NB At boot phase networking is dead. No locking is required.
2728 * But we still preserve dev_base_lock for sanity.
2729 */
2730
2731 dp = &dev_base;
2732 while ((dev = *dp) != NULL) {
2733 spin_lock_init(&dev->queue_lock);
2734 spin_lock_init(&dev->xmit_lock);
2735 #ifdef CONFIG_NET_FASTROUTE
2736 dev->fastpath_lock = RW_LOCK_UNLOCKED;
2737 #endif
2738 dev->xmit_lock_owner = -1;
2739 dev->iflink = -1;
2740 dev_hold(dev);
2741
2742 /*
2743 * Allocate name. If the init() fails
2744 * the name will be reissued correctly.
2745 */
2746 if (strchr(dev->name, '%'))
2747 dev_alloc_name(dev, dev->name);
2748
2749 /*
2750 * Check boot time settings for the device.
2751 */
2752 netdev_boot_setup_check(dev);
2753
2754 if (dev->init && dev->init(dev)) {
2755 /*
2756 * It failed to come up. It will be unhooked later.
2757 * dev_alloc_name can now advance to next suitable
2758 * name that is checked next.
2759 */
2760 dev->deadbeaf = 1;
2761 dp = &dev->next;
2762 } else {
2763 dp = &dev->next;
2764 dev->ifindex = dev_new_index();
2765 if (dev->iflink == -1)
2766 dev->iflink = dev->ifindex;
2767 if (dev->rebuild_header == NULL)
2768 dev->rebuild_header = default_rebuild_header;
2769 dev_init_scheduler(dev);
2770 set_bit(__LINK_STATE_PRESENT, &dev->state);
2771 }
2772 }
2773
2774 /*
2775 * Unhook devices that failed to come up
2776 */
2777 dp = &dev_base;
2778 while ((dev = *dp) != NULL) {
2779 if (dev->deadbeaf) {
2780 write_lock_bh(&dev_base_lock);
2781 *dp = dev->next;
2782 write_unlock_bh(&dev_base_lock);
2783 dev_put(dev);
2784 } else {
2785 dp = &dev->next;
2786 }
2787 }
2788
2789 #ifdef CONFIG_PROC_FS
2790 proc_net_create("dev", 0, dev_get_info);
2791 create_proc_read_entry("net/softnet_stat", 0, 0, dev_proc_stats, NULL);
2792 #ifdef WIRELESS_EXT
2793 proc_net_create("wireless", 0, dev_get_wireless_info);
2794 #endif /* WIRELESS_EXT */
2795 #endif /* CONFIG_PROC_FS */
2796
2797 dev_boot_phase = 0;
2798
2799 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
2800 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
2801
2802 dst_init();
2803 dev_mcast_init();
2804
2805 #ifdef CONFIG_NET_SCHED
2806 pktsched_init();
2807 #endif
2808
2809 /*
2810 * Initialise network devices
2811 */
2812
2813 net_device_init();
2814
2815 return 0;
2816 }
2817
2818 #ifdef CONFIG_HOTPLUG
2819
2820 /* Notify userspace when a netdevice event occurs,
2821 * by running '/sbin/hotplug net' with certain
2822 * environment variables set.
2823 */
2824
2825 static int net_run_sbin_hotplug(struct net_device *dev, char *action)
2826 {
2827 char *argv[3], *envp[5], ifname[12 + IFNAMSIZ], action_str[32];
2828 int i;
2829
2830 sprintf(ifname, "INTERFACE=%s", dev->name);
2831 sprintf(action_str, "ACTION=%s", action);
2832
2833 i = 0;
2834 argv[i++] = hotplug_path;
2835 argv[i++] = "net";
2836 argv[i] = 0;
2837
2838 i = 0;
2839 /* minimal command environment */
2840 envp [i++] = "HOME=/";
2841 envp [i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
2842 envp [i++] = ifname;
2843 envp [i++] = action_str;
2844 envp [i] = 0;
2845
2846 return call_usermodehelper(argv [0], argv, envp);
2847 }
2848 #endif
2849