File: /usr/src/linux/net/core/rtnetlink.c

1     /*
2      * INET		An implementation of the TCP/IP protocol suite for the LINUX
3      *		operating system.  INET is implemented using the  BSD Socket
4      *		interface as the means of communication with the user level.
5      *
6      *		Routing netlink socket interface: protocol independent part.
7      *
8      * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9      *
10      *		This program is free software; you can redistribute it and/or
11      *		modify it under the terms of the GNU General Public License
12      *		as published by the Free Software Foundation; either version
13      *		2 of the License, or (at your option) any later version.
14      *
15      *	Fixes:
16      *	Vitaly E. Lavrov		RTA_OK arithmetics was wrong.
17      */
18     
19     #include <linux/config.h>
20     #include <linux/errno.h>
21     #include <linux/types.h>
22     #include <linux/socket.h>
23     #include <linux/kernel.h>
24     #include <linux/major.h>
25     #include <linux/sched.h>
26     #include <linux/timer.h>
27     #include <linux/string.h>
28     #include <linux/sockios.h>
29     #include <linux/net.h>
30     #include <linux/fcntl.h>
31     #include <linux/mm.h>
32     #include <linux/slab.h>
33     #include <linux/interrupt.h>
34     #include <linux/capability.h>
35     #include <linux/skbuff.h>
36     #include <linux/init.h>
37     
38     #include <asm/uaccess.h>
39     #include <asm/system.h>
40     #include <asm/string.h>
41     
42     #include <linux/inet.h>
43     #include <linux/netdevice.h>
44     #include <net/ip.h>
45     #include <net/protocol.h>
46     #include <net/arp.h>
47     #include <net/route.h>
48     #include <net/tcp.h>
49     #include <net/udp.h>
50     #include <net/sock.h>
51     #include <net/pkt_sched.h>
52     
/*
 * Semaphore behind the rtnl locking helpers.  rtnetlink_rcv() releases it
 * with up() once it has finished draining its receive queue.
 */
DECLARE_MUTEX(rtnl_sem);
54     
/*
 * Take the rtnl lock: the shared part first, then the exclusive part.
 * rtnl_unlock() drops them in the opposite order.
 */
void rtnl_lock(void)
{
	rtnl_shlock();
	rtnl_exlock();
}
60      
/*
 * Release the rtnl lock taken by rtnl_lock(): the exclusive part first,
 * then the shared part.
 */
void rtnl_unlock(void)
{
	rtnl_exunlock();
	rtnl_shunlock();
}
66     
67     int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
68     {
69     	memset(tb, 0, sizeof(struct rtattr*)*maxattr);
70     
71     	while (RTA_OK(rta, len)) {
72     		unsigned flavor = rta->rta_type;
73     		if (flavor && flavor <= maxattr)
74     			tb[flavor-1] = rta;
75     		rta = RTA_NEXT(rta, len);
76     	}
77     	return 0;
78     }
79     
80     #ifdef CONFIG_RTNETLINK
81     struct sock *rtnl;
82     
83     struct rtnetlink_link * rtnetlink_links[NPROTO];
84     
85     #define _S	1	/* superuser privileges required */
86     #define _X	2	/* exclusive access to tables required */
87     #define _G	4	/* GET request */
88     
89     static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] =
90     {
91     	NLMSG_LENGTH(sizeof(struct ifinfomsg)),
92     	NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
93     	NLMSG_LENGTH(sizeof(struct rtmsg)),
94     	NLMSG_LENGTH(sizeof(struct ndmsg)),
95     	NLMSG_LENGTH(sizeof(struct rtmsg)),
96     	NLMSG_LENGTH(sizeof(struct tcmsg)),
97     	NLMSG_LENGTH(sizeof(struct tcmsg)),
98     	NLMSG_LENGTH(sizeof(struct tcmsg))
99     };
100     
101     static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] =
102     {
103     	IFLA_MAX,
104     	IFA_MAX,
105     	RTA_MAX,
106     	NDA_MAX,
107     	RTA_MAX,
108     	TCA_MAX,
109     	TCA_MAX,
110     	TCA_MAX
111     };
112     
113     void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
114     {
115     	struct rtattr *rta;
116     	int size = RTA_LENGTH(attrlen);
117     
118     	rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size));
119     	rta->rta_type = attrtype;
120     	rta->rta_len = size;
121     	memcpy(RTA_DATA(rta), data, attrlen);
122     }
123     
124     int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
125     {
126     	int err = 0;
127     
128     	NETLINK_CB(skb).dst_groups = group;
129     	if (echo)
130     		atomic_inc(&skb->users);
131     	netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
132     	if (echo)
133     		err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
134     	return err;
135     }
136     
137     int rtnetlink_put_metrics(struct sk_buff *skb, unsigned *metrics)
138     {
139     	struct rtattr *mx = (struct rtattr*)skb->tail;
140     	int i;
141     
142     	RTA_PUT(skb, RTA_METRICS, 0, NULL);
143     	for (i=0; i<RTAX_MAX; i++) {
144     		if (metrics[i])
145     			RTA_PUT(skb, i+1, sizeof(unsigned), metrics+i);
146     	}
147     	mx->rta_len = skb->tail - (u8*)mx;
148     	if (mx->rta_len == RTA_LENGTH(0))
149     		skb_trim(skb, (u8*)mx - skb->data);
150     	return 0;
151     
152     rtattr_failure:
153     	skb_trim(skb, (u8*)mx - skb->data);
154     	return -1;
155     }
156     
157     
158     static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
159     				 int type, u32 pid, u32 seq, u32 change)
160     {
161     	struct ifinfomsg *r;
162     	struct nlmsghdr  *nlh;
163     	unsigned char	 *b = skb->tail;
164     
165     	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
166     	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
167     	r = NLMSG_DATA(nlh);
168     	r->ifi_family = AF_UNSPEC;
169     	r->ifi_type = dev->type;
170     	r->ifi_index = dev->ifindex;
171     	r->ifi_flags = dev->flags;
172     	r->ifi_change = change;
173     
174     	if (!netif_running(dev) || !netif_carrier_ok(dev))
175     		r->ifi_flags &= ~IFF_RUNNING;
176     	else
177     		r->ifi_flags |= IFF_RUNNING;
178     
179     	RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
180     	if (dev->addr_len) {
181     		RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
182     		RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
183     	}
184     	if (1) {
185     		unsigned mtu = dev->mtu;
186     		RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
187     	}
188     	if (dev->ifindex != dev->iflink)
189     		RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink);
190     	if (dev->qdisc_sleeping)
191     		RTA_PUT(skb, IFLA_QDISC,
192     			strlen(dev->qdisc_sleeping->ops->id) + 1,
193     			dev->qdisc_sleeping->ops->id);
194     	if (dev->master)
195     		RTA_PUT(skb, IFLA_MASTER, sizeof(int), &dev->master->ifindex);
196     	if (dev->get_stats) {
197     		struct net_device_stats *stats = dev->get_stats(dev);
198     		if (stats)
199     			RTA_PUT(skb, IFLA_STATS, sizeof(*stats), stats);
200     	}
201     	nlh->nlmsg_len = skb->tail - b;
202     	return skb->len;
203     
204     nlmsg_failure:
205     rtattr_failure:
206     	skb_trim(skb, b - skb->data);
207     	return -1;
208     }
209     
210     int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
211     {
212     	int idx;
213     	int s_idx = cb->args[0];
214     	struct net_device *dev;
215     
216     	read_lock(&dev_base_lock);
217     	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
218     		if (idx < s_idx)
219     			continue;
220     		if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
221     			break;
222     	}
223     	read_unlock(&dev_base_lock);
224     	cb->args[0] = idx;
225     
226     	return skb->len;
227     }
228     
229     int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
230     {
231     	int idx;
232     	int s_idx = cb->family;
233     
234     	if (s_idx == 0)
235     		s_idx = 1;
236     	for (idx=1; idx<NPROTO; idx++) {
237     		int type = cb->nlh->nlmsg_type-RTM_BASE;
238     		if (idx < s_idx || idx == PF_PACKET)
239     			continue;
240     		if (rtnetlink_links[idx] == NULL ||
241     		    rtnetlink_links[idx][type].dumpit == NULL)
242     			continue;
243     		if (idx > s_idx)
244     			memset(&cb->args[0], 0, sizeof(cb->args));
245     		if (rtnetlink_links[idx][type].dumpit(skb, cb))
246     			break;
247     	}
248     	cb->family = idx;
249     
250     	return skb->len;
251     }
252     
253     void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
254     {
255     	struct sk_buff *skb;
256     	int size = NLMSG_GOODSIZE;
257     
258     	skb = alloc_skb(size, GFP_KERNEL);
259     	if (!skb)
260     		return;
261     
262     	if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change) < 0) {
263     		kfree_skb(skb);
264     		return;
265     	}
266     	NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
267     	netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL);
268     }
269     
/*
 * "Done" callback handed to netlink_dump_start() by rtnetlink_rcv_msg().
 * It does nothing with cb and always returns 0.
 */
static int rtnetlink_done(struct netlink_callback *cb)
{
	return 0;
}
274     
275     /* Process one rtnetlink message. */
276     
277     static __inline__ int
278     rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
279     {
280     	struct rtnetlink_link *link;
281     	struct rtnetlink_link *link_tab;
282     	struct rtattr	*rta[RTATTR_MAX];
283     
284     	int exclusive = 0;
285     	int sz_idx, kind;
286     	int min_len;
287     	int family;
288     	int type;
289     	int err;
290     
291     	/* Only requests are handled by kernel now */
292     	if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
293     		return 0;
294     
295     	type = nlh->nlmsg_type;
296     
297     	/* A control message: ignore them */
298     	if (type < RTM_BASE)
299     		return 0;
300     
301     	/* Unknown message: reply with EINVAL */
302     	if (type > RTM_MAX)
303     		goto err_inval;
304     
305     	type -= RTM_BASE;
306     
307     	/* All the messages must have at least 1 byte length */
308     	if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
309     		return 0;
310     
311     	family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
312     	if (family > NPROTO) {
313     		*errp = -EAFNOSUPPORT;
314     		return -1;
315     	}
316     
317     	link_tab = rtnetlink_links[family];
318     	if (link_tab == NULL)
319     		link_tab = rtnetlink_links[PF_UNSPEC];
320     	link = &link_tab[type];
321     
322     	sz_idx = type>>2;
323     	kind = type&3;
324     
325     	if (kind != 2 && !cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) {
326     		*errp = -EPERM;
327     		return -1;
328     	}
329     
330     	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
331     		u32 rlen;
332     
333     		if (link->dumpit == NULL)
334     			link = &(rtnetlink_links[PF_UNSPEC][type]);
335     
336     		if (link->dumpit == NULL)
337     			goto err_inval;
338     
339     		if ((*errp = netlink_dump_start(rtnl, skb, nlh,
340     						link->dumpit,
341     						rtnetlink_done)) != 0) {
342     			return -1;
343     		}
344     		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
345     		if (rlen > skb->len)
346     			rlen = skb->len;
347     		skb_pull(skb, rlen);
348     		return -1;
349     	}
350     
351     	if (kind != 2) {
352     		if (rtnl_exlock_nowait()) {
353     			*errp = 0;
354     			return -1;
355     		}
356     		exclusive = 1;
357     	}
358     
359     	memset(&rta, 0, sizeof(rta));
360     
361     	min_len = rtm_min[sz_idx];
362     	if (nlh->nlmsg_len < min_len)
363     		goto err_inval;
364     
365     	if (nlh->nlmsg_len > min_len) {
366     		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
367     		struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len);
368     
369     		while (RTA_OK(attr, attrlen)) {
370     			unsigned flavor = attr->rta_type;
371     			if (flavor) {
372     				if (flavor > rta_max[sz_idx])
373     					goto err_inval;
374     				rta[flavor-1] = attr;
375     			}
376     			attr = RTA_NEXT(attr, attrlen);
377     		}
378     	}
379     
380     	if (link->doit == NULL)
381     		link = &(rtnetlink_links[PF_UNSPEC][type]);
382     	if (link->doit == NULL)
383     		goto err_inval;
384     	err = link->doit(skb, nlh, (void *)&rta);
385     
386     	if (exclusive)
387     		rtnl_exunlock();
388     	*errp = err;
389     	return err;
390     
391     err_inval:
392     	if (exclusive)
393     		rtnl_exunlock();
394     	*errp = -EINVAL;
395     	return -1;
396     }
397     
398     /* 
399      * Process one packet of messages.
400      * Malformed skbs with wrong lengths of messages are discarded silently.
401      */
402     
403     extern __inline__ int rtnetlink_rcv_skb(struct sk_buff *skb)
404     {
405     	int err;
406     	struct nlmsghdr * nlh;
407     
408     	while (skb->len >= NLMSG_SPACE(0)) {
409     		u32 rlen;
410     
411     		nlh = (struct nlmsghdr *)skb->data;
412     		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
413     			return 0;
414     		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
415     		if (rlen > skb->len)
416     			rlen = skb->len;
417     		if (rtnetlink_rcv_msg(skb, nlh, &err)) {
418     			/* Not error, but we must interrupt processing here:
419     			 *   Note, that in this case we do not pull message
420     			 *   from skb, it will be processed later.
421     			 */
422     			if (err == 0)
423     				return -1;
424     			netlink_ack(skb, nlh, err);
425     		} else if (nlh->nlmsg_flags&NLM_F_ACK)
426     			netlink_ack(skb, nlh, 0);
427     		skb_pull(skb, rlen);
428     	}
429     
430     	return 0;
431     }
432     
433     /*
434      *  rtnetlink input queue processing routine:
435      *	- try to acquire shared lock. If it is failed, defer processing.
436      *	- feed skbs to rtnetlink_rcv_skb, until it refuse a message,
437      *	  that will occur, when a dump started and/or acquisition of
438      *	  exclusive lock failed.
439      */
440     
441     static void rtnetlink_rcv(struct sock *sk, int len)
442     {
443     	do {
444     		struct sk_buff *skb;
445     
446     		if (rtnl_shlock_nowait())
447     			return;
448     
449     		while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
450     			if (rtnetlink_rcv_skb(skb)) {
451     				if (skb->len)
452     					skb_queue_head(&sk->receive_queue, skb);
453     				else
454     					kfree_skb(skb);
455     				break;
456     			}
457     			kfree_skb(skb);
458     		}
459     
460     		up(&rtnl_sem);
461     	} while (rtnl && rtnl->receive_queue.qlen);
462     }
463     
464     static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
465     {
466     	{ NULL,			NULL,			},
467     	{ NULL,			NULL,			},
468     	{ NULL,			rtnetlink_dump_ifinfo,	},
469     	{ NULL,			NULL,			},
470     
471     	{ NULL,			NULL,			},
472     	{ NULL,			NULL,			},
473     	{ NULL,			rtnetlink_dump_all,	},
474     	{ NULL,			NULL,			},
475     
476     	{ NULL,			NULL,			},
477     	{ NULL,			NULL,			},
478     	{ NULL,			rtnetlink_dump_all,	},
479     	{ NULL,			NULL,			},
480     
481     	{ neigh_add,		NULL,			},
482     	{ neigh_delete,		NULL,			},
483     	{ NULL,			neigh_dump_info,	},
484     	{ NULL,			NULL,			},
485     
486     	{ NULL,			NULL,			},
487     	{ NULL,			NULL,			},
488     	{ NULL,			NULL,			},
489     	{ NULL,			NULL,			},
490     };
491     
492     
493     static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
494     {
495     	struct net_device *dev = ptr;
496     	switch (event) {
497     	case NETDEV_UNREGISTER:
498     		rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
499     		break;
500     	case NETDEV_REGISTER:
501     		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
502     		break;
503     	case NETDEV_UP:
504     	case NETDEV_DOWN:
505     		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
506     		break;
507     	case NETDEV_CHANGE:
508     	case NETDEV_GOING_DOWN:
509     		break;
510     	default:
511     		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
512     		break;
513     	}
514     	return NOTIFY_DONE;
515     }
516     
517     struct notifier_block rtnetlink_dev_notifier = {
518     	rtnetlink_event,
519     	NULL,
520     	0
521     };
522     
523     
524     void __init rtnetlink_init(void)
525     {
526     #ifdef RTNL_DEBUG
527     	printk("Initializing RT netlink socket\n");
528     #endif
529     	rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv);
530     	if (rtnl == NULL)
531     		panic("rtnetlink_init: cannot initialize rtnetlink\n");
532     	register_netdevice_notifier(&rtnetlink_dev_notifier);
533     	rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
534     	rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
535     }
536     
537     
538     
539     #endif
540