File: /usr/src/linux/net/ipv4/udp.c

1     /*
2      * INET		An implementation of the TCP/IP protocol suite for the LINUX
3      *		operating system.  INET is implemented using the  BSD Socket
4      *		interface as the means of communication with the user level.
5      *
6      *		The User Datagram Protocol (UDP).
7      *
8      * Version:	$Id: udp.c,v 1.99 2001/09/01 00:31:50 davem Exp $
9      *
10      * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
11      *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12      *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
13      *		Alan Cox, <Alan.Cox@linux.org>
14      *
15      * Fixes:
16      *		Alan Cox	:	verify_area() calls
17      *		Alan Cox	: 	stopped close while in use off icmp
18      *					messages. Not a fix but a botch that
19      *					for udp at least is 'valid'.
20      *		Alan Cox	:	Fixed icmp handling properly
21      *		Alan Cox	: 	Correct error for oversized datagrams
22      *		Alan Cox	:	Tidied select() semantics. 
23      *		Alan Cox	:	udp_err() fixed properly, also now 
24      *					select and read wake correctly on errors
25      *		Alan Cox	:	udp_send verify_area moved to avoid mem leak
26      *		Alan Cox	:	UDP can count its memory
27      *		Alan Cox	:	send to an unknown connection causes
28      *					an ECONNREFUSED off the icmp, but
29      *					does NOT close.
30      *		Alan Cox	:	Switched to new sk_buff handlers. No more backlog!
31      *		Alan Cox	:	Using generic datagram code. Even smaller and the PEEK
32      *					bug no longer crashes it.
33      *		Fred Van Kempen	: 	Net2e support for sk->broadcast.
34      *		Alan Cox	:	Uses skb_free_datagram
35      *		Alan Cox	:	Added get/set sockopt support.
36      *		Alan Cox	:	Broadcasting without option set returns EACCES.
37      *		Alan Cox	:	No wakeup calls. Instead we now use the callbacks.
38      *		Alan Cox	:	Use ip_tos and ip_ttl
39      *		Alan Cox	:	SNMP Mibs
40      *		Alan Cox	:	MSG_DONTROUTE, and 0.0.0.0 support.
41      *		Matt Dillon	:	UDP length checks.
42      *		Alan Cox	:	Smarter af_inet used properly.
43      *		Alan Cox	:	Use new kernel side addressing.
44      *		Alan Cox	:	Incorrect return on truncated datagram receive.
45      *	Arnt Gulbrandsen 	:	New udp_send and stuff
46      *		Alan Cox	:	Cache last socket
47      *		Alan Cox	:	Route cache
48      *		Jon Peatfield	:	Minor efficiency fix to sendto().
49      *		Mike Shaver	:	RFC1122 checks.
50      *		Alan Cox	:	Nonblocking error fix.
51      *	Willy Konynenberg	:	Transparent proxying support.
52      *		Mike McLagan	:	Routing by source
53      *		David S. Miller	:	New socket lookup architecture.
54      *					Last socket cache retained as it
55      *					does have a high hit rate.
56      *		Olaf Kirch	:	Don't linearise iovec on sendmsg.
57      *		Andi Kleen	:	Some cleanups, cache destination entry
58      *					for connect. 
59      *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
60      *		Melvin Smith	:	Check msg_name not msg_namelen in sendto(),
61      *					return ENOTCONN for unconnected sockets (POSIX)
62      *		Janos Farkas	:	don't deliver multi/broadcasts to a different
63      *					bound-to-device socket
64      *
65      *
66      *		This program is free software; you can redistribute it and/or
67      *		modify it under the terms of the GNU General Public License
68      *		as published by the Free Software Foundation; either version
69      *		2 of the License, or (at your option) any later version.
70      */
71      
72     #include <asm/system.h>
73     #include <asm/uaccess.h>
74     #include <asm/ioctls.h>
75     #include <linux/types.h>
76     #include <linux/fcntl.h>
77     #include <linux/socket.h>
78     #include <linux/sockios.h>
79     #include <linux/in.h>
80     #include <linux/errno.h>
81     #include <linux/timer.h>
82     #include <linux/mm.h>
83     #include <linux/config.h>
84     #include <linux/inet.h>
85     #include <linux/netdevice.h>
86     #include <net/snmp.h>
87     #include <net/ip.h>
88     #include <net/protocol.h>
89     #include <linux/skbuff.h>
90     #include <net/sock.h>
91     #include <net/udp.h>
92     #include <net/icmp.h>
93     #include <net/route.h>
94     #include <net/inet_common.h>
95     #include <net/checksum.h>
96     
97     /*
98      *	Snmp MIB for the UDP layer
99      */
100     
/* UDP SNMP counters.  The array is sized NR_CPUS*2; presumably two
 * slots per CPU -- confirm against the UDP_INC_STATS_* macros.
 */
101     struct udp_mib		udp_statistics[NR_CPUS*2];
102     
/* Hash table of bound UDP sockets.  Buckets are indexed by
 * (local port & (UDP_HTABLE_SIZE - 1)) and chained through sk->next;
 * udp_hash_lock guards every walk and modification in this file.
 */
103     struct sock *udp_hash[UDP_HTABLE_SIZE];
104     rwlock_t udp_hash_lock = RW_LOCK_UNLOCKED;
105     
/* Last port handed out by automatic port selection; the next search
 * in udp_v4_get_port() starts from here.
 */
106     /* Shared by v4/v6 udp. */
107     int udp_port_rover;
108     
109     static int udp_v4_get_port(struct sock *sk, unsigned short snum)
110     {
111     	write_lock_bh(&udp_hash_lock);
112     	if (snum == 0) {
113     		int best_size_so_far, best, result, i;
114     
115     		if (udp_port_rover > sysctl_local_port_range[1] ||
116     		    udp_port_rover < sysctl_local_port_range[0])
117     			udp_port_rover = sysctl_local_port_range[0];
118     		best_size_so_far = 32767;
119     		best = result = udp_port_rover;
120     		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
121     			struct sock *sk;
122     			int size;
123     
124     			sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
125     			if (!sk) {
126     				if (result > sysctl_local_port_range[1])
127     					result = sysctl_local_port_range[0] +
128     						((result - sysctl_local_port_range[0]) &
129     						 (UDP_HTABLE_SIZE - 1));
130     				goto gotit;
131     			}
132     			size = 0;
133     			do {
134     				if (++size >= best_size_so_far)
135     					goto next;
136     			} while ((sk = sk->next) != NULL);
137     			best_size_so_far = size;
138     			best = result;
139     		next:;
140     		}
141     		result = best;
142     		for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
143     			if (result > sysctl_local_port_range[1])
144     				result = sysctl_local_port_range[0]
145     					+ ((result - sysctl_local_port_range[0]) &
146     					   (UDP_HTABLE_SIZE - 1));
147     			if (!udp_lport_inuse(result))
148     				break;
149     		}
150     		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
151     			goto fail;
152     gotit:
153     		udp_port_rover = snum = result;
154     	} else {
155     		struct sock *sk2;
156     
157     		for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
158     		     sk2 != NULL;
159     		     sk2 = sk2->next) {
160     			if (sk2->num == snum &&
161     			    sk2 != sk &&
162     			    sk2->bound_dev_if == sk->bound_dev_if &&
163     			    (!sk2->rcv_saddr ||
164     			     !sk->rcv_saddr ||
165     			     sk2->rcv_saddr == sk->rcv_saddr) &&
166     			    (!sk2->reuse || !sk->reuse))
167     				goto fail;
168     		}
169     	}
170     	sk->num = snum;
171     	if (sk->pprev == NULL) {
172     		struct sock **skp = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
173     		if ((sk->next = *skp) != NULL)
174     			(*skp)->pprev = &sk->next;
175     		*skp = sk;
176     		sk->pprev = skp;
177     		sock_prot_inc_use(sk->prot);
178     		sock_hold(sk);
179     	}
180     	write_unlock_bh(&udp_hash_lock);
181     	return 0;
182     
183     fail:
184     	write_unlock_bh(&udp_hash_lock);
185     	return 1;
186     }
187     
/* ->hash hook of udp_prot.  UDP sockets are linked into udp_hash by
 * udp_v4_get_port(), so this hook is not expected to run; it traps
 * with BUG() if it ever does.
 */
188     static void udp_v4_hash(struct sock *sk)
189     {
190     	BUG();
191     }
192     
193     static void udp_v4_unhash(struct sock *sk)
194     {
195     	write_lock_bh(&udp_hash_lock);
196     	if (sk->pprev) {
197     		if (sk->next)
198     			sk->next->pprev = sk->pprev;
199     		*sk->pprev = sk->next;
200     		sk->pprev = NULL;
201     		sk->num = 0;
202     		sock_prot_dec_use(sk->prot);
203     		__sock_put(sk);
204     	}
205     	write_unlock_bh(&udp_hash_lock);
206     }
207     
208     /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
209      * harder than this. -DaveM
210      */
211     struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
212     {
213     	struct sock *sk, *result = NULL;
214     	unsigned short hnum = ntohs(dport);
215     	int badness = -1;
216     
217     	for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) {
218     		if(sk->num == hnum) {
219     			int score = 0;
220     			if(sk->rcv_saddr) {
221     				if(sk->rcv_saddr != daddr)
222     					continue;
223     				score++;
224     			}
225     			if(sk->daddr) {
226     				if(sk->daddr != saddr)
227     					continue;
228     				score++;
229     			}
230     			if(sk->dport) {
231     				if(sk->dport != sport)
232     					continue;
233     				score++;
234     			}
235     			if(sk->bound_dev_if) {
236     				if(sk->bound_dev_if != dif)
237     					continue;
238     				score++;
239     			}
240     			if(score == 4) {
241     				result = sk;
242     				break;
243     			} else if(score > badness) {
244     				result = sk;
245     				badness = score;
246     			}
247     		}
248     	}
249     	return result;
250     }
251     
252     __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
253     {
254     	struct sock *sk;
255     
256     	read_lock(&udp_hash_lock);
257     	sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif);
258     	if (sk)
259     		sock_hold(sk);
260     	read_unlock(&udp_hash_lock);
261     	return sk;
262     }
263     
264     static inline struct sock *udp_v4_mcast_next(struct sock *sk,
265     					     u16 loc_port, u32 loc_addr,
266     					     u16 rmt_port, u32 rmt_addr,
267     					     int dif)
268     {
269     	struct sock *s = sk;
270     	unsigned short hnum = ntohs(loc_port);
271     	for(; s; s = s->next) {
272     		if ((s->num != hnum)					||
273     		    (s->daddr && s->daddr!=rmt_addr)			||
274     		    (s->dport != rmt_port && s->dport != 0)			||
275     		    (s->rcv_saddr  && s->rcv_saddr != loc_addr)		||
276     		    (s->bound_dev_if && s->bound_dev_if != dif))
277     			continue;
278     		break;
279       	}
280       	return s;
281     }
282     
283     /*
284      * This routine is called by the ICMP module when it gets some
285      * sort of error condition.  If err < 0 then the socket should
286      * be closed and the error returned to the user.  If err > 0
287      * it's just the icmp type << 8 | icmp code.  
288      * Header points to the ip header of the error packet. We move
289      * on past this. Then (as it used to claim before adjustment)
290      * header points to the first 8 bytes of the udp header.  We need
291      * to find the appropriate port.
292      */
293     
294     void udp_err(struct sk_buff *skb, u32 info)
295     {
296     	struct iphdr *iph = (struct iphdr*)skb->data;
297     	struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
298     	int type = skb->h.icmph->type;
299     	int code = skb->h.icmph->code;
300     	struct sock *sk;
301     	int harderr;
302     	int err;
303     
304     	sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex);
305     	if (sk == NULL) {
306     		ICMP_INC_STATS_BH(IcmpInErrors);
307         	  	return;	/* No socket for error */
308     	}
309     
310     	err = 0;
311     	harderr = 0;
312     
313     	switch (type) {
314     	default:
315     	case ICMP_TIME_EXCEEDED:
316     		err = EHOSTUNREACH;
317     		break;
318     	case ICMP_SOURCE_QUENCH:
319     		goto out;
320     	case ICMP_PARAMETERPROB:
321     		err = EPROTO;
322     		harderr = 1;
323     		break;
324     	case ICMP_DEST_UNREACH:
325     		if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
326     			if (sk->protinfo.af_inet.pmtudisc != IP_PMTUDISC_DONT) {
327     				err = EMSGSIZE;
328     				harderr = 1;
329     				break;
330     			}
331     			goto out;
332     		}
333     		err = EHOSTUNREACH;
334     		if (code <= NR_ICMP_UNREACH) {
335     			harderr = icmp_err_convert[code].fatal;
336     			err = icmp_err_convert[code].errno;
337     		}
338     		break;
339     	}
340     
341     	/*
342     	 *      RFC1122: OK.  Passes ICMP errors back to application, as per 
343     	 *	4.1.3.3.
344     	 */
345     	if (!sk->protinfo.af_inet.recverr) {
346     		if (!harderr || sk->state != TCP_ESTABLISHED)
347     			goto out;
348     	} else {
349     		ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
350     	}
351     	sk->err = err;
352     	sk->error_report(sk);
353     out:
354     	sock_put(sk);
355     }
356     
357     
358     static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
359     {
360     	return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
361     }
362     
/*
 * Per-send state handed to the udp_getfrag*() callbacks.
 *
 * uh must stay the first member: the callbacks checksum and copy the
 * first sizeof(struct udphdr) bytes of this structure as the on-wire
 * UDP header.
 */
363     struct udpfakehdr 
364     {
365     	struct udphdr uh;	/* header to emit in the first fragment */
366     	u32 saddr;		/* source address for the pseudo-header checksum */
367     	u32 daddr;		/* destination address for the pseudo-header checksum */
368     	struct iovec *iov;	/* payload source (msg->msg_iov) */
369     	u32 wcheck;		/* running partial checksum over the payload */
370     };
371     
372     /*
373      *	Copy and checksum a UDP packet from user space into a buffer.
374      */
375      
376     static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned int fraglen) 
377     {
378     	struct udpfakehdr *ufh = (struct udpfakehdr *)p;
379     	if (offset==0) {
380     		if (csum_partial_copy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
381     						   fraglen-sizeof(struct udphdr), &ufh->wcheck))
382     			return -EFAULT;
383      		ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr),
384     					   ufh->wcheck);
385     		ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr, 
386     					  ntohs(ufh->uh.len),
387     					  IPPROTO_UDP, ufh->wcheck);
388     		if (ufh->uh.check == 0)
389     			ufh->uh.check = -1;
390     		memcpy(to, ufh, sizeof(struct udphdr));
391     		return 0;
392     	}
393     	if (csum_partial_copy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
394     					   fraglen, &ufh->wcheck))
395     		return -EFAULT;
396     	return 0;
397     }
398     
399     /*
400      *	Copy a UDP packet from user space into a buffer without checksumming.
401      */
402      
403     static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset, unsigned int fraglen) 
404     {
405     	struct udpfakehdr *ufh = (struct udpfakehdr *)p;
406     
407     	if (offset==0) {
408     		memcpy(to, ufh, sizeof(struct udphdr));
409     		return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
410     					   fraglen-sizeof(struct udphdr));
411     	}
412     	return memcpy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
413     				   fraglen);
414     }
415     
416     int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
417     {
418     	int ulen = len + sizeof(struct udphdr);
419     	struct ipcm_cookie ipc;
420     	struct udpfakehdr ufh;
421     	struct rtable *rt = NULL;
422     	int free = 0;
423     	int connected = 0;
424     	u32 daddr;
425     	u8  tos;
426     	int err;
427     
428     	/* This check is ONLY to check for arithmetic overflow
429     	   on integer(!) len. Not more! Real check will be made
430     	   in ip_build_xmit --ANK
431     
432     	   BTW socket.c -> af_*.c -> ... make multiple
433     	   invalid conversions size_t -> int. We MUST repair it f.e.
434     	   by replacing all of them with size_t and revise all
435     	   the places sort of len += sizeof(struct iphdr)
436     	   If len was ULONG_MAX-10 it would be cathastrophe  --ANK
437     	 */
438     
439     	if (len < 0 || len > 0xFFFF)
440     		return -EMSGSIZE;
441     
442     	/* 
443     	 *	Check the flags.
444     	 */
445     
446     	if (msg->msg_flags&MSG_OOB)	/* Mirror BSD error message compatibility */
447     		return -EOPNOTSUPP;
448     
449     	/*
450     	 *	Get and verify the address. 
451     	 */
452     	 
453     	if (msg->msg_name) {
454     		struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
455     		if (msg->msg_namelen < sizeof(*usin))
456     			return -EINVAL;
457     		if (usin->sin_family != AF_INET) {
458     			if (usin->sin_family != AF_UNSPEC)
459     				return -EINVAL;
460     		}
461     
462     		ufh.daddr = usin->sin_addr.s_addr;
463     		ufh.uh.dest = usin->sin_port;
464     		if (ufh.uh.dest == 0)
465     			return -EINVAL;
466     	} else {
467     		if (sk->state != TCP_ESTABLISHED)
468     			return -ENOTCONN;
469     		ufh.daddr = sk->daddr;
470     		ufh.uh.dest = sk->dport;
471     		/* Open fast path for connected socket.
472     		   Route will not be used, if at least one option is set.
473     		 */
474     		connected = 1;
475       	}
476     	ipc.addr = sk->saddr;
477     	ufh.uh.source = sk->sport;
478     
479     	ipc.opt = NULL;
480     	ipc.oif = sk->bound_dev_if;
481     	if (msg->msg_controllen) {
482     		err = ip_cmsg_send(msg, &ipc);
483     		if (err)
484     			return err;
485     		if (ipc.opt)
486     			free = 1;
487     		connected = 0;
488     	}
489     	if (!ipc.opt)
490     		ipc.opt = sk->protinfo.af_inet.opt;
491     
492     	ufh.saddr = ipc.addr;
493     	ipc.addr = daddr = ufh.daddr;
494     
495     	if (ipc.opt && ipc.opt->srr) {
496     		if (!daddr)
497     			return -EINVAL;
498     		daddr = ipc.opt->faddr;
499     		connected = 0;
500     	}
501     	tos = RT_TOS(sk->protinfo.af_inet.tos);
502     	if (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) || 
503     	    (ipc.opt && ipc.opt->is_strictroute)) {
504     		tos |= RTO_ONLINK;
505     		connected = 0;
506     	}
507     
508     	if (MULTICAST(daddr)) {
509     		if (!ipc.oif)
510     			ipc.oif = sk->protinfo.af_inet.mc_index;
511     		if (!ufh.saddr)
512     			ufh.saddr = sk->protinfo.af_inet.mc_addr;
513     		connected = 0;
514     	}
515     
516     	if (connected)
517     		rt = (struct rtable*)sk_dst_check(sk, 0);
518     
519     	if (rt == NULL) {
520     		err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif);
521     		if (err)
522     			goto out;
523     
524     		err = -EACCES;
525     		if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast) 
526     			goto out;
527     		if (connected)
528     			sk_dst_set(sk, dst_clone(&rt->u.dst));
529     	}
530     
531     	if (msg->msg_flags&MSG_CONFIRM)
532     		goto do_confirm;
533     back_from_confirm:
534     
535     	ufh.saddr = rt->rt_src;
536     	if (!ipc.addr)
537     		ufh.daddr = ipc.addr = rt->rt_dst;
538     	ufh.uh.len = htons(ulen);
539     	ufh.uh.check = 0;
540     	ufh.iov = msg->msg_iov;
541     	ufh.wcheck = 0;
542     
543     	/* RFC1122: OK.  Provides the checksumming facility (MUST) as per */
544     	/* 4.1.3.4. It's configurable by the application via setsockopt() */
545     	/* (MAY) and it defaults to on (MUST). */
546     
547     	err = ip_build_xmit(sk,
548     			    (sk->no_check == UDP_CSUM_NOXMIT ?
549     			     udp_getfrag_nosum :
550     			     udp_getfrag),
551     			    &ufh, ulen, &ipc, rt, msg->msg_flags);
552     
553     out:
554     	ip_rt_put(rt);
555     	if (free)
556     		kfree(ipc.opt);
557     	if (!err) {
558     		UDP_INC_STATS_USER(UdpOutDatagrams);
559     		return len;
560     	}
561     	return err;
562     
563     do_confirm:
564     	dst_confirm(&rt->u.dst);
565     	if (!(msg->msg_flags&MSG_PROBE) || len)
566     		goto back_from_confirm;
567     	err = 0;
568     	goto out;
569     }
570     
571     /*
572      *	IOCTL requests applicable to the UDP protocol
573      */
574      
575     int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
576     {
577     	switch(cmd) 
578     	{
579     		case SIOCOUTQ:
580     		{
581     			int amount = atomic_read(&sk->wmem_alloc);
582     			return put_user(amount, (int *)arg);
583     		}
584     
585     		case SIOCINQ:
586     		{
587     			struct sk_buff *skb;
588     			unsigned long amount;
589     
590     			amount = 0;
591     			spin_lock_irq(&sk->receive_queue.lock);
592     			skb = skb_peek(&sk->receive_queue);
593     			if (skb != NULL) {
594     				/*
595     				 * We will only return the amount
596     				 * of this packet since that is all
597     				 * that will be read.
598     				 */
599     				amount = skb->len - sizeof(struct udphdr);
600     			}
601     			spin_unlock_irq(&sk->receive_queue.lock);
602     			return put_user(amount, (int *)arg);
603     		}
604     
605     		default:
606     			return -ENOIOCTLCMD;
607     	}
608     	return(0);
609     }
610     
611     static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
612     {
613     	return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
614     }
615     
616     static __inline__ int udp_checksum_complete(struct sk_buff *skb)
617     {
618     	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
619     		__udp_checksum_complete(skb);
620     }
621     
622     /*
623      * 	This should be easy, if there is something there we
624      * 	return it, otherwise we block.
625      */
626     
627     int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
628     		int noblock, int flags, int *addr_len)
629     {
630       	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
631       	struct sk_buff *skb;
632       	int copied, err;
633     
634     	/*
635     	 *	Check any passed addresses
636     	 */
637     	if (addr_len)
638     		*addr_len=sizeof(*sin);
639     
640     	if (flags & MSG_ERRQUEUE)
641     		return ip_recv_error(sk, msg, len);
642     
643     	skb = skb_recv_datagram(sk, flags, noblock, &err);
644     	if (!skb)
645     		goto out;
646       
647       	copied = skb->len - sizeof(struct udphdr);
648     	if (copied > len) {
649     		copied = len;
650     		msg->msg_flags |= MSG_TRUNC;
651     	}
652     
653     	if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
654     		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
655     					      copied);
656     	} else if (msg->msg_flags&MSG_TRUNC) {
657     		if (__udp_checksum_complete(skb))
658     			goto csum_copy_err;
659     		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
660     					      copied);
661     	} else {
662     		err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
663     
664     		if (err == -EINVAL)
665     			goto csum_copy_err;
666     	}
667     
668     	if (err)
669     		goto out_free;
670     
671     	sock_recv_timestamp(msg, sk, skb);
672     
673     	/* Copy the address. */
674     	if (sin)
675     	{
676     		sin->sin_family = AF_INET;
677     		sin->sin_port = skb->h.uh->source;
678     		sin->sin_addr.s_addr = skb->nh.iph->saddr;
679     		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
680       	}
681     	if (sk->protinfo.af_inet.cmsg_flags)
682     		ip_cmsg_recv(msg, skb);
683     	err = copied;
684       
685     out_free:
686       	skb_free_datagram(sk, skb);
687     out:
688       	return err;
689     
690     csum_copy_err:
691     	UDP_INC_STATS_BH(UdpInErrors);
692     
693     	/* Clear queue. */
694     	if (flags&MSG_PEEK) {
695     		int clear = 0;
696     		spin_lock_irq(&sk->receive_queue.lock);
697     		if (skb == skb_peek(&sk->receive_queue)) {
698     			__skb_unlink(skb, &sk->receive_queue);
699     			clear = 1;
700     		}
701     		spin_unlock_irq(&sk->receive_queue.lock);
702     		if (clear)
703     			kfree_skb(skb);
704     	}
705     
706     	skb_free_datagram(sk, skb);
707     
708     	return -EAGAIN;	
709     }
710     
711     int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
712     {
713     	struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
714     	struct rtable *rt;
715     	int err;
716     
717     	
718     	if (addr_len < sizeof(*usin)) 
719     	  	return -EINVAL;
720     
721     	if (usin->sin_family != AF_INET) 
722     	  	return -EAFNOSUPPORT;
723     
724     	sk_dst_reset(sk);
725     
726     	err = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr,
727     			       sk->protinfo.af_inet.tos|sk->localroute, sk->bound_dev_if);
728     	if (err)
729     		return err;
730     	if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) {
731     		ip_rt_put(rt);
732     		return -EACCES;
733     	}
734       	if(!sk->saddr)
735     	  	sk->saddr = rt->rt_src;		/* Update source address */
736     	if(!sk->rcv_saddr)
737     		sk->rcv_saddr = rt->rt_src;
738     	sk->daddr = rt->rt_dst;
739     	sk->dport = usin->sin_port;
740     	sk->state = TCP_ESTABLISHED;
741     	sk->protinfo.af_inet.id = jiffies;
742     
743     	sk_dst_set(sk, &rt->u.dst);
744     	return(0);
745     }
746     
747     int udp_disconnect(struct sock *sk, int flags)
748     {
749     	/*
750     	 *	1003.1g - break association.
751     	 */
752     	 
753     	sk->state = TCP_CLOSE;
754     	sk->daddr = 0;
755     	sk->dport = 0;
756     	sk->bound_dev_if = 0;
757     	if (!(sk->userlocks&SOCK_BINDADDR_LOCK)) {
758     		sk->rcv_saddr = 0;
759     		sk->saddr = 0;
760     #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
761     		memset(&sk->net_pinfo.af_inet6.saddr, 0, 16);
762     		memset(&sk->net_pinfo.af_inet6.rcv_saddr, 0, 16);
763     #endif
764     	}
765     	if (!(sk->userlocks&SOCK_BINDPORT_LOCK)) {
766     		sk->prot->unhash(sk);
767     		sk->sport = 0;
768     	}
769     	sk_dst_reset(sk);
770     	return 0;
771     }
772     
/* ->close hook of udp_prot: hands the socket to inet_sock_release().
 * The timeout argument is ignored.
 */
773     static void udp_close(struct sock *sk, long timeout)
774     {
775     	inet_sock_release(sk);
776     }
777     
778     static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
779     {
780     	/*
781     	 *	Charge it to the socket, dropping if the queue is full.
782     	 */
783     
784     #if defined(CONFIG_FILTER)
785     	if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
786     		if (__udp_checksum_complete(skb)) {
787     			UDP_INC_STATS_BH(UdpInErrors);
788     			IP_INC_STATS_BH(IpInDiscards);
789     			ip_statistics[smp_processor_id()*2].IpInDelivers--;
790     			kfree_skb(skb);
791     			return -1;
792     		}
793     		skb->ip_summed = CHECKSUM_UNNECESSARY;
794     	}
795     #endif
796     
797     	if (sock_queue_rcv_skb(sk,skb)<0) {
798     		UDP_INC_STATS_BH(UdpInErrors);
799     		IP_INC_STATS_BH(IpInDiscards);
800     		ip_statistics[smp_processor_id()*2].IpInDelivers--;
801     		kfree_skb(skb);
802     		return -1;
803     	}
804     	UDP_INC_STATS_BH(UdpInDatagrams);
805     	return 0;
806     }
807     
808     /*
809      *	Multicasts and broadcasts go to each listener.
810      *
811      *	Note: called only from the BH handler context,
812      *	so we don't need to lock the hashes.
813      */
814     static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
815     				 u32 saddr, u32 daddr)
816     {
817     	struct sock *sk;
818     	int dif;
819     
820     	read_lock(&udp_hash_lock);
821     	sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
822     	dif = skb->dev->ifindex;
823     	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
824     	if (sk) {
825     		struct sock *sknext = NULL;
826     
827     		do {
828     			struct sk_buff *skb1 = skb;
829     
830     			sknext = udp_v4_mcast_next(sk->next, uh->dest, daddr,
831     						   uh->source, saddr, dif);
832     			if(sknext)
833     				skb1 = skb_clone(skb, GFP_ATOMIC);
834     
835     			if(skb1)
836     				udp_queue_rcv_skb(sk, skb1);
837     			sk = sknext;
838     		} while(sknext);
839     	} else
840     		kfree_skb(skb);
841     	read_unlock(&udp_hash_lock);
842     	return 0;
843     }
844     
845     /* Initialize UDP checksum. If exited with zero value (success),
846      * CHECKSUM_UNNECESSARY means, that no more checks are required.
847      * Otherwise, csum completion requires chacksumming packet body,
848      * including udp header and folding it to skb->csum.
849      */
850     static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
851     			     unsigned short ulen, u32 saddr, u32 daddr)
852     {
853     	if (uh->check == 0) {
854     		skb->ip_summed = CHECKSUM_UNNECESSARY;
855     	} else if (skb->ip_summed == CHECKSUM_HW) {
856     		skb->ip_summed = CHECKSUM_UNNECESSARY;
857     		if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
858     			return 0;
859     		NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp v4 hw csum failure.\n"));
860     		skb->ip_summed = CHECKSUM_NONE;
861     	}
862     	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
863     		skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
864     	/* Probably, we should checksum udp header (it should be in cache
865     	 * in any case) and data in tiny packets (< rx copybreak).
866     	 */
867     	return 0;
868     }
869     
870     /*
871      *	All we need to do is get the socket, and then do a checksum. 
872      */
873      
874     int udp_rcv(struct sk_buff *skb)
875     {
876       	struct sock *sk;
877       	struct udphdr *uh;
878     	unsigned short ulen;
879     	struct rtable *rt = (struct rtable*)skb->dst;
880     	u32 saddr = skb->nh.iph->saddr;
881     	u32 daddr = skb->nh.iph->daddr;
882     	int len = skb->len;
883     
884       	IP_INC_STATS_BH(IpInDelivers);
885     
886     	/*
887     	 *	Validate the packet and the UDP length.
888     	 */
889     	ulen = ntohs(skb->h.uh->len);
890     
891     	if (ulen > len || ulen < sizeof(*uh))
892     		goto short_packet;
893     
894     	if (pskb_trim(skb, ulen))
895     		goto short_packet;
896     
897       	uh = skb->h.uh;
898     
899     	if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
900     		goto csum_error;
901     
902     	if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
903     		return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
904     
905     	sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
906     
907     	if (sk != NULL) {
908     		udp_queue_rcv_skb(sk, skb);
909     		sock_put(sk);
910     		return 0;
911     	}
912     
913     	/* No socket. Drop packet silently, if checksum is wrong */
914     	if (udp_checksum_complete(skb))
915     		goto csum_error;
916     
917     	UDP_INC_STATS_BH(UdpNoPorts);
918     	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
919     
920     	/*
921     	 * Hmm.  We got an UDP packet to a port to which we
922     	 * don't wanna listen.  Ignore it.
923     	 */
924     	kfree_skb(skb);
925     	return(0);
926     
927     short_packet:
928     	NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "UDP: short packet: %d/%d\n", ulen, len));
929     	UDP_INC_STATS_BH(UdpInErrors);
930     	kfree_skb(skb);
931     	return(0);
932     
933     csum_error:
934     	/* 
935     	 * RFC1122: OK.  Discards the bad packet silently (as far as 
936     	 * the network is concerned, anyway) as per 4.1.3.4 (MUST). 
937     	 */
938     	NETDEBUG(if (net_ratelimit())
939     		 printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
940     			NIPQUAD(saddr),
941     			ntohs(uh->source),
942     			NIPQUAD(daddr),
943     			ntohs(uh->dest),
944     			ulen));
945     	UDP_INC_STATS_BH(UdpInErrors);
946     	kfree_skb(skb);
947     	return(0);
948     }
949     
950     static void get_udp_sock(struct sock *sp, char *tmpbuf, int i)
951     {
952     	unsigned int dest, src;
953     	__u16 destp, srcp;
954     
955     	dest  = sp->daddr;
956     	src   = sp->rcv_saddr;
957     	destp = ntohs(sp->dport);
958     	srcp  = ntohs(sp->sport);
959     	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
960     		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p",
961     		i, src, srcp, dest, destp, sp->state, 
962     		atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
963     		0, 0L, 0,
964     		sock_i_uid(sp), 0,
965     		sock_i_ino(sp),
966     		atomic_read(&sp->refcnt), sp);
967     }
968     
969     int udp_get_info(char *buffer, char **start, off_t offset, int length)
970     {
971     	int len = 0, num = 0, i;
972     	off_t pos = 0;
973     	off_t begin;
974     	char tmpbuf[129];
975     
976     	if (offset < 128) 
977     		len += sprintf(buffer, "%-127s\n",
978     			       "  sl  local_address rem_address   st tx_queue "
979     			       "rx_queue tr tm->when retrnsmt   uid  timeout inode");
980     	pos = 128;
981     	read_lock(&udp_hash_lock);
982     	for (i = 0; i < UDP_HTABLE_SIZE; i++) {
983     		struct sock *sk;
984     
985     		for (sk = udp_hash[i]; sk; sk = sk->next, num++) {
986     			if (sk->family != PF_INET)
987     				continue;
988     			pos += 128;
989     			if (pos <= offset)
990     				continue;
991     			get_udp_sock(sk, tmpbuf, i);
992     			len += sprintf(buffer+len, "%-127s\n", tmpbuf);
993     			if(len >= length)
994     				goto out;
995     		}
996     	}
997     out:
998     	read_unlock(&udp_hash_lock);
999     	begin = len - (pos - offset);
1000     	*start = buffer + begin;
1001     	len -= begin;
1002     	if(len > length)
1003     		len = length;
1004     	if (len < 0)
1005     		len = 0; 
1006     	return len;
1007     }
1008     
/*
 * Protocol operations table for UDP over IPv4.  Socket options are
 * handled by the generic IP handlers; the remaining hooks are the udp_*
 * functions defined in this file.  Note that ->hash is a BUG() stub:
 * hashing is done by ->get_port (udp_v4_get_port).
 */
1009     struct proto udp_prot = {
1010      	name:		"UDP",
1011     	close:		udp_close,
1012     	connect:	udp_connect,
1013     	disconnect:	udp_disconnect,
1014     	ioctl:		udp_ioctl,
1015     	setsockopt:	ip_setsockopt,
1016     	getsockopt:	ip_getsockopt,
1017     	sendmsg:	udp_sendmsg,
1018     	recvmsg:	udp_recvmsg,
1019     	backlog_rcv:	udp_queue_rcv_skb,
1020     	hash:		udp_v4_hash,
1021     	unhash:		udp_v4_unhash,
1022     	get_port:	udp_v4_get_port,
1023     };
1024