File: /usr/src/linux/net/ipv4/udp.c
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * The User Datagram Protocol (UDP).
7 *
8 * Version: $Id: udp.c,v 1.99 2001/09/01 00:31:50 davem Exp $
9 *
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
13 * Alan Cox, <Alan.Cox@linux.org>
14 *
15 * Fixes:
16 * Alan Cox : verify_area() calls
17 * Alan Cox : stopped close while in use off icmp
18 * messages. Not a fix but a botch that
19 * for udp at least is 'valid'.
20 * Alan Cox : Fixed icmp handling properly
21 * Alan Cox : Correct error for oversized datagrams
22 * Alan Cox : Tidied select() semantics.
23 * Alan Cox : udp_err() fixed properly, also now
24 * select and read wake correctly on errors
25 * Alan Cox : udp_send verify_area moved to avoid mem leak
26 * Alan Cox : UDP can count its memory
27 * Alan Cox : send to an unknown connection causes
28 * an ECONNREFUSED off the icmp, but
29 * does NOT close.
30 * Alan Cox : Switched to new sk_buff handlers. No more backlog!
31 * Alan Cox : Using generic datagram code. Even smaller and the PEEK
32 * bug no longer crashes it.
33 * Fred Van Kempen : Net2e support for sk->broadcast.
34 * Alan Cox : Uses skb_free_datagram
35 * Alan Cox : Added get/set sockopt support.
36 * Alan Cox : Broadcasting without option set returns EACCES.
37 * Alan Cox : No wakeup calls. Instead we now use the callbacks.
38 * Alan Cox : Use ip_tos and ip_ttl
39 * Alan Cox : SNMP Mibs
40 * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support.
41 * Matt Dillon : UDP length checks.
42 * Alan Cox : Smarter af_inet used properly.
43 * Alan Cox : Use new kernel side addressing.
44 * Alan Cox : Incorrect return on truncated datagram receive.
45 * Arnt Gulbrandsen : New udp_send and stuff
46 * Alan Cox : Cache last socket
47 * Alan Cox : Route cache
48 * Jon Peatfield : Minor efficiency fix to sendto().
49 * Mike Shaver : RFC1122 checks.
50 * Alan Cox : Nonblocking error fix.
51 * Willy Konynenberg : Transparent proxying support.
52 * Mike McLagan : Routing by source
53 * David S. Miller : New socket lookup architecture.
54 * Last socket cache retained as it
55 * does have a high hit rate.
56 * Olaf Kirch : Don't linearise iovec on sendmsg.
57 * Andi Kleen : Some cleanups, cache destination entry
58 * for connect.
59 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
60 * Melvin Smith : Check msg_name not msg_namelen in sendto(),
61 * return ENOTCONN for unconnected sockets (POSIX)
62 * Janos Farkas : don't deliver multi/broadcasts to a different
63 * bound-to-device socket
64 *
65 *
66 * This program is free software; you can redistribute it and/or
67 * modify it under the terms of the GNU General Public License
68 * as published by the Free Software Foundation; either version
69 * 2 of the License, or (at your option) any later version.
70 */
71
72 #include <asm/system.h>
73 #include <asm/uaccess.h>
74 #include <asm/ioctls.h>
75 #include <linux/types.h>
76 #include <linux/fcntl.h>
77 #include <linux/socket.h>
78 #include <linux/sockios.h>
79 #include <linux/in.h>
80 #include <linux/errno.h>
81 #include <linux/timer.h>
82 #include <linux/mm.h>
83 #include <linux/config.h>
84 #include <linux/inet.h>
85 #include <linux/netdevice.h>
86 #include <net/snmp.h>
87 #include <net/ip.h>
88 #include <net/protocol.h>
89 #include <linux/skbuff.h>
90 #include <net/sock.h>
91 #include <net/udp.h>
92 #include <net/icmp.h>
93 #include <net/route.h>
94 #include <net/inet_common.h>
95 #include <net/checksum.h>
96
97 /*
98 * Snmp MIB for the UDP layer
99 */
100
/* UDP SNMP counters; the array is sized with two slots per possible CPU. */
101 struct udp_mib udp_statistics[NR_CPUS*2];
102
/*
 * Hash table of bound UDP sockets.  A socket lives in the bucket selected
 * by (local port & (UDP_HTABLE_SIZE - 1)); sockets in a bucket are chained
 * through sk->next / sk->pprev.  udp_hash_lock guards the table: writers
 * (get_port/unhash) take it with write_lock_bh(), lookups take it for read.
 */
103 struct sock *udp_hash[UDP_HTABLE_SIZE];
104 rwlock_t udp_hash_lock = RW_LOCK_UNLOCKED;
105
/* Port most recently handed out by automatic port selection. */
106 /* Shared by v4/v6 udp. */
107 int udp_port_rover;
108
109 static int udp_v4_get_port(struct sock *sk, unsigned short snum)
110 {
111 write_lock_bh(&udp_hash_lock);
112 if (snum == 0) {
113 int best_size_so_far, best, result, i;
114
115 if (udp_port_rover > sysctl_local_port_range[1] ||
116 udp_port_rover < sysctl_local_port_range[0])
117 udp_port_rover = sysctl_local_port_range[0];
118 best_size_so_far = 32767;
119 best = result = udp_port_rover;
120 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
121 struct sock *sk;
122 int size;
123
124 sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
125 if (!sk) {
126 if (result > sysctl_local_port_range[1])
127 result = sysctl_local_port_range[0] +
128 ((result - sysctl_local_port_range[0]) &
129 (UDP_HTABLE_SIZE - 1));
130 goto gotit;
131 }
132 size = 0;
133 do {
134 if (++size >= best_size_so_far)
135 goto next;
136 } while ((sk = sk->next) != NULL);
137 best_size_so_far = size;
138 best = result;
139 next:;
140 }
141 result = best;
142 for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
143 if (result > sysctl_local_port_range[1])
144 result = sysctl_local_port_range[0]
145 + ((result - sysctl_local_port_range[0]) &
146 (UDP_HTABLE_SIZE - 1));
147 if (!udp_lport_inuse(result))
148 break;
149 }
150 if (i >= (1 << 16) / UDP_HTABLE_SIZE)
151 goto fail;
152 gotit:
153 udp_port_rover = snum = result;
154 } else {
155 struct sock *sk2;
156
157 for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
158 sk2 != NULL;
159 sk2 = sk2->next) {
160 if (sk2->num == snum &&
161 sk2 != sk &&
162 sk2->bound_dev_if == sk->bound_dev_if &&
163 (!sk2->rcv_saddr ||
164 !sk->rcv_saddr ||
165 sk2->rcv_saddr == sk->rcv_saddr) &&
166 (!sk2->reuse || !sk->reuse))
167 goto fail;
168 }
169 }
170 sk->num = snum;
171 if (sk->pprev == NULL) {
172 struct sock **skp = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
173 if ((sk->next = *skp) != NULL)
174 (*skp)->pprev = &sk->next;
175 *skp = sk;
176 sk->pprev = skp;
177 sock_prot_inc_use(sk->prot);
178 sock_hold(sk);
179 }
180 write_unlock_bh(&udp_hash_lock);
181 return 0;
182
183 fail:
184 write_unlock_bh(&udp_hash_lock);
185 return 1;
186 }
187
/*
 * ->hash hook for UDP.  Insertion into udp_hash is performed by
 * udp_v4_get_port(), so reaching this hook is treated as a kernel bug.
 */
static void udp_v4_hash(struct sock *sk)
{
	BUG();
}
192
193 static void udp_v4_unhash(struct sock *sk)
194 {
195 write_lock_bh(&udp_hash_lock);
196 if (sk->pprev) {
197 if (sk->next)
198 sk->next->pprev = sk->pprev;
199 *sk->pprev = sk->next;
200 sk->pprev = NULL;
201 sk->num = 0;
202 sock_prot_dec_use(sk->prot);
203 __sock_put(sk);
204 }
205 write_unlock_bh(&udp_hash_lock);
206 }
207
208 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
209 * harder than this. -DaveM
210 */
211 struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
212 {
213 struct sock *sk, *result = NULL;
214 unsigned short hnum = ntohs(dport);
215 int badness = -1;
216
217 for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) {
218 if(sk->num == hnum) {
219 int score = 0;
220 if(sk->rcv_saddr) {
221 if(sk->rcv_saddr != daddr)
222 continue;
223 score++;
224 }
225 if(sk->daddr) {
226 if(sk->daddr != saddr)
227 continue;
228 score++;
229 }
230 if(sk->dport) {
231 if(sk->dport != sport)
232 continue;
233 score++;
234 }
235 if(sk->bound_dev_if) {
236 if(sk->bound_dev_if != dif)
237 continue;
238 score++;
239 }
240 if(score == 4) {
241 result = sk;
242 break;
243 } else if(score > badness) {
244 result = sk;
245 badness = score;
246 }
247 }
248 }
249 return result;
250 }
251
252 __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
253 {
254 struct sock *sk;
255
256 read_lock(&udp_hash_lock);
257 sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif);
258 if (sk)
259 sock_hold(sk);
260 read_unlock(&udp_hash_lock);
261 return sk;
262 }
263
264 static inline struct sock *udp_v4_mcast_next(struct sock *sk,
265 u16 loc_port, u32 loc_addr,
266 u16 rmt_port, u32 rmt_addr,
267 int dif)
268 {
269 struct sock *s = sk;
270 unsigned short hnum = ntohs(loc_port);
271 for(; s; s = s->next) {
272 if ((s->num != hnum) ||
273 (s->daddr && s->daddr!=rmt_addr) ||
274 (s->dport != rmt_port && s->dport != 0) ||
275 (s->rcv_saddr && s->rcv_saddr != loc_addr) ||
276 (s->bound_dev_if && s->bound_dev_if != dif))
277 continue;
278 break;
279 }
280 return s;
281 }
282
283 /*
284 * This routine is called by the ICMP module when it gets some
285 * sort of error condition. If err < 0 then the socket should
286 * be closed and the error returned to the user. If err > 0
287 * it's just the icmp type << 8 | icmp code.
288 * Header points to the ip header of the error packet. We move
289 * on past this. Then (as it used to claim before adjustment)
290 * header points to the first 8 bytes of the udp header. We need
291 * to find the appropriate port.
292 */
293
294 void udp_err(struct sk_buff *skb, u32 info)
295 {
296 struct iphdr *iph = (struct iphdr*)skb->data;
297 struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
298 int type = skb->h.icmph->type;
299 int code = skb->h.icmph->code;
300 struct sock *sk;
301 int harderr;
302 int err;
303
304 sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex);
305 if (sk == NULL) {
306 ICMP_INC_STATS_BH(IcmpInErrors);
307 return; /* No socket for error */
308 }
309
310 err = 0;
311 harderr = 0;
312
313 switch (type) {
314 default:
315 case ICMP_TIME_EXCEEDED:
316 err = EHOSTUNREACH;
317 break;
318 case ICMP_SOURCE_QUENCH:
319 goto out;
320 case ICMP_PARAMETERPROB:
321 err = EPROTO;
322 harderr = 1;
323 break;
324 case ICMP_DEST_UNREACH:
325 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
326 if (sk->protinfo.af_inet.pmtudisc != IP_PMTUDISC_DONT) {
327 err = EMSGSIZE;
328 harderr = 1;
329 break;
330 }
331 goto out;
332 }
333 err = EHOSTUNREACH;
334 if (code <= NR_ICMP_UNREACH) {
335 harderr = icmp_err_convert[code].fatal;
336 err = icmp_err_convert[code].errno;
337 }
338 break;
339 }
340
341 /*
342 * RFC1122: OK. Passes ICMP errors back to application, as per
343 * 4.1.3.3.
344 */
345 if (!sk->protinfo.af_inet.recverr) {
346 if (!harderr || sk->state != TCP_ESTABLISHED)
347 goto out;
348 } else {
349 ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
350 }
351 sk->err = err;
352 sk->error_report(sk);
353 out:
354 sock_put(sk);
355 }
356
357
358 static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
359 {
360 return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
361 }
362
363 struct udpfakehdr
364 {
365 struct udphdr uh;
366 u32 saddr;
367 u32 daddr;
368 struct iovec *iov;
369 u32 wcheck;
370 };
371
372 /*
373 * Copy and checksum a UDP packet from user space into a buffer.
374 */
375
376 static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned int fraglen)
377 {
378 struct udpfakehdr *ufh = (struct udpfakehdr *)p;
379 if (offset==0) {
380 if (csum_partial_copy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
381 fraglen-sizeof(struct udphdr), &ufh->wcheck))
382 return -EFAULT;
383 ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr),
384 ufh->wcheck);
385 ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr,
386 ntohs(ufh->uh.len),
387 IPPROTO_UDP, ufh->wcheck);
388 if (ufh->uh.check == 0)
389 ufh->uh.check = -1;
390 memcpy(to, ufh, sizeof(struct udphdr));
391 return 0;
392 }
393 if (csum_partial_copy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
394 fraglen, &ufh->wcheck))
395 return -EFAULT;
396 return 0;
397 }
398
399 /*
400 * Copy a UDP packet from user space into a buffer without checksumming.
401 */
402
403 static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset, unsigned int fraglen)
404 {
405 struct udpfakehdr *ufh = (struct udpfakehdr *)p;
406
407 if (offset==0) {
408 memcpy(to, ufh, sizeof(struct udphdr));
409 return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
410 fraglen-sizeof(struct udphdr));
411 }
412 return memcpy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
413 fraglen);
414 }
415
416 int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
417 {
418 int ulen = len + sizeof(struct udphdr);
419 struct ipcm_cookie ipc;
420 struct udpfakehdr ufh;
421 struct rtable *rt = NULL;
422 int free = 0;
423 int connected = 0;
424 u32 daddr;
425 u8 tos;
426 int err;
427
428 /* This check is ONLY to check for arithmetic overflow
429 on integer(!) len. Not more! Real check will be made
430 in ip_build_xmit --ANK
431
432 BTW socket.c -> af_*.c -> ... make multiple
433 invalid conversions size_t -> int. We MUST repair it f.e.
434 by replacing all of them with size_t and revise all
435 the places sort of len += sizeof(struct iphdr)
436 If len was ULONG_MAX-10 it would be cathastrophe --ANK
437 */
438
439 if (len < 0 || len > 0xFFFF)
440 return -EMSGSIZE;
441
442 /*
443 * Check the flags.
444 */
445
446 if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
447 return -EOPNOTSUPP;
448
449 /*
450 * Get and verify the address.
451 */
452
453 if (msg->msg_name) {
454 struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
455 if (msg->msg_namelen < sizeof(*usin))
456 return -EINVAL;
457 if (usin->sin_family != AF_INET) {
458 if (usin->sin_family != AF_UNSPEC)
459 return -EINVAL;
460 }
461
462 ufh.daddr = usin->sin_addr.s_addr;
463 ufh.uh.dest = usin->sin_port;
464 if (ufh.uh.dest == 0)
465 return -EINVAL;
466 } else {
467 if (sk->state != TCP_ESTABLISHED)
468 return -ENOTCONN;
469 ufh.daddr = sk->daddr;
470 ufh.uh.dest = sk->dport;
471 /* Open fast path for connected socket.
472 Route will not be used, if at least one option is set.
473 */
474 connected = 1;
475 }
476 ipc.addr = sk->saddr;
477 ufh.uh.source = sk->sport;
478
479 ipc.opt = NULL;
480 ipc.oif = sk->bound_dev_if;
481 if (msg->msg_controllen) {
482 err = ip_cmsg_send(msg, &ipc);
483 if (err)
484 return err;
485 if (ipc.opt)
486 free = 1;
487 connected = 0;
488 }
489 if (!ipc.opt)
490 ipc.opt = sk->protinfo.af_inet.opt;
491
492 ufh.saddr = ipc.addr;
493 ipc.addr = daddr = ufh.daddr;
494
495 if (ipc.opt && ipc.opt->srr) {
496 if (!daddr)
497 return -EINVAL;
498 daddr = ipc.opt->faddr;
499 connected = 0;
500 }
501 tos = RT_TOS(sk->protinfo.af_inet.tos);
502 if (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) ||
503 (ipc.opt && ipc.opt->is_strictroute)) {
504 tos |= RTO_ONLINK;
505 connected = 0;
506 }
507
508 if (MULTICAST(daddr)) {
509 if (!ipc.oif)
510 ipc.oif = sk->protinfo.af_inet.mc_index;
511 if (!ufh.saddr)
512 ufh.saddr = sk->protinfo.af_inet.mc_addr;
513 connected = 0;
514 }
515
516 if (connected)
517 rt = (struct rtable*)sk_dst_check(sk, 0);
518
519 if (rt == NULL) {
520 err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif);
521 if (err)
522 goto out;
523
524 err = -EACCES;
525 if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast)
526 goto out;
527 if (connected)
528 sk_dst_set(sk, dst_clone(&rt->u.dst));
529 }
530
531 if (msg->msg_flags&MSG_CONFIRM)
532 goto do_confirm;
533 back_from_confirm:
534
535 ufh.saddr = rt->rt_src;
536 if (!ipc.addr)
537 ufh.daddr = ipc.addr = rt->rt_dst;
538 ufh.uh.len = htons(ulen);
539 ufh.uh.check = 0;
540 ufh.iov = msg->msg_iov;
541 ufh.wcheck = 0;
542
543 /* RFC1122: OK. Provides the checksumming facility (MUST) as per */
544 /* 4.1.3.4. It's configurable by the application via setsockopt() */
545 /* (MAY) and it defaults to on (MUST). */
546
547 err = ip_build_xmit(sk,
548 (sk->no_check == UDP_CSUM_NOXMIT ?
549 udp_getfrag_nosum :
550 udp_getfrag),
551 &ufh, ulen, &ipc, rt, msg->msg_flags);
552
553 out:
554 ip_rt_put(rt);
555 if (free)
556 kfree(ipc.opt);
557 if (!err) {
558 UDP_INC_STATS_USER(UdpOutDatagrams);
559 return len;
560 }
561 return err;
562
563 do_confirm:
564 dst_confirm(&rt->u.dst);
565 if (!(msg->msg_flags&MSG_PROBE) || len)
566 goto back_from_confirm;
567 err = 0;
568 goto out;
569 }
570
571 /*
572 * IOCTL requests applicable to the UDP protocol
573 */
574
575 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
576 {
577 switch(cmd)
578 {
579 case SIOCOUTQ:
580 {
581 int amount = atomic_read(&sk->wmem_alloc);
582 return put_user(amount, (int *)arg);
583 }
584
585 case SIOCINQ:
586 {
587 struct sk_buff *skb;
588 unsigned long amount;
589
590 amount = 0;
591 spin_lock_irq(&sk->receive_queue.lock);
592 skb = skb_peek(&sk->receive_queue);
593 if (skb != NULL) {
594 /*
595 * We will only return the amount
596 * of this packet since that is all
597 * that will be read.
598 */
599 amount = skb->len - sizeof(struct udphdr);
600 }
601 spin_unlock_irq(&sk->receive_queue.lock);
602 return put_user(amount, (int *)arg);
603 }
604
605 default:
606 return -ENOIOCTLCMD;
607 }
608 return(0);
609 }
610
611 static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
612 {
613 return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
614 }
615
616 static __inline__ int udp_checksum_complete(struct sk_buff *skb)
617 {
618 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
619 __udp_checksum_complete(skb);
620 }
621
622 /*
623 * This should be easy, if there is something there we
624 * return it, otherwise we block.
625 */
626
627 int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
628 int noblock, int flags, int *addr_len)
629 {
630 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
631 struct sk_buff *skb;
632 int copied, err;
633
634 /*
635 * Check any passed addresses
636 */
637 if (addr_len)
638 *addr_len=sizeof(*sin);
639
640 if (flags & MSG_ERRQUEUE)
641 return ip_recv_error(sk, msg, len);
642
643 skb = skb_recv_datagram(sk, flags, noblock, &err);
644 if (!skb)
645 goto out;
646
647 copied = skb->len - sizeof(struct udphdr);
648 if (copied > len) {
649 copied = len;
650 msg->msg_flags |= MSG_TRUNC;
651 }
652
653 if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
654 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
655 copied);
656 } else if (msg->msg_flags&MSG_TRUNC) {
657 if (__udp_checksum_complete(skb))
658 goto csum_copy_err;
659 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
660 copied);
661 } else {
662 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
663
664 if (err == -EINVAL)
665 goto csum_copy_err;
666 }
667
668 if (err)
669 goto out_free;
670
671 sock_recv_timestamp(msg, sk, skb);
672
673 /* Copy the address. */
674 if (sin)
675 {
676 sin->sin_family = AF_INET;
677 sin->sin_port = skb->h.uh->source;
678 sin->sin_addr.s_addr = skb->nh.iph->saddr;
679 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
680 }
681 if (sk->protinfo.af_inet.cmsg_flags)
682 ip_cmsg_recv(msg, skb);
683 err = copied;
684
685 out_free:
686 skb_free_datagram(sk, skb);
687 out:
688 return err;
689
690 csum_copy_err:
691 UDP_INC_STATS_BH(UdpInErrors);
692
693 /* Clear queue. */
694 if (flags&MSG_PEEK) {
695 int clear = 0;
696 spin_lock_irq(&sk->receive_queue.lock);
697 if (skb == skb_peek(&sk->receive_queue)) {
698 __skb_unlink(skb, &sk->receive_queue);
699 clear = 1;
700 }
701 spin_unlock_irq(&sk->receive_queue.lock);
702 if (clear)
703 kfree_skb(skb);
704 }
705
706 skb_free_datagram(sk, skb);
707
708 return -EAGAIN;
709 }
710
711 int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
712 {
713 struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
714 struct rtable *rt;
715 int err;
716
717
718 if (addr_len < sizeof(*usin))
719 return -EINVAL;
720
721 if (usin->sin_family != AF_INET)
722 return -EAFNOSUPPORT;
723
724 sk_dst_reset(sk);
725
726 err = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr,
727 sk->protinfo.af_inet.tos|sk->localroute, sk->bound_dev_if);
728 if (err)
729 return err;
730 if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) {
731 ip_rt_put(rt);
732 return -EACCES;
733 }
734 if(!sk->saddr)
735 sk->saddr = rt->rt_src; /* Update source address */
736 if(!sk->rcv_saddr)
737 sk->rcv_saddr = rt->rt_src;
738 sk->daddr = rt->rt_dst;
739 sk->dport = usin->sin_port;
740 sk->state = TCP_ESTABLISHED;
741 sk->protinfo.af_inet.id = jiffies;
742
743 sk_dst_set(sk, &rt->u.dst);
744 return(0);
745 }
746
747 int udp_disconnect(struct sock *sk, int flags)
748 {
749 /*
750 * 1003.1g - break association.
751 */
752
753 sk->state = TCP_CLOSE;
754 sk->daddr = 0;
755 sk->dport = 0;
756 sk->bound_dev_if = 0;
757 if (!(sk->userlocks&SOCK_BINDADDR_LOCK)) {
758 sk->rcv_saddr = 0;
759 sk->saddr = 0;
760 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
761 memset(&sk->net_pinfo.af_inet6.saddr, 0, 16);
762 memset(&sk->net_pinfo.af_inet6.rcv_saddr, 0, 16);
763 #endif
764 }
765 if (!(sk->userlocks&SOCK_BINDPORT_LOCK)) {
766 sk->prot->unhash(sk);
767 sk->sport = 0;
768 }
769 sk_dst_reset(sk);
770 return 0;
771 }
772
/*
 * ->close hook: hand the socket to inet_sock_release().
 * @timeout is not used.
 */
static void udp_close(struct sock *sk, long timeout)
{
	inet_sock_release(sk);
}
777
778 static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
779 {
780 /*
781 * Charge it to the socket, dropping if the queue is full.
782 */
783
784 #if defined(CONFIG_FILTER)
785 if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
786 if (__udp_checksum_complete(skb)) {
787 UDP_INC_STATS_BH(UdpInErrors);
788 IP_INC_STATS_BH(IpInDiscards);
789 ip_statistics[smp_processor_id()*2].IpInDelivers--;
790 kfree_skb(skb);
791 return -1;
792 }
793 skb->ip_summed = CHECKSUM_UNNECESSARY;
794 }
795 #endif
796
797 if (sock_queue_rcv_skb(sk,skb)<0) {
798 UDP_INC_STATS_BH(UdpInErrors);
799 IP_INC_STATS_BH(IpInDiscards);
800 ip_statistics[smp_processor_id()*2].IpInDelivers--;
801 kfree_skb(skb);
802 return -1;
803 }
804 UDP_INC_STATS_BH(UdpInDatagrams);
805 return 0;
806 }
807
808 /*
809 * Multicasts and broadcasts go to each listener.
810 *
811 * Note: called only from the BH handler context,
812 * so we don't need to lock the hashes.
813 */
814 static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
815 u32 saddr, u32 daddr)
816 {
817 struct sock *sk;
818 int dif;
819
820 read_lock(&udp_hash_lock);
821 sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
822 dif = skb->dev->ifindex;
823 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
824 if (sk) {
825 struct sock *sknext = NULL;
826
827 do {
828 struct sk_buff *skb1 = skb;
829
830 sknext = udp_v4_mcast_next(sk->next, uh->dest, daddr,
831 uh->source, saddr, dif);
832 if(sknext)
833 skb1 = skb_clone(skb, GFP_ATOMIC);
834
835 if(skb1)
836 udp_queue_rcv_skb(sk, skb1);
837 sk = sknext;
838 } while(sknext);
839 } else
840 kfree_skb(skb);
841 read_unlock(&udp_hash_lock);
842 return 0;
843 }
844
845 /* Initialize UDP checksum. If exited with zero value (success),
846 * CHECKSUM_UNNECESSARY means, that no more checks are required.
847 * Otherwise, csum completion requires chacksumming packet body,
848 * including udp header and folding it to skb->csum.
849 */
850 static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
851 unsigned short ulen, u32 saddr, u32 daddr)
852 {
853 if (uh->check == 0) {
854 skb->ip_summed = CHECKSUM_UNNECESSARY;
855 } else if (skb->ip_summed == CHECKSUM_HW) {
856 skb->ip_summed = CHECKSUM_UNNECESSARY;
857 if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
858 return 0;
859 NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp v4 hw csum failure.\n"));
860 skb->ip_summed = CHECKSUM_NONE;
861 }
862 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
863 skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
864 /* Probably, we should checksum udp header (it should be in cache
865 * in any case) and data in tiny packets (< rx copybreak).
866 */
867 return 0;
868 }
869
870 /*
871 * All we need to do is get the socket, and then do a checksum.
872 */
873
874 int udp_rcv(struct sk_buff *skb)
875 {
876 struct sock *sk;
877 struct udphdr *uh;
878 unsigned short ulen;
879 struct rtable *rt = (struct rtable*)skb->dst;
880 u32 saddr = skb->nh.iph->saddr;
881 u32 daddr = skb->nh.iph->daddr;
882 int len = skb->len;
883
884 IP_INC_STATS_BH(IpInDelivers);
885
886 /*
887 * Validate the packet and the UDP length.
888 */
889 ulen = ntohs(skb->h.uh->len);
890
891 if (ulen > len || ulen < sizeof(*uh))
892 goto short_packet;
893
894 if (pskb_trim(skb, ulen))
895 goto short_packet;
896
897 uh = skb->h.uh;
898
899 if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
900 goto csum_error;
901
902 if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
903 return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
904
905 sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
906
907 if (sk != NULL) {
908 udp_queue_rcv_skb(sk, skb);
909 sock_put(sk);
910 return 0;
911 }
912
913 /* No socket. Drop packet silently, if checksum is wrong */
914 if (udp_checksum_complete(skb))
915 goto csum_error;
916
917 UDP_INC_STATS_BH(UdpNoPorts);
918 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
919
920 /*
921 * Hmm. We got an UDP packet to a port to which we
922 * don't wanna listen. Ignore it.
923 */
924 kfree_skb(skb);
925 return(0);
926
927 short_packet:
928 NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "UDP: short packet: %d/%d\n", ulen, len));
929 UDP_INC_STATS_BH(UdpInErrors);
930 kfree_skb(skb);
931 return(0);
932
933 csum_error:
934 /*
935 * RFC1122: OK. Discards the bad packet silently (as far as
936 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
937 */
938 NETDEBUG(if (net_ratelimit())
939 printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
940 NIPQUAD(saddr),
941 ntohs(uh->source),
942 NIPQUAD(daddr),
943 ntohs(uh->dest),
944 ulen));
945 UDP_INC_STATS_BH(UdpInErrors);
946 kfree_skb(skb);
947 return(0);
948 }
949
950 static void get_udp_sock(struct sock *sp, char *tmpbuf, int i)
951 {
952 unsigned int dest, src;
953 __u16 destp, srcp;
954
955 dest = sp->daddr;
956 src = sp->rcv_saddr;
957 destp = ntohs(sp->dport);
958 srcp = ntohs(sp->sport);
959 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
960 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p",
961 i, src, srcp, dest, destp, sp->state,
962 atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
963 0, 0L, 0,
964 sock_i_uid(sp), 0,
965 sock_i_ino(sp),
966 atomic_read(&sp->refcnt), sp);
967 }
968
969 int udp_get_info(char *buffer, char **start, off_t offset, int length)
970 {
971 int len = 0, num = 0, i;
972 off_t pos = 0;
973 off_t begin;
974 char tmpbuf[129];
975
976 if (offset < 128)
977 len += sprintf(buffer, "%-127s\n",
978 " sl local_address rem_address st tx_queue "
979 "rx_queue tr tm->when retrnsmt uid timeout inode");
980 pos = 128;
981 read_lock(&udp_hash_lock);
982 for (i = 0; i < UDP_HTABLE_SIZE; i++) {
983 struct sock *sk;
984
985 for (sk = udp_hash[i]; sk; sk = sk->next, num++) {
986 if (sk->family != PF_INET)
987 continue;
988 pos += 128;
989 if (pos <= offset)
990 continue;
991 get_udp_sock(sk, tmpbuf, i);
992 len += sprintf(buffer+len, "%-127s\n", tmpbuf);
993 if(len >= length)
994 goto out;
995 }
996 }
997 out:
998 read_unlock(&udp_hash_lock);
999 begin = len - (pos - offset);
1000 *start = buffer + begin;
1001 len -= begin;
1002 if(len > length)
1003 len = length;
1004 if (len < 0)
1005 len = 0;
1006 return len;
1007 }
1008
1009 struct proto udp_prot = {
1010 name: "UDP",
1011 close: udp_close,
1012 connect: udp_connect,
1013 disconnect: udp_disconnect,
1014 ioctl: udp_ioctl,
1015 setsockopt: ip_setsockopt,
1016 getsockopt: ip_getsockopt,
1017 sendmsg: udp_sendmsg,
1018 recvmsg: udp_recvmsg,
1019 backlog_rcv: udp_queue_rcv_skb,
1020 hash: udp_v4_hash,
1021 unhash: udp_v4_unhash,
1022 get_port: udp_v4_get_port,
1023 };
1024