File: /usr/src/linux/net/ipv4/ip_input.c

1     /*
2      * INET		An implementation of the TCP/IP protocol suite for the LINUX
3      *		operating system.  INET is implemented using the  BSD Socket
4      *		interface as the means of communication with the user level.
5      *
6      *		The Internet Protocol (IP) module.
7      *
8      * Version:	$Id: ip_input.c,v 1.53 2000/12/18 19:01:50 davem Exp $
9      *
10      * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
11      *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12      *		Donald Becker, <becker@super.org>
13      *		Alan Cox, <Alan.Cox@linux.org>
14      *		Richard Underwood
15      *		Stefan Becker, <stefanb@yello.ping.de>
16      *		Jorge Cwik, <jorge@laser.satlink.net>
17      *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
18      *		
19      *
20      * Fixes:
21      *		Alan Cox	:	Commented a couple of minor bits of surplus code
22      *		Alan Cox	:	Undefining IP_FORWARD doesn't include the code
23      *					(just stops a compiler warning).
24      *		Alan Cox	:	Frames with >=MAX_ROUTE record routes, strict routes or loose routes
25      *					are junked rather than corrupting things.
26      *		Alan Cox	:	Frames to bad broadcast subnets are dumped
27      *					We used to process them non broadcast and
28      *					boy could that cause havoc.
29      *		Alan Cox	:	ip_forward sets the free flag on the
30      *					new frame it queues. Still crap because
31      *					it copies the frame but at least it
32      *					doesn't eat memory too.
33      *		Alan Cox	:	Generic queue code and memory fixes.
34      *		Fred Van Kempen :	IP fragment support (borrowed from NET2E)
35      *		Gerhard Koerting:	Forward fragmented frames correctly.
36      *		Gerhard Koerting: 	Fixes to my fix of the above 8-).
37      *		Gerhard Koerting:	IP interface addressing fix.
38      *		Linus Torvalds	:	More robustness checks
39      *		Alan Cox	:	Even more checks: Still not as robust as it ought to be
40      *		Alan Cox	:	Save IP header pointer for later
41      *		Alan Cox	:	ip option setting
42      *		Alan Cox	:	Use ip_tos/ip_ttl settings
43      *		Alan Cox	:	Fragmentation bogosity removed
44      *					(Thanks to Mark.Bush@prg.ox.ac.uk)
45      *		Dmitry Gorodchanin :	Send of a raw packet crash fix.
46      *		Alan Cox	:	Silly ip bug when an overlength
47      *					fragment turns up. Now frees the
48      *					queue.
49      *		Linus Torvalds/ :	Memory leakage on fragmentation
50      *		Alan Cox	:	handling.
51      *		Gerhard Koerting:	Forwarding uses IP priority hints
52      *		Teemu Rantanen	:	Fragment problems.
53      *		Alan Cox	:	General cleanup, comments and reformat
54      *		Alan Cox	:	SNMP statistics
55      *		Alan Cox	:	BSD address rule semantics. Also see
56      *					UDP as there is a nasty checksum issue
57      *					if you do things the wrong way.
58      *		Alan Cox	:	Always defrag, moved IP_FORWARD to the config.in file
59      *		Alan Cox	: 	IP options adjust sk->priority.
60      *		Pedro Roque	:	Fix mtu/length error in ip_forward.
61      *		Alan Cox	:	Avoid ip_chk_addr when possible.
62      *	Richard Underwood	:	IP multicasting.
63      *		Alan Cox	:	Cleaned up multicast handlers.
64      *		Alan Cox	:	RAW sockets demultiplex in the BSD style.
65      *		Gunther Mayer	:	Fix the SNMP reporting typo
66      *		Alan Cox	:	Always in group 224.0.0.1
67      *	Pauline Middelink	:	Fast ip_checksum update when forwarding
68      *					Masquerading support.
69      *		Alan Cox	:	Multicast loopback error for 224.0.0.1
70      *		Alan Cox	:	IP_MULTICAST_LOOP option.
71      *		Alan Cox	:	Use notifiers.
72      *		Bjorn Ekwall	:	Removed ip_csum (from slhc.c too)
73      *		Bjorn Ekwall	:	Moved ip_fast_csum to ip.h (inline!)
74      *		Stefan Becker   :       Send out ICMP HOST REDIRECT
75      *	Arnt Gulbrandsen	:	ip_build_xmit
76      *		Alan Cox	:	Per socket routing cache
77      *		Alan Cox	:	Fixed routing cache, added header cache.
78      *		Alan Cox	:	Loopback didn't work right in original ip_build_xmit - fixed it.
79      *		Alan Cox	:	Only send ICMP_REDIRECT if src/dest are the same net.
80      *		Alan Cox	:	Incoming IP option handling.
81      *		Alan Cox	:	Set saddr on raw output frames as per BSD.
82      *		Alan Cox	:	Stopped broadcast source route explosions.
83      *		Alan Cox	:	Can disable source routing
84      *		Takeshi Sone    :	Masquerading didn't work.
85      *	Dave Bonn,Alan Cox	:	Faster IP forwarding whenever possible.
86      *		Alan Cox	:	Memory leaks, tramples, misc debugging.
87      *		Alan Cox	:	Fixed multicast (by popular demand 8))
88      *		Alan Cox	:	Fixed forwarding (by even more popular demand 8))
89      *		Alan Cox	:	Fixed SNMP statistics [I think]
90      *	Gerhard Koerting	:	IP fragmentation forwarding fix
91      *		Alan Cox	:	Device lock against page fault.
92      *		Alan Cox	:	IP_HDRINCL facility.
93      *	Werner Almesberger	:	Zero fragment bug
94      *		Alan Cox	:	RAW IP frame length bug
95      *		Alan Cox	:	Outgoing firewall on build_xmit
96      *		A.N.Kuznetsov	:	IP_OPTIONS support throughout the kernel
97      *		Alan Cox	:	Multicast routing hooks
98      *		Jos Vos		:	Do accounting *before* call_in_firewall
99      *	Willy Konynenberg	:	Transparent proxying support
100      *
101      *  
102      *
103      * To Fix:
104      *		IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
105      *		and could be made very efficient with the addition of some virtual memory hacks to permit
106      *		the allocation of a buffer that can then be 'grown' by twiddling page tables.
107      *		Output fragmentation wants updating along with the buffer management to use a single 
108      *		interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
109      *		output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
110      *		fragmentation anyway.
111      *
112      *		This program is free software; you can redistribute it and/or
113      *		modify it under the terms of the GNU General Public License
114      *		as published by the Free Software Foundation; either version
115      *		2 of the License, or (at your option) any later version.
116      */
117     
118     #include <asm/system.h>
119     #include <linux/types.h>
120     #include <linux/kernel.h>
121     #include <linux/string.h>
122     #include <linux/errno.h>
123     #include <linux/config.h>
124     
125     #include <linux/net.h>
126     #include <linux/socket.h>
127     #include <linux/sockios.h>
128     #include <linux/in.h>
129     #include <linux/inet.h>
130     #include <linux/netdevice.h>
131     #include <linux/etherdevice.h>
132     
133     #include <net/snmp.h>
134     #include <net/ip.h>
135     #include <net/protocol.h>
136     #include <net/route.h>
137     #include <linux/skbuff.h>
138     #include <net/sock.h>
139     #include <net/arp.h>
140     #include <net/icmp.h>
141     #include <net/raw.h>
142     #include <net/checksum.h>
143     #include <linux/netfilter_ipv4.h>
144     #include <linux/mroute.h>
145     #include <linux/netlink.h>
146     
147     /*
148      *	SNMP management statistics
149      */
150     
151     struct ip_mib ip_statistics[NR_CPUS*2];
152     
153     /*
154      *	Process Router Attention IP option
155      */ 
156     int ip_call_ra_chain(struct sk_buff *skb)
157     {
158     	struct ip_ra_chain *ra;
159     	u8 protocol = skb->nh.iph->protocol;
160     	struct sock *last = NULL;
161     
162     	read_lock(&ip_ra_lock);
163     	for (ra = ip_ra_chain; ra; ra = ra->next) {
164     		struct sock *sk = ra->sk;
165     
166     		/* If socket is bound to an interface, only report
167     		 * the packet if it came  from that interface.
168     		 */
169     		if (sk && sk->num == protocol 
170     		    && ((sk->bound_dev_if == 0) 
171     			|| (sk->bound_dev_if == skb->dev->ifindex))) {
172     			if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
173     				skb = ip_defrag(skb);
174     				if (skb == NULL) {
175     					read_unlock(&ip_ra_lock);
176     					return 1;
177     				}
178     			}
179     			if (last) {
180     				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
181     				if (skb2)
182     					raw_rcv(last, skb2);
183     			}
184     			last = sk;
185     		}
186     	}
187     
188     	if (last) {
189     		raw_rcv(last, skb);
190     		read_unlock(&ip_ra_lock);
191     		return 1;
192     	}
193     	read_unlock(&ip_ra_lock);
194     	return 0;
195     }
196     
197     /* Handle this out of line, it is rare. */
198     static int ip_run_ipprot(struct sk_buff *skb, struct iphdr *iph,
199     			 struct inet_protocol *ipprot, int force_copy)
200     {
201     	int ret = 0;
202     
203     	do {
204     		if (ipprot->protocol == iph->protocol) {
205     			struct sk_buff *skb2 = skb;
206     			if (ipprot->copy || force_copy)
207     				skb2 = skb_clone(skb, GFP_ATOMIC);
208     			if(skb2 != NULL) {
209     				ret = 1;
210     				ipprot->handler(skb2);
211     			}
212     		}
213     		ipprot = (struct inet_protocol *) ipprot->next;
214     	} while(ipprot != NULL);
215     
216     	return ret;
217     }
218     
219     static inline int ip_local_deliver_finish(struct sk_buff *skb)
220     {
221     	int ihl = skb->nh.iph->ihl*4;
222     
223     #ifdef CONFIG_NETFILTER_DEBUG
224     	nf_debug_ip_local_deliver(skb);
225     #endif /*CONFIG_NETFILTER_DEBUG*/
226     
227     	/* Pull out additionl 8 bytes to save some space in protocols. */
228     	if (!pskb_may_pull(skb, ihl+8))
229     		goto out;
230     	__skb_pull(skb, ihl);
231     
232     #ifdef CONFIG_NETFILTER
233     	/* Free reference early: we don't need it any more, and it may
234                hold ip_conntrack module loaded indefinitely. */
235     	nf_conntrack_put(skb->nfct);
236     	skb->nfct = NULL;
237     #endif /*CONFIG_NETFILTER*/
238     
239             /* Point into the IP datagram, just past the header. */
240             skb->h.raw = skb->data;
241     
242     	{
243     		/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
244     		int protocol = skb->nh.iph->protocol;
245     		int hash = protocol & (MAX_INET_PROTOS - 1);
246     		struct sock *raw_sk = raw_v4_htable[hash];
247     		struct inet_protocol *ipprot;
248     		int flag;
249     
250     		/* If there maybe a raw socket we must check - if not we
251     		 * don't care less
252     		 */
253     		if(raw_sk != NULL)
254     			raw_sk = raw_v4_input(skb, skb->nh.iph, hash);
255     
256     		ipprot = (struct inet_protocol *) inet_protos[hash];
257     		flag = 0;
258     		if(ipprot != NULL) {
259     			if(raw_sk == NULL &&
260     			   ipprot->next == NULL &&
261     			   ipprot->protocol == protocol) {
262     				int ret;
263     
264     				/* Fast path... */
265     				ret = ipprot->handler(skb);
266     
267     				return ret;
268     			} else {
269     				flag = ip_run_ipprot(skb, skb->nh.iph, ipprot, (raw_sk != NULL));
270     			}
271     		}
272     
273     		/* All protocols checked.
274     		 * If this packet was a broadcast, we may *not* reply to it, since that
275     		 * causes (proven, grin) ARP storms and a leakage of memory (i.e. all
276     		 * ICMP reply messages get queued up for transmission...)
277     		 */
278     		if(raw_sk != NULL) {	/* Shift to last raw user */
279     			raw_rcv(raw_sk, skb);
280     			sock_put(raw_sk);
281     		} else if (!flag) {		/* Free and report errors */
282     			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);	
283     out:
284     			kfree_skb(skb);
285     		}
286     	}
287     
288     	return 0;
289     }
290     
291     /*
292      * 	Deliver IP Packets to the higher protocol layers.
293      */ 
294     int ip_local_deliver(struct sk_buff *skb)
295     {
296     	/*
297     	 *	Reassemble IP fragments.
298     	 */
299     
300     	if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
301     		skb = ip_defrag(skb);
302     		if (!skb)
303     			return 0;
304     	}
305     
306     	return NF_HOOK(PF_INET, NF_IP_LOCAL_IN, skb, skb->dev, NULL,
307     		       ip_local_deliver_finish);
308     }
309     
310     static inline int ip_rcv_finish(struct sk_buff *skb)
311     {
312     	struct net_device *dev = skb->dev;
313     	struct iphdr *iph = skb->nh.iph;
314     
315     	/*
316     	 *	Initialise the virtual path cache for the packet. It describes
317     	 *	how the packet travels inside Linux networking.
318     	 */ 
319     	if (skb->dst == NULL) {
320     		if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))
321     			goto drop; 
322     	}
323     
324     #ifdef CONFIG_NET_CLS_ROUTE
325     	if (skb->dst->tclassid) {
326     		struct ip_rt_acct *st = ip_rt_acct + 256*smp_processor_id();
327     		u32 idx = skb->dst->tclassid;
328     		st[idx&0xFF].o_packets++;
329     		st[idx&0xFF].o_bytes+=skb->len;
330     		st[(idx>>16)&0xFF].i_packets++;
331     		st[(idx>>16)&0xFF].i_bytes+=skb->len;
332     	}
333     #endif
334     
335     	if (iph->ihl > 5) {
336     		struct ip_options *opt;
337     
338     		/* It looks as overkill, because not all
339     		   IP options require packet mangling.
340     		   But it is the easiest for now, especially taking
341     		   into account that combination of IP options
342     		   and running sniffer is extremely rare condition.
343     		                                      --ANK (980813)
344     		*/
345     
346     		if (skb_cow(skb, skb_headroom(skb)))
347     			goto drop;
348     		iph = skb->nh.iph;
349     
350     		skb->ip_summed = 0;
351     		if (ip_options_compile(NULL, skb))
352     			goto inhdr_error;
353     
354     		opt = &(IPCB(skb)->opt);
355     		if (opt->srr) {
356     			struct in_device *in_dev = in_dev_get(dev);
357     			if (in_dev) {
358     				if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
359     					if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
360     						printk(KERN_INFO "source route option %u.%u.%u.%u -> %u.%u.%u.%u\n",
361     						       NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
362     					in_dev_put(in_dev);
363     					goto drop;
364     				}
365     				in_dev_put(in_dev);
366     			}
367     			if (ip_options_rcv_srr(skb))
368     				goto drop;
369     		}
370     	}
371     
372     	return skb->dst->input(skb);
373     
374     inhdr_error:
375     	IP_INC_STATS_BH(IpInHdrErrors);
376     drop:
377             kfree_skb(skb);
378             return NET_RX_DROP;
379     }
380     
381     /*
382      * 	Main IP Receive routine.
383      */ 
384     int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
385     {
386     	struct iphdr *iph = skb->nh.iph;
387     
388     	/* When the interface is in promisc. mode, drop all the crap
389     	 * that it receives, do not try to analyse it.
390     	 */
391     	if (skb->pkt_type == PACKET_OTHERHOST)
392     		goto drop;
393     
394     	IP_INC_STATS_BH(IpInReceives);
395     
396     	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
397     		goto out;
398     
399     	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
400     		goto inhdr_error;
401     
402     	iph = skb->nh.iph;
403     
404     	/*
405     	 *	RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
406     	 *
407     	 *	Is the datagram acceptable?
408     	 *
409     	 *	1.	Length at least the size of an ip header
410     	 *	2.	Version of 4
411     	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
412     	 *	4.	Doesn't have a bogus length
413     	 */
414     
415     	if (iph->ihl < 5 || iph->version != 4)
416     		goto inhdr_error; 
417     
418     	if (!pskb_may_pull(skb, iph->ihl*4))
419     		goto inhdr_error;
420     
421     	if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
422     		goto inhdr_error; 
423     
424     	{
425     		__u32 len = ntohs(iph->tot_len); 
426     		if (skb->len < len || len < (iph->ihl<<2))
427     			goto inhdr_error;
428     
429     		/* Our transport medium may have padded the buffer out. Now we know it
430     		 * is IP we can trim to the true length of the frame.
431     		 * Note this now means skb->len holds ntohs(iph->tot_len).
432     		 */
433     		if (skb->len > len) {
434     			__pskb_trim(skb, len);
435     			if (skb->ip_summed == CHECKSUM_HW)
436     				skb->ip_summed = CHECKSUM_NONE;
437     		}
438     	}
439     
440     	return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
441     		       ip_rcv_finish);
442     
443     inhdr_error:
444     	IP_INC_STATS_BH(IpInHdrErrors);
445     drop:
446             kfree_skb(skb);
447     out:
448             return NET_RX_DROP;
449     }
450     
451