File: /usr/src/linux/net/ipv4/netfilter/ip_tables.c

1     /*
2      * Packet matching code.
3      *
4      * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5      */
6     #include <linux/config.h>
7     #include <linux/skbuff.h>
8     #include <linux/kmod.h>
9     #include <linux/vmalloc.h>
10     #include <linux/netdevice.h>
11     #include <linux/module.h>
12     #include <linux/tcp.h>
13     #include <linux/udp.h>
14     #include <linux/icmp.h>
15     #include <net/ip.h>
16     #include <asm/uaccess.h>
17     #include <asm/semaphore.h>
18     #include <linux/proc_fs.h>
19     
20     #include <linux/netfilter_ipv4/ip_tables.h>
21     
22     /*#define DEBUG_IP_FIREWALL*/
23     /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
24     /*#define DEBUG_IP_FIREWALL_USER*/
25     
/* dprintf: packet-path debug output.  Forwards to printk() only when
 * DEBUG_IP_FIREWALL is defined; otherwise the call and its arguments
 * expand to nothing. */
#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...)  printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

/* duprintf: same idea for the user-context (table loading/dumping) code,
 * switched by DEBUG_IP_FIREWALL_USER. */
#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif

/* IP_NF_ASSERT(x): with CONFIG_NETFILTER_DEBUG, log function, file and
 * line via printk() when x is false; execution then continues normally.
 * Without it the macro expands to nothing, so x is not evaluated at all. */
#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)						\
do {								\
	if (!(x))						\
		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
		       __FUNCTION__, __FILE__, __LINE__);	\
} while(0)
#else
#define IP_NF_ASSERT(x)
#endif
/* Round x up to a multiple of SMP_CACHE_BYTES (the mask trick assumes
 * SMP_CACHE_BYTES is a power of two -- TODO confirm). */
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
49     
/* Mutex protects the ipt_target, ipt_match and ipt_tables lists, which
 * are only traversed in user context. */
static DECLARE_MUTEX(ipt_mutex);

/* Lock-assertion hooks, defined ahead of the lockhelp.h/listhelp.h
 * includes (presumably for the list helpers to invoke -- verify against
 * those headers).  Both ignore their argument and always test ipt_mutex:
 * the assertion holds when down_trylock() reports failure, i.e. the mutex
 * is already held.
 * NOTE(review): if the trylock succeeds (the assertion-failure case) it
 * presumably takes the mutex, and nothing here releases it -- confirm. */
#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
56     #include <linux/netfilter_ipv4/lockhelp.h>
57     #include <linux/netfilter_ipv4/listhelp.h>
58     
#if 0
/* Disabled debugging aid: when enabled, `static' and `inline' are defined
 * away for everything that follows in this file. */
#define static
#define inline
#endif
64     
65     /* Locking is simple: we assume at worst case there will be one packet
66        in user context and one from bottom halves (or soft irq if Alexey's
67        softnet patch was applied).
68     
69        We keep a set of rules for each CPU, so we can avoid write-locking
70        them; doing a readlock_bh() stops packets coming through if we're
71        in user context.
72     
73        To be cache friendly on SMP, we arrange them like so:
74        [ n-entries ]
75        ... cache-align padding ...
76        [ n-entries ]
77     
78        Hence the start of any table is given by get_table() below.  */
79     
/* The table itself: a header followed by the rule blob(s). */
struct ipt_table_info
{
	/* Size in bytes of one copy of the rule blob */
	unsigned int size;
	/* Number of entries: FIXME. --RR */
	unsigned int number;

	/* Entry points and underflows, one per hook, as byte offsets from
	 * the start of a rule blob. */
	unsigned int hook_entry[NF_IP_NUMHOOKS];
	unsigned int underflow[NF_IP_NUMHOOKS];

	/* ipt_entry tables: one per CPU.  With CONFIG_SMP the copy for
	 * CPU p starts TABLE_OFFSET(t, p) bytes into this array. */
	char entries[0] __attribute__((aligned(SMP_CACHE_BYTES)));
};
95     
/* Registered targets, matches and tables (guarded by ipt_mutex). */
static LIST_HEAD(ipt_target);
static LIST_HEAD(ipt_match);
static LIST_HEAD(ipt_tables);
/* Add b bytes and p packets to counter pair c. */
#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)

/* Byte offset of CPU p's private copy of the rules inside t->entries.
 * On uniprocessor builds there is a single copy, so the offset is 0 and
 * neither argument is evaluated. */
#ifdef CONFIG_SMP
#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
#else
#define TABLE_OFFSET(t,p) 0
#endif

/* Disabled tracing wrappers: when enabled they log every down(),
 * down_interruptible() and up() call together with its line number. */
#if 0
#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
#endif
112     
113     /* Returns whether matches rule or not. */
114     static inline int
115     ip_packet_match(const struct iphdr *ip,
116     		const char *indev,
117     		const char *outdev,
118     		const struct ipt_ip *ipinfo,
119     		int isfrag)
120     {
121     	size_t i;
122     	unsigned long ret;
123     
124     #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
125     
126     	if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
127     		  IPT_INV_SRCIP)
128     	    || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
129     		     IPT_INV_DSTIP)) {
130     		dprintf("Source or dest mismatch.\n");
131     
132     		dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
133     			NIPQUAD(ip->saddr),
134     			NIPQUAD(ipinfo->smsk.s_addr),
135     			NIPQUAD(ipinfo->src.s_addr),
136     			ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
137     		dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
138     			NIPQUAD(ip->daddr),
139     			NIPQUAD(ipinfo->dmsk.s_addr),
140     			NIPQUAD(ipinfo->dst.s_addr),
141     			ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
142     		return 0;
143     	}
144     
145     	/* Look for ifname matches; this should unroll nicely. */
146     	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
147     		ret |= (((const unsigned long *)indev)[i]
148     			^ ((const unsigned long *)ipinfo->iniface)[i])
149     			& ((const unsigned long *)ipinfo->iniface_mask)[i];
150     	}
151     
152     	if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
153     		dprintf("VIA in mismatch (%s vs %s).%s\n",
154     			indev, ipinfo->iniface,
155     			ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
156     		return 0;
157     	}
158     
159     	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
160     		ret |= (((const unsigned long *)outdev)[i]
161     			^ ((const unsigned long *)ipinfo->outiface)[i])
162     			& ((const unsigned long *)ipinfo->outiface_mask)[i];
163     	}
164     
165     	if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
166     		dprintf("VIA out mismatch (%s vs %s).%s\n",
167     			outdev, ipinfo->outiface,
168     			ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
169     		return 0;
170     	}
171     
172     	/* Check specific protocol */
173     	if (ipinfo->proto
174     	    && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
175     		dprintf("Packet protocol %hi does not match %hi.%s\n",
176     			ip->protocol, ipinfo->proto,
177     			ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
178     		return 0;
179     	}
180     
181     	/* If we have a fragment rule but the packet is not a fragment
182     	 * then we return zero */
183     	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
184     		dprintf("Fragment rule but not fragment.%s\n",
185     			ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
186     		return 0;
187     	}
188     
189     	return 1;
190     }
191     
192     static inline int
193     ip_checkentry(const struct ipt_ip *ip)
194     {
195     	if (ip->flags & ~IPT_F_MASK) {
196     		duprintf("Unknown flag bits set: %08X\n",
197     			 ip->flags & ~IPT_F_MASK);
198     		return 0;
199     	}
200     	if (ip->invflags & ~IPT_INV_MASK) {
201     		duprintf("Unknown invflag bits set: %08X\n",
202     			 ip->invflags & ~IPT_INV_MASK);
203     		return 0;
204     	}
205     	return 1;
206     }
207     
208     static unsigned int
209     ipt_error(struct sk_buff **pskb,
210     	  unsigned int hooknum,
211     	  const struct net_device *in,
212     	  const struct net_device *out,
213     	  const void *targinfo,
214     	  void *userinfo)
215     {
216     	if (net_ratelimit())
217     		printk("ip_tables: error: `%s'\n", (char *)targinfo);
218     
219     	return NF_DROP;
220     }
221     
222     static inline
223     int do_match(struct ipt_entry_match *m,
224     	     const struct sk_buff *skb,
225     	     const struct net_device *in,
226     	     const struct net_device *out,
227     	     int offset,
228     	     const void *hdr,
229     	     u_int16_t datalen,
230     	     int *hotdrop)
231     {
232     	/* Stop iteration if it doesn't match */
233     	if (!m->u.kernel.match->match(skb, in, out, m->data,
234     				      offset, hdr, datalen, hotdrop))
235     		return 1;
236     	else
237     		return 0;
238     }
239     
/* Returns the entry located `offset' bytes past `base'. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	char *start = base;

	return (struct ipt_entry *)(start + offset);
}
245     
246     /* Returns one of the generic firewall policies, like NF_ACCEPT. */
247     unsigned int
248     ipt_do_table(struct sk_buff **pskb,
249     	     unsigned int hook,
250     	     const struct net_device *in,
251     	     const struct net_device *out,
252     	     struct ipt_table *table,
253     	     void *userdata)
254     {
255     	static const char nulldevname[IFNAMSIZ] = { 0 };
256     	u_int16_t offset;
257     	struct iphdr *ip;
258     	void *protohdr;
259     	u_int16_t datalen;
260     	int hotdrop = 0;
261     	/* Initializing verdict to NF_DROP keeps gcc happy. */
262     	unsigned int verdict = NF_DROP;
263     	const char *indev, *outdev;
264     	void *table_base;
265     	struct ipt_entry *e, *back;
266     
267     	/* Initialization */
268     	ip = (*pskb)->nh.iph;
269     	protohdr = (u_int32_t *)ip + ip->ihl;
270     	datalen = (*pskb)->len - ip->ihl * 4;
271     	indev = in ? in->name : nulldevname;
272     	outdev = out ? out->name : nulldevname;
273     	/* We handle fragments by dealing with the first fragment as
274     	 * if it was a normal packet.  All other fragments are treated
275     	 * normally, except that they will NEVER match rules that ask
276     	 * things we don't know, ie. tcp syn flag or ports).  If the
277     	 * rule is also a fragment-specific rule, non-fragments won't
278     	 * match it. */
279     	offset = ntohs(ip->frag_off) & IP_OFFSET;
280     
281     	read_lock_bh(&table->lock);
282     	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
283     	table_base = (void *)table->private->entries
284     		+ TABLE_OFFSET(table->private,
285     			       cpu_number_map(smp_processor_id()));
286     	e = get_entry(table_base, table->private->hook_entry[hook]);
287     
288     #ifdef CONFIG_NETFILTER_DEBUG
289     	/* Check noone else using our table */
290     	if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
291     	    && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
292     		printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
293     		       smp_processor_id(),
294     		       table->name,
295     		       &((struct ipt_entry *)table_base)->comefrom,
296     		       ((struct ipt_entry *)table_base)->comefrom);
297     	}
298     	((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
299     #endif
300     
301     	/* For return from builtin chain */
302     	back = get_entry(table_base, table->private->underflow[hook]);
303     
304     	do {
305     		IP_NF_ASSERT(e);
306     		IP_NF_ASSERT(back);
307     		(*pskb)->nfcache |= e->nfcache;
308     		if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
309     			struct ipt_entry_target *t;
310     
311     			if (IPT_MATCH_ITERATE(e, do_match,
312     					      *pskb, in, out,
313     					      offset, protohdr,
314     					      datalen, &hotdrop) != 0)
315     				goto no_match;
316     
317     			ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
318     
319     			t = ipt_get_target(e);
320     			IP_NF_ASSERT(t->u.kernel.target);
321     			/* Standard target? */
322     			if (!t->u.kernel.target->target) {
323     				int v;
324     
325     				v = ((struct ipt_standard_target *)t)->verdict;
326     				if (v < 0) {
327     					/* Pop from stack? */
328     					if (v != IPT_RETURN) {
329     						verdict = (unsigned)(-v) - 1;
330     						break;
331     					}
332     					e = back;
333     					back = get_entry(table_base,
334     							 back->comefrom);
335     					continue;
336     				}
337     				if (table_base + v
338     				    != (void *)e + e->next_offset) {
339     					/* Save old back ptr in next entry */
340     					struct ipt_entry *next
341     						= (void *)e + e->next_offset;
342     					next->comefrom
343     						= (void *)back - table_base;
344     					/* set back pointer to next entry */
345     					back = next;
346     				}
347     
348     				e = get_entry(table_base, v);
349     			} else {
350     				/* Targets which reenter must return
351                                        abs. verdicts */
352     #ifdef CONFIG_NETFILTER_DEBUG
353     				((struct ipt_entry *)table_base)->comefrom
354     					= 0xeeeeeeec;
355     #endif
356     				verdict = t->u.kernel.target->target(pskb,
357     								     hook,
358     								     in, out,
359     								     t->data,
360     								     userdata);
361     
362     #ifdef CONFIG_NETFILTER_DEBUG
363     				if (((struct ipt_entry *)table_base)->comefrom
364     				    != 0xeeeeeeec
365     				    && verdict == IPT_CONTINUE) {
366     					printk("Target %s reentered!\n",
367     					       t->u.kernel.target->name);
368     					verdict = NF_DROP;
369     				}
370     				((struct ipt_entry *)table_base)->comefrom
371     					= 0x57acc001;
372     #endif
373     				/* Target might have changed stuff. */
374     				ip = (*pskb)->nh.iph;
375     				protohdr = (u_int32_t *)ip + ip->ihl;
376     				datalen = (*pskb)->len - ip->ihl * 4;
377     
378     				if (verdict == IPT_CONTINUE)
379     					e = (void *)e + e->next_offset;
380     				else
381     					/* Verdict */
382     					break;
383     			}
384     		} else {
385     
386     		no_match:
387     			e = (void *)e + e->next_offset;
388     		}
389     	} while (!hotdrop);
390     
391     #ifdef CONFIG_NETFILTER_DEBUG
392     	((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
393     #endif
394     	read_unlock_bh(&table->lock);
395     
396     #ifdef DEBUG_ALLOW_ALL
397     	return NF_ACCEPT;
398     #else
399     	if (hotdrop)
400     		return NF_DROP;
401     	else return verdict;
402     #endif
403     }
404     
405     /* If it succeeds, returns element and locks mutex */
406     static inline void *
407     find_inlist_lock_noload(struct list_head *head,
408     			const char *name,
409     			int *error,
410     			struct semaphore *mutex)
411     {
412     	void *ret;
413     
414     #if 0
415     	duprintf("find_inlist: searching for `%s' in %s.\n",
416     		 name, head == &ipt_target ? "ipt_target"
417     		 : head == &ipt_match ? "ipt_match"
418     		 : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
419     #endif
420     
421     	*error = down_interruptible(mutex);
422     	if (*error != 0)
423     		return NULL;
424     
425     	ret = list_named_find(head, name);
426     	if (!ret) {
427     		*error = -ENOENT;
428     		up(mutex);
429     	}
430     	return ret;
431     }
432     
/* find_inlist_lock(): like find_inlist_lock_noload(), but when module
 * loading is available (CONFIG_KMOD) a missing element triggers a request
 * for the module named "<prefix><name>" followed by a second lookup.
 * Without CONFIG_KMOD the prefix is ignored and this is the plain lookup.
 *
 * Returns the element with `mutex' held, or NULL with *error set and the
 * mutex not held. */
#ifndef CONFIG_KMOD
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#else
static void *
find_inlist_lock(struct list_head *head,
		 const char *name,
		 const char *prefix,
		 int *error,
		 struct semaphore *mutex)
{
	void *ret;

	ret = find_inlist_lock_noload(head, name, error, mutex);

	/* Only a genuine "not registered" miss is worth a module load.  If
	 * the lookup failed because waiting for the mutex was interrupted,
	 * hand that error straight back to the caller. */
	if (!ret && *error == -ENOENT) {
		char modulename[IPT_FUNCTION_MAXNAMELEN + strlen(prefix) + 1];

		strcpy(modulename, prefix);
		/* The buffer has room for IPT_FUNCTION_MAXNAMELEN name
		 * characters only; callers may pass longer names (e.g.
		 * table names), so bound the copy instead of overrunning
		 * the stack buffer. */
		strncat(modulename, name, IPT_FUNCTION_MAXNAMELEN);
		duprintf("find_inlist: loading `%s'.\n", modulename);
		request_module(modulename);
		ret = find_inlist_lock_noload(head, name, error, mutex);
	}

	return ret;
}
#endif
458     
459     static inline struct ipt_table *
460     find_table_lock(const char *name, int *error, struct semaphore *mutex)
461     {
462     	return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
463     }
464     
465     static inline struct ipt_match *
466     find_match_lock(const char *name, int *error, struct semaphore *mutex)
467     {
468     	return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
469     }
470     
471     static inline struct ipt_target *
472     find_target_lock(const char *name, int *error, struct semaphore *mutex)
473     {
474     	return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
475     }
476     
477     /* All zeroes == unconditional rule. */
478     static inline int
479     unconditional(const struct ipt_ip *ip)
480     {
481     	unsigned int i;
482     
483     	for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
484     		if (((__u32 *)ip)[i])
485     			return 0;
486     
487     	return 1;
488     }
489     
490     /* Figures out from what hook each rule can be called: returns 0 if
491        there are loops.  Puts hook bitmask in comefrom. */
492     static int
493     mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
494     {
495     	unsigned int hook;
496     
497     	/* No recursion; use packet counter to save back ptrs (reset
498     	   to 0 as we leave), and comefrom to save source hook bitmask */
499     	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
500     		unsigned int pos = newinfo->hook_entry[hook];
501     		struct ipt_entry *e
502     			= (struct ipt_entry *)(newinfo->entries + pos);
503     
504     		if (!(valid_hooks & (1 << hook)))
505     			continue;
506     
507     		/* Set initial back pointer. */
508     		e->counters.pcnt = pos;
509     
510     		for (;;) {
511     			struct ipt_standard_target *t
512     				= (void *)ipt_get_target(e);
513     
514     			if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
515     				printk("iptables: loop hook %u pos %u %08X.\n",
516     				       hook, pos, e->comefrom);
517     				return 0;
518     			}
519     			e->comefrom
520     				|= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
521     
522     			/* Unconditional return/END. */
523     			if (e->target_offset == sizeof(struct ipt_entry)
524     			    && (strcmp(t->target.u.user.name,
525     				       IPT_STANDARD_TARGET) == 0)
526     			    && t->verdict < 0
527     			    && unconditional(&e->ip)) {
528     				unsigned int oldpos, size;
529     
530     				/* Return: backtrack through the last
531     				   big jump. */
532     				do {
533     					e->comefrom ^= (1<<NF_IP_NUMHOOKS);
534     #ifdef DEBUG_IP_FIREWALL_USER
535     					if (e->comefrom
536     					    & (1 << NF_IP_NUMHOOKS)) {
537     						duprintf("Back unset "
538     							 "on hook %u "
539     							 "rule %u\n",
540     							 hook, pos);
541     					}
542     #endif
543     					oldpos = pos;
544     					pos = e->counters.pcnt;
545     					e->counters.pcnt = 0;
546     
547     					/* We're at the start. */
548     					if (pos == oldpos)
549     						goto next;
550     
551     					e = (struct ipt_entry *)
552     						(newinfo->entries + pos);
553     				} while (oldpos == pos + e->next_offset);
554     
555     				/* Move along one */
556     				size = e->next_offset;
557     				e = (struct ipt_entry *)
558     					(newinfo->entries + pos + size);
559     				e->counters.pcnt = pos;
560     				pos += size;
561     			} else {
562     				int newpos = t->verdict;
563     
564     				if (strcmp(t->target.u.user.name,
565     					   IPT_STANDARD_TARGET) == 0
566     				    && newpos >= 0) {
567     					/* This a jump; chase it. */
568     					duprintf("Jump rule %u -> %u\n",
569     						 pos, newpos);
570     				} else {
571     					/* ... this is a fallthru */
572     					newpos = pos + e->next_offset;
573     				}
574     				e = (struct ipt_entry *)
575     					(newinfo->entries + newpos);
576     				e->counters.pcnt = pos;
577     				pos = newpos;
578     			}
579     		}
580     		next:
581     		duprintf("Finished chain %u\n", hook);
582     	}
583     	return 1;
584     }
585     
586     static inline int
587     cleanup_match(struct ipt_entry_match *m, unsigned int *i)
588     {
589     	if (i && (*i)-- == 0)
590     		return 1;
591     
592     	if (m->u.kernel.match->destroy)
593     		m->u.kernel.match->destroy(m->data,
594     					   m->u.match_size - sizeof(*m));
595     
596     	if (m->u.kernel.match->me)
597     		__MOD_DEC_USE_COUNT(m->u.kernel.match->me);
598     
599     	return 0;
600     }
601     
602     static inline int
603     standard_check(const struct ipt_entry_target *t,
604     	       unsigned int max_offset)
605     {
606     	struct ipt_standard_target *targ = (void *)t;
607     
608     	/* Check standard info. */
609     	if (t->u.target_size
610     	    != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
611     		duprintf("standard_check: target size %u != %u\n",
612     			 t->u.target_size,
613     			 IPT_ALIGN(sizeof(struct ipt_standard_target)));
614     		return 0;
615     	}
616     
617     	if (targ->verdict >= 0
618     	    && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
619     		duprintf("ipt_standard_check: bad verdict (%i)\n",
620     			 targ->verdict);
621     		return 0;
622     	}
623     
624     	if (targ->verdict < -NF_MAX_VERDICT - 1) {
625     		duprintf("ipt_standard_check: bad negative verdict (%i)\n",
626     			 targ->verdict);
627     		return 0;
628     	}
629     	return 1;
630     }
631     
632     static inline int
633     check_match(struct ipt_entry_match *m,
634     	    const char *name,
635     	    const struct ipt_ip *ip,
636     	    unsigned int hookmask,
637     	    unsigned int *i)
638     {
639     	int ret;
640     	struct ipt_match *match;
641     
642     	match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
643     	if (!match) {
644     		duprintf("check_match: `%s' not found\n", m->u.user.name);
645     		return ret;
646     	}
647     	if (match->me)
648     		__MOD_INC_USE_COUNT(match->me);
649     	m->u.kernel.match = match;
650     	up(&ipt_mutex);
651     
652     	if (m->u.kernel.match->checkentry
653     	    && !m->u.kernel.match->checkentry(name, ip, m->data,
654     					      m->u.match_size - sizeof(*m),
655     					      hookmask)) {
656     		if (m->u.kernel.match->me)
657     			__MOD_DEC_USE_COUNT(m->u.kernel.match->me);
658     		duprintf("ip_tables: check failed for `%s'.\n",
659     			 m->u.kernel.match->name);
660     		return -EINVAL;
661     	}
662     
663     	(*i)++;
664     	return 0;
665     }
666     
667     static struct ipt_target ipt_standard_target;
668     
669     static inline int
670     check_entry(struct ipt_entry *e, const char *name, unsigned int size,
671     	    unsigned int *i)
672     {
673     	struct ipt_entry_target *t;
674     	struct ipt_target *target;
675     	int ret;
676     	unsigned int j;
677     
678     	if (!ip_checkentry(&e->ip)) {
679     		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
680     		return -EINVAL;
681     	}
682     
683     	j = 0;
684     	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
685     	if (ret != 0)
686     		goto cleanup_matches;
687     
688     	t = ipt_get_target(e);
689     	target = find_target_lock(t->u.user.name, &ret, &ipt_mutex);
690     	if (!target) {
691     		duprintf("check_entry: `%s' not found\n", t->u.user.name);
692     		goto cleanup_matches;
693     	}
694     	if (target->me)
695     		__MOD_INC_USE_COUNT(target->me);
696     	t->u.kernel.target = target;
697     	up(&ipt_mutex);
698     
699     	if (t->u.kernel.target == &ipt_standard_target) {
700     		if (!standard_check(t, size)) {
701     			ret = -EINVAL;
702     			goto cleanup_matches;
703     		}
704     	} else if (t->u.kernel.target->checkentry
705     		   && !t->u.kernel.target->checkentry(name, e, t->data,
706     						      t->u.target_size
707     						      - sizeof(*t),
708     						      e->comefrom)) {
709     		if (t->u.kernel.target->me)
710     			__MOD_DEC_USE_COUNT(t->u.kernel.target->me);
711     		duprintf("ip_tables: check failed for `%s'.\n",
712     			 t->u.kernel.target->name);
713     		ret = -EINVAL;
714     		goto cleanup_matches;
715     	}
716     
717     	(*i)++;
718     	return 0;
719     
720      cleanup_matches:
721     	IPT_MATCH_ITERATE(e, cleanup_match, &j);
722     	return ret;
723     }
724     
725     static inline int
726     check_entry_size_and_hooks(struct ipt_entry *e,
727     			   struct ipt_table_info *newinfo,
728     			   unsigned char *base,
729     			   unsigned char *limit,
730     			   const unsigned int *hook_entries,
731     			   const unsigned int *underflows,
732     			   unsigned int *i)
733     {
734     	unsigned int h;
735     
736     	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
737     	    || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
738     		duprintf("Bad offset %p\n", e);
739     		return -EINVAL;
740     	}
741     
742     	if (e->next_offset
743     	    < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
744     		duprintf("checking: element %p size %u\n",
745     			 e, e->next_offset);
746     		return -EINVAL;
747     	}
748     
749     	/* Check hooks & underflows */
750     	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
751     		if ((unsigned char *)e - base == hook_entries[h])
752     			newinfo->hook_entry[h] = hook_entries[h];
753     		if ((unsigned char *)e - base == underflows[h])
754     			newinfo->underflow[h] = underflows[h];
755     	}
756     
757     	/* FIXME: underflows must be unconditional, standard verdicts
758                < 0 (not IPT_RETURN). --RR */
759     
760     	/* Clear counters and comefrom */
761     	e->counters = ((struct ipt_counters) { 0, 0 });
762     	e->comefrom = 0;
763     
764     	(*i)++;
765     	return 0;
766     }
767     
768     static inline int
769     cleanup_entry(struct ipt_entry *e, unsigned int *i)
770     {
771     	struct ipt_entry_target *t;
772     
773     	if (i && (*i)-- == 0)
774     		return 1;
775     
776     	/* Cleanup all matches */
777     	IPT_MATCH_ITERATE(e, cleanup_match, NULL);
778     	t = ipt_get_target(e);
779     	if (t->u.kernel.target->destroy)
780     		t->u.kernel.target->destroy(t->data,
781     					    t->u.target_size - sizeof(*t));
782     	if (t->u.kernel.target->me)
783     		__MOD_DEC_USE_COUNT(t->u.kernel.target->me);
784     
785     	return 0;
786     }
787     
788     /* Checks and translates the user-supplied table segment (held in
789        newinfo) */
790     static int
791     translate_table(const char *name,
792     		unsigned int valid_hooks,
793     		struct ipt_table_info *newinfo,
794     		unsigned int size,
795     		unsigned int number,
796     		const unsigned int *hook_entries,
797     		const unsigned int *underflows)
798     {
799     	unsigned int i;
800     	int ret;
801     
802     	newinfo->size = size;
803     	newinfo->number = number;
804     
805     	/* Init all hooks to impossible value. */
806     	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
807     		newinfo->hook_entry[i] = 0xFFFFFFFF;
808     		newinfo->underflow[i] = 0xFFFFFFFF;
809     	}
810     
811     	duprintf("translate_table: size %u\n", newinfo->size);
812     	i = 0;
813     	/* Walk through entries, checking offsets. */
814     	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
815     				check_entry_size_and_hooks,
816     				newinfo,
817     				newinfo->entries,
818     				newinfo->entries + size,
819     				hook_entries, underflows, &i);
820     	if (ret != 0)
821     		return ret;
822     
823     	if (i != number) {
824     		duprintf("translate_table: %u not %u entries\n",
825     			 i, number);
826     		return -EINVAL;
827     	}
828     
829     	/* Check hooks all assigned */
830     	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
831     		/* Only hooks which are valid */
832     		if (!(valid_hooks & (1 << i)))
833     			continue;
834     		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
835     			duprintf("Invalid hook entry %u %u\n",
836     				 i, hook_entries[i]);
837     			return -EINVAL;
838     		}
839     		if (newinfo->underflow[i] == 0xFFFFFFFF) {
840     			duprintf("Invalid underflow %u %u\n",
841     				 i, underflows[i]);
842     			return -EINVAL;
843     		}
844     	}
845     
846     	if (!mark_source_chains(newinfo, valid_hooks))
847     		return -ELOOP;
848     
849     	/* Finally, each sanity check must pass */
850     	i = 0;
851     	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
852     				check_entry, name, size, &i);
853     
854     	if (ret != 0) {
855     		IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
856     				  cleanup_entry, &i);
857     		return ret;
858     	}
859     
860     	/* And one copy for every other CPU */
861     	for (i = 1; i < smp_num_cpus; i++) {
862     		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
863     		       newinfo->entries,
864     		       SMP_ALIGN(newinfo->size));
865     	}
866     
867     	return ret;
868     }
869     
870     static struct ipt_table_info *
871     replace_table(struct ipt_table *table,
872     	      unsigned int num_counters,
873     	      struct ipt_table_info *newinfo,
874     	      int *error)
875     {
876     	struct ipt_table_info *oldinfo;
877     
878     #ifdef CONFIG_NETFILTER_DEBUG
879     	{
880     		struct ipt_entry *table_base;
881     		unsigned int i;
882     
883     		for (i = 0; i < smp_num_cpus; i++) {
884     			table_base =
885     				(void *)newinfo->entries
886     				+ TABLE_OFFSET(newinfo, i);
887     
888     			table_base->comefrom = 0xdead57ac;
889     		}
890     	}
891     #endif
892     
893     	/* Do the substitution. */
894     	write_lock_bh(&table->lock);
895     	/* Check inside lock: is the old number correct? */
896     	if (num_counters != table->private->number) {
897     		duprintf("num_counters != table->private->number (%u/%u)\n",
898     			 num_counters, table->private->number);
899     		write_unlock_bh(&table->lock);
900     		*error = -EAGAIN;
901     		return NULL;
902     	}
903     	oldinfo = table->private;
904     	table->private = newinfo;
905     	write_unlock_bh(&table->lock);
906     
907     	return oldinfo;
908     }
909     
910     /* Gets counters. */
911     static inline int
912     add_entry_to_counter(const struct ipt_entry *e,
913     		     struct ipt_counters total[],
914     		     unsigned int *i)
915     {
916     	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
917     
918     	(*i)++;
919     	return 0;
920     }
921     
922     static void
923     get_counters(const struct ipt_table_info *t,
924     	     struct ipt_counters counters[])
925     {
926     	unsigned int cpu;
927     	unsigned int i;
928     
929     	for (cpu = 0; cpu < smp_num_cpus; cpu++) {
930     		i = 0;
931     		IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
932     				  t->size,
933     				  add_entry_to_counter,
934     				  counters,
935     				  &i);
936     	}
937     }
938     
939     static int
940     copy_entries_to_user(unsigned int total_size,
941     		     struct ipt_table *table,
942     		     void *userptr)
943     {
944     	unsigned int off, num, countersize;
945     	struct ipt_entry *e;
946     	struct ipt_counters *counters;
947     	int ret = 0;
948     
949     	/* We need atomic snapshot of counters: rest doesn't change
950     	   (other than comefrom, which userspace doesn't care
951     	   about). */
952     	countersize = sizeof(struct ipt_counters) * table->private->number;
953     	counters = vmalloc(countersize);
954     
955     	if (counters == NULL)
956     		return -ENOMEM;
957     
958     	/* First, sum counters... */
959     	memset(counters, 0, countersize);
960     	write_lock_bh(&table->lock);
961     	get_counters(table->private, counters);
962     	write_unlock_bh(&table->lock);
963     
964     	/* ... then copy entire thing from CPU 0... */
965     	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
966     		ret = -EFAULT;
967     		goto free_counters;
968     	}
969     
970     	/* FIXME: use iterator macros --RR */
971     	/* ... then go back and fix counters and names */
972     	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
973     		unsigned int i;
974     		struct ipt_entry_match *m;
975     		struct ipt_entry_target *t;
976     
977     		e = (struct ipt_entry *)(table->private->entries + off);
978     		if (copy_to_user(userptr + off
979     				 + offsetof(struct ipt_entry, counters),
980     				 &counters[num],
981     				 sizeof(counters[num])) != 0) {
982     			ret = -EFAULT;
983     			goto free_counters;
984     		}
985     
986     		for (i = sizeof(struct ipt_entry);
987     		     i < e->target_offset;
988     		     i += m->u.match_size) {
989     			m = (void *)e + i;
990     
991     			if (copy_to_user(userptr + off + i
992     					 + offsetof(struct ipt_entry_match,
993     						    u.user.name),
994     					 m->u.kernel.match->name,
995     					 strlen(m->u.kernel.match->name)+1)
996     			    != 0) {
997     				ret = -EFAULT;
998     				goto free_counters;
999     			}
1000     		}
1001     
1002     		t = ipt_get_target(e);
1003     		if (copy_to_user(userptr + off + e->target_offset
1004     				 + offsetof(struct ipt_entry_target,
1005     					    u.user.name),
1006     				 t->u.kernel.target->name,
1007     				 strlen(t->u.kernel.target->name)+1) != 0) {
1008     			ret = -EFAULT;
1009     			goto free_counters;
1010     		}
1011     	}
1012     
1013      free_counters:
1014     	vfree(counters);
1015     	return ret;
1016     }
1017     
1018     static int
1019     get_entries(const struct ipt_get_entries *entries,
1020     	    struct ipt_get_entries *uptr)
1021     {
1022     	int ret;
1023     	struct ipt_table *t;
1024     
1025     	t = find_table_lock(entries->name, &ret, &ipt_mutex);
1026     	if (t) {
1027     		duprintf("t->private->number = %u\n",
1028     			 t->private->number);
1029     		if (entries->size == t->private->size)
1030     			ret = copy_entries_to_user(t->private->size,
1031     						   t, uptr->entrytable);
1032     		else {
1033     			duprintf("get_entries: I've got %u not %u!\n",
1034     				 t->private->size,
1035     				 entries->size);
1036     			ret = -EINVAL;
1037     		}
1038     		up(&ipt_mutex);
1039     	} else
1040     		duprintf("get_entries: Can't find %s!\n",
1041     			 entries->name);
1042     
1043     	return ret;
1044     }
1045     
1046     static int
1047     do_replace(void *user, unsigned int len)
1048     {
1049     	int ret;
1050     	struct ipt_replace tmp;
1051     	struct ipt_table *t;
1052     	struct ipt_table_info *newinfo, *oldinfo;
1053     	struct ipt_counters *counters;
1054     
1055     	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1056     		return -EFAULT;
1057     
1058     	/* Hack: Causes ipchains to give correct error msg --RR */
1059     	if (len != sizeof(tmp) + tmp.size)
1060     		return -ENOPROTOOPT;
1061     
1062     	newinfo = vmalloc(sizeof(struct ipt_table_info)
1063     			  + SMP_ALIGN(tmp.size) * smp_num_cpus);
1064     	if (!newinfo)
1065     		return -ENOMEM;
1066     
1067     	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1068     			   tmp.size) != 0) {
1069     		ret = -EFAULT;
1070     		goto free_newinfo;
1071     	}
1072     
1073     	counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1074     	if (!counters) {
1075     		ret = -ENOMEM;
1076     		goto free_newinfo;
1077     	}
1078     	memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1079     
1080     	ret = translate_table(tmp.name, tmp.valid_hooks,
1081     			      newinfo, tmp.size, tmp.num_entries,
1082     			      tmp.hook_entry, tmp.underflow);
1083     	if (ret != 0)
1084     		goto free_newinfo_counters;
1085     
1086     	duprintf("ip_tables: Translated table\n");
1087     
1088     	t = find_table_lock(tmp.name, &ret, &ipt_mutex);
1089     	if (!t)
1090     		goto free_newinfo_counters_untrans;
1091     
1092     	/* You lied! */
1093     	if (tmp.valid_hooks != t->valid_hooks) {
1094     		duprintf("Valid hook crap: %08X vs %08X\n",
1095     			 tmp.valid_hooks, t->valid_hooks);
1096     		ret = -EINVAL;
1097     		goto free_newinfo_counters_untrans_unlock;
1098     	}
1099     
1100     	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1101     	if (!oldinfo)
1102     		goto free_newinfo_counters_untrans_unlock;
1103     
1104     	/* Get the old counters. */
1105     	get_counters(oldinfo, counters);
1106     	/* Decrease module usage counts and free resource */
1107     	IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1108     	vfree(oldinfo);
1109     	/* Silent error: too late now. */
1110     	copy_to_user(tmp.counters, counters,
1111     		     sizeof(struct ipt_counters) * tmp.num_counters);
1112     	vfree(counters);
1113     	up(&ipt_mutex);
1114     	return 0;
1115     
1116      free_newinfo_counters_untrans_unlock:
1117     	up(&ipt_mutex);
1118      free_newinfo_counters_untrans:
1119     	IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1120      free_newinfo_counters:
1121     	vfree(counters);
1122      free_newinfo:
1123     	vfree(newinfo);
1124     	return ret;
1125     }
1126     
1127     /* We're lazy, and add to the first CPU; overflow works its fey magic
1128      * and everything is OK. */
1129     static inline int
1130     add_counter_to_entry(struct ipt_entry *e,
1131     		     const struct ipt_counters addme[],
1132     		     unsigned int *i)
1133     {
1134     #if 0
1135     	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1136     		 *i,
1137     		 (long unsigned int)e->counters.pcnt,
1138     		 (long unsigned int)e->counters.bcnt,
1139     		 (long unsigned int)addme[*i].pcnt,
1140     		 (long unsigned int)addme[*i].bcnt);
1141     #endif
1142     
1143     	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1144     
1145     	(*i)++;
1146     	return 0;
1147     }
1148     
1149     static int
1150     do_add_counters(void *user, unsigned int len)
1151     {
1152     	unsigned int i;
1153     	struct ipt_counters_info tmp, *paddc;
1154     	struct ipt_table *t;
1155     	int ret;
1156     
1157     	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1158     		return -EFAULT;
1159     
1160     	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1161     		return -EINVAL;
1162     
1163     	paddc = vmalloc(len);
1164     	if (!paddc)
1165     		return -ENOMEM;
1166     
1167     	if (copy_from_user(paddc, user, len) != 0) {
1168     		ret = -EFAULT;
1169     		goto free;
1170     	}
1171     
1172     	t = find_table_lock(tmp.name, &ret, &ipt_mutex);
1173     	if (!t)
1174     		goto free;
1175     
1176     	write_lock_bh(&t->lock);
1177     	if (t->private->number != paddc->num_counters) {
1178     		ret = -EINVAL;
1179     		goto unlock_up_free;
1180     	}
1181     
1182     	i = 0;
1183     	IPT_ENTRY_ITERATE(t->private->entries,
1184     			  t->private->size,
1185     			  add_counter_to_entry,
1186     			  paddc->counters,
1187     			  &i);
1188      unlock_up_free:
1189     	write_unlock_bh(&t->lock);
1190     	up(&ipt_mutex);
1191      free:
1192     	vfree(paddc);
1193     
1194     	return ret;
1195     }
1196     
1197     static int
1198     do_ipt_set_ctl(struct sock *sk,	int cmd, void *user, unsigned int len)
1199     {
1200     	int ret;
1201     
1202     	if (!capable(CAP_NET_ADMIN))
1203     		return -EPERM;
1204     
1205     	switch (cmd) {
1206     	case IPT_SO_SET_REPLACE:
1207     		ret = do_replace(user, len);
1208     		break;
1209     
1210     	case IPT_SO_SET_ADD_COUNTERS:
1211     		ret = do_add_counters(user, len);
1212     		break;
1213     
1214     	default:
1215     		duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1216     		ret = -EINVAL;
1217     	}
1218     
1219     	return ret;
1220     }
1221     
1222     static int
1223     do_ipt_get_ctl(struct sock *sk, int cmd, void *user, int *len)
1224     {
1225     	int ret;
1226     
1227     	if (!capable(CAP_NET_ADMIN))
1228     		return -EPERM;
1229     
1230     	switch (cmd) {
1231     	case IPT_SO_GET_INFO: {
1232     		char name[IPT_TABLE_MAXNAMELEN];
1233     		struct ipt_table *t;
1234     
1235     		if (*len != sizeof(struct ipt_getinfo)) {
1236     			duprintf("length %u != %u\n", *len,
1237     				 sizeof(struct ipt_getinfo));
1238     			ret = -EINVAL;
1239     			break;
1240     		}
1241     
1242     		if (copy_from_user(name, user, sizeof(name)) != 0) {
1243     			ret = -EFAULT;
1244     			break;
1245     		}
1246     		name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1247     		t = find_table_lock(name, &ret, &ipt_mutex);
1248     		if (t) {
1249     			struct ipt_getinfo info;
1250     
1251     			info.valid_hooks = t->valid_hooks;
1252     			memcpy(info.hook_entry, t->private->hook_entry,
1253     			       sizeof(info.hook_entry));
1254     			memcpy(info.underflow, t->private->underflow,
1255     			       sizeof(info.underflow));
1256     			info.num_entries = t->private->number;
1257     			info.size = t->private->size;
1258     			strcpy(info.name, name);
1259     
1260     			if (copy_to_user(user, &info, *len) != 0)
1261     				ret = -EFAULT;
1262     			else
1263     				ret = 0;
1264     
1265     			up(&ipt_mutex);
1266     		}
1267     	}
1268     	break;
1269     
1270     	case IPT_SO_GET_ENTRIES: {
1271     		struct ipt_get_entries get;
1272     
1273     		if (*len < sizeof(get)) {
1274     			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1275     			ret = -EINVAL;
1276     		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1277     			ret = -EFAULT;
1278     		} else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1279     			duprintf("get_entries: %u != %u\n", *len,
1280     				 sizeof(struct ipt_get_entries) + get.size);
1281     			ret = -EINVAL;
1282     		} else
1283     			ret = get_entries(&get, user);
1284     		break;
1285     	}
1286     
1287     	default:
1288     		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1289     		ret = -EINVAL;
1290     	}
1291     
1292     	return ret;
1293     }
1294     
1295     /* Registration hooks for targets. */
1296     int
1297     ipt_register_target(struct ipt_target *target)
1298     {
1299     	int ret;
1300     
1301     	MOD_INC_USE_COUNT;
1302     	ret = down_interruptible(&ipt_mutex);
1303     	if (ret != 0) {
1304     		MOD_DEC_USE_COUNT;
1305     		return ret;
1306     	}
1307     	if (!list_named_insert(&ipt_target, target)) {
1308     		duprintf("ipt_register_target: `%s' already in list!\n",
1309     			 target->name);
1310     		ret = -EINVAL;
1311     		MOD_DEC_USE_COUNT;
1312     	}
1313     	up(&ipt_mutex);
1314     	return ret;
1315     }
1316     
1317     void
1318     ipt_unregister_target(struct ipt_target *target)
1319     {
1320     	down(&ipt_mutex);
1321     	LIST_DELETE(&ipt_target, target);
1322     	up(&ipt_mutex);
1323     	MOD_DEC_USE_COUNT;
1324     }
1325     
1326     int
1327     ipt_register_match(struct ipt_match *match)
1328     {
1329     	int ret;
1330     
1331     	MOD_INC_USE_COUNT;
1332     	ret = down_interruptible(&ipt_mutex);
1333     	if (ret != 0) {
1334     		MOD_DEC_USE_COUNT;
1335     		return ret;
1336     	}
1337     	if (!list_named_insert(&ipt_match, match)) {
1338     		duprintf("ipt_register_match: `%s' already in list!\n",
1339     			 match->name);
1340     		MOD_DEC_USE_COUNT;
1341     		ret = -EINVAL;
1342     	}
1343     	up(&ipt_mutex);
1344     
1345     	return ret;
1346     }
1347     
1348     void
1349     ipt_unregister_match(struct ipt_match *match)
1350     {
1351     	down(&ipt_mutex);
1352     	LIST_DELETE(&ipt_match, match);
1353     	up(&ipt_mutex);
1354     	MOD_DEC_USE_COUNT;
1355     }
1356     
1357     int ipt_register_table(struct ipt_table *table)
1358     {
1359     	int ret;
1360     	struct ipt_table_info *newinfo;
1361     	static struct ipt_table_info bootstrap
1362     		= { 0, 0, { 0 }, { 0 }, { } };
1363     
1364     	MOD_INC_USE_COUNT;
1365     	newinfo = vmalloc(sizeof(struct ipt_table_info)
1366     			  + SMP_ALIGN(table->table->size) * smp_num_cpus);
1367     	if (!newinfo) {
1368     		ret = -ENOMEM;
1369     		MOD_DEC_USE_COUNT;
1370     		return ret;
1371     	}
1372     	memcpy(newinfo->entries, table->table->entries, table->table->size);
1373     
1374     	ret = translate_table(table->name, table->valid_hooks,
1375     			      newinfo, table->table->size,
1376     			      table->table->num_entries,
1377     			      table->table->hook_entry,
1378     			      table->table->underflow);
1379     	if (ret != 0) {
1380     		vfree(newinfo);
1381     		MOD_DEC_USE_COUNT;
1382     		return ret;
1383     	}
1384     
1385     	ret = down_interruptible(&ipt_mutex);
1386     	if (ret != 0) {
1387     		vfree(newinfo);
1388     		MOD_DEC_USE_COUNT;
1389     		return ret;
1390     	}
1391     
1392     	/* Don't autoload: we'd eat our tail... */
1393     	if (list_named_find(&ipt_tables, table->name)) {
1394     		ret = -EEXIST;
1395     		goto free_unlock;
1396     	}
1397     
1398     	/* Simplifies replace_table code. */
1399     	table->private = &bootstrap;
1400     	if (!replace_table(table, 0, newinfo, &ret))
1401     		goto free_unlock;
1402     
1403     	duprintf("table->private->number = %u\n",
1404     		 table->private->number);
1405     
1406     	table->lock = RW_LOCK_UNLOCKED;
1407     	list_prepend(&ipt_tables, table);
1408     
1409      unlock:
1410     	up(&ipt_mutex);
1411     	return ret;
1412     
1413      free_unlock:
1414     	vfree(newinfo);
1415     	MOD_DEC_USE_COUNT;
1416     	goto unlock;
1417     }
1418     
1419     void ipt_unregister_table(struct ipt_table *table)
1420     {
1421     	down(&ipt_mutex);
1422     	LIST_DELETE(&ipt_tables, table);
1423     	up(&ipt_mutex);
1424     
1425     	/* Decrease module usage counts and free resources */
1426     	IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1427     			  cleanup_entry, NULL);
1428     	vfree(table->private);
1429     	MOD_DEC_USE_COUNT;
1430     }
1431     
/* Port range test: yields 1 when `port' lies within [min, max]
 * (inclusive) and 0 otherwise, with the result XORed with `invert'. */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	const int in_range = (min <= port) && (port <= max);

	return in_range ^ invert;
}
1441     
1442     static int
1443     tcp_find_option(u_int8_t option,
1444     		const struct tcphdr *tcp,
1445     		u_int16_t datalen,
1446     		int invert,
1447     		int *hotdrop)
1448     {
1449     	unsigned int i = sizeof(struct tcphdr);
1450     	const u_int8_t *opt = (u_int8_t *)tcp;
1451     
1452     	duprintf("tcp_match: finding option\n");
1453     	/* If we don't have the whole header, drop packet. */
1454     	if (tcp->doff * 4 > datalen) {
1455     		*hotdrop = 1;
1456     		return 0;
1457     	}
1458     
1459     	while (i < tcp->doff * 4) {
1460     		if (opt[i] == option) return !invert;
1461     		if (opt[i] < 2) i++;
1462     		else i += opt[i+1]?:1;
1463     	}
1464     
1465     	return invert;
1466     }
1467     
1468     static int
1469     tcp_match(const struct sk_buff *skb,
1470     	  const struct net_device *in,
1471     	  const struct net_device *out,
1472     	  const void *matchinfo,
1473     	  int offset,
1474     	  const void *hdr,
1475     	  u_int16_t datalen,
1476     	  int *hotdrop)
1477     {
1478     	const struct tcphdr *tcp = hdr;
1479     	const struct ipt_tcp *tcpinfo = matchinfo;
1480     
1481     	/* To quote Alan:
1482     
1483     	   Don't allow a fragment of TCP 8 bytes in. Nobody normal
1484     	   causes this. Its a cracker trying to break in by doing a
1485     	   flag overwrite to pass the direction checks.
1486     	*/
1487     
1488     	if (offset == 1) {
1489     		duprintf("Dropping evil TCP offset=1 frag.\n");
1490     		*hotdrop = 1;
1491     		return 0;
1492     	} else if (offset == 0 && datalen < sizeof(struct tcphdr)) {
1493     		/* We've been asked to examine this packet, and we
1494     		   can't.  Hence, no choice but to drop. */
1495     		duprintf("Dropping evil TCP offset=0 tinygram.\n");
1496     		*hotdrop = 1;
1497     		return 0;
1498     	}
1499     
1500     	/* FIXME: Try tcp doff >> packet len against various stacks --RR */
1501     
1502     #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1503     
1504     	/* Must not be a fragment. */
1505     	return !offset
1506     		&& port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1507     			      ntohs(tcp->source),
1508     			      !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT))
1509     		&& port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1510     			      ntohs(tcp->dest),
1511     			      !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT))
1512     		&& FWINVTCP((((unsigned char *)tcp)[13]
1513     			     & tcpinfo->flg_mask)
1514     			    == tcpinfo->flg_cmp,
1515     			    IPT_TCP_INV_FLAGS)
1516     		&& (!tcpinfo->option
1517     		    || tcp_find_option(tcpinfo->option, tcp, datalen,
1518     				       tcpinfo->invflags
1519     				       & IPT_TCP_INV_OPTION,
1520     				       hotdrop));
1521     }
1522     
1523     /* Called when user tries to insert an entry of this type. */
1524     static int
1525     tcp_checkentry(const char *tablename,
1526     	       const struct ipt_ip *ip,
1527     	       void *matchinfo,
1528     	       unsigned int matchsize,
1529     	       unsigned int hook_mask)
1530     {
1531     	const struct ipt_tcp *tcpinfo = matchinfo;
1532     
1533     	/* Must specify proto == TCP, and no unknown invflags */
1534     	return ip->proto == IPPROTO_TCP
1535     		&& !(ip->invflags & IPT_INV_PROTO)
1536     		&& matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1537     		&& !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1538     }
1539     
1540     static int
1541     udp_match(const struct sk_buff *skb,
1542     	  const struct net_device *in,
1543     	  const struct net_device *out,
1544     	  const void *matchinfo,
1545     	  int offset,
1546     	  const void *hdr,
1547     	  u_int16_t datalen,
1548     	  int *hotdrop)
1549     {
1550     	const struct udphdr *udp = hdr;
1551     	const struct ipt_udp *udpinfo = matchinfo;
1552     
1553     	if (offset == 0 && datalen < sizeof(struct udphdr)) {
1554     		/* We've been asked to examine this packet, and we
1555     		   can't.  Hence, no choice but to drop. */
1556     		duprintf("Dropping evil UDP tinygram.\n");
1557     		*hotdrop = 1;
1558     		return 0;
1559     	}
1560     
1561     	/* Must not be a fragment. */
1562     	return !offset
1563     		&& port_match(udpinfo->spts[0], udpinfo->spts[1],
1564     			      ntohs(udp->source),
1565     			      !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1566     		&& port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1567     			      ntohs(udp->dest),
1568     			      !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1569     }
1570     
1571     /* Called when user tries to insert an entry of this type. */
1572     static int
1573     udp_checkentry(const char *tablename,
1574     	       const struct ipt_ip *ip,
1575     	       void *matchinfo,
1576     	       unsigned int matchinfosize,
1577     	       unsigned int hook_mask)
1578     {
1579     	const struct ipt_udp *udpinfo = matchinfo;
1580     
1581     	/* Must specify proto == UDP, and no unknown invflags */
1582     	if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1583     		duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1584     			 IPPROTO_UDP);
1585     		return 0;
1586     	}
1587     	if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1588     		duprintf("ipt_udp: matchsize %u != %u\n",
1589     			 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1590     		return 0;
1591     	}
1592     	if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1593     		duprintf("ipt_udp: unknown flags %X\n",
1594     			 udpinfo->invflags);
1595     		return 0;
1596     	}
1597     
1598     	return 1;
1599     }
1600     
/* ICMP type/code test: yields 1 when `type' equals `test_type' and
 * `code' lies within [min_code, max_code] (inclusive), 0 otherwise,
 * with the result XORed with `invert'. */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	int hit = 0;

	if (type == test_type && min_code <= code && code <= max_code)
		hit = 1;

	return hit ^ invert;
}
1610     
1611     static int
1612     icmp_match(const struct sk_buff *skb,
1613     	   const struct net_device *in,
1614     	   const struct net_device *out,
1615     	   const void *matchinfo,
1616     	   int offset,
1617     	   const void *hdr,
1618     	   u_int16_t datalen,
1619     	   int *hotdrop)
1620     {
1621     	const struct icmphdr *icmp = hdr;
1622     	const struct ipt_icmp *icmpinfo = matchinfo;
1623     
1624     	if (offset == 0 && datalen < 2) {
1625     		/* We've been asked to examine this packet, and we
1626     		   can't.  Hence, no choice but to drop. */
1627     		duprintf("Dropping evil ICMP tinygram.\n");
1628     		*hotdrop = 1;
1629     		return 0;
1630     	}
1631     
1632     	/* Must not be a fragment. */
1633     	return !offset
1634     		&& icmp_type_code_match(icmpinfo->type,
1635     					icmpinfo->code[0],
1636     					icmpinfo->code[1],
1637     					icmp->type, icmp->code,
1638     					!!(icmpinfo->invflags&IPT_ICMP_INV));
1639     }
1640     
1641     /* Called when user tries to insert an entry of this type. */
1642     static int
1643     icmp_checkentry(const char *tablename,
1644     	   const struct ipt_ip *ip,
1645     	   void *matchinfo,
1646     	   unsigned int matchsize,
1647     	   unsigned int hook_mask)
1648     {
1649     	const struct ipt_icmp *icmpinfo = matchinfo;
1650     
1651     	/* Must specify proto == ICMP, and no unknown invflags */
1652     	return ip->proto == IPPROTO_ICMP
1653     		&& !(ip->invflags & IPT_INV_PROTO)
1654     		&& matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1655     		&& !(icmpinfo->invflags & ~IPT_ICMP_INV);
1656     }
1657     
/* The built-in targets: standard (NULL) and error.  Both are put on
 * the target list by init().  The standard target has no target
 * function (NULL); the error target uses ipt_error. */
static struct ipt_target ipt_standard_target
= { { NULL, NULL }, IPT_STANDARD_TARGET, NULL, NULL, NULL };
static struct ipt_target ipt_error_target
= { { NULL, NULL }, IPT_ERROR_TARGET, ipt_error, NULL, NULL };

/* Socket option registration for PF_INET: set requests go to
 * do_ipt_set_ctl and get requests to do_ipt_get_ctl.  The IPT_BASE_CTL
 * and IPT_SO_*_MAX+1 values presumably bound the option numbers each
 * handler receives -- confirm against struct nf_sockopt_ops. */
static struct nf_sockopt_ops ipt_sockopts
= { { NULL, NULL }, PF_INET, IPT_BASE_CTL, IPT_SO_SET_MAX+1, do_ipt_set_ctl,
    IPT_BASE_CTL, IPT_SO_GET_MAX+1, do_ipt_get_ctl, 0, NULL  };

/* The built-in matches "tcp", "udp" and "icmp", each pairing its match
 * function with its checkentry function.  All three are put on the
 * match list by init(). */
static struct ipt_match tcp_matchstruct
= { { NULL, NULL }, "tcp", &tcp_match, &tcp_checkentry, NULL };
static struct ipt_match udp_matchstruct
= { { NULL, NULL }, "udp", &udp_match, &udp_checkentry, NULL };
static struct ipt_match icmp_matchstruct
= { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL };
1674     
1675     #ifdef CONFIG_PROC_FS
1676     static inline int print_name(const struct ipt_table *t,
1677     			     off_t start_offset, char *buffer, int length,
1678     			     off_t *pos, unsigned int *count)
1679     {
1680     	if ((*count)++ >= start_offset) {
1681     		unsigned int namelen;
1682     
1683     		namelen = sprintf(buffer + *pos, "%s\n", t->name);
1684     		if (*pos + namelen > length) {
1685     			/* Stop iterating */
1686     			return 1;
1687     		}
1688     		*pos += namelen;
1689     	}
1690     	return 0;
1691     }
1692     
1693     static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1694     {
1695     	off_t pos = 0;
1696     	unsigned int count = 0;
1697     
1698     	if (down_interruptible(&ipt_mutex) != 0)
1699     		return 0;
1700     
1701     	LIST_FIND(&ipt_tables, print_name, struct ipt_table *,
1702     		  offset, buffer, length, &pos, &count);
1703     
1704     	up(&ipt_mutex);
1705     
1706     	/* `start' hack - see fs/proc/generic.c line ~105 */
1707     	*start=(char *)((unsigned long)count-offset);
1708     	return pos;
1709     }
1710     #endif /*CONFIG_PROC_FS*/
1711     
1712     static int __init init(void)
1713     {
1714     	int ret;
1715     
1716     	/* Noone else will be downing sem now, so we won't sleep */
1717     	down(&ipt_mutex);
1718     	list_append(&ipt_target, &ipt_standard_target);
1719     	list_append(&ipt_target, &ipt_error_target);
1720     	list_append(&ipt_match, &tcp_matchstruct);
1721     	list_append(&ipt_match, &udp_matchstruct);
1722     	list_append(&ipt_match, &icmp_matchstruct);
1723     	up(&ipt_mutex);
1724     
1725     	/* Register setsockopt */
1726     	ret = nf_register_sockopt(&ipt_sockopts);
1727     	if (ret < 0) {
1728     		duprintf("Unable to register sockopts.\n");
1729     		return ret;
1730     	}
1731     
1732     #ifdef CONFIG_PROC_FS
1733     	{
1734     	struct proc_dir_entry *proc;
1735     
1736     	proc = proc_net_create("ip_tables_names", 0, ipt_get_tables);
1737     	if (!proc) {
1738     		nf_unregister_sockopt(&ipt_sockopts);
1739     		return -ENOMEM;
1740     	}
1741     	proc->owner = THIS_MODULE;
1742     	}
1743     #endif
1744     
1745     	printk("ip_tables: (c)2000 Netfilter core team\n");
1746     	return 0;
1747     }
1748     
1749     static void __exit fini(void)
1750     {
1751     	nf_unregister_sockopt(&ipt_sockopts);
1752     #ifdef CONFIG_PROC_FS
1753     	proc_net_remove("ip_tables_names");
1754     #endif
1755     }
1756     
/* Symbols exported for use by other modules: table, match and target
 * registration, plus the table traversal function. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);
EXPORT_SYMBOL(ipt_do_table);
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);

/* Module entry and exit points. */
module_init(init);
module_exit(fini);
1767