File: /usr/src/linux/net/ipv4/netfilter/ip_tables.c
1 /*
2 * Packet matching code.
3 *
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 */
6 #include <linux/config.h>
7 #include <linux/skbuff.h>
8 #include <linux/kmod.h>
9 #include <linux/vmalloc.h>
10 #include <linux/netdevice.h>
11 #include <linux/module.h>
12 #include <linux/tcp.h>
13 #include <linux/udp.h>
14 #include <linux/icmp.h>
15 #include <net/ip.h>
16 #include <asm/uaccess.h>
17 #include <asm/semaphore.h>
18 #include <linux/proc_fs.h>
19
20 #include <linux/netfilter_ipv4/ip_tables.h>
21
/* Compile-time debug switches.  All three are disabled (commented out)
 * by default; remove the comment markers to enable one. */
22 /*#define DEBUG_IP_FIREWALL*/
/* When DEBUG_ALLOW_ALL is defined, ipt_do_table() returns NF_ACCEPT for
 * every packet regardless of the verdict it computed. */
23 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
24 /*#define DEBUG_IP_FIREWALL_USER*/
25
/* dprintf(): printk() wrapper that is compiled in only with
 * DEBUG_IP_FIREWALL and otherwise expands to nothing.  In this file it
 * is used by ip_packet_match(). */
26 #ifdef DEBUG_IP_FIREWALL
27 #define dprintf(format, args...) printk(format , ## args)
28 #else
29 #define dprintf(format, args...)
30 #endif
31
/* duprintf(): printk() wrapper that is compiled in only with
 * DEBUG_IP_FIREWALL_USER and otherwise expands to nothing.  In this
 * file it is used by the table checking / replacement code. */
32 #ifdef DEBUG_IP_FIREWALL_USER
33 #define duprintf(format, args...) printk(format , ## args)
34 #else
35 #define duprintf(format, args...)
36 #endif
37
/* IP_NF_ASSERT(x): with CONFIG_NETFILTER_DEBUG, print the function,
 * file and line when x is false.  It only logs; execution continues.
 * Without CONFIG_NETFILTER_DEBUG it expands to nothing, so x is not
 * evaluated at all. */
38 #ifdef CONFIG_NETFILTER_DEBUG
39 #define IP_NF_ASSERT(x) \
40 do { \
41 if (!(x)) \
42 printk("IP_NF_ASSERT: %s:%s:%u\n", \
43 __FUNCTION__, __FILE__, __LINE__); \
44 } while(0)
45 #else
46 #define IP_NF_ASSERT(x)
47 #endif
/* SMP_ALIGN(x): round x up to a multiple of SMP_CACHE_BYTES.  The mask
 * arithmetic assumes SMP_CACHE_BYTES is a power of two -- TODO confirm
 * against its definition. */
48 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
49
50 /* Mutex protects lists (only traversed in user context). */
/* The lists in question are ipt_target, ipt_match and ipt_tables; the
 * find_*_lock() helpers return with this mutex held on success. */
51 static DECLARE_MUTEX(ipt_mutex);
52
53 /* Must have mutex */
/* Both macros assert that ipt_mutex is already held: a non-zero result
 * from down_trylock() is taken to mean it could not be acquired.  The
 * argument x is ignored.  They are presumably consumed by the list/lock
 * helper headers included just below -- verify there.
 * NOTE(review): if the mutex is NOT held, down_trylock() would appear
 * to acquire it and the assertion leaves it taken -- confirm. */
54 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
55 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
56 #include <linux/netfilter_ipv4/lockhelp.h>
57 #include <linux/netfilter_ipv4/listhelp.h>
58
/* Disabled debugging aid: changing the "#if 0" to "#if 1" defines
 * `static' and `inline' away for the rest of the file, so every function
 * below gets external linkage and is emitted out of line (presumably to
 * make symbols visible to a debugger or oops trace). */
59 #if 0
60 /* All the better to debug you with... */
61 #define static
62 #define inline
63 #endif
64
65 /* Locking is simple: we assume at worst case there will be one packet
66 in user context and one from bottom halves (or soft irq if Alexey's
67 softnet patch was applied).
68
69 We keep a set of rules for each CPU, so we can avoid write-locking
70 them; doing a read_lock_bh() stops packets coming through if we're
71 in user context.
72
73 To be cache friendly on SMP, we arrange them like so:
74 [ n-entries ]
75 ... cache-align padding ...
76 [ n-entries ]
77
78 Hence the start of any CPU's copy is given by TABLE_OFFSET() below. */
79
80 /* The table itself */
/* Fixed header followed by the rule blob.  Instances are allocated with
 * vmalloc() in do_replace() as sizeof(struct ipt_table_info) plus
 * SMP_ALIGN(size) bytes per CPU. */
81 struct ipt_table_info
82 {
83 /* Size per table */
/* (bytes in ONE copy of the rules; set from the caller's size in
 * translate_table()) */
84 unsigned int size;
85 /* Number of entries: FIXME. --RR */
/* (rule count in one copy; replace_table() and do_add_counters() compare
 * it with the counter count supplied by user space) */
86 unsigned int number;
87
88 /* Entry points and underflows */
/* Both arrays hold byte offsets from the start of a copy of `entries',
 * indexed by hook number.  translate_table() initialises them to
 * 0xFFFFFFFF, meaning "not assigned". */
89 unsigned int hook_entry[NF_IP_NUMHOOKS];
90 unsigned int underflow[NF_IP_NUMHOOKS];
91
92 /* ipt_entry tables: one per CPU */
/* Copy for CPU p starts at entries + TABLE_OFFSET(info, p).  Must stay
 * the last member: the rule data is stored directly after the header. */
93 char entries[0] __attribute__((aligned(SMP_CACHE_BYTES)));
94 };
95
/* Registries searched by name through find_target_lock(),
 * find_match_lock() and find_table_lock(); protected by ipt_mutex. */
96 static LIST_HEAD(ipt_target);
97 static LIST_HEAD(ipt_match);
98 static LIST_HEAD(ipt_tables);
/* ADD_COUNTER(c,b,p): add b bytes and p packets to counter struct c.
 * Note that c is evaluated twice. */
99 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
100
/* TABLE_OFFSET(t,p): byte offset, within t->entries, of the rule copy
 * belonging to CPU index p.  Copies are SMP_ALIGN(t->size) bytes apart
 * on SMP; on uniprocessor builds there is only one copy, at offset 0. */
101 #ifdef CONFIG_SMP
102 #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
103 #else
104 #define TABLE_OFFSET(t,p) 0
105 #endif
106
/* Disabled debugging aid: when enabled, wraps down(),
 * down_interruptible() and up() so that every semaphore operation in
 * this file is logged with its source line, and an interrupted
 * down_interruptible() logs an extra ABORT line. */
107 #if 0
108 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
109 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
110 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
111 #endif
112
113 /* Returns whether matches rule or not. */
114 static inline int
115 ip_packet_match(const struct iphdr *ip,
116 const char *indev,
117 const char *outdev,
118 const struct ipt_ip *ipinfo,
119 int isfrag)
120 {
121 size_t i;
122 unsigned long ret;
123
124 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
125
126 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
127 IPT_INV_SRCIP)
128 || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
129 IPT_INV_DSTIP)) {
130 dprintf("Source or dest mismatch.\n");
131
132 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
133 NIPQUAD(ip->saddr),
134 NIPQUAD(ipinfo->smsk.s_addr),
135 NIPQUAD(ipinfo->src.s_addr),
136 ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
137 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
138 NIPQUAD(ip->daddr),
139 NIPQUAD(ipinfo->dmsk.s_addr),
140 NIPQUAD(ipinfo->dst.s_addr),
141 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
142 return 0;
143 }
144
145 /* Look for ifname matches; this should unroll nicely. */
146 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
147 ret |= (((const unsigned long *)indev)[i]
148 ^ ((const unsigned long *)ipinfo->iniface)[i])
149 & ((const unsigned long *)ipinfo->iniface_mask)[i];
150 }
151
152 if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
153 dprintf("VIA in mismatch (%s vs %s).%s\n",
154 indev, ipinfo->iniface,
155 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
156 return 0;
157 }
158
159 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
160 ret |= (((const unsigned long *)outdev)[i]
161 ^ ((const unsigned long *)ipinfo->outiface)[i])
162 & ((const unsigned long *)ipinfo->outiface_mask)[i];
163 }
164
165 if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
166 dprintf("VIA out mismatch (%s vs %s).%s\n",
167 outdev, ipinfo->outiface,
168 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
169 return 0;
170 }
171
172 /* Check specific protocol */
173 if (ipinfo->proto
174 && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
175 dprintf("Packet protocol %hi does not match %hi.%s\n",
176 ip->protocol, ipinfo->proto,
177 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
178 return 0;
179 }
180
181 /* If we have a fragment rule but the packet is not a fragment
182 * then we return zero */
183 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
184 dprintf("Fragment rule but not fragment.%s\n",
185 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
186 return 0;
187 }
188
189 return 1;
190 }
191
192 static inline int
193 ip_checkentry(const struct ipt_ip *ip)
194 {
195 if (ip->flags & ~IPT_F_MASK) {
196 duprintf("Unknown flag bits set: %08X\n",
197 ip->flags & ~IPT_F_MASK);
198 return 0;
199 }
200 if (ip->invflags & ~IPT_INV_MASK) {
201 duprintf("Unknown invflag bits set: %08X\n",
202 ip->invflags & ~IPT_INV_MASK);
203 return 0;
204 }
205 return 1;
206 }
207
208 static unsigned int
209 ipt_error(struct sk_buff **pskb,
210 unsigned int hooknum,
211 const struct net_device *in,
212 const struct net_device *out,
213 const void *targinfo,
214 void *userinfo)
215 {
216 if (net_ratelimit())
217 printk("ip_tables: error: `%s'\n", (char *)targinfo);
218
219 return NF_DROP;
220 }
221
222 static inline
223 int do_match(struct ipt_entry_match *m,
224 const struct sk_buff *skb,
225 const struct net_device *in,
226 const struct net_device *out,
227 int offset,
228 const void *hdr,
229 u_int16_t datalen,
230 int *hotdrop)
231 {
232 /* Stop iteration if it doesn't match */
233 if (!m->u.kernel.match->match(skb, in, out, m->data,
234 offset, hdr, datalen, hotdrop))
235 return 1;
236 else
237 return 0;
238 }
239
/*
 * Return the rule located `offset' bytes after `base'.  No bounds or
 * alignment checking is done here.
 */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	char *start = base;

	return (struct ipt_entry *)(start + offset);
}
245
246 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
/* Run one packet through the chain that `table' attaches to `hook'.
 *
 * pskb     - packet; a target may replace *pskb, so the header pointers
 *            are re-read after every target call
 * hook     - netfilter hook number; must be one of table->valid_hooks
 * in, out  - input/output devices, either may be NULL
 * table    - table to traverse; its rwlock is read-locked (bottom
 *            halves disabled) for the whole traversal
 * userdata - passed through unchanged to target functions
 *
 * Rules are walked in the calling CPU's private copy.  User-chain calls
 * are tracked without a separate stack: `back' points at the rule to
 * resume at on RETURN, and the previous `back' is saved, as an offset,
 * in the comefrom field of the rule following the jump.
 *
 * The result is NF_DROP if a match set *hotdrop, otherwise the verdict
 * of the rule that ended the traversal (always NF_ACCEPT when
 * DEBUG_ALLOW_ALL is defined). */
247 unsigned int
248 ipt_do_table(struct sk_buff **pskb,
249 unsigned int hook,
250 const struct net_device *in,
251 const struct net_device *out,
252 struct ipt_table *table,
253 void *userdata)
254 {
/* Stand-in interface name used when `in' or `out' is NULL. */
255 static const char nulldevname[IFNAMSIZ] = { 0 };
256 u_int16_t offset;
257 struct iphdr *ip;
258 void *protohdr;
259 u_int16_t datalen;
260 int hotdrop = 0;
261 /* Initializing verdict to NF_DROP keeps gcc happy. */
262 unsigned int verdict = NF_DROP;
263 const char *indev, *outdev;
264 void *table_base;
265 struct ipt_entry *e, *back;
266
267 /* Initialization */
268 ip = (*pskb)->nh.iph;
/* ihl counts 32-bit words: protohdr is the first byte after the IP
 * header and datalen the number of bytes from there to the end. */
269 protohdr = (u_int32_t *)ip + ip->ihl;
270 datalen = (*pskb)->len - ip->ihl * 4;
271 indev = in ? in->name : nulldevname;
272 outdev = out ? out->name : nulldevname;
273 /* We handle fragments by dealing with the first fragment as
274 * if it was a normal packet. All other fragments are treated
275 * normally, except that they will NEVER match rules that ask
276 * things we don't know, ie. tcp syn flag or ports). If the
277 * rule is also a fragment-specific rule, non-fragments won't
278 * match it. */
/* Non-zero only for non-first fragments; also passed to
 * ip_packet_match() as its `isfrag' argument. */
279 offset = ntohs(ip->frag_off) & IP_OFFSET;
280
281 read_lock_bh(&table->lock);
282 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
/* Pick this CPU's copy of the rules. */
283 table_base = (void *)table->private->entries
284 + TABLE_OFFSET(table->private,
285 cpu_number_map(smp_processor_id()));
286 e = get_entry(table_base, table->private->hook_entry[hook]);
287
/* Debug only: the comefrom field of the FIRST rule of this CPU's copy
 * doubles as an "in use" marker.  0xdead57ac = idle, 0x57acc001 =
 * traversal in progress, 0xeeeeeeec = inside a target function. */
288 #ifdef CONFIG_NETFILTER_DEBUG
289 /* Check noone else using our table */
290 if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
291 && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
292 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
293 smp_processor_id(),
294 table->name,
295 &((struct ipt_entry *)table_base)->comefrom,
296 ((struct ipt_entry *)table_base)->comefrom);
297 }
298 ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
299 #endif
300
301 /* For return from builtin chain */
302 back = get_entry(table_base, table->private->underflow[hook]);
303
/* Main loop: one rule per iteration, until a verdict is reached
 * (break) or a match requests an immediate drop (hotdrop). */
304 do {
305 IP_NF_ASSERT(e);
306 IP_NF_ASSERT(back);
307 (*pskb)->nfcache |= e->nfcache;
308 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
309 struct ipt_entry_target *t;
310
/* do_match() returns non-zero for "no match", which makes the
 * iteration result non-zero. */
311 if (IPT_MATCH_ITERATE(e, do_match,
312 *pskb, in, out,
313 offset, protohdr,
314 datalen, &hotdrop) != 0)
315 goto no_match;
316
/* Rule matched: account the packet (bytes taken from the IP total
 * length) before acting on the target. */
317 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
318
319 t = ipt_get_target(e);
320 IP_NF_ASSERT(t->u.kernel.target);
321 /* Standard target? */
/* A target without a target function is handled inline as a standard
 * target: its verdict field is either a negative absolute verdict /
 * IPT_RETURN, or a non-negative jump offset. */
322 if (!t->u.kernel.target->target) {
323 int v;
324
325 v = ((struct ipt_standard_target *)t)->verdict;
326 if (v < 0) {
327 /* Pop from stack? */
328 if (v != IPT_RETURN) {
/* Absolute verdicts are stored as -(verdict) - 1. */
329 verdict = (unsigned)(-v) - 1;
330 break;
331 }
/* RETURN: resume at the saved rule and restore the previous back
 * pointer from its comefrom field. */
332 e = back;
333 back = get_entry(table_base,
334 back->comefrom);
335 continue;
336 }
/* v >= 0: offset of the next rule to run.  If that is not simply the
 * following rule, this is a call into another chain, so remember
 * where to come back to. */
337 if (table_base + v
338 != (void *)e + e->next_offset) {
339 /* Save old back ptr in next entry */
340 struct ipt_entry *next
341 = (void *)e + e->next_offset;
342 next->comefrom
343 = (void *)back - table_base;
344 /* set back pointer to next entry */
345 back = next;
346 }
347
348 e = get_entry(table_base, v);
349 } else {
350 /* Targets which reenter must return
351 abs. verdicts */
352 #ifdef CONFIG_NETFILTER_DEBUG
353 ((struct ipt_entry *)table_base)->comefrom
354 = 0xeeeeeeec;
355 #endif
356 verdict = t->u.kernel.target->target(pskb,
357 hook,
358 in, out,
359 t->data,
360 userdata);
361
/* Debug only: if the marker changed while inside the target, the
 * table was re-entered on this CPU; IPT_CONTINUE is then refused and
 * turned into NF_DROP. */
362 #ifdef CONFIG_NETFILTER_DEBUG
363 if (((struct ipt_entry *)table_base)->comefrom
364 != 0xeeeeeeec
365 && verdict == IPT_CONTINUE) {
366 printk("Target %s reentered!\n",
367 t->u.kernel.target->name);
368 verdict = NF_DROP;
369 }
370 ((struct ipt_entry *)table_base)->comefrom
371 = 0x57acc001;
372 #endif
373 /* Target might have changed stuff. */
374 ip = (*pskb)->nh.iph;
375 protohdr = (u_int32_t *)ip + ip->ihl;
376 datalen = (*pskb)->len - ip->ihl * 4;
377
378 if (verdict == IPT_CONTINUE)
379 e = (void *)e + e->next_offset;
380 else
381 /* Verdict */
382 break;
383 }
384 } else {
385
/* Also reached by goto from the match iteration above. */
386 no_match:
387 e = (void *)e + e->next_offset;
388 }
389 } while (!hotdrop);
390
391 #ifdef CONFIG_NETFILTER_DEBUG
392 ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
393 #endif
394 read_unlock_bh(&table->lock);
395
396 #ifdef DEBUG_ALLOW_ALL
397 return NF_ACCEPT;
398 #else
399 if (hotdrop)
400 return NF_DROP;
401 else return verdict;
402 #endif
403 }
404
/*
 * Look `name' up in the list `head' while holding `mutex'.
 *
 * On success the element is returned and the mutex is LEFT HELD; the
 * caller must up() it.  On failure NULL is returned with the mutex
 * released and *error set: to the result of down_interruptible() if
 * taking the mutex failed, or to -ENOENT if no element has that name.
 * On success *error is 0.
 */
static inline void *
find_inlist_lock_noload(struct list_head *head,
			const char *name,
			int *error,
			struct semaphore *mutex)
{
	void *found;

#if 0
	duprintf("find_inlist: searching for `%s' in %s.\n",
		 name, head == &ipt_target ? "ipt_target"
		 : head == &ipt_match ? "ipt_match"
		 : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
#endif

	*error = down_interruptible(mutex);
	if (*error != 0)
		return NULL;

	found = list_named_find(head, name);
	if (found)
		return found;

	/* Not there: report it and give the mutex back. */
	*error = -ENOENT;
	up(mutex);
	return found;
}
432
/*
 * find_inlist_lock - like find_inlist_lock_noload(), but when the name
 * is not registered and CONFIG_KMOD is enabled, try to load the module
 * called <prefix><name> and look again.
 *
 * Same contract as find_inlist_lock_noload(): on success the element is
 * returned with `mutex' held; on failure NULL is returned, the mutex is
 * not held and *error carries the reason.
 *
 * `name' ultimately comes from user space and is used both for table
 * names and for match/target names, so the module name is assembled in
 * a fixed buffer big enough for either and is never written past its
 * end.  A name that does not fit (or has no terminating NUL within the
 * buffer) cannot belong to a real module: no load is attempted and the
 * failure from the first lookup is returned.
 */
#ifndef CONFIG_KMOD
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#else
static void *
find_inlist_lock(struct list_head *head,
		 const char *name,
		 const char *prefix,
		 int *error,
		 struct semaphore *mutex)
{
	char modulename[IPT_FUNCTION_MAXNAMELEN + IPT_TABLE_MAXNAMELEN + 1];
	size_t plen, nlen, room;
	void *ret;

	ret = find_inlist_lock_noload(head, name, error, mutex);
	if (ret)
		return ret;

	plen = strlen(prefix);
	if (plen >= sizeof(modulename))
		return NULL;

	/* Bytes left for the name, keeping one for the NUL. */
	room = sizeof(modulename) - 1 - plen;
	for (nlen = 0; name[nlen] != '\0'; nlen++) {
		if (nlen == room)
			return NULL;
	}

	memcpy(modulename, prefix, plen);
	memcpy(modulename + plen, name, nlen);
	modulename[plen + nlen] = '\0';

	duprintf("find_inlist: loading `%s'.\n", modulename);
	request_module(modulename);

	/* Whether or not the load worked, the second lookup decides. */
	return find_inlist_lock_noload(head, name, error, mutex);
}
#endif
458
459 static inline struct ipt_table *
460 find_table_lock(const char *name, int *error, struct semaphore *mutex)
461 {
462 return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
463 }
464
465 static inline struct ipt_match *
466 find_match_lock(const char *name, int *error, struct semaphore *mutex)
467 {
468 return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
469 }
470
471 static inline struct ipt_target *
472 find_target_lock(const char *name, int *error, struct semaphore *mutex)
473 {
474 return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
475 }
476
477 /* All zeroes == unconditional rule. */
478 static inline int
479 unconditional(const struct ipt_ip *ip)
480 {
481 unsigned int i;
482
483 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
484 if (((__u32 *)ip)[i])
485 return 0;
486
487 return 1;
488 }
489
490 /* Figures out from what hook each rule can be called: returns 0 if
491 there are loops. Puts hook bitmask in comefrom. */
/* Walks, for every hook in valid_hooks, all rules reachable from that
 * hook's entry point in the first copy of newinfo->entries.
 *
 * Two per-rule fields are borrowed while the walk is in progress:
 *   - counters.pcnt holds the position the walk arrived from (a back
 *     pointer) and is reset to 0 when the rule is left;
 *   - bit NF_IP_NUMHOOKS of comefrom means "on the current path".
 *     Meeting a rule that already has it set means the ruleset loops.
 * The low bits of comefrom accumulate the hooks a rule is reachable
 * from.  Returns 1 on success, 0 if a loop was found. */
492 static int
493 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
494 {
495 unsigned int hook;
496
497 /* No recursion; use packet counter to save back ptrs (reset
498 to 0 as we leave), and comefrom to save source hook bitmask */
499 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
500 unsigned int pos = newinfo->hook_entry[hook];
501 struct ipt_entry *e
502 = (struct ipt_entry *)(newinfo->entries + pos);
503
504 if (!(valid_hooks & (1 << hook)))
505 continue;
506
507 /* Set initial back pointer. */
/* The entry point refers to itself; that is how the backtracking code
 * below recognises the start of the chain. */
508 e->counters.pcnt = pos;
509
510 for (;;) {
511 struct ipt_standard_target *t
512 = (void *)ipt_get_target(e);
513
514 if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
515 printk("iptables: loop hook %u pos %u %08X.\n",
516 hook, pos, e->comefrom);
517 return 0;
518 }
519 e->comefrom
520 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
521
522 /* Unconditional return/END. */
/* That is: a rule with no matches (the target follows the header
 * directly), a standard target with a negative verdict, and an
 * all-zero IP part.  Nothing can run past such a rule. */
523 if (e->target_offset == sizeof(struct ipt_entry)
524 && (strcmp(t->target.u.user.name,
525 IPT_STANDARD_TARGET) == 0)
526 && t->verdict < 0
527 && unconditional(&e->ip)) {
528 unsigned int oldpos, size;
529
530 /* Return: backtrack through the last
531 big jump. */
/* Each pass leaves one rule: clear its on-path bit, follow its back
 * pointer and zero the borrowed counter.  Keep going while the rule
 * stepped back to reached us by plain fall-through (it ends exactly
 * where the rule just left begins); stop at the rule that jumped. */
532 do {
533 e->comefrom ^= (1<<NF_IP_NUMHOOKS);
534 #ifdef DEBUG_IP_FIREWALL_USER
535 if (e->comefrom
536 & (1 << NF_IP_NUMHOOKS)) {
537 duprintf("Back unset "
538 "on hook %u "
539 "rule %u\n",
540 hook, pos);
541 }
542 #endif
543 oldpos = pos;
544 pos = e->counters.pcnt;
545 e->counters.pcnt = 0;
546
547 /* We're at the start. */
548 if (pos == oldpos)
549 goto next;
550
551 e = (struct ipt_entry *)
552 (newinfo->entries + pos);
553 } while (oldpos == pos + e->next_offset);
554
555 /* Move along one */
/* Continue with the rule after the jump; its back pointer is the
 * jumping rule, which stays marked as on-path. */
556 size = e->next_offset;
557 e = (struct ipt_entry *)
558 (newinfo->entries + pos + size);
559 e->counters.pcnt = pos;
560 pos += size;
561 } else {
562 int newpos = t->verdict;
563
564 if (strcmp(t->target.u.user.name,
565 IPT_STANDARD_TARGET) == 0
566 && newpos >= 0) {
567 /* This a jump; chase it. */
568 duprintf("Jump rule %u -> %u\n",
569 pos, newpos);
570 } else {
571 /* ... this is a fallthru */
572 newpos = pos + e->next_offset;
573 }
/* Step to the chosen rule and record where we came from. */
574 e = (struct ipt_entry *)
575 (newinfo->entries + newpos);
576 e->counters.pcnt = pos;
577 pos = newpos;
578 }
579 }
580 next:
581 duprintf("Finished chain %u\n", hook);
582 }
583 return 1;
584 }
585
586 static inline int
587 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
588 {
589 if (i && (*i)-- == 0)
590 return 1;
591
592 if (m->u.kernel.match->destroy)
593 m->u.kernel.match->destroy(m->data,
594 m->u.match_size - sizeof(*m));
595
596 if (m->u.kernel.match->me)
597 __MOD_DEC_USE_COUNT(m->u.kernel.match->me);
598
599 return 0;
600 }
601
602 static inline int
603 standard_check(const struct ipt_entry_target *t,
604 unsigned int max_offset)
605 {
606 struct ipt_standard_target *targ = (void *)t;
607
608 /* Check standard info. */
609 if (t->u.target_size
610 != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
611 duprintf("standard_check: target size %u != %u\n",
612 t->u.target_size,
613 IPT_ALIGN(sizeof(struct ipt_standard_target)));
614 return 0;
615 }
616
617 if (targ->verdict >= 0
618 && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
619 duprintf("ipt_standard_check: bad verdict (%i)\n",
620 targ->verdict);
621 return 0;
622 }
623
624 if (targ->verdict < -NF_MAX_VERDICT - 1) {
625 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
626 targ->verdict);
627 return 0;
628 }
629 return 1;
630 }
631
632 static inline int
633 check_match(struct ipt_entry_match *m,
634 const char *name,
635 const struct ipt_ip *ip,
636 unsigned int hookmask,
637 unsigned int *i)
638 {
639 int ret;
640 struct ipt_match *match;
641
642 match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
643 if (!match) {
644 duprintf("check_match: `%s' not found\n", m->u.user.name);
645 return ret;
646 }
647 if (match->me)
648 __MOD_INC_USE_COUNT(match->me);
649 m->u.kernel.match = match;
650 up(&ipt_mutex);
651
652 if (m->u.kernel.match->checkentry
653 && !m->u.kernel.match->checkentry(name, ip, m->data,
654 m->u.match_size - sizeof(*m),
655 hookmask)) {
656 if (m->u.kernel.match->me)
657 __MOD_DEC_USE_COUNT(m->u.kernel.match->me);
658 duprintf("ip_tables: check failed for `%s'.\n",
659 m->u.kernel.match->name);
660 return -EINVAL;
661 }
662
663 (*i)++;
664 return 0;
665 }
666
667 static struct ipt_target ipt_standard_target;
668
669 static inline int
670 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
671 unsigned int *i)
672 {
673 struct ipt_entry_target *t;
674 struct ipt_target *target;
675 int ret;
676 unsigned int j;
677
678 if (!ip_checkentry(&e->ip)) {
679 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
680 return -EINVAL;
681 }
682
683 j = 0;
684 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
685 if (ret != 0)
686 goto cleanup_matches;
687
688 t = ipt_get_target(e);
689 target = find_target_lock(t->u.user.name, &ret, &ipt_mutex);
690 if (!target) {
691 duprintf("check_entry: `%s' not found\n", t->u.user.name);
692 goto cleanup_matches;
693 }
694 if (target->me)
695 __MOD_INC_USE_COUNT(target->me);
696 t->u.kernel.target = target;
697 up(&ipt_mutex);
698
699 if (t->u.kernel.target == &ipt_standard_target) {
700 if (!standard_check(t, size)) {
701 ret = -EINVAL;
702 goto cleanup_matches;
703 }
704 } else if (t->u.kernel.target->checkentry
705 && !t->u.kernel.target->checkentry(name, e, t->data,
706 t->u.target_size
707 - sizeof(*t),
708 e->comefrom)) {
709 if (t->u.kernel.target->me)
710 __MOD_DEC_USE_COUNT(t->u.kernel.target->me);
711 duprintf("ip_tables: check failed for `%s'.\n",
712 t->u.kernel.target->name);
713 ret = -EINVAL;
714 goto cleanup_matches;
715 }
716
717 (*i)++;
718 return 0;
719
720 cleanup_matches:
721 IPT_MATCH_ITERATE(e, cleanup_match, &j);
722 return ret;
723 }
724
725 static inline int
726 check_entry_size_and_hooks(struct ipt_entry *e,
727 struct ipt_table_info *newinfo,
728 unsigned char *base,
729 unsigned char *limit,
730 const unsigned int *hook_entries,
731 const unsigned int *underflows,
732 unsigned int *i)
733 {
734 unsigned int h;
735
736 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
737 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
738 duprintf("Bad offset %p\n", e);
739 return -EINVAL;
740 }
741
742 if (e->next_offset
743 < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
744 duprintf("checking: element %p size %u\n",
745 e, e->next_offset);
746 return -EINVAL;
747 }
748
749 /* Check hooks & underflows */
750 for (h = 0; h < NF_IP_NUMHOOKS; h++) {
751 if ((unsigned char *)e - base == hook_entries[h])
752 newinfo->hook_entry[h] = hook_entries[h];
753 if ((unsigned char *)e - base == underflows[h])
754 newinfo->underflow[h] = underflows[h];
755 }
756
757 /* FIXME: underflows must be unconditional, standard verdicts
758 < 0 (not IPT_RETURN). --RR */
759
760 /* Clear counters and comefrom */
761 e->counters = ((struct ipt_counters) { 0, 0 });
762 e->comefrom = 0;
763
764 (*i)++;
765 return 0;
766 }
767
768 static inline int
769 cleanup_entry(struct ipt_entry *e, unsigned int *i)
770 {
771 struct ipt_entry_target *t;
772
773 if (i && (*i)-- == 0)
774 return 1;
775
776 /* Cleanup all matches */
777 IPT_MATCH_ITERATE(e, cleanup_match, NULL);
778 t = ipt_get_target(e);
779 if (t->u.kernel.target->destroy)
780 t->u.kernel.target->destroy(t->data,
781 t->u.target_size - sizeof(*t));
782 if (t->u.kernel.target->me)
783 __MOD_DEC_USE_COUNT(t->u.kernel.target->me);
784
785 return 0;
786 }
787
788 /* Checks and translates the user-supplied table segment (held in
789 newinfo) */
790 static int
791 translate_table(const char *name,
792 unsigned int valid_hooks,
793 struct ipt_table_info *newinfo,
794 unsigned int size,
795 unsigned int number,
796 const unsigned int *hook_entries,
797 const unsigned int *underflows)
798 {
799 unsigned int i;
800 int ret;
801
802 newinfo->size = size;
803 newinfo->number = number;
804
805 /* Init all hooks to impossible value. */
806 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
807 newinfo->hook_entry[i] = 0xFFFFFFFF;
808 newinfo->underflow[i] = 0xFFFFFFFF;
809 }
810
811 duprintf("translate_table: size %u\n", newinfo->size);
812 i = 0;
813 /* Walk through entries, checking offsets. */
814 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
815 check_entry_size_and_hooks,
816 newinfo,
817 newinfo->entries,
818 newinfo->entries + size,
819 hook_entries, underflows, &i);
820 if (ret != 0)
821 return ret;
822
823 if (i != number) {
824 duprintf("translate_table: %u not %u entries\n",
825 i, number);
826 return -EINVAL;
827 }
828
829 /* Check hooks all assigned */
830 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
831 /* Only hooks which are valid */
832 if (!(valid_hooks & (1 << i)))
833 continue;
834 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
835 duprintf("Invalid hook entry %u %u\n",
836 i, hook_entries[i]);
837 return -EINVAL;
838 }
839 if (newinfo->underflow[i] == 0xFFFFFFFF) {
840 duprintf("Invalid underflow %u %u\n",
841 i, underflows[i]);
842 return -EINVAL;
843 }
844 }
845
846 if (!mark_source_chains(newinfo, valid_hooks))
847 return -ELOOP;
848
849 /* Finally, each sanity check must pass */
850 i = 0;
851 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
852 check_entry, name, size, &i);
853
854 if (ret != 0) {
855 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
856 cleanup_entry, &i);
857 return ret;
858 }
859
860 /* And one copy for every other CPU */
861 for (i = 1; i < smp_num_cpus; i++) {
862 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
863 newinfo->entries,
864 SMP_ALIGN(newinfo->size));
865 }
866
867 return ret;
868 }
869
870 static struct ipt_table_info *
871 replace_table(struct ipt_table *table,
872 unsigned int num_counters,
873 struct ipt_table_info *newinfo,
874 int *error)
875 {
876 struct ipt_table_info *oldinfo;
877
878 #ifdef CONFIG_NETFILTER_DEBUG
879 {
880 struct ipt_entry *table_base;
881 unsigned int i;
882
883 for (i = 0; i < smp_num_cpus; i++) {
884 table_base =
885 (void *)newinfo->entries
886 + TABLE_OFFSET(newinfo, i);
887
888 table_base->comefrom = 0xdead57ac;
889 }
890 }
891 #endif
892
893 /* Do the substitution. */
894 write_lock_bh(&table->lock);
895 /* Check inside lock: is the old number correct? */
896 if (num_counters != table->private->number) {
897 duprintf("num_counters != table->private->number (%u/%u)\n",
898 num_counters, table->private->number);
899 write_unlock_bh(&table->lock);
900 *error = -EAGAIN;
901 return NULL;
902 }
903 oldinfo = table->private;
904 table->private = newinfo;
905 write_unlock_bh(&table->lock);
906
907 return oldinfo;
908 }
909
910 /* Gets counters. */
911 static inline int
912 add_entry_to_counter(const struct ipt_entry *e,
913 struct ipt_counters total[],
914 unsigned int *i)
915 {
916 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
917
918 (*i)++;
919 return 0;
920 }
921
922 static void
923 get_counters(const struct ipt_table_info *t,
924 struct ipt_counters counters[])
925 {
926 unsigned int cpu;
927 unsigned int i;
928
929 for (cpu = 0; cpu < smp_num_cpus; cpu++) {
930 i = 0;
931 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
932 t->size,
933 add_entry_to_counter,
934 counters,
935 &i);
936 }
937 }
938
939 static int
940 copy_entries_to_user(unsigned int total_size,
941 struct ipt_table *table,
942 void *userptr)
943 {
944 unsigned int off, num, countersize;
945 struct ipt_entry *e;
946 struct ipt_counters *counters;
947 int ret = 0;
948
949 /* We need atomic snapshot of counters: rest doesn't change
950 (other than comefrom, which userspace doesn't care
951 about). */
952 countersize = sizeof(struct ipt_counters) * table->private->number;
953 counters = vmalloc(countersize);
954
955 if (counters == NULL)
956 return -ENOMEM;
957
958 /* First, sum counters... */
959 memset(counters, 0, countersize);
960 write_lock_bh(&table->lock);
961 get_counters(table->private, counters);
962 write_unlock_bh(&table->lock);
963
964 /* ... then copy entire thing from CPU 0... */
965 if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
966 ret = -EFAULT;
967 goto free_counters;
968 }
969
970 /* FIXME: use iterator macros --RR */
971 /* ... then go back and fix counters and names */
972 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
973 unsigned int i;
974 struct ipt_entry_match *m;
975 struct ipt_entry_target *t;
976
977 e = (struct ipt_entry *)(table->private->entries + off);
978 if (copy_to_user(userptr + off
979 + offsetof(struct ipt_entry, counters),
980 &counters[num],
981 sizeof(counters[num])) != 0) {
982 ret = -EFAULT;
983 goto free_counters;
984 }
985
986 for (i = sizeof(struct ipt_entry);
987 i < e->target_offset;
988 i += m->u.match_size) {
989 m = (void *)e + i;
990
991 if (copy_to_user(userptr + off + i
992 + offsetof(struct ipt_entry_match,
993 u.user.name),
994 m->u.kernel.match->name,
995 strlen(m->u.kernel.match->name)+1)
996 != 0) {
997 ret = -EFAULT;
998 goto free_counters;
999 }
1000 }
1001
1002 t = ipt_get_target(e);
1003 if (copy_to_user(userptr + off + e->target_offset
1004 + offsetof(struct ipt_entry_target,
1005 u.user.name),
1006 t->u.kernel.target->name,
1007 strlen(t->u.kernel.target->name)+1) != 0) {
1008 ret = -EFAULT;
1009 goto free_counters;
1010 }
1011 }
1012
1013 free_counters:
1014 vfree(counters);
1015 return ret;
1016 }
1017
1018 static int
1019 get_entries(const struct ipt_get_entries *entries,
1020 struct ipt_get_entries *uptr)
1021 {
1022 int ret;
1023 struct ipt_table *t;
1024
1025 t = find_table_lock(entries->name, &ret, &ipt_mutex);
1026 if (t) {
1027 duprintf("t->private->number = %u\n",
1028 t->private->number);
1029 if (entries->size == t->private->size)
1030 ret = copy_entries_to_user(t->private->size,
1031 t, uptr->entrytable);
1032 else {
1033 duprintf("get_entries: I've got %u not %u!\n",
1034 t->private->size,
1035 entries->size);
1036 ret = -EINVAL;
1037 }
1038 up(&ipt_mutex);
1039 } else
1040 duprintf("get_entries: Can't find %s!\n",
1041 entries->name);
1042
1043 return ret;
1044 }
1045
1046 static int
1047 do_replace(void *user, unsigned int len)
1048 {
1049 int ret;
1050 struct ipt_replace tmp;
1051 struct ipt_table *t;
1052 struct ipt_table_info *newinfo, *oldinfo;
1053 struct ipt_counters *counters;
1054
1055 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1056 return -EFAULT;
1057
1058 /* Hack: Causes ipchains to give correct error msg --RR */
1059 if (len != sizeof(tmp) + tmp.size)
1060 return -ENOPROTOOPT;
1061
1062 newinfo = vmalloc(sizeof(struct ipt_table_info)
1063 + SMP_ALIGN(tmp.size) * smp_num_cpus);
1064 if (!newinfo)
1065 return -ENOMEM;
1066
1067 if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1068 tmp.size) != 0) {
1069 ret = -EFAULT;
1070 goto free_newinfo;
1071 }
1072
1073 counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1074 if (!counters) {
1075 ret = -ENOMEM;
1076 goto free_newinfo;
1077 }
1078 memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1079
1080 ret = translate_table(tmp.name, tmp.valid_hooks,
1081 newinfo, tmp.size, tmp.num_entries,
1082 tmp.hook_entry, tmp.underflow);
1083 if (ret != 0)
1084 goto free_newinfo_counters;
1085
1086 duprintf("ip_tables: Translated table\n");
1087
1088 t = find_table_lock(tmp.name, &ret, &ipt_mutex);
1089 if (!t)
1090 goto free_newinfo_counters_untrans;
1091
1092 /* You lied! */
1093 if (tmp.valid_hooks != t->valid_hooks) {
1094 duprintf("Valid hook crap: %08X vs %08X\n",
1095 tmp.valid_hooks, t->valid_hooks);
1096 ret = -EINVAL;
1097 goto free_newinfo_counters_untrans_unlock;
1098 }
1099
1100 oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1101 if (!oldinfo)
1102 goto free_newinfo_counters_untrans_unlock;
1103
1104 /* Get the old counters. */
1105 get_counters(oldinfo, counters);
1106 /* Decrease module usage counts and free resource */
1107 IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1108 vfree(oldinfo);
1109 /* Silent error: too late now. */
1110 copy_to_user(tmp.counters, counters,
1111 sizeof(struct ipt_counters) * tmp.num_counters);
1112 vfree(counters);
1113 up(&ipt_mutex);
1114 return 0;
1115
1116 free_newinfo_counters_untrans_unlock:
1117 up(&ipt_mutex);
1118 free_newinfo_counters_untrans:
1119 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1120 free_newinfo_counters:
1121 vfree(counters);
1122 free_newinfo:
1123 vfree(newinfo);
1124 return ret;
1125 }
1126
1127 /* We're lazy, and add to the first CPU; overflow works its fey magic
1128 * and everything is OK. */
1129 static inline int
1130 add_counter_to_entry(struct ipt_entry *e,
1131 const struct ipt_counters addme[],
1132 unsigned int *i)
1133 {
1134 #if 0
1135 duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1136 *i,
1137 (long unsigned int)e->counters.pcnt,
1138 (long unsigned int)e->counters.bcnt,
1139 (long unsigned int)addme[*i].pcnt,
1140 (long unsigned int)addme[*i].bcnt);
1141 #endif
1142
1143 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1144
1145 (*i)++;
1146 return 0;
1147 }
1148
1149 static int
1150 do_add_counters(void *user, unsigned int len)
1151 {
1152 unsigned int i;
1153 struct ipt_counters_info tmp, *paddc;
1154 struct ipt_table *t;
1155 int ret;
1156
1157 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1158 return -EFAULT;
1159
1160 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1161 return -EINVAL;
1162
1163 paddc = vmalloc(len);
1164 if (!paddc)
1165 return -ENOMEM;
1166
1167 if (copy_from_user(paddc, user, len) != 0) {
1168 ret = -EFAULT;
1169 goto free;
1170 }
1171
1172 t = find_table_lock(tmp.name, &ret, &ipt_mutex);
1173 if (!t)
1174 goto free;
1175
1176 write_lock_bh(&t->lock);
1177 if (t->private->number != paddc->num_counters) {
1178 ret = -EINVAL;
1179 goto unlock_up_free;
1180 }
1181
1182 i = 0;
1183 IPT_ENTRY_ITERATE(t->private->entries,
1184 t->private->size,
1185 add_counter_to_entry,
1186 paddc->counters,
1187 &i);
1188 unlock_up_free:
1189 write_unlock_bh(&t->lock);
1190 up(&ipt_mutex);
1191 free:
1192 vfree(paddc);
1193
1194 return ret;
1195 }
1196
1197 static int
1198 do_ipt_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len)
1199 {
1200 int ret;
1201
1202 if (!capable(CAP_NET_ADMIN))
1203 return -EPERM;
1204
1205 switch (cmd) {
1206 case IPT_SO_SET_REPLACE:
1207 ret = do_replace(user, len);
1208 break;
1209
1210 case IPT_SO_SET_ADD_COUNTERS:
1211 ret = do_add_counters(user, len);
1212 break;
1213
1214 default:
1215 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
1216 ret = -EINVAL;
1217 }
1218
1219 return ret;
1220 }
1221
1222 static int
1223 do_ipt_get_ctl(struct sock *sk, int cmd, void *user, int *len)
1224 {
1225 int ret;
1226
1227 if (!capable(CAP_NET_ADMIN))
1228 return -EPERM;
1229
1230 switch (cmd) {
1231 case IPT_SO_GET_INFO: {
1232 char name[IPT_TABLE_MAXNAMELEN];
1233 struct ipt_table *t;
1234
1235 if (*len != sizeof(struct ipt_getinfo)) {
1236 duprintf("length %u != %u\n", *len,
1237 sizeof(struct ipt_getinfo));
1238 ret = -EINVAL;
1239 break;
1240 }
1241
1242 if (copy_from_user(name, user, sizeof(name)) != 0) {
1243 ret = -EFAULT;
1244 break;
1245 }
1246 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1247 t = find_table_lock(name, &ret, &ipt_mutex);
1248 if (t) {
1249 struct ipt_getinfo info;
1250
1251 info.valid_hooks = t->valid_hooks;
1252 memcpy(info.hook_entry, t->private->hook_entry,
1253 sizeof(info.hook_entry));
1254 memcpy(info.underflow, t->private->underflow,
1255 sizeof(info.underflow));
1256 info.num_entries = t->private->number;
1257 info.size = t->private->size;
1258 strcpy(info.name, name);
1259
1260 if (copy_to_user(user, &info, *len) != 0)
1261 ret = -EFAULT;
1262 else
1263 ret = 0;
1264
1265 up(&ipt_mutex);
1266 }
1267 }
1268 break;
1269
1270 case IPT_SO_GET_ENTRIES: {
1271 struct ipt_get_entries get;
1272
1273 if (*len < sizeof(get)) {
1274 duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1275 ret = -EINVAL;
1276 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1277 ret = -EFAULT;
1278 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1279 duprintf("get_entries: %u != %u\n", *len,
1280 sizeof(struct ipt_get_entries) + get.size);
1281 ret = -EINVAL;
1282 } else
1283 ret = get_entries(&get, user);
1284 break;
1285 }
1286
1287 default:
1288 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1289 ret = -EINVAL;
1290 }
1291
1292 return ret;
1293 }
1294
1295 /* Registration hooks for targets. */
1296 int
1297 ipt_register_target(struct ipt_target *target)
1298 {
1299 int ret;
1300
1301 MOD_INC_USE_COUNT;
1302 ret = down_interruptible(&ipt_mutex);
1303 if (ret != 0) {
1304 MOD_DEC_USE_COUNT;
1305 return ret;
1306 }
1307 if (!list_named_insert(&ipt_target, target)) {
1308 duprintf("ipt_register_target: `%s' already in list!\n",
1309 target->name);
1310 ret = -EINVAL;
1311 MOD_DEC_USE_COUNT;
1312 }
1313 up(&ipt_mutex);
1314 return ret;
1315 }
1316
/*
 * Remove target from the ipt_target list (under ipt_mutex) and give
 * back the module use count taken by ipt_register_target().
 */
void
ipt_unregister_target(struct ipt_target *target)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_target, target);
	up(&ipt_mutex);
	MOD_DEC_USE_COUNT;
}
1325
1326 int
1327 ipt_register_match(struct ipt_match *match)
1328 {
1329 int ret;
1330
1331 MOD_INC_USE_COUNT;
1332 ret = down_interruptible(&ipt_mutex);
1333 if (ret != 0) {
1334 MOD_DEC_USE_COUNT;
1335 return ret;
1336 }
1337 if (!list_named_insert(&ipt_match, match)) {
1338 duprintf("ipt_register_match: `%s' already in list!\n",
1339 match->name);
1340 MOD_DEC_USE_COUNT;
1341 ret = -EINVAL;
1342 }
1343 up(&ipt_mutex);
1344
1345 return ret;
1346 }
1347
/*
 * Remove match from the ipt_match list (under ipt_mutex) and give back
 * the module use count taken by ipt_register_match().
 */
void
ipt_unregister_match(struct ipt_match *match)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_match, match);
	up(&ipt_mutex);
	MOD_DEC_USE_COUNT;
}
1356
1357 int ipt_register_table(struct ipt_table *table)
1358 {
1359 int ret;
1360 struct ipt_table_info *newinfo;
1361 static struct ipt_table_info bootstrap
1362 = { 0, 0, { 0 }, { 0 }, { } };
1363
1364 MOD_INC_USE_COUNT;
1365 newinfo = vmalloc(sizeof(struct ipt_table_info)
1366 + SMP_ALIGN(table->table->size) * smp_num_cpus);
1367 if (!newinfo) {
1368 ret = -ENOMEM;
1369 MOD_DEC_USE_COUNT;
1370 return ret;
1371 }
1372 memcpy(newinfo->entries, table->table->entries, table->table->size);
1373
1374 ret = translate_table(table->name, table->valid_hooks,
1375 newinfo, table->table->size,
1376 table->table->num_entries,
1377 table->table->hook_entry,
1378 table->table->underflow);
1379 if (ret != 0) {
1380 vfree(newinfo);
1381 MOD_DEC_USE_COUNT;
1382 return ret;
1383 }
1384
1385 ret = down_interruptible(&ipt_mutex);
1386 if (ret != 0) {
1387 vfree(newinfo);
1388 MOD_DEC_USE_COUNT;
1389 return ret;
1390 }
1391
1392 /* Don't autoload: we'd eat our tail... */
1393 if (list_named_find(&ipt_tables, table->name)) {
1394 ret = -EEXIST;
1395 goto free_unlock;
1396 }
1397
1398 /* Simplifies replace_table code. */
1399 table->private = &bootstrap;
1400 if (!replace_table(table, 0, newinfo, &ret))
1401 goto free_unlock;
1402
1403 duprintf("table->private->number = %u\n",
1404 table->private->number);
1405
1406 table->lock = RW_LOCK_UNLOCKED;
1407 list_prepend(&ipt_tables, table);
1408
1409 unlock:
1410 up(&ipt_mutex);
1411 return ret;
1412
1413 free_unlock:
1414 vfree(newinfo);
1415 MOD_DEC_USE_COUNT;
1416 goto unlock;
1417 }
1418
1419 void ipt_unregister_table(struct ipt_table *table)
1420 {
1421 down(&ipt_mutex);
1422 LIST_DELETE(&ipt_tables, table);
1423 up(&ipt_mutex);
1424
1425 /* Decrease module usage counts and free resources */
1426 IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1427 cleanup_entry, NULL);
1428 vfree(table->private);
1429 MOD_DEC_USE_COUNT;
1430 }
1431
/*
 * Test port against the inclusive range [min, max].
 * The in-range result (1 or 0) is XORed with invert, so with invert == 0
 * this returns 1 for a port inside the range and 0 otherwise, and with
 * invert == 1 the answer is flipped.
 */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	const int in_range = (min <= port) && (port <= max);

	return in_range ^ invert;
}
1441
1442 static int
1443 tcp_find_option(u_int8_t option,
1444 const struct tcphdr *tcp,
1445 u_int16_t datalen,
1446 int invert,
1447 int *hotdrop)
1448 {
1449 unsigned int i = sizeof(struct tcphdr);
1450 const u_int8_t *opt = (u_int8_t *)tcp;
1451
1452 duprintf("tcp_match: finding option\n");
1453 /* If we don't have the whole header, drop packet. */
1454 if (tcp->doff * 4 > datalen) {
1455 *hotdrop = 1;
1456 return 0;
1457 }
1458
1459 while (i < tcp->doff * 4) {
1460 if (opt[i] == option) return !invert;
1461 if (opt[i] < 2) i++;
1462 else i += opt[i+1]?:1;
1463 }
1464
1465 return invert;
1466 }
1467
1468 static int
1469 tcp_match(const struct sk_buff *skb,
1470 const struct net_device *in,
1471 const struct net_device *out,
1472 const void *matchinfo,
1473 int offset,
1474 const void *hdr,
1475 u_int16_t datalen,
1476 int *hotdrop)
1477 {
1478 const struct tcphdr *tcp = hdr;
1479 const struct ipt_tcp *tcpinfo = matchinfo;
1480
1481 /* To quote Alan:
1482
1483 Don't allow a fragment of TCP 8 bytes in. Nobody normal
1484 causes this. Its a cracker trying to break in by doing a
1485 flag overwrite to pass the direction checks.
1486 */
1487
1488 if (offset == 1) {
1489 duprintf("Dropping evil TCP offset=1 frag.\n");
1490 *hotdrop = 1;
1491 return 0;
1492 } else if (offset == 0 && datalen < sizeof(struct tcphdr)) {
1493 /* We've been asked to examine this packet, and we
1494 can't. Hence, no choice but to drop. */
1495 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1496 *hotdrop = 1;
1497 return 0;
1498 }
1499
1500 /* FIXME: Try tcp doff >> packet len against various stacks --RR */
1501
1502 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1503
1504 /* Must not be a fragment. */
1505 return !offset
1506 && port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1507 ntohs(tcp->source),
1508 !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT))
1509 && port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1510 ntohs(tcp->dest),
1511 !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT))
1512 && FWINVTCP((((unsigned char *)tcp)[13]
1513 & tcpinfo->flg_mask)
1514 == tcpinfo->flg_cmp,
1515 IPT_TCP_INV_FLAGS)
1516 && (!tcpinfo->option
1517 || tcp_find_option(tcpinfo->option, tcp, datalen,
1518 tcpinfo->invflags
1519 & IPT_TCP_INV_OPTION,
1520 hotdrop));
1521 }
1522
1523 /* Called when user tries to insert an entry of this type. */
1524 static int
1525 tcp_checkentry(const char *tablename,
1526 const struct ipt_ip *ip,
1527 void *matchinfo,
1528 unsigned int matchsize,
1529 unsigned int hook_mask)
1530 {
1531 const struct ipt_tcp *tcpinfo = matchinfo;
1532
1533 /* Must specify proto == TCP, and no unknown invflags */
1534 return ip->proto == IPPROTO_TCP
1535 && !(ip->invflags & IPT_INV_PROTO)
1536 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1537 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1538 }
1539
1540 static int
1541 udp_match(const struct sk_buff *skb,
1542 const struct net_device *in,
1543 const struct net_device *out,
1544 const void *matchinfo,
1545 int offset,
1546 const void *hdr,
1547 u_int16_t datalen,
1548 int *hotdrop)
1549 {
1550 const struct udphdr *udp = hdr;
1551 const struct ipt_udp *udpinfo = matchinfo;
1552
1553 if (offset == 0 && datalen < sizeof(struct udphdr)) {
1554 /* We've been asked to examine this packet, and we
1555 can't. Hence, no choice but to drop. */
1556 duprintf("Dropping evil UDP tinygram.\n");
1557 *hotdrop = 1;
1558 return 0;
1559 }
1560
1561 /* Must not be a fragment. */
1562 return !offset
1563 && port_match(udpinfo->spts[0], udpinfo->spts[1],
1564 ntohs(udp->source),
1565 !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1566 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1567 ntohs(udp->dest),
1568 !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1569 }
1570
1571 /* Called when user tries to insert an entry of this type. */
1572 static int
1573 udp_checkentry(const char *tablename,
1574 const struct ipt_ip *ip,
1575 void *matchinfo,
1576 unsigned int matchinfosize,
1577 unsigned int hook_mask)
1578 {
1579 const struct ipt_udp *udpinfo = matchinfo;
1580
1581 /* Must specify proto == UDP, and no unknown invflags */
1582 if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1583 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1584 IPPROTO_UDP);
1585 return 0;
1586 }
1587 if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1588 duprintf("ipt_udp: matchsize %u != %u\n",
1589 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1590 return 0;
1591 }
1592 if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1593 duprintf("ipt_udp: unknown flags %X\n",
1594 udpinfo->invflags);
1595 return 0;
1596 }
1597
1598 return 1;
1599 }
1600
/*
 * Test an ICMP type/code pair against a rule: the type must equal
 * test_type and the code must lie in the inclusive range
 * [min_code, max_code].  The 1/0 outcome is XORed with invert.
 */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	int hit = 0;

	if (type == test_type)
		hit = (min_code <= code) && (code <= max_code);

	return hit ^ invert;
}
1610
1611 static int
1612 icmp_match(const struct sk_buff *skb,
1613 const struct net_device *in,
1614 const struct net_device *out,
1615 const void *matchinfo,
1616 int offset,
1617 const void *hdr,
1618 u_int16_t datalen,
1619 int *hotdrop)
1620 {
1621 const struct icmphdr *icmp = hdr;
1622 const struct ipt_icmp *icmpinfo = matchinfo;
1623
1624 if (offset == 0 && datalen < 2) {
1625 /* We've been asked to examine this packet, and we
1626 can't. Hence, no choice but to drop. */
1627 duprintf("Dropping evil ICMP tinygram.\n");
1628 *hotdrop = 1;
1629 return 0;
1630 }
1631
1632 /* Must not be a fragment. */
1633 return !offset
1634 && icmp_type_code_match(icmpinfo->type,
1635 icmpinfo->code[0],
1636 icmpinfo->code[1],
1637 icmp->type, icmp->code,
1638 !!(icmpinfo->invflags&IPT_ICMP_INV));
1639 }
1640
1641 /* Called when user tries to insert an entry of this type. */
1642 static int
1643 icmp_checkentry(const char *tablename,
1644 const struct ipt_ip *ip,
1645 void *matchinfo,
1646 unsigned int matchsize,
1647 unsigned int hook_mask)
1648 {
1649 const struct ipt_icmp *icmpinfo = matchinfo;
1650
1651 /* Must specify proto == ICMP, and no unknown invflags */
1652 return ip->proto == IPPROTO_ICMP
1653 && !(ip->invflags & IPT_INV_PROTO)
1654 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1655 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1656 }
1657
/* The built-in targets: standard (NULL) and error. */
/* Both are appended to the ipt_target list by init().  The initialisers
   are positional; the leading { NULL, NULL } is presumably the list
   linkage filled in on insertion -- confirm against struct ipt_target. */
static struct ipt_target ipt_standard_target
= { { NULL, NULL }, IPT_STANDARD_TARGET, NULL, NULL, NULL };
static struct ipt_target ipt_error_target
= { { NULL, NULL }, IPT_ERROR_TARGET, ipt_error, NULL, NULL };

/* Socket option hooks, registered by init() and removed by fini():
   do_ipt_set_ctl serves the set side, do_ipt_get_ctl the get side.
   The numeric pairs look like [IPT_BASE_CTL, IPT_SO_*_MAX+1) command
   ranges -- confirm against struct nf_sockopt_ops. */
static struct nf_sockopt_ops ipt_sockopts
= { { NULL, NULL }, PF_INET, IPT_BASE_CTL, IPT_SO_SET_MAX+1, do_ipt_set_ctl,
    IPT_BASE_CTL, IPT_SO_GET_MAX+1, do_ipt_get_ctl, 0, NULL };

/* The built-in matches "tcp", "udp" and "icmp", each pairing its match
   function with its checkentry function.  init() appends them to the
   ipt_match list. */
static struct ipt_match tcp_matchstruct
= { { NULL, NULL }, "tcp", &tcp_match, &tcp_checkentry, NULL };
static struct ipt_match udp_matchstruct
= { { NULL, NULL }, "udp", &udp_match, &udp_checkentry, NULL };
static struct ipt_match icmp_matchstruct
= { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL };
1674
1675 #ifdef CONFIG_PROC_FS
1676 static inline int print_name(const struct ipt_table *t,
1677 off_t start_offset, char *buffer, int length,
1678 off_t *pos, unsigned int *count)
1679 {
1680 if ((*count)++ >= start_offset) {
1681 unsigned int namelen;
1682
1683 namelen = sprintf(buffer + *pos, "%s\n", t->name);
1684 if (*pos + namelen > length) {
1685 /* Stop iterating */
1686 return 1;
1687 }
1688 *pos += namelen;
1689 }
1690 return 0;
1691 }
1692
1693 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1694 {
1695 off_t pos = 0;
1696 unsigned int count = 0;
1697
1698 if (down_interruptible(&ipt_mutex) != 0)
1699 return 0;
1700
1701 LIST_FIND(&ipt_tables, print_name, struct ipt_table *,
1702 offset, buffer, length, &pos, &count);
1703
1704 up(&ipt_mutex);
1705
1706 /* `start' hack - see fs/proc/generic.c line ~105 */
1707 *start=(char *)((unsigned long)count-offset);
1708 return pos;
1709 }
1710 #endif /*CONFIG_PROC_FS*/
1711
1712 static int __init init(void)
1713 {
1714 int ret;
1715
1716 /* Noone else will be downing sem now, so we won't sleep */
1717 down(&ipt_mutex);
1718 list_append(&ipt_target, &ipt_standard_target);
1719 list_append(&ipt_target, &ipt_error_target);
1720 list_append(&ipt_match, &tcp_matchstruct);
1721 list_append(&ipt_match, &udp_matchstruct);
1722 list_append(&ipt_match, &icmp_matchstruct);
1723 up(&ipt_mutex);
1724
1725 /* Register setsockopt */
1726 ret = nf_register_sockopt(&ipt_sockopts);
1727 if (ret < 0) {
1728 duprintf("Unable to register sockopts.\n");
1729 return ret;
1730 }
1731
1732 #ifdef CONFIG_PROC_FS
1733 {
1734 struct proc_dir_entry *proc;
1735
1736 proc = proc_net_create("ip_tables_names", 0, ipt_get_tables);
1737 if (!proc) {
1738 nf_unregister_sockopt(&ipt_sockopts);
1739 return -ENOMEM;
1740 }
1741 proc->owner = THIS_MODULE;
1742 }
1743 #endif
1744
1745 printk("ip_tables: (c)2000 Netfilter core team\n");
1746 return 0;
1747 }
1748
/*
 * Module teardown: unregister the socket option handlers and, with
 * CONFIG_PROC_FS, remove the "ip_tables_names" proc entry created by
 * init().
 */
static void __exit fini(void)
{
	nf_unregister_sockopt(&ipt_sockopts);
#ifdef CONFIG_PROC_FS
	proc_net_remove("ip_tables_names");
#endif
}
1756
/* Symbols made available to other modules: table, match and target
   (un)registration plus the table traversal routine. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);
EXPORT_SYMBOL(ipt_do_table);
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);

/* Module entry and exit points. */
module_init(init);
module_exit(fini);
1767