File: /usr/src/linux/net/ipv4/netfilter/ip_nat_standalone.c

1     /* This file contains all the functions required for the standalone
2        ip_nat module.
3     
4        These are not required by the compatibility layer.
5     */
6     
7     /* (c) 1999 Paul `Rusty' Russell.  Licenced under the GNU General
8        Public Licence. */
9     
10     #include <linux/config.h>
11     #include <linux/types.h>
12     #include <linux/ip.h>
13     #include <linux/netfilter.h>
14     #include <linux/netfilter_ipv4.h>
15     #include <linux/module.h>
16     #include <linux/skbuff.h>
17     #include <linux/proc_fs.h>
18     #include <net/checksum.h>
19     #include <linux/spinlock.h>
20     #include <linux/version.h>
21     #include <linux/brlock.h>
22     
/* Lock-assertion hooks.  Both ignore their argument and always check
   ip_nat_lock.  They are defined ahead of the netfilter includes that
   follow; presumably one of those headers (listhelp.h?) expands them --
   confirm before moving these lines. */
#define ASSERT_READ_LOCK(x)	MUST_BE_READ_LOCKED(&ip_nat_lock)
#define ASSERT_WRITE_LOCK(x)	MUST_BE_WRITE_LOCKED(&ip_nat_lock)
25     
26     #include <linux/netfilter_ipv4/ip_nat.h>
27     #include <linux/netfilter_ipv4/ip_nat_rule.h>
28     #include <linux/netfilter_ipv4/ip_nat_protocol.h>
29     #include <linux/netfilter_ipv4/ip_nat_core.h>
30     #include <linux/netfilter_ipv4/ip_nat_helper.h>
31     #include <linux/netfilter_ipv4/ip_tables.h>
32     #include <linux/netfilter_ipv4/ip_conntrack_core.h>
33     #include <linux/netfilter_ipv4/listhelp.h>
34     
/* Debug tracing switch.  As shipped, DEBUGP() expands to nothing, so its
   arguments are never evaluated.  Change the "#if 0" to "#if 1" to make
   DEBUGP an alias for printk. */
#if 0
# define DEBUGP printk
#else
# define DEBUGP(format, args...)
#endif
40     
/* Map a hook number to a printable name.  Only the three hooks this
   module attaches to are known; anything else yields "*ERROR*". */
#define HOOKNAME(hooknum)						\
	((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" :		\
	 (hooknum) == NF_IP_PRE_ROUTING  ? "PRE_ROUTING"  :		\
	 (hooknum) == NF_IP_LOCAL_OUT    ? "LOCAL_OUT"    :		\
					   "*ERROR*")
45     
46     static unsigned int
47     ip_nat_fn(unsigned int hooknum,
48     	  struct sk_buff **pskb,
49     	  const struct net_device *in,
50     	  const struct net_device *out,
51     	  int (*okfn)(struct sk_buff *))
52     {
53     	struct ip_conntrack *ct;
54     	enum ip_conntrack_info ctinfo;
55     	struct ip_nat_info *info;
56     	/* maniptype == SRC for postrouting. */
57     	enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
58     
59     	/* We never see fragments: conntrack defrags on pre-routing
60     	   and local-out, and ip_nat_out protects post-routing. */
61     	IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
62     		       & __constant_htons(IP_MF|IP_OFFSET)));
63     
64     	(*pskb)->nfcache |= NFC_UNKNOWN;
65     
66     	/* If we had a hardware checksum before, it's now invalid */
67     	if ((*pskb)->ip_summed == CHECKSUM_HW)
68     		(*pskb)->ip_summed = CHECKSUM_NONE;
69     
70     	ct = ip_conntrack_get(*pskb, &ctinfo);
71     	/* Can't track?  It's not due to stress, or conntrack would
72     	   have dropped it.  Hence it's the user's responsibilty to
73     	   packet filter it out, or implement conntrack/NAT for that
74     	   protocol. 8) --RR */
75     	if (!ct) {
76     		/* Exception: ICMP redirect to new connection (not in
77                        hash table yet).  We must not let this through, in
78                        case we're doing NAT to the same network. */
79     		struct iphdr *iph = (*pskb)->nh.iph;
80     		struct icmphdr *hdr = (struct icmphdr *)
81     			((u_int32_t *)iph + iph->ihl);
82     		if (iph->protocol == IPPROTO_ICMP
83     		    && hdr->type == ICMP_REDIRECT)
84     			return NF_DROP;
85     		return NF_ACCEPT;
86     	}
87     
88     	switch (ctinfo) {
89     	case IP_CT_RELATED:
90     	case IP_CT_RELATED+IP_CT_IS_REPLY:
91     		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
92     			return icmp_reply_translation(*pskb, ct, hooknum,
93     						      CTINFO2DIR(ctinfo));
94     		}
95     		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
96     	case IP_CT_NEW:
97     		info = &ct->nat.info;
98     
99     		WRITE_LOCK(&ip_nat_lock);
100     		/* Seen it before?  This can happen for loopback, retrans,
101     		   or local packets.. */
102     		if (!(info->initialized & (1 << maniptype))) {
103     			int in_hashes = info->initialized;
104     			unsigned int ret;
105     
106     			ret = ip_nat_rule_find(pskb, hooknum, in, out,
107     					       ct, info);
108     			if (ret != NF_ACCEPT) {
109     				WRITE_UNLOCK(&ip_nat_lock);
110     				return ret;
111     			}
112     
113     			if (in_hashes) {
114     				IP_NF_ASSERT(info->bysource.conntrack);
115     				replace_in_hashes(ct, info);
116     			} else {
117     				place_in_hashes(ct, info);
118     			}
119     		} else
120     			DEBUGP("Already setup manip %s for ct %p\n",
121     			       maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
122     			       ct);
123     		WRITE_UNLOCK(&ip_nat_lock);
124     		break;
125     
126     	default:
127     		/* ESTABLISHED */
128     		IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED
129     			     || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
130     		info = &ct->nat.info;
131     	}
132     
133     	IP_NF_ASSERT(info);
134     	return do_bindings(ct, ctinfo, info, hooknum, pskb);
135     }
136     
137     static unsigned int
138     ip_nat_out(unsigned int hooknum,
139     	   struct sk_buff **pskb,
140     	   const struct net_device *in,
141     	   const struct net_device *out,
142     	   int (*okfn)(struct sk_buff *))
143     {
144     	/* root is playing with raw sockets. */
145     	if ((*pskb)->len < sizeof(struct iphdr)
146     	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
147     		return NF_ACCEPT;
148     
149     	/* We can hit fragment here; forwarded packets get
150     	   defragmented by connection tracking coming in, then
151     	   fragmented (grr) by the forward code.
152     
153     	   In future: If we have nfct != NULL, AND we have NAT
154     	   initialized, AND there is no helper, then we can do full
155     	   NAPT on the head, and IP-address-only NAT on the rest.
156     
157     	   I'm starting to have nightmares about fragments.  */
158     
159     	if ((*pskb)->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
160     		*pskb = ip_ct_gather_frags(*pskb);
161     
162     		if (!*pskb)
163     			return NF_STOLEN;
164     	}
165     
166     	return ip_nat_fn(hooknum, pskb, in, out, okfn);
167     }
168     
169     /* FIXME: change in oif may mean change in hh_len.  Check and realloc
170        --RR */
171     static int
172     route_me_harder(struct sk_buff *skb)
173     {
174     	struct iphdr *iph = skb->nh.iph;
175     	struct rtable *rt;
176     	struct rt_key key = { dst:iph->daddr,
177     			      src:iph->saddr,
178     			      oif:skb->sk ? skb->sk->bound_dev_if : 0,
179     			      tos:RT_TOS(iph->tos)|RTO_CONN,
180     #ifdef CONFIG_IP_ROUTE_FWMARK
181     			      fwmark:skb->nfmark
182     #endif
183     			    };
184     
185     	if (ip_route_output_key(&rt, &key) != 0) {
186     		printk("route_me_harder: No more route.\n");
187     		return -EINVAL;
188     	}
189     
190     	/* Drop old route. */
191     	dst_release(skb->dst);
192     
193     	skb->dst = &rt->u.dst;
194     	return 0;
195     }
196     
197     static unsigned int
198     ip_nat_local_fn(unsigned int hooknum,
199     		struct sk_buff **pskb,
200     		const struct net_device *in,
201     		const struct net_device *out,
202     		int (*okfn)(struct sk_buff *))
203     {
204     	u_int32_t saddr, daddr;
205     	unsigned int ret;
206     
207     	/* root is playing with raw sockets. */
208     	if ((*pskb)->len < sizeof(struct iphdr)
209     	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
210     		return NF_ACCEPT;
211     
212     	saddr = (*pskb)->nh.iph->saddr;
213     	daddr = (*pskb)->nh.iph->daddr;
214     
215     	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
216     	if (ret != NF_DROP && ret != NF_STOLEN
217     	    && ((*pskb)->nh.iph->saddr != saddr
218     		|| (*pskb)->nh.iph->daddr != daddr))
219     		return route_me_harder(*pskb) == 0 ? ret : NF_DROP;
220     	return ret;
221     }
222     
223     /* We must be after connection tracking and before packet filtering. */
224     
225     /* Before packet filtering, change destination */
226     static struct nf_hook_ops ip_nat_in_ops
227     = { { NULL, NULL }, ip_nat_fn, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_NAT_DST };
228     /* After packet filtering, change source */
229     static struct nf_hook_ops ip_nat_out_ops
230     = { { NULL, NULL }, ip_nat_out, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_NAT_SRC};
231     /* Before packet filtering, change destination */
232     static struct nf_hook_ops ip_nat_local_out_ops
233     = { { NULL, NULL }, ip_nat_local_fn, PF_INET, NF_IP_LOCAL_OUT, NF_IP_PRI_NAT_DST };
234     
235     /* Protocol registration. */
236     int ip_nat_protocol_register(struct ip_nat_protocol *proto)
237     {
238     	int ret = 0;
239     	struct list_head *i;
240     
241     	WRITE_LOCK(&ip_nat_lock);
242     	for (i = protos.next; i != &protos; i = i->next) {
243     		if (((struct ip_nat_protocol *)i)->protonum
244     		    == proto->protonum) {
245     			ret = -EBUSY;
246     			goto out;
247     		}
248     	}
249     
250     	list_prepend(&protos, proto);
251     	MOD_INC_USE_COUNT;
252     
253      out:
254     	WRITE_UNLOCK(&ip_nat_lock);
255     	return ret;
256     }
257     
258     /* Noone stores the protocol anywhere; simply delete it. */
259     void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
260     {
261     	WRITE_LOCK(&ip_nat_lock);
262     	LIST_DELETE(&protos, proto);
263     	WRITE_UNLOCK(&ip_nat_lock);
264     
265     	/* Someone could be still looking at the proto in a bh. */
266     	br_write_lock_bh(BR_NETPROTO_LOCK);
267     	br_write_unlock_bh(BR_NETPROTO_LOCK);
268     
269     	MOD_DEC_USE_COUNT;
270     }
271     
272     static int init_or_cleanup(int init)
273     {
274     	int ret = 0;
275     
276     	if (!init) goto cleanup;
277     
278     	ret = ip_nat_rule_init();
279     	if (ret < 0) {
280     		printk("ip_nat_init: can't setup rules.\n");
281     		goto cleanup_nothing;
282     	}
283     	ret = ip_nat_init();
284     	if (ret < 0) {
285     		printk("ip_nat_init: can't setup rules.\n");
286     		goto cleanup_rule_init;
287     	}
288     	ret = nf_register_hook(&ip_nat_in_ops);
289     	if (ret < 0) {
290     		printk("ip_nat_init: can't register in hook.\n");
291     		goto cleanup_nat;
292     	}
293     	ret = nf_register_hook(&ip_nat_out_ops);
294     	if (ret < 0) {
295     		printk("ip_nat_init: can't register out hook.\n");
296     		goto cleanup_inops;
297     	}
298     	ret = nf_register_hook(&ip_nat_local_out_ops);
299     	if (ret < 0) {
300     		printk("ip_nat_init: can't register local out hook.\n");
301     		goto cleanup_outops;
302     	}
303     	if (ip_conntrack_module)
304     		__MOD_INC_USE_COUNT(ip_conntrack_module);
305     	return ret;
306     
307      cleanup:
308     	if (ip_conntrack_module)
309     		__MOD_DEC_USE_COUNT(ip_conntrack_module);
310     	nf_unregister_hook(&ip_nat_local_out_ops);
311      cleanup_outops:
312     	nf_unregister_hook(&ip_nat_out_ops);
313      cleanup_inops:
314     	nf_unregister_hook(&ip_nat_in_ops);
315      cleanup_nat:
316     	ip_nat_cleanup();
317      cleanup_rule_init:
318     	ip_nat_rule_cleanup();
319      cleanup_nothing:
320     	MUST_BE_READ_WRITE_UNLOCKED(&ip_nat_lock);
321     	return ret;
322     }
323     
324     static int __init init(void)
325     {
326     	return init_or_cleanup(1);
327     }
328     
329     static void __exit fini(void)
330     {
331     	init_or_cleanup(0);
332     }
333     
/* Module entry and exit points. */
module_init(init);
module_exit(fini);

/* Symbols made available to other modules.  None of them is defined in
   this file. */
EXPORT_SYMBOL(ip_nat_setup_info);
EXPORT_SYMBOL(ip_nat_helper_register);
EXPORT_SYMBOL(ip_nat_helper_unregister);
EXPORT_SYMBOL(ip_nat_expect_register);
EXPORT_SYMBOL(ip_nat_expect_unregister);
EXPORT_SYMBOL(ip_nat_cheat_check);
EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
EXPORT_SYMBOL(ip_nat_seq_adjust);
EXPORT_SYMBOL(ip_nat_delete_sack);
346