File: /usr/src/linux/net/sched/cls_tcindex.c

1     /*
2      * net/sched/cls_tcindex.c	Packet classifier for skb->tc_index
3      *
4      * Written 1998,1999 by Werner Almesberger, EPFL ICA
5      */
6     
7     #include <linux/config.h>
8     #include <linux/module.h>
9     #include <linux/types.h>
10     #include <linux/kernel.h>
11     #include <linux/skbuff.h>
12     #include <linux/errno.h>
13     #include <linux/netdevice.h>
14     #include <net/ip.h>
15     #include <net/pkt_sched.h>
16     #include <net/route.h>
17     
18     
19     /*
20      * Not quite sure if we need all the xchgs Alexey uses when accessing things.
21      * Can always add them later ... :)
22      */
23     
24     /*
25      * Passing parameters to the root seems to be done more awkwardly than really
26      * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
27      * verified. FIXME.
28      */
29     
30     #define PERFECT_HASH_THRESHOLD	64	/* use perfect hash if not bigger */
31     #define DEFAULT_HASH_SIZE	64	/* optimized for diffserv */
32     
33     
34     #if 1 /* control */
35     #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
36     #else
37     #define DPRINTK(format,args...)
38     #endif
39     
40     #if 0 /* data */
41     #define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
42     #else
43     #define D2PRINTK(format,args...)
44     #endif
45     
46     
47     #define	PRIV(tp)	((struct tcindex_data *) (tp)->root)
48     
49     
50     struct tcindex_filter_result {
51     	struct tcf_police *police;
52     	struct tcf_result res;
53     };
54     
55     struct tcindex_filter {
56     	__u16 key;
57     	struct tcindex_filter_result result;
58     	struct tcindex_filter *next;
59     };
60     
61     
62     struct tcindex_data {
63     	struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
64     	struct tcindex_filter **h; /* imperfect hash; only used if !perfect;
65     				      NULL if unused */
66     	__u16 mask;		/* AND key with mask */
67     	int shift;		/* shift ANDed key to the right */
68     	int hash;		/* hash table size; 0 if undefined */
69     	int alloc_hash;		/* allocated size */
70     	int fall_through;	/* 0: only classify if explicit match */
71     };
72     
73     
74     static struct tcindex_filter_result *lookup(struct tcindex_data *p,__u16 key)
75     {
76     	struct tcindex_filter *f;
77     
78     	if (p->perfect)
79     		return p->perfect[key].res.class ? p->perfect+key : NULL;
80     	if (!p->h)
81     		return NULL;
82     	for (f = p->h[key % p->hash]; f; f = f->next) {
83     		if (f->key == key)
84     			return &f->result;
85     	}
86     	return NULL;
87     }
88     
89     
90     static int tcindex_classify(struct sk_buff *skb, struct tcf_proto *tp,
91     			  struct tcf_result *res)
92     {
93     	struct tcindex_data *p = PRIV(tp);
94     	struct tcindex_filter_result *f;
95     
96     	D2PRINTK("tcindex_classify(skb %p,tp %p,res %p),p %p\n",skb,tp,res,p);
97     
98     	f = lookup(p,(skb->tc_index & p->mask) >> p->shift);
99     	if (!f) {
100     		if (!p->fall_through)
101     			return -1;
102     		res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle),
103     		    (skb->tc_index& p->mask) >> p->shift);
104     		res->class = 0;
105     		D2PRINTK("alg 0x%x\n",res->classid);
106     		return 0;
107     	}
108     	*res = f->res;
109     	D2PRINTK("map 0x%x\n",res->classid);
110     #ifdef CONFIG_NET_CLS_POLICE
111     	if (f->police) {
112     		int result;
113     
114     		result = tcf_police(skb,f->police);
115     		D2PRINTK("police %d\n",res);
116     		return result;
117     	}
118     #endif
119     	return 0;
120     }
121     
122     
123     static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
124     {
125     	struct tcindex_data *p = PRIV(tp);
126     	struct tcindex_filter_result *r;
127     
128     	DPRINTK("tcindex_get(tp %p,handle 0x%08x)\n",tp,handle);
129     	if (p->perfect && handle >= p->alloc_hash)
130     		return 0;
131     	r = lookup(PRIV(tp),handle);
132     	return r && r->res.class ? (unsigned long) r : 0;
133     }
134     
135     
136     static void tcindex_put(struct tcf_proto *tp, unsigned long f)
137     {
138     	DPRINTK("tcindex_put(tp %p,f 0x%lx)\n",tp,f);
139     }
140     
141     
142     static int tcindex_init(struct tcf_proto *tp)
143     {
144     	struct tcindex_data *p;
145     
146     	DPRINTK("tcindex_init(tp %p)\n",tp);
147     	MOD_INC_USE_COUNT;
148     	p = kmalloc(sizeof(struct tcindex_data),GFP_KERNEL);
149     	if (!p) {
150     		MOD_DEC_USE_COUNT;
151     		return -ENOMEM;
152     	}
153     	tp->root = p;
154     	p->perfect = NULL;
155     	p->h = NULL;
156     	p->hash = 0;
157     	p->mask = 0xffff;
158     	p->shift = 0;
159     	p->fall_through = 1;
160     	return 0;
161     }
162     
163     
164     static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
165     {
166     	struct tcindex_data *p = PRIV(tp);
167     	struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
168     	struct tcindex_filter *f = NULL;
169     	unsigned long cl;
170     
171     	DPRINTK("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n",tp,arg,p,f);
172     	if (p->perfect) {
173     		if (!r->res.class)
174     			return -ENOENT;
175     	} else {
176     		int i;
177     		struct tcindex_filter **walk = NULL;
178     
179     		for (i = 0; i < p->hash; i++)
180     			for (walk = p->h+i; *walk; walk = &(*walk)->next)
181     				if (&(*walk)->result == r)
182     					goto found;
183     		return -ENOENT;
184     
185     found:
186     		f = *walk;
187     		tcf_tree_lock(tp); 
188     		*walk = f->next;
189     		tcf_tree_unlock(tp);
190     	}
191     	cl = __cls_set_class(&r->res.class,0);
192     	if (cl)
193     		tp->q->ops->cl_ops->unbind_tcf(tp->q,cl);
194     #ifdef CONFIG_NET_CLS_POLICE
195     	tcf_police_release(r->police);
196     #endif
197     	if (f)
198     		kfree(f);
199     	return 0;
200     }
201     
202     
203     /*
204      * There are no parameters for tcindex_init, so we overload tcindex_change
205      */
206     
207     
208     static int tcindex_change(struct tcf_proto *tp,unsigned long base,u32 handle,
209         struct rtattr **tca,unsigned long *arg)
210     {
211     	struct tcindex_filter_result new_filter_result = {
212     		NULL,		/* no policing */
213     		{ 0,0 },	/* no classification */
214     	};
215     	struct rtattr *opt = tca[TCA_OPTIONS-1];
216     	struct rtattr *tb[TCA_TCINDEX_MAX];
217     	struct tcindex_data *p = PRIV(tp);
218     	struct tcindex_filter *f;
219     	struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
220     	struct tcindex_filter **walk;
221     	int hash,shift;
222     	__u16 mask;
223     
224     	DPRINTK("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
225     	    "p %p,r %p\n",tp,handle,tca,arg,opt,p,r);
226     	if (arg)
227     		DPRINTK("*arg = 0x%lx\n",*arg);
228     	if (!opt)
229     		return 0;
230     	if (rtattr_parse(tb,TCA_TCINDEX_MAX,RTA_DATA(opt),RTA_PAYLOAD(opt)) < 0)
231     		return -EINVAL;
232     	if (!tb[TCA_TCINDEX_HASH-1]) {
233     		hash = p->hash;
234     	} else {
235     		if (RTA_PAYLOAD(tb[TCA_TCINDEX_HASH-1]) < sizeof(int))
236     			return -EINVAL;
237     		hash = *(int *) RTA_DATA(tb[TCA_TCINDEX_HASH-1]);
238     	}
239     	if (!tb[TCA_TCINDEX_MASK-1]) {
240     		mask = p->mask;
241     	} else {
242     		if (RTA_PAYLOAD(tb[TCA_TCINDEX_MASK-1]) < sizeof(__u16))
243     			return -EINVAL;
244     		mask = *(__u16 *) RTA_DATA(tb[TCA_TCINDEX_MASK-1]);
245     	}
246     	if (!tb[TCA_TCINDEX_SHIFT-1])
247     		shift = p->shift;
248     	else {
249     		if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(__u16))
250     			return -EINVAL;
251     		shift = *(int *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]);
252     	}
253     	if (p->perfect && hash <= (mask >> shift))
254     		return -EBUSY;
255     	if (p->perfect && hash > p->alloc_hash)
256     		return -EBUSY;
257     	if (p->h && hash != p->alloc_hash)
258     		return -EBUSY;
259     	p->hash = hash;
260     	p->mask = mask;
261     	p->shift = shift;
262     	if (tb[TCA_TCINDEX_FALL_THROUGH-1]) {
263     		if (RTA_PAYLOAD(tb[TCA_TCINDEX_FALL_THROUGH-1]) < sizeof(int))
264     			return -EINVAL;
265     		p->fall_through =
266     		    *(int *) RTA_DATA(tb[TCA_TCINDEX_FALL_THROUGH-1]);
267     	}
268     	DPRINTK("classid/police %p/%p\n",tb[TCA_TCINDEX_CLASSID-1],
269     	    tb[TCA_TCINDEX_POLICE-1]);
270     	if (!tb[TCA_TCINDEX_CLASSID-1] && !tb[TCA_TCINDEX_POLICE-1])
271     		return 0;
272     	if (!hash) {
273     		if ((mask >> shift) < PERFECT_HASH_THRESHOLD) {
274     			p->hash = (mask >> shift)+1;
275     		} else {
276     			p->hash = DEFAULT_HASH_SIZE;
277     		}
278     	}
279     	if (!p->perfect && !p->h) {
280     		p->alloc_hash = p->hash;
281     		DPRINTK("hash %d mask %d\n",p->hash,p->mask);
282     		if (p->hash > (mask >> shift)) {
283     			p->perfect = kmalloc(p->hash*
284     			    sizeof(struct tcindex_filter_result),GFP_KERNEL);
285     			if (!p->perfect)
286     				return -ENOMEM;
287     			memset(p->perfect, 0,
288     			       p->hash * sizeof(struct tcindex_filter_result));
289     		} else {
290     			p->h = kmalloc(p->hash*sizeof(struct tcindex_filter *),
291     			    GFP_KERNEL);
292     			if (!p->h)
293     				return -ENOMEM;
294     			memset(p->h, 0, p->hash*sizeof(struct tcindex_filter *));
295     		}
296     	}
297     	/*
298     	 * Note: this could be as restrictive as
299     	 * if (handle & ~(mask >> shift))
300     	 * but then, we'd fail handles that may become valid after some
301     	 * future mask change. While this is extremely unlikely to ever
302     	 * matter, the check below is safer (and also more
303     	 * backwards-compatible).
304     	 */
305     	if (p->perfect && handle >= p->alloc_hash)
306     		return -EINVAL;
307     	if (p->perfect) {
308     		r = p->perfect+handle;
309     	} else {
310     		r = lookup(p,handle);
311     		DPRINTK("r=%p\n",r);
312     		if (!r)
313     			r = &new_filter_result;
314     	}
315     	DPRINTK("r=%p\n",r);
316     	if (tb[TCA_TCINDEX_CLASSID-1]) {
317     		unsigned long cl = cls_set_class(tp,&r->res.class,0);
318     
319     		if (cl)
320     			tp->q->ops->cl_ops->unbind_tcf(tp->q,cl);
321     		r->res.classid = *(__u32 *) RTA_DATA(tb[TCA_TCINDEX_CLASSID-1]);
322     		r->res.class = tp->q->ops->cl_ops->bind_tcf(tp->q,base,
323     							    r->res.classid);
324     		if (!r->res.class) {
325     			r->res.classid = 0;
326     			return -ENOENT;
327     		}
328             }
329     #ifdef CONFIG_NET_CLS_POLICE
330     	{
331     		struct tcf_police *police;
332     
333     		police = tb[TCA_TCINDEX_POLICE-1] ?
334     		    tcf_police_locate(tb[TCA_TCINDEX_POLICE-1],NULL) : NULL;
335     		tcf_tree_lock(tp);
336     		police = xchg(&r->police,police);
337     		tcf_tree_unlock(tp);
338     		tcf_police_release(police);
339     	}
340     #endif
341     	if (r != &new_filter_result)
342     		return 0;
343     	f = kmalloc(sizeof(struct tcindex_filter),GFP_KERNEL);
344     	if (!f)
345     		return -ENOMEM;
346     	f->key = handle;
347     	f->result = new_filter_result;
348     	f->next = NULL;
349     	for (walk = p->h+(handle % p->hash); *walk; walk = &(*walk)->next)
350     		/* nothing */;
351     	wmb();
352     	*walk = f;
353     	return 0;
354     }
355     
356     
357     static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
358     {
359     	struct tcindex_data *p = PRIV(tp);
360     	struct tcindex_filter *f,*next;
361     	int i;
362     
363     	DPRINTK("tcindex_walk(tp %p,walker %p),p %p\n",tp,walker,p);
364     	if (p->perfect) {
365     		for (i = 0; i < p->hash; i++) {
366     			if (!p->perfect[i].res.class)
367     				continue;
368     			if (walker->count >= walker->skip) {
369     				if (walker->fn(tp,
370     				    (unsigned long) (p->perfect+i), walker)
371     				     < 0) {
372     					walker->stop = 1;
373     					return;
374     				}
375     			}
376     			walker->count++;
377     		}
378     	}
379     	if (!p->h)
380     		return;
381     	for (i = 0; i < p->hash; i++) {
382     		for (f = p->h[i]; f; f = next) {
383     			next = f->next;
384     			if (walker->count >= walker->skip) {
385     				if (walker->fn(tp,(unsigned long) &f->result,
386     				    walker) < 0) {
387     					walker->stop = 1;
388     					return;
389     				}
390     			}
391     			walker->count++;
392     		}
393     	}
394     }
395     
396     
397     static int tcindex_destroy_element(struct tcf_proto *tp,
398         unsigned long arg, struct tcf_walker *walker)
399     {
400     	return tcindex_delete(tp,arg);
401     }
402     
403     
404     static void tcindex_destroy(struct tcf_proto *tp)
405     {
406     	struct tcindex_data *p = PRIV(tp);
407     	struct tcf_walker walker;
408     
409     	DPRINTK("tcindex_destroy(tp %p),p %p\n",tp,p);
410     	walker.count = 0;
411     	walker.skip = 0;
412     	walker.fn = &tcindex_destroy_element;
413     	tcindex_walk(tp,&walker);
414     	if (p->perfect)
415     		kfree(p->perfect);
416     	if (p->h)
417     		kfree(p->h);
418     	kfree(p);
419     	tp->root = NULL;
420     	MOD_DEC_USE_COUNT;
421     }
422     
423     
424     #ifdef CONFIG_RTNETLINK
425     
426     static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
427         struct sk_buff *skb, struct tcmsg *t)
428     {
429     	struct tcindex_data *p = PRIV(tp);
430     	struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
431     	unsigned char *b = skb->tail;
432     	struct rtattr *rta;
433     
434     	DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
435     	    tp,fh,skb,t,p,r,b);
436     	DPRINTK("p->perfect %p p->h %p\n",p->perfect,p->h);
437     	rta = (struct rtattr *) b;
438     	RTA_PUT(skb,TCA_OPTIONS,0,NULL);
439     	if (!fh) {
440     		t->tcm_handle = ~0; /* whatever ... */
441     		RTA_PUT(skb,TCA_TCINDEX_HASH,sizeof(p->hash),&p->hash);
442     		RTA_PUT(skb,TCA_TCINDEX_MASK,sizeof(p->mask),&p->mask);
443     		RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift);
444     		RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through),
445     		    &p->fall_through);
446     	} else {
447     		if (p->perfect) {
448     			t->tcm_handle = r-p->perfect;
449     		} else {
450     			struct tcindex_filter *f;
451     			int i;
452     
453     			t->tcm_handle = 0;
454     			for (i = 0; !t->tcm_handle && i < p->hash; i++) {
455     				for (f = p->h[i]; !t->tcm_handle && f;
456     				     f = f->next) {
457     					if (&f->result == r)
458     						t->tcm_handle = f->key;
459     				}
460     			}
461     		}
462     		DPRINTK("handle = %d\n",t->tcm_handle);
463     		if (r->res.class)
464     			RTA_PUT(skb, TCA_TCINDEX_CLASSID, 4, &r->res.classid);
465     #ifdef CONFIG_NET_CLS_POLICE
466     		if (r->police) {
467     			struct rtattr *p_rta = (struct rtattr *) skb->tail;
468     
469     			RTA_PUT(skb,TCA_TCINDEX_POLICE,0,NULL);
470     			if (tcf_police_dump(skb,r->police) < 0)
471     				goto rtattr_failure;
472     			p_rta->rta_len = skb->tail-(u8 *) p_rta;
473     		}
474     #endif
475     	}
476     	rta->rta_len = skb->tail-b;
477     	return skb->len;
478     
479     rtattr_failure:
480     	skb_trim(skb, b - skb->data);
481     	return -1;
482     }
483     
484     #endif
485     
486     
487     struct tcf_proto_ops cls_tcindex_ops = {
488     	NULL,
489     	"tcindex",
490     	tcindex_classify,
491     	tcindex_init,
492     	tcindex_destroy,
493     
494     	tcindex_get,
495     	tcindex_put,
496     	tcindex_change,
497     	tcindex_delete,
498     	tcindex_walk,
499     #ifdef CONFIG_RTNETLINK
500     	tcindex_dump
501     #else
502     	NULL
503     #endif
504     };
505     
506     
507     #ifdef MODULE
508     int init_module(void)
509     {
510     	return register_tcf_proto_ops(&cls_tcindex_ops);
511     }
512     
513     void cleanup_module(void) 
514     {
515     	unregister_tcf_proto_ops(&cls_tcindex_ops);
516     }
517     #endif
518