File: /usr/src/linux/net/sched/cls_tcindex.c
1 /*
2 * net/sched/cls_tcindex.c Packet classifier for skb->tc_index
3 *
4 * Written 1998,1999 by Werner Almesberger, EPFL ICA
5 */
6
7 #include <linux/config.h>
8 #include <linux/module.h>
9 #include <linux/types.h>
10 #include <linux/kernel.h>
11 #include <linux/skbuff.h>
12 #include <linux/errno.h>
13 #include <linux/netdevice.h>
14 #include <net/ip.h>
15 #include <net/pkt_sched.h>
16 #include <net/route.h>
17
18
19 /*
20 * Not quite sure if we need all the xchgs Alexey uses when accessing things.
21 * Can always add them later ... :)
22 */
23
24 /*
25 * Passing parameters to the root seems to be done more awkwardly than really
26 * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
27 * verified. FIXME.
28 */
29
30 #define PERFECT_HASH_THRESHOLD 64 /* use perfect hash if not bigger */
31 #define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */
32
33
34 #if 1 /* control */
35 #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
36 #else
37 #define DPRINTK(format,args...)
38 #endif
39
40 #if 0 /* data */
41 #define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
42 #else
43 #define D2PRINTK(format,args...)
44 #endif
45
46
47 #define PRIV(tp) ((struct tcindex_data *) (tp)->root)
48
49
50 struct tcindex_filter_result {
51 struct tcf_police *police;
52 struct tcf_result res;
53 };
54
55 struct tcindex_filter {
56 __u16 key;
57 struct tcindex_filter_result result;
58 struct tcindex_filter *next;
59 };
60
61
62 struct tcindex_data {
63 struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
64 struct tcindex_filter **h; /* imperfect hash; only used if !perfect;
65 NULL if unused */
66 __u16 mask; /* AND key with mask */
67 int shift; /* shift ANDed key to the right */
68 int hash; /* hash table size; 0 if undefined */
69 int alloc_hash; /* allocated size */
70 int fall_through; /* 0: only classify if explicit match */
71 };
72
73
74 static struct tcindex_filter_result *lookup(struct tcindex_data *p,__u16 key)
75 {
76 struct tcindex_filter *f;
77
78 if (p->perfect)
79 return p->perfect[key].res.class ? p->perfect+key : NULL;
80 if (!p->h)
81 return NULL;
82 for (f = p->h[key % p->hash]; f; f = f->next) {
83 if (f->key == key)
84 return &f->result;
85 }
86 return NULL;
87 }
88
89
90 static int tcindex_classify(struct sk_buff *skb, struct tcf_proto *tp,
91 struct tcf_result *res)
92 {
93 struct tcindex_data *p = PRIV(tp);
94 struct tcindex_filter_result *f;
95
96 D2PRINTK("tcindex_classify(skb %p,tp %p,res %p),p %p\n",skb,tp,res,p);
97
98 f = lookup(p,(skb->tc_index & p->mask) >> p->shift);
99 if (!f) {
100 if (!p->fall_through)
101 return -1;
102 res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle),
103 (skb->tc_index& p->mask) >> p->shift);
104 res->class = 0;
105 D2PRINTK("alg 0x%x\n",res->classid);
106 return 0;
107 }
108 *res = f->res;
109 D2PRINTK("map 0x%x\n",res->classid);
110 #ifdef CONFIG_NET_CLS_POLICE
111 if (f->police) {
112 int result;
113
114 result = tcf_police(skb,f->police);
115 D2PRINTK("police %d\n",res);
116 return result;
117 }
118 #endif
119 return 0;
120 }
121
122
123 static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
124 {
125 struct tcindex_data *p = PRIV(tp);
126 struct tcindex_filter_result *r;
127
128 DPRINTK("tcindex_get(tp %p,handle 0x%08x)\n",tp,handle);
129 if (p->perfect && handle >= p->alloc_hash)
130 return 0;
131 r = lookup(PRIV(tp),handle);
132 return r && r->res.class ? (unsigned long) r : 0;
133 }
134
135
136 static void tcindex_put(struct tcf_proto *tp, unsigned long f)
137 {
138 DPRINTK("tcindex_put(tp %p,f 0x%lx)\n",tp,f);
139 }
140
141
142 static int tcindex_init(struct tcf_proto *tp)
143 {
144 struct tcindex_data *p;
145
146 DPRINTK("tcindex_init(tp %p)\n",tp);
147 MOD_INC_USE_COUNT;
148 p = kmalloc(sizeof(struct tcindex_data),GFP_KERNEL);
149 if (!p) {
150 MOD_DEC_USE_COUNT;
151 return -ENOMEM;
152 }
153 tp->root = p;
154 p->perfect = NULL;
155 p->h = NULL;
156 p->hash = 0;
157 p->mask = 0xffff;
158 p->shift = 0;
159 p->fall_through = 1;
160 return 0;
161 }
162
163
164 static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
165 {
166 struct tcindex_data *p = PRIV(tp);
167 struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
168 struct tcindex_filter *f = NULL;
169 unsigned long cl;
170
171 DPRINTK("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n",tp,arg,p,f);
172 if (p->perfect) {
173 if (!r->res.class)
174 return -ENOENT;
175 } else {
176 int i;
177 struct tcindex_filter **walk = NULL;
178
179 for (i = 0; i < p->hash; i++)
180 for (walk = p->h+i; *walk; walk = &(*walk)->next)
181 if (&(*walk)->result == r)
182 goto found;
183 return -ENOENT;
184
185 found:
186 f = *walk;
187 tcf_tree_lock(tp);
188 *walk = f->next;
189 tcf_tree_unlock(tp);
190 }
191 cl = __cls_set_class(&r->res.class,0);
192 if (cl)
193 tp->q->ops->cl_ops->unbind_tcf(tp->q,cl);
194 #ifdef CONFIG_NET_CLS_POLICE
195 tcf_police_release(r->police);
196 #endif
197 if (f)
198 kfree(f);
199 return 0;
200 }
201
202
203 /*
204 * There are no parameters for tcindex_init, so we overload tcindex_change
205 */
206
207
208 static int tcindex_change(struct tcf_proto *tp,unsigned long base,u32 handle,
209 struct rtattr **tca,unsigned long *arg)
210 {
211 struct tcindex_filter_result new_filter_result = {
212 NULL, /* no policing */
213 { 0,0 }, /* no classification */
214 };
215 struct rtattr *opt = tca[TCA_OPTIONS-1];
216 struct rtattr *tb[TCA_TCINDEX_MAX];
217 struct tcindex_data *p = PRIV(tp);
218 struct tcindex_filter *f;
219 struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
220 struct tcindex_filter **walk;
221 int hash,shift;
222 __u16 mask;
223
224 DPRINTK("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
225 "p %p,r %p\n",tp,handle,tca,arg,opt,p,r);
226 if (arg)
227 DPRINTK("*arg = 0x%lx\n",*arg);
228 if (!opt)
229 return 0;
230 if (rtattr_parse(tb,TCA_TCINDEX_MAX,RTA_DATA(opt),RTA_PAYLOAD(opt)) < 0)
231 return -EINVAL;
232 if (!tb[TCA_TCINDEX_HASH-1]) {
233 hash = p->hash;
234 } else {
235 if (RTA_PAYLOAD(tb[TCA_TCINDEX_HASH-1]) < sizeof(int))
236 return -EINVAL;
237 hash = *(int *) RTA_DATA(tb[TCA_TCINDEX_HASH-1]);
238 }
239 if (!tb[TCA_TCINDEX_MASK-1]) {
240 mask = p->mask;
241 } else {
242 if (RTA_PAYLOAD(tb[TCA_TCINDEX_MASK-1]) < sizeof(__u16))
243 return -EINVAL;
244 mask = *(__u16 *) RTA_DATA(tb[TCA_TCINDEX_MASK-1]);
245 }
246 if (!tb[TCA_TCINDEX_SHIFT-1])
247 shift = p->shift;
248 else {
249 if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(__u16))
250 return -EINVAL;
251 shift = *(int *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]);
252 }
253 if (p->perfect && hash <= (mask >> shift))
254 return -EBUSY;
255 if (p->perfect && hash > p->alloc_hash)
256 return -EBUSY;
257 if (p->h && hash != p->alloc_hash)
258 return -EBUSY;
259 p->hash = hash;
260 p->mask = mask;
261 p->shift = shift;
262 if (tb[TCA_TCINDEX_FALL_THROUGH-1]) {
263 if (RTA_PAYLOAD(tb[TCA_TCINDEX_FALL_THROUGH-1]) < sizeof(int))
264 return -EINVAL;
265 p->fall_through =
266 *(int *) RTA_DATA(tb[TCA_TCINDEX_FALL_THROUGH-1]);
267 }
268 DPRINTK("classid/police %p/%p\n",tb[TCA_TCINDEX_CLASSID-1],
269 tb[TCA_TCINDEX_POLICE-1]);
270 if (!tb[TCA_TCINDEX_CLASSID-1] && !tb[TCA_TCINDEX_POLICE-1])
271 return 0;
272 if (!hash) {
273 if ((mask >> shift) < PERFECT_HASH_THRESHOLD) {
274 p->hash = (mask >> shift)+1;
275 } else {
276 p->hash = DEFAULT_HASH_SIZE;
277 }
278 }
279 if (!p->perfect && !p->h) {
280 p->alloc_hash = p->hash;
281 DPRINTK("hash %d mask %d\n",p->hash,p->mask);
282 if (p->hash > (mask >> shift)) {
283 p->perfect = kmalloc(p->hash*
284 sizeof(struct tcindex_filter_result),GFP_KERNEL);
285 if (!p->perfect)
286 return -ENOMEM;
287 memset(p->perfect, 0,
288 p->hash * sizeof(struct tcindex_filter_result));
289 } else {
290 p->h = kmalloc(p->hash*sizeof(struct tcindex_filter *),
291 GFP_KERNEL);
292 if (!p->h)
293 return -ENOMEM;
294 memset(p->h, 0, p->hash*sizeof(struct tcindex_filter *));
295 }
296 }
297 /*
298 * Note: this could be as restrictive as
299 * if (handle & ~(mask >> shift))
300 * but then, we'd fail handles that may become valid after some
301 * future mask change. While this is extremely unlikely to ever
302 * matter, the check below is safer (and also more
303 * backwards-compatible).
304 */
305 if (p->perfect && handle >= p->alloc_hash)
306 return -EINVAL;
307 if (p->perfect) {
308 r = p->perfect+handle;
309 } else {
310 r = lookup(p,handle);
311 DPRINTK("r=%p\n",r);
312 if (!r)
313 r = &new_filter_result;
314 }
315 DPRINTK("r=%p\n",r);
316 if (tb[TCA_TCINDEX_CLASSID-1]) {
317 unsigned long cl = cls_set_class(tp,&r->res.class,0);
318
319 if (cl)
320 tp->q->ops->cl_ops->unbind_tcf(tp->q,cl);
321 r->res.classid = *(__u32 *) RTA_DATA(tb[TCA_TCINDEX_CLASSID-1]);
322 r->res.class = tp->q->ops->cl_ops->bind_tcf(tp->q,base,
323 r->res.classid);
324 if (!r->res.class) {
325 r->res.classid = 0;
326 return -ENOENT;
327 }
328 }
329 #ifdef CONFIG_NET_CLS_POLICE
330 {
331 struct tcf_police *police;
332
333 police = tb[TCA_TCINDEX_POLICE-1] ?
334 tcf_police_locate(tb[TCA_TCINDEX_POLICE-1],NULL) : NULL;
335 tcf_tree_lock(tp);
336 police = xchg(&r->police,police);
337 tcf_tree_unlock(tp);
338 tcf_police_release(police);
339 }
340 #endif
341 if (r != &new_filter_result)
342 return 0;
343 f = kmalloc(sizeof(struct tcindex_filter),GFP_KERNEL);
344 if (!f)
345 return -ENOMEM;
346 f->key = handle;
347 f->result = new_filter_result;
348 f->next = NULL;
349 for (walk = p->h+(handle % p->hash); *walk; walk = &(*walk)->next)
350 /* nothing */;
351 wmb();
352 *walk = f;
353 return 0;
354 }
355
356
357 static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
358 {
359 struct tcindex_data *p = PRIV(tp);
360 struct tcindex_filter *f,*next;
361 int i;
362
363 DPRINTK("tcindex_walk(tp %p,walker %p),p %p\n",tp,walker,p);
364 if (p->perfect) {
365 for (i = 0; i < p->hash; i++) {
366 if (!p->perfect[i].res.class)
367 continue;
368 if (walker->count >= walker->skip) {
369 if (walker->fn(tp,
370 (unsigned long) (p->perfect+i), walker)
371 < 0) {
372 walker->stop = 1;
373 return;
374 }
375 }
376 walker->count++;
377 }
378 }
379 if (!p->h)
380 return;
381 for (i = 0; i < p->hash; i++) {
382 for (f = p->h[i]; f; f = next) {
383 next = f->next;
384 if (walker->count >= walker->skip) {
385 if (walker->fn(tp,(unsigned long) &f->result,
386 walker) < 0) {
387 walker->stop = 1;
388 return;
389 }
390 }
391 walker->count++;
392 }
393 }
394 }
395
396
397 static int tcindex_destroy_element(struct tcf_proto *tp,
398 unsigned long arg, struct tcf_walker *walker)
399 {
400 return tcindex_delete(tp,arg);
401 }
402
403
404 static void tcindex_destroy(struct tcf_proto *tp)
405 {
406 struct tcindex_data *p = PRIV(tp);
407 struct tcf_walker walker;
408
409 DPRINTK("tcindex_destroy(tp %p),p %p\n",tp,p);
410 walker.count = 0;
411 walker.skip = 0;
412 walker.fn = &tcindex_destroy_element;
413 tcindex_walk(tp,&walker);
414 if (p->perfect)
415 kfree(p->perfect);
416 if (p->h)
417 kfree(p->h);
418 kfree(p);
419 tp->root = NULL;
420 MOD_DEC_USE_COUNT;
421 }
422
423
424 #ifdef CONFIG_RTNETLINK
425
426 static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
427 struct sk_buff *skb, struct tcmsg *t)
428 {
429 struct tcindex_data *p = PRIV(tp);
430 struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
431 unsigned char *b = skb->tail;
432 struct rtattr *rta;
433
434 DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
435 tp,fh,skb,t,p,r,b);
436 DPRINTK("p->perfect %p p->h %p\n",p->perfect,p->h);
437 rta = (struct rtattr *) b;
438 RTA_PUT(skb,TCA_OPTIONS,0,NULL);
439 if (!fh) {
440 t->tcm_handle = ~0; /* whatever ... */
441 RTA_PUT(skb,TCA_TCINDEX_HASH,sizeof(p->hash),&p->hash);
442 RTA_PUT(skb,TCA_TCINDEX_MASK,sizeof(p->mask),&p->mask);
443 RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift);
444 RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through),
445 &p->fall_through);
446 } else {
447 if (p->perfect) {
448 t->tcm_handle = r-p->perfect;
449 } else {
450 struct tcindex_filter *f;
451 int i;
452
453 t->tcm_handle = 0;
454 for (i = 0; !t->tcm_handle && i < p->hash; i++) {
455 for (f = p->h[i]; !t->tcm_handle && f;
456 f = f->next) {
457 if (&f->result == r)
458 t->tcm_handle = f->key;
459 }
460 }
461 }
462 DPRINTK("handle = %d\n",t->tcm_handle);
463 if (r->res.class)
464 RTA_PUT(skb, TCA_TCINDEX_CLASSID, 4, &r->res.classid);
465 #ifdef CONFIG_NET_CLS_POLICE
466 if (r->police) {
467 struct rtattr *p_rta = (struct rtattr *) skb->tail;
468
469 RTA_PUT(skb,TCA_TCINDEX_POLICE,0,NULL);
470 if (tcf_police_dump(skb,r->police) < 0)
471 goto rtattr_failure;
472 p_rta->rta_len = skb->tail-(u8 *) p_rta;
473 }
474 #endif
475 }
476 rta->rta_len = skb->tail-b;
477 return skb->len;
478
479 rtattr_failure:
480 skb_trim(skb, b - skb->data);
481 return -1;
482 }
483
484 #endif
485
486
487 struct tcf_proto_ops cls_tcindex_ops = {
488 NULL,
489 "tcindex",
490 tcindex_classify,
491 tcindex_init,
492 tcindex_destroy,
493
494 tcindex_get,
495 tcindex_put,
496 tcindex_change,
497 tcindex_delete,
498 tcindex_walk,
499 #ifdef CONFIG_RTNETLINK
500 tcindex_dump
501 #else
502 NULL
503 #endif
504 };
505
506
507 #ifdef MODULE
508 int init_module(void)
509 {
510 return register_tcf_proto_ops(&cls_tcindex_ops);
511 }
512
513 void cleanup_module(void)
514 {
515 unregister_tcf_proto_ops(&cls_tcindex_ops);
516 }
517 #endif
518