File: /usr/src/linux/net/sched/sch_gred.c

1     /*
2      * net/sched/sch_gred.c	Generic Random Early Detection queue.
3      *
4      *
5      *              This program is free software; you can redistribute it and/or
6      *              modify it under the terms of the GNU General Public License
7      *              as published by the Free Software Foundation; either version
8      *              2 of the License, or (at your option) any later version.
9      *
10      * Authors:    J Hadi Salim (hadi@nortelnetworks.com) 1998,1999
11      *
12      *             991129: -  Bug fix with grio mode
13      *		       - a better sing. AvgQ mode with Grio(WRED)
14      *		       - A finer grained VQ dequeue based on sugestion
15      *		         from Ren Liu
16      *		       - More error checks
17      *
18      *
19      *
20      *  For all the glorious comments look at Alexey's sch_red.c
21      */
22     
23     #include <linux/config.h>
24     #include <linux/module.h>
25     #include <asm/uaccess.h>
26     #include <asm/system.h>
27     #include <asm/bitops.h>
28     #include <linux/types.h>
29     #include <linux/kernel.h>
30     #include <linux/sched.h>
31     #include <linux/string.h>
32     #include <linux/mm.h>
33     #include <linux/socket.h>
34     #include <linux/sockios.h>
35     #include <linux/in.h>
36     #include <linux/errno.h>
37     #include <linux/interrupt.h>
38     #include <linux/if_ether.h>
39     #include <linux/inet.h>
40     #include <linux/netdevice.h>
41     #include <linux/etherdevice.h>
42     #include <linux/notifier.h>
43     #include <net/ip.h>
44     #include <net/route.h>
45     #include <linux/skbuff.h>
46     #include <net/sock.h>
47     #include <net/pkt_sched.h>
48     
49     #if 1 /* control */
50     #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
51     #else
52     #define DPRINTK(format,args...)
53     #endif
54     
55     #if 0 /* data */
56     #define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
57     #else
58     #define D2PRINTK(format,args...)
59     #endif
60     
61     struct gred_sched_data;
62     struct gred_sched;
63     
64     struct gred_sched_data
65     {
66     /* Parameters */
67     	u32		limit;		/* HARD maximal queue length	*/
68     	u32		qth_min;	/* Min average length threshold: A scaled */
69     	u32		qth_max;	/* Max average length threshold: A scaled */
70     	u32      	DP;		/* the drop pramaters */
71     	char		Wlog;		/* log(W)		*/
72     	char		Plog;		/* random number bits	*/
73     	u32		Scell_max;
74     	u32		Rmask;
75     	u32		bytesin;	/* bytes seen on virtualQ so far*/
76     	u32		packetsin;	/* packets seen on virtualQ so far*/
77     	u32		backlog;	/* bytes on the virtualQ */
78     	u32		forced;	/* packets dropped for exceeding limits */
79     	u32		early;	/* packets dropped as a warning */
80     	u32		other;	/* packets dropped by invoking drop() */
81     	u32		pdrop;	/* packets dropped because we exceeded physical queue limits */
82     	char		Scell_log;
83     	u8		Stab[256];
84     	u8              prio;        /* the prio of this vq */
85     
86     /* Variables */
87     	unsigned long	qave;		/* Average queue length: A scaled */
88     	int		qcount;		/* Packets since last random number generation */
89     	u32		qR;		/* Cached random number */
90     
91     	psched_time_t	qidlestart;	/* Start of idle period	*/
92     };
93     
94     struct gred_sched
95     {
96     	struct gred_sched_data *tab[MAX_DPs];
97     	u32 		DPs;   
98     	u32 		def; 
99     	u8 		initd; 
100     	u8 		grio; 
101     	u8 		eqp; 
102     };
103     
104     static int
105     gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
106     {
107     	psched_time_t now;
108     	struct gred_sched_data *q=NULL;
109     	struct gred_sched *t= (struct gred_sched *)sch->data;
110     	unsigned long	qave=0;	
111     	int i=0;
112     
113     	if (!t->initd && skb_queue_len(&sch->q) <= sch->dev->tx_queue_len) {
114     		D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n");
115     		goto do_enqueue;
116     	}
117     
118     
119     	if ( ((skb->tc_index&0xf) > t->DPs) || !(q=t->tab[skb->tc_index&0xf])) {
120     		printk("GRED: setting to default (%d)\n ",t->def);
121     		if (!(q=t->tab[t->def])) {
122     			DPRINTK("GRED: setting to default FAILED! dropping!! "
123     			    "(%d)\n ", t->def);
124     			goto drop;
125     		}
126     		/* fix tc_index? --could be controvesial but needed for
127     		   requeueing */
128     		skb->tc_index=(skb->tc_index&0xfffffff0) | t->def;
129     	}
130     
131     	D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d "
132     	    "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog,
133     	    sch->stats.backlog);
134     	/* sum up all the qaves of prios <= to ours to get the new qave*/
135     	if (!t->eqp && t->grio) {
136     		for (i=0;i<t->DPs;i++) {
137     			if ((!t->tab[i]) || (i==q->DP))	
138     				continue; 
139     				
140     			if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart)))
141     				qave +=t->tab[i]->qave;
142     		}
143     			
144     	}
145     
146     	q->packetsin++;
147     	q->bytesin+=skb->len;
148     
149     	if (t->eqp && t->grio) {
150     		qave=0;
151     		q->qave=t->tab[t->def]->qave;
152     		q->qidlestart=t->tab[t->def]->qidlestart;
153     	}
154     
155     	if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
156     		long us_idle;
157     		PSCHED_GET_TIME(now);
158     		us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max, 0);
159     		PSCHED_SET_PASTPERFECT(q->qidlestart);
160     
161     		q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF];
162     	} else {
163     		if (t->eqp) {
164     			q->qave += sch->stats.backlog - (q->qave >> q->Wlog);
165     		} else {
166     			q->qave += q->backlog - (q->qave >> q->Wlog);
167     		}
168     
169     	}
170     	
171     
172     	if (t->eqp && t->grio) 
173     		t->tab[t->def]->qave=q->qave;
174     
175     	if ((q->qave+qave) < q->qth_min) {
176     		q->qcount = -1;
177     enqueue:
178     		if (q->backlog <= q->limit) {
179     			q->backlog += skb->len;
180     do_enqueue:
181     			__skb_queue_tail(&sch->q, skb);
182     			sch->stats.backlog += skb->len;
183     			sch->stats.bytes += skb->len;
184     			sch->stats.packets++;
185     			return 0;
186     		} else {
187     			q->pdrop++;
188     		}
189     
190     drop:
191     		kfree_skb(skb);
192     		sch->stats.drops++;
193     		return NET_XMIT_DROP;
194     	}
195     	if ((q->qave+qave) >= q->qth_max) {
196     		q->qcount = -1;
197     		sch->stats.overlimits++;
198     		q->forced++;
199     		goto drop;
200     	}
201     	if (++q->qcount) {
202     		if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
203     			goto enqueue;
204     		q->qcount = 0;
205     		q->qR = net_random()&q->Rmask;
206     		sch->stats.overlimits++;
207     		q->early++;
208     		goto drop;
209     	}
210     	q->qR = net_random()&q->Rmask;
211     	goto enqueue;
212     }
213     
214     static int
215     gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
216     {
217     	struct gred_sched_data *q;
218     	struct gred_sched *t= (struct gred_sched *)sch->data;
219     	q= t->tab[(skb->tc_index&0xf)];
220     /* error checking here -- probably unnecessary */
221     	PSCHED_SET_PASTPERFECT(q->qidlestart);
222     
223     	__skb_queue_head(&sch->q, skb);
224     	sch->stats.backlog += skb->len;
225     	q->backlog += skb->len;
226     	return 0;
227     }
228     
229     static struct sk_buff *
230     gred_dequeue(struct Qdisc* sch)
231     {
232     	struct sk_buff *skb;
233     	struct gred_sched_data *q;
234     	struct gred_sched *t= (struct gred_sched *)sch->data;
235     
236     	skb = __skb_dequeue(&sch->q);
237     	if (skb) {
238     		sch->stats.backlog -= skb->len;
239     		q= t->tab[(skb->tc_index&0xf)];
240     		if (q) {
241     			q->backlog -= skb->len;
242     			if (!q->backlog && !t->eqp)
243     				PSCHED_GET_TIME(q->qidlestart);
244     		} else {
245     			D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 
246     		}
247     		return skb;
248     	}
249     
250     	if (t->eqp) {
251     			q= t->tab[t->def];
252     			if (!q)	
253     				D2PRINTK("no default VQ set: Results will be "
254     				       "screwed up\n");
255     			else
256     				PSCHED_GET_TIME(q->qidlestart);
257     	}
258     
259     	return NULL;
260     }
261     
262     static int
263     gred_drop(struct Qdisc* sch)
264     {
265     	struct sk_buff *skb;
266     
267     	struct gred_sched_data *q;
268     	struct gred_sched *t= (struct gred_sched *)sch->data;
269     
270     	skb = __skb_dequeue_tail(&sch->q);
271     	if (skb) {
272     		sch->stats.backlog -= skb->len;
273     		sch->stats.drops++;
274     		q= t->tab[(skb->tc_index&0xf)];
275     		if (q) {
276     			q->backlog -= skb->len;
277     			q->other++;
278     			if (!q->backlog && !t->eqp)
279     				PSCHED_GET_TIME(q->qidlestart);
280     			} else {
281     				D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 
282     			}
283     
284     		kfree_skb(skb);
285     		return 1;
286     	}
287     
288     	q=t->tab[t->def];
289     	if (!q) {
290     		D2PRINTK("no default VQ set: Results might be screwed up\n");
291     		return 0;
292     	}
293     
294     	PSCHED_GET_TIME(q->qidlestart);
295     	return 0;
296     
297     }
298     
299     static void gred_reset(struct Qdisc* sch)
300     {
301     	int i;
302     	struct gred_sched_data *q;
303     	struct gred_sched *t= (struct gred_sched *)sch->data;
304     
305     	__skb_queue_purge(&sch->q);
306     
307     	sch->stats.backlog = 0;
308     
309             for (i=0;i<t->DPs;i++) {
310     	        q= t->tab[i];
311     		if (!q)	
312     			continue; 
313     		PSCHED_SET_PASTPERFECT(q->qidlestart);
314     		q->qave = 0;
315     		q->qcount = -1;
316     		q->backlog = 0;
317     		q->other=0;
318     		q->forced=0;
319     		q->pdrop=0;
320     		q->early=0;
321     	}
322     }
323     
324     static int gred_change(struct Qdisc *sch, struct rtattr *opt)
325     {
326     	struct gred_sched *table = (struct gred_sched *)sch->data;
327     	struct gred_sched_data *q;
328     	struct tc_gred_qopt *ctl;
329     	struct tc_gred_sopt *sopt;
330     	struct rtattr *tb[TCA_GRED_STAB];
331     	struct rtattr *tb2[TCA_GRED_STAB];
332     	int i;
333     
334     	if (opt == NULL ||
335     		rtattr_parse(tb, TCA_GRED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) )
336     			return -EINVAL;
337     
338     	if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0 &&
339     	    tb[TCA_GRED_DPS-1] != 0) {
340     		rtattr_parse(tb2, TCA_GRED_DPS, RTA_DATA(opt),
341     		    RTA_PAYLOAD(opt));
342     
343     		sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
344     		table->DPs=sopt->DPs;   
345     		table->def=sopt->def_DP; 
346     		table->grio=sopt->grio; 
347     		table->initd=0;
348     		/* probably need to clear all the table DP entries as well */
349     		MOD_INC_USE_COUNT;
350     		return 0;
351     	    }
352     
353     
354     	if (!table->DPs || tb[TCA_GRED_PARMS-1] == 0 || tb[TCA_GRED_STAB-1] == 0 ||
355     		RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) ||
356     		RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
357     			return -EINVAL;
358     
359     	ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]);
360     	if (ctl->DP > MAX_DPs-1 ) {
361     		/* misbehaving is punished! Put in the default drop probability */
362     		DPRINTK("\nGRED: DP %u not in  the proper range fixed. New DP "
363     			"set to default at %d\n",ctl->DP,table->def);
364     		ctl->DP=table->def;
365     	}
366     	
367     	if (table->tab[ctl->DP] == NULL) {
368     		table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data),
369     					    GFP_KERNEL);
370     		if (NULL == table->tab[ctl->DP])
371     			return -ENOMEM;
372     		memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data)));
373     	}
374     	q= table->tab[ctl->DP]; 
375     
376     	if (table->grio) {
377     		if (ctl->prio <=0) {
378     			if (table->def && table->tab[table->def]) {
379     				DPRINTK("\nGRED: DP %u does not have a prio"
380     					"setting default to %d\n",ctl->DP,
381     					table->tab[table->def]->prio);
382     				q->prio=table->tab[table->def]->prio;
383     			} else { 
384     				DPRINTK("\nGRED: DP %u does not have a prio"
385     					" setting default to 8\n",ctl->DP);
386     				q->prio=8;
387     			}
388     		} else {
389     			q->prio=ctl->prio;
390     		}
391     	} else {
392     		q->prio=8;
393     	}
394     
395     
396     	q->DP=ctl->DP;
397     	q->Wlog = ctl->Wlog;
398     	q->Plog = ctl->Plog;
399     	q->limit = ctl->limit;
400     	q->Scell_log = ctl->Scell_log;
401     	q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
402     	q->Scell_max = (255<<q->Scell_log);
403     	q->qth_min = ctl->qth_min<<ctl->Wlog;
404     	q->qth_max = ctl->qth_max<<ctl->Wlog;
405     	q->qave=0;
406     	q->backlog=0;
407     	q->qcount = -1;
408     	q->other=0;
409     	q->forced=0;
410     	q->pdrop=0;
411     	q->early=0;
412     
413     	PSCHED_SET_PASTPERFECT(q->qidlestart);
414     	memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
415     
416     	if ( table->initd && table->grio) {
417     	/* this looks ugly but its not in the fast path */
418     		for (i=0;i<table->DPs;i++) {
419     			if ((!table->tab[i]) || (i==q->DP) )    
420     				continue; 
421     			if (table->tab[i]->prio == q->prio ){
422     				/* WRED mode detected */
423     				table->eqp=1;
424     				break;
425     			}
426     		}
427     	}
428     
429     	if (!table->initd) {
430     		table->initd=1;
431     		/* 
432             	the first entry also goes into the default until
433             	over-written 
434     		*/
435     
436     		if (table->tab[table->def] == NULL) {
437     			table->tab[table->def]=
438     				kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL);
439     			if (NULL == table->tab[ctl->DP])
440     				return -ENOMEM;
441     
442     			memset(table->tab[table->def], 0,
443     			       (sizeof(struct gred_sched_data)));
444     		}
445     		q= table->tab[table->def]; 
446     		q->DP=table->def;
447     		q->Wlog = ctl->Wlog;
448     		q->Plog = ctl->Plog;
449     		q->limit = ctl->limit;
450     		q->Scell_log = ctl->Scell_log;
451     		q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
452     		q->Scell_max = (255<<q->Scell_log);
453     		q->qth_min = ctl->qth_min<<ctl->Wlog;
454     		q->qth_max = ctl->qth_max<<ctl->Wlog;
455     
456     		if (table->grio)
457     			q->prio=table->tab[ctl->DP]->prio;
458     		else
459     			q->prio=8;
460     
461     		q->qcount = -1;
462     		PSCHED_SET_PASTPERFECT(q->qidlestart);
463     		memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
464     	}
465     	return 0;
466     
467     }
468     
469     static int gred_init(struct Qdisc *sch, struct rtattr *opt)
470     {
471     	struct gred_sched *table = (struct gred_sched *)sch->data;
472     	struct tc_gred_sopt *sopt;
473     	struct rtattr *tb[TCA_GRED_STAB];
474     	struct rtattr *tb2[TCA_GRED_STAB];
475     
476     	if (opt == NULL ||
477     		rtattr_parse(tb, TCA_GRED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) )
478     			return -EINVAL;
479     
480     	if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0 &&
481     	    tb[TCA_GRED_DPS-1] != 0) {
482     		rtattr_parse(tb2, TCA_GRED_DPS, RTA_DATA(opt),RTA_PAYLOAD(opt));
483     
484     		sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
485     		table->DPs=sopt->DPs;   
486     		table->def=sopt->def_DP; 
487     		table->grio=sopt->grio; 
488     		table->initd=0;
489     		MOD_INC_USE_COUNT;
490     		return 0;
491     	}
492     
493     	DPRINTK("\n GRED_INIT error!\n");
494     	return -EINVAL;
495     }
496     
497     #ifdef CONFIG_RTNETLINK
498     static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
499     {
500     	unsigned long qave;
501     	struct rtattr *rta;
502     	struct tc_gred_qopt *opt;
503     	struct tc_gred_qopt *dst;
504     	struct gred_sched *table = (struct gred_sched *)sch->data;
505     	struct gred_sched_data *q;
506     	int i;
507     	unsigned char	 *b = skb->tail;
508     
509     	rta = (struct rtattr*)b;
510     	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
511     
512     	opt=kmalloc(sizeof(struct tc_gred_qopt)*MAX_DPs, GFP_KERNEL);
513     
514     	if (opt  == NULL) {
515     		DPRINTK("gred_dump:failed to malloc for %Zd\n",
516     		    sizeof(struct tc_gred_qopt)*MAX_DPs);
517     		goto rtattr_failure;
518     	}
519     
520     	memset(opt, 0, (sizeof(struct tc_gred_qopt))*table->DPs);
521     
522     	if (!table->initd) {
523     		DPRINTK("NO GRED Queues setup!\n");
524     		return -1;
525     	}
526     
527     	for (i=0;i<MAX_DPs;i++) {
528     		dst= &opt[i]; 
529     		q= table->tab[i]; 
530     
531     		if (!q) {
532     			/* hack -- fix at some point with proper message
533     			   This is how we indicate to tc that there is no VQ
534     			   at this DP */
535     
536     			dst->DP=MAX_DPs+i;
537     			continue;
538     		}
539     
540     		dst->limit=q->limit;
541     		dst->qth_min=q->qth_min>>q->Wlog;
542     		dst->qth_max=q->qth_max>>q->Wlog;
543     		dst->DP=q->DP;
544     		dst->backlog=q->backlog;
545     		if (q->qave) {
546     			if (table->eqp && table->grio) {
547     				q->qidlestart=table->tab[table->def]->qidlestart;
548     				q->qave=table->tab[table->def]->qave;
549     			}
550     			if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
551     				long idle;
552     				psched_time_t now;
553     				PSCHED_GET_TIME(now);
554     				idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max, 0);
555     				qave  = q->qave >> q->Stab[(idle>>q->Scell_log)&0xFF];
556     				dst->qave = qave >> q->Wlog;
557     
558     			} else {
559     				dst->qave = q->qave >> q->Wlog;
560     			}
561     		} else {
562     			dst->qave = 0;
563     		}
564     		
565     
566     		dst->Wlog = q->Wlog;
567     		dst->Plog = q->Plog;
568     		dst->Scell_log = q->Scell_log;
569     		dst->other = q->other;
570     		dst->forced = q->forced;
571     		dst->early = q->early;
572     		dst->pdrop = q->pdrop;
573     		dst->prio = q->prio;
574     		dst->packets=q->packetsin;
575     		dst->bytesin=q->bytesin;
576     	}
577     
578     	RTA_PUT(skb, TCA_GRED_PARMS, sizeof(struct tc_gred_qopt)*MAX_DPs, opt);
579     	rta->rta_len = skb->tail - b;
580     
581     	return skb->len;
582     
583     rtattr_failure:
584     	DPRINTK("gred_dump: FAILURE!!!!\n");
585     
586     /* also free the opt struct here */
587     	skb_trim(skb, b - skb->data);
588     	return -1;
589     }
590     #endif
591     
592     static void gred_destroy(struct Qdisc *sch)
593     {
594     	struct gred_sched *table = (struct gred_sched *)sch->data;
595     	int i;
596     
597     	for (i = 0;i < table->DPs; i++) {
598     		if (table->tab[i])
599     			kfree(table->tab[i]);
600     	}
601     	MOD_DEC_USE_COUNT;
602     }
603     
604     struct Qdisc_ops gred_qdisc_ops =
605     {
606     	NULL,
607     	NULL,
608     	"gred",
609     	sizeof(struct gred_sched),
610     	gred_enqueue,
611     	gred_dequeue,
612     	gred_requeue,
613     	gred_drop,
614     	gred_init,
615     	gred_reset,
616     	gred_destroy,
617     	gred_change, /* change */
618     #ifdef CONFIG_RTNETLINK
619     	gred_dump,
620     #endif
621     };
622     
623     
624     #ifdef MODULE
625     int init_module(void)
626     {
627     	return register_qdisc(&gred_qdisc_ops);
628     }
629     
630     void cleanup_module(void) 
631     {
632     	unregister_qdisc(&gred_qdisc_ops);
633     }
634     #endif
635