1#define OFBUF (1)
2/*
3 * net/sched/sch_htb.c Hierarchical token bucket, feed tree version
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 *
10 * Authors: Martin Devera, <devik@cdi.cz>
11 *
12 * Credits (in time order) for older HTB versions:
13 * Stef Coene <stef.coene@docum.org>
14 * HTB support at LARTC mailing list
15 * Ondrej Kraus, <krauso@barr.cz>
16 * found missing INIT_QDISC(htb)
17 * Vladimir Smelhaus, Aamer Akhter, Bert Hubert
18 * helped a lot to locate nasty class stall bug
19 * Andi Kleen, Jamal Hadi, Bert Hubert
20 * code review and helpful comments on shaping
21 * Tomasz Wrona, <tw@eter.tym.pl>
22 * created test case so that I was able to fix nasty bug
23 * Wilfried Weissmann
24 * spotted bug in dequeue code and helped with fix
25 * Jiri Fojtasek
26 * fixed requeue routine
27 * and many others. thanks.
28 */
29#include <linux/module.h>
30#include <linux/moduleparam.h>
31#include <linux/types.h>
32#include <linux/kernel.h>
33#include <linux/string.h>
34#include <linux/errno.h>
35#include <linux/skbuff.h>
36#include <linux/list.h>
37#include <linux/compiler.h>
38#include <linux/rbtree.h>
39#include <linux/workqueue.h>
40#include <linux/slab.h>
41#include <net/netlink.h>
42#include <net/pkt_sched.h>
43
44/* HTB algorithm.
45 Author: devik@cdi.cz
46 ========================================================================
47 HTB is like TBF with multiple classes. It is also similar to CBQ because
48 it allows a priority to be assigned to each class in the hierarchy.
49 In fact it is another implementation of Floyd's formal sharing.
50
51 Levels:
52 Each class is assigned a level. Leaves ALWAYS have level 0 and root
53 classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
54 one less than their parent.
55*/
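/*
 For illustration, a typical hierarchy built from userspace with tc (the
 device name and rates below are arbitrary placeholders, not part of this
 file):

   tc qdisc add dev eth0 root handle 1: htb default 20
   tc class add dev eth0 parent 1:  classid 1:1  htb rate 100mbit ceil 100mbit
   tc class add dev eth0 parent 1:1 classid 1:10 htb rate 60mbit  ceil 100mbit
   tc class add dev eth0 parent 1:1 classid 1:20 htb rate 40mbit  ceil 100mbit

 Here 1:10 and 1:20 are leaves (level 0), 1:1 is their interior parent, and
 unclassified traffic falls into the default class 1:20. Each leaf may borrow
 unused rate from 1:1 up to its ceil.
*/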
56
57static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
58#define HTB_VER 0x30011 /* major must be matched with number supplied by TC as version */
59
60#if HTB_VER >> 16 != TC_HTB_PROTOVER
61#error "Mismatched sch_htb.c and pkt_sch.h"
62#endif
63
64/* Module parameter and sysfs export */
65module_param (htb_hysteresis, int, 0640);
66MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
67
68/* used internally to keep the status of a single class */
69enum htb_cmode {
70 HTB_CANT_SEND, /* class can't send and can't borrow */
71 HTB_MAY_BORROW, /* class can't send but may borrow */
72 HTB_CAN_SEND /* class can send */
73};
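/* In short: HTB_CAN_SEND classes are linked into the per-level self rows
 * (q->row) and can be served directly; HTB_MAY_BORROW classes hang off their
 * parent's inner feed trees and are reached only through an ancestor that can
 * send; HTB_CANT_SEND classes are detached from all feeds. Classes in either
 * non-CAN_SEND mode are also kept on the per-level event queue (wait_pq)
 * until their tokens recover (see htb_activate_prios() and
 * htb_change_class_mode() below).
 */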
74
75/* interior & leaf nodes; props specific to leaves are marked L: */
76struct htb_class {
77 struct Qdisc_class_common common;
78 /* general class parameters */
79 struct gnet_stats_basic_packed bstats;
80 struct gnet_stats_queue qstats;
81 struct gnet_stats_rate_est rate_est;
82 struct tc_htb_xstats xstats; /* our special stats */
83 int refcnt; /* usage count of this class */
84
85 /* topology */
86 int level; /* our level (see above) */
87 unsigned int children;
88 struct htb_class *parent; /* parent class */
89
90 int prio; /* these two are used only by leaves... */
91 int quantum; /* but stored for parent-to-leaf return */
92
93 union {
94 struct htb_class_leaf {
95 struct Qdisc *q;
96 int deficit[TC_HTB_MAXDEPTH];
97 struct list_head drop_list;
98 } leaf;
99 struct htb_class_inner {
100 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
101 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
102 /* When class changes from state 1->2 and disconnects from
103 * parent's feed, we lose the ptr value and start from the
104 * first child again. Here we store the classid of the
105 * last valid ptr (used when ptr is NULL).
106 */
107 u32 last_ptr_id[TC_HTB_NUMPRIO];
108 } inner;
109 } un;
110 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
111 struct rb_node pq_node; /* node for event queue */
112 psched_time_t pq_key;
113
114 int prio_activity; /* for which prios are we active */
115 enum htb_cmode cmode; /* current mode of the class */
116
117 /* class attached filters */
118 struct tcf_proto *filter_list;
119 int filter_cnt;
120
121 /* token bucket parameters */
122 struct qdisc_rate_table *rate; /* rate table of the class itself */
123 struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */
124 long buffer, cbuffer; /* token bucket depth/rate */
125 psched_tdiff_t mbuffer; /* max wait time */
126 long tokens, ctokens; /* current number of tokens */
127 psched_time_t t_c; /* checkpoint time */
128};
129
130struct htb_sched {
131 struct Qdisc_class_hash clhash;
132 struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
133
134 /* self list - roots of self generating tree */
135 struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
136 int row_mask[TC_HTB_MAXDEPTH];
137 struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
138 u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
139
140 /* self wait list - roots of wait PQs per row */
141 struct rb_root wait_pq[TC_HTB_MAXDEPTH];
142
143 /* time of nearest event per level (row) */
144 psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
145
146 int defcls; /* class where unclassified flows go to */
147
148 /* filters for qdisc itself */
149 struct tcf_proto *filter_list;
150
151 int rate2quantum; /* quant = rate / rate2quantum */
152 psched_time_t now; /* cached dequeue time */
153 struct qdisc_watchdog watchdog;
154
155 /* non shaped skbs; let them go directly thru */
156 struct sk_buff_head direct_queue;
157 int direct_qlen; /* max qlen of above */
158
159 long direct_pkts;
160
161#if OFBUF
162 /* overflow buffer */
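	/* Packets that a leaf qdisc refuses to take (e.g. because its queue
	 * is full) are parked here instead of being dropped; a later
	 * successful dequeue re-injects one of them, chosen at random, and
	 * drops the rest (see htb_enqueue() and htb_dequeue()).
	 */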
163 struct sk_buff_head ofbuf;
164 int ofbuf_queued; /* # packets queued in above */
165#endif
166
167#define HTB_WARN_TOOMANYEVENTS 0x1
168 unsigned int warned; /* only one warning */
169 struct work_struct work;
170};
171
172/* find class in global hash table using given handle */
173static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
174{
175 struct htb_sched *q = qdisc_priv(sch);
176 struct Qdisc_class_common *clc;
177
178 clc = qdisc_class_find(&q->clhash, handle);
179 if (clc == NULL)
180 return NULL;
181 return container_of(clc, struct htb_class, common);
182}
183
184/**
185 * htb_classify - classify a packet into class
186 *
187 * It returns NULL if the packet should be dropped or -1 if the packet
188 * should be passed directly thru. In all other cases a leaf class is returned.
189 * We allow direct class selection by classid in skb->priority. Then we examine
190 * filters in the qdisc and in inner nodes (if a higher filter points to the inner
191 * node). If we end up with classid MAJOR:0 we enqueue the skb into the special
192 * internal fifo (direct). These packets then go directly thru. If we still
193 * have no valid leaf we try to use the MAJOR:default leaf. If that is still
194 * unsuccessful, we finish and return the direct queue.
195 */
196#define HTB_DIRECT ((struct htb_class *)-1L)
197
198static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
199 int *qerr)
200{
201 struct htb_sched *q = qdisc_priv(sch);
202 struct htb_class *cl;
203 struct tcf_result res;
204 struct tcf_proto *tcf;
205 int result;
206
207 /* allow the class to be selected by setting skb->priority to a valid classid;
208 * note that nfmark can be used too, by attaching an fw filter with no
209 * rules in it
210 */
211 if (skb->priority == sch->handle)
212 return HTB_DIRECT; /* X:0 (direct flow) selected */
213 cl = htb_find(skb->priority, sch);
214 if (cl && cl->level == 0)
215 return cl;
216
217 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
218 tcf = q->filter_list;
219 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
220#ifdef CONFIG_NET_CLS_ACT
221 switch (result) {
222 case TC_ACT_QUEUED:
223 case TC_ACT_STOLEN:
224 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
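			/* fall through */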
225 case TC_ACT_SHOT:
226 return NULL;
227 }
228#endif
229 cl = (void *)res.class;
230 if (!cl) {
231 if (res.classid == sch->handle)
232 return HTB_DIRECT; /* X:0 (direct flow) */
233 cl = htb_find(res.classid, sch);
234 if (!cl)
235 break; /* filter selected invalid classid */
236 }
237 if (!cl->level)
238 return cl; /* we hit leaf; return it */
239
240 /* we have got inner class; apply inner filter chain */
241 tcf = cl->filter_list;
242 }
243 /* classification failed; try to use default class */
244 cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
245 if (!cl || cl->level)
246 return HTB_DIRECT; /* bad default .. this is safe bet */
247 return cl;
248}
249
250/**
251 * htb_add_to_id_tree - adds class to the round robin list
252 *
253 * The routine adds the class to a list (actually a tree) sorted by classid.
254 * Make sure that the class is not already on such a list for the given prio.
255 */
256static void htb_add_to_id_tree(struct rb_root *root,
257 struct htb_class *cl, int prio)
258{
259 struct rb_node **p = &root->rb_node, *parent = NULL;
260
261 while (*p) {
262 struct htb_class *c;
263 parent = *p;
264 c = rb_entry(parent, struct htb_class, node[prio]);
265
266 if (cl->common.classid > c->common.classid)
267 p = &parent->rb_right;
268 else
269 p = &parent->rb_left;
270 }
271 rb_link_node(&cl->node[prio], parent, p);
272 rb_insert_color(&cl->node[prio], root);
273}
274
275/**
276 * htb_add_to_wait_tree - adds class to the event queue with delay
277 *
278 * The class is added to the priority event queue to indicate that the class
279 * will change its mode at time cl->pq_key. Make sure that the class is not
280 * already in the queue.
281 */
282static void htb_add_to_wait_tree(struct htb_sched *q,
283 struct htb_class *cl, long delay)
284{
285 struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
286
287 cl->pq_key = q->now + delay;
288 if (cl->pq_key == q->now)
289 cl->pq_key++;
290
291 /* update the nearest event cache */
292 if (q->near_ev_cache[cl->level] > cl->pq_key)
293 q->near_ev_cache[cl->level] = cl->pq_key;
294
295 while (*p) {
296 struct htb_class *c;
297 parent = *p;
298 c = rb_entry(parent, struct htb_class, pq_node);
299 if (cl->pq_key >= c->pq_key)
300 p = &parent->rb_right;
301 else
302 p = &parent->rb_left;
303 }
304 rb_link_node(&cl->pq_node, parent, p);
305 rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
306}
307
308/**
309 * htb_next_rb_node - finds next node in binary tree
310 *
311 * When we are past the last key, *n is set to NULL.
312 * Average complexity is 2 steps per call.
313 */
314static inline void htb_next_rb_node(struct rb_node **n)
315{
316 *n = rb_next(*n);
317}
318
319/**
320 * htb_add_class_to_row - add class to its row
321 *
322 * The class is added to row at priorities marked in mask.
323 * It does nothing if mask == 0.
324 */
325static inline void htb_add_class_to_row(struct htb_sched *q,
326 struct htb_class *cl, int mask)
327{
328 q->row_mask[cl->level] |= mask;
329 while (mask) {
330 int prio = ffz(~mask);
331 mask &= ~(1 << prio);
332 htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
333 }
334}
335
336/* If this triggers, it is a bug in this code, but it need not be fatal */
337static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
338{
339 if (RB_EMPTY_NODE(rb)) {
340 WARN_ON(1);
341 } else {
342 rb_erase(rb, root);
343 RB_CLEAR_NODE(rb);
344 }
345}
346
347
348/**
349 * htb_remove_class_from_row - removes class from its row
350 *
351 * The class is removed from row at priorities marked in mask.
352 * It does nothing if mask == 0.
353 */
354static inline void htb_remove_class_from_row(struct htb_sched *q,
355 struct htb_class *cl, int mask)
356{
357 int m = 0;
358
359 while (mask) {
360 int prio = ffz(~mask);
361
362 mask &= ~(1 << prio);
363 if (q->ptr[cl->level][prio] == cl->node + prio)
364 htb_next_rb_node(q->ptr[cl->level] + prio);
365
366 htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
367 if (!q->row[cl->level][prio].rb_node)
368 m |= 1 << prio;
369 }
370 q->row_mask[cl->level] &= ~m;
371}
372
373/**
374 * htb_activate_prios - creates an active class's feed chain
375 *
376 * The class is connected to ancestors and/or appropriate rows
377 * for the priorities it participates in. cl->cmode must be the new
378 * (activated) mode. It does nothing if cl->prio_activity == 0.
379 */
380static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
381{
382 struct htb_class *p = cl->parent;
383 long m, mask = cl->prio_activity;
384
385 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
386 m = mask;
387 while (m) {
388 int prio = ffz(~m);
389 m &= ~(1 << prio);
390
391 if (p->un.inner.feed[prio].rb_node)
392 /* parent already has its feed in use, so
393 * reset the bit in the mask as the parent is already ok
394 */
395 mask &= ~(1 << prio);
396
397 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
398 }
399 p->prio_activity |= mask;
400 cl = p;
401 p = cl->parent;
402
403 }
404 if (cl->cmode == HTB_CAN_SEND && mask)
405 htb_add_class_to_row(q, cl, mask);
406}
407
408/**
409 * htb_deactivate_prios - remove class from feed chain
410 *
411 * cl->cmode must represent old mode (before deactivation). It does
412 * nothing if cl->prio_activity == 0. Class is removed from all feed
413 * chains and rows.
414 */
415static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
416{
417 struct htb_class *p = cl->parent;
418 long m, mask = cl->prio_activity;
419
420 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
421 m = mask;
422 mask = 0;
423 while (m) {
424 int prio = ffz(~m);
425 m &= ~(1 << prio);
426
427 if (p->un.inner.ptr[prio] == cl->node + prio) {
428 /* we are removing a child which is pointed to from
429 * the parent's feed - forget the pointer but remember
430 * the classid
431 */
432 p->un.inner.last_ptr_id[prio] = cl->common.classid;
433 p->un.inner.ptr[prio] = NULL;
434 }
435
436 htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
437
438 if (!p->un.inner.feed[prio].rb_node)
439 mask |= 1 << prio;
440 }
441
442 p->prio_activity &= ~mask;
443 cl = p;
444 p = cl->parent;
445
446 }
447 if (cl->cmode == HTB_CAN_SEND && mask)
448 htb_remove_class_from_row(q, cl, mask);
449}
450
451static inline long htb_lowater(const struct htb_class *cl)
452{
453 if (htb_hysteresis)
454 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
455 else
456 return 0;
457}
458static inline long htb_hiwater(const struct htb_class *cl)
459{
460 if (htb_hysteresis)
461 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
462 else
463 return 0;
464}
465
466
467/**
468 * htb_class_mode - computes and returns current class mode
469 *
470 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
471 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
472 * from now to time when cl will change its state.
473 * It is also worth noting that the class mode doesn't change simply
474 * at cl->{c,}tokens == 0; rather, there can be hysteresis in the
475 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
476 * mode transitions per time unit. The speed gain is about 1/6.
477 */
478static inline enum htb_cmode
479htb_class_mode(struct htb_class *cl, long *diff)
480{
481 long toks;
482
483 if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
484 *diff = -toks;
485 return HTB_CANT_SEND;
486 }
487
488 if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
489 return HTB_CAN_SEND;
490
491 *diff = -toks;
492 return HTB_MAY_BORROW;
493}
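/* For illustration, with hysteresis disabled (the default, so
 * lowater == hiwater == 0) the decision above reduces to:
 *   ctokens + diff <  0   ->  HTB_CANT_SEND   (class is over its ceil)
 *   tokens  + diff >= 0   ->  HTB_CAN_SEND    (class is within its rate)
 *   otherwise             ->  HTB_MAY_BORROW  (over rate but under ceil)
 * and *diff is set to the token deficit that htb_add_to_wait_tree() then
 * uses as the delay until the next possible mode change.
 */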
494
495/**
496 * htb_change_class_mode - changes a class's mode
497 *
498 * This should be the only way to change a class's mode under normal
499 * circumstances. The routine will update feed list linkage, change the mode
500 * and add the class to the wait event queue if appropriate. The new mode should
501 * be different from the old one and cl->pq_key has to be valid if changing
502 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
503 */
504static void
505htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
506{
507 enum htb_cmode new_mode = htb_class_mode(cl, diff);
508
509 if (new_mode == cl->cmode)
510 return;
511
512 if (cl->prio_activity) { /* not necessary: speed optimization */
513 if (cl->cmode != HTB_CANT_SEND)
514 htb_deactivate_prios(q, cl);
515 cl->cmode = new_mode;
516 if (new_mode != HTB_CANT_SEND)
517 htb_activate_prios(q, cl);
518 } else
519 cl->cmode = new_mode;
520}
521
522/**
523 * htb_activate - inserts leaf cl into appropriate active feeds
524 *
525 * The routine learns the (new) priority of the leaf and activates the feed
526 * chain for that prio. It can safely be called on an already active leaf.
527 * It also adds the leaf to the drop list.
528 */
529static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
530{
531 WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
532
533 if (!cl->prio_activity) {
534 cl->prio_activity = 1 << cl->prio;
535 htb_activate_prios(q, cl);
536 list_add_tail(&cl->un.leaf.drop_list,
537 q->drops + cl->prio);
538 }
539}
540
541/**
542 * htb_deactivate - remove leaf cl from active feeds
543 *
544 * Make sure that the leaf is active. In other words, it can't be called
545 * with a non-active leaf. It also removes the class from the drop list.
546 */
547static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
548{
549 WARN_ON(!cl->prio_activity);
550
551 htb_deactivate_prios(q, cl);
552 cl->prio_activity = 0;
553 list_del_init(&cl->un.leaf.drop_list);
554}
555
556static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
557{
558 int uninitialized_var(ret);
559 struct htb_sched *q = qdisc_priv(sch);
560 struct htb_class *cl = htb_classify(skb, sch, &ret);
561
562#if OFBUF
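	/* Take an extra reference here: if the leaf qdisc rejects the skb
	 * below, qdisc_enqueue() will have freed its reference and we still
	 * need the skb to park it in ofbuf; on success we drop the extra
	 * reference again with kfree_skb().
	 */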
563 if(cl != HTB_DIRECT && cl)
564 skb_get(skb);
565#endif
566
567 if (cl == HTB_DIRECT) {
568 /* enqueue to helper queue */
569 if (q->direct_queue.qlen < q->direct_qlen) {
570 __skb_queue_tail(&q->direct_queue, skb);
571 q->direct_pkts++;
572 } else {
573 kfree_skb(skb);
574 sch->qstats.drops++;
575 return NET_XMIT_DROP;
576 }
577#ifdef CONFIG_NET_CLS_ACT
578 } else if (!cl) {
579 if (ret & __NET_XMIT_BYPASS)
580 sch->qstats.drops++;
581 kfree_skb(skb);
582 return ret;
583#endif
584 } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
585 /* We shouldn't drop this, but enqueue it into ofbuf instead. */
586 // Note: the skb is only still valid here because we took an extra
587 // reference with skb_get() above; qdisc_enqueue() ends up freeing
588 // the packet when the enqueue fails.
589#if OFBUF
590 __skb_queue_tail(&q->ofbuf, skb);
591 q->ofbuf_queued++;
592#else
593 if (net_xmit_drop_count(ret)) {
594 sch->qstats.drops++;
595 cl->qstats.drops++;
596 }
597 return ret;
598#endif
599 } else {
600 bstats_update(&cl->bstats, skb);
601 htb_activate(q, cl);
602#if OFBUF
603 kfree_skb(skb);
604#endif
605 }
606
607 sch->q.qlen++;
608 return NET_XMIT_SUCCESS;
609}
610
611static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
612{
613 long toks = diff + cl->tokens;
614
615 if (toks > cl->buffer)
616 toks = cl->buffer;
617 toks -= (long) qdisc_l2t(cl->rate, bytes);
618 if (toks <= -cl->mbuffer)
619 toks = 1 - cl->mbuffer;
620
621 cl->tokens = toks;
622}
623
624static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
625{
626 long toks = diff + cl->ctokens;
627
628 if (toks > cl->cbuffer)
629 toks = cl->cbuffer;
630 toks -= (long) qdisc_l2t(cl->ceil, bytes);
631 if (toks <= -cl->mbuffer)
632 toks = 1 - cl->mbuffer;
633
634 cl->ctokens = toks;
635}
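/* Note on units: tokens and ctokens are kept in psched time units, not
 * bytes. qdisc_l2t(rtab, bytes) looks up how long it takes to transmit
 * `bytes` at the class rate (or ceil), so sending a packet simply subtracts
 * its transmission time from the bucket, while elapsed time (diff) is added
 * back, capped at cl->buffer/cl->cbuffer and bounded below at roughly
 * -cl->mbuffer.
 */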
636
637/**
638 * htb_charge_class - charges amount "bytes" to leaf and ancestors
639 *
640 * Routine assumes that packet "bytes" long was dequeued from leaf cl
641 * borrowing from "level". It accounts bytes to ceil leaky bucket for
642 * leaf and all ancestors and to rate bucket for ancestors at levels
643 * "level" and higher. It also handles possible change of mode resulting
644 * from the update. Note that mode can also increase here (MAY_BORROW to
645 * CAN_SEND) because we can use a more precise clock than the event queue here.
646 * In such case we remove class from event queue first.
647 */
648static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
649 int level, struct sk_buff *skb)
650{
651 int bytes = qdisc_pkt_len(skb);
652 enum htb_cmode old_mode;
653 long diff;
654
655 while (cl) {
656 diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
657 if (cl->level >= level) {
658 if (cl->level == level)
659 cl->xstats.lends++;
660 htb_accnt_tokens(cl, bytes, diff);
661 } else {
662 cl->xstats.borrows++;
663 cl->tokens += diff; /* we moved t_c; update tokens */
664 }
665 htb_accnt_ctokens(cl, bytes, diff);
666 cl->t_c = q->now;
667
668 old_mode = cl->cmode;
669 diff = 0;
670 htb_change_class_mode(q, cl, &diff);
671 if (old_mode != cl->cmode) {
672 if (old_mode != HTB_CAN_SEND)
673 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
674 if (cl->cmode != HTB_CAN_SEND)
675 htb_add_to_wait_tree(q, cl, diff);
676 }
677
678 /* update basic stats except for leaves which are already updated */
679 if (cl->level)
680 bstats_update(&cl->bstats, skb);
681
682 cl = cl->parent;
683 }
684}
685
686/**
687 * htb_do_events - make mode changes to classes at the level
688 *
689 * Scans event queue for pending events and applies them. Returns time of
690 * next pending event (0 for no event in pq, q->now for too many events).
691 * Note: the events applied are those with cl->pq_key <= q->now.
692 */
693static psched_time_t htb_do_events(struct htb_sched *q, int level,
694 unsigned long start)
695{
696 /* don't run for longer than 2 jiffies; 2 is used instead of
697 * 1 to simplify things when jiffy is going to be incremented
698 * too soon
699 */
700 unsigned long stop_at = start + 2;
701 while (time_before(jiffies, stop_at)) {
702 struct htb_class *cl;
703 long diff;
704 struct rb_node *p = rb_first(&q->wait_pq[level]);
705
706 if (!p)
707 return 0;
708
709 cl = rb_entry(p, struct htb_class, pq_node);
710 if (cl->pq_key > q->now)
711 return cl->pq_key;
712
713 htb_safe_rb_erase(p, q->wait_pq + level);
714 diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
715 htb_change_class_mode(q, cl, &diff);
716 if (cl->cmode != HTB_CAN_SEND)
717 htb_add_to_wait_tree(q, cl, diff);
718 }
719
720 /* too much load - let's continue after a break for scheduling */
721 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
722 pr_warning("htb: too many events!\n");
723 q->warned |= HTB_WARN_TOOMANYEVENTS;
724 }
725
726 return q->now;
727}
728
729/* Returns class->node+prio from the id-tree where the class's id is >= id. NULL
730 * if no such one exists.
731 */
732static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
733 u32 id)
734{
735 struct rb_node *r = NULL;
736 while (n) {
737 struct htb_class *cl =
738 rb_entry(n, struct htb_class, node[prio]);
739
740 if (id > cl->common.classid) {
741 n = n->rb_right;
742 } else if (id < cl->common.classid) {
743 r = n;
744 n = n->rb_left;
745 } else {
746 return n;
747 }
748 }
749 return r;
750}
751
752/**
753 * htb_lookup_leaf - returns next leaf class in DRR order
754 *
755 * Find the leaf that the current feed pointer points to.
756 */
757static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
758 struct rb_node **pptr, u32 * pid)
759{
760 int i;
761 struct {
762 struct rb_node *root;
763 struct rb_node **pptr;
764 u32 *pid;
765 } stk[TC_HTB_MAXDEPTH], *sp = stk;
766
767 BUG_ON(!tree->rb_node);
768 sp->root = tree->rb_node;
769 sp->pptr = pptr;
770 sp->pid = pid;
771
772 for (i = 0; i < 65535; i++) {
773 if (!*sp->pptr && *sp->pid) {
774 /* ptr was invalidated but id is valid - try to recover
775 * the original or next ptr
776 */
777 *sp->pptr =
778 htb_id_find_next_upper(prio, sp->root, *sp->pid);
779 }
780 *sp->pid = 0; /* ptr is valid now, so remove this hint as it
781 * can become out of date quickly
782 */
783 if (!*sp->pptr) { /* we are at right end; rewind & go up */
784 *sp->pptr = sp->root;
785 while ((*sp->pptr)->rb_left)
786 *sp->pptr = (*sp->pptr)->rb_left;
787 if (sp > stk) {
788 sp--;
789 if (!*sp->pptr) {
790 WARN_ON(1);
791 return NULL;
792 }
793 htb_next_rb_node(sp->pptr);
794 }
795 } else {
796 struct htb_class *cl;
797 cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
798 if (!cl->level)
799 return cl;
800 (++sp)->root = cl->un.inner.feed[prio].rb_node;
801 sp->pptr = cl->un.inner.ptr + prio;
802 sp->pid = cl->un.inner.last_ptr_id + prio;
803 }
804 }
805 WARN_ON(1);
806 return NULL;
807}
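/* In other words: the loop above walks down from the row's self tree into
 * the inner feed trees, following the per-prio ptr cursor at each level,
 * rewinding to the leftmost node and popping one stack level (sp--)
 * whenever a cursor runs off the right end of its subtree; last_ptr_id is
 * used to resume near the old position when a cursor was invalidated by a
 * class leaving the feed.
 */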
808
809/* dequeues packet at given priority and level; call only if
810 * you are sure that there is active class at prio/level
811 */
812static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
813 int level)
814{
815 struct sk_buff *skb = NULL;
816 struct htb_class *cl, *start;
817 /* look initial class up in the row */
818 start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
819 q->ptr[level] + prio,
820 q->last_ptr_id[level] + prio);
821
822 do {
823next:
824 if (unlikely(!cl))
825 return NULL;
826
827 /* class can be empty - it is unlikely but can be true if leaf
828 * qdisc drops packets in enqueue routine or if someone used
829 * graft operation on the leaf since last dequeue;
830 * simply deactivate and skip such class
831 */
832 if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
833 struct htb_class *next;
834 htb_deactivate(q, cl);
835
836 /* row/level might become empty */
837 if ((q->row_mask[level] & (1 << prio)) == 0)
838 return NULL;
839
840 next = htb_lookup_leaf(q->row[level] + prio,
841 prio, q->ptr[level] + prio,
842 q->last_ptr_id[level] + prio);
843
844 if (cl == start) /* fix start if we just deleted it */
845 start = next;
846 cl = next;
847 goto next;
848 }
849
850 skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
851 if (likely(skb != NULL))
852 break;
853
854 qdisc_warn_nonwc("htb", cl->un.leaf.q);
855 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
856 ptr[0]) + prio);
857 cl = htb_lookup_leaf(q->row[level] + prio, prio,
858 q->ptr[level] + prio,
859 q->last_ptr_id[level] + prio);
860
861 } while (cl != start);
862
863 if (likely(skb != NULL)) {
864 cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
865 if (cl->un.leaf.deficit[level] < 0) {
866 cl->un.leaf.deficit[level] += cl->quantum;
867 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
868 ptr[0]) + prio);
869 }
870 /* this used to be after charge_class but this ordering
871 * gives us slightly better performance
872 */
873 if (!cl->un.leaf.q->q.qlen)
874 htb_deactivate(q, cl);
875 htb_charge_class(q, cl, level, skb);
876 }
877 return skb;
878}
879
880static struct sk_buff *htb_dequeue(struct Qdisc *sch)
881{
882 struct sk_buff *skb;
883 struct htb_sched *q = qdisc_priv(sch);
884 int level;
885 psched_time_t next_event;
886 unsigned long start_at;
887 u32 r, i;
888 struct sk_buff *pkt;
889
890 /* try to dequeue direct packets as high prio (!) to minimize cpu work */
891 skb = __skb_dequeue(&q->direct_queue);
892 if (skb != NULL) {
893ok:
894 qdisc_bstats_update(sch, skb);
895 qdisc_unthrottled(sch);
896 sch->q.qlen--;
897#if OFBUF
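		/* A packet just left the tree, so there is room again: pick one
		 * packet uniformly at random from the overflow buffer and
		 * re-enqueue it; the remaining buffered packets are dropped and
		 * the counter is reset.
		 */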
898 if(q->ofbuf_queued > 0) {
899 i = 0;
900 r = net_random() % q->ofbuf_queued;
901 // enqueue the rth packet and drop the rest
902 while((pkt = __skb_dequeue(&q->ofbuf)) != NULL) {
903 if(i == r) {
904 // the chosen one
905 htb_enqueue(pkt, sch);
906 } else {
907 kfree_skb(pkt);
908 }
909 i++;
910 }
911 q->ofbuf_queued = 0;
912 }
913#endif
914 return skb;
915 }
916
917 if (!sch->q.qlen)
918 goto fin;
919 q->now = psched_get_time();
920 start_at = jiffies;
921
922 next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
923
924 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
925 /* common case optimization - skip event handler quickly */
926 int m;
927 psched_time_t event;
928
929 if (q->now >= q->near_ev_cache[level]) {
930 event = htb_do_events(q, level, start_at);
931 if (!event)
932 event = q->now + PSCHED_TICKS_PER_SEC;
933 q->near_ev_cache[level] = event;
934 } else
935 event = q->near_ev_cache[level];
936
937 if (next_event > event)
938 next_event = event;
939
940 m = ~q->row_mask[level];
941 while (m != (int)(-1)) {
942 int prio = ffz(m);
943
944 m |= 1 << prio;
945 skb = htb_dequeue_tree(q, prio, level);
946 if (likely(skb != NULL))
947 goto ok;
948 }
949 }
950 sch->qstats.overlimits++;
951 if (likely(next_event > q->now))
952 qdisc_watchdog_schedule(&q->watchdog, next_event);
953 else
954 schedule_work(&q->work);
955fin:
956 return skb;
957}
958
959/* try to drop from each class (by prio) until one succeeds */
960static unsigned int htb_drop(struct Qdisc *sch)
961{
962 struct htb_sched *q = qdisc_priv(sch);
963 int prio;
964
965 for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
966 struct list_head *p;
967 list_for_each(p, q->drops + prio) {
968 struct htb_class *cl = list_entry(p, struct htb_class,
969 un.leaf.drop_list);
970 unsigned int len;
971 if (cl->un.leaf.q->ops->drop &&
972 (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
973 sch->q.qlen--;
974 if (!cl->un.leaf.q->q.qlen)
975 htb_deactivate(q, cl);
976 return len;
977 }
978 }
979 }
980 return 0;
981}
982
983/* reset all classes */
984/* always called under BH & queue lock */
985static void htb_reset(struct Qdisc *sch)
986{
987 struct htb_sched *q = qdisc_priv(sch);
988 struct htb_class *cl;
989 struct hlist_node *n;
990 unsigned int i;
991
992 for (i = 0; i < q->clhash.hashsize; i++) {
993 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
994 if (cl->level)
995 memset(&cl->un.inner, 0, sizeof(cl->un.inner));
996 else {
997 if (cl->un.leaf.q)
998 qdisc_reset(cl->un.leaf.q);
999 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1000 }
1001 cl->prio_activity = 0;
1002 cl->cmode = HTB_CAN_SEND;
1003
1004 }
1005 }
1006 qdisc_watchdog_cancel(&q->watchdog);
1007 __skb_queue_purge(&q->direct_queue);
1008 sch->q.qlen = 0;
1009#if OFBUF
1010 __skb_queue_purge(&q->ofbuf);
1011 q->ofbuf_queued = 0;
1012#endif
1013 memset(q->row, 0, sizeof(q->row));
1014 memset(q->row_mask, 0, sizeof(q->row_mask));
1015 memset(q->wait_pq, 0, sizeof(q->wait_pq));
1016 memset(q->ptr, 0, sizeof(q->ptr));
1017 for (i = 0; i < TC_HTB_NUMPRIO; i++)
1018 INIT_LIST_HEAD(q->drops + i);
1019}
1020
1021static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1022 [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1023 [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) },
1024 [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1025 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1026};
1027
1028static void htb_work_func(struct work_struct *work)
1029{
1030 struct htb_sched *q = container_of(work, struct htb_sched, work);
1031 struct Qdisc *sch = q->watchdog.qdisc;
1032
1033 __netif_schedule(qdisc_root(sch));
1034}
1035
1036static int htb_init(struct Qdisc *sch, struct nlattr *opt)
1037{
1038 struct htb_sched *q = qdisc_priv(sch);
1039 struct nlattr *tb[TCA_HTB_INIT + 1];
1040 struct tc_htb_glob *gopt;
1041 int err;
1042 int i;
1043
1044 if (!opt)
1045 return -EINVAL;
1046
1047 err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
1048 if (err < 0)
1049 return err;
1050
1051 if (tb[TCA_HTB_INIT] == NULL) {
1052 pr_err("HTB: hey probably you have bad tc tool ?\n");
1053 return -EINVAL;
1054 }
1055 gopt = nla_data(tb[TCA_HTB_INIT]);
1056 if (gopt->version != HTB_VER >> 16) {
1057 pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
1058 HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
1059 return -EINVAL;
1060 }
1061
1062 err = qdisc_class_hash_init(&q->clhash);
1063 if (err < 0)
1064 return err;
1065 for (i = 0; i < TC_HTB_NUMPRIO; i++)
1066 INIT_LIST_HEAD(q->drops + i);
1067
1068 qdisc_watchdog_init(&q->watchdog, sch);
1069 INIT_WORK(&q->work, htb_work_func);
1070 skb_queue_head_init(&q->direct_queue);
1071
1072#if OFBUF
1073 skb_queue_head_init(&q->ofbuf);
1074 q->ofbuf_queued = 0;
1075#endif
1076
1077 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1078
1079 if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1080 q->direct_qlen = 2;
1081
1082 if ((q->rate2quantum = gopt->rate2quantum) < 1)
1083 q->rate2quantum = 1;
1084 q->defcls = gopt->defcls;
1085
1086 return 0;
1087}
1088
1089static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1090{
1091 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1092 struct htb_sched *q = qdisc_priv(sch);
1093 struct nlattr *nest;
1094 struct tc_htb_glob gopt;
1095
1096 spin_lock_bh(root_lock);
1097
1098 gopt.direct_pkts = q->direct_pkts;
1099 gopt.version = HTB_VER;
1100 gopt.rate2quantum = q->rate2quantum;
1101 gopt.defcls = q->defcls;
1102 gopt.debug = 0;
1103
1104 nest = nla_nest_start(skb, TCA_OPTIONS);
1105 if (nest == NULL)
1106 goto nla_put_failure;
1107 NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
1108 nla_nest_end(skb, nest);
1109
1110 spin_unlock_bh(root_lock);
1111 return skb->len;
1112
1113nla_put_failure:
1114 spin_unlock_bh(root_lock);
1115 nla_nest_cancel(skb, nest);
1116 return -1;
1117}
1118
1119static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1120 struct sk_buff *skb, struct tcmsg *tcm)
1121{
1122 struct htb_class *cl = (struct htb_class *)arg;
1123 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1124 struct nlattr *nest;
1125 struct tc_htb_opt opt;
1126
1127 spin_lock_bh(root_lock);
1128 tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1129 tcm->tcm_handle = cl->common.classid;
1130 if (!cl->level && cl->un.leaf.q)
1131 tcm->tcm_info = cl->un.leaf.q->handle;
1132
1133 nest = nla_nest_start(skb, TCA_OPTIONS);
1134 if (nest == NULL)
1135 goto nla_put_failure;
1136
1137 memset(&opt, 0, sizeof(opt));
1138
1139 opt.rate = cl->rate->rate;
1140 opt.buffer = cl->buffer;
1141 opt.ceil = cl->ceil->rate;
1142 opt.cbuffer = cl->cbuffer;
1143 opt.quantum = cl->quantum;
1144 opt.prio = cl->prio;
1145 opt.level = cl->level;
1146 NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1147
1148 nla_nest_end(skb, nest);
1149 spin_unlock_bh(root_lock);
1150 return skb->len;
1151
1152nla_put_failure:
1153 spin_unlock_bh(root_lock);
1154 nla_nest_cancel(skb, nest);
1155 return -1;
1156}
1157
1158static int
1159htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1160{
1161 struct htb_class *cl = (struct htb_class *)arg;
1162
1163 if (!cl->level && cl->un.leaf.q)
1164 cl->qstats.qlen = cl->un.leaf.q->q.qlen;
1165 cl->xstats.tokens = cl->tokens;
1166 cl->xstats.ctokens = cl->ctokens;
1167
1168 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1169 gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
1170 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1171 return -1;
1172
1173 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1174}
1175
1176static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1177 struct Qdisc **old)
1178{
1179 struct htb_class *cl = (struct htb_class *)arg;
1180
1181 if (cl->level)
1182 return -EINVAL;
1183 if (new == NULL &&
1184 (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1185 cl->common.classid)) == NULL)
1186 return -ENOBUFS;
1187
1188 sch_tree_lock(sch);
1189 *old = cl->un.leaf.q;
1190 cl->un.leaf.q = new;
1191 if (*old != NULL) {
1192 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1193 qdisc_reset(*old);
1194 }
1195 sch_tree_unlock(sch);
1196 return 0;
1197}
1198
1199static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1200{
1201 struct htb_class *cl = (struct htb_class *)arg;
1202 return !cl->level ? cl->un.leaf.q : NULL;
1203}
1204
1205static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1206{
1207 struct htb_class *cl = (struct htb_class *)arg;
1208
1209 if (cl->un.leaf.q->q.qlen == 0)
1210 htb_deactivate(qdisc_priv(sch), cl);
1211}
1212
1213static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1214{
1215 struct htb_class *cl = htb_find(classid, sch);
1216 if (cl)
1217 cl->refcnt++;
1218 return (unsigned long)cl;
1219}
1220
1221static inline int htb_parent_last_child(struct htb_class *cl)
1222{
1223 if (!cl->parent)
1224 /* the root class */
1225 return 0;
1226 if (cl->parent->children > 1)
1227 /* not the last child */
1228 return 0;
1229 return 1;
1230}
1231
1232static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1233 struct Qdisc *new_q)
1234{
1235 struct htb_class *parent = cl->parent;
1236
1237 WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
1238
1239 if (parent->cmode != HTB_CAN_SEND)
1240 htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
1241
1242 parent->level = 0;
1243 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1244 INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1245 parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
1246 parent->tokens = parent->buffer;
1247 parent->ctokens = parent->cbuffer;
1248 parent->t_c = psched_get_time();
1249 parent->cmode = HTB_CAN_SEND;
1250}
1251
1252static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1253{
1254 if (!cl->level) {
1255 WARN_ON(!cl->un.leaf.q);
1256 qdisc_destroy(cl->un.leaf.q);
1257 }
1258 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1259 qdisc_put_rtab(cl->rate);
1260 qdisc_put_rtab(cl->ceil);
1261
1262 tcf_destroy_chain(&cl->filter_list);
1263 kfree(cl);
1264}
1265
1266static void htb_destroy(struct Qdisc *sch)
1267{
1268 struct htb_sched *q = qdisc_priv(sch);
1269 struct hlist_node *n, *next;
1270 struct htb_class *cl;
1271 unsigned int i;
1272
1273 cancel_work_sync(&q->work);
1274 qdisc_watchdog_cancel(&q->watchdog);
1275 /* This line used to be after htb_destroy_class call below
1276 * and surprisingly it worked in 2.4. But it must precede it
1277 * because filters need their target class alive to be able to call
1278 * unbind_filter on it (without Oops).
1279 */
1280 tcf_destroy_chain(&q->filter_list);
1281
1282 for (i = 0; i < q->clhash.hashsize; i++) {
1283 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
1284 tcf_destroy_chain(&cl->filter_list);
1285 }
1286 for (i = 0; i < q->clhash.hashsize; i++) {
1287 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
1288 common.hnode)
1289 htb_destroy_class(sch, cl);
1290 }
1291 qdisc_class_hash_destroy(&q->clhash);
1292 __skb_queue_purge(&q->direct_queue);
1293#if OFBUF
1294 __skb_queue_purge(&q->ofbuf);
1295 q->ofbuf_queued = 0;
1296#endif
1297}
1298
1299static int htb_delete(struct Qdisc *sch, unsigned long arg)
1300{
1301 struct htb_sched *q = qdisc_priv(sch);
1302 struct htb_class *cl = (struct htb_class *)arg;
1303 unsigned int qlen;
1304 struct Qdisc *new_q = NULL;
1305 int last_child = 0;
1306
1307 // TODO: why don't we allow deleting a subtree? references? does the
1308 // tc subsystem guarantee us that in htb_destroy it holds no class
1309 // refs, so that we can remove children safely there?
1310 if (cl->children || cl->filter_cnt)
1311 return -EBUSY;
1312
1313 if (!cl->level && htb_parent_last_child(cl)) {
1314 new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1315 cl->parent->common.classid);
1316 last_child = 1;
1317 }
1318
1319 sch_tree_lock(sch);
1320
1321 if (!cl->level) {
1322 qlen = cl->un.leaf.q->q.qlen;
1323 qdisc_reset(cl->un.leaf.q);
1324 qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
1325 }
1326
1327 /* delete from hash and active; remainder in destroy_class */
1328 qdisc_class_hash_remove(&q->clhash, &cl->common);
1329 if (cl->parent)
1330 cl->parent->children--;
1331
1332 if (cl->prio_activity)
1333 htb_deactivate(q, cl);
1334
1335 if (cl->cmode != HTB_CAN_SEND)
1336 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
1337
1338 if (last_child)
1339 htb_parent_to_leaf(q, cl, new_q);
1340
1341 BUG_ON(--cl->refcnt == 0);
1342 /*
1343 * This shouldn't happen: we "hold" one cops->get() when called
1344 * from tc_ctl_tclass; the destroy method is done from cops->put().
1345 */
1346
1347 sch_tree_unlock(sch);
1348 return 0;
1349}
1350
1351static void htb_put(struct Qdisc *sch, unsigned long arg)
1352{
1353 struct htb_class *cl = (struct htb_class *)arg;
1354
1355 if (--cl->refcnt == 0)
1356 htb_destroy_class(sch, cl);
1357}
1358
1359static int htb_change_class(struct Qdisc *sch, u32 classid,
1360 u32 parentid, struct nlattr **tca,
1361 unsigned long *arg)
1362{
1363 int err = -EINVAL;
1364 struct htb_sched *q = qdisc_priv(sch);
1365 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1366 struct nlattr *opt = tca[TCA_OPTIONS];
1367 struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1368 struct nlattr *tb[__TCA_HTB_MAX];
1369 struct tc_htb_opt *hopt;
1370
1371 /* extract all subattrs from opt attr */
1372 if (!opt)
1373 goto failure;
1374
1375 err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
1376 if (err < 0)
1377 goto failure;
1378
1379 err = -EINVAL;
1380 if (tb[TCA_HTB_PARMS] == NULL)
1381 goto failure;
1382
1383 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1384
1385 hopt = nla_data(tb[TCA_HTB_PARMS]);
1386
1387 rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
1388 ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
1389 if (!rtab || !ctab)
1390 goto failure;
1391
1392 if (!cl) { /* new class */
1393 struct Qdisc *new_q;
1394 int prio;
1395 struct {
1396 struct nlattr nla;
1397 struct gnet_estimator opt;
1398 } est = {
1399 .nla = {
1400 .nla_len = nla_attr_size(sizeof(est.opt)),
1401 .nla_type = TCA_RATE,
1402 },
1403 .opt = {
1404 /* 4s interval, 16s averaging constant */
1405 .interval = 2,
1406 .ewma_log = 2,
1407 },
1408 };
1409
1410 /* check for valid classid */
1411 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1412 htb_find(classid, sch))
1413 goto failure;
1414
1415 /* check maximal depth */
1416 if (parent && parent->parent && parent->parent->level < 2) {
1417 pr_err("htb: tree is too deep\n");
1418 goto failure;
1419 }
1420 err = -ENOBUFS;
1421 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1422 if (!cl)
1423 goto failure;
1424
1425 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1426 qdisc_root_sleeping_lock(sch),
1427 tca[TCA_RATE] ? : &est.nla);
1428 if (err) {
1429 kfree(cl);
1430 goto failure;
1431 }
1432
1433 cl->refcnt = 1;
1434 cl->children = 0;
1435 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1436 RB_CLEAR_NODE(&cl->pq_node);
1437
1438 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1439 RB_CLEAR_NODE(&cl->node[prio]);
1440
1441 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1442 * so it can't be used inside of sch_tree_lock
1443 * -- thanks to Karlis Peisenieks
1444 */
1445 new_q = qdisc_create_dflt(sch->dev_queue,
1446 &pfifo_qdisc_ops, classid);
1447 sch_tree_lock(sch);
1448 if (parent && !parent->level) {
1449 unsigned int qlen = parent->un.leaf.q->q.qlen;
1450
1451 /* turn parent into inner node */
1452 qdisc_reset(parent->un.leaf.q);
1453 qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
1454 qdisc_destroy(parent->un.leaf.q);
1455 if (parent->prio_activity)
1456 htb_deactivate(q, parent);
1457
1458 /* remove from evt list because of level change */
1459 if (parent->cmode != HTB_CAN_SEND) {
1460 htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
1461 parent->cmode = HTB_CAN_SEND;
1462 }
1463 parent->level = (parent->parent ? parent->parent->level
1464 : TC_HTB_MAXDEPTH) - 1;
1465 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1466 }
1467 /* leaf (we) needs elementary qdisc */
1468 cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1469
1470 cl->common.classid = classid;
1471 cl->parent = parent;
1472
1473 /* set class to be in HTB_CAN_SEND state */
1474 cl->tokens = hopt->buffer;
1475 cl->ctokens = hopt->cbuffer;
1476 cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */
1477 cl->t_c = psched_get_time();
1478 cl->cmode = HTB_CAN_SEND;
1479
1480 /* attach to the hash list and parent's family */
1481 qdisc_class_hash_insert(&q->clhash, &cl->common);
1482 if (parent)
1483 parent->children++;
1484 } else {
1485 if (tca[TCA_RATE]) {
1486 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1487 qdisc_root_sleeping_lock(sch),
1488 tca[TCA_RATE]);
1489 if (err)
1490 return err;
1491 }
1492 sch_tree_lock(sch);
1493 }
1494
1495 /* there used to be a nasty bug here: we have to check that the node
1496 * is really a leaf before changing cl->un.leaf!
1497 */
1498 if (!cl->level) {
1499 cl->quantum = rtab->rate.rate / q->rate2quantum;
1500 if (!hopt->quantum && cl->quantum < 1000) {
1501 pr_warning(
1502 "HTB: quantum of class %X is small. Consider r2q change.\n",
1503 cl->common.classid);
1504 cl->quantum = 1000;
1505 }
1506 if (!hopt->quantum && cl->quantum > 200000) {
1507 pr_warning(
1508 "HTB: quantum of class %X is big. Consider r2q change.\n",
1509 cl->common.classid);
1510 cl->quantum = 200000;
1511 }
1512 if (hopt->quantum)
1513 cl->quantum = hopt->quantum;
1514 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1515 cl->prio = TC_HTB_NUMPRIO - 1;
1516 }
1517
1518 cl->buffer = hopt->buffer;
1519 cl->cbuffer = hopt->cbuffer;
1520 if (cl->rate)
1521 qdisc_put_rtab(cl->rate);
1522 cl->rate = rtab;
1523 if (cl->ceil)
1524 qdisc_put_rtab(cl->ceil);
1525 cl->ceil = ctab;
1526 sch_tree_unlock(sch);
1527
1528 qdisc_class_hash_grow(sch, &q->clhash);
1529
1530 *arg = (unsigned long)cl;
1531 return 0;
1532
1533failure:
1534 if (rtab)
1535 qdisc_put_rtab(rtab);
1536 if (ctab)
1537 qdisc_put_rtab(ctab);
1538 return err;
1539}
1540
1541static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1542{
1543 struct htb_sched *q = qdisc_priv(sch);
1544 struct htb_class *cl = (struct htb_class *)arg;
1545 struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
1546
1547 return fl;
1548}
1549
1550static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1551 u32 classid)
1552{
1553 struct htb_class *cl = htb_find(classid, sch);
1554
1555 /*if (cl && !cl->level) return 0;
1556 * The line above used to be there to prevent attaching filters to
1557 * leaves. But at least tc_index filter uses this just to get class
1558 * for other reasons, so we have to allow for it.
1559 * ----
1560 * 19.6.2002 As Werner explained it is ok - bind filter is just
1561 * another way to "lock" the class - unlike "get" this lock can
1562 * be broken by class during destroy IIUC.
1563 */
1564 if (cl)
1565 cl->filter_cnt++;
1566 return (unsigned long)cl;
1567}
1568
1569static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1570{
1571 struct htb_class *cl = (struct htb_class *)arg;
1572
1573 if (cl)
1574 cl->filter_cnt--;
1575}
1576
1577static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1578{
1579 struct htb_sched *q = qdisc_priv(sch);
1580 struct htb_class *cl;
1581 struct hlist_node *n;
1582 unsigned int i;
1583
1584 if (arg->stop)
1585 return;
1586
1587 for (i = 0; i < q->clhash.hashsize; i++) {
1588 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
1589 if (arg->count < arg->skip) {
1590 arg->count++;
1591 continue;
1592 }
1593 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1594 arg->stop = 1;
1595 return;
1596 }
1597 arg->count++;
1598 }
1599 }
1600}
1601
1602static const struct Qdisc_class_ops htb_class_ops = {
1603 .graft = htb_graft,
1604 .leaf = htb_leaf,
1605 .qlen_notify = htb_qlen_notify,
1606 .get = htb_get,
1607 .put = htb_put,
1608 .change = htb_change_class,
1609 .delete = htb_delete,
1610 .walk = htb_walk,
1611 .tcf_chain = htb_find_tcf,
1612 .bind_tcf = htb_bind_filter,
1613 .unbind_tcf = htb_unbind_filter,
1614 .dump = htb_dump_class,
1615 .dump_stats = htb_dump_class_stats,
1616};
1617
1618static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1619 .cl_ops = &htb_class_ops,
1620 .id = "htb",
1621 .priv_size = sizeof(struct htb_sched),
1622 .enqueue = htb_enqueue,
1623 .dequeue = htb_dequeue,
1624 .peek = qdisc_peek_dequeued,
1625 .drop = htb_drop,
1626 .init = htb_init,
1627 .reset = htb_reset,
1628 .destroy = htb_destroy,
1629 .dump = htb_dump,
1630 .owner = THIS_MODULE,
1631};
1632
1633static int __init htb_module_init(void)
1634{
1635 return register_qdisc(&htb_qdisc_ops);
1636}
1637static void __exit htb_module_exit(void)
1638{
1639 unregister_qdisc(&htb_qdisc_ops);
1640}
1641
1642module_init(htb_module_init)
1643module_exit(htb_module_exit)
1644MODULE_LICENSE("GPL");