2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
38 -----------------------------------------------------------------------
Algorithm skeleton was taken from the NS simulator's cbq.cc.
   Anyone who wants to check this code against the LBL version
   should take into account that ONLY the skeleton was borrowed;
   the implementation is different. In particular:
   --- The WRR algorithm is different. Our version looks more
       reasonable (I hope) and works when quanta are allowed to be
       less than the MTU, which is always the case when real-time classes
       have small rates. Note that the statement in [3] is
       incomplete: the delay may actually be estimated even if the class's
       per-round allotment is less than the MTU. Namely, if the per-round
       allotment is W*r_i, and r_1+...+r_k = r < 1, then

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

       In the worst case we have the IntServ estimate with D = W*r + k*MTU
       and C = MTU*r. The proof (if correct at all) is trivial.
       (A small numeric sketch of this bound follows this comment block.)
   --- It seems that cbq-2.0 is not very accurate. At least, I cannot
       interpret some places that look like wrong translations
       from NS. Anyone is advised to find these differences
       and explain to me why I am wrong 8).
   --- Linux has no EOI event, so we cannot estimate the true class
       idle time. The workaround is to treat the next dequeue event
       as a sign that the previous packet has finished. This is wrong because of
       internal device queueing, but on a permanently loaded link it holds.
       Moreover, combined with the clock integrator, this scheme looks
       very close to an ideal solution. */
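/* Illustrative sketch (not part of the qdisc): the worst-case delay bound
 * quoted in the comment above can be evaluated numerically.  Everything
 * below -- the helper names, the use of doubles, and reading [x] as a
 * ceiling -- is an assumption of this example, not something sch_cbq.c
 * does.  Units: bytes for mtu and allotments, bytes/sec for bandwidth;
 * the result is in seconds.
 */
static double cbq_example_ceil(double x)
{
	long n = (long)x;			/* x is non-negative here */

	return (x > (double)n) ? (double)(n + 1) : (double)n;
}

/* delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B */
static double cbq_example_wrr_delay_bound(double mtu, double w, double r_i,
					  double r, unsigned int k,
					  double bandwidth)
{
	double rounds = cbq_example_ceil(mtu / (w * r_i));

	return (rounds * w * r + w * r + k * mtu) / bandwidth;
}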
71 struct cbq_sched_data;
76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
84 #ifdef CONFIG_NET_CLS_ACT
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
95 struct qdisc_rate_table *R_tab;
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
99 psched_tdiff_t penalty;
101 /* General scheduler (WRR) parameters */
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
115 struct Qdisc *q; /* Elementary queueing discipline */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
129 long deficit; /* Saved deficit for WRR */
130 psched_time_t penalized;
131 struct gnet_stats_basic bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
134 struct tc_cbq_xstats xstats;
136 struct tcf_proto *filter_list;
141 struct cbq_class *defaults[TC_PRIO_MAX+1];
144 struct cbq_sched_data
146 struct Qdisc_class_hash clhash; /* Hash table of all classes */
147 int nclasses[TC_CBQ_MAXPRIO+1];
148 unsigned quanta[TC_CBQ_MAXPRIO+1];
150 struct cbq_class link;
153 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
156 #ifdef CONFIG_NET_CLS_ACT
157 struct cbq_class *rx_class;
159 struct cbq_class *tx_class;
160 struct cbq_class *tx_borrowed;
162 psched_time_t now; /* Cached timestamp */
163 psched_time_t now_rt; /* Cached real time */
166 struct hrtimer delay_timer;
167 struct qdisc_watchdog watchdog; /* Watchdog timer,
171 psched_tdiff_t wd_expires;
177 #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
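/* Illustrative sketch only: L2T() answers "how many psched ticks does a
 * packet of this length take at the class rate?" via the precomputed rate
 * table.  The helper below computes the same quantity directly from a
 * byte rate; the tick resolution and parameter names are assumptions of
 * this example, not fields of the real qdisc_rate_table.
 */
#define CBQ_EXAMPLE_TICKS_PER_SEC	1000000ULL	/* assume 1 tick == 1us */

static unsigned long cbq_example_len_to_ticks(unsigned int len,
					      unsigned long rate_bytes_per_sec)
{
	if (rate_bytes_per_sec == 0)
		return 0;	/* avoid division by zero in this sketch */

	return (unsigned long)(((unsigned long long)len *
				CBQ_EXAMPLE_TICKS_PER_SEC) /
			       rate_bytes_per_sec);
}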
179 static __inline__ struct cbq_class *
180 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
182 struct Qdisc_class_common *clc;
184 clc = qdisc_class_find(&q->clhash, classid);
187 return container_of(clc, struct cbq_class, common);
190 #ifdef CONFIG_NET_CLS_ACT
192 static struct cbq_class *
193 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
195 struct cbq_class *cl, *new;
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling.

   Namely, you can put link-sharing rules (e.g. route-based) at the root of CBQ,
   so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
216 static struct cbq_class *
217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
227 * Step 1. If skb->priority points to one of our classes, use it.
229 if (TC_H_MAJ(prio^sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
233 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
236 defmap = head->defaults;
239 * Step 2+n. Apply classifier.
241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
245 if ((cl = (void*)res.class) == NULL) {
246 if (TC_H_MAJ(res.classid))
247 cl = cbq_class_lookup(q, res.classid);
248 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
249 cl = defmap[TC_PRIO_BESTEFFORT];
251 if (cl == NULL || cl->level >= head->level)
255 #ifdef CONFIG_NET_CLS_ACT
259 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
262 case TC_ACT_RECLASSIFY:
263 return cbq_reclassify(skb, cl);
 * Step 3+n. If the classifier selected a link-sharing class,
 *	     apply the agency-specific classifier.
 *	     Repeat this procedure until we hit a leaf node.
281 * Step 4. No success...
283 if (TC_H_MAJ(prio) == 0 &&
284 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
285 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
   A packet has just been enqueued into an empty class.
   cbq_activate_class adds it to the tail of the active class list
   of its priority band.
297 static __inline__ void cbq_activate_class(struct cbq_class *cl)
299 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
300 int prio = cl->cpriority;
301 struct cbq_class *cl_tail;
303 cl_tail = q->active[prio];
304 q->active[prio] = cl;
306 if (cl_tail != NULL) {
307 cl->next_alive = cl_tail->next_alive;
308 cl_tail->next_alive = cl;
311 q->activemask |= (1<<prio);
   Unlink the class from the active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during the round-robin procedure.
321 static void cbq_deactivate_class(struct cbq_class *this)
323 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
324 int prio = this->cpriority;
325 struct cbq_class *cl;
326 struct cbq_class *cl_prev = q->active[prio];
329 cl = cl_prev->next_alive;
331 cl_prev->next_alive = cl->next_alive;
332 cl->next_alive = NULL;
334 if (cl == q->active[prio]) {
335 q->active[prio] = cl_prev;
336 if (cl == q->active[prio]) {
337 q->active[prio] = NULL;
338 q->activemask &= ~(1<<prio);
344 } while ((cl_prev = cl) != q->active[prio]);
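/* Illustrative sketch only: both helpers above manipulate a circular,
 * singly linked ring threaded through ->next_alive, in which
 * q->active[prio] always points at the *tail* (the most recently
 * activated class), so tail->next_alive is the head of the round-robin
 * order.  The toy type and function below restate the insertion step in
 * isolation; they are assumptions of this example, not part of sch_cbq.
 */
struct cbq_example_node {
	struct cbq_example_node *next_alive;
};

static void cbq_example_activate(struct cbq_example_node **tail,
				 struct cbq_example_node *node)
{
	if (*tail == NULL) {
		node->next_alive = node;		/* one-element ring */
	} else {
		node->next_alive = (*tail)->next_alive;	/* old head */
		(*tail)->next_alive = node;
	}
	*tail = node;				/* node becomes the new tail */
}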
348 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
350 int toplevel = q->toplevel;
352 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
356 now = psched_get_time();
357 incr = now - q->now_rt;
361 if (cl->undertime < now) {
362 q->toplevel = cl->level;
365 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
370 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
372 struct cbq_sched_data *q = qdisc_priv(sch);
373 int uninitialized_var(ret);
374 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
376 #ifdef CONFIG_NET_CLS_ACT
380 if (ret & __NET_XMIT_BYPASS)
386 #ifdef CONFIG_NET_CLS_ACT
387 cl->q->__parent = sch;
389 ret = qdisc_enqueue(skb, cl->q);
390 if (ret == NET_XMIT_SUCCESS) {
392 sch->bstats.packets++;
393 sch->bstats.bytes += qdisc_pkt_len(skb);
394 cbq_mark_toplevel(q, cl);
396 cbq_activate_class(cl);
400 if (net_xmit_drop_count(ret)) {
402 cbq_mark_toplevel(q, cl);
409 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
411 struct cbq_sched_data *q = qdisc_priv(sch);
412 struct cbq_class *cl;
415 if ((cl = q->tx_class) == NULL) {
422 cbq_mark_toplevel(q, cl);
424 #ifdef CONFIG_NET_CLS_ACT
426 cl->q->__parent = sch;
428 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
430 sch->qstats.requeues++;
432 cbq_activate_class(cl);
435 if (net_xmit_drop_count(ret)) {
442 /* Overlimit actions */
444 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
446 static void cbq_ovl_classic(struct cbq_class *cl)
448 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
449 psched_tdiff_t delay = cl->undertime - q->now;
452 delay += cl->offtime;
		   Class goes to sleep, so that it will have no
		   chance to work off avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug here; apparently they forgot
		   to shift the value by cl->ewma_log.
462 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
463 if (cl->avgidle < cl->minidle)
464 cl->avgidle = cl->minidle;
467 cl->undertime = q->now + delay;
469 cl->xstats.overactions++;
472 if (q->wd_expires == 0 || q->wd_expires > delay)
473 q->wd_expires = delay;
475 /* Dirty work! We must schedule wakeups based on
476 real available rate, rather than leaf rate,
477 which may be tiny (even zero).
479 if (q->toplevel == TC_CBQ_MAXLEVEL) {
481 psched_tdiff_t base_delay = q->wd_expires;
483 for (b = cl->borrow; b; b = b->borrow) {
484 delay = b->undertime - q->now;
485 if (delay < base_delay) {
492 q->wd_expires = base_delay;
496 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
500 static void cbq_ovl_rclassic(struct cbq_class *cl)
502 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
503 struct cbq_class *this = cl;
506 if (cl->level > q->toplevel) {
510 } while ((cl = cl->borrow) != NULL);
/* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
519 static void cbq_ovl_delay(struct cbq_class *cl)
521 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
522 psched_tdiff_t delay = cl->undertime - q->now;
524 if (test_bit(__QDISC_STATE_DEACTIVATED,
525 &qdisc_root_sleeping(cl->qdisc)->state))
529 psched_time_t sched = q->now;
532 delay += cl->offtime;
534 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
535 if (cl->avgidle < cl->minidle)
536 cl->avgidle = cl->minidle;
537 cl->undertime = q->now + delay;
540 sched += delay + cl->penalty;
541 cl->penalized = sched;
542 cl->cpriority = TC_CBQ_MAXPRIO;
543 q->pmask |= (1<<TC_CBQ_MAXPRIO);
545 expires = ktime_set(0, 0);
546 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
547 if (hrtimer_try_to_cancel(&q->delay_timer) &&
548 ktime_to_ns(ktime_sub(
549 hrtimer_get_expires(&q->delay_timer),
551 hrtimer_set_expires(&q->delay_timer, expires);
552 hrtimer_restart(&q->delay_timer);
554 cl->xstats.overactions++;
559 if (q->wd_expires == 0 || q->wd_expires > delay)
560 q->wd_expires = delay;
563 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
565 static void cbq_ovl_lowprio(struct cbq_class *cl)
567 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
569 cl->penalized = q->now + cl->penalty;
571 if (cl->cpriority != cl->priority2) {
572 cl->cpriority = cl->priority2;
573 q->pmask |= (1<<cl->cpriority);
574 cl->xstats.overactions++;
579 /* TC_CBQ_OVL_DROP: penalize class by dropping */
581 static void cbq_ovl_drop(struct cbq_class *cl)
583 if (cl->q->ops->drop)
584 if (cl->q->ops->drop(cl->q))
586 cl->xstats.overactions++;
590 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
593 struct cbq_class *cl;
594 struct cbq_class *cl_prev = q->active[prio];
595 psched_time_t sched = now;
601 cl = cl_prev->next_alive;
602 if (now - cl->penalized > 0) {
603 cl_prev->next_alive = cl->next_alive;
604 cl->next_alive = NULL;
605 cl->cpriority = cl->priority;
607 cbq_activate_class(cl);
609 if (cl == q->active[prio]) {
610 q->active[prio] = cl_prev;
611 if (cl == q->active[prio]) {
612 q->active[prio] = NULL;
617 cl = cl_prev->next_alive;
618 } else if (sched - cl->penalized > 0)
619 sched = cl->penalized;
620 } while ((cl_prev = cl) != q->active[prio]);
625 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
627 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
629 struct Qdisc *sch = q->watchdog.qdisc;
631 psched_tdiff_t delay = 0;
634 now = psched_get_time();
640 int prio = ffz(~pmask);
645 tmp = cbq_undelay_prio(q, prio, now);
648 if (tmp < delay || delay == 0)
656 time = ktime_set(0, 0);
657 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
658 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
661 sch->flags &= ~TCQ_F_THROTTLED;
662 __netif_schedule(qdisc_root(sch));
663 return HRTIMER_NORESTART;
666 #ifdef CONFIG_NET_CLS_ACT
667 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
669 struct Qdisc *sch = child->__parent;
670 struct cbq_sched_data *q = qdisc_priv(sch);
671 struct cbq_class *cl = q->rx_class;
675 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
678 cbq_mark_toplevel(q, cl);
681 cl->q->__parent = sch;
683 ret = qdisc_enqueue(skb, cl->q);
684 if (ret == NET_XMIT_SUCCESS) {
686 sch->bstats.packets++;
687 sch->bstats.bytes += qdisc_pkt_len(skb);
689 cbq_activate_class(cl);
692 if (net_xmit_drop_count(ret))
   This is a mission-critical procedure.

   We "regenerate" the top-level cutoff if the transmitting class
   has backlog and is not regulated. This is not part of the
   original CBQ description, but looks more reasonable.
   Probably it is wrong; this question needs further investigation.
711 static __inline__ void
712 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
713 struct cbq_class *borrowed)
715 if (cl && q->toplevel >= borrowed->level) {
716 if (cl->q->q.qlen > 1) {
718 if (borrowed->undertime == PSCHED_PASTPERFECT) {
719 q->toplevel = borrowed->level;
722 } while ((borrowed=borrowed->borrow) != NULL);
725 /* It is not necessary now. Uncommenting it
726 will save CPU cycles, but decrease fairness.
728 q->toplevel = TC_CBQ_MAXLEVEL;
734 cbq_update(struct cbq_sched_data *q)
736 struct cbq_class *this = q->tx_class;
737 struct cbq_class *cl = this;
742 for ( ; cl; cl = cl->share) {
743 long avgidle = cl->avgidle;
746 cl->bstats.packets++;
747 cl->bstats.bytes += len;
		   (now - last) is the total time between packet right edges.
		   (last_pktlen/rate) is the "virtual" busy time, so that

			idle = (now - last) - last_pktlen/rate
756 idle = q->now - cl->last;
757 if ((unsigned long)idle > 128*1024*1024) {
758 avgidle = cl->maxidle;
760 idle -= L2T(cl, len);
		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		   where W = 2^{-ewma_log}. But cl->avgidle is scaled:
		   cl->avgidle == true_avgidle/W, hence
		   (see also the standalone sketch after this function):
		 */
767 avgidle += idle - (avgidle>>cl->ewma_log);
771 /* Overlimit or at-limit */
773 if (avgidle < cl->minidle)
774 avgidle = cl->minidle;
776 cl->avgidle = avgidle;
			/* Calculate the expected time when this class
			   will be allowed to send. It will occur when
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle),
			   or, in scaled terms,
			   idle = (1 - W)*(-cl->avgidle):
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
			   To maintain the rate allocated to the class,
			   we add to undertime the virtual clock time
			   needed to complete the transmitted packet.
			   (len/phys_bandwidth has already elapsed
			   by the time cbq_update is called.)
797 idle -= L2T(&q->link, len);
798 idle += L2T(cl, len);
800 cl->undertime = q->now + idle;
804 cl->undertime = PSCHED_PASTPERFECT;
805 if (avgidle > cl->maxidle)
806 cl->avgidle = cl->maxidle;
808 cl->avgidle = avgidle;
813 cbq_update_toplevel(q, this, q->tx_borrowed);
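/* Illustrative sketch only: cbq_update() keeps avgidle scaled by 1/W
 * (W = 2^{-ewma_log}), so the EWMA
 *
 *	true_avgidle := (1-W)*true_avgidle + W*idle
 *
 * reduces to a shift-and-add on the scaled value.  The helper below
 * repeats that single step in isolation; it is an example, not a
 * function the qdisc calls.
 */
static long cbq_example_avgidle_step(long scaled_avgidle, long idle,
				     unsigned char ewma_log)
{
	/* scaled_avgidle == true_avgidle/W; (scaled_avgidle >> ewma_log)
	 * recovers W*true_avgidle, so no multiplication is needed.
	 */
	return scaled_avgidle + idle - (scaled_avgidle >> ewma_log);
}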
816 static __inline__ struct cbq_class *
817 cbq_under_limit(struct cbq_class *cl)
819 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
820 struct cbq_class *this_cl = cl;
822 if (cl->tparent == NULL)
825 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		/* This is a very suspicious place. Currently, the overlimit
		   action is generated for non-bounded classes
		   only if the link is completely congested.
		   Though this agrees with the ancestor-only paradigm,
		   it looks very odd. In particular, it means that this
		   chunk of code will either never be called or will
		   strongly amplify burstiness. Dangerous, silly, and yet
		   no other solution exists.
		 */
841 if ((cl = cl->borrow) == NULL) {
842 this_cl->qstats.overlimits++;
843 this_cl->overlimit(this_cl);
846 if (cl->level > q->toplevel)
848 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
854 static __inline__ struct sk_buff *
855 cbq_dequeue_prio(struct Qdisc *sch, int prio)
857 struct cbq_sched_data *q = qdisc_priv(sch);
858 struct cbq_class *cl_tail, *cl_prev, *cl;
862 cl_tail = cl_prev = q->active[prio];
863 cl = cl_prev->next_alive;
870 struct cbq_class *borrow = cl;
873 (borrow = cbq_under_limit(cl)) == NULL)
876 if (cl->deficit <= 0) {
877 /* Class exhausted its allotment per
878 this round. Switch to the next one.
881 cl->deficit += cl->quantum;
885 skb = cl->q->dequeue(cl->q);
			/* Class did not give us any skb :-(
			   This can occur even if cl->q->q.qlen != 0,
			   e.g. if cl->q is a "tbf" qdisc.
			 */
894 cl->deficit -= qdisc_pkt_len(skb);
896 q->tx_borrowed = borrow;
898 #ifndef CBQ_XSTATS_BORROWS_BYTES
899 borrow->xstats.borrows++;
900 cl->xstats.borrows++;
902 borrow->xstats.borrows += qdisc_pkt_len(skb);
903 cl->xstats.borrows += qdisc_pkt_len(skb);
906 q->tx_len = qdisc_pkt_len(skb);
908 if (cl->deficit <= 0) {
909 q->active[prio] = cl;
911 cl->deficit += cl->quantum;
916 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
917 /* Class is empty or penalized.
918 Unlink it from active chain.
920 cl_prev->next_alive = cl->next_alive;
921 cl->next_alive = NULL;
923 /* Did cl_tail point to it? */
928 /* Was it the last class in this band? */
931 q->active[prio] = NULL;
932 q->activemask &= ~(1<<prio);
934 cbq_activate_class(cl);
938 q->active[prio] = cl_tail;
941 cbq_activate_class(cl);
949 } while (cl_prev != cl_tail);
952 q->active[prio] = cl_prev;
957 static __inline__ struct sk_buff *
958 cbq_dequeue_1(struct Qdisc *sch)
960 struct cbq_sched_data *q = qdisc_priv(sch);
964 activemask = q->activemask&0xFF;
966 int prio = ffz(~activemask);
967 activemask &= ~(1<<prio);
968 skb = cbq_dequeue_prio(sch, prio);
975 static struct sk_buff *
976 cbq_dequeue(struct Qdisc *sch)
979 struct cbq_sched_data *q = qdisc_priv(sch);
983 now = psched_get_time();
984 incr = now - q->now_rt;
987 psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		   by adding the expected packet transmission time.
		   If real time is greater, we warp the artificial clock, so that

			cbq_time = max(real_time, work);

		   (See the standalone sketch after this function.)
		 */
995 incr2 = L2T(&q->link, q->tx_len);
998 if ((incr -= incr2) < 0)
1007 skb = cbq_dequeue_1(sch);
1010 sch->flags &= ~TCQ_F_THROTTLED;
		/* All the classes are overlimit.

		   This is possible if:

		   1. The scheduler is empty.
		   2. The top-level cutoff inhibited borrowing.
		   3. The root class is overlimit.

		   Reset conditions 2 and 3 and retry.

		   Note that NS and cbq-2.0 are buggy: peeking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the top-level algorithm.

		   Our version is better but slower, because it requires
		   two passes; that is unavoidable with top-level sharing.
		*/
1032 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1033 q->link.undertime == PSCHED_PASTPERFECT)
1036 q->toplevel = TC_CBQ_MAXLEVEL;
1037 q->link.undertime = PSCHED_PASTPERFECT;
	/* No packets in the scheduler, or nobody wants to give them to us :-(
	   Sigh... Start the watchdog timer in the latter case. */
1044 sch->qstats.overlimits++;
1046 qdisc_watchdog_schedule(&q->watchdog,
1047 now + q->wd_expires);
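/* Illustrative sketch only: the time integrator in cbq_dequeue() maintains
 * cbq_time = max(real_time, work) -- the artificial clock advances by the
 * expected transmission time of the last packet and is warped forward
 * whenever real time has run further ahead.  The toy state and update
 * below restate that rule; the names are assumptions of this example.
 */
struct cbq_example_clock {
	long now;	/* artificial (work) clock */
	long now_rt;	/* last observed real time */
};

static void cbq_example_clock_advance(struct cbq_example_clock *c,
				      long real_now, long tx_time)
{
	long incr = real_now - c->now_rt;	/* real time elapsed */

	c->now += tx_time;			/* account the expected work */
	incr -= tx_time;
	if (incr > 0)				/* real time ran ahead: warp */
		c->now += incr;
	c->now_rt = real_now;
}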
/* CBQ class maintenance routines */
1054 static void cbq_adjust_levels(struct cbq_class *this)
1061 struct cbq_class *cl;
1063 if ((cl = this->children) != NULL) {
1065 if (cl->level > level)
1067 } while ((cl = cl->sibling) != this->children);
1069 this->level = level+1;
1070 } while ((this = this->tparent) != NULL);
1073 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1075 struct cbq_class *cl;
1076 struct hlist_node *n;
1079 if (q->quanta[prio] == 0)
1082 for (h = 0; h < q->clhash.hashsize; h++) {
1083 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflows!
			   (A 64-bit alternative is sketched after this function.)
			 */
1087 if (cl->priority == prio) {
1088 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1091 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1092 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1093 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
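/* Illustrative sketch only: as the comment above warns, the quantum
 * expression multiplies three long values and can overflow.  One hedged
 * alternative is to widen the intermediate product to 64 bits, as below;
 * this is an example of such a fix, not what cbq_normalize_quanta() does.
 */
static long cbq_example_normalized_quantum(long weight, long allot,
					   int nclasses, unsigned int quanta)
{
	unsigned long long prod;

	if (quanta == 0)
		return 0;	/* the caller skips such priorities anyway */

	prod = (unsigned long long)weight * (unsigned long long)allot;
	prod *= (unsigned long long)nclasses;

	return (long)(prod / quanta);
}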
1099 static void cbq_sync_defmap(struct cbq_class *cl)
1101 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1102 struct cbq_class *split = cl->split;
1109 for (i=0; i<=TC_PRIO_MAX; i++) {
1110 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1111 split->defaults[i] = NULL;
1114 for (i=0; i<=TC_PRIO_MAX; i++) {
1115 int level = split->level;
1117 if (split->defaults[i])
1120 for (h = 0; h < q->clhash.hashsize; h++) {
1121 struct hlist_node *n;
1122 struct cbq_class *c;
1124 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1126 if (c->split == split && c->level < level &&
1128 split->defaults[i] = c;
1136 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1138 struct cbq_class *split = NULL;
1141 if ((split = cl->split) == NULL)
1143 splitid = split->common.classid;
1146 if (split == NULL || split->common.classid != splitid) {
1147 for (split = cl->tparent; split; split = split->tparent)
1148 if (split->common.classid == splitid)
1155 if (cl->split != split) {
1157 cbq_sync_defmap(cl);
1159 cl->defmap = def&mask;
1161 cl->defmap = (cl->defmap&~mask)|(def&mask);
1163 cbq_sync_defmap(cl);
1166 static void cbq_unlink_class(struct cbq_class *this)
1168 struct cbq_class *cl, **clp;
1169 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1171 qdisc_class_hash_remove(&q->clhash, &this->common);
1173 if (this->tparent) {
1182 } while ((cl = *clp) != this->sibling);
1184 if (this->tparent->children == this) {
1185 this->tparent->children = this->sibling;
1186 if (this->sibling == this)
1187 this->tparent->children = NULL;
1190 WARN_ON(this->sibling != this);
1194 static void cbq_link_class(struct cbq_class *this)
1196 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1197 struct cbq_class *parent = this->tparent;
1199 this->sibling = this;
1200 qdisc_class_hash_insert(&q->clhash, &this->common);
1205 if (parent->children == NULL) {
1206 parent->children = this;
1208 this->sibling = parent->children->sibling;
1209 parent->children->sibling = this;
1213 static unsigned int cbq_drop(struct Qdisc* sch)
1215 struct cbq_sched_data *q = qdisc_priv(sch);
1216 struct cbq_class *cl, *cl_head;
1220 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1221 if ((cl_head = q->active[prio]) == NULL)
1226 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1229 cbq_deactivate_class(cl);
1232 } while ((cl = cl->next_alive) != cl_head);
1238 cbq_reset(struct Qdisc* sch)
1240 struct cbq_sched_data *q = qdisc_priv(sch);
1241 struct cbq_class *cl;
1242 struct hlist_node *n;
1249 q->tx_borrowed = NULL;
1250 qdisc_watchdog_cancel(&q->watchdog);
1251 hrtimer_cancel(&q->delay_timer);
1252 q->toplevel = TC_CBQ_MAXLEVEL;
1253 q->now = psched_get_time();
1256 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1257 q->active[prio] = NULL;
1259 for (h = 0; h < q->clhash.hashsize; h++) {
1260 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1263 cl->next_alive = NULL;
1264 cl->undertime = PSCHED_PASTPERFECT;
1265 cl->avgidle = cl->maxidle;
1266 cl->deficit = cl->quantum;
1267 cl->cpriority = cl->priority;
1274 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1276 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1277 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1278 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1280 if (lss->change&TCF_CBQ_LSS_EWMA)
1281 cl->ewma_log = lss->ewma_log;
1282 if (lss->change&TCF_CBQ_LSS_AVPKT)
1283 cl->avpkt = lss->avpkt;
1284 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1285 cl->minidle = -(long)lss->minidle;
1286 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1287 cl->maxidle = lss->maxidle;
1288 cl->avgidle = lss->maxidle;
1290 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1291 cl->offtime = lss->offtime;
1295 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1297 q->nclasses[cl->priority]--;
1298 q->quanta[cl->priority] -= cl->weight;
1299 cbq_normalize_quanta(q, cl->priority);
1302 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1304 q->nclasses[cl->priority]++;
1305 q->quanta[cl->priority] += cl->weight;
1306 cbq_normalize_quanta(q, cl->priority);
1309 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1311 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1314 cl->allot = wrr->allot;
1316 cl->weight = wrr->weight;
1317 if (wrr->priority) {
1318 cl->priority = wrr->priority-1;
1319 cl->cpriority = cl->priority;
1320 if (cl->priority >= cl->priority2)
1321 cl->priority2 = TC_CBQ_MAXPRIO-1;
1328 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1330 switch (ovl->strategy) {
1331 case TC_CBQ_OVL_CLASSIC:
1332 cl->overlimit = cbq_ovl_classic;
1334 case TC_CBQ_OVL_DELAY:
1335 cl->overlimit = cbq_ovl_delay;
1337 case TC_CBQ_OVL_LOWPRIO:
1338 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1339 ovl->priority2-1 <= cl->priority)
1341 cl->priority2 = ovl->priority2-1;
1342 cl->overlimit = cbq_ovl_lowprio;
1344 case TC_CBQ_OVL_DROP:
1345 cl->overlimit = cbq_ovl_drop;
1347 case TC_CBQ_OVL_RCLASSIC:
1348 cl->overlimit = cbq_ovl_rclassic;
1353 cl->penalty = ovl->penalty;
1357 #ifdef CONFIG_NET_CLS_ACT
1358 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1360 cl->police = p->police;
1362 if (cl->q->handle) {
1363 if (p->police == TC_POLICE_RECLASSIFY)
1364 cl->q->reshape_fail = cbq_reshape_fail;
1366 cl->q->reshape_fail = NULL;
1372 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1374 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1378 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1379 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1380 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1381 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1382 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1383 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1384 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1385 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1388 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1390 struct cbq_sched_data *q = qdisc_priv(sch);
1391 struct nlattr *tb[TCA_CBQ_MAX + 1];
1392 struct tc_ratespec *r;
1395 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1399 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1402 r = nla_data(tb[TCA_CBQ_RATE]);
1404 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1407 err = qdisc_class_hash_init(&q->clhash);
1412 q->link.sibling = &q->link;
1413 q->link.common.classid = sch->handle;
1414 q->link.qdisc = sch;
1415 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1418 q->link.q = &noop_qdisc;
1420 q->link.priority = TC_CBQ_MAXPRIO-1;
1421 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1422 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1423 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1424 q->link.overlimit = cbq_ovl_classic;
1425 q->link.allot = psched_mtu(qdisc_dev(sch));
1426 q->link.quantum = q->link.allot;
1427 q->link.weight = q->link.R_tab->rate.rate;
1429 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1430 q->link.avpkt = q->link.allot/2;
1431 q->link.minidle = -0x7FFFFFFF;
1433 qdisc_watchdog_init(&q->watchdog, sch);
1434 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1435 q->delay_timer.function = cbq_undelay;
1436 q->toplevel = TC_CBQ_MAXLEVEL;
1437 q->now = psched_get_time();
1440 cbq_link_class(&q->link);
1442 if (tb[TCA_CBQ_LSSOPT])
1443 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1445 cbq_addprio(q, &q->link);
1449 qdisc_put_rtab(q->link.R_tab);
1453 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1455 unsigned char *b = skb_tail_pointer(skb);
1457 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1465 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1467 unsigned char *b = skb_tail_pointer(skb);
1468 struct tc_cbq_lssopt opt;
1471 if (cl->borrow == NULL)
1472 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1473 if (cl->share == NULL)
1474 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1475 opt.ewma_log = cl->ewma_log;
1476 opt.level = cl->level;
1477 opt.avpkt = cl->avpkt;
1478 opt.maxidle = cl->maxidle;
1479 opt.minidle = (u32)(-cl->minidle);
1480 opt.offtime = cl->offtime;
1482 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1490 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1492 unsigned char *b = skb_tail_pointer(skb);
1493 struct tc_cbq_wrropt opt;
1496 opt.allot = cl->allot;
1497 opt.priority = cl->priority+1;
1498 opt.cpriority = cl->cpriority+1;
1499 opt.weight = cl->weight;
1500 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1508 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1510 unsigned char *b = skb_tail_pointer(skb);
1511 struct tc_cbq_ovl opt;
1513 opt.strategy = cl->ovl_strategy;
1514 opt.priority2 = cl->priority2+1;
1516 opt.penalty = cl->penalty;
1517 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1525 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1527 unsigned char *b = skb_tail_pointer(skb);
1528 struct tc_cbq_fopt opt;
1530 if (cl->split || cl->defmap) {
1531 opt.split = cl->split ? cl->split->common.classid : 0;
1532 opt.defmap = cl->defmap;
1534 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1543 #ifdef CONFIG_NET_CLS_ACT
1544 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1546 unsigned char *b = skb_tail_pointer(skb);
1547 struct tc_cbq_police opt;
1550 opt.police = cl->police;
1553 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1563 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1565 if (cbq_dump_lss(skb, cl) < 0 ||
1566 cbq_dump_rate(skb, cl) < 0 ||
1567 cbq_dump_wrr(skb, cl) < 0 ||
1568 cbq_dump_ovl(skb, cl) < 0 ||
1569 #ifdef CONFIG_NET_CLS_ACT
1570 cbq_dump_police(skb, cl) < 0 ||
1572 cbq_dump_fopt(skb, cl) < 0)
1577 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1579 struct cbq_sched_data *q = qdisc_priv(sch);
1580 struct nlattr *nest;
1582 nest = nla_nest_start(skb, TCA_OPTIONS);
1584 goto nla_put_failure;
1585 if (cbq_dump_attr(skb, &q->link) < 0)
1586 goto nla_put_failure;
1587 nla_nest_end(skb, nest);
1591 nla_nest_cancel(skb, nest);
1596 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1598 struct cbq_sched_data *q = qdisc_priv(sch);
1600 q->link.xstats.avgidle = q->link.avgidle;
1601 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1605 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1606 struct sk_buff *skb, struct tcmsg *tcm)
1608 struct cbq_class *cl = (struct cbq_class*)arg;
1609 struct nlattr *nest;
1612 tcm->tcm_parent = cl->tparent->common.classid;
1614 tcm->tcm_parent = TC_H_ROOT;
1615 tcm->tcm_handle = cl->common.classid;
1616 tcm->tcm_info = cl->q->handle;
1618 nest = nla_nest_start(skb, TCA_OPTIONS);
1620 goto nla_put_failure;
1621 if (cbq_dump_attr(skb, cl) < 0)
1622 goto nla_put_failure;
1623 nla_nest_end(skb, nest);
1627 nla_nest_cancel(skb, nest);
1632 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1633 struct gnet_dump *d)
1635 struct cbq_sched_data *q = qdisc_priv(sch);
1636 struct cbq_class *cl = (struct cbq_class*)arg;
1638 cl->qstats.qlen = cl->q->q.qlen;
1639 cl->xstats.avgidle = cl->avgidle;
1640 cl->xstats.undertime = 0;
1642 if (cl->undertime != PSCHED_PASTPERFECT)
1643 cl->xstats.undertime = cl->undertime - q->now;
1645 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1646 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1647 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1650 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1653 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1656 struct cbq_class *cl = (struct cbq_class*)arg;
1660 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1662 cl->common.classid);
1666 #ifdef CONFIG_NET_CLS_ACT
1667 if (cl->police == TC_POLICE_RECLASSIFY)
1668 new->reshape_fail = cbq_reshape_fail;
1672 *old = xchg(&cl->q, new);
1673 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1675 sch_tree_unlock(sch);
1682 static struct Qdisc *
1683 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1685 struct cbq_class *cl = (struct cbq_class*)arg;
1687 return cl ? cl->q : NULL;
1690 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1692 struct cbq_class *cl = (struct cbq_class *)arg;
1694 if (cl->q->q.qlen == 0)
1695 cbq_deactivate_class(cl);
1698 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1700 struct cbq_sched_data *q = qdisc_priv(sch);
1701 struct cbq_class *cl = cbq_class_lookup(q, classid);
1705 return (unsigned long)cl;
1710 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1712 struct cbq_sched_data *q = qdisc_priv(sch);
1714 WARN_ON(cl->filters);
1716 tcf_destroy_chain(&cl->filter_list);
1717 qdisc_destroy(cl->q);
1718 qdisc_put_rtab(cl->R_tab);
1719 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1725 cbq_destroy(struct Qdisc* sch)
1727 struct cbq_sched_data *q = qdisc_priv(sch);
1728 struct hlist_node *n, *next;
1729 struct cbq_class *cl;
1732 #ifdef CONFIG_NET_CLS_ACT
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leaves, which means that filters can still
	 * be bound to classes that have already been destroyed. --TGR '04
1740 for (h = 0; h < q->clhash.hashsize; h++) {
1741 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1742 tcf_destroy_chain(&cl->filter_list);
1744 for (h = 0; h < q->clhash.hashsize; h++) {
1745 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1747 cbq_destroy_class(sch, cl);
1749 qdisc_class_hash_destroy(&q->clhash);
1752 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1754 struct cbq_class *cl = (struct cbq_class*)arg;
1756 if (--cl->refcnt == 0) {
1757 #ifdef CONFIG_NET_CLS_ACT
1758 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1759 struct cbq_sched_data *q = qdisc_priv(sch);
1761 spin_lock_bh(root_lock);
1762 if (q->rx_class == cl)
1764 spin_unlock_bh(root_lock);
1767 cbq_destroy_class(sch, cl);
1772 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1776 struct cbq_sched_data *q = qdisc_priv(sch);
1777 struct cbq_class *cl = (struct cbq_class*)*arg;
1778 struct nlattr *opt = tca[TCA_OPTIONS];
1779 struct nlattr *tb[TCA_CBQ_MAX + 1];
1780 struct cbq_class *parent;
1781 struct qdisc_rate_table *rtab = NULL;
1786 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1794 cl->tparent->common.classid != parentid)
1796 if (!cl->tparent && parentid != TC_H_ROOT)
1800 if (tb[TCA_CBQ_RATE]) {
1801 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1806 /* Change class parameters */
1809 if (cl->next_alive != NULL)
1810 cbq_deactivate_class(cl);
1813 rtab = xchg(&cl->R_tab, rtab);
1814 qdisc_put_rtab(rtab);
1817 if (tb[TCA_CBQ_LSSOPT])
1818 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1820 if (tb[TCA_CBQ_WRROPT]) {
1822 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1825 if (tb[TCA_CBQ_OVL_STRATEGY])
1826 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1828 #ifdef CONFIG_NET_CLS_ACT
1829 if (tb[TCA_CBQ_POLICE])
1830 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1833 if (tb[TCA_CBQ_FOPT])
1834 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1837 cbq_activate_class(cl);
1839 sch_tree_unlock(sch);
1842 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1843 qdisc_root_sleeping_lock(sch),
1848 if (parentid == TC_H_ROOT)
1851 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1852 tb[TCA_CBQ_LSSOPT] == NULL)
1855 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1861 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1865 classid = TC_H_MAKE(sch->handle,0x8000);
1867 for (i=0; i<0x8000; i++) {
1868 if (++q->hgenerator >= 0x8000)
1870 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1876 classid = classid|q->hgenerator;
1881 parent = cbq_class_lookup(q, parentid);
1888 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1894 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1895 &pfifo_qdisc_ops, classid)))
1896 cl->q = &noop_qdisc;
1897 cl->common.classid = classid;
1898 cl->tparent = parent;
1900 cl->allot = parent->allot;
1901 cl->quantum = cl->allot;
1902 cl->weight = cl->R_tab->rate.rate;
1906 cl->borrow = cl->tparent;
1907 if (cl->tparent != &q->link)
1908 cl->share = cl->tparent;
1909 cbq_adjust_levels(parent);
1910 cl->minidle = -0x7FFFFFFF;
1911 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1912 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1913 if (cl->ewma_log==0)
1914 cl->ewma_log = q->link.ewma_log;
1916 cl->maxidle = q->link.maxidle;
1918 cl->avpkt = q->link.avpkt;
1919 cl->overlimit = cbq_ovl_classic;
1920 if (tb[TCA_CBQ_OVL_STRATEGY])
1921 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1922 #ifdef CONFIG_NET_CLS_ACT
1923 if (tb[TCA_CBQ_POLICE])
1924 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1926 if (tb[TCA_CBQ_FOPT])
1927 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1928 sch_tree_unlock(sch);
1930 qdisc_class_hash_grow(sch, &q->clhash);
1933 gen_new_estimator(&cl->bstats, &cl->rate_est,
1934 qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
1936 *arg = (unsigned long)cl;
1940 qdisc_put_rtab(rtab);
1944 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1946 struct cbq_sched_data *q = qdisc_priv(sch);
1947 struct cbq_class *cl = (struct cbq_class*)arg;
1950 if (cl->filters || cl->children || cl == &q->link)
1955 qlen = cl->q->q.qlen;
1957 qdisc_tree_decrease_qlen(cl->q, qlen);
1960 cbq_deactivate_class(cl);
1962 if (q->tx_borrowed == cl)
1963 q->tx_borrowed = q->tx_class;
1964 if (q->tx_class == cl) {
1966 q->tx_borrowed = NULL;
1968 #ifdef CONFIG_NET_CLS_ACT
1969 if (q->rx_class == cl)
1973 cbq_unlink_class(cl);
1974 cbq_adjust_levels(cl->tparent);
1976 cbq_sync_defmap(cl);
1979 sch_tree_unlock(sch);
1981 if (--cl->refcnt == 0)
1982 cbq_destroy_class(sch, cl);
1987 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1989 struct cbq_sched_data *q = qdisc_priv(sch);
1990 struct cbq_class *cl = (struct cbq_class *)arg;
1995 return &cl->filter_list;
1998 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2001 struct cbq_sched_data *q = qdisc_priv(sch);
2002 struct cbq_class *p = (struct cbq_class*)parent;
2003 struct cbq_class *cl = cbq_class_lookup(q, classid);
2006 if (p && p->level <= cl->level)
2009 return (unsigned long)cl;
2014 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2016 struct cbq_class *cl = (struct cbq_class*)arg;
2021 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2023 struct cbq_sched_data *q = qdisc_priv(sch);
2024 struct cbq_class *cl;
2025 struct hlist_node *n;
2031 for (h = 0; h < q->clhash.hashsize; h++) {
2032 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2033 if (arg->count < arg->skip) {
2037 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2046 static const struct Qdisc_class_ops cbq_class_ops = {
2049 .qlen_notify = cbq_qlen_notify,
2052 .change = cbq_change_class,
2053 .delete = cbq_delete,
2055 .tcf_chain = cbq_find_tcf,
2056 .bind_tcf = cbq_bind_filter,
2057 .unbind_tcf = cbq_unbind_filter,
2058 .dump = cbq_dump_class,
2059 .dump_stats = cbq_dump_class_stats,
2062 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2064 .cl_ops = &cbq_class_ops,
2066 .priv_size = sizeof(struct cbq_sched_data),
2067 .enqueue = cbq_enqueue,
2068 .dequeue = cbq_dequeue,
2069 .requeue = cbq_requeue,
2073 .destroy = cbq_destroy,
2076 .dump_stats = cbq_dump_stats,
2077 .owner = THIS_MODULE,
2080 static int __init cbq_module_init(void)
2082 return register_qdisc(&cbq_qdisc_ops);
2084 static void __exit cbq_module_exit(void)
2086 unregister_qdisc(&cbq_qdisc_ops);
2088 module_init(cbq_module_init)
2089 module_exit(cbq_module_exit)
2090 MODULE_LICENSE("GPL");