2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <asm/uaccess.h>
15 #include <asm/system.h>
16 #include <linux/bitops.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/string.h>
21 #include <linux/socket.h>
22 #include <linux/sockios.h>
24 #include <linux/errno.h>
25 #include <linux/interrupt.h>
26 #include <linux/if_ether.h>
27 #include <linux/inet.h>
28 #include <linux/netdevice.h>
29 #include <linux/etherdevice.h>
30 #include <linux/notifier.h>
32 #include <net/route.h>
33 #include <linux/skbuff.h>
35 #include <net/pkt_sched.h>
38 /* Class-Based Queueing (CBQ) algorithm.
39 =======================================
41 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
42 Management Models for Packet Networks",
43 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
45 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
[3] Sally Floyd, "Notes on Class-Based Queueing: Setting Parameters", 1996
50 [4] Sally Floyd and Michael Speer, "Experimental Results
51 for Class-Based Queueing", 1998, not published.
53 -----------------------------------------------------------------------
55 Algorithm skeleton was taken from NS simulator cbq.cc.
If someone wants to check this code against the LBL version,
they should take into account that ONLY the skeleton was borrowed;
the implementation is different. In particular:
60 --- The WRR algorithm is different. Our version looks more
61 reasonable (I hope) and works when quanta are allowed to be
62 less than MTU, which is always the case when real time classes
have small rates. Note that the statement of [3] is
incomplete: delay may actually be estimated even if the class
per-round allotment is less than MTU. Namely, if the per-round
allotment is W*r_i, and r_1+...+r_k = r < 1, then
68 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
70 In the worst case we have IntServ estimate with D = W*r+k*MTU
71 and C = MTU*r. The proof (if correct at all) is trivial.
--- It seems that cbq-2.0 is not very accurate. At least, I cannot
interpret some places, which look like wrong translations
from NS. Anyone is advised to find these differences
and explain to me why I am wrong 8).
--- Linux has no EOI event, so we cannot estimate true class
idle time. The workaround is to treat the next dequeue event
as a sign that the previous packet has finished. This is wrong because of
internal device queueing, but on a permanently loaded link it holds.
Moreover, combined with the clock integrator, this scheme looks
84 very close to an ideal solution. */
86 struct cbq_sched_data;
91 struct cbq_class *next; /* hash table link */
92 struct cbq_class *next_alive; /* next class with backlog in this priority band */
96 unsigned char priority; /* class priority */
97 unsigned char priority2; /* priority to be used after overlimit */
98 unsigned char ewma_log; /* time constant for idle time calculation */
99 unsigned char ovl_strategy;
100 #ifdef CONFIG_NET_CLS_POLICE
101 unsigned char police;
106 /* Link-sharing scheduler parameters */
107 long maxidle; /* Class parameters: see below. */
111 struct qdisc_rate_table *R_tab;
113 /* Overlimit strategy parameters */
114 void (*overlimit)(struct cbq_class *cl);
117 /* General scheduler (WRR) parameters */
119 long quantum; /* Allotment per WRR round */
120 long weight; /* Relative allotment: see below */
122 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
123 struct cbq_class *split; /* Ptr to split node */
124 struct cbq_class *share; /* Ptr to LS parent in the class tree */
125 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
126 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
128 struct cbq_class *sibling; /* Sibling chain */
129 struct cbq_class *children; /* Pointer to children chain */
131 struct Qdisc *q; /* Elementary queueing discipline */
135 unsigned char cpriority; /* Effective priority */
136 unsigned char delayed;
137 unsigned char level; /* level of the class in hierarchy:
138 0 for leaf classes, and maximal
139 level of children + 1 for nodes.
142 psched_time_t last; /* Last end of service */
143 psched_time_t undertime;
145 long deficit; /* Saved deficit for WRR */
146 unsigned long penalized;
147 struct gnet_stats_basic bstats;
148 struct gnet_stats_queue qstats;
149 struct gnet_stats_rate_est rate_est;
150 spinlock_t *stats_lock;
151 struct tc_cbq_xstats xstats;
153 struct tcf_proto *filter_list;
158 struct cbq_class *defaults[TC_PRIO_MAX+1];
161 struct cbq_sched_data
163 struct cbq_class *classes[16]; /* Hash table of all classes */
164 int nclasses[TC_CBQ_MAXPRIO+1];
165 unsigned quanta[TC_CBQ_MAXPRIO+1];
167 struct cbq_class link;
170 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
173 #ifdef CONFIG_NET_CLS_POLICE
174 struct cbq_class *rx_class;
176 struct cbq_class *tx_class;
177 struct cbq_class *tx_borrowed;
179 psched_time_t now; /* Cached timestamp */
180 psched_time_t now_rt; /* Cached real time */
183 struct timer_list delay_timer;
184 struct qdisc_watchdog watchdog; /* Watchdog timer,
188 psched_tdiff_t wd_expires;
194 #define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
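/* Illustrative sketch (kept out of the build with #if 0): how a
 * qdisc_rate_table-style lookup such as L2T() turns a packet length into
 * a transmission time without a per-packet division.  The cell_log, rate
 * and tick values below are made-up assumptions, not the kernel's psched
 * scaling.
 */
#if 0
#include <stdio.h>

#define CELL_LOG	3		/* one table slot covers 8 bytes   */
#define TABLE_SIZE	256
#define RATE_BPS	1000000UL	/* assumed 1 Mbit/s link           */
#define TICKS_PER_SEC	1000000UL	/* assumed 1 tick == 1 us          */

static unsigned long rtab[TABLE_SIZE];

static void build_rtab(void)
{
	int i;

	for (i = 0; i < TABLE_SIZE; i++) {
		unsigned long bytes = (unsigned long)(i + 1) << CELL_LOG;

		/* time to push 'bytes' onto the wire, in ticks */
		rtab[i] = (unsigned long)((unsigned long long)bytes * 8 *
					  TICKS_PER_SEC / RATE_BPS);
	}
}

static unsigned long l2t(unsigned int len)	/* mirrors L2T(cl, len) */
{
	return rtab[len >> CELL_LOG];
}

int main(void)
{
	build_rtab();
	printf("1500-byte packet: %lu ticks\n", l2t(1500));
	return 0;
}
#endif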
197 static __inline__ unsigned cbq_hash(u32 h)
204 static __inline__ struct cbq_class *
205 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
207 struct cbq_class *cl;
209 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
210 if (cl->classid == classid)
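/* Illustrative sketch (not compiled): one plausible way to fold a 32-bit
 * classid into the 16-bucket table walked by cbq_class_lookup() above.
 * The exact fold used by cbq_hash() is not shown in this excerpt, so the
 * function below is a stand-in, not the real implementation.
 */
#if 0
#include <stdint.h>

static unsigned int example_cbq_hash(uint32_t h)
{
	/* XOR the high bits down into the low nibble: 16 buckets */
	h ^= h >> 8;
	h ^= h >> 4;
	return h & 0xF;
}
#endif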
215 #ifdef CONFIG_NET_CLS_POLICE
217 static struct cbq_class *
218 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
220 struct cbq_class *cl, *new;
222 for (cl = this->tparent; cl; cl = cl->tparent)
223 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
/* Classify packet. The procedure is pretty complicated, but
it allows us to combine link sharing and priority scheduling
transparently.

Namely, you can put link-sharing rules (e.g. route based) at the root of CBQ,
so that it resolves to split nodes. Then packets are classified
by logical priority, or a more specific classifier may be attached
241 static struct cbq_class *
242 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
244 struct cbq_sched_data *q = qdisc_priv(sch);
245 struct cbq_class *head = &q->link;
246 struct cbq_class **defmap;
247 struct cbq_class *cl = NULL;
248 u32 prio = skb->priority;
249 struct tcf_result res;
252 * Step 1. If skb->priority points to one of our classes, use it.
254 if (TC_H_MAJ(prio^sch->handle) == 0 &&
255 (cl = cbq_class_lookup(q, prio)) != NULL)
258 *qerr = NET_XMIT_BYPASS;
261 defmap = head->defaults;
264 * Step 2+n. Apply classifier.
266 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
269 if ((cl = (void*)res.class) == NULL) {
270 if (TC_H_MAJ(res.classid))
271 cl = cbq_class_lookup(q, res.classid);
272 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
273 cl = defmap[TC_PRIO_BESTEFFORT];
275 if (cl == NULL || cl->level >= head->level)
279 #ifdef CONFIG_NET_CLS_ACT
283 *qerr = NET_XMIT_SUCCESS;
287 #elif defined(CONFIG_NET_CLS_POLICE)
289 case TC_POLICE_RECLASSIFY:
290 return cbq_reclassify(skb, cl);
* Step 3+n. If the classifier selected a link-sharing class,
* apply the agency-specific classifier.
* Repeat this procedure until we hit a leaf node.
312 * Step 4. No success...
314 if (TC_H_MAJ(prio) == 0 &&
315 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
316 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
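/* Illustrative sketch (not compiled): the Step 4 fallback above in
 * isolation.  'prio' stands for skb->priority with the major handle
 * already checked, and the array plays the role of head->defaults[];
 * all names here are hypothetical.
 */
#if 0
#include <stddef.h>

#define EX_TC_PRIO_MAX		15
#define EX_TC_PRIO_BESTEFFORT	0

struct example_class { int level; };

static struct example_class *
pick_default(struct example_class *defaults[EX_TC_PRIO_MAX + 1], unsigned int prio)
{
	struct example_class *cl = defaults[prio & EX_TC_PRIO_MAX];

	if (cl == NULL)				/* no default for this band */
		cl = defaults[EX_TC_PRIO_BESTEFFORT];
	return cl;				/* may still be NULL */
}
#endif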
A packet has just been enqueued on an empty class.
cbq_activate_class adds the class to the tail of the active class list
of its priority band.
328 static __inline__ void cbq_activate_class(struct cbq_class *cl)
330 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
331 int prio = cl->cpriority;
332 struct cbq_class *cl_tail;
334 cl_tail = q->active[prio];
335 q->active[prio] = cl;
337 if (cl_tail != NULL) {
338 cl->next_alive = cl_tail->next_alive;
339 cl_tail->next_alive = cl;
342 q->activemask |= (1<<prio);
Unlink the class from the active chain.
Note that the same operation is done directly in cbq_dequeue*
during the round-robin procedure.
352 static void cbq_deactivate_class(struct cbq_class *this)
354 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
355 int prio = this->cpriority;
356 struct cbq_class *cl;
357 struct cbq_class *cl_prev = q->active[prio];
360 cl = cl_prev->next_alive;
362 cl_prev->next_alive = cl->next_alive;
363 cl->next_alive = NULL;
365 if (cl == q->active[prio]) {
366 q->active[prio] = cl_prev;
367 if (cl == q->active[prio]) {
368 q->active[prio] = NULL;
369 q->activemask &= ~(1<<prio);
375 } while ((cl_prev = cl) != q->active[prio]);
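/* Illustrative sketch (not compiled): the circular "alive" list used by
 * cbq_activate_class()/cbq_deactivate_class().  The per-band pointer
 * (q->active[prio] in the real code) addresses the TAIL of the ring, so
 * appending is O(1) and the head is tail->next_alive.  Names below are
 * hypothetical.
 */
#if 0
#include <stddef.h>

struct ring_node { struct ring_node *next_alive; };

static void ring_activate(struct ring_node **tail, struct ring_node *n)
{
	if (*tail) {
		n->next_alive = (*tail)->next_alive;
		(*tail)->next_alive = n;
	} else {
		n->next_alive = n;		/* single-element ring */
	}
	*tail = n;
}

static void ring_deactivate(struct ring_node **tail, struct ring_node *n)
{
	struct ring_node *prev = *tail;

	do {
		if (prev->next_alive == n) {
			prev->next_alive = n->next_alive;
			n->next_alive = NULL;
			if (*tail == n)		/* removed the tail itself */
				*tail = (prev == n) ? NULL : prev;
			return;
		}
	} while ((prev = prev->next_alive) != *tail);
}
#endif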
379 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
381 int toplevel = q->toplevel;
383 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
387 PSCHED_GET_TIME(now);
388 incr = PSCHED_TDIFF(now, q->now_rt);
389 PSCHED_TADD2(q->now, incr, now);
392 if (PSCHED_TLESS(cl->undertime, now)) {
393 q->toplevel = cl->level;
396 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
401 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
403 struct cbq_sched_data *q = qdisc_priv(sch);
406 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
408 #ifdef CONFIG_NET_CLS_POLICE
412 if (ret == NET_XMIT_BYPASS)
418 #ifdef CONFIG_NET_CLS_POLICE
419 cl->q->__parent = sch;
421 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
423 sch->bstats.packets++;
424 sch->bstats.bytes+=len;
425 cbq_mark_toplevel(q, cl);
427 cbq_activate_class(cl);
432 cbq_mark_toplevel(q, cl);
438 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
440 struct cbq_sched_data *q = qdisc_priv(sch);
441 struct cbq_class *cl;
444 if ((cl = q->tx_class) == NULL) {
451 cbq_mark_toplevel(q, cl);
453 #ifdef CONFIG_NET_CLS_POLICE
455 cl->q->__parent = sch;
457 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
459 sch->qstats.requeues++;
461 cbq_activate_class(cl);
469 /* Overlimit actions */
471 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
473 static void cbq_ovl_classic(struct cbq_class *cl)
475 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
476 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
479 delay += cl->offtime;
The class goes to sleep, so that it will have no
chance to work off its avgidle. Let's forgive it 8)

BTW cbq-2.0 has a bug in this
place; apparently they forgot to shift it by cl->ewma_log.
489 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
490 if (cl->avgidle < cl->minidle)
491 cl->avgidle = cl->minidle;
494 PSCHED_TADD2(q->now, delay, cl->undertime);
496 cl->xstats.overactions++;
499 if (q->wd_expires == 0 || q->wd_expires > delay)
500 q->wd_expires = delay;
502 /* Dirty work! We must schedule wakeups based on
503 real available rate, rather than leaf rate,
504 which may be tiny (even zero).
506 if (q->toplevel == TC_CBQ_MAXLEVEL) {
508 psched_tdiff_t base_delay = q->wd_expires;
510 for (b = cl->borrow; b; b = b->borrow) {
511 delay = PSCHED_TDIFF(b->undertime, q->now);
512 if (delay < base_delay) {
519 q->wd_expires = base_delay;
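/* Illustrative sketch (not compiled): the classic overlimit arithmetic
 * above with concrete numbers.  Units are abstract scheduler ticks and
 * every value is made up for the example.
 */
#if 0
#include <stdio.h>

int main(void)
{
	long undertime = 12000, now = 10000;	/* class is still overlimit */
	long offtime = 3000, avgidle = -800;	/* negative => overlimit    */
	long minidle = -2000;
	unsigned int ewma_log = 5;
	long delay;

	delay = undertime - now;
	delay += offtime;
	/* forgive the negative avgidle, minus its 2^-ewma_log share */
	delay -= (-avgidle) - ((-avgidle) >> ewma_log);
	if (avgidle < minidle)
		avgidle = minidle;

	printf("sleep %ld ticks, new undertime = %ld\n", delay, now + delay);
	return 0;
}
#endif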
523 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
527 static void cbq_ovl_rclassic(struct cbq_class *cl)
529 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
530 struct cbq_class *this = cl;
533 if (cl->level > q->toplevel) {
537 } while ((cl = cl->borrow) != NULL);
/* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
546 static void cbq_ovl_delay(struct cbq_class *cl)
548 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
549 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
552 unsigned long sched = jiffies;
554 delay += cl->offtime;
556 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
557 if (cl->avgidle < cl->minidle)
558 cl->avgidle = cl->minidle;
559 PSCHED_TADD2(q->now, delay, cl->undertime);
562 sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
563 cl->penalized = sched;
564 cl->cpriority = TC_CBQ_MAXPRIO;
565 q->pmask |= (1<<TC_CBQ_MAXPRIO);
566 if (del_timer(&q->delay_timer) &&
567 (long)(q->delay_timer.expires - sched) > 0)
568 q->delay_timer.expires = sched;
569 add_timer(&q->delay_timer);
571 cl->xstats.overactions++;
576 if (q->wd_expires == 0 || q->wd_expires > delay)
577 q->wd_expires = delay;
580 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
582 static void cbq_ovl_lowprio(struct cbq_class *cl)
584 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
586 cl->penalized = jiffies + cl->penalty;
588 if (cl->cpriority != cl->priority2) {
589 cl->cpriority = cl->priority2;
590 q->pmask |= (1<<cl->cpriority);
591 cl->xstats.overactions++;
596 /* TC_CBQ_OVL_DROP: penalize class by dropping */
598 static void cbq_ovl_drop(struct cbq_class *cl)
600 if (cl->q->ops->drop)
601 if (cl->q->ops->drop(cl->q))
603 cl->xstats.overactions++;
607 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
609 struct cbq_class *cl;
610 struct cbq_class *cl_prev = q->active[prio];
611 unsigned long now = jiffies;
612 unsigned long sched = now;
618 cl = cl_prev->next_alive;
619 if ((long)(now - cl->penalized) > 0) {
620 cl_prev->next_alive = cl->next_alive;
621 cl->next_alive = NULL;
622 cl->cpriority = cl->priority;
624 cbq_activate_class(cl);
626 if (cl == q->active[prio]) {
627 q->active[prio] = cl_prev;
628 if (cl == q->active[prio]) {
629 q->active[prio] = NULL;
634 cl = cl_prev->next_alive;
635 } else if ((long)(sched - cl->penalized) > 0)
636 sched = cl->penalized;
637 } while ((cl_prev = cl) != q->active[prio]);
639 return (long)(sched - now);
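/* Illustrative sketch (not compiled): why cbq_undelay_prio() compares
 * (long)(now - cl->penalized) instead of now > penalized.  The signed
 * difference still gives the right answer when the jiffies counter wraps,
 * the same idea as the kernel's time_after() helpers.
 */
#if 0
#include <stdio.h>

int main(void)
{
	unsigned long now = 5;				/* just after a wrap */
	unsigned long penalized = (unsigned long)-10;	/* shortly before it */

	printf("direct compare: %d\n", now > penalized);		/* 0, wrong */
	printf("signed diff   : %d\n", (long)(now - penalized) > 0);	/* 1, right */
	return 0;
}
#endif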
642 static void cbq_undelay(unsigned long arg)
644 struct Qdisc *sch = (struct Qdisc*)arg;
645 struct cbq_sched_data *q = qdisc_priv(sch);
653 int prio = ffz(~pmask);
658 tmp = cbq_undelay_prio(q, prio);
661 if (tmp < delay || delay == 0)
667 q->delay_timer.expires = jiffies + delay;
668 add_timer(&q->delay_timer);
671 sch->flags &= ~TCQ_F_THROTTLED;
672 netif_schedule(sch->dev);
676 #ifdef CONFIG_NET_CLS_POLICE
678 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
681 struct Qdisc *sch = child->__parent;
682 struct cbq_sched_data *q = qdisc_priv(sch);
683 struct cbq_class *cl = q->rx_class;
687 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
689 cbq_mark_toplevel(q, cl);
692 cl->q->__parent = sch;
694 if (cl->q->enqueue(skb, cl->q) == 0) {
696 sch->bstats.packets++;
697 sch->bstats.bytes+=len;
699 cbq_activate_class(cl);
This is a mission-critical procedure.

We "regenerate" the toplevel cutoff, if the transmitting class
has backlog and is not regulated. This is not part of the
original CBQ description, but looks more reasonable.
Probably it is wrong; this question needs further investigation.
720 static __inline__ void
721 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
722 struct cbq_class *borrowed)
724 if (cl && q->toplevel >= borrowed->level) {
725 if (cl->q->q.qlen > 1) {
727 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
728 q->toplevel = borrowed->level;
731 } while ((borrowed=borrowed->borrow) != NULL);
/* This is not necessary now. Uncommenting it
would save CPU cycles, but decrease fairness.
737 q->toplevel = TC_CBQ_MAXLEVEL;
743 cbq_update(struct cbq_sched_data *q)
745 struct cbq_class *this = q->tx_class;
746 struct cbq_class *cl = this;
751 for ( ; cl; cl = cl->share) {
752 long avgidle = cl->avgidle;
755 cl->bstats.packets++;
756 cl->bstats.bytes += len;
759 (now - last) is total time between packet right edges.
760 (last_pktlen/rate) is "virtual" busy time, so that
762 idle = (now - last) - last_pktlen/rate
765 idle = PSCHED_TDIFF(q->now, cl->last);
766 if ((unsigned long)idle > 128*1024*1024) {
767 avgidle = cl->maxidle;
769 idle -= L2T(cl, len);
771 /* true_avgidle := (1-W)*true_avgidle + W*idle,
772 where W=2^{-ewma_log}. But cl->avgidle is scaled:
773 cl->avgidle == true_avgidle/W,
776 avgidle += idle - (avgidle>>cl->ewma_log);
780 /* Overlimit or at-limit */
782 if (avgidle < cl->minidle)
783 avgidle = cl->minidle;
785 cl->avgidle = avgidle;
/* Calculate the expected time when this class
will be allowed to send.
It will occur when:
(1-W)*true_avgidle + W*delay = 0, i.e.
idle = (1/W - 1)*(-true_avgidle)
or, in terms of the scaled value,
idle = (1 - W)*(-cl->avgidle);
795 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
To maintain the rate allocated to the class,
we add to undertime the virtual clock time
necessary to complete the transmitted packet.
(len/phys_bandwidth has already elapsed
by the moment cbq_update is called.)
806 idle -= L2T(&q->link, len);
807 idle += L2T(cl, len);
809 PSCHED_AUDIT_TDIFF(idle);
811 PSCHED_TADD2(q->now, idle, cl->undertime);
815 PSCHED_SET_PASTPERFECT(cl->undertime);
816 if (avgidle > cl->maxidle)
817 cl->avgidle = cl->maxidle;
819 cl->avgidle = avgidle;
824 cbq_update_toplevel(q, this, q->tx_borrowed);
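/* Illustrative sketch (not compiled): the scaled EWMA used by cbq_update().
 * With W = 2^-ewma_log the class stores avgidle == true_avgidle/W, so one
 * update is simply avgidle += idle - (avgidle >> ewma_log).  The samples
 * are invented; an arithmetic right shift of negative values is assumed,
 * as in the kernel.
 */
#if 0
#include <stdio.h>

int main(void)
{
	long avgidle = 0;			/* scaled accumulator */
	unsigned int ewma_log = 4;		/* W = 1/16           */
	long samples[] = { 120, -40, 80, 80, -200 };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		avgidle += samples[i] - (avgidle >> ewma_log);
		printf("idle=%5ld  avgidle(scaled)=%6ld  true=%ld\n",
		       samples[i], avgidle, avgidle >> ewma_log);
	}
	return 0;
}
#endif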
827 static __inline__ struct cbq_class *
828 cbq_under_limit(struct cbq_class *cl)
830 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
831 struct cbq_class *this_cl = cl;
833 if (cl->tparent == NULL)
836 if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
837 !PSCHED_TLESS(q->now, cl->undertime)) {
/* This is a very suspicious place. Currently the overlimit
action is generated for non-bounded classes
only if the link is completely congested.
Though this agrees with the ancestor-only paradigm,
it looks very questionable. In particular,
it means that this chunk of code will either
never be called or result in strong amplification
of burstiness. Dangerous and silly, but
no other solution exists.
853 if ((cl = cl->borrow) == NULL) {
854 this_cl->qstats.overlimits++;
855 this_cl->overlimit(this_cl);
858 if (cl->level > q->toplevel)
860 } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
861 PSCHED_TLESS(q->now, cl->undertime));
867 static __inline__ struct sk_buff *
868 cbq_dequeue_prio(struct Qdisc *sch, int prio)
870 struct cbq_sched_data *q = qdisc_priv(sch);
871 struct cbq_class *cl_tail, *cl_prev, *cl;
875 cl_tail = cl_prev = q->active[prio];
876 cl = cl_prev->next_alive;
883 struct cbq_class *borrow = cl;
886 (borrow = cbq_under_limit(cl)) == NULL)
889 if (cl->deficit <= 0) {
890 /* Class exhausted its allotment per
891 this round. Switch to the next one.
894 cl->deficit += cl->quantum;
898 skb = cl->q->dequeue(cl->q);
/* The class did not give us any skb :-(
This can occur even if cl->q->q.qlen != 0,
e.g. if cl->q == "tbf"
907 cl->deficit -= skb->len;
909 q->tx_borrowed = borrow;
911 #ifndef CBQ_XSTATS_BORROWS_BYTES
912 borrow->xstats.borrows++;
913 cl->xstats.borrows++;
915 borrow->xstats.borrows += skb->len;
916 cl->xstats.borrows += skb->len;
919 q->tx_len = skb->len;
921 if (cl->deficit <= 0) {
922 q->active[prio] = cl;
924 cl->deficit += cl->quantum;
929 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
930 /* Class is empty or penalized.
931 Unlink it from active chain.
933 cl_prev->next_alive = cl->next_alive;
934 cl->next_alive = NULL;
936 /* Did cl_tail point to it? */
941 /* Was it the last class in this band? */
944 q->active[prio] = NULL;
945 q->activemask &= ~(1<<prio);
947 cbq_activate_class(cl);
951 q->active[prio] = cl_tail;
954 cbq_activate_class(cl);
962 } while (cl_prev != cl_tail);
965 q->active[prio] = cl_prev;
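/* Illustrative sketch (not compiled): the deficit handling used in
 * cbq_dequeue_prio() above.  A class may transmit while its deficit is
 * positive; once it goes non-positive a quantum is added and the round
 * moves on, which is what lets quanta be smaller than the MTU (see the
 * WRR note at the top of the file).  Packet sizes and quantum are made up.
 */
#if 0
#include <stdio.h>

int main(void)
{
	long deficit = 0, quantum = 600;	/* quantum < MTU on purpose */
	int pkts[] = { 1500, 1500, 300 };
	unsigned int i = 0, round = 0;

	while (i < sizeof(pkts) / sizeof(pkts[0])) {
		if (deficit <= 0) {		/* allotment exhausted          */
			deficit += quantum;	/* replenish for a new round    */
			round++;
			continue;		/* other classes would run here */
		}
		deficit -= pkts[i];
		printf("round %u: sent %d bytes, deficit %ld\n",
		       round, pkts[i], deficit);
		i++;
	}
	return 0;
}
#endif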
970 static __inline__ struct sk_buff *
971 cbq_dequeue_1(struct Qdisc *sch)
973 struct cbq_sched_data *q = qdisc_priv(sch);
977 activemask = q->activemask&0xFF;
979 int prio = ffz(~activemask);
980 activemask &= ~(1<<prio);
981 skb = cbq_dequeue_prio(sch, prio);
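/* Illustrative sketch (not compiled): how cbq_dequeue_1() walks
 * q->activemask.  ffz(~mask) yields the index of the lowest set bit, i.e.
 * the numerically smallest (best) active priority band; the kernel helper
 * is open-coded here since this is a stand-alone example.
 */
#if 0
#include <stdio.h>

static int lowest_set_bit(unsigned int mask)
{
	int i;

	for (i = 0; i < 8; i++)
		if (mask & (1u << i))
			return i;
	return -1;
}

int main(void)
{
	unsigned int activemask = (1u << 2) | (1u << 5);  /* bands 2 and 5 active */

	while (activemask) {
		int prio = lowest_set_bit(activemask);

		activemask &= ~(1u << prio);
		printf("service band %d\n", prio);
	}
	return 0;
}
#endif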
988 static struct sk_buff *
989 cbq_dequeue(struct Qdisc *sch)
992 struct cbq_sched_data *q = qdisc_priv(sch);
996 PSCHED_GET_TIME(now);
997 incr = PSCHED_TDIFF(now, q->now_rt);
1000 psched_tdiff_t incr2;
/* Time integrator. We calculate EOS (end of service) time
by adding the expected packet transmission time.
If real time is greater, we warp the artificial clock,
so that cbq_time = max(real_time, work);
1008 incr2 = L2T(&q->link, q->tx_len);
1009 PSCHED_TADD(q->now, incr2);
1011 if ((incr -= incr2) < 0)
1014 PSCHED_TADD(q->now, incr);
1020 skb = cbq_dequeue_1(sch);
1023 sch->flags &= ~TCQ_F_THROTTLED;
/* All the classes are overlimit.

It is possible if:
1. The scheduler is empty.
2. The toplevel cutoff inhibited borrowing.
3. The root class is overlimit.

Reset the 2nd and 3rd conditions and retry.

Note that NS and cbq-2.0 are buggy here: peeking
an arbitrary class is appropriate for ancestor-only
sharing, but not for the toplevel algorithm.

Our version is better but slower, because it requires
two passes; this is unavoidable with top-level sharing.
1045 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1046 PSCHED_IS_PASTPERFECT(q->link.undertime))
1049 q->toplevel = TC_CBQ_MAXLEVEL;
1050 PSCHED_SET_PASTPERFECT(q->link.undertime);
1053 /* No packets in scheduler or nobody wants to give them to us :-(
Sigh... start the watchdog timer in the latter case. */
1057 sch->qstats.overlimits++;
1059 qdisc_watchdog_schedule(&q->watchdog,
1060 q->now + q->wd_expires);
/* CBQ class maintenance routines */
1067 static void cbq_adjust_levels(struct cbq_class *this)
1074 struct cbq_class *cl;
1076 if ((cl = this->children) != NULL) {
1078 if (cl->level > level)
1080 } while ((cl = cl->sibling) != this->children);
1082 this->level = level+1;
1083 } while ((this = this->tparent) != NULL);
1086 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1088 struct cbq_class *cl;
1091 if (q->quanta[prio] == 0)
1094 for (h=0; h<16; h++) {
1095 for (cl = q->classes[h]; cl; cl = cl->next) {
/* BUGGGG... Beware! This expression suffers from
arithmetic overflow!
1099 if (cl->priority == prio) {
1100 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1103 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1104 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1105 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
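/* Illustrative sketch (not compiled): the normalization above evaluates
 * weight*allot*nclasses/quanta in long arithmetic and, as the comment
 * warns, can overflow.  The same formula with a 64-bit intermediate is
 * shown below; this is only a sketch of the issue, not the kernel's fix.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

static long normalize_quantum(long weight, long allot, int nclasses,
			      unsigned long quanta_sum)
{
	uint64_t tmp = (uint64_t)weight * (uint64_t)allot * (uint64_t)nclasses;

	return quanta_sum ? (long)(tmp / quanta_sum) : 0;
}

int main(void)
{
	/* weight*allot alone already exceeds 32-bit range here */
	printf("%ld\n", normalize_quantum(100000, 60000, 8, 400000));
	return 0;
}
#endif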
1111 static void cbq_sync_defmap(struct cbq_class *cl)
1113 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1114 struct cbq_class *split = cl->split;
1121 for (i=0; i<=TC_PRIO_MAX; i++) {
1122 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1123 split->defaults[i] = NULL;
1126 for (i=0; i<=TC_PRIO_MAX; i++) {
1127 int level = split->level;
1129 if (split->defaults[i])
1132 for (h=0; h<16; h++) {
1133 struct cbq_class *c;
1135 for (c = q->classes[h]; c; c = c->next) {
1136 if (c->split == split && c->level < level &&
1138 split->defaults[i] = c;
1146 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1148 struct cbq_class *split = NULL;
1151 if ((split = cl->split) == NULL)
1153 splitid = split->classid;
1156 if (split == NULL || split->classid != splitid) {
1157 for (split = cl->tparent; split; split = split->tparent)
1158 if (split->classid == splitid)
1165 if (cl->split != split) {
1167 cbq_sync_defmap(cl);
1169 cl->defmap = def&mask;
1171 cl->defmap = (cl->defmap&~mask)|(def&mask);
1173 cbq_sync_defmap(cl);
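/* Illustrative sketch (not compiled): the defmap update performed by
 * cbq_change_defmap() above.  'mask' selects which priority slots are
 * being changed and 'def' supplies their new values, so bits outside the
 * mask keep their old setting.  The numbers are arbitrary.
 */
#if 0
#include <stdio.h>

int main(void)
{
	unsigned int defmap = 0x0F;		/* currently default for bands 0-3 */
	unsigned int def = 0x30, mask = 0x33;	/* update bands 0, 1, 4 and 5      */

	defmap = (defmap & ~mask) | (def & mask);
	printf("new defmap = 0x%02X\n", defmap);	/* 0x3C: bands 2-5 */
	return 0;
}
#endif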
1176 static void cbq_unlink_class(struct cbq_class *this)
1178 struct cbq_class *cl, **clp;
1179 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1181 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1189 if (this->tparent) {
1198 } while ((cl = *clp) != this->sibling);
1200 if (this->tparent->children == this) {
1201 this->tparent->children = this->sibling;
1202 if (this->sibling == this)
1203 this->tparent->children = NULL;
1206 BUG_TRAP(this->sibling == this);
1210 static void cbq_link_class(struct cbq_class *this)
1212 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1213 unsigned h = cbq_hash(this->classid);
1214 struct cbq_class *parent = this->tparent;
1216 this->sibling = this;
1217 this->next = q->classes[h];
1218 q->classes[h] = this;
1223 if (parent->children == NULL) {
1224 parent->children = this;
1226 this->sibling = parent->children->sibling;
1227 parent->children->sibling = this;
1231 static unsigned int cbq_drop(struct Qdisc* sch)
1233 struct cbq_sched_data *q = qdisc_priv(sch);
1234 struct cbq_class *cl, *cl_head;
1238 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1239 if ((cl_head = q->active[prio]) == NULL)
1244 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1247 cbq_deactivate_class(cl);
1250 } while ((cl = cl->next_alive) != cl_head);
1256 cbq_reset(struct Qdisc* sch)
1258 struct cbq_sched_data *q = qdisc_priv(sch);
1259 struct cbq_class *cl;
1266 q->tx_borrowed = NULL;
1267 qdisc_watchdog_cancel(&q->watchdog);
1268 del_timer(&q->delay_timer);
1269 q->toplevel = TC_CBQ_MAXLEVEL;
1270 PSCHED_GET_TIME(q->now);
1273 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1274 q->active[prio] = NULL;
1276 for (h = 0; h < 16; h++) {
1277 for (cl = q->classes[h]; cl; cl = cl->next) {
1280 cl->next_alive = NULL;
1281 PSCHED_SET_PASTPERFECT(cl->undertime);
1282 cl->avgidle = cl->maxidle;
1283 cl->deficit = cl->quantum;
1284 cl->cpriority = cl->priority;
1291 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1293 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1294 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1295 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1297 if (lss->change&TCF_CBQ_LSS_EWMA)
1298 cl->ewma_log = lss->ewma_log;
1299 if (lss->change&TCF_CBQ_LSS_AVPKT)
1300 cl->avpkt = lss->avpkt;
1301 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1302 cl->minidle = -(long)lss->minidle;
1303 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1304 cl->maxidle = lss->maxidle;
1305 cl->avgidle = lss->maxidle;
1307 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1308 cl->offtime = lss->offtime;
1312 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1314 q->nclasses[cl->priority]--;
1315 q->quanta[cl->priority] -= cl->weight;
1316 cbq_normalize_quanta(q, cl->priority);
1319 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1321 q->nclasses[cl->priority]++;
1322 q->quanta[cl->priority] += cl->weight;
1323 cbq_normalize_quanta(q, cl->priority);
1326 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1328 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1331 cl->allot = wrr->allot;
1333 cl->weight = wrr->weight;
1334 if (wrr->priority) {
1335 cl->priority = wrr->priority-1;
1336 cl->cpriority = cl->priority;
1337 if (cl->priority >= cl->priority2)
1338 cl->priority2 = TC_CBQ_MAXPRIO-1;
1345 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1347 switch (ovl->strategy) {
1348 case TC_CBQ_OVL_CLASSIC:
1349 cl->overlimit = cbq_ovl_classic;
1351 case TC_CBQ_OVL_DELAY:
1352 cl->overlimit = cbq_ovl_delay;
1354 case TC_CBQ_OVL_LOWPRIO:
1355 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1356 ovl->priority2-1 <= cl->priority)
1358 cl->priority2 = ovl->priority2-1;
1359 cl->overlimit = cbq_ovl_lowprio;
1361 case TC_CBQ_OVL_DROP:
1362 cl->overlimit = cbq_ovl_drop;
1364 case TC_CBQ_OVL_RCLASSIC:
1365 cl->overlimit = cbq_ovl_rclassic;
1370 cl->penalty = (ovl->penalty*HZ)/1000;
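/* Illustrative sketch (not compiled): the penalty handed in by userspace
 * is in milliseconds, while the scheduler keeps it in jiffies, hence the
 * (penalty*HZ)/1000 scaling above.  HZ is replaced by an assumed value
 * here; note the integer division simply truncates sub-tick remainders.
 */
#if 0
#include <stdio.h>

#define EX_HZ 250	/* assumed tick rate for the example */

int main(void)
{
	unsigned int ms[] = { 10, 17, 1000 };
	unsigned int i;

	for (i = 0; i < sizeof(ms) / sizeof(ms[0]); i++)
		printf("%4u ms -> %u jiffies\n", ms[i], ms[i] * EX_HZ / 1000);
	return 0;
}
#endif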
1374 #ifdef CONFIG_NET_CLS_POLICE
1375 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1377 cl->police = p->police;
1379 if (cl->q->handle) {
1380 if (p->police == TC_POLICE_RECLASSIFY)
1381 cl->q->reshape_fail = cbq_reshape_fail;
1383 cl->q->reshape_fail = NULL;
1389 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1391 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1395 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1397 struct cbq_sched_data *q = qdisc_priv(sch);
1398 struct rtattr *tb[TCA_CBQ_MAX];
1399 struct tc_ratespec *r;
1401 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1402 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1403 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1406 if (tb[TCA_CBQ_LSSOPT-1] &&
1407 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1410 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1412 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1416 q->link.sibling = &q->link;
1417 q->link.classid = sch->handle;
1418 q->link.qdisc = sch;
1419 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1421 q->link.q = &noop_qdisc;
1423 q->link.priority = TC_CBQ_MAXPRIO-1;
1424 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1425 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1426 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1427 q->link.overlimit = cbq_ovl_classic;
1428 q->link.allot = psched_mtu(sch->dev);
1429 q->link.quantum = q->link.allot;
1430 q->link.weight = q->link.R_tab->rate.rate;
1432 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1433 q->link.avpkt = q->link.allot/2;
1434 q->link.minidle = -0x7FFFFFFF;
1435 q->link.stats_lock = &sch->dev->queue_lock;
1437 qdisc_watchdog_init(&q->watchdog, sch);
1438 init_timer(&q->delay_timer);
1439 q->delay_timer.data = (unsigned long)sch;
1440 q->delay_timer.function = cbq_undelay;
1441 q->toplevel = TC_CBQ_MAXLEVEL;
1442 PSCHED_GET_TIME(q->now);
1445 cbq_link_class(&q->link);
1447 if (tb[TCA_CBQ_LSSOPT-1])
1448 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1450 cbq_addprio(q, &q->link);
1454 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1456 unsigned char *b = skb->tail;
1458 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1462 skb_trim(skb, b - skb->data);
1466 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1468 unsigned char *b = skb->tail;
1469 struct tc_cbq_lssopt opt;
1472 if (cl->borrow == NULL)
1473 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1474 if (cl->share == NULL)
1475 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1476 opt.ewma_log = cl->ewma_log;
1477 opt.level = cl->level;
1478 opt.avpkt = cl->avpkt;
1479 opt.maxidle = cl->maxidle;
1480 opt.minidle = (u32)(-cl->minidle);
1481 opt.offtime = cl->offtime;
1483 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1487 skb_trim(skb, b - skb->data);
1491 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1493 unsigned char *b = skb->tail;
1494 struct tc_cbq_wrropt opt;
1497 opt.allot = cl->allot;
1498 opt.priority = cl->priority+1;
1499 opt.cpriority = cl->cpriority+1;
1500 opt.weight = cl->weight;
1501 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1505 skb_trim(skb, b - skb->data);
1509 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1511 unsigned char *b = skb->tail;
1512 struct tc_cbq_ovl opt;
1514 opt.strategy = cl->ovl_strategy;
1515 opt.priority2 = cl->priority2+1;
1517 opt.penalty = (cl->penalty*1000)/HZ;
1518 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1522 skb_trim(skb, b - skb->data);
1526 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1528 unsigned char *b = skb->tail;
1529 struct tc_cbq_fopt opt;
1531 if (cl->split || cl->defmap) {
1532 opt.split = cl->split ? cl->split->classid : 0;
1533 opt.defmap = cl->defmap;
1535 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1540 skb_trim(skb, b - skb->data);
1544 #ifdef CONFIG_NET_CLS_POLICE
1545 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1547 unsigned char *b = skb->tail;
1548 struct tc_cbq_police opt;
1551 opt.police = cl->police;
1554 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1559 skb_trim(skb, b - skb->data);
1564 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1566 if (cbq_dump_lss(skb, cl) < 0 ||
1567 cbq_dump_rate(skb, cl) < 0 ||
1568 cbq_dump_wrr(skb, cl) < 0 ||
1569 cbq_dump_ovl(skb, cl) < 0 ||
1570 #ifdef CONFIG_NET_CLS_POLICE
1571 cbq_dump_police(skb, cl) < 0 ||
1573 cbq_dump_fopt(skb, cl) < 0)
1578 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1580 struct cbq_sched_data *q = qdisc_priv(sch);
1581 unsigned char *b = skb->tail;
1584 rta = (struct rtattr*)b;
1585 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1586 if (cbq_dump_attr(skb, &q->link) < 0)
1587 goto rtattr_failure;
1588 rta->rta_len = skb->tail - b;
1592 skb_trim(skb, b - skb->data);
1597 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1599 struct cbq_sched_data *q = qdisc_priv(sch);
1601 q->link.xstats.avgidle = q->link.avgidle;
1602 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1606 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1607 struct sk_buff *skb, struct tcmsg *tcm)
1609 struct cbq_class *cl = (struct cbq_class*)arg;
1610 unsigned char *b = skb->tail;
1614 tcm->tcm_parent = cl->tparent->classid;
1616 tcm->tcm_parent = TC_H_ROOT;
1617 tcm->tcm_handle = cl->classid;
1618 tcm->tcm_info = cl->q->handle;
1620 rta = (struct rtattr*)b;
1621 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1622 if (cbq_dump_attr(skb, cl) < 0)
1623 goto rtattr_failure;
1624 rta->rta_len = skb->tail - b;
1628 skb_trim(skb, b - skb->data);
1633 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1634 struct gnet_dump *d)
1636 struct cbq_sched_data *q = qdisc_priv(sch);
1637 struct cbq_class *cl = (struct cbq_class*)arg;
1639 cl->qstats.qlen = cl->q->q.qlen;
1640 cl->xstats.avgidle = cl->avgidle;
1641 cl->xstats.undertime = 0;
1643 if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1644 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1646 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1647 #ifdef CONFIG_NET_ESTIMATOR
1648 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1650 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1653 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1656 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1659 struct cbq_class *cl = (struct cbq_class*)arg;
1663 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1664 cl->classid)) == NULL)
1667 #ifdef CONFIG_NET_CLS_POLICE
1668 if (cl->police == TC_POLICE_RECLASSIFY)
1669 new->reshape_fail = cbq_reshape_fail;
1673 *old = xchg(&cl->q, new);
1674 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1676 sch_tree_unlock(sch);
1683 static struct Qdisc *
1684 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1686 struct cbq_class *cl = (struct cbq_class*)arg;
1688 return cl ? cl->q : NULL;
1691 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1693 struct cbq_class *cl = (struct cbq_class *)arg;
1695 if (cl->q->q.qlen == 0)
1696 cbq_deactivate_class(cl);
1699 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1701 struct cbq_sched_data *q = qdisc_priv(sch);
1702 struct cbq_class *cl = cbq_class_lookup(q, classid);
1706 return (unsigned long)cl;
1711 static void cbq_destroy_filters(struct cbq_class *cl)
1713 struct tcf_proto *tp;
1715 while ((tp = cl->filter_list) != NULL) {
1716 cl->filter_list = tp->next;
1721 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1723 struct cbq_sched_data *q = qdisc_priv(sch);
1725 BUG_TRAP(!cl->filters);
1727 cbq_destroy_filters(cl);
1728 qdisc_destroy(cl->q);
1729 qdisc_put_rtab(cl->R_tab);
1730 #ifdef CONFIG_NET_ESTIMATOR
1731 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1738 cbq_destroy(struct Qdisc* sch)
1740 struct cbq_sched_data *q = qdisc_priv(sch);
1741 struct cbq_class *cl;
1744 #ifdef CONFIG_NET_CLS_POLICE
* Filters must be destroyed first because we don't destroy the
* classes from root to leaves, which means that filters can still
* be bound to classes which have been destroyed already. --TGR '04
1752 for (h = 0; h < 16; h++)
1753 for (cl = q->classes[h]; cl; cl = cl->next)
1754 cbq_destroy_filters(cl);
1756 for (h = 0; h < 16; h++) {
1757 struct cbq_class *next;
1759 for (cl = q->classes[h]; cl; cl = next) {
1761 cbq_destroy_class(sch, cl);
1766 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1768 struct cbq_class *cl = (struct cbq_class*)arg;
1770 if (--cl->refcnt == 0) {
1771 #ifdef CONFIG_NET_CLS_POLICE
1772 struct cbq_sched_data *q = qdisc_priv(sch);
1774 spin_lock_bh(&sch->dev->queue_lock);
1775 if (q->rx_class == cl)
1777 spin_unlock_bh(&sch->dev->queue_lock);
1780 cbq_destroy_class(sch, cl);
1785 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1789 struct cbq_sched_data *q = qdisc_priv(sch);
1790 struct cbq_class *cl = (struct cbq_class*)*arg;
1791 struct rtattr *opt = tca[TCA_OPTIONS-1];
1792 struct rtattr *tb[TCA_CBQ_MAX];
1793 struct cbq_class *parent;
1794 struct qdisc_rate_table *rtab = NULL;
1796 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1799 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1800 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1803 if (tb[TCA_CBQ_FOPT-1] &&
1804 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1807 if (tb[TCA_CBQ_RATE-1] &&
1808 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1811 if (tb[TCA_CBQ_LSSOPT-1] &&
1812 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1815 if (tb[TCA_CBQ_WRROPT-1] &&
1816 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1819 #ifdef CONFIG_NET_CLS_POLICE
1820 if (tb[TCA_CBQ_POLICE-1] &&
1821 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1828 if (cl->tparent && cl->tparent->classid != parentid)
1830 if (!cl->tparent && parentid != TC_H_ROOT)
1834 if (tb[TCA_CBQ_RATE-1]) {
1835 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1840 /* Change class parameters */
1843 if (cl->next_alive != NULL)
1844 cbq_deactivate_class(cl);
1847 rtab = xchg(&cl->R_tab, rtab);
1848 qdisc_put_rtab(rtab);
1851 if (tb[TCA_CBQ_LSSOPT-1])
1852 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1854 if (tb[TCA_CBQ_WRROPT-1]) {
1856 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1859 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1860 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1862 #ifdef CONFIG_NET_CLS_POLICE
1863 if (tb[TCA_CBQ_POLICE-1])
1864 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1867 if (tb[TCA_CBQ_FOPT-1])
1868 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1871 cbq_activate_class(cl);
1873 sch_tree_unlock(sch);
1875 #ifdef CONFIG_NET_ESTIMATOR
1876 if (tca[TCA_RATE-1])
1877 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1878 cl->stats_lock, tca[TCA_RATE-1]);
1883 if (parentid == TC_H_ROOT)
1886 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1887 tb[TCA_CBQ_LSSOPT-1] == NULL)
1890 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1896 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1900 classid = TC_H_MAKE(sch->handle,0x8000);
1902 for (i=0; i<0x8000; i++) {
1903 if (++q->hgenerator >= 0x8000)
1905 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1911 classid = classid|q->hgenerator;
1916 parent = cbq_class_lookup(q, parentid);
1923 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1929 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1930 cl->q = &noop_qdisc;
1931 cl->classid = classid;
1932 cl->tparent = parent;
1934 cl->allot = parent->allot;
1935 cl->quantum = cl->allot;
1936 cl->weight = cl->R_tab->rate.rate;
1937 cl->stats_lock = &sch->dev->queue_lock;
1941 cl->borrow = cl->tparent;
1942 if (cl->tparent != &q->link)
1943 cl->share = cl->tparent;
1944 cbq_adjust_levels(parent);
1945 cl->minidle = -0x7FFFFFFF;
1946 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1947 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1948 if (cl->ewma_log==0)
1949 cl->ewma_log = q->link.ewma_log;
1951 cl->maxidle = q->link.maxidle;
1953 cl->avpkt = q->link.avpkt;
1954 cl->overlimit = cbq_ovl_classic;
1955 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1956 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1957 #ifdef CONFIG_NET_CLS_POLICE
1958 if (tb[TCA_CBQ_POLICE-1])
1959 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1961 if (tb[TCA_CBQ_FOPT-1])
1962 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1963 sch_tree_unlock(sch);
1965 #ifdef CONFIG_NET_ESTIMATOR
1966 if (tca[TCA_RATE-1])
1967 gen_new_estimator(&cl->bstats, &cl->rate_est,
1968 cl->stats_lock, tca[TCA_RATE-1]);
1971 *arg = (unsigned long)cl;
1975 qdisc_put_rtab(rtab);
1979 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1981 struct cbq_sched_data *q = qdisc_priv(sch);
1982 struct cbq_class *cl = (struct cbq_class*)arg;
1985 if (cl->filters || cl->children || cl == &q->link)
1990 qlen = cl->q->q.qlen;
1992 qdisc_tree_decrease_qlen(cl->q, qlen);
1995 cbq_deactivate_class(cl);
1997 if (q->tx_borrowed == cl)
1998 q->tx_borrowed = q->tx_class;
1999 if (q->tx_class == cl) {
2001 q->tx_borrowed = NULL;
2003 #ifdef CONFIG_NET_CLS_POLICE
2004 if (q->rx_class == cl)
2008 cbq_unlink_class(cl);
2009 cbq_adjust_levels(cl->tparent);
2011 cbq_sync_defmap(cl);
2014 sch_tree_unlock(sch);
2016 if (--cl->refcnt == 0)
2017 cbq_destroy_class(sch, cl);
2022 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2024 struct cbq_sched_data *q = qdisc_priv(sch);
2025 struct cbq_class *cl = (struct cbq_class *)arg;
2030 return &cl->filter_list;
2033 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2036 struct cbq_sched_data *q = qdisc_priv(sch);
2037 struct cbq_class *p = (struct cbq_class*)parent;
2038 struct cbq_class *cl = cbq_class_lookup(q, classid);
2041 if (p && p->level <= cl->level)
2044 return (unsigned long)cl;
2049 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2051 struct cbq_class *cl = (struct cbq_class*)arg;
2056 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2058 struct cbq_sched_data *q = qdisc_priv(sch);
2064 for (h = 0; h < 16; h++) {
2065 struct cbq_class *cl;
2067 for (cl = q->classes[h]; cl; cl = cl->next) {
2068 if (arg->count < arg->skip) {
2072 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2081 static struct Qdisc_class_ops cbq_class_ops = {
2084 .qlen_notify = cbq_qlen_notify,
2087 .change = cbq_change_class,
2088 .delete = cbq_delete,
2090 .tcf_chain = cbq_find_tcf,
2091 .bind_tcf = cbq_bind_filter,
2092 .unbind_tcf = cbq_unbind_filter,
2093 .dump = cbq_dump_class,
2094 .dump_stats = cbq_dump_class_stats,
2097 static struct Qdisc_ops cbq_qdisc_ops = {
2099 .cl_ops = &cbq_class_ops,
2101 .priv_size = sizeof(struct cbq_sched_data),
2102 .enqueue = cbq_enqueue,
2103 .dequeue = cbq_dequeue,
2104 .requeue = cbq_requeue,
2108 .destroy = cbq_destroy,
2111 .dump_stats = cbq_dump_stats,
2112 .owner = THIS_MODULE,
2115 static int __init cbq_module_init(void)
2117 return register_qdisc(&cbq_qdisc_ops);
2119 static void __exit cbq_module_exit(void)
2121 unregister_qdisc(&cbq_qdisc_ops);
2123 module_init(cbq_module_init)
2124 module_exit(cbq_module_exit)
2125 MODULE_LICENSE("GPL");