// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *			HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *			found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *			helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *			code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *			created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *			spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *			fixed requeue routine
 *		and many others. thanks.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. A leaf ALWAYS has level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/
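
/* Illustration (device name and rates are made-up examples): a hierarchy like
 *
 *	tc qdisc add dev eth0 root handle 1: htb default 20
 *	tc class add dev eth0 parent 1:  classid 1:1  htb rate 100mbit
 *	tc class add dev eth0 parent 1:1 classid 1:10 htb rate 30mbit ceil 100mbit
 *	tc class add dev eth0 parent 1:1 classid 1:20 htb rate 70mbit ceil 100mbit
 *
 * gives the leaves 1:10 and 1:20 level 0; they may borrow unused rate from
 * their parent 1:1 up to their ceil, and unclassified traffic falls back to
 * the "default" leaf 1:20.
 */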

static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param    (htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

static int htb_rate_est = 0; /* htb classes have a default rate estimator */
module_param(htb_rate_est, int, 0640);
MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");

/* used internally to keep status of a single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

struct htb_prio {
	union {
		struct rb_root	row;
		struct rb_root	feed;
	};
	struct rb_node	*ptr;
	/* When class changes from state 1->2 and disconnects from
	 * parent's feed then we lose the ptr value and start from the
	 * first child again. Here we store classid of the
	 * last valid ptr (used when ptr is NULL).
	 */
	u32		last_ptr_id;
};

/* interior & leaf nodes; props specific to leaves are marked L:
 * To reduce false sharing, place mostly read fields at beginning,
 * and mostly written ones at the end.
 */
struct htb_class {
	struct Qdisc_class_common common;
	struct psched_ratecfg	rate;
	struct psched_ratecfg	ceil;
	s64			buffer, cbuffer;/* token bucket depth/rate */
	s64			mbuffer;	/* max wait time */
	u32			prio;		/* these two are used only by leaves... */
	int			quantum;	/* but stored for parent-to-leaf return */

	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
	struct tcf_block	*block;
	int			filter_cnt;

	int			level;		/* our level (see above) */
	unsigned int		children;
	struct htb_class	*parent;	/* parent class */

	struct net_rate_estimator __rcu *rate_est;

	/*
	 * Written often fields
	 */
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_basic_packed bstats_bias;
	struct tc_htb_xstats	xstats;	/* our special stats */

	/* token bucket parameters */
	s64			tokens, ctokens;/* current number of tokens */
	s64			t_c;		/* checkpoint time */

	union {
		struct htb_class_leaf {
			int		deficit[TC_HTB_MAXDEPTH];
			struct Qdisc	*q;
			struct netdev_queue *offload_queue;
		} leaf;
		struct htb_class_inner {
			struct htb_prio clprio[TC_HTB_NUMPRIO];
		} inner;
	};
	s64			pq_key;

	int			prio_activity;	/* for which prios are we active */
	enum htb_cmode		cmode;		/* current mode of the class */
	struct rb_node		pq_node;	/* node for event queue */
	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */

	unsigned int drops ____cacheline_aligned_in_smp;
	unsigned int		overlimits;
};

struct htb_level {
	struct rb_root	wait_pq;
	struct htb_prio hprio[TC_HTB_NUMPRIO];
};

struct htb_sched {
	struct Qdisc_class_hash clhash;
	int			defcls;		/* class where unclassified flows go to */
	int			rate2quantum;	/* quant = rate / rate2quantum */

	/* filters for qdisc itself */
	struct tcf_proto __rcu	*filter_list;
	struct tcf_block	*block;

#define HTB_WARN_TOOMANYEVENTS	0x1
	unsigned int		warned;	/* only one warning */
	int			direct_qlen;
	struct work_struct	work;

	/* non shaped skbs; let them go directly thru */
	struct qdisc_skb_head	direct_queue;
	u32			direct_pkts;
	u32			overlimits;

	struct qdisc_watchdog	watchdog;

	s64			now;	/* cached dequeue time */

	/* time of nearest event per level (row) */
	s64			near_ev_cache[TC_HTB_MAXDEPTH];

	int			row_mask[TC_HTB_MAXDEPTH];

	struct htb_level	hlevel[TC_HTB_MAXDEPTH];

	struct Qdisc		**direct_qdiscs;
	unsigned int		num_direct_qdiscs;

	bool			offload;
};
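
/* Orientation for the structures above (informal summary): for every
 * {level, prio} pair, q->hlevel[level].hprio[prio].row is an rbtree of the
 * active HTB_CAN_SEND classes at that level, while an inner class keeps its
 * active HTB_MAY_BORROW children in inner.clprio[prio].feed.  The row/feed
 * union in struct htb_prio is the same bookkeeping reused in those two
 * contexts: rows hang off struct htb_level, feeds hang off inner classes.
 */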

/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, handle);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct htb_class, common);
}

static unsigned long htb_search(struct Qdisc *sch, u32 handle)
{
	return (unsigned long)htb_find(handle, sch);
}
/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases leaf class is returned.
 * We allow direct class selection by classid in priority. Then we examine
 * filters in qdisc and in inner nodes (if higher filter points to the inner
 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 * internal fifo (direct). These packets then go directly thru. If we still
 * have no valid leaf we try to use MAJOR:default leaf. If still unsuccessful
 * we finish and return the direct queue.
 */
#define HTB_DIRECT ((struct htb_class *)-1L)

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow to select class by setting skb->priority to valid classid;
	 * note that nfmark can be used too by attaching filter fw with no
	 * rules in it
	 */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	cl = htb_find(skb->priority, sch);
	if (cl) {
		if (cl->level == 0)
			return cl;
		/* Start with inner filter chain if a non-leaf class is selected */
		tcf = rcu_dereference_bh(cl->filter_list);
	} else {
		tcf = rcu_dereference_bh(q->filter_list);
	}

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
		case TC_ACT_TRAP:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			fallthrough;
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		cl = (void *)res.class;
		if (!cl) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			cl = htb_find(res.classid, sch);
			if (!cl)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got inner class; apply inner filter chain */
		tcf = rcu_dereference_bh(cl->filter_list);
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is safe bet */
	return cl;
}
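
/* Illustration of the lookup order above (classids are made-up examples):
 * a packet whose skb->priority equals the qdisc handle (1:0) goes HTB_DIRECT;
 * a priority naming a leaf such as 1:10 selects that leaf outright; otherwise
 * the qdisc and inner-class filter chains decide, and anything still
 * unresolved ends up in the 1:<defcls> leaf configured with "default" at
 * qdisc creation time.
 */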

/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->common.classid > c->common.classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to priority event queue to indicate that class will
 * change its mode in cl->pq_key microseconds. Make sure that class is not
 * already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, s64 delay)
{
	struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;

	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (cl->pq_key >= c->pq_key)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
	}
}

/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
	} else {
		rb_erase(rb, root);
		RB_CLEAR_NODE(rb);
	}
}


/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;
	struct htb_level *hlevel = &q->hlevel[cl->level];

	while (mask) {
		int prio = ffz(~mask);
		struct htb_prio *hprio = &hlevel->hprio[prio];

		mask &= ~(1 << prio);
		if (hprio->ptr == cl->node + prio)
			htb_next_rb_node(&hprio->ptr);

		htb_safe_rb_erase(cl->node + prio, &hprio->row);
		if (!hprio->row.rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for priorities it is participating on. cl->cmode must be new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->inner.clprio[prio].feed.rb_node)
				/* parent already has its feed in use, so
				 * reset the bit in mask as parent is already ok
				 */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->inner.clprio[prio].ptr == cl->node + prio) {
				/* we are removing child which is pointed to from
				 * parent feed - forget the pointer but remember
				 * classid
				 */
				p->inner.clprio[prio].last_ptr_id = cl->common.classid;
				p->inner.clprio[prio].ptr = NULL;
			}

			htb_safe_rb_erase(cl->node + prio,
					  &p->inner.clprio[prio].feed);

			if (!p->inner.clprio[prio].feed.rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

static inline s64 htb_lowater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
	else
		return 0;
}
static inline s64 htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
	else
		return 0;
}
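
/* Worked example for the two thresholds above (illustrative only): with
 * htb_hysteresis enabled, a class that can currently send keeps HTB_CAN_SEND
 * until its tokens drop below -buffer, and a class stuck in HTB_CANT_SEND
 * only leaves that mode once ctokens climb back to 0 or above.  The entry
 * and exit thresholds thus differ by a whole burst buffer, so a class
 * hovering near empty does not flip modes on every packet; with hysteresis
 * off both thresholds are simply 0.
 */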

/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 * from now to time when cl will change its state.
 * Also it is worth to note that class mode doesn't change simply
 * at cl->{c,}tokens == 0 but there can rather be hysteresis of
 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, s64 *diff)
{
	s64 toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}

/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. Routine will update feed lists linkage, change mode
 * and add class to the wait event queue if appropriate. New mode should
 * be different from old one and cl->pq_key has to be valid if changing
 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (new_mode == HTB_CANT_SEND) {
		cl->overlimits++;
		q->overlimits++;
	}

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called on already active leaf safely.
 * It also adds leaf into droplist.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << cl->prio;
		htb_activate_prios(q, cl);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that leaf is active. In other words it can't be called
 * with non-active leaf. It also removes class from the drop list.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(!cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	int ret;
	unsigned int len = qdisc_pkt_len(skb);
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__qdisc_enqueue_tail(skb, &q->direct_queue);
			q->direct_pkts++;
		} else {
			return qdisc_drop(skb, sch, to_free);
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret & __NET_XMIT_BYPASS)
			qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return ret;
#endif
	} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
					to_free)) != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			qdisc_qstats_drop(sch);
			cl->drops++;
		}
		return ret;
	} else {
		htb_activate(q, cl);
	}

	sch->qstats.backlog += len;
	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->tokens;

	if (toks > cl->buffer)
		toks = cl->buffer;
	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->tokens = toks;
}

static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->ctokens;

	if (toks > cl->cbuffer)
		toks = cl->cbuffer;
	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->ctokens = toks;
}
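
/* Back-of-the-envelope illustration (numbers are examples): psched_l2t_ns()
 * converts a packet length into the time the configured rate needs to send
 * it, so charging a 1500 byte packet against a 10 Mbit/s rate removes about
 * 1500 * 8 / 10^7 s = 1.2 ms worth of tokens, while the elapsed time since
 * the last checkpoint (diff) refills tokens, capped at the class buffer.
 */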

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use more precise clock than the event queue here.
 * In such case we remove class from event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);
	enum htb_cmode old_mode;
	s64 diff;

	while (cl) {
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			htb_accnt_tokens(cl, bytes, diff);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		htb_accnt_ctokens(cl, bytes, diff);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update basic stats except for leaves which are already updated */
		if (cl->level)
			bstats_update(&cl->bstats, skb);

		cl = cl->parent;
	}
}

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq, q->now for too many events).
 * Note: Applied are events which have cl->pq_key <= q->now.
 */
static s64 htb_do_events(struct htb_sched *q, const int level,
			 unsigned long start)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	 * 1 to simplify things when jiffy is going to be incremented
	 * too soon
	 */
	unsigned long stop_at = start + 2;
	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;

	while (time_before(jiffies, stop_at)) {
		struct htb_class *cl;
		s64 diff;
		struct rb_node *p = rb_first(wait_pq);

		if (!p)
			return 0;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, wait_pq);
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}

	/* too much load - let's continue after a break for scheduling */
	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
		pr_warn("htb: too many events!\n");
		q->warned |= HTB_WARN_TOOMANYEVENTS;
	}

	return q->now;
}

/* Returns class->node+prio from id-tree where class's id is >= id, or NULL
 * if no such one exists.
 */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
			rb_entry(n, struct htb_class, node[prio]);

		if (id > cl->common.classid) {
			n = n->rb_right;
		} else if (id < cl->common.classid) {
			r = n;
			n = n->rb_left;
		} else {
			return n;
		}
	}
	return r;
}
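
/* Usage note for the helper above (scenario is hypothetical): when the child
 * a feed pointer referenced has been deactivated, htb_lookup_leaf() is left
 * with only the remembered last_ptr_id.  Looking up the smallest classid that
 * is >= that id resumes the round robin close to where it stopped instead of
 * restarting from the first child.
 */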

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find leaf where current feed pointer points to.
 */
static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	BUG_ON(!hprio->row.rb_node);
	sp->root = hprio->row.rb_node;
	sp->pptr = &hprio->ptr;
	sp->pid = &hprio->last_ptr_id;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to recover
			 * the original or next ptr
			 */
			*sp->pptr =
			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now so remove this hint as it
				 * can become out of date quickly
				 */
		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				if (!*sp->pptr) {
					WARN_ON(1);
					return NULL;
				}
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			struct htb_prio *clp;

			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			clp = &cl->inner.clprio[prio];
			(++sp)->root = clp->feed.rb_node;
			sp->pptr = &clp->ptr;
			sp->pid = &clp->last_ptr_id;
		}
	}
	WARN_ON(1);
	return NULL;
}

/* dequeues packet at given priority and level; call only if
 * you are sure that there is active class at prio/level
 */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
					const int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	struct htb_level *hlevel = &q->hlevel[level];
	struct htb_prio *hprio = &hlevel->hprio[prio];

	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(hprio, prio);

	do {
next:
		if (unlikely(!cl))
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		 * qdisc drops packets in enqueue routine or if someone used
		 * graft operation on the leaf since last dequeue;
		 * simply deactivate and skip such class
		 */
		if (unlikely(cl->leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(hprio, prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->leaf.q->dequeue(cl->leaf.q);
		if (likely(skb != NULL))
			break;

		qdisc_warn_nonwc("htb", cl->leaf.q);
		htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
					 &q->hlevel[0].hprio[prio].ptr);
		cl = htb_lookup_leaf(hprio, prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		bstats_update(&cl->bstats, skb);
		cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
		if (cl->leaf.deficit[level] < 0) {
			cl->leaf.deficit[level] += cl->quantum;
			htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
						 &q->hlevel[0].hprio[prio].ptr);
		}
		/* this used to be after charge_class but this constellation
		 * gives us slightly better performance
		 */
		if (!cl->leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}
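
/* Illustration of the deficit round robin above (figures are examples): each
 * dequeued packet is subtracted from the leaf's per-level deficit, and only
 * when the deficit goes negative is it topped up by cl->quantum and the feed
 * pointer advanced.  A leaf with quantum 15000 therefore sends on the order
 * of ten full-size Ethernet frames per visit before siblings at the same prio
 * get their turn; quantum itself defaults to rate / rate2quantum.
 */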

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	s64 next_event;
	unsigned long start_at;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __qdisc_dequeue_head(&q->direct_queue);
	if (skb != NULL) {
ok:
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = ktime_get_ns();
	start_at = jiffies;

	next_event = q->now + 5LLU * NSEC_PER_SEC;

	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		s64 event = q->near_ev_cache[level];

		if (q->now >= event) {
			event = htb_do_events(q, level, start_at);
			if (!event)
				event = q->now + NSEC_PER_SEC;
			q->near_ev_cache[level] = event;
		}

		if (next_event > event)
			next_event = event;

		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);

			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL))
				goto ok;
		}
	}
	if (likely(next_event > q->now))
		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
	else
		schedule_work(&q->work);
fin:
	return skb;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (cl->level)
				memset(&cl->inner, 0, sizeof(cl->inner));
			else {
				if (cl->leaf.q && !q->offload)
					qdisc_reset(cl->leaf.q);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;
		}
	}
	qdisc_watchdog_cancel(&q->watchdog);
	__qdisc_reset_queue(&q->direct_queue);
	sch->q.qlen = 0;
	sch->qstats.backlog = 0;
	memset(q->hlevel, 0, sizeof(q->hlevel));
	memset(q->row_mask, 0, sizeof(q->row_mask));
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
	[TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
	[TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
	[TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
	[TCA_HTB_RATE64] = { .type = NLA_U64 },
	[TCA_HTB_CEIL64] = { .type = NLA_U64 },
	[TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
};

static void htb_work_func(struct work_struct *work)
{
	struct htb_sched *q = container_of(work, struct htb_sched, work);
	struct Qdisc *sch = q->watchdog.qdisc;

	rcu_read_lock();
	__netif_schedule(qdisc_root(sch));
	rcu_read_unlock();
}

static void htb_set_lockdep_class_child(struct Qdisc *q)
{
	static struct lock_class_key child_key;

	lockdep_set_class(qdisc_lock(q), &child_key);
}

static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
{
	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
}
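
/* Context for the offload path below (tc syntax is an example and assumes an
 * iproute2 and NIC driver with HTB offload support): creating the qdisc with
 *
 *	tc qdisc replace dev eth0 root handle 1: htb offload
 *
 * makes htb_init() hand the hierarchy to the device through
 * ndo_setup_tc(TC_SETUP_QDISC_HTB) and attach one default pfifo per real TX
 * queue, so shaping moves to hardware while traffic not steered to an
 * offloaded leaf still has a software queue to go through.
 */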

static int htb_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_htb_qopt_offload offload_opt;
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HTB_MAX + 1];
	struct tc_htb_glob *gopt;
	unsigned int ntx;
	bool offload;
	int err;

	qdisc_watchdog_init(&q->watchdog, sch);
	INIT_WORK(&q->work, htb_work_func);

	if (!opt)
		return -EINVAL;

	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
	if (err)
		return err;

	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
					  NULL);
	if (err < 0)
		return err;

	if (!tb[TCA_HTB_INIT])
		return -EINVAL;

	gopt = nla_data(tb[TCA_HTB_INIT]);
	if (gopt->version != HTB_VER >> 16)
		return -EINVAL;

	offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);

	if (offload) {
		if (sch->parent != TC_H_ROOT)
			return -EOPNOTSUPP;

		if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
			return -EOPNOTSUPP;

		q->num_direct_qdiscs = dev->real_num_tx_queues;
		q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
					   sizeof(*q->direct_qdiscs),
					   GFP_KERNEL);
		if (!q->direct_qdiscs)
			return -ENOMEM;
	}

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		goto err_free_direct_qdiscs;

	qdisc_skb_head_init(&q->direct_queue);

	if (tb[TCA_HTB_DIRECT_QLEN])
		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
	else
		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;

	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	if (!offload)
		return 0;

	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
		struct Qdisc *qdisc;

		qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
					  TC_H_MAKE(sch->handle, 0), extack);
		if (!qdisc) {
1091 err = -ENOMEM; >> 1092 goto err_free_qdiscs; 1126 } 1093 } 1127 1094 >> 1095 htb_set_lockdep_class_child(qdisc); 1128 q->direct_qdiscs[ntx] = qdisc 1096 q->direct_qdiscs[ntx] = qdisc; 1129 qdisc->flags |= TCQ_F_ONETXQU 1097 qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; 1130 } 1098 } 1131 1099 1132 sch->flags |= TCQ_F_MQROOT; 1100 sch->flags |= TCQ_F_MQROOT; 1133 1101 1134 offload_opt = (struct tc_htb_qopt_off 1102 offload_opt = (struct tc_htb_qopt_offload) { 1135 .command = TC_HTB_CREATE, 1103 .command = TC_HTB_CREATE, 1136 .parent_classid = TC_H_MAJ(sc 1104 .parent_classid = TC_H_MAJ(sch->handle) >> 16, 1137 .classid = TC_H_MIN(q->defcls 1105 .classid = TC_H_MIN(q->defcls), 1138 .extack = extack, 1106 .extack = extack, 1139 }; 1107 }; 1140 err = htb_offload(dev, &offload_opt); 1108 err = htb_offload(dev, &offload_opt); 1141 if (err) 1109 if (err) 1142 return err; !! 1110 goto err_free_qdiscs; 1143 1111 1144 /* Defer this assignment, so that htb 1112 /* Defer this assignment, so that htb_destroy skips offload-related 1145 * parts (especially calling ndo_setu 1113 * parts (especially calling ndo_setup_tc) on errors. 1146 */ 1114 */ 1147 q->offload = true; 1115 q->offload = true; 1148 1116 1149 return 0; 1117 return 0; >> 1118 >> 1119 err_free_qdiscs: >> 1120 for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx]; >> 1121 ntx++) >> 1122 qdisc_put(q->direct_qdiscs[ntx]); >> 1123 >> 1124 qdisc_class_hash_destroy(&q->clhash); >> 1125 /* Prevent use-after-free and double-free when htb_destroy gets called. >> 1126 */ >> 1127 q->clhash.hash = NULL; >> 1128 q->clhash.hashsize = 0; >> 1129 >> 1130 err_free_direct_qdiscs: >> 1131 kfree(q->direct_qdiscs); >> 1132 q->direct_qdiscs = NULL; >> 1133 return err; 1150 } 1134 } 1151 1135 1152 static void htb_attach_offload(struct Qdisc * 1136 static void htb_attach_offload(struct Qdisc *sch) 1153 { 1137 { 1154 struct net_device *dev = qdisc_dev(sc 1138 struct net_device *dev = qdisc_dev(sch); 1155 struct htb_sched *q = qdisc_priv(sch) 1139 struct htb_sched *q = qdisc_priv(sch); 1156 unsigned int ntx; 1140 unsigned int ntx; 1157 1141 1158 for (ntx = 0; ntx < q->num_direct_qdi 1142 for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) { 1159 struct Qdisc *old, *qdisc = q 1143 struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx]; 1160 1144 1161 old = dev_graft_qdisc(qdisc-> 1145 old = dev_graft_qdisc(qdisc->dev_queue, qdisc); 1162 qdisc_put(old); 1146 qdisc_put(old); 1163 qdisc_hash_add(qdisc, false); 1147 qdisc_hash_add(qdisc, false); 1164 } 1148 } 1165 for (ntx = q->num_direct_qdiscs; ntx 1149 for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) { 1166 struct netdev_queue *dev_queu 1150 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); 1167 struct Qdisc *old = dev_graft 1151 struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL); 1168 1152 1169 qdisc_put(old); 1153 qdisc_put(old); 1170 } 1154 } 1171 1155 1172 kfree(q->direct_qdiscs); 1156 kfree(q->direct_qdiscs); 1173 q->direct_qdiscs = NULL; 1157 q->direct_qdiscs = NULL; 1174 } 1158 } 1175 1159 1176 static void htb_attach_software(struct Qdisc 1160 static void htb_attach_software(struct Qdisc *sch) 1177 { 1161 { 1178 struct net_device *dev = qdisc_dev(sc 1162 struct net_device *dev = qdisc_dev(sch); 1179 unsigned int ntx; 1163 unsigned int ntx; 1180 1164 1181 /* Resemble qdisc_graft behavior. */ 1165 /* Resemble qdisc_graft behavior. 
*/ 1182 for (ntx = 0; ntx < dev->num_tx_queue 1166 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { 1183 struct netdev_queue *dev_queu 1167 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); 1184 struct Qdisc *old = dev_graft 1168 struct Qdisc *old = dev_graft_qdisc(dev_queue, sch); 1185 1169 1186 qdisc_refcount_inc(sch); 1170 qdisc_refcount_inc(sch); 1187 1171 1188 qdisc_put(old); 1172 qdisc_put(old); 1189 } 1173 } 1190 } 1174 } 1191 1175 1192 static void htb_attach(struct Qdisc *sch) 1176 static void htb_attach(struct Qdisc *sch) 1193 { 1177 { 1194 struct htb_sched *q = qdisc_priv(sch) 1178 struct htb_sched *q = qdisc_priv(sch); 1195 1179 1196 if (q->offload) 1180 if (q->offload) 1197 htb_attach_offload(sch); 1181 htb_attach_offload(sch); 1198 else 1182 else 1199 htb_attach_software(sch); 1183 htb_attach_software(sch); 1200 } 1184 } 1201 1185 1202 static int htb_dump(struct Qdisc *sch, struct 1186 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) 1203 { 1187 { 1204 struct htb_sched *q = qdisc_priv(sch) 1188 struct htb_sched *q = qdisc_priv(sch); 1205 struct nlattr *nest; 1189 struct nlattr *nest; 1206 struct tc_htb_glob gopt; 1190 struct tc_htb_glob gopt; 1207 1191 1208 if (q->offload) 1192 if (q->offload) 1209 sch->flags |= TCQ_F_OFFLOADED 1193 sch->flags |= TCQ_F_OFFLOADED; 1210 else 1194 else 1211 sch->flags &= ~TCQ_F_OFFLOADE 1195 sch->flags &= ~TCQ_F_OFFLOADED; 1212 1196 1213 sch->qstats.overlimits = q->overlimit 1197 sch->qstats.overlimits = q->overlimits; 1214 /* Its safe to not acquire qdisc lock 1198 /* Its safe to not acquire qdisc lock. As we hold RTNL, 1215 * no change can happen on the qdisc 1199 * no change can happen on the qdisc parameters. 1216 */ 1200 */ 1217 1201 1218 gopt.direct_pkts = q->direct_pkts; 1202 gopt.direct_pkts = q->direct_pkts; 1219 gopt.version = HTB_VER; 1203 gopt.version = HTB_VER; 1220 gopt.rate2quantum = q->rate2quantum; 1204 gopt.rate2quantum = q->rate2quantum; 1221 gopt.defcls = q->defcls; 1205 gopt.defcls = q->defcls; 1222 gopt.debug = 0; 1206 gopt.debug = 0; 1223 1207 1224 nest = nla_nest_start_noflag(skb, TCA 1208 nest = nla_nest_start_noflag(skb, TCA_OPTIONS); 1225 if (nest == NULL) 1209 if (nest == NULL) 1226 goto nla_put_failure; 1210 goto nla_put_failure; 1227 if (nla_put(skb, TCA_HTB_INIT, sizeof 1211 if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) || 1228 nla_put_u32(skb, TCA_HTB_DIRECT_Q 1212 nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen)) 1229 goto nla_put_failure; 1213 goto nla_put_failure; 1230 if (q->offload && nla_put_flag(skb, T 1214 if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD)) 1231 goto nla_put_failure; 1215 goto nla_put_failure; 1232 1216 1233 return nla_nest_end(skb, nest); 1217 return nla_nest_end(skb, nest); 1234 1218 1235 nla_put_failure: 1219 nla_put_failure: 1236 nla_nest_cancel(skb, nest); 1220 nla_nest_cancel(skb, nest); 1237 return -1; 1221 return -1; 1238 } 1222 } 1239 1223 1240 static int htb_dump_class(struct Qdisc *sch, 1224 static int htb_dump_class(struct Qdisc *sch, unsigned long arg, 1241 struct sk_buff *skb 1225 struct sk_buff *skb, struct tcmsg *tcm) 1242 { 1226 { 1243 struct htb_class *cl = (struct htb_cl 1227 struct htb_class *cl = (struct htb_class *)arg; 1244 struct htb_sched *q = qdisc_priv(sch) 1228 struct htb_sched *q = qdisc_priv(sch); 1245 struct nlattr *nest; 1229 struct nlattr *nest; 1246 struct tc_htb_opt opt; 1230 struct tc_htb_opt opt; 1247 1231 1248 /* Its safe to not acquire qdisc lock 1232 /* Its safe to not acquire qdisc lock. 
As we hold RTNL, 1249 * no change can happen on the class 1233 * no change can happen on the class parameters. 1250 */ 1234 */ 1251 tcm->tcm_parent = cl->parent ? cl->pa 1235 tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT; 1252 tcm->tcm_handle = cl->common.classid; 1236 tcm->tcm_handle = cl->common.classid; 1253 if (!cl->level && cl->leaf.q) 1237 if (!cl->level && cl->leaf.q) 1254 tcm->tcm_info = cl->leaf.q->h 1238 tcm->tcm_info = cl->leaf.q->handle; 1255 1239 1256 nest = nla_nest_start_noflag(skb, TCA 1240 nest = nla_nest_start_noflag(skb, TCA_OPTIONS); 1257 if (nest == NULL) 1241 if (nest == NULL) 1258 goto nla_put_failure; 1242 goto nla_put_failure; 1259 1243 1260 memset(&opt, 0, sizeof(opt)); 1244 memset(&opt, 0, sizeof(opt)); 1261 1245 1262 psched_ratecfg_getrate(&opt.rate, &cl 1246 psched_ratecfg_getrate(&opt.rate, &cl->rate); 1263 opt.buffer = PSCHED_NS2TICKS(cl->buff 1247 opt.buffer = PSCHED_NS2TICKS(cl->buffer); 1264 psched_ratecfg_getrate(&opt.ceil, &cl 1248 psched_ratecfg_getrate(&opt.ceil, &cl->ceil); 1265 opt.cbuffer = PSCHED_NS2TICKS(cl->cbu 1249 opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer); 1266 opt.quantum = cl->quantum; 1250 opt.quantum = cl->quantum; 1267 opt.prio = cl->prio; 1251 opt.prio = cl->prio; 1268 opt.level = cl->level; 1252 opt.level = cl->level; 1269 if (nla_put(skb, TCA_HTB_PARMS, sizeo 1253 if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt)) 1270 goto nla_put_failure; 1254 goto nla_put_failure; 1271 if (q->offload && nla_put_flag(skb, T 1255 if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD)) 1272 goto nla_put_failure; 1256 goto nla_put_failure; 1273 if ((cl->rate.rate_bytes_ps >= (1ULL 1257 if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) && 1274 nla_put_u64_64bit(skb, TCA_HTB_RA 1258 nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps, 1275 TCA_HTB_PAD)) 1259 TCA_HTB_PAD)) 1276 goto nla_put_failure; 1260 goto nla_put_failure; 1277 if ((cl->ceil.rate_bytes_ps >= (1ULL 1261 if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) && 1278 nla_put_u64_64bit(skb, TCA_HTB_CE 1262 nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps, 1279 TCA_HTB_PAD)) 1263 TCA_HTB_PAD)) 1280 goto nla_put_failure; 1264 goto nla_put_failure; 1281 1265 1282 return nla_nest_end(skb, nest); 1266 return nla_nest_end(skb, nest); 1283 1267 1284 nla_put_failure: 1268 nla_put_failure: 1285 nla_nest_cancel(skb, nest); 1269 nla_nest_cancel(skb, nest); 1286 return -1; 1270 return -1; 1287 } 1271 } 1288 1272 1289 static void htb_offload_aggregate_stats(struc 1273 static void htb_offload_aggregate_stats(struct htb_sched *q, 1290 struc 1274 struct htb_class *cl) 1291 { 1275 { 1292 u64 bytes = 0, packets = 0; << 1293 struct htb_class *c; 1276 struct htb_class *c; 1294 unsigned int i; 1277 unsigned int i; 1295 1278 1296 gnet_stats_basic_sync_init(&cl->bstat !! 1279 memset(&cl->bstats, 0, sizeof(cl->bstats)); 1297 1280 1298 for (i = 0; i < q->clhash.hashsize; i 1281 for (i = 0; i < q->clhash.hashsize; i++) { 1299 hlist_for_each_entry(c, &q->c 1282 hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) { 1300 struct htb_class *p = 1283 struct htb_class *p = c; 1301 1284 1302 while (p && p->level 1285 while (p && p->level < cl->level) 1303 p = p->parent 1286 p = p->parent; 1304 1287 1305 if (p != cl) 1288 if (p != cl) 1306 continue; 1289 continue; 1307 1290 1308 bytes += u64_stats_re !! 1291 cl->bstats.bytes += c->bstats_bias.bytes; 1309 packets += u64_stats_ !! 
1292 cl->bstats.packets += c->bstats_bias.packets; 1310 if (c->level == 0) { 1293 if (c->level == 0) { 1311 bytes += u64_ !! 1294 cl->bstats.bytes += c->leaf.q->bstats.bytes; 1312 packets += u6 !! 1295 cl->bstats.packets += c->leaf.q->bstats.packets; 1313 } 1296 } 1314 } 1297 } 1315 } 1298 } 1316 _bstats_update(&cl->bstats, bytes, pa << 1317 } 1299 } 1318 1300 1319 static int 1301 static int 1320 htb_dump_class_stats(struct Qdisc *sch, unsig 1302 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) 1321 { 1303 { 1322 struct htb_class *cl = (struct htb_cl 1304 struct htb_class *cl = (struct htb_class *)arg; 1323 struct htb_sched *q = qdisc_priv(sch) 1305 struct htb_sched *q = qdisc_priv(sch); 1324 struct gnet_stats_queue qs = { 1306 struct gnet_stats_queue qs = { 1325 .drops = cl->drops, 1307 .drops = cl->drops, 1326 .overlimits = cl->overlimits, 1308 .overlimits = cl->overlimits, 1327 }; 1309 }; 1328 __u32 qlen = 0; 1310 __u32 qlen = 0; 1329 1311 1330 if (!cl->level && cl->leaf.q) 1312 if (!cl->level && cl->leaf.q) 1331 qdisc_qstats_qlen_backlog(cl- 1313 qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog); 1332 1314 1333 cl->xstats.tokens = clamp_t(s64, PSCH 1315 cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens), 1334 INT_MIN, 1316 INT_MIN, INT_MAX); 1335 cl->xstats.ctokens = clamp_t(s64, PSC 1317 cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens), 1336 INT_MIN, 1318 INT_MIN, INT_MAX); 1337 1319 1338 if (q->offload) { 1320 if (q->offload) { 1339 if (!cl->level) { 1321 if (!cl->level) { 1340 if (cl->leaf.q) 1322 if (cl->leaf.q) 1341 cl->bstats = 1323 cl->bstats = cl->leaf.q->bstats; 1342 else 1324 else 1343 gnet_stats_ba !! 1325 memset(&cl->bstats, 0, sizeof(cl->bstats)); 1344 _bstats_update(&cl->b !! 1326 cl->bstats.bytes += cl->bstats_bias.bytes; 1345 u64_st !! 1327 cl->bstats.packets += cl->bstats_bias.packets; 1346 u64_st << 1347 } else { 1328 } else { 1348 htb_offload_aggregate 1329 htb_offload_aggregate_stats(q, cl); 1349 } 1330 } 1350 } 1331 } 1351 1332 1352 if (gnet_stats_copy_basic(d, NULL, &c !! 
1333 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), >> 1334 d, NULL, &cl->bstats) < 0 || 1353 gnet_stats_copy_rate_est(d, &cl-> 1335 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || 1354 gnet_stats_copy_queue(d, NULL, &q 1336 gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0) 1355 return -1; 1337 return -1; 1356 1338 1357 return gnet_stats_copy_app(d, &cl->xs 1339 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); 1358 } 1340 } 1359 1341 1360 static struct netdev_queue * 1342 static struct netdev_queue * 1361 htb_select_queue(struct Qdisc *sch, struct tc 1343 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm) 1362 { 1344 { 1363 struct net_device *dev = qdisc_dev(sc 1345 struct net_device *dev = qdisc_dev(sch); 1364 struct tc_htb_qopt_offload offload_op 1346 struct tc_htb_qopt_offload offload_opt; 1365 struct htb_sched *q = qdisc_priv(sch) 1347 struct htb_sched *q = qdisc_priv(sch); 1366 int err; 1348 int err; 1367 1349 1368 if (!q->offload) 1350 if (!q->offload) 1369 return sch->dev_queue; 1351 return sch->dev_queue; 1370 1352 1371 offload_opt = (struct tc_htb_qopt_off 1353 offload_opt = (struct tc_htb_qopt_offload) { 1372 .command = TC_HTB_LEAF_QUERY_ 1354 .command = TC_HTB_LEAF_QUERY_QUEUE, 1373 .classid = TC_H_MIN(tcm->tcm_ 1355 .classid = TC_H_MIN(tcm->tcm_parent), 1374 }; 1356 }; 1375 err = htb_offload(dev, &offload_opt); 1357 err = htb_offload(dev, &offload_opt); 1376 if (err || offload_opt.qid >= dev->nu 1358 if (err || offload_opt.qid >= dev->num_tx_queues) 1377 return NULL; 1359 return NULL; 1378 return netdev_get_tx_queue(dev, offlo 1360 return netdev_get_tx_queue(dev, offload_opt.qid); 1379 } 1361 } 1380 1362 1381 static struct Qdisc * 1363 static struct Qdisc * 1382 htb_graft_helper(struct netdev_queue *dev_que 1364 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q) 1383 { 1365 { 1384 struct net_device *dev = dev_queue->d 1366 struct net_device *dev = dev_queue->dev; 1385 struct Qdisc *old_q; 1367 struct Qdisc *old_q; 1386 1368 1387 if (dev->flags & IFF_UP) 1369 if (dev->flags & IFF_UP) 1388 dev_deactivate(dev); 1370 dev_deactivate(dev); 1389 old_q = dev_graft_qdisc(dev_queue, ne 1371 old_q = dev_graft_qdisc(dev_queue, new_q); 1390 if (new_q) 1372 if (new_q) 1391 new_q->flags |= TCQ_F_ONETXQU 1373 new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; 1392 if (dev->flags & IFF_UP) 1374 if (dev->flags & IFF_UP) 1393 dev_activate(dev); 1375 dev_activate(dev); 1394 1376 1395 return old_q; 1377 return old_q; 1396 } 1378 } 1397 1379 1398 static struct netdev_queue *htb_offload_get_q 1380 static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl) 1399 { 1381 { 1400 struct netdev_queue *queue; 1382 struct netdev_queue *queue; 1401 1383 1402 queue = cl->leaf.offload_queue; 1384 queue = cl->leaf.offload_queue; 1403 if (!(cl->leaf.q->flags & TCQ_F_BUILT 1385 if (!(cl->leaf.q->flags & TCQ_F_BUILTIN)) 1404 WARN_ON(cl->leaf.q->dev_queue 1386 WARN_ON(cl->leaf.q->dev_queue != queue); 1405 1387 1406 return queue; 1388 return queue; 1407 } 1389 } 1408 1390 1409 static void htb_offload_move_qdisc(struct Qdi 1391 static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old, 1410 struct htb 1392 struct htb_class *cl_new, bool destroying) 1411 { 1393 { 1412 struct netdev_queue *queue_old, *queu 1394 struct netdev_queue *queue_old, *queue_new; 1413 struct net_device *dev = qdisc_dev(sc 1395 struct net_device *dev = qdisc_dev(sch); 1414 1396 1415 queue_old = htb_offload_get_queue(cl_ 1397 queue_old = 
htb_offload_get_queue(cl_old); 1416 queue_new = htb_offload_get_queue(cl_ 1398 queue_new = htb_offload_get_queue(cl_new); 1417 1399 1418 if (!destroying) { 1400 if (!destroying) { 1419 struct Qdisc *qdisc; 1401 struct Qdisc *qdisc; 1420 1402 1421 if (dev->flags & IFF_UP) 1403 if (dev->flags & IFF_UP) 1422 dev_deactivate(dev); 1404 dev_deactivate(dev); 1423 qdisc = dev_graft_qdisc(queue 1405 qdisc = dev_graft_qdisc(queue_old, NULL); 1424 WARN_ON(qdisc != cl_old->leaf 1406 WARN_ON(qdisc != cl_old->leaf.q); 1425 } 1407 } 1426 1408 1427 if (!(cl_old->leaf.q->flags & TCQ_F_B 1409 if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN)) 1428 cl_old->leaf.q->dev_queue = q 1410 cl_old->leaf.q->dev_queue = queue_new; 1429 cl_old->leaf.offload_queue = queue_ne 1411 cl_old->leaf.offload_queue = queue_new; 1430 1412 1431 if (!destroying) { 1413 if (!destroying) { 1432 struct Qdisc *qdisc; 1414 struct Qdisc *qdisc; 1433 1415 1434 qdisc = dev_graft_qdisc(queue 1416 qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q); 1435 if (dev->flags & IFF_UP) 1417 if (dev->flags & IFF_UP) 1436 dev_activate(dev); 1418 dev_activate(dev); 1437 WARN_ON(!(qdisc->flags & TCQ_ 1419 WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN)); 1438 } 1420 } 1439 } 1421 } 1440 1422 1441 static int htb_graft(struct Qdisc *sch, unsig 1423 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 1442 struct Qdisc **old, stru 1424 struct Qdisc **old, struct netlink_ext_ack *extack) 1443 { 1425 { 1444 struct netdev_queue *dev_queue = sch- 1426 struct netdev_queue *dev_queue = sch->dev_queue; 1445 struct htb_class *cl = (struct htb_cl 1427 struct htb_class *cl = (struct htb_class *)arg; 1446 struct htb_sched *q = qdisc_priv(sch) 1428 struct htb_sched *q = qdisc_priv(sch); 1447 struct Qdisc *old_q; 1429 struct Qdisc *old_q; 1448 1430 1449 if (cl->level) 1431 if (cl->level) 1450 return -EINVAL; 1432 return -EINVAL; 1451 1433 1452 if (q->offload) 1434 if (q->offload) 1453 dev_queue = htb_offload_get_q 1435 dev_queue = htb_offload_get_queue(cl); 1454 1436 1455 if (!new) { 1437 if (!new) { 1456 new = qdisc_create_dflt(dev_q 1438 new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, 1457 cl->c 1439 cl->common.classid, extack); 1458 if (!new) 1440 if (!new) 1459 return -ENOBUFS; 1441 return -ENOBUFS; 1460 } 1442 } 1461 1443 1462 if (q->offload) { 1444 if (q->offload) { >> 1445 htb_set_lockdep_class_child(new); 1463 /* One ref for cl->leaf.q, th 1446 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ 1464 qdisc_refcount_inc(new); 1447 qdisc_refcount_inc(new); 1465 old_q = htb_graft_helper(dev_ 1448 old_q = htb_graft_helper(dev_queue, new); 1466 } 1449 } 1467 1450 1468 *old = qdisc_replace(sch, new, &cl->l 1451 *old = qdisc_replace(sch, new, &cl->leaf.q); 1469 1452 1470 if (q->offload) { 1453 if (q->offload) { 1471 WARN_ON(old_q != *old); 1454 WARN_ON(old_q != *old); 1472 qdisc_put(old_q); 1455 qdisc_put(old_q); 1473 } 1456 } 1474 1457 1475 return 0; 1458 return 0; 1476 } 1459 } 1477 1460 1478 static struct Qdisc *htb_leaf(struct Qdisc *s 1461 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg) 1479 { 1462 { 1480 struct htb_class *cl = (struct htb_cl 1463 struct htb_class *cl = (struct htb_class *)arg; 1481 return !cl->level ? cl->leaf.q : NULL 1464 return !cl->level ? 
cl->leaf.q : NULL; 1482 } 1465 } 1483 1466 1484 static void htb_qlen_notify(struct Qdisc *sch 1467 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) 1485 { 1468 { 1486 struct htb_class *cl = (struct htb_cl 1469 struct htb_class *cl = (struct htb_class *)arg; 1487 1470 1488 htb_deactivate(qdisc_priv(sch), cl); 1471 htb_deactivate(qdisc_priv(sch), cl); 1489 } 1472 } 1490 1473 1491 static inline int htb_parent_last_child(struc 1474 static inline int htb_parent_last_child(struct htb_class *cl) 1492 { 1475 { 1493 if (!cl->parent) 1476 if (!cl->parent) 1494 /* the root class */ 1477 /* the root class */ 1495 return 0; 1478 return 0; 1496 if (cl->parent->children > 1) 1479 if (cl->parent->children > 1) 1497 /* not the last child */ 1480 /* not the last child */ 1498 return 0; 1481 return 0; 1499 return 1; 1482 return 1; 1500 } 1483 } 1501 1484 1502 static void htb_parent_to_leaf(struct Qdisc * 1485 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl, 1503 struct Qdisc * 1486 struct Qdisc *new_q) 1504 { 1487 { 1505 struct htb_sched *q = qdisc_priv(sch) 1488 struct htb_sched *q = qdisc_priv(sch); 1506 struct htb_class *parent = cl->parent 1489 struct htb_class *parent = cl->parent; 1507 1490 1508 WARN_ON(cl->level || !cl->leaf.q || c 1491 WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity); 1509 1492 1510 if (parent->cmode != HTB_CAN_SEND) 1493 if (parent->cmode != HTB_CAN_SEND) 1511 htb_safe_rb_erase(&parent->pq 1494 htb_safe_rb_erase(&parent->pq_node, 1512 &q->hlevel[ 1495 &q->hlevel[parent->level].wait_pq); 1513 1496 1514 parent->level = 0; 1497 parent->level = 0; 1515 memset(&parent->inner, 0, sizeof(pare 1498 memset(&parent->inner, 0, sizeof(parent->inner)); 1516 parent->leaf.q = new_q ? new_q : &noo 1499 parent->leaf.q = new_q ? new_q : &noop_qdisc; 1517 parent->tokens = parent->buffer; 1500 parent->tokens = parent->buffer; 1518 parent->ctokens = parent->cbuffer; 1501 parent->ctokens = parent->cbuffer; 1519 parent->t_c = ktime_get_ns(); 1502 parent->t_c = ktime_get_ns(); 1520 parent->cmode = HTB_CAN_SEND; 1503 parent->cmode = HTB_CAN_SEND; 1521 if (q->offload) 1504 if (q->offload) 1522 parent->leaf.offload_queue = 1505 parent->leaf.offload_queue = cl->leaf.offload_queue; 1523 } 1506 } 1524 1507 1525 static void htb_parent_to_leaf_offload(struct 1508 static void htb_parent_to_leaf_offload(struct Qdisc *sch, 1526 struct 1509 struct netdev_queue *dev_queue, 1527 struct 1510 struct Qdisc *new_q) 1528 { 1511 { 1529 struct Qdisc *old_q; 1512 struct Qdisc *old_q; 1530 1513 1531 /* One ref for cl->leaf.q, the other 1514 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ 1532 if (new_q) 1515 if (new_q) 1533 qdisc_refcount_inc(new_q); 1516 qdisc_refcount_inc(new_q); 1534 old_q = htb_graft_helper(dev_queue, n 1517 old_q = htb_graft_helper(dev_queue, new_q); 1535 WARN_ON(!(old_q->flags & TCQ_F_BUILTI 1518 WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); 1536 } 1519 } 1537 1520 1538 static int htb_destroy_class_offload(struct Q 1521 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, 1539 bool las 1522 bool last_child, bool destroying, 1540 struct n 1523 struct netlink_ext_ack *extack) 1541 { 1524 { 1542 struct tc_htb_qopt_offload offload_op 1525 struct tc_htb_qopt_offload offload_opt; 1543 struct netdev_queue *dev_queue; 1526 struct netdev_queue *dev_queue; 1544 struct Qdisc *q = cl->leaf.q; 1527 struct Qdisc *q = cl->leaf.q; 1545 struct Qdisc *old; !! 
1528 struct Qdisc *old = NULL; 1546 int err; 1529 int err; 1547 1530 1548 if (cl->level) 1531 if (cl->level) 1549 return -EINVAL; 1532 return -EINVAL; 1550 1533 1551 WARN_ON(!q); 1534 WARN_ON(!q); 1552 dev_queue = htb_offload_get_queue(cl) 1535 dev_queue = htb_offload_get_queue(cl); 1553 /* When destroying, caller qdisc_graf !! 1536 old = htb_graft_helper(dev_queue, NULL); 1554 * qdisc_put for the qdisc being dest !! 1537 if (destroying) 1555 * does not need to graft or qdisc_pu !! 1538 /* Before HTB is destroyed, the kernel grafts noop_qdisc to 1556 */ !! 1539 * all queues. 1557 if (!destroying) { << 1558 old = htb_graft_helper(dev_qu << 1559 /* Last qdisc grafted should << 1560 * calling htb_delete. << 1561 */ 1540 */ >> 1541 WARN_ON(!(old->flags & TCQ_F_BUILTIN)); >> 1542 else 1562 WARN_ON(old != q); 1543 WARN_ON(old != q); 1563 } << 1564 1544 1565 if (cl->parent) { 1545 if (cl->parent) { 1566 _bstats_update(&cl->parent->b !! 1546 cl->parent->bstats_bias.bytes += q->bstats.bytes; 1567 u64_stats_read !! 1547 cl->parent->bstats_bias.packets += q->bstats.packets; 1568 u64_stats_read << 1569 } 1548 } 1570 1549 1571 offload_opt = (struct tc_htb_qopt_off 1550 offload_opt = (struct tc_htb_qopt_offload) { 1572 .command = !last_child ? TC_H 1551 .command = !last_child ? TC_HTB_LEAF_DEL : 1573 destroying ? TC_HT 1552 destroying ? TC_HTB_LEAF_DEL_LAST_FORCE : 1574 TC_HTB_LEAF_DEL_LA 1553 TC_HTB_LEAF_DEL_LAST, 1575 .classid = cl->common.classid 1554 .classid = cl->common.classid, 1576 .extack = extack, 1555 .extack = extack, 1577 }; 1556 }; 1578 err = htb_offload(qdisc_dev(sch), &of 1557 err = htb_offload(qdisc_dev(sch), &offload_opt); 1579 1558 1580 if (!destroying) { !! 1559 if (!err || destroying) 1581 if (!err) !! 1560 qdisc_put(old); 1582 qdisc_put(old); !! 1561 else 1583 else !! 
1562 htb_graft_helper(dev_queue, old); 1584 htb_graft_helper(dev_ << 1585 } << 1586 1563 1587 if (last_child) 1564 if (last_child) 1588 return err; 1565 return err; 1589 1566 1590 if (!err && offload_opt.classid != TC 1567 if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) { 1591 u32 classid = TC_H_MAJ(sch->h 1568 u32 classid = TC_H_MAJ(sch->handle) | 1592 TC_H_MIN(offloa 1569 TC_H_MIN(offload_opt.classid); 1593 struct htb_class *moved_cl = 1570 struct htb_class *moved_cl = htb_find(classid, sch); 1594 1571 1595 htb_offload_move_qdisc(sch, m 1572 htb_offload_move_qdisc(sch, moved_cl, cl, destroying); 1596 } 1573 } 1597 1574 1598 return err; 1575 return err; 1599 } 1576 } 1600 1577 1601 static void htb_destroy_class(struct Qdisc *s 1578 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) 1602 { 1579 { 1603 if (!cl->level) { 1580 if (!cl->level) { 1604 WARN_ON(!cl->leaf.q); 1581 WARN_ON(!cl->leaf.q); 1605 qdisc_put(cl->leaf.q); 1582 qdisc_put(cl->leaf.q); 1606 } 1583 } 1607 gen_kill_estimator(&cl->rate_est); 1584 gen_kill_estimator(&cl->rate_est); 1608 tcf_block_put(cl->block); 1585 tcf_block_put(cl->block); 1609 kfree(cl); 1586 kfree(cl); 1610 } 1587 } 1611 1588 1612 static void htb_destroy(struct Qdisc *sch) 1589 static void htb_destroy(struct Qdisc *sch) 1613 { 1590 { 1614 struct net_device *dev = qdisc_dev(sc 1591 struct net_device *dev = qdisc_dev(sch); 1615 struct tc_htb_qopt_offload offload_op 1592 struct tc_htb_qopt_offload offload_opt; 1616 struct htb_sched *q = qdisc_priv(sch) 1593 struct htb_sched *q = qdisc_priv(sch); 1617 struct hlist_node *next; 1594 struct hlist_node *next; 1618 bool nonempty, changed; 1595 bool nonempty, changed; 1619 struct htb_class *cl; 1596 struct htb_class *cl; 1620 unsigned int i; 1597 unsigned int i; 1621 1598 1622 cancel_work_sync(&q->work); 1599 cancel_work_sync(&q->work); 1623 qdisc_watchdog_cancel(&q->watchdog); 1600 qdisc_watchdog_cancel(&q->watchdog); 1624 /* This line used to be after htb_des 1601 /* This line used to be after htb_destroy_class call below 1625 * and surprisingly it worked in 2.4. 1602 * and surprisingly it worked in 2.4. But it must precede it 1626 * because filter need its target cla 1603 * because filter need its target class alive to be able to call 1627 * unbind_filter on it (without Oops) 1604 * unbind_filter on it (without Oops). 
1628 */ 1605 */ 1629 tcf_block_put(q->block); 1606 tcf_block_put(q->block); 1630 1607 1631 for (i = 0; i < q->clhash.hashsize; i 1608 for (i = 0; i < q->clhash.hashsize; i++) { 1632 hlist_for_each_entry(cl, &q-> 1609 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { 1633 tcf_block_put(cl->blo 1610 tcf_block_put(cl->block); 1634 cl->block = NULL; 1611 cl->block = NULL; 1635 } 1612 } 1636 } 1613 } 1637 1614 1638 do { 1615 do { 1639 nonempty = false; 1616 nonempty = false; 1640 changed = false; 1617 changed = false; 1641 for (i = 0; i < q->clhash.has 1618 for (i = 0; i < q->clhash.hashsize; i++) { 1642 hlist_for_each_entry_ 1619 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], 1643 1620 common.hnode) { 1644 bool last_chi 1621 bool last_child; 1645 1622 1646 if (!q->offlo 1623 if (!q->offload) { 1647 htb_d 1624 htb_destroy_class(sch, cl); 1648 conti 1625 continue; 1649 } 1626 } 1650 1627 1651 nonempty = tr 1628 nonempty = true; 1652 1629 1653 if (cl->level 1630 if (cl->level) 1654 conti 1631 continue; 1655 1632 1656 changed = tru 1633 changed = true; 1657 1634 1658 last_child = 1635 last_child = htb_parent_last_child(cl); 1659 htb_destroy_c 1636 htb_destroy_class_offload(sch, cl, last_child, 1660 1637 true, NULL); 1661 qdisc_class_h 1638 qdisc_class_hash_remove(&q->clhash, 1662 1639 &cl->common); 1663 if (cl->paren 1640 if (cl->parent) 1664 cl->p 1641 cl->parent->children--; 1665 if (last_chil 1642 if (last_child) 1666 htb_p 1643 htb_parent_to_leaf(sch, cl, NULL); 1667 htb_destroy_c 1644 htb_destroy_class(sch, cl); 1668 } 1645 } 1669 } 1646 } 1670 } while (changed); 1647 } while (changed); 1671 WARN_ON(nonempty); 1648 WARN_ON(nonempty); 1672 1649 1673 qdisc_class_hash_destroy(&q->clhash); 1650 qdisc_class_hash_destroy(&q->clhash); 1674 __qdisc_reset_queue(&q->direct_queue) 1651 __qdisc_reset_queue(&q->direct_queue); 1675 1652 1676 if (q->offload) { !! 1653 if (!q->offload) 1677 offload_opt = (struct tc_htb_ !! 1654 return; 1678 .command = TC_HTB_DES !! 1655 1679 }; !! 1656 offload_opt = (struct tc_htb_qopt_offload) { 1680 htb_offload(dev, &offload_opt !! 1657 .command = TC_HTB_DESTROY, 1681 } !! 1658 }; >> 1659 htb_offload(dev, &offload_opt); 1682 1660 1683 if (!q->direct_qdiscs) 1661 if (!q->direct_qdiscs) 1684 return; 1662 return; 1685 for (i = 0; i < q->num_direct_qdiscs 1663 for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++) 1686 qdisc_put(q->direct_qdiscs[i] 1664 qdisc_put(q->direct_qdiscs[i]); 1687 kfree(q->direct_qdiscs); 1665 kfree(q->direct_qdiscs); 1688 } 1666 } 1689 1667 1690 static int htb_delete(struct Qdisc *sch, unsi 1668 static int htb_delete(struct Qdisc *sch, unsigned long arg, 1691 struct netlink_ext_ack 1669 struct netlink_ext_ack *extack) 1692 { 1670 { 1693 struct htb_sched *q = qdisc_priv(sch) 1671 struct htb_sched *q = qdisc_priv(sch); 1694 struct htb_class *cl = (struct htb_cl 1672 struct htb_class *cl = (struct htb_class *)arg; 1695 struct Qdisc *new_q = NULL; 1673 struct Qdisc *new_q = NULL; 1696 int last_child = 0; 1674 int last_child = 0; 1697 int err; 1675 int err; 1698 1676 1699 /* TODO: why don't allow to delete su 1677 /* TODO: why don't allow to delete subtree ? references ? does 1700 * tc subsys guarantee us that in htb 1678 * tc subsys guarantee us that in htb_destroy it holds no class 1701 * refs so that we can remove childre 1679 * refs so that we can remove children safely there ? 1702 */ 1680 */ 1703 if (cl->children || qdisc_class_in_us !! 
1681 if (cl->children || cl->filter_cnt) 1704 NL_SET_ERR_MSG(extack, "HTB c << 1705 return -EBUSY; 1682 return -EBUSY; 1706 } << 1707 1683 1708 if (!cl->level && htb_parent_last_chi 1684 if (!cl->level && htb_parent_last_child(cl)) 1709 last_child = 1; 1685 last_child = 1; 1710 1686 1711 if (q->offload) { 1687 if (q->offload) { 1712 err = htb_destroy_class_offlo 1688 err = htb_destroy_class_offload(sch, cl, last_child, false, 1713 1689 extack); 1714 if (err) 1690 if (err) 1715 return err; 1691 return err; 1716 } 1692 } 1717 1693 1718 if (last_child) { 1694 if (last_child) { 1719 struct netdev_queue *dev_queu 1695 struct netdev_queue *dev_queue = sch->dev_queue; 1720 1696 1721 if (q->offload) 1697 if (q->offload) 1722 dev_queue = htb_offlo 1698 dev_queue = htb_offload_get_queue(cl); 1723 1699 1724 new_q = qdisc_create_dflt(dev 1700 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, 1725 cl- 1701 cl->parent->common.classid, 1726 NUL 1702 NULL); 1727 if (q->offload) !! 1703 if (q->offload) { >> 1704 if (new_q) >> 1705 htb_set_lockdep_class_child(new_q); 1728 htb_parent_to_leaf_of 1706 htb_parent_to_leaf_offload(sch, dev_queue, new_q); >> 1707 } 1729 } 1708 } 1730 1709 1731 sch_tree_lock(sch); 1710 sch_tree_lock(sch); 1732 1711 1733 if (!cl->level) 1712 if (!cl->level) 1734 qdisc_purge_queue(cl->leaf.q) 1713 qdisc_purge_queue(cl->leaf.q); 1735 1714 1736 /* delete from hash and active; remai 1715 /* delete from hash and active; remainder in destroy_class */ 1737 qdisc_class_hash_remove(&q->clhash, & 1716 qdisc_class_hash_remove(&q->clhash, &cl->common); 1738 if (cl->parent) 1717 if (cl->parent) 1739 cl->parent->children--; 1718 cl->parent->children--; 1740 1719 1741 if (cl->prio_activity) 1720 if (cl->prio_activity) 1742 htb_deactivate(q, cl); 1721 htb_deactivate(q, cl); 1743 1722 1744 if (cl->cmode != HTB_CAN_SEND) 1723 if (cl->cmode != HTB_CAN_SEND) 1745 htb_safe_rb_erase(&cl->pq_nod 1724 htb_safe_rb_erase(&cl->pq_node, 1746 &q->hlevel[ 1725 &q->hlevel[cl->level].wait_pq); 1747 1726 1748 if (last_child) 1727 if (last_child) 1749 htb_parent_to_leaf(sch, cl, n 1728 htb_parent_to_leaf(sch, cl, new_q); 1750 1729 1751 sch_tree_unlock(sch); 1730 sch_tree_unlock(sch); 1752 1731 1753 htb_destroy_class(sch, cl); 1732 htb_destroy_class(sch, cl); 1754 return 0; 1733 return 0; 1755 } 1734 } 1756 1735 1757 static int htb_change_class(struct Qdisc *sch 1736 static int htb_change_class(struct Qdisc *sch, u32 classid, 1758 u32 parentid, str 1737 u32 parentid, struct nlattr **tca, 1759 unsigned long *ar 1738 unsigned long *arg, struct netlink_ext_ack *extack) 1760 { 1739 { 1761 int err = -EINVAL; 1740 int err = -EINVAL; 1762 struct htb_sched *q = qdisc_priv(sch) 1741 struct htb_sched *q = qdisc_priv(sch); 1763 struct htb_class *cl = (struct htb_cl 1742 struct htb_class *cl = (struct htb_class *)*arg, *parent; 1764 struct tc_htb_qopt_offload offload_op 1743 struct tc_htb_qopt_offload offload_opt; 1765 struct nlattr *opt = tca[TCA_OPTIONS] 1744 struct nlattr *opt = tca[TCA_OPTIONS]; 1766 struct nlattr *tb[TCA_HTB_MAX + 1]; 1745 struct nlattr *tb[TCA_HTB_MAX + 1]; 1767 struct Qdisc *parent_qdisc = NULL; 1746 struct Qdisc *parent_qdisc = NULL; 1768 struct netdev_queue *dev_queue; 1747 struct netdev_queue *dev_queue; 1769 struct tc_htb_opt *hopt; 1748 struct tc_htb_opt *hopt; 1770 u64 rate64, ceil64; 1749 u64 rate64, ceil64; 1771 int warn = 0; 1750 int warn = 0; 1772 1751 1773 /* extract all subattrs from opt attr 1752 /* extract all subattrs from opt attr */ 1774 if (!opt) 1753 if (!opt) 1775 goto failure; 
1754 goto failure; 1776 1755 1777 err = nla_parse_nested_deprecated(tb, 1756 err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy, 1778 ext !! 1757 NULL); 1779 if (err < 0) 1758 if (err < 0) 1780 goto failure; 1759 goto failure; 1781 1760 1782 err = -EINVAL; 1761 err = -EINVAL; 1783 if (tb[TCA_HTB_PARMS] == NULL) 1762 if (tb[TCA_HTB_PARMS] == NULL) 1784 goto failure; 1763 goto failure; 1785 1764 1786 parent = parentid == TC_H_ROOT ? NULL 1765 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch); 1787 1766 1788 hopt = nla_data(tb[TCA_HTB_PARMS]); 1767 hopt = nla_data(tb[TCA_HTB_PARMS]); 1789 if (!hopt->rate.rate || !hopt->ceil.r 1768 if (!hopt->rate.rate || !hopt->ceil.rate) 1790 goto failure; 1769 goto failure; 1791 1770 1792 if (q->offload) { << 1793 /* Options not supported by t << 1794 if (hopt->rate.overhead || ho << 1795 NL_SET_ERR_MSG(extack << 1796 goto failure; << 1797 } << 1798 if (hopt->rate.mpu || hopt->c << 1799 NL_SET_ERR_MSG(extack << 1800 goto failure; << 1801 } << 1802 } << 1803 << 1804 /* Keeping backward compatible with r 1771 /* Keeping backward compatible with rate_table based iproute2 tc */ 1805 if (hopt->rate.linklayer == TC_LINKLA 1772 if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) 1806 qdisc_put_rtab(qdisc_get_rtab 1773 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB], 1807 1774 NULL)); 1808 1775 1809 if (hopt->ceil.linklayer == TC_LINKLA 1776 if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) 1810 qdisc_put_rtab(qdisc_get_rtab 1777 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB], 1811 1778 NULL)); 1812 1779 1813 rate64 = tb[TCA_HTB_RATE64] ? nla_get 1780 rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; 1814 ceil64 = tb[TCA_HTB_CEIL64] ? nla_get 1781 ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; 1815 1782 1816 if (!cl) { /* new class 1783 if (!cl) { /* new class */ 1817 struct net_device *dev = qdis 1784 struct net_device *dev = qdisc_dev(sch); 1818 struct Qdisc *new_q, *old_q; 1785 struct Qdisc *new_q, *old_q; 1819 int prio; 1786 int prio; 1820 struct { 1787 struct { 1821 struct nlattr 1788 struct nlattr nla; 1822 struct gnet_estimator 1789 struct gnet_estimator opt; 1823 } est = { 1790 } est = { 1824 .nla = { 1791 .nla = { 1825 .nla_len 1792 .nla_len = nla_attr_size(sizeof(est.opt)), 1826 .nla_type 1793 .nla_type = TCA_RATE, 1827 }, 1794 }, 1828 .opt = { 1795 .opt = { 1829 /* 4s interva 1796 /* 4s interval, 16s averaging constant */ 1830 .interval 1797 .interval = 2, 1831 .ewma_log 1798 .ewma_log = 2, 1832 }, 1799 }, 1833 }; 1800 }; 1834 1801 1835 /* check for valid classid */ 1802 /* check for valid classid */ 1836 if (!classid || TC_H_MAJ(clas 1803 if (!classid || TC_H_MAJ(classid ^ sch->handle) || 1837 htb_find(classid, sch)) 1804 htb_find(classid, sch)) 1838 goto failure; 1805 goto failure; 1839 1806 1840 /* check maximal depth */ 1807 /* check maximal depth */ 1841 if (parent && parent->parent 1808 if (parent && parent->parent && parent->parent->level < 2) { 1842 NL_SET_ERR_MSG_MOD(ex !! 
1809 pr_err("htb: tree is too deep\n"); 1843 goto failure; 1810 goto failure; 1844 } 1811 } 1845 err = -ENOBUFS; 1812 err = -ENOBUFS; 1846 cl = kzalloc(sizeof(*cl), GFP 1813 cl = kzalloc(sizeof(*cl), GFP_KERNEL); 1847 if (!cl) 1814 if (!cl) 1848 goto failure; 1815 goto failure; 1849 1816 1850 gnet_stats_basic_sync_init(&c << 1851 gnet_stats_basic_sync_init(&c << 1852 << 1853 err = tcf_block_get(&cl->bloc 1817 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); 1854 if (err) { 1818 if (err) { 1855 kfree(cl); 1819 kfree(cl); 1856 goto failure; 1820 goto failure; 1857 } 1821 } 1858 if (htb_rate_est || tca[TCA_R 1822 if (htb_rate_est || tca[TCA_RATE]) { 1859 err = gen_new_estimat 1823 err = gen_new_estimator(&cl->bstats, NULL, 1860 1824 &cl->rate_est, 1861 1825 NULL, 1862 !! 1826 qdisc_root_sleeping_running(sch), 1863 1827 tca[TCA_RATE] ? : &est.nla); 1864 if (err) 1828 if (err) 1865 goto err_bloc 1829 goto err_block_put; 1866 } 1830 } 1867 1831 1868 cl->children = 0; 1832 cl->children = 0; 1869 RB_CLEAR_NODE(&cl->pq_node); 1833 RB_CLEAR_NODE(&cl->pq_node); 1870 1834 1871 for (prio = 0; prio < TC_HTB_ 1835 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++) 1872 RB_CLEAR_NODE(&cl->no 1836 RB_CLEAR_NODE(&cl->node[prio]); 1873 1837 1874 cl->common.classid = classid; 1838 cl->common.classid = classid; 1875 1839 1876 /* Make sure nothing interrup 1840 /* Make sure nothing interrupts us in between of two 1877 * ndo_setup_tc calls. 1841 * ndo_setup_tc calls. 1878 */ 1842 */ 1879 ASSERT_RTNL(); 1843 ASSERT_RTNL(); 1880 1844 1881 /* create leaf qdisc early be 1845 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) 1882 * so that can't be used insi 1846 * so that can't be used inside of sch_tree_lock 1883 * -- thanks to Karlis Peisen 1847 * -- thanks to Karlis Peisenieks 1884 */ 1848 */ 1885 if (!q->offload) { 1849 if (!q->offload) { 1886 dev_queue = sch->dev_ 1850 dev_queue = sch->dev_queue; 1887 } else if (!(parent && !paren 1851 } else if (!(parent && !parent->level)) { 1888 /* Assign a dev_queue 1852 /* Assign a dev_queue to this classid. */ 1889 offload_opt = (struct 1853 offload_opt = (struct tc_htb_qopt_offload) { 1890 .command = TC 1854 .command = TC_HTB_LEAF_ALLOC_QUEUE, 1891 .classid = cl 1855 .classid = cl->common.classid, 1892 .parent_class 1856 .parent_classid = parent ? 1893 TC_H_ 1857 TC_H_MIN(parent->common.classid) : 1894 TC_HT 1858 TC_HTB_CLASSID_ROOT, 1895 .rate = max_t 1859 .rate = max_t(u64, hopt->rate.rate, rate64), 1896 .ceil = max_t 1860 .ceil = max_t(u64, hopt->ceil.rate, ceil64), 1897 .prio = hopt- << 1898 .quantum = ho << 1899 .extack = ext 1861 .extack = extack, 1900 }; 1862 }; 1901 err = htb_offload(dev 1863 err = htb_offload(dev, &offload_opt); 1902 if (err) { 1864 if (err) { 1903 NL_SET_ERR_MS !! 1865 pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n", 1904 !! 1866 err); 1905 goto err_kill 1867 goto err_kill_estimator; 1906 } 1868 } 1907 dev_queue = netdev_ge 1869 dev_queue = netdev_get_tx_queue(dev, offload_opt.qid); 1908 } else { /* First child. */ 1870 } else { /* First child. 
*/ 1909 dev_queue = htb_offlo 1871 dev_queue = htb_offload_get_queue(parent); 1910 old_q = htb_graft_hel 1872 old_q = htb_graft_helper(dev_queue, NULL); 1911 WARN_ON(old_q != pare 1873 WARN_ON(old_q != parent->leaf.q); 1912 offload_opt = (struct 1874 offload_opt = (struct tc_htb_qopt_offload) { 1913 .command = TC 1875 .command = TC_HTB_LEAF_TO_INNER, 1914 .classid = cl 1876 .classid = cl->common.classid, 1915 .parent_class 1877 .parent_classid = 1916 TC_H_ 1878 TC_H_MIN(parent->common.classid), 1917 .rate = max_t 1879 .rate = max_t(u64, hopt->rate.rate, rate64), 1918 .ceil = max_t 1880 .ceil = max_t(u64, hopt->ceil.rate, ceil64), 1919 .prio = hopt- << 1920 .quantum = ho << 1921 .extack = ext 1881 .extack = extack, 1922 }; 1882 }; 1923 err = htb_offload(dev 1883 err = htb_offload(dev, &offload_opt); 1924 if (err) { 1884 if (err) { 1925 NL_SET_ERR_MS !! 1885 pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n", 1926 !! 1886 err); 1927 htb_graft_hel 1887 htb_graft_helper(dev_queue, old_q); 1928 goto err_kill 1888 goto err_kill_estimator; 1929 } 1889 } 1930 _bstats_update(&paren !! 1890 parent->bstats_bias.bytes += old_q->bstats.bytes; 1931 u64_st !! 1891 parent->bstats_bias.packets += old_q->bstats.packets; 1932 u64_st << 1933 qdisc_put(old_q); 1892 qdisc_put(old_q); 1934 } 1893 } 1935 new_q = qdisc_create_dflt(dev 1894 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, 1936 cla 1895 classid, NULL); 1937 if (q->offload) { 1896 if (q->offload) { 1938 /* One ref for cl->le !! 1897 if (new_q) { 1939 if (new_q) !! 1898 htb_set_lockdep_class_child(new_q); >> 1899 /* One ref for cl->leaf.q, the other for >> 1900 * dev_queue->qdisc. >> 1901 */ 1940 qdisc_refcoun 1902 qdisc_refcount_inc(new_q); >> 1903 } 1941 old_q = htb_graft_hel 1904 old_q = htb_graft_helper(dev_queue, new_q); 1942 /* No qdisc_put neede 1905 /* No qdisc_put needed. */ 1943 WARN_ON(!(old_q->flag 1906 WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); 1944 } 1907 } 1945 sch_tree_lock(sch); 1908 sch_tree_lock(sch); 1946 if (parent && !parent->level) 1909 if (parent && !parent->level) { 1947 /* turn parent into i 1910 /* turn parent into inner node */ 1948 qdisc_purge_queue(par 1911 qdisc_purge_queue(parent->leaf.q); 1949 parent_qdisc = parent 1912 parent_qdisc = parent->leaf.q; 1950 if (parent->prio_acti 1913 if (parent->prio_activity) 1951 htb_deactivat 1914 htb_deactivate(q, parent); 1952 1915 1953 /* remove from evt li 1916 /* remove from evt list because of level change */ 1954 if (parent->cmode != 1917 if (parent->cmode != HTB_CAN_SEND) { 1955 htb_safe_rb_e 1918 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq); 1956 parent->cmode 1919 parent->cmode = HTB_CAN_SEND; 1957 } 1920 } 1958 parent->level = (pare 1921 parent->level = (parent->parent ? parent->parent->level 1959 : TC 1922 : TC_HTB_MAXDEPTH) - 1; 1960 memset(&parent->inner 1923 memset(&parent->inner, 0, sizeof(parent->inner)); 1961 } 1924 } 1962 1925 1963 /* leaf (we) needs elementary 1926 /* leaf (we) needs elementary qdisc */ 1964 cl->leaf.q = new_q ? new_q : 1927 cl->leaf.q = new_q ? 
new_q : &noop_qdisc; 1965 if (q->offload) 1928 if (q->offload) 1966 cl->leaf.offload_queu 1929 cl->leaf.offload_queue = dev_queue; 1967 1930 1968 cl->parent = parent; 1931 cl->parent = parent; 1969 1932 1970 /* set class to be in HTB_CAN 1933 /* set class to be in HTB_CAN_SEND state */ 1971 cl->tokens = PSCHED_TICKS2NS( 1934 cl->tokens = PSCHED_TICKS2NS(hopt->buffer); 1972 cl->ctokens = PSCHED_TICKS2NS 1935 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer); 1973 cl->mbuffer = 60ULL * NSEC_PE 1936 cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */ 1974 cl->t_c = ktime_get_ns(); 1937 cl->t_c = ktime_get_ns(); 1975 cl->cmode = HTB_CAN_SEND; 1938 cl->cmode = HTB_CAN_SEND; 1976 1939 1977 /* attach to the hash list an 1940 /* attach to the hash list and parent's family */ 1978 qdisc_class_hash_insert(&q->c 1941 qdisc_class_hash_insert(&q->clhash, &cl->common); 1979 if (parent) 1942 if (parent) 1980 parent->children++; 1943 parent->children++; 1981 if (cl->leaf.q != &noop_qdisc 1944 if (cl->leaf.q != &noop_qdisc) 1982 qdisc_hash_add(cl->le 1945 qdisc_hash_add(cl->leaf.q, true); 1983 } else { 1946 } else { 1984 if (tca[TCA_RATE]) { 1947 if (tca[TCA_RATE]) { 1985 err = gen_replace_est 1948 err = gen_replace_estimator(&cl->bstats, NULL, 1986 1949 &cl->rate_est, 1987 1950 NULL, 1988 !! 1951 qdisc_root_sleeping_running(sch), 1989 1952 tca[TCA_RATE]); 1990 if (err) 1953 if (err) 1991 return err; 1954 return err; 1992 } 1955 } 1993 1956 1994 if (q->offload) { 1957 if (q->offload) { 1995 struct net_device *de 1958 struct net_device *dev = qdisc_dev(sch); 1996 1959 1997 offload_opt = (struct 1960 offload_opt = (struct tc_htb_qopt_offload) { 1998 .command = TC 1961 .command = TC_HTB_NODE_MODIFY, 1999 .classid = cl 1962 .classid = cl->common.classid, 2000 .rate = max_t 1963 .rate = max_t(u64, hopt->rate.rate, rate64), 2001 .ceil = max_t 1964 .ceil = max_t(u64, hopt->ceil.rate, ceil64), 2002 .prio = hopt- << 2003 .quantum = ho << 2004 .extack = ext 1965 .extack = extack, 2005 }; 1966 }; 2006 err = htb_offload(dev 1967 err = htb_offload(dev, &offload_opt); 2007 if (err) 1968 if (err) 2008 /* Estimator 1969 /* Estimator was replaced, and rollback may fail 2009 * as well, s 1970 * as well, so we don't try to recover it, and 2010 * the estima 1971 * the estimator won't work property with the 2011 * offload an 1972 * offload anyway, because bstats are updated 2012 * only when 1973 * only when the stats are queried. 2013 */ 1974 */ 2014 return err; 1975 return err; 2015 } 1976 } 2016 1977 2017 sch_tree_lock(sch); 1978 sch_tree_lock(sch); 2018 } 1979 } 2019 1980 2020 psched_ratecfg_precompute(&cl->rate, 1981 psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); 2021 psched_ratecfg_precompute(&cl->ceil, 1982 psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); 2022 1983 2023 /* it used to be a nasty bug here, we 1984 /* it used to be a nasty bug here, we have to check that node 2024 * is really leaf before changing cl- 1985 * is really leaf before changing cl->leaf ! 
2025 */ 1986 */ 2026 if (!cl->level) { 1987 if (!cl->level) { 2027 u64 quantum = cl->rate.rate_b 1988 u64 quantum = cl->rate.rate_bytes_ps; 2028 1989 2029 do_div(quantum, q->rate2quant 1990 do_div(quantum, q->rate2quantum); 2030 cl->quantum = min_t(u64, quan 1991 cl->quantum = min_t(u64, quantum, INT_MAX); 2031 1992 2032 if (!hopt->quantum && cl->qua 1993 if (!hopt->quantum && cl->quantum < 1000) { 2033 warn = -1; 1994 warn = -1; 2034 cl->quantum = 1000; 1995 cl->quantum = 1000; 2035 } 1996 } 2036 if (!hopt->quantum && cl->qua 1997 if (!hopt->quantum && cl->quantum > 200000) { 2037 warn = 1; 1998 warn = 1; 2038 cl->quantum = 200000; 1999 cl->quantum = 200000; 2039 } 2000 } 2040 if (hopt->quantum) 2001 if (hopt->quantum) 2041 cl->quantum = hopt->q 2002 cl->quantum = hopt->quantum; 2042 if ((cl->prio = hopt->prio) > 2003 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO) 2043 cl->prio = TC_HTB_NUM 2004 cl->prio = TC_HTB_NUMPRIO - 1; 2044 } 2005 } 2045 2006 2046 cl->buffer = PSCHED_TICKS2NS(hopt->bu 2007 cl->buffer = PSCHED_TICKS2NS(hopt->buffer); 2047 cl->cbuffer = PSCHED_TICKS2NS(hopt->c 2008 cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer); 2048 2009 2049 sch_tree_unlock(sch); 2010 sch_tree_unlock(sch); 2050 qdisc_put(parent_qdisc); 2011 qdisc_put(parent_qdisc); 2051 2012 2052 if (warn) 2013 if (warn) 2053 NL_SET_ERR_MSG_FMT_MOD(extack !! 2014 pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n", 2054 "quant !! 2015 cl->common.classid, (warn == -1 ? "small" : "big")); 2055 cl->co << 2056 2016 2057 qdisc_class_hash_grow(sch, &q->clhash 2017 qdisc_class_hash_grow(sch, &q->clhash); 2058 2018 2059 *arg = (unsigned long)cl; 2019 *arg = (unsigned long)cl; 2060 return 0; 2020 return 0; 2061 2021 2062 err_kill_estimator: 2022 err_kill_estimator: 2063 gen_kill_estimator(&cl->rate_est); 2023 gen_kill_estimator(&cl->rate_est); 2064 err_block_put: 2024 err_block_put: 2065 tcf_block_put(cl->block); 2025 tcf_block_put(cl->block); 2066 kfree(cl); 2026 kfree(cl); 2067 failure: 2027 failure: 2068 return err; 2028 return err; 2069 } 2029 } 2070 2030 2071 static struct tcf_block *htb_tcf_block(struct 2031 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg, 2072 struct 2032 struct netlink_ext_ack *extack) 2073 { 2033 { 2074 struct htb_sched *q = qdisc_priv(sch) 2034 struct htb_sched *q = qdisc_priv(sch); 2075 struct htb_class *cl = (struct htb_cl 2035 struct htb_class *cl = (struct htb_class *)arg; 2076 2036 2077 return cl ? cl->block : q->block; 2037 return cl ? cl->block : q->block; 2078 } 2038 } 2079 2039 2080 static unsigned long htb_bind_filter(struct Q 2040 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, 2081 u32 clas 2041 u32 classid) 2082 { 2042 { 2083 struct htb_class *cl = htb_find(class 2043 struct htb_class *cl = htb_find(classid, sch); 2084 2044 2085 /*if (cl && !cl->level) return 0; 2045 /*if (cl && !cl->level) return 0; 2086 * The line above used to be there to 2046 * The line above used to be there to prevent attaching filters to 2087 * leaves. But at least tc_index filt 2047 * leaves. But at least tc_index filter uses this just to get class 2088 * for other reasons so that we have 2048 * for other reasons so that we have to allow for it. 
2089 * ---- 2049 * ---- 2090 * 19.6.2002 As Werner explained it i 2050 * 19.6.2002 As Werner explained it is ok - bind filter is just 2091 * another way to "lock" the class - 2051 * another way to "lock" the class - unlike "get" this lock can 2092 * be broken by class during destroy 2052 * be broken by class during destroy IIUC. 2093 */ 2053 */ 2094 if (cl) 2054 if (cl) 2095 qdisc_class_get(&cl->common); !! 2055 cl->filter_cnt++; 2096 return (unsigned long)cl; 2056 return (unsigned long)cl; 2097 } 2057 } 2098 2058 2099 static void htb_unbind_filter(struct Qdisc *s 2059 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg) 2100 { 2060 { 2101 struct htb_class *cl = (struct htb_cl 2061 struct htb_class *cl = (struct htb_class *)arg; 2102 2062 2103 qdisc_class_put(&cl->common); !! 2063 if (cl) >> 2064 cl->filter_cnt--; 2104 } 2065 } 2105 2066 2106 static void htb_walk(struct Qdisc *sch, struc 2067 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) 2107 { 2068 { 2108 struct htb_sched *q = qdisc_priv(sch) 2069 struct htb_sched *q = qdisc_priv(sch); 2109 struct htb_class *cl; 2070 struct htb_class *cl; 2110 unsigned int i; 2071 unsigned int i; 2111 2072 2112 if (arg->stop) 2073 if (arg->stop) 2113 return; 2074 return; 2114 2075 2115 for (i = 0; i < q->clhash.hashsize; i 2076 for (i = 0; i < q->clhash.hashsize; i++) { 2116 hlist_for_each_entry(cl, &q-> 2077 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { 2117 if (!tc_qdisc_stats_d !! 2078 if (arg->count < arg->skip) { >> 2079 arg->count++; >> 2080 continue; >> 2081 } >> 2082 if (arg->fn(sch, (unsigned long)cl, arg) < 0) { >> 2083 arg->stop = 1; 2118 return; 2084 return; >> 2085 } >> 2086 arg->count++; 2119 } 2087 } 2120 } 2088 } 2121 } 2089 } 2122 2090 2123 static const struct Qdisc_class_ops htb_class 2091 static const struct Qdisc_class_ops htb_class_ops = { 2124 .select_queue = htb_select_qu 2092 .select_queue = htb_select_queue, 2125 .graft = htb_graft, 2093 .graft = htb_graft, 2126 .leaf = htb_leaf, 2094 .leaf = htb_leaf, 2127 .qlen_notify = htb_qlen_noti 2095 .qlen_notify = htb_qlen_notify, 2128 .find = htb_search, 2096 .find = htb_search, 2129 .change = htb_change_cl 2097 .change = htb_change_class, 2130 .delete = htb_delete, 2098 .delete = htb_delete, 2131 .walk = htb_walk, 2099 .walk = htb_walk, 2132 .tcf_block = htb_tcf_block 2100 .tcf_block = htb_tcf_block, 2133 .bind_tcf = htb_bind_filt 2101 .bind_tcf = htb_bind_filter, 2134 .unbind_tcf = htb_unbind_fi 2102 .unbind_tcf = htb_unbind_filter, 2135 .dump = htb_dump_clas 2103 .dump = htb_dump_class, 2136 .dump_stats = htb_dump_clas 2104 .dump_stats = htb_dump_class_stats, 2137 }; 2105 }; 2138 2106 2139 static struct Qdisc_ops htb_qdisc_ops __read_ 2107 static struct Qdisc_ops htb_qdisc_ops __read_mostly = { 2140 .cl_ops = &htb_class_op 2108 .cl_ops = &htb_class_ops, 2141 .id = "htb", 2109 .id = "htb", 2142 .priv_size = sizeof(struct 2110 .priv_size = sizeof(struct htb_sched), 2143 .enqueue = htb_enqueue, 2111 .enqueue = htb_enqueue, 2144 .dequeue = htb_dequeue, 2112 .dequeue = htb_dequeue, 2145 .peek = qdisc_peek_de 2113 .peek = qdisc_peek_dequeued, 2146 .init = htb_init, 2114 .init = htb_init, 2147 .attach = htb_attach, 2115 .attach = htb_attach, 2148 .reset = htb_reset, 2116 .reset = htb_reset, 2149 .destroy = htb_destroy, 2117 .destroy = htb_destroy, 2150 .dump = htb_dump, 2118 .dump = htb_dump, 2151 .owner = THIS_MODULE, 2119 .owner = THIS_MODULE, 2152 }; 2120 }; 2153 MODULE_ALIAS_NET_SCH("htb"); << 2154 2121 2155 static int __init 
htb_module_init(void) 2122 static int __init htb_module_init(void) 2156 { 2123 { 2157 return register_qdisc(&htb_qdisc_ops) 2124 return register_qdisc(&htb_qdisc_ops); 2158 } 2125 } 2159 static void __exit htb_module_exit(void) 2126 static void __exit htb_module_exit(void) 2160 { 2127 { 2161 unregister_qdisc(&htb_qdisc_ops); 2128 unregister_qdisc(&htb_qdisc_ops); 2162 } 2129 } 2163 2130 2164 module_init(htb_module_init) 2131 module_init(htb_module_init) 2165 module_exit(htb_module_exit) 2132 module_exit(htb_module_exit) 2166 MODULE_LICENSE("GPL"); 2133 MODULE_LICENSE("GPL"); 2167 MODULE_DESCRIPTION("Hierarchical Token Bucket << 2168 2134
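
The snippet below is an illustrative, self-contained userspace sketch, not part of sch_htb.c; the function name example_htb_quantum and the main() driver are inventions for this example. Under those assumptions it mirrors how htb_change_class() above derives a leaf's DRR quantum from the class rate and the qdisc-wide r2q divisor when no explicit quantum is supplied, including the 1000 and 200000 byte clamps that trigger the "quantum of class %X is small/big" warning in the real code.

/* Illustrative only -- not kernel code.  Mirrors the quantum derivation
 * in htb_change_class(): quantum = rate / r2q, capped at INT_MAX, then
 * clamped to the 1000..200000 byte range unless the user set one.
 */
#include <stdio.h>
#include <stdint.h>
#include <limits.h>

static int example_htb_quantum(uint64_t rate_bytes_ps, unsigned int r2q,
			       int user_quantum)
{
	uint64_t quantum;

	if (user_quantum)	/* an explicit quantum always wins */
		return user_quantum;

	if (r2q < 1)		/* htb_init() forces rate2quantum >= 1 */
		r2q = 1;

	quantum = rate_bytes_ps / r2q;
	if (quantum > INT_MAX)	/* min_t(u64, quantum, INT_MAX) */
		quantum = INT_MAX;
	if (quantum < 1000)	/* "quantum is small" warning case */
		quantum = 1000;
	if (quantum > 200000)	/* "quantum is big" warning case */
		quantum = 200000;

	return (int)quantum;
}

int main(void)
{
	/* 10 Mbit/s leaf (1250000 bytes/s) with iproute2's default r2q of 10
	 * yields a 125000 byte quantum, well inside the warning-free range.
	 */
	printf("quantum = %d bytes\n", example_htb_quantum(1250000, 10, 0));
	return 0;
}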