Linux/net/sched/sch_red.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c  Random Early Detection queue.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914: computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>

/*      Parameters, settable by user:
        -----------------------------

        limit           - bytes (must be > qth_max + burst)

        Hard limit on queue length, should be chosen > qth_max
        to allow packet bursts. This parameter does not
        affect the algorithm's behaviour and can be chosen
        arbitrarily high (well, less than ram size).
        Really, this limit will never be reached
        if RED works correctly.
 */

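/* Illustrative example only: one plausible iproute2 configuration of this
 * qdisc. The device name and all numbers are made-up values chosen to
 * satisfy the constraint above (limit > qth_max + burst); "ecn" sets
 * TC_RED_ECN and "adaptive" arms the adapt_timer below:
 *
 *   tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *           avpkt 1000 burst 55 probability 0.02 bandwidth 10mbit \
 *           ecn adaptive
 */
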
struct red_sched_data {
        u32                     limit;          /* HARD maximal queue length */

        unsigned char           flags;
        /* Non-flags in tc_red_qopt.flags. */
        unsigned char           userbits;

        struct timer_list       adapt_timer;
        struct Qdisc            *sch;
        struct red_parms        parms;
        struct red_vars         vars;
        struct red_stats        stats;
        struct Qdisc            *qdisc;
        struct tcf_qevent       qe_early_drop;
        struct tcf_qevent       qe_mark;
};

#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)

static inline int red_use_ecn(struct red_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}

static int red_use_nodrop(struct red_sched_data *q)
{
        return q->flags & TC_RED_NODROP;
}

static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;
        unsigned int len;
        int ret;

        q->vars.qavg = red_calc_qavg(&q->parms,
                                     &q->vars,
                                     child->qstats.backlog);

        if (red_is_idling(&q->vars))
                red_end_of_idle_period(&q->vars);

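        /* q->vars.qavg is an exponentially weighted moving average of the
         * child backlog (weight 2^-Wlog). red_action() compares it against
         * qth_min/qth_max: below qth_min the packet passes, between the two
         * it is marked/dropped with a probability growing towards max_P,
         * and at or above qth_max it is always marked/dropped.
         */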
        switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
        case RED_DONT_MARK:
                break;

        case RED_PROB_MARK:
                qdisc_qstats_overlimit(sch);
                if (!red_use_ecn(q)) {
                        q->stats.prob_drop++;
                        goto congestion_drop;
                }

                if (INET_ECN_set_ce(skb)) {
                        q->stats.prob_mark++;
                        skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
                        if (!skb)
                                return NET_XMIT_CN | ret;
                } else if (!red_use_nodrop(q)) {
                        q->stats.prob_drop++;
                        goto congestion_drop;
                }

                /* Non-ECT packet in ECN nodrop mode: queue it. */
                break;

        case RED_HARD_MARK:
                qdisc_qstats_overlimit(sch);
                if (red_use_harddrop(q) || !red_use_ecn(q)) {
                        q->stats.forced_drop++;
                        goto congestion_drop;
                }

                if (INET_ECN_set_ce(skb)) {
                        q->stats.forced_mark++;
                        skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
                        if (!skb)
                                return NET_XMIT_CN | ret;
                } else if (!red_use_nodrop(q)) {
                        q->stats.forced_drop++;
                        goto congestion_drop;
                }

                /* Non-ECT packet in ECN nodrop mode: queue it. */
                break;
        }

        len = qdisc_pkt_len(skb);
        ret = qdisc_enqueue(skb, child, to_free);
        if (likely(ret == NET_XMIT_SUCCESS)) {
                sch->qstats.backlog += len;
                sch->q.qlen++;
        } else if (net_xmit_drop_count(ret)) {
                q->stats.pdrop++;
                qdisc_qstats_drop(sch);
        }
        return ret;

congestion_drop:
        skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
        if (!skb)
                return NET_XMIT_CN | ret;

        qdisc_drop(skb, sch, to_free);
        return NET_XMIT_CN;
}

static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
        struct sk_buff *skb;
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        skb = child->dequeue(child);
        if (skb) {
                qdisc_bstats_update(sch, skb);
                qdisc_qstats_backlog_dec(sch, skb);
                sch->q.qlen--;
        } else {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
        }
        return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        red_restart(&q->vars);
}

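/* qth_min/qth_max are stored in struct red_parms pre-shifted by Wlog (see
 * red_set_parms()), so shift them back before reporting thresholds to the
 * driver.
 */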
static int red_offload(struct Qdisc *sch, bool enable)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_qopt_offload opt = {
                .handle = sch->handle,
                .parent = sch->parent,
        };

        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return -EOPNOTSUPP;

        if (enable) {
                opt.command = TC_RED_REPLACE;
                opt.set.min = q->parms.qth_min >> q->parms.Wlog;
                opt.set.max = q->parms.qth_max >> q->parms.Wlog;
                opt.set.probability = q->parms.max_P;
                opt.set.limit = q->limit;
                opt.set.is_ecn = red_use_ecn(q);
                opt.set.is_harddrop = red_use_harddrop(q);
                opt.set.is_nodrop = red_use_nodrop(q);
                opt.set.qstats = &sch->qstats;
        } else {
                opt.command = TC_RED_DESTROY;
        }

        return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        tcf_qevent_destroy(&q->qe_mark, sch);
        tcf_qevent_destroy(&q->qe_early_drop, sch);
        del_timer_sync(&q->adapt_timer);
        red_offload(sch, false);
        qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
        [TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
        [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
        [TCA_RED_STAB]  = { .len = RED_STAB_SIZE },
        [TCA_RED_MAX_P] = { .type = NLA_U32 },
        [TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
        [TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
        [TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};

static int __red_change(struct Qdisc *sch, struct nlattr **tb,
                        struct netlink_ext_ack *extack)
{
        struct Qdisc *old_child = NULL, *child = NULL;
        struct red_sched_data *q = qdisc_priv(sch);
        struct nla_bitfield32 flags_bf;
        struct tc_red_qopt *ctl;
        unsigned char userbits;
        unsigned char flags;
        int err;
        u32 max_P;
        u8 *stab;

        if (tb[TCA_RED_PARMS] == NULL ||
            tb[TCA_RED_STAB] == NULL)
                return -EINVAL;

        max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

        ctl = nla_data(tb[TCA_RED_PARMS]);
        stab = nla_data(tb[TCA_RED_STAB]);
        if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
                              ctl->Scell_log, stab))
                return -EINVAL;

        err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
                            tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
                            &flags_bf, &userbits, extack);
        if (err)
                return err;

        if (ctl->limit > 0) {
                child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
                                         extack);
                if (IS_ERR(child))
                        return PTR_ERR(child);

                /* child is fifo, no need to check for noop_qdisc */
                qdisc_hash_add(child, true);
        }

        sch_tree_lock(sch);

        flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
        err = red_validate_flags(flags, extack);
        if (err)
                goto unlock_out;

        q->flags = flags;
        q->userbits = userbits;
        q->limit = ctl->limit;
        if (child) {
                qdisc_tree_flush_backlog(q->qdisc);
                old_child = q->qdisc;
                q->qdisc = child;
        }

        red_set_parms(&q->parms,
                      ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
                      stab,
                      max_P);
        red_set_vars(&q->vars);

        del_timer(&q->adapt_timer);
        if (ctl->flags & TC_RED_ADAPTATIVE)
                mod_timer(&q->adapt_timer, jiffies + HZ/2);

        if (!q->qdisc->q.qlen)
                red_start_of_idle_period(&q->vars);

        sch_tree_unlock(sch);

        red_offload(sch, true);

        if (old_child)
                qdisc_put(old_child);
        return 0;

unlock_out:
        sch_tree_unlock(sch);
        if (child)
                qdisc_put(child);
        return err;
}

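/* Runs every ~HZ/2 while TC_RED_ADAPTATIVE is set; red_adaptative_algo()
 * retunes max_P from the recent average queue length, under the qdisc
 * root lock.
 */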
static inline void red_adaptative_timer(struct timer_list *t)
{
        struct red_sched_data *q = from_timer(q, t, adapt_timer);
        struct Qdisc *sch = q->sch;
        spinlock_t *root_lock;

        rcu_read_lock();
        root_lock = qdisc_lock(qdisc_root_sleeping(sch));
        spin_lock(root_lock);
        red_adaptative_algo(&q->parms, &q->vars);
        mod_timer(&q->adapt_timer, jiffies + HZ/2);
        spin_unlock(root_lock);
        rcu_read_unlock();
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_RED_MAX + 1];
        int err;

        q->qdisc = &noop_qdisc;
        q->sch = sch;
        timer_setup(&q->adapt_timer, red_adaptative_timer, 0);

        if (!opt)
                return -EINVAL;

        err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
                                          extack);
        if (err < 0)
                return err;

        err = __red_change(sch, tb, extack);
        if (err)
                return err;

        err = tcf_qevent_init(&q->qe_early_drop, sch,
                              FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
                              tb[TCA_RED_EARLY_DROP_BLOCK], extack);
        if (err)
                return err;

        return tcf_qevent_init(&q->qe_mark, sch,
                               FLOW_BLOCK_BINDER_TYPE_RED_MARK,
                               tb[TCA_RED_MARK_BLOCK], extack);
}

static int red_change(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_RED_MAX + 1];
        int err;

        err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
                                          extack);
        if (err < 0)
                return err;

        err = tcf_qevent_validate_change(&q->qe_early_drop,
                                         tb[TCA_RED_EARLY_DROP_BLOCK], extack);
        if (err)
                return err;

        err = tcf_qevent_validate_change(&q->qe_mark,
                                         tb[TCA_RED_MARK_BLOCK], extack);
        if (err)
                return err;

        return __red_change(sch, tb, extack);
}

static int red_dump_offload_stats(struct Qdisc *sch)
{
        struct tc_red_qopt_offload hw_stats = {
                .command = TC_RED_STATS,
                .handle = sch->handle,
                .parent = sch->parent,
                {
                        .stats.bstats = &sch->bstats,
                        .stats.qstats = &sch->qstats,
                },
        };

        return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts = NULL;
        struct tc_red_qopt opt = {
                .limit          = q->limit,
                .flags          = (q->flags & TC_RED_HISTORIC_FLAGS) |
                                  q->userbits,
                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
                .Wlog           = q->parms.Wlog,
                .Plog           = q->parms.Plog,
                .Scell_log      = q->parms.Scell_log,
        };
        int err;

        err = red_dump_offload_stats(sch);
        if (err)
                goto nla_put_failure;

        opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;
        if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
            nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
            nla_put_bitfield32(skb, TCA_RED_FLAGS,
                               q->flags, TC_RED_SUPPORTED_FLAGS) ||
            tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
            tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
                goto nla_put_failure;
        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_xstats st = {0};

        if (sch->flags & TCQ_F_OFFLOADED) {
                struct tc_red_qopt_offload hw_stats_request = {
                        .command = TC_RED_XSTATS,
                        .handle = sch->handle,
                        .parent = sch->parent,
                        {
                                .xstats = &q->stats,
                        },
                };
                dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
                                              &hw_stats_request);
        }
        st.early = q->stats.prob_drop + q->stats.forced_drop;
        st.pdrop = q->stats.pdrop;
        st.marked = q->stats.prob_mark + q->stats.forced_mark;

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        struct red_sched_data *q = qdisc_priv(sch);

        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;
        return 0;
}

static void red_graft_offload(struct Qdisc *sch,
                              struct Qdisc *new, struct Qdisc *old,
                              struct netlink_ext_ack *extack)
{
        struct tc_red_qopt_offload graft_offload = {
                .handle         = sch->handle,
                .parent         = sch->parent,
                .child_handle   = new->handle,
                .command        = TC_RED_GRAFT,
        };

        qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
                                   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old, struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        *old = qdisc_replace(sch, new, &q->qdisc);

        red_graft_offload(sch, new, *old, extack);
        return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct red_sched_data *q = qdisc_priv(sch);
        return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                tc_qdisc_stats_dump(sch, 1, walker);
        }
}

static const struct Qdisc_class_ops red_class_ops = {
        .graft          =       red_graft,
        .leaf           =       red_leaf,
        .find           =       red_find,
        .walk           =       red_walk,
        .dump           =       red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
        .id             =       "red",
        .priv_size      =       sizeof(struct red_sched_data),
        .cl_ops         =       &red_class_ops,
        .enqueue        =       red_enqueue,
        .dequeue        =       red_dequeue,
        .peek           =       red_peek,
        .init           =       red_init,
        .reset          =       red_reset,
        .destroy        =       red_destroy,
        .change         =       red_change,
        .dump           =       red_dump,
        .dump_stats     =       red_dump_stats,
        .owner          =       THIS_MODULE,
};
MODULE_ALIAS_NET_SCH("red");

static int __init red_module_init(void)
{
        return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
        unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Random Early Detection qdisc");
