~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/sched_ext/scx_qmap.bpf.c

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /tools/sched_ext/scx_qmap.bpf.c (Architecture mips) and /tools/sched_ext/scx_qmap.bpf.c (Architecture ppc)


  1 /* SPDX-License-Identifier: GPL-2.0 */              1 /* SPDX-License-Identifier: GPL-2.0 */
  2 /*                                                  2 /*
  3  * A simple five-level FIFO queue scheduler.        3  * A simple five-level FIFO queue scheduler.
  4  *                                                  4  *
  5  * There are five FIFOs implemented using BPF_      5  * There are five FIFOs implemented using BPF_MAP_TYPE_QUEUE. A task gets
  6  * assigned to one depending on its compound w      6  * assigned to one depending on its compound weight. Each CPU round robins
  7  * through the FIFOs and dispatches more from       7  * through the FIFOs and dispatches more from FIFOs with higher indices - 1 from
  8  * queue0, 2 from queue1, 4 from queue2 and so      8  * queue0, 2 from queue1, 4 from queue2 and so on.
  9  *                                                  9  *
 10  * This scheduler demonstrates:                    10  * This scheduler demonstrates:
 11  *                                                 11  *
 12  * - BPF-side queueing using PIDs.                 12  * - BPF-side queueing using PIDs.
 13  * - Sleepable per-task storage allocation usi     13  * - Sleepable per-task storage allocation using ops.prep_enable().
 14  * - Using ops.cpu_release() to handle a highe     14  * - Using ops.cpu_release() to handle a higher priority scheduling class taking
 15  *   the CPU away.                                 15  *   the CPU away.
 16  * - Core-sched support.                           16  * - Core-sched support.
 17  *                                                 17  *
 18  * This scheduler is primarily for demonstrati     18  * This scheduler is primarily for demonstration and testing of sched_ext
 19  * features and unlikely to be useful for actu     19  * features and unlikely to be useful for actual workloads.
 20  *                                                 20  *
 21  * Copyright (c) 2022 Meta Platforms, Inc. and     21  * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
 22  * Copyright (c) 2022 Tejun Heo <tj@kernel.org     22  * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
 23  * Copyright (c) 2022 David Vernet <dvernet@me     23  * Copyright (c) 2022 David Vernet <dvernet@meta.com>
 24  */                                                24  */
 25 #include <scx/common.bpf.h>                        25 #include <scx/common.bpf.h>
 26                                                    26 
 27 enum consts {                                      27 enum consts {
 28         ONE_SEC_IN_NS           = 1000000000,      28         ONE_SEC_IN_NS           = 1000000000,
 29         SHARED_DSQ              = 0,               29         SHARED_DSQ              = 0,
 30         HIGHPRI_DSQ             = 1,               30         HIGHPRI_DSQ             = 1,
 31         HIGHPRI_WEIGHT          = 8668,            31         HIGHPRI_WEIGHT          = 8668,         /* this is what -20 maps to */
 32 };                                                 32 };
 33                                                    33 
 34 char _license[] SEC("license") = "GPL";            34 char _license[] SEC("license") = "GPL";
 35                                                    35 
 36 const volatile u64 slice_ns = SCX_SLICE_DFL;       36 const volatile u64 slice_ns = SCX_SLICE_DFL;
 37 const volatile u32 stall_user_nth;                 37 const volatile u32 stall_user_nth;
 38 const volatile u32 stall_kernel_nth;               38 const volatile u32 stall_kernel_nth;
 39 const volatile u32 dsp_inf_loop_after;             39 const volatile u32 dsp_inf_loop_after;
 40 const volatile u32 dsp_batch;                      40 const volatile u32 dsp_batch;
 41 const volatile bool highpri_boosting;              41 const volatile bool highpri_boosting;
 42 const volatile bool print_shared_dsq;              42 const volatile bool print_shared_dsq;
 43 const volatile s32 disallow_tgid;                  43 const volatile s32 disallow_tgid;
 44 const volatile bool suppress_dump;                 44 const volatile bool suppress_dump;
 45                                                    45 
 46 u64 nr_highpri_queued;                             46 u64 nr_highpri_queued;
 47 u32 test_error_cnt;                                47 u32 test_error_cnt;
 48                                                    48 
 49 UEI_DEFINE(uei);                                   49 UEI_DEFINE(uei);
 50                                                    50 
 51 struct qmap {                                      51 struct qmap {
 52         __uint(type, BPF_MAP_TYPE_QUEUE);          52         __uint(type, BPF_MAP_TYPE_QUEUE);
 53         __uint(max_entries, 4096);                 53         __uint(max_entries, 4096);
 54         __type(value, u32);                        54         __type(value, u32);
 55 } queue0 SEC(".maps"),                             55 } queue0 SEC(".maps"),
 56   queue1 SEC(".maps"),                             56   queue1 SEC(".maps"),
 57   queue2 SEC(".maps"),                             57   queue2 SEC(".maps"),
 58   queue3 SEC(".maps"),                             58   queue3 SEC(".maps"),
 59   queue4 SEC(".maps");                             59   queue4 SEC(".maps");
 60                                                    60 
 61 struct {                                           61 struct {
 62         __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAP     62         __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 63         __uint(max_entries, 5);                    63         __uint(max_entries, 5);
 64         __type(key, int);                          64         __type(key, int);
 65         __array(values, struct qmap);              65         __array(values, struct qmap);
 66 } queue_arr SEC(".maps") = {                       66 } queue_arr SEC(".maps") = {
 67         .values = {                                67         .values = {
 68                 [0] = &queue0,                     68                 [0] = &queue0,
 69                 [1] = &queue1,                     69                 [1] = &queue1,
 70                 [2] = &queue2,                     70                 [2] = &queue2,
 71                 [3] = &queue3,                     71                 [3] = &queue3,
 72                 [4] = &queue4,                     72                 [4] = &queue4,
 73         },                                         73         },
 74 };                                                 74 };
 75                                                    75 
 76 /*                                                 76 /*
 77  * If enabled, CPU performance target is set a     77  * If enabled, CPU performance target is set according to the queue index
 78  * according to the following table.               78  * according to the following table.
 79  */                                                79  */
 80 static const u32 qidx_to_cpuperf_target[] = {      80 static const u32 qidx_to_cpuperf_target[] = {
 81         [0] = SCX_CPUPERF_ONE * 0 / 4,             81         [0] = SCX_CPUPERF_ONE * 0 / 4,
 82         [1] = SCX_CPUPERF_ONE * 1 / 4,             82         [1] = SCX_CPUPERF_ONE * 1 / 4,
 83         [2] = SCX_CPUPERF_ONE * 2 / 4,             83         [2] = SCX_CPUPERF_ONE * 2 / 4,
 84         [3] = SCX_CPUPERF_ONE * 3 / 4,             84         [3] = SCX_CPUPERF_ONE * 3 / 4,
 85         [4] = SCX_CPUPERF_ONE * 4 / 4,             85         [4] = SCX_CPUPERF_ONE * 4 / 4,
 86 };                                                 86 };
 87                                                    87 
 88 /*                                                 88 /*
 89  * Per-queue sequence numbers to implement cor     89  * Per-queue sequence numbers to implement core-sched ordering.
 90  *                                                 90  *
 91  * Tail seq is assigned to each queued task an     91  * Tail seq is assigned to each queued task and incremented. Head seq tracks the
 92  * sequence number of the latest dispatched ta     92  * sequence number of the latest dispatched task. The distance between the a
 93  * task's seq and the associated queue's head      93  * task's seq and the associated queue's head seq is called the queue distance
 94  * and used when comparing two tasks for order     94  * and used when comparing two tasks for ordering. See qmap_core_sched_before().
 95  */                                                95  */
 96 static u64 core_sched_head_seqs[5];                96 static u64 core_sched_head_seqs[5];
 97 static u64 core_sched_tail_seqs[5];                97 static u64 core_sched_tail_seqs[5];
 98                                                    98 
 99 /* Per-task scheduling context */                  99 /* Per-task scheduling context */
100 struct task_ctx {                                 100 struct task_ctx {
101         bool    force_local;    /* Dispatch di    101         bool    force_local;    /* Dispatch directly to local_dsq */
102         bool    highpri;                          102         bool    highpri;
103         u64     core_sched_seq;                   103         u64     core_sched_seq;
104 };                                                104 };
105                                                   105 
106 struct {                                          106 struct {
107         __uint(type, BPF_MAP_TYPE_TASK_STORAGE    107         __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
108         __uint(map_flags, BPF_F_NO_PREALLOC);     108         __uint(map_flags, BPF_F_NO_PREALLOC);
109         __type(key, int);                         109         __type(key, int);
110         __type(value, struct task_ctx);           110         __type(value, struct task_ctx);
111 } task_ctx_stor SEC(".maps");                     111 } task_ctx_stor SEC(".maps");
112                                                   112 
113 struct cpu_ctx {                                  113 struct cpu_ctx {
114         u64     dsp_idx;        /* dispatch in    114         u64     dsp_idx;        /* dispatch index */
115         u64     dsp_cnt;        /* remaining c    115         u64     dsp_cnt;        /* remaining count */
116         u32     avg_weight;                       116         u32     avg_weight;
117         u32     cpuperf_target;                   117         u32     cpuperf_target;
118 };                                                118 };
119                                                   119 
120 struct {                                          120 struct {
121         __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY    121         __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
122         __uint(max_entries, 1);                   122         __uint(max_entries, 1);
123         __type(key, u32);                         123         __type(key, u32);
124         __type(value, struct cpu_ctx);            124         __type(value, struct cpu_ctx);
125 } cpu_ctx_stor SEC(".maps");                      125 } cpu_ctx_stor SEC(".maps");
126                                                   126 
127 /* Statistics */                                  127 /* Statistics */
128 u64 nr_enqueued, nr_dispatched, nr_reenqueued,    128 u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued, nr_ddsp_from_enq;
129 u64 nr_core_sched_execed;                         129 u64 nr_core_sched_execed;
130 u64 nr_expedited_local, nr_expedited_remote, n    130 u64 nr_expedited_local, nr_expedited_remote, nr_expedited_lost, nr_expedited_from_timer;
131 u32 cpuperf_min, cpuperf_avg, cpuperf_max;        131 u32 cpuperf_min, cpuperf_avg, cpuperf_max;
132 u32 cpuperf_target_min, cpuperf_target_avg, cp    132 u32 cpuperf_target_min, cpuperf_target_avg, cpuperf_target_max;
133                                                   133 
134 static s32 pick_direct_dispatch_cpu(struct tas    134 static s32 pick_direct_dispatch_cpu(struct task_struct *p, s32 prev_cpu)
135 {                                                 135 {
136         s32 cpu;                                  136         s32 cpu;
137                                                   137 
138         if (p->nr_cpus_allowed == 1 ||            138         if (p->nr_cpus_allowed == 1 ||
139             scx_bpf_test_and_clear_cpu_idle(pr    139             scx_bpf_test_and_clear_cpu_idle(prev_cpu))
140                 return prev_cpu;                  140                 return prev_cpu;
141                                                   141 
142         cpu = scx_bpf_pick_idle_cpu(p->cpus_pt    142         cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
143         if (cpu >= 0)                             143         if (cpu >= 0)
144                 return cpu;                       144                 return cpu;
145                                                   145 
146         return -1;                                146         return -1;
147 }                                                 147 }
148                                                   148 
149 static struct task_ctx *lookup_task_ctx(struct    149 static struct task_ctx *lookup_task_ctx(struct task_struct *p)
150 {                                                 150 {
151         struct task_ctx *tctx;                    151         struct task_ctx *tctx;
152                                                   152 
153         if (!(tctx = bpf_task_storage_get(&tas    153         if (!(tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0))) {
154                 scx_bpf_error("task_ctx lookup    154                 scx_bpf_error("task_ctx lookup failed");
155                 return NULL;                      155                 return NULL;
156         }                                         156         }
157         return tctx;                              157         return tctx;
158 }                                                 158 }
159                                                   159 
160 s32 BPF_STRUCT_OPS(qmap_select_cpu, struct tas    160 s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p,
161                    s32 prev_cpu, u64 wake_flag    161                    s32 prev_cpu, u64 wake_flags)
162 {                                                 162 {
163         struct task_ctx *tctx;                    163         struct task_ctx *tctx;
164         s32 cpu;                                  164         s32 cpu;
165                                                   165 
166         if (!(tctx = lookup_task_ctx(p)))         166         if (!(tctx = lookup_task_ctx(p)))
167                 return -ESRCH;                    167                 return -ESRCH;
168                                                   168 
169         cpu = pick_direct_dispatch_cpu(p, prev    169         cpu = pick_direct_dispatch_cpu(p, prev_cpu);
170                                                   170 
171         if (cpu >= 0) {                           171         if (cpu >= 0) {
172                 tctx->force_local = true;         172                 tctx->force_local = true;
173                 return cpu;                       173                 return cpu;
174         } else {                                  174         } else {
175                 return prev_cpu;                  175                 return prev_cpu;
176         }                                         176         }
177 }                                                 177 }
178                                                   178 
179 static int weight_to_idx(u32 weight)              179 static int weight_to_idx(u32 weight)
180 {                                                 180 {
181         /* Coarsely map the compound weight to    181         /* Coarsely map the compound weight to a FIFO. */
182         if (weight <= 25)                         182         if (weight <= 25)
183                 return 0;                         183                 return 0;
184         else if (weight <= 50)                    184         else if (weight <= 50)
185                 return 1;                         185                 return 1;
186         else if (weight < 200)                    186         else if (weight < 200)
187                 return 2;                         187                 return 2;
188         else if (weight < 400)                    188         else if (weight < 400)
189                 return 3;                         189                 return 3;
190         else                                      190         else
191                 return 4;                         191                 return 4;
192 }                                                 192 }
193                                                   193 
194 void BPF_STRUCT_OPS(qmap_enqueue, struct task_    194 void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
195 {                                                 195 {
196         static u32 user_cnt, kernel_cnt;          196         static u32 user_cnt, kernel_cnt;
197         struct task_ctx *tctx;                    197         struct task_ctx *tctx;
198         u32 pid = p->pid;                         198         u32 pid = p->pid;
199         int idx = weight_to_idx(p->scx.weight)    199         int idx = weight_to_idx(p->scx.weight);
200         void *ring;                               200         void *ring;
201         s32 cpu;                                  201         s32 cpu;
202                                                   202 
203         if (p->flags & PF_KTHREAD) {              203         if (p->flags & PF_KTHREAD) {
204                 if (stall_kernel_nth && !(++ke    204                 if (stall_kernel_nth && !(++kernel_cnt % stall_kernel_nth))
205                         return;                   205                         return;
206         } else {                                  206         } else {
207                 if (stall_user_nth && !(++user    207                 if (stall_user_nth && !(++user_cnt % stall_user_nth))
208                         return;                   208                         return;
209         }                                         209         }
210                                                   210 
211         if (test_error_cnt && !--test_error_cn    211         if (test_error_cnt && !--test_error_cnt)
212                 scx_bpf_error("test triggering    212                 scx_bpf_error("test triggering error");
213                                                   213 
214         if (!(tctx = lookup_task_ctx(p)))         214         if (!(tctx = lookup_task_ctx(p)))
215                 return;                           215                 return;
216                                                   216 
217         /*                                        217         /*
218          * All enqueued tasks must have their     218          * All enqueued tasks must have their core_sched_seq updated for correct
219          * core-sched ordering. Also, take a l    219          * core-sched ordering. Also, take a look at the end of qmap_dispatch().
220          */                                       220          */
221         tctx->core_sched_seq = core_sched_tail    221         tctx->core_sched_seq = core_sched_tail_seqs[idx]++;
222                                                   222 
223         /*                                        223         /*
224          * If qmap_select_cpu() is telling us     224          * If qmap_select_cpu() is telling us to or this is the last runnable
225          * task on the CPU, enqueue locally.      225          * task on the CPU, enqueue locally.
226          */                                       226          */
227         if (tctx->force_local) {                  227         if (tctx->force_local) {
228                 tctx->force_local = false;        228                 tctx->force_local = false;
229                 scx_bpf_dispatch(p, SCX_DSQ_LO    229                 scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, enq_flags);
230                 return;                           230                 return;
231         }                                         231         }
232                                                   232 
233         /* if select_cpu() wasn't called, try     233         /* if select_cpu() wasn't called, try direct dispatch */
234         if (!(enq_flags & SCX_ENQ_CPU_SELECTED    234         if (!(enq_flags & SCX_ENQ_CPU_SELECTED) &&
235             (cpu = pick_direct_dispatch_cpu(p,    235             (cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) {
236                 __sync_fetch_and_add(&nr_ddsp_    236                 __sync_fetch_and_add(&nr_ddsp_from_enq, 1);
237                 scx_bpf_dispatch(p, SCX_DSQ_LO    237                 scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags);
238                 return;                           238                 return;
239         }                                         239         }
240                                                   240 
241         /*                                        241         /*
242          * If the task was re-enqueued due to     242          * If the task was re-enqueued due to the CPU being preempted by a
243          * higher priority scheduling class, j    243          * higher priority scheduling class, just re-enqueue the task directly
244          * on the global DSQ. As we want anoth    244          * on the global DSQ. As we want another CPU to pick it up, find and
245          * kick an idle CPU.                      245          * kick an idle CPU.
246          */                                       246          */
247         if (enq_flags & SCX_ENQ_REENQ) {          247         if (enq_flags & SCX_ENQ_REENQ) {
248                 s32 cpu;                          248                 s32 cpu;
249                                                   249 
250                 scx_bpf_dispatch(p, SHARED_DSQ    250                 scx_bpf_dispatch(p, SHARED_DSQ, 0, enq_flags);
251                 cpu = scx_bpf_pick_idle_cpu(p-    251                 cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
252                 if (cpu >= 0)                     252                 if (cpu >= 0)
253                         scx_bpf_kick_cpu(cpu,     253                         scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
254                 return;                           254                 return;
255         }                                         255         }
256                                                   256 
257         ring = bpf_map_lookup_elem(&queue_arr,    257         ring = bpf_map_lookup_elem(&queue_arr, &idx);
258         if (!ring) {                              258         if (!ring) {
259                 scx_bpf_error("failed to find     259                 scx_bpf_error("failed to find ring %d", idx);
260                 return;                           260                 return;
261         }                                         261         }
262                                                   262 
263         /* Queue on the selected FIFO. If the     263         /* Queue on the selected FIFO. If the FIFO overflows, punt to global. */
264         if (bpf_map_push_elem(ring, &pid, 0))     264         if (bpf_map_push_elem(ring, &pid, 0)) {
265                 scx_bpf_dispatch(p, SHARED_DSQ    265                 scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, enq_flags);
266                 return;                           266                 return;
267         }                                         267         }
268                                                   268 
269         if (highpri_boosting && p->scx.weight     269         if (highpri_boosting && p->scx.weight >= HIGHPRI_WEIGHT) {
270                 tctx->highpri = true;             270                 tctx->highpri = true;
271                 __sync_fetch_and_add(&nr_highp    271                 __sync_fetch_and_add(&nr_highpri_queued, 1);
272         }                                         272         }
273         __sync_fetch_and_add(&nr_enqueued, 1);    273         __sync_fetch_and_add(&nr_enqueued, 1);
274 }                                                 274 }
275                                                   275 
276 /*                                                276 /*
277  * The BPF queue map doesn't support removal a    277  * The BPF queue map doesn't support removal and sched_ext can handle spurious
278  * dispatches. qmap_dequeue() is only used to     278  * dispatches. qmap_dequeue() is only used to collect statistics.
279  */                                               279  */
280 void BPF_STRUCT_OPS(qmap_dequeue, struct task_    280 void BPF_STRUCT_OPS(qmap_dequeue, struct task_struct *p, u64 deq_flags)
281 {                                                 281 {
282         __sync_fetch_and_add(&nr_dequeued, 1);    282         __sync_fetch_and_add(&nr_dequeued, 1);
283         if (deq_flags & SCX_DEQ_CORE_SCHED_EXE    283         if (deq_flags & SCX_DEQ_CORE_SCHED_EXEC)
284                 __sync_fetch_and_add(&nr_core_    284                 __sync_fetch_and_add(&nr_core_sched_execed, 1);
285 }                                                 285 }
286                                                   286 
287 static void update_core_sched_head_seq(struct     287 static void update_core_sched_head_seq(struct task_struct *p)
288 {                                                 288 {
289         int idx = weight_to_idx(p->scx.weight)    289         int idx = weight_to_idx(p->scx.weight);
290         struct task_ctx *tctx;                    290         struct task_ctx *tctx;
291                                                   291 
292         if ((tctx = lookup_task_ctx(p)))          292         if ((tctx = lookup_task_ctx(p)))
293                 core_sched_head_seqs[idx] = tc    293                 core_sched_head_seqs[idx] = tctx->core_sched_seq;
294 }                                                 294 }
295                                                   295 
296 /*                                                296 /*
297  * To demonstrate the use of scx_bpf_dispatch_    297  * To demonstrate the use of scx_bpf_dispatch_from_dsq(), implement silly
298  * selective priority boosting mechanism by sc    298  * selective priority boosting mechanism by scanning SHARED_DSQ looking for
299  * highpri tasks, moving them to HIGHPRI_DSQ a    299  * highpri tasks, moving them to HIGHPRI_DSQ and then consuming them first. This
300  * makes minor difference only when dsp_batch     300  * makes minor difference only when dsp_batch is larger than 1.
301  *                                                301  *
302  * scx_bpf_dispatch[_vtime]_from_dsq() are all    302  * scx_bpf_dispatch[_vtime]_from_dsq() are allowed both from ops.dispatch() and
303  * non-rq-lock holding BPF programs. As demons    303  * non-rq-lock holding BPF programs. As demonstration, this function is called
304  * from qmap_dispatch() and monitor_timerfn().    304  * from qmap_dispatch() and monitor_timerfn().
305  */                                               305  */
306 static bool dispatch_highpri(bool from_timer)     306 static bool dispatch_highpri(bool from_timer)
307 {                                                 307 {
308         struct task_struct *p;                    308         struct task_struct *p;
309         s32 this_cpu = bpf_get_smp_processor_i    309         s32 this_cpu = bpf_get_smp_processor_id();
310                                                   310 
311         /* scan SHARED_DSQ and move highpri ta    311         /* scan SHARED_DSQ and move highpri tasks to HIGHPRI_DSQ */
312         bpf_for_each(scx_dsq, p, SHARED_DSQ, 0    312         bpf_for_each(scx_dsq, p, SHARED_DSQ, 0) {
313                 static u64 highpri_seq;           313                 static u64 highpri_seq;
314                 struct task_ctx *tctx;            314                 struct task_ctx *tctx;
315                                                   315 
316                 if (!(tctx = lookup_task_ctx(p    316                 if (!(tctx = lookup_task_ctx(p)))
317                         return false;             317                         return false;
318                                                   318 
319                 if (tctx->highpri) {              319                 if (tctx->highpri) {
320                         /* exercise the set_*(    320                         /* exercise the set_*() and vtime interface too */
321                         __COMPAT_scx_bpf_dispa    321                         __COMPAT_scx_bpf_dispatch_from_dsq_set_slice(
322                                 BPF_FOR_EACH_I    322                                 BPF_FOR_EACH_ITER, slice_ns * 2);
323                         __COMPAT_scx_bpf_dispa    323                         __COMPAT_scx_bpf_dispatch_from_dsq_set_vtime(
324                                 BPF_FOR_EACH_I    324                                 BPF_FOR_EACH_ITER, highpri_seq++);
325                         __COMPAT_scx_bpf_dispa    325                         __COMPAT_scx_bpf_dispatch_vtime_from_dsq(
326                                 BPF_FOR_EACH_I    326                                 BPF_FOR_EACH_ITER, p, HIGHPRI_DSQ, 0);
327                 }                                 327                 }
328         }                                         328         }
329                                                   329 
330         /*                                        330         /*
331          * Scan HIGHPRI_DSQ and dispatch until    331          * Scan HIGHPRI_DSQ and dispatch until a task that can run on this CPU
332          * is found.                              332          * is found.
333          */                                       333          */
334         bpf_for_each(scx_dsq, p, HIGHPRI_DSQ,     334         bpf_for_each(scx_dsq, p, HIGHPRI_DSQ, 0) {
335                 bool dispatched = false;          335                 bool dispatched = false;
336                 s32 cpu;                          336                 s32 cpu;
337                                                   337 
338                 if (bpf_cpumask_test_cpu(this_    338                 if (bpf_cpumask_test_cpu(this_cpu, p->cpus_ptr))
339                         cpu = this_cpu;           339                         cpu = this_cpu;
340                 else                              340                 else
341                         cpu = scx_bpf_pick_any    341                         cpu = scx_bpf_pick_any_cpu(p->cpus_ptr, 0);
342                                                   342 
343                 if (__COMPAT_scx_bpf_dispatch_    343                 if (__COMPAT_scx_bpf_dispatch_from_dsq(BPF_FOR_EACH_ITER, p,
344                                                   344                                                        SCX_DSQ_LOCAL_ON | cpu,
345                                                   345                                                        SCX_ENQ_PREEMPT)) {
346                         if (cpu == this_cpu) {    346                         if (cpu == this_cpu) {
347                                 dispatched = t    347                                 dispatched = true;
348                                 __sync_fetch_a    348                                 __sync_fetch_and_add(&nr_expedited_local, 1);
349                         } else {                  349                         } else {
350                                 __sync_fetch_a    350                                 __sync_fetch_and_add(&nr_expedited_remote, 1);
351                         }                         351                         }
352                         if (from_timer)           352                         if (from_timer)
353                                 __sync_fetch_a    353                                 __sync_fetch_and_add(&nr_expedited_from_timer, 1);
354                 } else {                          354                 } else {
355                         __sync_fetch_and_add(&    355                         __sync_fetch_and_add(&nr_expedited_lost, 1);
356                 }                                 356                 }
357                                                   357 
358                 if (dispatched)                   358                 if (dispatched)
359                         return true;              359                         return true;
360         }                                         360         }
361                                                   361 
362         return false;                             362         return false;
363 }                                                 363 }
364                                                   364 
365 void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, st    365 void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev)
366 {                                                 366 {
367         struct task_struct *p;                    367         struct task_struct *p;
368         struct cpu_ctx *cpuc;                     368         struct cpu_ctx *cpuc;
369         struct task_ctx *tctx;                    369         struct task_ctx *tctx;
370         u32 zero = 0, batch = dsp_batch ?: 1;     370         u32 zero = 0, batch = dsp_batch ?: 1;
371         void *fifo;                               371         void *fifo;
372         s32 i, pid;                               372         s32 i, pid;
373                                                   373 
374         if (dispatch_highpri(false))              374         if (dispatch_highpri(false))
375                 return;                           375                 return;
376                                                   376 
377         if (!nr_highpri_queued && scx_bpf_cons    377         if (!nr_highpri_queued && scx_bpf_consume(SHARED_DSQ))
378                 return;                           378                 return;
379                                                   379 
380         if (dsp_inf_loop_after && nr_dispatche    380         if (dsp_inf_loop_after && nr_dispatched > dsp_inf_loop_after) {
381                 /*                                381                 /*
382                  * PID 2 should be kthreadd wh    382                  * PID 2 should be kthreadd which should mostly be idle and off
383                  * the scheduler. Let's keep d    383                  * the scheduler. Let's keep dispatching it to force the kernel
384                  * to call this function over     384                  * to call this function over and over again.
385                  */                               385                  */
386                 p = bpf_task_from_pid(2);         386                 p = bpf_task_from_pid(2);
387                 if (p) {                          387                 if (p) {
388                         scx_bpf_dispatch(p, SC    388                         scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, 0);
389                         bpf_task_release(p);      389                         bpf_task_release(p);
390                         return;                   390                         return;
391                 }                                 391                 }
392         }                                         392         }
393                                                   393 
394         if (!(cpuc = bpf_map_lookup_elem(&cpu_    394         if (!(cpuc = bpf_map_lookup_elem(&cpu_ctx_stor, &zero))) {
395                 scx_bpf_error("failed to look     395                 scx_bpf_error("failed to look up cpu_ctx");
396                 return;                           396                 return;
397         }                                         397         }
398                                                   398 
399         for (i = 0; i < 5; i++) {                 399         for (i = 0; i < 5; i++) {
400                 /* Advance the dispatch cursor    400                 /* Advance the dispatch cursor and pick the fifo. */
401                 if (!cpuc->dsp_cnt) {             401                 if (!cpuc->dsp_cnt) {
402                         cpuc->dsp_idx = (cpuc-    402                         cpuc->dsp_idx = (cpuc->dsp_idx + 1) % 5;
403                         cpuc->dsp_cnt = 1 << c    403                         cpuc->dsp_cnt = 1 << cpuc->dsp_idx;
404                 }                                 404                 }
405                                                   405 
406                 fifo = bpf_map_lookup_elem(&qu    406                 fifo = bpf_map_lookup_elem(&queue_arr, &cpuc->dsp_idx);
407                 if (!fifo) {                      407                 if (!fifo) {
408                         scx_bpf_error("failed     408                         scx_bpf_error("failed to find ring %llu", cpuc->dsp_idx);
409                         return;                   409                         return;
410                 }                                 410                 }
411                                                   411 
412                 /* Dispatch or advance. */        412                 /* Dispatch or advance. */
413                 bpf_repeat(BPF_MAX_LOOPS) {       413                 bpf_repeat(BPF_MAX_LOOPS) {
414                         struct task_ctx *tctx;    414                         struct task_ctx *tctx;
415                                                   415 
416                         if (bpf_map_pop_elem(f    416                         if (bpf_map_pop_elem(fifo, &pid))
417                                 break;            417                                 break;
418                                                   418 
419                         p = bpf_task_from_pid(    419                         p = bpf_task_from_pid(pid);
420                         if (!p)                   420                         if (!p)
421                                 continue;         421                                 continue;
422                                                   422 
423                         if (!(tctx = lookup_ta    423                         if (!(tctx = lookup_task_ctx(p))) {
424                                 bpf_task_relea    424                                 bpf_task_release(p);
425                                 return;           425                                 return;
426                         }                         426                         }
427                                                   427 
428                         if (tctx->highpri)        428                         if (tctx->highpri)
429                                 __sync_fetch_a    429                                 __sync_fetch_and_sub(&nr_highpri_queued, 1);
430                                                   430 
431                         update_core_sched_head    431                         update_core_sched_head_seq(p);
432                         __sync_fetch_and_add(&    432                         __sync_fetch_and_add(&nr_dispatched, 1);
433                                                   433 
434                         scx_bpf_dispatch(p, SH    434                         scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, 0);
435                         bpf_task_release(p);      435                         bpf_task_release(p);
436                                                   436 
437                         batch--;                  437                         batch--;
438                         cpuc->dsp_cnt--;          438                         cpuc->dsp_cnt--;
439                         if (!batch || !scx_bpf    439                         if (!batch || !scx_bpf_dispatch_nr_slots()) {
440                                 if (dispatch_h    440                                 if (dispatch_highpri(false))
441                                         return    441                                         return;
442                                 scx_bpf_consum    442                                 scx_bpf_consume(SHARED_DSQ);
443                                 return;           443                                 return;
444                         }                         444                         }
445                         if (!cpuc->dsp_cnt)       445                         if (!cpuc->dsp_cnt)
446                                 break;            446                                 break;
447                 }                                 447                 }
448                                                   448 
449                 cpuc->dsp_cnt = 0;                449                 cpuc->dsp_cnt = 0;
450         }                                         450         }
451                                                   451 
452         /*                                        452         /*
453          * No other tasks. @prev will keep run    453          * No other tasks. @prev will keep running. Update its core_sched_seq as
454          * if the task were enqueued and dispa    454          * if the task were enqueued and dispatched immediately.
455          */                                       455          */
456         if (prev) {                               456         if (prev) {
457                 tctx = bpf_task_storage_get(&t    457                 tctx = bpf_task_storage_get(&task_ctx_stor, prev, 0, 0);
458                 if (!tctx) {                      458                 if (!tctx) {
459                         scx_bpf_error("task_ct    459                         scx_bpf_error("task_ctx lookup failed");
460                         return;                   460                         return;
461                 }                                 461                 }
462                                                   462 
463                 tctx->core_sched_seq =            463                 tctx->core_sched_seq =
464                         core_sched_tail_seqs[w    464                         core_sched_tail_seqs[weight_to_idx(prev->scx.weight)]++;
465         }                                         465         }
466 }                                                 466 }
467                                                   467 
468 void BPF_STRUCT_OPS(qmap_tick, struct task_str    468 void BPF_STRUCT_OPS(qmap_tick, struct task_struct *p)
469 {                                                 469 {
470         struct cpu_ctx *cpuc;                     470         struct cpu_ctx *cpuc;
471         u32 zero = 0;                             471         u32 zero = 0;
472         int idx;                                  472         int idx;
473                                                   473 
474         if (!(cpuc = bpf_map_lookup_elem(&cpu_    474         if (!(cpuc = bpf_map_lookup_elem(&cpu_ctx_stor, &zero))) {
475                 scx_bpf_error("failed to look     475                 scx_bpf_error("failed to look up cpu_ctx");
476                 return;                           476                 return;
477         }                                         477         }
478                                                   478 
479         /*                                        479         /*
480          * Use the running avg of weights to s    480          * Use the running avg of weights to select the target cpuperf level.
481          * This is a demonstration of the cpup    481          * This is a demonstration of the cpuperf feature rather than a
482          * practical strategy to regulate CPU     482          * practical strategy to regulate CPU frequency.
483          */                                       483          */
484         cpuc->avg_weight = cpuc->avg_weight *     484         cpuc->avg_weight = cpuc->avg_weight * 3 / 4 + p->scx.weight / 4;
485         idx = weight_to_idx(cpuc->avg_weight);    485         idx = weight_to_idx(cpuc->avg_weight);
486         cpuc->cpuperf_target = qidx_to_cpuperf    486         cpuc->cpuperf_target = qidx_to_cpuperf_target[idx];
487                                                   487 
488         scx_bpf_cpuperf_set(scx_bpf_task_cpu(p    488         scx_bpf_cpuperf_set(scx_bpf_task_cpu(p), cpuc->cpuperf_target);
489 }                                                 489 }
490                                                   490 
491 /*                                                491 /*
492  * The distance from the head of the queue sca    492  * The distance from the head of the queue scaled by the weight of the queue.
493  * The lower the number, the older the task an    493  * The lower the number, the older the task and the higher the priority.
494  */                                               494  */
495 static s64 task_qdist(struct task_struct *p)      495 static s64 task_qdist(struct task_struct *p)
496 {                                                 496 {
497         int idx = weight_to_idx(p->scx.weight)    497         int idx = weight_to_idx(p->scx.weight);
498         struct task_ctx *tctx;                    498         struct task_ctx *tctx;
499         s64 qdist;                                499         s64 qdist;
500                                                   500 
501         tctx = bpf_task_storage_get(&task_ctx_    501         tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0);
502         if (!tctx) {                              502         if (!tctx) {
503                 scx_bpf_error("task_ctx lookup    503                 scx_bpf_error("task_ctx lookup failed");
504                 return 0;                         504                 return 0;
505         }                                         505         }
506                                                   506 
507         qdist = tctx->core_sched_seq - core_sc    507         qdist = tctx->core_sched_seq - core_sched_head_seqs[idx];
508                                                   508 
509         /*                                        509         /*
510          * As queue index increments, the prio    510          * As queue index increments, the priority doubles. The queue w/ index 3
511          * is dispatched twice more frequently    511          * is dispatched twice more frequently than 2. Reflect the difference by
512          * scaling qdists accordingly. Note th    512          * scaling qdists accordingly. Note that the shift amount needs to be
513          * flipped depending on the sign to av    513          * flipped depending on the sign to avoid flipping priority direction.
514          */                                       514          */
515         if (qdist >= 0)                           515         if (qdist >= 0)
516                 return qdist << (4 - idx);        516                 return qdist << (4 - idx);
517         else                                      517         else
518                 return qdist << idx;              518                 return qdist << idx;
519 }                                                 519 }
520                                                   520 
521 /*                                                521 /*
522  * This is called to determine the task orderi    522  * This is called to determine the task ordering when core-sched is picking
523  * tasks to execute on SMT siblings and should    523  * tasks to execute on SMT siblings and should encode about the same ordering as
524  * the regular scheduling path. Use the priori    524  * the regular scheduling path. Use the priority-scaled distances from the head
525  * of the queues to compare the two tasks whic    525  * of the queues to compare the two tasks which should be consistent with the
526  * dispatch path behavior.                        526  * dispatch path behavior.
527  */                                               527  */
528 bool BPF_STRUCT_OPS(qmap_core_sched_before,       528 bool BPF_STRUCT_OPS(qmap_core_sched_before,
529                     struct task_struct *a, str    529                     struct task_struct *a, struct task_struct *b)
530 {                                                 530 {
531         return task_qdist(a) > task_qdist(b);     531         return task_qdist(a) > task_qdist(b);
532 }                                                 532 }
533                                                   533 
534 void BPF_STRUCT_OPS(qmap_cpu_release, s32 cpu,    534 void BPF_STRUCT_OPS(qmap_cpu_release, s32 cpu, struct scx_cpu_release_args *args)
535 {                                                 535 {
536         u32 cnt;                                  536         u32 cnt;
537                                                   537 
538         /*                                        538         /*
539          * Called when @cpu is taken by a high    539          * Called when @cpu is taken by a higher priority scheduling class. This
540          * makes @cpu no longer available for     540          * makes @cpu no longer available for executing sched_ext tasks. As we
541          * don't want the tasks in @cpu's loca    541          * don't want the tasks in @cpu's local dsq to sit there until @cpu
542          * becomes available again, re-enqueue    542          * becomes available again, re-enqueue them into the global dsq. See
543          * %SCX_ENQ_REENQ handling in qmap_enq    543          * %SCX_ENQ_REENQ handling in qmap_enqueue().
544          */                                       544          */
545         cnt = scx_bpf_reenqueue_local();          545         cnt = scx_bpf_reenqueue_local();
546         if (cnt)                                  546         if (cnt)
547                 __sync_fetch_and_add(&nr_reenq    547                 __sync_fetch_and_add(&nr_reenqueued, cnt);
548 }                                                 548 }
549                                                   549 
550 s32 BPF_STRUCT_OPS(qmap_init_task, struct task    550 s32 BPF_STRUCT_OPS(qmap_init_task, struct task_struct *p,
551                    struct scx_init_task_args *    551                    struct scx_init_task_args *args)
552 {                                                 552 {
553         if (p->tgid == disallow_tgid)             553         if (p->tgid == disallow_tgid)
554                 p->scx.disallow = true;           554                 p->scx.disallow = true;
555                                                   555 
556         /*                                        556         /*
557          * @p is new. Let's ensure that its ta    557          * @p is new. Let's ensure that its task_ctx is available. We can sleep
558          * in this function and the following     558          * in this function and the following will automatically use GFP_KERNEL.
559          */                                       559          */
560         if (bpf_task_storage_get(&task_ctx_sto    560         if (bpf_task_storage_get(&task_ctx_stor, p, 0,
561                                  BPF_LOCAL_STO    561                                  BPF_LOCAL_STORAGE_GET_F_CREATE))
562                 return 0;                         562                 return 0;
563         else                                      563         else
564                 return -ENOMEM;                   564                 return -ENOMEM;
565 }                                                 565 }
566                                                   566 
567 void BPF_STRUCT_OPS(qmap_dump, struct scx_dump    567 void BPF_STRUCT_OPS(qmap_dump, struct scx_dump_ctx *dctx)
568 {                                                 568 {
569         s32 i, pid;                               569         s32 i, pid;
570                                                   570 
571         if (suppress_dump)                        571         if (suppress_dump)
572                 return;                           572                 return;
573                                                   573 
574         bpf_for(i, 0, 5) {                        574         bpf_for(i, 0, 5) {
575                 void *fifo;                       575                 void *fifo;
576                                                   576 
577                 if (!(fifo = bpf_map_lookup_el    577                 if (!(fifo = bpf_map_lookup_elem(&queue_arr, &i)))
578                         return;                   578                         return;
579                                                   579 
580                 scx_bpf_dump("QMAP FIFO[%d]:",    580                 scx_bpf_dump("QMAP FIFO[%d]:", i);
581                 bpf_repeat(4096) {                581                 bpf_repeat(4096) {
582                         if (bpf_map_pop_elem(f    582                         if (bpf_map_pop_elem(fifo, &pid))
583                                 break;            583                                 break;
584                         scx_bpf_dump(" %d", pi    584                         scx_bpf_dump(" %d", pid);
585                 }                                 585                 }
586                 scx_bpf_dump("\n");               586                 scx_bpf_dump("\n");
587         }                                         587         }
588 }                                                 588 }
589                                                   589 
590 void BPF_STRUCT_OPS(qmap_dump_cpu, struct scx_    590 void BPF_STRUCT_OPS(qmap_dump_cpu, struct scx_dump_ctx *dctx, s32 cpu, bool idle)
591 {                                                 591 {
592         u32 zero = 0;                             592         u32 zero = 0;
593         struct cpu_ctx *cpuc;                     593         struct cpu_ctx *cpuc;
594                                                   594 
595         if (suppress_dump || idle)                595         if (suppress_dump || idle)
596                 return;                           596                 return;
597         if (!(cpuc = bpf_map_lookup_percpu_ele    597         if (!(cpuc = bpf_map_lookup_percpu_elem(&cpu_ctx_stor, &zero, cpu)))
598                 return;                           598                 return;
599                                                   599 
600         scx_bpf_dump("QMAP: dsp_idx=%llu dsp_c    600         scx_bpf_dump("QMAP: dsp_idx=%llu dsp_cnt=%llu avg_weight=%u cpuperf_target=%u",
601                      cpuc->dsp_idx, cpuc->dsp_    601                      cpuc->dsp_idx, cpuc->dsp_cnt, cpuc->avg_weight,
602                      cpuc->cpuperf_target);       602                      cpuc->cpuperf_target);
603 }                                                 603 }
604                                                   604 
605 void BPF_STRUCT_OPS(qmap_dump_task, struct scx    605 void BPF_STRUCT_OPS(qmap_dump_task, struct scx_dump_ctx *dctx, struct task_struct *p)
606 {                                                 606 {
607         struct task_ctx *taskc;                   607         struct task_ctx *taskc;
608                                                   608 
609         if (suppress_dump)                        609         if (suppress_dump)
610                 return;                           610                 return;
611         if (!(taskc = bpf_task_storage_get(&ta    611         if (!(taskc = bpf_task_storage_get(&task_ctx_stor, p, 0, 0)))
612                 return;                           612                 return;
613                                                   613 
614         scx_bpf_dump("QMAP: force_local=%d cor    614         scx_bpf_dump("QMAP: force_local=%d core_sched_seq=%llu",
615                      taskc->force_local, taskc    615                      taskc->force_local, taskc->core_sched_seq);
616 }                                                 616 }
617                                                   617 
618 /*                                                618 /*
619  * Print out the online and possible CPU map u    619  * Print out the online and possible CPU map using bpf_printk() as a
620  * demonstration of using the cpumask kfuncs a    620  * demonstration of using the cpumask kfuncs and ops.cpu_on/offline().
621  */                                               621  */
622 static void print_cpus(void)                      622 static void print_cpus(void)
623 {                                                 623 {
624         const struct cpumask *possible, *onlin    624         const struct cpumask *possible, *online;
625         s32 cpu;                                  625         s32 cpu;
626         char buf[128] = "", *p;                   626         char buf[128] = "", *p;
627         int idx;                                  627         int idx;
628                                                   628 
629         possible = scx_bpf_get_possible_cpumas    629         possible = scx_bpf_get_possible_cpumask();
630         online = scx_bpf_get_online_cpumask();    630         online = scx_bpf_get_online_cpumask();
631                                                   631 
632         idx = 0;                                  632         idx = 0;
633         bpf_for(cpu, 0, scx_bpf_nr_cpu_ids())     633         bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) {
634                 if (!(p = MEMBER_VPTR(buf, [id    634                 if (!(p = MEMBER_VPTR(buf, [idx++])))
635                         break;                    635                         break;
636                 if (bpf_cpumask_test_cpu(cpu,     636                 if (bpf_cpumask_test_cpu(cpu, online))
637                         *p++ = 'O';               637                         *p++ = 'O';
638                 else if (bpf_cpumask_test_cpu(    638                 else if (bpf_cpumask_test_cpu(cpu, possible))
639                         *p++ = 'X';               639                         *p++ = 'X';
640                 else                              640                 else
641                         *p++ = ' ';               641                         *p++ = ' ';
642                                                   642 
643                 if ((cpu & 7) == 7) {             643                 if ((cpu & 7) == 7) {
644                         if (!(p = MEMBER_VPTR(    644                         if (!(p = MEMBER_VPTR(buf, [idx++])))
645                                 break;            645                                 break;
646                         *p++ = '|';               646                         *p++ = '|';
647                 }                                 647                 }
648         }                                         648         }
649         buf[sizeof(buf) - 1] = '\0';              649         buf[sizeof(buf) - 1] = '\0';
650                                                   650 
651         scx_bpf_put_cpumask(online);              651         scx_bpf_put_cpumask(online);
652         scx_bpf_put_cpumask(possible);            652         scx_bpf_put_cpumask(possible);
653                                                   653 
654         bpf_printk("CPUS: |%s", buf);             654         bpf_printk("CPUS: |%s", buf);
655 }                                                 655 }
656                                                   656 
657 void BPF_STRUCT_OPS(qmap_cpu_online, s32 cpu)     657 void BPF_STRUCT_OPS(qmap_cpu_online, s32 cpu)
658 {                                                 658 {
659         bpf_printk("CPU %d coming online", cpu    659         bpf_printk("CPU %d coming online", cpu);
660         /* @cpu is already online at this poin    660         /* @cpu is already online at this point */
661         print_cpus();                             661         print_cpus();
662 }                                                 662 }
663                                                   663 
664 void BPF_STRUCT_OPS(qmap_cpu_offline, s32 cpu)    664 void BPF_STRUCT_OPS(qmap_cpu_offline, s32 cpu)
665 {                                                 665 {
666         bpf_printk("CPU %d going offline", cpu    666         bpf_printk("CPU %d going offline", cpu);
667         /* @cpu is still online at this point     667         /* @cpu is still online at this point */
668         print_cpus();                             668         print_cpus();
669 }                                                 669 }
670                                                   670 
/* Value type of the monitor_timer map: a container for a single BPF timer. */
struct monitor_timer {
	struct bpf_timer timer;
};
674                                                   674 
/*
 * Single-entry array map, keyed by u32, that stores the struct monitor_timer
 * value. The map variable shares its name with the struct tag.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, struct monitor_timer);
} monitor_timer SEC(".maps");
681                                                   681 
682 /*                                                682 /*
683  * Print out the min, avg and max performance     683  * Print out the min, avg and max performance levels of CPUs every second to
684  * demonstrate the cpuperf interface.             684  * demonstrate the cpuperf interface.
685  */                                               685  */
686 static void monitor_cpuperf(void)                 686 static void monitor_cpuperf(void)
687 {                                                 687 {
688         u32 zero = 0, nr_cpu_ids;                 688         u32 zero = 0, nr_cpu_ids;
689         u64 cap_sum = 0, cur_sum = 0, cur_min     689         u64 cap_sum = 0, cur_sum = 0, cur_min = SCX_CPUPERF_ONE, cur_max = 0;
690         u64 target_sum = 0, target_min = SCX_C    690         u64 target_sum = 0, target_min = SCX_CPUPERF_ONE, target_max = 0;
691         const struct cpumask *online;             691         const struct cpumask *online;
692         int i, nr_online_cpus = 0;                692         int i, nr_online_cpus = 0;
693                                                   693 
694         nr_cpu_ids = scx_bpf_nr_cpu_ids();        694         nr_cpu_ids = scx_bpf_nr_cpu_ids();
695         online = scx_bpf_get_online_cpumask();    695         online = scx_bpf_get_online_cpumask();
696                                                   696 
697         bpf_for(i, 0, nr_cpu_ids) {               697         bpf_for(i, 0, nr_cpu_ids) {
698                 struct cpu_ctx *cpuc;             698                 struct cpu_ctx *cpuc;
699                 u32 cap, cur;                     699                 u32 cap, cur;
700                                                   700 
701                 if (!bpf_cpumask_test_cpu(i, o    701                 if (!bpf_cpumask_test_cpu(i, online))
702                         continue;                 702                         continue;
703                 nr_online_cpus++;                 703                 nr_online_cpus++;
704                                                   704 
705                 /* collect the capacity and cu    705                 /* collect the capacity and current cpuperf */
706                 cap = scx_bpf_cpuperf_cap(i);     706                 cap = scx_bpf_cpuperf_cap(i);
707                 cur = scx_bpf_cpuperf_cur(i);     707                 cur = scx_bpf_cpuperf_cur(i);
708                                                   708 
709                 cur_min = cur < cur_min ? cur     709                 cur_min = cur < cur_min ? cur : cur_min;
710                 cur_max = cur > cur_max ? cur     710                 cur_max = cur > cur_max ? cur : cur_max;
711                                                   711 
712                 /*                                712                 /*
713                  * $cur is relative to $cap. S    713                  * $cur is relative to $cap. Scale it down accordingly so that
714                  * it's in the same scale as o    714                  * it's in the same scale as other CPUs and $cur_sum/$cap_sum
715                  * makes sense.                   715                  * makes sense.
716                  */                               716                  */
717                 cur_sum += cur * cap / SCX_CPU    717                 cur_sum += cur * cap / SCX_CPUPERF_ONE;
718                 cap_sum += cap;                   718                 cap_sum += cap;
719                                                   719 
720                 if (!(cpuc = bpf_map_lookup_pe    720                 if (!(cpuc = bpf_map_lookup_percpu_elem(&cpu_ctx_stor, &zero, i))) {
721                         scx_bpf_error("failed     721                         scx_bpf_error("failed to look up cpu_ctx");
722                         goto out;                 722                         goto out;
723                 }                                 723                 }
724                                                   724 
725                 /* collect target */              725                 /* collect target */
726                 cur = cpuc->cpuperf_target;       726                 cur = cpuc->cpuperf_target;
727                 target_sum += cur;                727                 target_sum += cur;
728                 target_min = cur < target_min     728                 target_min = cur < target_min ? cur : target_min;
729                 target_max = cur > target_max     729                 target_max = cur > target_max ? cur : target_max;
730         }                                         730         }
731                                                   731 
732         cpuperf_min = cur_min;                    732         cpuperf_min = cur_min;
733         cpuperf_avg = cur_sum * SCX_CPUPERF_ON    733         cpuperf_avg = cur_sum * SCX_CPUPERF_ONE / cap_sum;
734         cpuperf_max = cur_max;                    734         cpuperf_max = cur_max;
735                                                   735 
736         cpuperf_target_min = target_min;          736         cpuperf_target_min = target_min;
737         cpuperf_target_avg = target_sum / nr_o    737         cpuperf_target_avg = target_sum / nr_online_cpus;
738         cpuperf_target_max = target_max;          738         cpuperf_target_max = target_max;
739 out:                                              739 out:
740         scx_bpf_put_cpumask(online);              740         scx_bpf_put_cpumask(online);
741 }                                                 741 }
742                                                   742 
743 /*                                                743 /*
744  * Dump the currently queued tasks in the shar    744  * Dump the currently queued tasks in the shared DSQ to demonstrate the usage of
745  * scx_bpf_dsq_nr_queued() and DSQ iterator. R    745  * scx_bpf_dsq_nr_queued() and DSQ iterator. Raise the dispatch batch count to
746  * see meaningful dumps in the trace pipe.        746  * see meaningful dumps in the trace pipe.
747  */                                               747  */
748 static void dump_shared_dsq(void)                 748 static void dump_shared_dsq(void)
749 {                                                 749 {
750         struct task_struct *p;                    750         struct task_struct *p;
751         s32 nr;                                   751         s32 nr;
752                                                   752 
753         if (!(nr = scx_bpf_dsq_nr_queued(SHARE    753         if (!(nr = scx_bpf_dsq_nr_queued(SHARED_DSQ)))
754                 return;                           754                 return;
755                                                   755 
756         bpf_printk("Dumping %d tasks in SHARED    756         bpf_printk("Dumping %d tasks in SHARED_DSQ in reverse order", nr);
757                                                   757 
758         bpf_rcu_read_lock();                      758         bpf_rcu_read_lock();
759         bpf_for_each(scx_dsq, p, SHARED_DSQ, S    759         bpf_for_each(scx_dsq, p, SHARED_DSQ, SCX_DSQ_ITER_REV)
760                 bpf_printk("%s[%d]", p->comm,     760                 bpf_printk("%s[%d]", p->comm, p->pid);
761         bpf_rcu_read_unlock();                    761         bpf_rcu_read_unlock();
762 }                                                 762 }
763                                                   763 
764 static int monitor_timerfn(void *map, int *key    764 static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
765 {                                                 765 {
766         bpf_rcu_read_lock();                      766         bpf_rcu_read_lock();
767         dispatch_highpri(true);                   767         dispatch_highpri(true);
768         bpf_rcu_read_unlock();                    768         bpf_rcu_read_unlock();
769                                                   769 
770         monitor_cpuperf();                        770         monitor_cpuperf();
771                                                   771 
772         if (print_shared_dsq)                     772         if (print_shared_dsq)
773                 dump_shared_dsq();                773                 dump_shared_dsq();
774                                                   774 
775         bpf_timer_start(timer, ONE_SEC_IN_NS,     775         bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
776         return 0;                                 776         return 0;
777 }                                                 777 }
778                                                   778 
779 s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init)           779 s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init)
780 {                                                 780 {
781         u32 key = 0;                              781         u32 key = 0;
782         struct bpf_timer *timer;                  782         struct bpf_timer *timer;
783         s32 ret;                                  783         s32 ret;
784                                                   784 
785         print_cpus();                             785         print_cpus();
786                                                   786 
787         ret = scx_bpf_create_dsq(SHARED_DSQ, -    787         ret = scx_bpf_create_dsq(SHARED_DSQ, -1);
788         if (ret)                                  788         if (ret)
789                 return ret;                       789                 return ret;
790                                                   790 
791         ret = scx_bpf_create_dsq(HIGHPRI_DSQ,     791         ret = scx_bpf_create_dsq(HIGHPRI_DSQ, -1);
792         if (ret)                                  792         if (ret)
793                 return ret;                       793                 return ret;
794                                                   794 
795         timer = bpf_map_lookup_elem(&monitor_t    795         timer = bpf_map_lookup_elem(&monitor_timer, &key);
796         if (!timer)                               796         if (!timer)
797                 return -ESRCH;                    797                 return -ESRCH;
798                                                   798 
799         bpf_timer_init(timer, &monitor_timer,     799         bpf_timer_init(timer, &monitor_timer, CLOCK_MONOTONIC);
800         bpf_timer_set_callback(timer, monitor_    800         bpf_timer_set_callback(timer, monitor_timerfn);
801                                                   801 
802         return bpf_timer_start(timer, ONE_SEC_    802         return bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
803 }                                                 803 }
804                                                   804 
805 void BPF_STRUCT_OPS(qmap_exit, struct scx_exit    805 void BPF_STRUCT_OPS(qmap_exit, struct scx_exit_info *ei)
806 {                                                 806 {
807         UEI_RECORD(uei, ei);                      807         UEI_RECORD(uei, ei);
808 }                                                 808 }
809                                                   809 
810 SCX_OPS_DEFINE(qmap_ops,                          810 SCX_OPS_DEFINE(qmap_ops,
811                .select_cpu              = (voi    811                .select_cpu              = (void *)qmap_select_cpu,
812                .enqueue                 = (voi    812                .enqueue                 = (void *)qmap_enqueue,
813                .dequeue                 = (voi    813                .dequeue                 = (void *)qmap_dequeue,
814                .dispatch                = (voi    814                .dispatch                = (void *)qmap_dispatch,
815                .tick                    = (voi    815                .tick                    = (void *)qmap_tick,
816                .core_sched_before       = (voi    816                .core_sched_before       = (void *)qmap_core_sched_before,
817                .cpu_release             = (voi    817                .cpu_release             = (void *)qmap_cpu_release,
818                .init_task               = (voi    818                .init_task               = (void *)qmap_init_task,
819                .dump                    = (voi    819                .dump                    = (void *)qmap_dump,
820                .dump_cpu                = (voi    820                .dump_cpu                = (void *)qmap_dump_cpu,
821                .dump_task               = (voi    821                .dump_task               = (void *)qmap_dump_task,
822                .cpu_online              = (voi    822                .cpu_online              = (void *)qmap_cpu_online,
823                .cpu_offline             = (voi    823                .cpu_offline             = (void *)qmap_cpu_offline,
824                .init                    = (voi    824                .init                    = (void *)qmap_init,
825                .exit                    = (voi    825                .exit                    = (void *)qmap_exit,
826                .timeout_ms              = 5000    826                .timeout_ms              = 5000U,
827                .name                    = "qma    827                .name                    = "qmap");
828                                                   828 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php