/* SPDX-License-Identifier: GPL-2.0 */
/*
 * A simple five-level FIFO queue scheduler.
 *
 * There are five FIFOs implemented using BPF_MAP_TYPE_QUEUE. A task gets
 * assigned to one depending on its compound weight. Each CPU round robins
 * through the FIFOs and dispatches more from FIFOs with higher indices - 1 from
 * queue0, 2 from queue1, 4 from queue2 and so on.
 *
 * This scheduler demonstrates:
 *
 * - BPF-side queueing using PIDs.
 * - Sleepable per-task storage allocation using ops.init_task().
 * - Using ops.cpu_release() to handle a higher priority scheduling class taking
 *   the CPU away.
 * - Core-sched support.
 *
 * This scheduler is primarily for demonstration and testing of sched_ext
 * features and unlikely to be useful for actual workloads.
 *
 * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
 * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
 * Copyright (c) 2022 David Vernet <dvernet@meta.com>
 */
#include <scx/common.bpf.h>

enum consts {
	ONE_SEC_IN_NS		= 1000000000,
	SHARED_DSQ		= 0,
	HIGHPRI_DSQ		= 1,
	HIGHPRI_WEIGHT		= 8668,		/* this is what nice -20 maps to */
};

char _license[] SEC("license") = "GPL";

const volatile u64 slice_ns = SCX_SLICE_DFL;
const volatile u32 stall_user_nth;
const volatile u32 stall_kernel_nth;
const volatile u32 dsp_inf_loop_after;
const volatile u32 dsp_batch;
const volatile bool highpri_boosting;
const volatile bool print_shared_dsq;
const volatile s32 disallow_tgid;
const volatile bool suppress_dump;

u64 nr_highpri_queued;
u32 test_error_cnt;

UEI_DEFINE(uei);

struct qmap {
	__uint(type, BPF_MAP_TYPE_QUEUE);
	__uint(max_entries, 4096);
	__type(value, u32);
} queue0 SEC(".maps"),
  queue1 SEC(".maps"),
  queue2 SEC(".maps"),
  queue3 SEC(".maps"),
  queue4 SEC(".maps");

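/*
 * The five FIFOs above are collected into an array-of-maps so that the enqueue
 * and dispatch paths can pick a FIFO by its queue index at runtime with a
 * single bpf_map_lookup_elem() instead of open-coding a switch over
 * queue0..queue4.
 */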
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, 5);
	__type(key, int);
	__array(values, struct qmap);
} queue_arr SEC(".maps") = {
	.values = {
		[0] = &queue0,
		[1] = &queue1,
		[2] = &queue2,
		[3] = &queue3,
		[4] = &queue4,
	},
};

/*
 * If enabled, the CPU performance target is set according to the queue index
 * as per the following table.
 */
static const u32 qidx_to_cpuperf_target[] = {
	[0] = SCX_CPUPERF_ONE * 0 / 4,
	[1] = SCX_CPUPERF_ONE * 1 / 4,
	[2] = SCX_CPUPERF_ONE * 2 / 4,
	[3] = SCX_CPUPERF_ONE * 3 / 4,
	[4] = SCX_CPUPERF_ONE * 4 / 4,
};

/*
 * Per-queue sequence numbers to implement core-sched ordering.
 *
 * Tail seq is assigned to each queued task and incremented. Head seq tracks the
 * sequence number of the latest dispatched task. The distance between a task's
 * seq and the associated queue's head seq is called the queue distance and used
 * when comparing two tasks for ordering. See qmap_core_sched_before().
 */
static u64 core_sched_head_seqs[5];
static u64 core_sched_tail_seqs[5];

/* Per-task scheduling context */
struct task_ctx {
	bool	force_local;	/* Dispatch directly to local_dsq */
	bool	highpri;
	u64	core_sched_seq;
};

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct task_ctx);
} task_ctx_stor SEC(".maps");

struct cpu_ctx {
	u64	dsp_idx;	/* dispatch index */
	u64	dsp_cnt;	/* remaining count */
	u32	avg_weight;
	u32	cpuperf_target;
};

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, struct cpu_ctx);
} cpu_ctx_stor SEC(".maps");

/* Statistics */
u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued, nr_ddsp_from_enq;
u64 nr_core_sched_execed;
u64 nr_expedited_local, nr_expedited_remote, nr_expedited_lost, nr_expedited_from_timer;
u32 cpuperf_min, cpuperf_avg, cpuperf_max;
u32 cpuperf_target_min, cpuperf_target_avg, cpuperf_target_max;

static s32 pick_direct_dispatch_cpu(struct task_struct *p, s32 prev_cpu)
{
	s32 cpu;

	if (p->nr_cpus_allowed == 1 ||
	    scx_bpf_test_and_clear_cpu_idle(prev_cpu))
		return prev_cpu;

	cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
	if (cpu >= 0)
		return cpu;

	return -1;
}

static struct task_ctx *lookup_task_ctx(struct task_struct *p)
{
	struct task_ctx *tctx;

	if (!(tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0))) {
		scx_bpf_error("task_ctx lookup failed");
		return NULL;
	}
	return tctx;
}

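/*
 * ops.select_cpu() runs on the task wakeup path before ops.enqueue(). When a
 * direct dispatch target is found here, force_local is set so that the
 * subsequent qmap_enqueue() call dispatches the task straight to the local DSQ
 * of the selected CPU instead of pushing it onto one of the FIFOs.
 */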
s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p,
		   s32 prev_cpu, u64 wake_flags)
{
	struct task_ctx *tctx;
	s32 cpu;

	if (!(tctx = lookup_task_ctx(p)))
		return -ESRCH;

	cpu = pick_direct_dispatch_cpu(p, prev_cpu);

	if (cpu >= 0) {
		tctx->force_local = true;
		return cpu;
	} else {
		return prev_cpu;
	}
}

static int weight_to_idx(u32 weight)
{
	/* Coarsely map the compound weight to a FIFO. */
	if (weight <= 25)
		return 0;
	else if (weight <= 50)
		return 1;
	else if (weight < 200)
		return 2;
	else if (weight < 400)
		return 3;
	else
		return 4;
}

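/*
 * As a rough illustration (assuming the usual sched_ext weight scale where
 * nice 0 maps to 100 and nice -20 to HIGHPRI_WEIGHT): a nice-0 task with
 * weight 100 lands in queue2, heavily niced tasks with weights <= 50 end up in
 * queue0 or queue1, and strongly boosted tasks go to queue3 or queue4.
 */
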
void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
{
	static u32 user_cnt, kernel_cnt;
	struct task_ctx *tctx;
	u32 pid = p->pid;
	int idx = weight_to_idx(p->scx.weight);
	void *ring;
	s32 cpu;

	if (p->flags & PF_KTHREAD) {
		if (stall_kernel_nth && !(++kernel_cnt % stall_kernel_nth))
			return;
	} else {
		if (stall_user_nth && !(++user_cnt % stall_user_nth))
			return;
	}

	if (test_error_cnt && !--test_error_cnt)
		scx_bpf_error("test triggering error");

	if (!(tctx = lookup_task_ctx(p)))
		return;

	/*
	 * All enqueued tasks must have their core_sched_seq updated for correct
	 * core-sched ordering. Also, take a look at the end of qmap_dispatch().
	 */
	tctx->core_sched_seq = core_sched_tail_seqs[idx]++;

	/*
	 * If qmap_select_cpu() is telling us to, or this is the last runnable
	 * task on the CPU, enqueue locally.
	 */
	if (tctx->force_local) {
		tctx->force_local = false;
		scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, enq_flags);
		return;
	}

	/* if select_cpu() wasn't called, try direct dispatch */
	if (!(enq_flags & SCX_ENQ_CPU_SELECTED) &&
	    (cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) {
		__sync_fetch_and_add(&nr_ddsp_from_enq, 1);
		scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags);
		return;
	}

	/*
	 * If the task was re-enqueued due to the CPU being preempted by a
	 * higher priority scheduling class, just re-enqueue the task directly
	 * on the global DSQ. As we want another CPU to pick it up, find and
	 * kick an idle CPU.
	 */
	if (enq_flags & SCX_ENQ_REENQ) {
		s32 cpu;

		scx_bpf_dispatch(p, SHARED_DSQ, 0, enq_flags);
		cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
		if (cpu >= 0)
			scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
		return;
	}

	ring = bpf_map_lookup_elem(&queue_arr, &idx);
	if (!ring) {
		scx_bpf_error("failed to find ring %d", idx);
		return;
	}

	/* Queue on the selected FIFO. If the FIFO overflows, punt to global. */
	if (bpf_map_push_elem(ring, &pid, 0)) {
		scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, enq_flags);
		return;
	}

	if (highpri_boosting && p->scx.weight >= HIGHPRI_WEIGHT) {
		tctx->highpri = true;
		__sync_fetch_and_add(&nr_highpri_queued, 1);
	}
	__sync_fetch_and_add(&nr_enqueued, 1);
}

/*
 * The BPF queue map doesn't support removal and sched_ext can handle spurious
 * dispatches. qmap_dequeue() is only used to collect statistics.
 */
void BPF_STRUCT_OPS(qmap_dequeue, struct task_struct *p, u64 deq_flags)
{
	__sync_fetch_and_add(&nr_dequeued, 1);
	if (deq_flags & SCX_DEQ_CORE_SCHED_EXEC)
		__sync_fetch_and_add(&nr_core_sched_execed, 1);
}

static void update_core_sched_head_seq(struct task_struct *p)
{
	int idx = weight_to_idx(p->scx.weight);
	struct task_ctx *tctx;

	if ((tctx = lookup_task_ctx(p)))
		core_sched_head_seqs[idx] = tctx->core_sched_seq;
}

/*
 * To demonstrate the use of scx_bpf_dispatch_from_dsq(), implement a silly
 * selective priority boosting mechanism by scanning SHARED_DSQ looking for
 * highpri tasks, moving them to HIGHPRI_DSQ and then consuming them first. This
 * makes a minor difference only when dsp_batch is larger than 1.
 *
 * scx_bpf_dispatch[_vtime]_from_dsq() are allowed both from ops.dispatch() and
 * non-rq-lock holding BPF programs. As demonstration, this function is called
 * from qmap_dispatch() and monitor_timerfn().
 */
static bool dispatch_highpri(bool from_timer)
{
	struct task_struct *p;
	s32 this_cpu = bpf_get_smp_processor_id();

	/* scan SHARED_DSQ and move highpri tasks to HIGHPRI_DSQ */
	bpf_for_each(scx_dsq, p, SHARED_DSQ, 0) {
		static u64 highpri_seq;
		struct task_ctx *tctx;

		if (!(tctx = lookup_task_ctx(p)))
			return false;

		if (tctx->highpri) {
			/* exercise the set_*() and vtime interface too */
			__COMPAT_scx_bpf_dispatch_from_dsq_set_slice(
				BPF_FOR_EACH_ITER, slice_ns * 2);
			__COMPAT_scx_bpf_dispatch_from_dsq_set_vtime(
				BPF_FOR_EACH_ITER, highpri_seq++);
			__COMPAT_scx_bpf_dispatch_vtime_from_dsq(
				BPF_FOR_EACH_ITER, p, HIGHPRI_DSQ, 0);
		}
	}

	/*
	 * Scan HIGHPRI_DSQ and dispatch until a task that can run on this CPU
	 * is found.
	 */
	bpf_for_each(scx_dsq, p, HIGHPRI_DSQ, 0) {
		bool dispatched = false;
		s32 cpu;

		if (bpf_cpumask_test_cpu(this_cpu, p->cpus_ptr))
			cpu = this_cpu;
		else
			cpu = scx_bpf_pick_any_cpu(p->cpus_ptr, 0);

		if (__COMPAT_scx_bpf_dispatch_from_dsq(BPF_FOR_EACH_ITER, p,
						       SCX_DSQ_LOCAL_ON | cpu,
						       SCX_ENQ_PREEMPT)) {
			if (cpu == this_cpu) {
				dispatched = true;
				__sync_fetch_and_add(&nr_expedited_local, 1);
			} else {
				__sync_fetch_and_add(&nr_expedited_remote, 1);
			}
			if (from_timer)
				__sync_fetch_and_add(&nr_expedited_from_timer, 1);
		} else {
			__sync_fetch_and_add(&nr_expedited_lost, 1);
		}

		if (dispatched)
			return true;
	}

	return false;
}

void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev)
{
	struct task_struct *p;
	struct cpu_ctx *cpuc;
	struct task_ctx *tctx;
	u32 zero = 0, batch = dsp_batch ?: 1;
	void *fifo;
	s32 i, pid;

	if (dispatch_highpri(false))
		return;

	if (!nr_highpri_queued && scx_bpf_consume(SHARED_DSQ))
		return;

	if (dsp_inf_loop_after && nr_dispatched > dsp_inf_loop_after) {
		/*
		 * PID 2 should be kthreadd which should mostly be idle and off
		 * the scheduler. Let's keep dispatching it to force the kernel
		 * to call this function over and over again.
		 */
		p = bpf_task_from_pid(2);
		if (p) {
			scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, 0);
			bpf_task_release(p);
			return;
		}
	}

	if (!(cpuc = bpf_map_lookup_elem(&cpu_ctx_stor, &zero))) {
		scx_bpf_error("failed to look up cpu_ctx");
		return;
	}

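	/*
	 * Round robin through the five FIFOs, dispatching 1 << idx tasks from
	 * FIFO idx before moving on: 1 from queue0, 2 from queue1, up to 16
	 * from queue4, i.e. up to 31 tasks per full cycle, so higher queues
	 * get exponentially more dispatch bandwidth.
	 */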
	for (i = 0; i < 5; i++) {
		/* Advance the dispatch cursor and pick the fifo. */
		if (!cpuc->dsp_cnt) {
			cpuc->dsp_idx = (cpuc->dsp_idx + 1) % 5;
			cpuc->dsp_cnt = 1 << cpuc->dsp_idx;
		}

		fifo = bpf_map_lookup_elem(&queue_arr, &cpuc->dsp_idx);
		if (!fifo) {
			scx_bpf_error("failed to find ring %llu", cpuc->dsp_idx);
			return;
		}

		/* Dispatch or advance. */
		bpf_repeat(BPF_MAX_LOOPS) {
			struct task_ctx *tctx;

			if (bpf_map_pop_elem(fifo, &pid))
				break;

			p = bpf_task_from_pid(pid);
			if (!p)
				continue;

			if (!(tctx = lookup_task_ctx(p))) {
				bpf_task_release(p);
				return;
			}

			if (tctx->highpri)
				__sync_fetch_and_sub(&nr_highpri_queued, 1);

			update_core_sched_head_seq(p);
			__sync_fetch_and_add(&nr_dispatched, 1);

			scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, 0);
			bpf_task_release(p);

			batch--;
			cpuc->dsp_cnt--;
			if (!batch || !scx_bpf_dispatch_nr_slots()) {
				if (dispatch_highpri(false))
					return;
				scx_bpf_consume(SHARED_DSQ);
				return;
			}
			if (!cpuc->dsp_cnt)
				break;
		}

		cpuc->dsp_cnt = 0;
	}

	/*
	 * No other tasks. @prev will keep running. Update its core_sched_seq as
	 * if the task were enqueued and dispatched immediately.
	 */
	if (prev) {
		tctx = bpf_task_storage_get(&task_ctx_stor, prev, 0, 0);
		if (!tctx) {
			scx_bpf_error("task_ctx lookup failed");
			return;
		}

		tctx->core_sched_seq =
			core_sched_tail_seqs[weight_to_idx(prev->scx.weight)]++;
	}
}

void BPF_STRUCT_OPS(qmap_tick, struct task_struct *p)
{
	struct cpu_ctx *cpuc;
	u32 zero = 0;
	int idx;

	if (!(cpuc = bpf_map_lookup_elem(&cpu_ctx_stor, &zero))) {
		scx_bpf_error("failed to look up cpu_ctx");
		return;
	}

	/*
	 * Use the running avg of weights to select the target cpuperf level.
	 * This is a demonstration of the cpuperf feature rather than a
	 * practical strategy to regulate CPU frequency.
	 */
	cpuc->avg_weight = cpuc->avg_weight * 3 / 4 + p->scx.weight / 4;
	idx = weight_to_idx(cpuc->avg_weight);
	cpuc->cpuperf_target = qidx_to_cpuperf_target[idx];

	scx_bpf_cpuperf_set(scx_bpf_task_cpu(p), cpuc->cpuperf_target);
}

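/*
 * Following the averaging in qmap_tick() above: a CPU that keeps running
 * nice-0 tasks (weight 100) converges on avg_weight 100, which maps to queue
 * index 2 and thus a cpuperf target of SCX_CPUPERF_ONE * 2 / 4, i.e. half of
 * the maximum performance level.
 */
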
/*
 * The distance from the head of the queue scaled by the weight of the queue.
 * The lower the number, the older the task and the higher the priority.
 */
static s64 task_qdist(struct task_struct *p)
{
	int idx = weight_to_idx(p->scx.weight);
	struct task_ctx *tctx;
	s64 qdist;

	tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0);
	if (!tctx) {
		scx_bpf_error("task_ctx lookup failed");
		return 0;
	}

	qdist = tctx->core_sched_seq - core_sched_head_seqs[idx];

	/*
	 * As queue index increments, the priority doubles. The queue w/ index 3
	 * is dispatched twice as frequently as the one w/ index 2. Reflect the
	 * difference by scaling qdists accordingly. Note that the shift amount
	 * needs to be flipped depending on the sign to avoid flipping priority
	 * direction.
	 */
	if (qdist >= 0)
		return qdist << (4 - idx);
	else
		return qdist << idx;
}

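/*
 * For instance, with the scaling above, a task eight entries behind the head
 * of queue0 ends up with a qdist of 8 << 4 = 128, while a task eight entries
 * behind the head of queue4 ends up with 8 << 0 = 8. Per the rule that a lower
 * qdist means higher priority, the queue4 task is considered more urgent even
 * though both are equally deep in their respective queues.
 */
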
/*
 * This is called to determine the task ordering when core-sched is picking
 * tasks to execute on SMT siblings and should encode roughly the same ordering
 * as the regular scheduling path. Use the priority-scaled distances from the
 * head of the queues to compare the two tasks which should be consistent with
 * the dispatch path behavior.
 */
bool BPF_STRUCT_OPS(qmap_core_sched_before,
		    struct task_struct *a, struct task_struct *b)
{
	return task_qdist(a) > task_qdist(b);
}

void BPF_STRUCT_OPS(qmap_cpu_release, s32 cpu, struct scx_cpu_release_args *args)
{
	u32 cnt;

	/*
	 * Called when @cpu is taken by a higher priority scheduling class. This
	 * makes @cpu no longer available for executing sched_ext tasks. As we
	 * don't want the tasks in @cpu's local dsq to sit there until @cpu
	 * becomes available again, re-enqueue them into the global dsq. See
	 * %SCX_ENQ_REENQ handling in qmap_enqueue().
	 */
	cnt = scx_bpf_reenqueue_local();
	if (cnt)
		__sync_fetch_and_add(&nr_reenqueued, cnt);
}

s32 BPF_STRUCT_OPS(qmap_init_task, struct task_struct *p,
		   struct scx_init_task_args *args)
{
	if (p->tgid == disallow_tgid)
		p->scx.disallow = true;

	/*
	 * @p is new. Let's ensure that its task_ctx is available. We can sleep
	 * in this function and the following will automatically use GFP_KERNEL.
	 */
	if (bpf_task_storage_get(&task_ctx_stor, p, 0,
				 BPF_LOCAL_STORAGE_GET_F_CREATE))
		return 0;
	else
		return -ENOMEM;
}

void BPF_STRUCT_OPS(qmap_dump, struct scx_dump_ctx *dctx)
{
	s32 i, pid;

	if (suppress_dump)
		return;

	bpf_for(i, 0, 5) {
		void *fifo;

		if (!(fifo = bpf_map_lookup_elem(&queue_arr, &i)))
			return;

		scx_bpf_dump("QMAP FIFO[%d]:", i);
		bpf_repeat(4096) {
			if (bpf_map_pop_elem(fifo, &pid))
				break;
			scx_bpf_dump(" %d", pid);
		}
		scx_bpf_dump("\n");
	}
}

void BPF_STRUCT_OPS(qmap_dump_cpu, struct scx_dump_ctx *dctx, s32 cpu, bool idle)
{
	u32 zero = 0;
	struct cpu_ctx *cpuc;

	if (suppress_dump || idle)
		return;
	if (!(cpuc = bpf_map_lookup_percpu_elem(&cpu_ctx_stor, &zero, cpu)))
		return;

	scx_bpf_dump("QMAP: dsp_idx=%llu dsp_cnt=%llu avg_weight=%u cpuperf_target=%u",
		     cpuc->dsp_idx, cpuc->dsp_cnt, cpuc->avg_weight,
		     cpuc->cpuperf_target);
}

void BPF_STRUCT_OPS(qmap_dump_task, struct scx_dump_ctx *dctx, struct task_struct *p)
{
	struct task_ctx *taskc;

	if (suppress_dump)
		return;
	if (!(taskc = bpf_task_storage_get(&task_ctx_stor, p, 0, 0)))
		return;

	scx_bpf_dump("QMAP: force_local=%d core_sched_seq=%llu",
		     taskc->force_local, taskc->core_sched_seq);
}

/*
 * Print out the online and possible CPU map using bpf_printk() as a
 * demonstration of using the cpumask kfuncs and ops.cpu_on/offline().
 */
static void print_cpus(void)
{
	const struct cpumask *possible, *online;
	s32 cpu;
	char buf[128] = "", *p;
	int idx;

	possible = scx_bpf_get_possible_cpumask();
	online = scx_bpf_get_online_cpumask();

	idx = 0;
	bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) {
		if (!(p = MEMBER_VPTR(buf, [idx++])))
			break;
		if (bpf_cpumask_test_cpu(cpu, online))
			*p++ = 'O';
		else if (bpf_cpumask_test_cpu(cpu, possible))
			*p++ = 'X';
		else
			*p++ = ' ';

		if ((cpu & 7) == 7) {
			if (!(p = MEMBER_VPTR(buf, [idx++])))
				break;
			*p++ = '|';
		}
	}
	buf[sizeof(buf) - 1] = '\0';

	scx_bpf_put_cpumask(online);
	scx_bpf_put_cpumask(possible);

	bpf_printk("CPUS: |%s", buf);
}

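/*
 * On a hypothetical machine with 16 possible CPUs of which only 0-11 are
 * online, the line printed above would look roughly like:
 *
 *   CPUS: |OOOOOOOO|OOOOXXXX|
 */
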
void BPF_STRUCT_OPS(qmap_cpu_online, s32 cpu)
{
	bpf_printk("CPU %d coming online", cpu);
	/* @cpu is already online at this point */
	print_cpus();
}

void BPF_STRUCT_OPS(qmap_cpu_offline, s32 cpu)
{
	bpf_printk("CPU %d going offline", cpu);
	/* @cpu is still online at this point */
	print_cpus();
}

struct monitor_timer {
	struct bpf_timer timer;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, struct monitor_timer);
} monitor_timer SEC(".maps");

/*
 * Print out the min, avg and max performance levels of CPUs every second to
 * demonstrate the cpuperf interface.
 */
static void monitor_cpuperf(void)
{
	u32 zero = 0, nr_cpu_ids;
	u64 cap_sum = 0, cur_sum = 0, cur_min = SCX_CPUPERF_ONE, cur_max = 0;
	u64 target_sum = 0, target_min = SCX_CPUPERF_ONE, target_max = 0;
	const struct cpumask *online;
	int i, nr_online_cpus = 0;

	nr_cpu_ids = scx_bpf_nr_cpu_ids();
	online = scx_bpf_get_online_cpumask();

	bpf_for(i, 0, nr_cpu_ids) {
		struct cpu_ctx *cpuc;
		u32 cap, cur;

		if (!bpf_cpumask_test_cpu(i, online))
			continue;
		nr_online_cpus++;

		/* collect the capacity and current cpuperf */
		cap = scx_bpf_cpuperf_cap(i);
		cur = scx_bpf_cpuperf_cur(i);

		cur_min = cur < cur_min ? cur : cur_min;
		cur_max = cur > cur_max ? cur : cur_max;

		/*
		 * $cur is relative to $cap. Scale it down accordingly so that
		 * it's in the same scale as other CPUs and $cur_sum/$cap_sum
		 * makes sense.
		 */
		cur_sum += cur * cap / SCX_CPUPERF_ONE;
		cap_sum += cap;

		if (!(cpuc = bpf_map_lookup_percpu_elem(&cpu_ctx_stor, &zero, i))) {
			scx_bpf_error("failed to look up cpu_ctx");
			goto out;
		}

		/* collect target */
		cur = cpuc->cpuperf_target;
		target_sum += cur;
		target_min = cur < target_min ? cur : target_min;
		target_max = cur > target_max ? cur : target_max;
	}

	cpuperf_min = cur_min;
	cpuperf_avg = cur_sum * SCX_CPUPERF_ONE / cap_sum;
	cpuperf_max = cur_max;

	cpuperf_target_min = target_min;
	cpuperf_target_avg = target_sum / nr_online_cpus;
	cpuperf_target_max = target_max;
out:
	scx_bpf_put_cpumask(online);
}

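/*
 * To illustrate the scaling above: assuming SCX_CPUPERF_ONE is 1024, a small
 * CPU with capacity 512 running flat out (cur == 1024) adds 512 to cur_sum and
 * 512 to cap_sum, while a big CPU with capacity 1024 running at half speed
 * adds 512 and 1024 respectively. cpuperf_avg therefore reflects delivered
 * performance relative to total capacity rather than a plain average of the
 * raw cpuperf readings.
 */
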
/*
 * Dump the currently queued tasks in the shared DSQ to demonstrate the usage of
 * scx_bpf_dsq_nr_queued() and DSQ iterator. Raise the dispatch batch count to
 * see meaningful dumps in the trace pipe.
 */
static void dump_shared_dsq(void)
{
	struct task_struct *p;
	s32 nr;

	if (!(nr = scx_bpf_dsq_nr_queued(SHARED_DSQ)))
		return;

	bpf_printk("Dumping %d tasks in SHARED_DSQ in reverse order", nr);

	bpf_rcu_read_lock();
	bpf_for_each(scx_dsq, p, SHARED_DSQ, SCX_DSQ_ITER_REV)
		bpf_printk("%s[%d]", p->comm, p->pid);
	bpf_rcu_read_unlock();
}

static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
{
	bpf_rcu_read_lock();
	dispatch_highpri(true);
	bpf_rcu_read_unlock();

	monitor_cpuperf();

	if (print_shared_dsq)
		dump_shared_dsq();

	bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
	return 0;
}

s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init)
{
	u32 key = 0;
	struct bpf_timer *timer;
	s32 ret;

	print_cpus();

	ret = scx_bpf_create_dsq(SHARED_DSQ, -1);
	if (ret)
		return ret;

	ret = scx_bpf_create_dsq(HIGHPRI_DSQ, -1);
	if (ret)
		return ret;

	timer = bpf_map_lookup_elem(&monitor_timer, &key);
	if (!timer)
		return -ESRCH;

	bpf_timer_init(timer, &monitor_timer, CLOCK_MONOTONIC);
	bpf_timer_set_callback(timer, monitor_timerfn);

	return bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
}

void BPF_STRUCT_OPS(qmap_exit, struct scx_exit_info *ei)
{
	UEI_RECORD(uei, ei);
}

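/*
 * timeout_ms below is the runnable task watchdog: if a runnable task isn't
 * scheduled within the timeout, the kernel aborts the BPF scheduler and falls
 * back to the default scheduling behavior. The stall_user_nth and
 * stall_kernel_nth knobs above deliberately skip enqueueing every Nth task so
 * that this mechanism can be exercised.
 */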
SCX_OPS_DEFINE(qmap_ops,
	       .select_cpu		= (void *)qmap_select_cpu,
	       .enqueue			= (void *)qmap_enqueue,
	       .dequeue			= (void *)qmap_dequeue,
	       .dispatch		= (void *)qmap_dispatch,
	       .tick			= (void *)qmap_tick,
	       .core_sched_before	= (void *)qmap_core_sched_before,
	       .cpu_release		= (void *)qmap_cpu_release,
	       .init_task		= (void *)qmap_init_task,
	       .dump			= (void *)qmap_dump,
	       .dump_cpu		= (void *)qmap_dump_cpu,
	       .dump_task		= (void *)qmap_dump_task,
	       .cpu_online		= (void *)qmap_cpu_online,
	       .cpu_offline		= (void *)qmap_cpu_offline,
	       .init			= (void *)qmap_init,
	       .exit			= (void *)qmap_exit,
	       .timeout_ms		= 5000U,
	       .name			= "qmap");