~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/include/linux/sched/ext.h

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0 */
  2 /*
  3  * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst
  4  *
  5  * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
  6  * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
  7  * Copyright (c) 2022 David Vernet <dvernet@meta.com>
  8  */
  9 #ifndef _LINUX_SCHED_EXT_H
 10 #define _LINUX_SCHED_EXT_H
 11 
 12 #ifdef CONFIG_SCHED_CLASS_EXT
 13 
 14 #include <linux/llist.h>
 15 #include <linux/rhashtable-types.h>
 16 
 17 enum scx_public_consts {        /* size and time-slice constants */
 18         SCX_OPS_NAME_LEN        = 128,  /* presumably the size of an ops name buffer -- confirm at use site */
 19 
 20         SCX_SLICE_DFL           = 20 * 1000000, /* 20ms, expressed in nsecs */
 21         SCX_SLICE_INF           = U64_MAX,      /* infinite, implies nohz */
 22 };
 23 
 24 /*
 25  * DSQ (dispatch queue) IDs are 64bit of the format:
 26  *
 27  *   Bits: [63] [62 ..  0]
 28  *         [ B] [   ID   ]
 29  *
 30  *    B: 1 for IDs for built-in DSQs, 0 for ops-created user DSQs
 31  *   ID: 63 bit ID
 32  *
 33  * Built-in IDs:
 34  *
 35  *   Bits: [63] [62] [61..32] [31 ..  0]
 36  *         [ 1] [ L] [   R  ] [    V   ]
 37  *
 38  *    1: 1 for built-in DSQs.
 39  *    L: 1 for LOCAL_ON DSQ IDs, 0 for others
 40  *    V: For LOCAL_ON DSQ IDs, a CPU number. For others, a pre-defined value.
 41  */
 42 enum scx_dsq_id_flags {         /* flag bits and built-in values of the 64bit DSQ ID */
 43         SCX_DSQ_FLAG_BUILTIN    = 1LLU << 63,   /* bit 63, "B" in the layout above */
 44         SCX_DSQ_FLAG_LOCAL_ON   = 1LLU << 62,   /* bit 62, "L" in the layout above */
 45 
 46         SCX_DSQ_INVALID         = SCX_DSQ_FLAG_BUILTIN | 0,     /* built-in, V = 0 */
 47         SCX_DSQ_GLOBAL          = SCX_DSQ_FLAG_BUILTIN | 1,     /* built-in, V = 1 */
 48         SCX_DSQ_LOCAL           = SCX_DSQ_FLAG_BUILTIN | 2,     /* built-in, V = 2 */
 49         SCX_DSQ_LOCAL_ON        = SCX_DSQ_FLAG_BUILTIN | SCX_DSQ_FLAG_LOCAL_ON, /* B and L set; V is then a CPU number */
 50         SCX_DSQ_LOCAL_CPU_MASK  = 0xffffffffLLU,        /* bits 31..0, the V field */
 51 };
 52 
 53 /*
 54  * A dispatch queue (DSQ) can be either a FIFO or p->scx.dsq_vtime ordered
 55  * queue. A built-in DSQ is always a FIFO. The built-in local DSQs are used to
 56  * buffer between the scheduler core and the BPF scheduler. See the
 57  * documentation for more details.
 58  */
 59 struct scx_dispatch_q {
 60         raw_spinlock_t          lock;   /* presumably the "DSQ lock" named on sched_ext_entity.dsq_flags -- confirm */
 61         struct list_head        list;   /* tasks in dispatch order */
 62         struct rb_root          priq;   /* used to order by p->scx.dsq_vtime */
 63         u32                     nr;     /* NOTE(review): looks like a count of queued tasks -- confirm */
 64         u32                     seq;    /* used by BPF iter */
 65         u64                     id;     /* 64bit DSQ ID, see enum scx_dsq_id_flags */
 66         struct rhash_head       hash_node;      /* rhashtable linkage */
 67         struct llist_node       free_node;      /* llist linkage; name suggests a to-be-freed list -- confirm */
 68         struct rcu_head         rcu;    /* presumably for RCU-deferred freeing -- confirm */
 69 };
 70 
 71 /* sched_ext_entity.flags; bit 1 is not assigned in this enum */
 72 enum scx_ent_flags {
 73         SCX_TASK_QUEUED         = 1 << 0, /* on ext runqueue */
 74         SCX_TASK_RESET_RUNNABLE_AT = 1 << 2, /* runnable_at should be reset */
 75         SCX_TASK_DEQD_FOR_SLEEP = 1 << 3, /* last dequeue was for SLEEP */
 76 
 77         SCX_TASK_STATE_SHIFT    = 8,      /* bit 8 and 9 are used to carry scx_task_state */
 78         SCX_TASK_STATE_BITS     = 2,      /* enough for the four scx_task_state values */
 79         SCX_TASK_STATE_MASK     = ((1 << SCX_TASK_STATE_BITS) - 1) << SCX_TASK_STATE_SHIFT, /* 0x300 */
 80 
 81         SCX_TASK_CURSOR         = 1 << 31, /* iteration cursor, not a task */
 82 };
 83 
 84 /* values carried in the SCX_TASK_STATE_MASK bits (8 and 9) of sched_ext_entity.flags */
 85 enum scx_task_state {
 86         SCX_TASK_NONE,          /* ops.init_task() not called yet */
 87         SCX_TASK_INIT,          /* ops.init_task() succeeded, but task can be cancelled */
 88         SCX_TASK_READY,         /* fully initialized, but not in sched_ext */
 89         SCX_TASK_ENABLED,       /* fully initialized and in sched_ext */
 90 
 91         SCX_TASK_NR_STATES,     /* count of the states above (4), not a state itself */
 92 };
 93 
 94 /* sched_ext_entity.dsq_flags (that field is marked as protected by the DSQ lock) */
 95 enum scx_ent_dsq_flags {
 96         SCX_TASK_DSQ_ON_PRIQ    = 1 << 0, /* task is queued on the priority queue of a dsq */
 97 };
 98 
 99 /*
100  * Mask bits for sched_ext_entity.kf_mask. Not all kfuncs can be called from
101  * everywhere and the following bits track which kfunc sets are currently
102  * allowed for %current. This simple per-task tracking works because SCX ops
103  * nest in a limited way. BPF will likely implement a way to allow and disallow
104  * kfuncs depending on the calling context which will replace this manual
105  * mechanism. See scx_kf_allow().
106  */
107 enum scx_kf_mask {
108         SCX_KF_UNLOCKED         = 0,      /* sleepable and not rq locked */
109         /* ENQUEUE and DISPATCH may be nested inside CPU_RELEASE */
110         SCX_KF_CPU_RELEASE      = 1 << 0, /* ops.cpu_release() */
111         /* ops.dequeue (in REST) may be nested inside DISPATCH */
112         SCX_KF_DISPATCH         = 1 << 1, /* ops.dispatch() */
113         SCX_KF_ENQUEUE          = 1 << 2, /* ops.enqueue() and ops.select_cpu() */
114         SCX_KF_SELECT_CPU       = 1 << 3, /* ops.select_cpu() */
115         SCX_KF_REST             = 1 << 4, /* other rq-locked operations */
116 
117         __SCX_KF_RQ_LOCKED      = SCX_KF_CPU_RELEASE | SCX_KF_DISPATCH |
118                                   SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, /* all five bits; UNLOCKED is 0 */
119         __SCX_KF_TERMINAL       = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, /* the sets not described above as having other ops nested inside */
120 };
121 
122 enum scx_dsq_lnode_flags {      /* presumably the bits of scx_dsq_list_node.flags -- confirm */
123         SCX_DSQ_LNODE_ITER_CURSOR = 1 << 0,     /* NOTE(review): name suggests the node is an iterator cursor -- confirm */
124 
125         /* high 16 bits can be for iter cursor flags */
126         __SCX_DSQ_LNODE_PRIV_SHIFT = 16,        /* bit position where those high 16 bits start */
127 };
128 
129 struct scx_dsq_list_node {      /* list linkage plus flags; embedded as sched_ext_entity.dsq_list */
130         struct list_head        node;           /* the list linkage itself */
131         u32                     flags;          /* presumably scx_dsq_lnode_flags -- confirm */
132         u32                     priv;           /* can be used by iter cursor */
133 };
134 
135 /*
136  * The following is embedded in task_struct and contains all fields necessary
137  * for a task to be scheduled by SCX.
138  */
139 struct sched_ext_entity {
140         struct scx_dispatch_q   *dsq;           /* presumably the DSQ the task is queued on -- confirm */
141         struct scx_dsq_list_node dsq_list;      /* dispatch order */
142         struct rb_node          dsq_priq;       /* p->scx.dsq_vtime order */
143         u32                     dsq_seq;        /* NOTE(review): likely pairs with scx_dispatch_q.seq -- confirm */
144         u32                     dsq_flags;      /* scx_ent_dsq_flags, protected by DSQ lock */
145         u32                     flags;          /* scx_ent_flags, protected by rq lock */
146         u32                     weight;
147         s32                     sticky_cpu;
148         s32                     holding_cpu;
149         u32                     kf_mask;        /* see scx_kf_mask above */
150         struct task_struct      *kf_tasks[2];   /* see SCX_CALL_OP_TASK() */
151         atomic_long_t           ops_state;
152 
153         struct list_head        runnable_node;  /* rq->scx.runnable_list */
154         unsigned long           runnable_at;    /* see SCX_TASK_RESET_RUNNABLE_AT */
155 
156 #ifdef CONFIG_SCHED_CORE
157         u64                     core_sched_at;  /* see scx_prio_less() */
158 #endif
159         u64                     ddsp_dsq_id;    /* a 64bit DSQ ID; "ddsp" presumably means direct dispatch -- confirm */
160         u64                     ddsp_enq_flags;
161 
162         /* BPF scheduler modifiable fields */
163 
164         /*
165          * Runtime budget in nsecs. This is usually set through
166          * scx_bpf_dispatch() but can also be modified directly by the BPF
167          * scheduler. Automatically decreased by SCX as the task executes. On
168          * depletion, a scheduling event is triggered.
169          *
170          * This value is cleared to zero if the task is preempted by
171          * %SCX_KICK_PREEMPT and shouldn't be used to determine how long the
172          * task ran. Use p->se.sum_exec_runtime instead.
173          */
174         u64                     slice;
175 
176         /*
177          * Used to order tasks when dispatching to the vtime-ordered priority
178          * queue of a dsq. This is usually set through scx_bpf_dispatch_vtime()
179          * but can also be modified directly by the BPF scheduler. Modifying it
180          * while a task is queued on a dsq may mangle the ordering and is not
181          * recommended.
182          */
183         u64                     dsq_vtime;
184 
185         /*
186          * If set, reject future sched_setscheduler(2) calls updating the policy
187          * to %SCHED_EXT with -%EACCES.
188          *
189          * Can be set from ops.init_task() while the BPF scheduler is being
190          * loaded (!scx_init_task_args->fork). If set and the task's policy is
191          * already %SCHED_EXT, the task's policy is rejected and forcefully
192          * reverted to %SCHED_NORMAL. The number of such events is reported
193          * through /sys/kernel/debug/sched_ext::nr_rejected. Setting this flag
194          * during fork is not allowed.
195          */
196         bool                    disallow;       /* reject switching into SCX */
197 
198         /* cold fields */
199 #ifdef CONFIG_EXT_GROUP_SCHED
200         struct cgroup           *cgrp_moving_from;
201 #endif
202         /* must be the last field, see init_scx_entity() */
203         struct list_head        tasks_node;
204 };
205 
206 void sched_ext_free(struct task_struct *p);     /* declared only under CONFIG_SCHED_CLASS_EXT; an empty inline stub is used otherwise */
207 void print_scx_info(const char *log_lvl, struct task_struct *p);        /* likewise: stubbed out when the config is off */
208 
209 #else   /* !CONFIG_SCHED_CLASS_EXT */
210 
211 static inline void sched_ext_free(struct task_struct *p) {}     /* no-op stub: CONFIG_SCHED_CLASS_EXT is not set */
212 static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}        /* no-op stub, same reason */
213 
214 #endif  /* CONFIG_SCHED_CLASS_EXT */
215 #endif  /* _LINUX_SCHED_EXT_H */
216 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php