/* SPDX-License-Identifier: GPL-2.0-or-later */

#ifndef __CPUSET_INTERNAL_H
#define __CPUSET_INTERNAL_H

#include <linux/cgroup.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/spinlock.h>
#include <linux/union_find.h>

/* See "Frequency meter" comments, below. */

struct fmeter {
	int cnt;		/* unprocessed events count */
	int val;		/* most recent output value */
	time64_t time;		/* clock (secs) when val computed */
	spinlock_t lock;	/* guards read or write of above */
};

/*
 * Invalid partition error code
 */
enum prs_errcode {
	PERR_NONE = 0,
	PERR_INVCPUS,
	PERR_INVPARENT,
	PERR_NOTPART,
	PERR_NOTEXCL,
	PERR_NOCPUS,
	PERR_HOTPLUG,
	PERR_CPUSEMPTY,
	PERR_HKEEPING,
	PERR_ACCESS,
};

/* bits in struct cpuset flags field */
typedef enum {
	CS_ONLINE,
	CS_CPU_EXCLUSIVE,
	CS_MEM_EXCLUSIVE,
	CS_MEM_HARDWALL,
	CS_MEMORY_MIGRATE,
	CS_SCHED_LOAD_BALANCE,
	CS_SPREAD_PAGE,
	CS_SPREAD_SLAB,
} cpuset_flagbits_t;

/* The various types of files and directories in a cpuset file system */

typedef enum {
	FILE_MEMORY_MIGRATE,
	FILE_CPULIST,
	FILE_MEMLIST,
	FILE_EFFECTIVE_CPULIST,
	FILE_EFFECTIVE_MEMLIST,
	FILE_SUBPARTS_CPULIST,
	FILE_EXCLUSIVE_CPULIST,
	FILE_EFFECTIVE_XCPULIST,
	FILE_ISOLATED_CPULIST,
	FILE_CPU_EXCLUSIVE,
	FILE_MEM_EXCLUSIVE,
	FILE_MEM_HARDWALL,
	FILE_SCHED_LOAD_BALANCE,
	FILE_PARTITION_ROOT,
	FILE_SCHED_RELAX_DOMAIN_LEVEL,
	FILE_MEMORY_PRESSURE_ENABLED,
	FILE_MEMORY_PRESSURE,
	FILE_SPREAD_PAGE,
	FILE_SPREAD_SLAB,
} cpuset_filetype_t;

struct cpuset {
	struct cgroup_subsys_state css;

	unsigned long flags;		/* "unsigned long" so bitops work */

	/*
	 * On default hierarchy:
	 *
	 * The user-configured masks can only be changed by writing to
	 * cpuset.cpus and cpuset.mems, and won't be limited by the
	 * parent masks.
	 *
	 * The effective masks are the real masks that apply to the tasks
	 * in the cpuset. They may be changed if the configured masks are
	 * changed or hotplug happens.
	 *
	 * effective_mask == configured_mask & parent's effective_mask,
	 * and if it ends up empty, it will inherit the parent's mask.
	 *
	 *
	 * On legacy hierarchy:
	 *
	 * The user-configured masks are always the same as the effective masks.
	 */

	/* user-configured CPUs and Memory Nodes allowed to tasks */
	cpumask_var_t cpus_allowed;
	nodemask_t mems_allowed;

	/* effective CPUs and Memory Nodes allowed to tasks */
	cpumask_var_t effective_cpus;
	nodemask_t effective_mems;
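
	/*
	 * Illustrative example (an addition, not from the original source):
	 * on the default hierarchy, if the parent's effective_cpus is 0-3
	 * and cpus_allowed is written as 2-5, then
	 *
	 *	effective_cpus = cpus_allowed & parent's effective_cpus = 2-3
	 *
	 * Were the intersection empty (say cpus_allowed = 6-7), this cpuset
	 * would inherit the parent's effective mask, 0-3, instead.
	 */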

	/*
	 * Exclusive CPUs dedicated to current cgroup (default hierarchy only)
	 *
	 * The effective_cpus of a valid partition root comes solely from its
	 * effective_xcpus and some of the effective_xcpus may be distributed
	 * to sub-partitions below & hence excluded from its effective_cpus.
	 * For a valid partition root, its effective_cpus have no relationship
	 * with cpus_allowed unless its exclusive_cpus isn't set.
	 *
	 * This value will only be set if either exclusive_cpus is set or
	 * when this cpuset becomes a local partition root.
	 */
	cpumask_var_t effective_xcpus;

	/*
	 * Exclusive CPUs as requested by the user (default hierarchy only)
	 *
	 * Its value is independent of cpus_allowed and designates the set of
	 * CPUs that can be granted to the current cpuset or its children when
	 * it becomes a valid partition root. The effective set of exclusive
	 * CPUs granted (effective_xcpus) depends on whether those exclusive
	 * CPUs are passed down by its ancestors and not yet taken up by
	 * another sibling partition root along the way.
	 *
	 * If its value isn't set, it defaults to cpus_allowed.
	 */
	cpumask_var_t exclusive_cpus;
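
	/*
	 * Illustrative sketch (an addition, not from the original source):
	 * a cpuset with cpus_allowed = 0-7 and exclusive_cpus unset that is
	 * made a partition root derives effective_xcpus from cpus_allowed,
	 * i.e. at most 0-7, reduced to the CPUs its ancestors pass down and
	 * that no sibling partition root has already claimed.
	 */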

	/*
	 * This is the old Memory Nodes the tasks took on.
	 *
	 * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
	 * - A new cpuset's old_mems_allowed is initialized when some
	 *   task is moved into it.
	 * - old_mems_allowed is used in cpuset_migrate_mm() when we change
	 *   cpuset.mems_allowed and have tasks' nodemask updated, and
	 *   then old_mems_allowed is updated to mems_allowed.
	 */
	nodemask_t old_mems_allowed;

	struct fmeter fmeter;		/* memory_pressure filter */

	/*
	 * Tasks are being attached to this cpuset.  Used to prevent
	 * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
	 */
	int attach_in_progress;

	/* for custom sched domain */
	int relax_domain_level;

	/* number of valid local child partitions */
	int nr_subparts;

	/* partition root state */
	int partition_root_state;

	/*
	 * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
	 * know when to rebuild associated root domain bandwidth information.
	 */
	int nr_deadline_tasks;
	int nr_migrate_dl_tasks;
	u64 sum_migrate_dl_bw;

	/* Invalid partition error code, not lock protected */
	enum prs_errcode prs_err;

	/* Handle for cpuset.cpus.partition */
	struct cgroup_file partition_file;

	/* Remote partition sibling list anchored at remote_children */
	struct list_head remote_sibling;

	/* Used to merge intersecting subsets for generate_sched_domains */
	struct uf_node node;
};

static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct cpuset, css) : NULL;
}

/* Retrieve the cpuset for a task */
static inline struct cpuset *task_cs(struct task_struct *task)
{
	return css_cs(task_css(task, cpuset_cgrp_id));
}

static inline struct cpuset *parent_cs(struct cpuset *cs)
{
	return css_cs(cs->css.parent);
}
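
/*
 * Usage sketch (an addition, not from the original source): task_cs()
 * resolves through task_css(), so the caller is expected to hold
 * rcu_read_lock() (or an equivalent cgroup lock) across the dereference:
 *
 *	rcu_read_lock();
 *	cs = task_cs(current);
 *	... use cs ...
 *	rcu_read_unlock();
 */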

/* convenient tests for these bits */
static inline bool is_cpuset_online(struct cpuset *cs)
{
	return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css);
}

static inline int is_cpu_exclusive(const struct cpuset *cs)
{
	return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
}

static inline int is_mem_exclusive(const struct cpuset *cs)
{
	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
}

static inline int is_mem_hardwall(const struct cpuset *cs)
{
	return test_bit(CS_MEM_HARDWALL, &cs->flags);
}

static inline int is_sched_load_balance(const struct cpuset *cs)
{
	return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
}

static inline int is_memory_migrate(const struct cpuset *cs)
{
	return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
}

static inline int is_spread_page(const struct cpuset *cs)
{
	return test_bit(CS_SPREAD_PAGE, &cs->flags);
}

static inline int is_spread_slab(const struct cpuset *cs)
{
	return test_bit(CS_SPREAD_SLAB, &cs->flags);
}

/**
 * cpuset_for_each_child - traverse online children of a cpuset
 * @child_cs: loop cursor pointing to the current child
 * @pos_css: used for iteration
 * @parent_cs: target cpuset to walk children of
 *
 * Walk @child_cs through the online children of @parent_cs.  Must be used
 * with RCU read locked.
 */
#define cpuset_for_each_child(child_cs, pos_css, parent_cs)		\
	css_for_each_child((pos_css), &(parent_cs)->css)		\
		if (is_cpuset_online(((child_cs) = css_cs((pos_css)))))
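
/*
 * Usage sketch (an addition, not from the original source), per the
 * "Must be used with RCU read locked" rule above:
 *
 *	struct cgroup_subsys_state *pos_css;
 *	struct cpuset *child;
 *
 *	rcu_read_lock();
 *	cpuset_for_each_child(child, pos_css, parent)
 *		... inspect each online child ...
 *	rcu_read_unlock();
 */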

/**
 * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
 * @des_cs: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @root_cs: target cpuset to walk descendants of
 *
 * Walk @des_cs through the online descendants of @root_cs.  Must be used
 * with RCU read locked.  The caller may modify @pos_css by calling
 * css_rightmost_descendant() to skip a subtree.  @root_cs is included in the
 * iteration and is the first node to be visited.
 */
#define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs)	\
	css_for_each_descendant_pre((pos_css), &(root_cs)->css)	\
		if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
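
/*
 * Usage sketch (an addition, not from the original source): the walk starts
 * at @root_cs itself, and a caller can prune a subtree by advancing the
 * cursor with css_rightmost_descendant(); should_skip() below is a
 * hypothetical predicate:
 *
 *	rcu_read_lock();
 *	cpuset_for_each_descendant_pre(cs, pos_css, root_cs) {
 *		if (should_skip(cs)) {
 *			pos_css = css_rightmost_descendant(pos_css);
 *			continue;
 *		}
 *		... process cs ...
 *	}
 *	rcu_read_unlock();
 */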

void rebuild_sched_domains_locked(void);
void cpuset_callback_lock_irq(void);
void cpuset_callback_unlock_irq(void);
void cpuset_update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus);
void cpuset_update_tasks_nodemask(struct cpuset *cs);
int cpuset_update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on);
ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
			     char *buf, size_t nbytes, loff_t off);
int cpuset_common_seq_show(struct seq_file *sf, void *v);

/*
 * cpuset-v1.c
 */
#ifdef CONFIG_CPUSETS_V1
extern struct cftype cpuset1_files[];
void fmeter_init(struct fmeter *fmp);
void cpuset1_update_task_spread_flags(struct cpuset *cs,
					struct task_struct *tsk);
void cpuset1_update_tasks_flags(struct cpuset *cs);
void cpuset1_hotplug_update_tasks(struct cpuset *cs,
			struct cpumask *new_cpus, nodemask_t *new_mems,
			bool cpus_updated, bool mems_updated);
int cpuset1_validate_change(struct cpuset *cur, struct cpuset *trial);
#else
static inline void fmeter_init(struct fmeter *fmp) {}
static inline void cpuset1_update_task_spread_flags(struct cpuset *cs,
				struct task_struct *tsk) {}
static inline void cpuset1_update_tasks_flags(struct cpuset *cs) {}
static inline void cpuset1_hotplug_update_tasks(struct cpuset *cs,
			struct cpumask *new_cpus, nodemask_t *new_mems,
			bool cpus_updated, bool mems_updated) {}
static inline int cpuset1_validate_change(struct cpuset *cur,
				struct cpuset *trial) { return 0; }
#endif /* CONFIG_CPUSETS_V1 */

#endif /* __CPUSET_INTERNAL_H */