// SPDX-License-Identifier: GPL-2.0
/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

#define IOWAIT_BOOST_MIN	(SCHED_CAPACITY_SCALE / 8)

struct sugov_tunables {
	struct gov_attr_set	attr_set;
	unsigned int		rate_limit_us;
};

struct sugov_policy {
	struct cpufreq_policy	*policy;

	struct sugov_tunables	*tunables;
	struct list_head	tunables_hook;

	raw_spinlock_t		update_lock;
	u64			last_freq_update_time;
	s64			freq_update_delay_ns;
	unsigned int		next_freq;
	unsigned int		cached_raw_freq;

	/* The next fields are only needed if fast switch cannot be used: */
	struct irq_work		irq_work;
	struct kthread_work	work;
	struct mutex		work_lock;
	struct kthread_worker	worker;
	struct task_struct	*thread;
	bool			work_in_progress;

	bool			limits_changed;
	bool			need_freq_update;
};

struct sugov_cpu {
	struct update_util_data	update_util;
	struct sugov_policy	*sg_policy;
	unsigned int		cpu;

	bool			iowait_boost_pending;
	unsigned int		iowait_boost;
	u64			last_update;

	unsigned long		util;
	unsigned long		bw_min;

	/* The field below is for single-CPU policies only: */
#ifdef CONFIG_NO_HZ_COMMON
	unsigned long		saved_idle_calls;
#endif
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);

/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
	s64 delta_ns;

	/*
	 * Since cpufreq_update_util() is called with rq->lock held for
	 * the @target_cpu, our per-CPU data is fully serialized.
	 *
	 * However, drivers cannot in general deal with cross-CPU
	 * requests, so while get_next_freq() will work, our
	 * sugov_update_commit() call may not for the fast switching platforms.
	 *
	 * Hence stop here for remote requests if they aren't supported
	 * by the hardware, as calculating the frequency is pointless if
	 * we cannot in fact act on it.
	 *
	 * This is needed on the slow switching platforms too to prevent CPUs
	 * going offline from leaving stale IRQ work items behind.
	 */
	if (!cpufreq_this_cpu_can_update(sg_policy->policy))
		return false;

	if (unlikely(sg_policy->limits_changed)) {
		sg_policy->limits_changed = false;
		sg_policy->need_freq_update = true;
		return true;
	}

	delta_ns = time - sg_policy->last_freq_update_time;

	return delta_ns >= sg_policy->freq_update_delay_ns;
}

static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
				   unsigned int next_freq)
{
	if (sg_policy->need_freq_update)
		sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
	else if (sg_policy->next_freq == next_freq)
		return false;

	sg_policy->next_freq = next_freq;
	sg_policy->last_freq_update_time = time;

	return true;
}

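/*
 * Editorial note: queue an irq_work so that the slow-path kthread carries out
 * the frequency change, unless a previous request is still in flight
 * (work_in_progress).
 */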
static void sugov_deferred_update(struct sugov_policy *sg_policy)
{
	if (!sg_policy->work_in_progress) {
		sg_policy->work_in_progress = true;
		irq_work_queue(&sg_policy->irq_work);
	}
}

/**
 * get_capacity_ref_freq - get the reference frequency that has been used to
 * correlate frequency and compute capacity for a given cpufreq policy. We use
 * the CPU managing it for the arch_scale_freq_ref() call in the function.
 * @policy: the cpufreq policy of the CPU in question.
 *
 * Return: the reference CPU frequency to compute a capacity.
 */
static __always_inline
unsigned long get_capacity_ref_freq(struct cpufreq_policy *policy)
{
	unsigned int freq = arch_scale_freq_ref(policy->cpu);

	if (freq)
		return freq;

	if (arch_scale_freq_invariant())
		return policy->cpuinfo.max_freq;

	/*
	 * Apply a 25% margin so that we select a higher frequency than
	 * the current one before the CPU is fully busy:
	 */
	return policy->cur + (policy->cur >> 2);
}

/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @sg_policy: schedutil policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 *	next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 *	next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 *
 * The lowest driver-supported frequency which is equal or greater than the raw
 * next_freq (as calculated above) is returned, subject to policy min/max and
 * cpufreq driver limitations.
 */
static unsigned int get_next_freq(struct sugov_policy *sg_policy,
				  unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int freq;

	freq = get_capacity_ref_freq(policy);
	freq = map_util_freq(util, freq, max);

	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
		return sg_policy->next_freq;

	sg_policy->cached_raw_freq = freq;
	return cpufreq_driver_resolve_freq(policy, freq);
}

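/*
 * Editorial note, worked example for get_next_freq() in the
 * frequency-invariant case (the numbers are purely illustrative): with
 * util = 614, max = 1024 and a reference frequency of 2000 MHz,
 * map_util_freq() yields 1.25 * 2000 * 614 / 1024 ~= 1499 MHz, which
 * cpufreq_driver_resolve_freq() then resolves to the lowest driver-supported
 * frequency at or above that value, within the policy min/max limits.
 */
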
unsigned long sugov_effective_cpu_perf(int cpu, unsigned long actual,
				       unsigned long min,
				       unsigned long max)
{
	/* Add dvfs headroom to actual utilization */
	actual = map_util_perf(actual);
	/* Actually we don't need to target the max performance */
	if (actual < max)
		max = actual;

	/*
	 * Ensure at least minimum performance while providing more compute
	 * capacity when possible.
	 */
	return max(min, max);
}

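/*
 * Editorial note: refresh the cached utilization of @sg_cpu. Combine the CFS
 * utilization with the other scheduling classes via effective_cpu_util(),
 * factor in the IO-wait @boost, and remember the minimum performance level
 * (bw_min) reported for the CPU.
 */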
static void sugov_get_util(struct sugov_cpu *sg_cpu, unsigned long boost)
{
	unsigned long min, max, util = cpu_util_cfs_boost(sg_cpu->cpu);

	util = effective_cpu_util(sg_cpu->cpu, util, &min, &max);
	util = max(util, boost);
	sg_cpu->bw_min = min;
	sg_cpu->util = sugov_effective_cpu_perf(sg_cpu->cpu, util, min, max);
}

/**
 * sugov_iowait_reset() - Reset the IO boost status of a CPU.
 * @sg_cpu: the sugov data for the CPU to boost
 * @time: the update time from the caller
 * @set_iowait_boost: true if an IO boost has been requested
 *
 * The IO wait boost of a task is disabled after a tick since the last update
 * of a CPU. If a new IO wait boost is requested after more than a tick, then
 * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy
 * efficiency by ignoring sporadic wakeups from IO.
 */
static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
			       bool set_iowait_boost)
{
	s64 delta_ns = time - sg_cpu->last_update;

	/* Reset boost only if a tick has elapsed since last request */
	if (delta_ns <= TICK_NSEC)
		return false;

	sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0;
	sg_cpu->iowait_boost_pending = set_iowait_boost;

	return true;
}

/**
 * sugov_iowait_boost() - Updates the IO boost status of a CPU.
 * @sg_cpu: the sugov data for the CPU to boost
 * @time: the update time from the caller
 * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
 *
 * Each time a task wakes up after an IO operation, the CPU utilization can be
 * boosted to a certain utilization which doubles at each "frequent and
 * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization
 * of the maximum OPP.
 *
 * To keep doubling, an IO boost has to be requested at least once per tick,
 * otherwise we restart from the utilization of the minimum OPP.
 */
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
			       unsigned int flags)
{
	bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;

	/* Reset boost if the CPU appears to have been idle enough */
	if (sg_cpu->iowait_boost &&
	    sugov_iowait_reset(sg_cpu, time, set_iowait_boost))
		return;

	/* Boost only tasks waking up after IO */
	if (!set_iowait_boost)
		return;

	/* Ensure boost doubles only one time at each request */
	if (sg_cpu->iowait_boost_pending)
		return;
	sg_cpu->iowait_boost_pending = true;

	/* Double the boost at each request */
	if (sg_cpu->iowait_boost) {
		sg_cpu->iowait_boost =
			min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
		return;
	}

	/* First wakeup after IO: start with minimum boost */
	sg_cpu->iowait_boost = IOWAIT_BOOST_MIN;
}

/**
 * sugov_iowait_apply() - Apply the IO boost to a CPU.
 * @sg_cpu: the sugov data for the cpu to boost
 * @time: the update time from the caller
 * @max_cap: the max CPU capacity
 *
 * A CPU running a task which has woken up after an IO operation can have its
 * utilization boosted to speed up the completion of those IO operations.
 * The IO boost value is increased each time a task wakes up from IO, in
 * sugov_iowait_boost(), and it is instead decreased by this function
 * each time an increase has not been requested (!iowait_boost_pending).
 *
 * A CPU which appears to have been idle for at least one tick also has its
 * IO boost utilization reset.
 *
 * This mechanism is designed to boost tasks that wait on IO frequently, while
 * being more conservative on tasks that do only sporadic IO operations.
 */
static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
					unsigned long max_cap)
{
	/* No boost currently required */
	if (!sg_cpu->iowait_boost)
		return 0;

	/* Reset boost if the CPU appears to have been idle enough */
	if (sugov_iowait_reset(sg_cpu, time, false))
		return 0;

	if (!sg_cpu->iowait_boost_pending) {
		/*
		 * No boost pending; reduce the boost value.
		 */
		sg_cpu->iowait_boost >>= 1;
		if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) {
			sg_cpu->iowait_boost = 0;
			return 0;
		}
	}

	sg_cpu->iowait_boost_pending = false;

	/*
	 * sg_cpu->util is already in capacity scale; convert iowait_boost
	 * into the same scale so we can compare.
	 */
	return (sg_cpu->iowait_boost * max_cap) >> SCHED_CAPACITY_SHIFT;
}

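/*
 * Editorial note: a CPU is considered "busy" here if it has not gone through
 * the NOHZ idle path since the previous check, i.e. its idle-calls counter
 * has not moved.
 */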
#ifdef CONFIG_NO_HZ_COMMON
static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
{
	unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
	bool ret = idle_calls == sg_cpu->saved_idle_calls;

	sg_cpu->saved_idle_calls = idle_calls;
	return ret;
}
#else
static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
#endif /* CONFIG_NO_HZ_COMMON */

/*
 * Make sugov_should_update_freq() ignore the rate limit when DL
 * has increased the utilization.
 */
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
{
	if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_min)
		sg_cpu->sg_policy->limits_changed = true;
}

static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
					      u64 time, unsigned long max_cap,
					      unsigned int flags)
{
	unsigned long boost;

	sugov_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	ignore_dl_rate_limit(sg_cpu);

	if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
		return false;

	boost = sugov_iowait_apply(sg_cpu, time, max_cap);
	sugov_get_util(sg_cpu, boost);

	return true;
}

static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
				     unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int cached_freq = sg_policy->cached_raw_freq;
	unsigned long max_cap;
	unsigned int next_f;

	max_cap = arch_scale_cpu_capacity(sg_cpu->cpu);

	if (!sugov_update_single_common(sg_cpu, time, max_cap, flags))
		return;

	next_f = get_next_freq(sg_policy, sg_cpu->util, max_cap);
	/*
	 * Do not reduce the frequency if the CPU has not been idle
	 * recently, as the reduction is likely to be premature then.
	 *
	 * Except when the rq is capped by uclamp_max.
	 */
	if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
	    sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq &&
	    !sg_policy->need_freq_update) {
		next_f = sg_policy->next_freq;

		/* Restore cached freq as next_freq has changed */
		sg_policy->cached_raw_freq = cached_freq;
	}

	if (!sugov_update_next_freq(sg_policy, time, next_f))
		return;

	/*
	 * This code runs under rq->lock for the target CPU, so it won't run
	 * concurrently on two different CPUs for the same target and it is not
	 * necessary to acquire the lock in the fast switch case.
	 */
	if (sg_policy->policy->fast_switch_enabled) {
		cpufreq_driver_fast_switch(sg_policy->policy, next_f);
	} else {
		raw_spin_lock(&sg_policy->update_lock);
		sugov_deferred_update(sg_policy);
		raw_spin_unlock(&sg_policy->update_lock);
	}
}

static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
				     unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	unsigned long prev_util = sg_cpu->util;
	unsigned long max_cap;

	/*
	 * Fall back to the "frequency" path if frequency invariance is not
	 * supported, because the direct mapping between the utilization and
	 * the performance levels depends on the frequency invariance.
	 */
	if (!arch_scale_freq_invariant()) {
		sugov_update_single_freq(hook, time, flags);
		return;
	}

	max_cap = arch_scale_cpu_capacity(sg_cpu->cpu);

	if (!sugov_update_single_common(sg_cpu, time, max_cap, flags))
		return;

	/*
	 * Do not reduce the target performance level if the CPU has not been
	 * idle recently, as the reduction is likely to be premature then.
	 *
	 * Except when the rq is capped by uclamp_max.
	 */
	if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
	    sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
		sg_cpu->util = prev_util;

	cpufreq_driver_adjust_perf(sg_cpu->cpu, sg_cpu->bw_min,
				   sg_cpu->util, max_cap);

	sg_cpu->sg_policy->last_freq_update_time = time;
}

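/*
 * Editorial note: for a policy shared by several CPUs the request must cover
 * the busiest one, so evaluate every CPU in the policy and use the maximum of
 * their boosted utilizations to pick the next frequency.
 */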
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long util = 0, max_cap;
	unsigned int j;

	max_cap = arch_scale_cpu_capacity(sg_cpu->cpu);

	for_each_cpu(j, policy->cpus) {
		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
		unsigned long boost;

		boost = sugov_iowait_apply(j_sg_cpu, time, max_cap);
		sugov_get_util(j_sg_cpu, boost);

		util = max(j_sg_cpu->util, util);
	}

	return get_next_freq(sg_policy, util, max_cap);
}

static void
sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int next_f;

	raw_spin_lock(&sg_policy->update_lock);

	sugov_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	ignore_dl_rate_limit(sg_cpu);

	if (sugov_should_update_freq(sg_policy, time)) {
		next_f = sugov_next_freq_shared(sg_cpu, time);

		if (!sugov_update_next_freq(sg_policy, time, next_f))
			goto unlock;

		if (sg_policy->policy->fast_switch_enabled)
			cpufreq_driver_fast_switch(sg_policy->policy, next_f);
		else
			sugov_deferred_update(sg_policy);
	}
unlock:
	raw_spin_unlock(&sg_policy->update_lock);
}

static void sugov_work(struct kthread_work *work)
{
	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
	unsigned int freq;
	unsigned long flags;

	/*
	 * Hold sg_policy->update_lock briefly to handle the case where
	 * sg_policy->next_freq is read here and then updated by
	 * sugov_deferred_update() just before work_in_progress is set to false
	 * below; without the lock we could miss queueing the new update.
	 *
	 * Note: if a work item was queued after the update_lock is released,
	 * sugov_work() will just be called again by the kthread_work code; the
	 * request will be processed before the sugov thread sleeps.
	 */
	raw_spin_lock_irqsave(&sg_policy->update_lock, flags);
	freq = sg_policy->next_freq;
	sg_policy->work_in_progress = false;
	raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags);

	mutex_lock(&sg_policy->work_lock);
	__cpufreq_driver_target(sg_policy->policy, freq, CPUFREQ_RELATION_L);
	mutex_unlock(&sg_policy->work_lock);
}

static void sugov_irq_work(struct irq_work *irq_work)
{
	struct sugov_policy *sg_policy;

	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);

	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
}

/************************** sysfs interface ************************/

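/*
 * Editorial note: the governor exposes a single tunable, rate_limit_us, the
 * minimum time in microseconds between two consecutive frequency updates of a
 * policy. It is propagated to freq_update_delay_ns, which
 * sugov_should_update_freq() checks, and defaults to the policy's transition
 * delay (see sugov_init()).
 */
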
static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
	return container_of(attr_set, struct sugov_tunables, attr_set);
}

static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->rate_limit_us);
}

static ssize_t
rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
	struct sugov_policy *sg_policy;
	unsigned int rate_limit_us;

	if (kstrtouint(buf, 10, &rate_limit_us))
		return -EINVAL;

	tunables->rate_limit_us = rate_limit_us;

	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;

	return count;
}

static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

static struct attribute *sugov_attrs[] = {
	&rate_limit_us.attr,
	NULL
};
ATTRIBUTE_GROUPS(sugov);

static void sugov_tunables_free(struct kobject *kobj)
{
	struct gov_attr_set *attr_set = to_gov_attr_set(kobj);

	kfree(to_sugov_tunables(attr_set));
}

static const struct kobj_type sugov_tunables_ktype = {
	.default_groups = sugov_groups,
	.sysfs_ops = &governor_sysfs_ops,
	.release = &sugov_tunables_free,
};

/********************** cpufreq governor interface *********************/

#ifdef CONFIG_ENERGY_MODEL
static void rebuild_sd_workfn(struct work_struct *work)
{
	rebuild_sched_domains_energy();
}

static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);

/*
 * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
 * on governor changes to make sure the scheduler knows about it.
 */
static void sugov_eas_rebuild_sd(void)
{
	/*
	 * When called from the cpufreq_register_driver() path, the
	 * cpu_hotplug_lock is already held, so use a work item to
	 * avoid nested locking in rebuild_sched_domains().
	 */
	schedule_work(&rebuild_sd_work);
}
#else
static inline void sugov_eas_rebuild_sd(void) { };
#endif

struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;

	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
	if (!sg_policy)
		return NULL;

	sg_policy->policy = policy;
	raw_spin_lock_init(&sg_policy->update_lock);
	return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}

static int sugov_kthread_create(struct sugov_policy *sg_policy)
{
	struct task_struct *thread;
	struct sched_attr attr = {
		.size		= sizeof(struct sched_attr),
		.sched_policy	= SCHED_DEADLINE,
		.sched_flags	= SCHED_FLAG_SUGOV,
		.sched_nice	= 0,
		.sched_priority	= 0,
		/*
		 * Fake (unused) bandwidth; workaround to "fix"
		 * priority inheritance.
		 */
		.sched_runtime	= 1000000,
		.sched_deadline	= 10000000,
		.sched_period	= 10000000,
	};
	struct cpufreq_policy *policy = sg_policy->policy;
	int ret;

	/* kthread only required for slow path */
	if (policy->fast_switch_enabled)
		return 0;

	kthread_init_work(&sg_policy->work, sugov_work);
	kthread_init_worker(&sg_policy->worker);
	thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
				"sugov:%d",
				cpumask_first(policy->related_cpus));
	if (IS_ERR(thread)) {
		pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
		return PTR_ERR(thread);
	}

	ret = sched_setattr_nocheck(thread, &attr);
	if (ret) {
		kthread_stop(thread);
		pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
		return ret;
	}

	sg_policy->thread = thread;
	kthread_bind_mask(thread, policy->related_cpus);
	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
	mutex_init(&sg_policy->work_lock);

	wake_up_process(thread);

	return 0;
}

static void sugov_kthread_stop(struct sugov_policy *sg_policy)
{
	/* kthread only required for slow path */
	if (sg_policy->policy->fast_switch_enabled)
		return;

	kthread_flush_worker(&sg_policy->worker);
	kthread_stop(sg_policy->thread);
	mutex_destroy(&sg_policy->work_lock);
}

static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
	struct sugov_tunables *tunables;

	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
	if (tunables) {
		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
		if (!have_governor_per_policy())
			global_tunables = tunables;
	}
	return tunables;
}

static void sugov_clear_global_tunables(void)
{
	if (!have_governor_per_policy())
		global_tunables = NULL;
}

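/*
 * Editorial note: the ->init() callback enables fast frequency switching when
 * the driver allows it, creates the slow-path kthread when it does not, and
 * attaches the policy to a (possibly shared) tunables object exposed through
 * sysfs.
 */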
static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	int ret = 0;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	cpufreq_enable_fast_switch(policy);

	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy) {
		ret = -ENOMEM;
		goto disable_fast_switch;
	}

	ret = sugov_kthread_create(sg_policy);
	if (ret)
		goto free_sg_policy;

	mutex_lock(&global_tunables_lock);

	if (global_tunables) {
		if (WARN_ON(have_governor_per_policy())) {
			ret = -EINVAL;
			goto stop_kthread;
		}
		policy->governor_data = sg_policy;
		sg_policy->tunables = global_tunables;

		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
		goto out;
	}

	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto stop_kthread;
	}

	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);

	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
	if (ret)
		goto fail;

	sugov_eas_rebuild_sd();

out:
	mutex_unlock(&global_tunables_lock);
	return 0;

fail:
	kobject_put(&tunables->attr_set.kobj);
	policy->governor_data = NULL;
	sugov_clear_global_tunables();

stop_kthread:
	sugov_kthread_stop(sg_policy);
	mutex_unlock(&global_tunables_lock);

free_sg_policy:
	sugov_policy_free(sg_policy);

disable_fast_switch:
	cpufreq_disable_fast_switch(policy);

	pr_err("initialization failed (error %d)\n", ret);
	return ret;
}

static void sugov_exit(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	struct sugov_tunables *tunables = sg_policy->tunables;
	unsigned int count;

	mutex_lock(&global_tunables_lock);

	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
	policy->governor_data = NULL;
	if (!count)
		sugov_clear_global_tunables();

	mutex_unlock(&global_tunables_lock);

	sugov_kthread_stop(sg_policy);
	sugov_policy_free(sg_policy);
	cpufreq_disable_fast_switch(policy);

	sugov_eas_rebuild_sd();
}

static int sugov_start(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	void (*uu)(struct update_util_data *data, u64 time, unsigned int flags);
	unsigned int cpu;

	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
	sg_policy->last_freq_update_time = 0;
	sg_policy->next_freq = 0;
	sg_policy->work_in_progress = false;
	sg_policy->limits_changed = false;
	sg_policy->cached_raw_freq = 0;

	sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);

	if (policy_is_shared(policy))
		uu = sugov_update_shared;
	else if (policy->fast_switch_enabled && cpufreq_driver_has_adjust_perf())
		uu = sugov_update_single_perf;
	else
		uu = sugov_update_single_freq;

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		memset(sg_cpu, 0, sizeof(*sg_cpu));
		sg_cpu->cpu = cpu;
		sg_cpu->sg_policy = sg_policy;
		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, uu);
	}
	return 0;
}

static void sugov_stop(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	synchronize_rcu();

	if (!policy->fast_switch_enabled) {
		irq_work_sync(&sg_policy->irq_work);
		kthread_cancel_work_sync(&sg_policy->work);
	}
}

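/*
 * Editorial note: the ->limits() callback applies the new policy limits
 * synchronously in the slow-switching case and sets limits_changed so that
 * the next scheduler update re-evaluates the frequency even before
 * rate_limit_us has elapsed.
 */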
static void sugov_limits(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;

	if (!policy->fast_switch_enabled) {
		mutex_lock(&sg_policy->work_lock);
		cpufreq_policy_apply_limits(policy);
		mutex_unlock(&sg_policy->work_lock);
	}

	sg_policy->limits_changed = true;
}

struct cpufreq_governor schedutil_gov = {
	.name			= "schedutil",
	.owner			= THIS_MODULE,
	.flags			= CPUFREQ_GOV_DYNAMIC_SWITCHING,
	.init			= sugov_init,
	.exit			= sugov_exit,
	.start			= sugov_start,
	.stop			= sugov_stop,
	.limits			= sugov_limits,
};

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}
#endif

cpufreq_governor_init(schedutil_gov);