// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly;
static cpumask_var_t tmpmask __cpumask_var_read_mostly;
static int tick_broadcast_forced;

static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);

#ifdef CONFIG_TICK_ONESHOT
static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device);

static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic);
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
# ifdef CONFIG_HOTPLUG_CPU
static void tick_broadcast_oneshot_offline(unsigned int cpu);
# endif
#else
static inline void
tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic) { BUG(); }
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
# ifdef CONFIG_HOTPLUG_CPU
static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { }
# endif
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
	return &tick_broadcast_device;
}

struct cpumask *tick_get_broadcast_mask(void)
{
	return tick_broadcast_mask;
}

static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu);

const struct clock_event_device *tick_get_wakeup_device(int cpu)
{
	return tick_get_oneshot_wakeup_device(cpu);
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (bc)
		tick_setup_periodic(bc, 1);
}

/*
 * Check, if the device can be utilized as broadcast device:
 */
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
					struct clock_event_device *newdev)
{
	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	return !curdev || newdev->rating > curdev->rating;
}

#ifdef CONFIG_TICK_ONESHOT
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return per_cpu(tick_oneshot_wakeup_device, cpu);
}

static void tick_oneshot_wakeup_handler(struct clock_event_device *wd)
{
	/*
	 * If we woke up early and the tick was reprogrammed in the
	 * meantime then this may be spurious but harmless.
	 */
	tick_receive_broadcast();
}

static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	struct clock_event_device *curdev = tick_get_oneshot_wakeup_device(cpu);

	if (!newdev)
		goto set_device;

	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	if (!(newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
		return false;

	if (curdev && newdev->rating <= curdev->rating)
		return false;

	if (!try_module_get(newdev->owner))
		return false;

	newdev->event_handler = tick_oneshot_wakeup_handler;
set_device:
	clockevents_exchange_device(curdev, newdev);
	per_cpu(tick_oneshot_wakeup_device, cpu) = newdev;
	return true;
}
#else
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return NULL;
}

static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	return false;
}
#endif
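
/*
 * Illustrative sketch (not part of the kernel sources): for
 * tick_set_oneshot_wakeup_device() above to accept a device, the driver
 * has to register a per-CPU, oneshot capable clockevent which is neither
 * a dummy nor affected by deep C-states. The example_* names below are
 * made up; only the feature flags and the registration call matter:
 *
 *	static struct clock_event_device example_wakeup_timer = {
 *		.name		= "example-wakeup",
 *		.features	= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERCPU,
 *		.rating		= 150,
 *		.set_next_event	= example_set_next_event,
 *	};
 *
 *	example_wakeup_timer.cpumask = cpumask_of(cpu);
 *	clockevents_config_and_register(&example_wakeup_timer, freq_hz,
 *					min_delta_ticks, max_delta_ticks);
 *
 * The clockevents core then offers the new device to
 * tick_install_broadcast_device(), which tries the wakeup path first.
 */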

/*
 * Conditionally install/replace broadcast device
 */
void tick_install_broadcast_device(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *cur = tick_broadcast_device.evtdev;

	if (tick_set_oneshot_wakeup_device(dev, cpu))
		return;

	if (!tick_check_broadcast_device(cur, dev))
		return;

	if (!try_module_get(dev->owner))
		return;

	clockevents_exchange_device(cur, dev);
	if (cur)
		cur->event_handler = clockevents_handle_noop;
	tick_broadcast_device.evtdev = dev;
	if (!cpumask_empty(tick_broadcast_mask))
		tick_broadcast_start_periodic(dev);

	if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
		return;

	/*
	 * If the system already runs in oneshot mode, switch the newly
	 * registered broadcast device to oneshot mode explicitly.
	 */
	if (tick_broadcast_oneshot_active()) {
		tick_broadcast_switch_to_oneshot();
		return;
	}

	/*
	 * Inform all cpus about this. We might be in a situation
	 * where we did not switch to oneshot mode because the per cpu
	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
	 * of a oneshot capable broadcast device. Without that
	 * notification the system stays stuck in periodic mode
	 * forever.
	 */
	tick_clock_notify();
}

/*
 * Check, if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
	return (dev && tick_broadcast_device.evtdev == dev);
}

int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
	int ret = -ENODEV;

	if (tick_is_broadcast_device(dev)) {
		raw_spin_lock(&tick_broadcast_lock);
		ret = __clockevents_update_freq(dev, freq);
		raw_spin_unlock(&tick_broadcast_lock);
	}
	return ret;
}

static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
	if (!dev->broadcast)
		dev->broadcast = tick_broadcast;
	if (!dev->broadcast) {
		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
			     dev->name);
		dev->broadcast = err_broadcast;
	}
}

/*
 * Check, if the device is dysfunctional and a placeholder, which
 * needs to be handled by the broadcast device.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;
	unsigned long flags;
	int ret = 0;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	/*
	 * Devices might be registered with both periodic and oneshot
	 * mode disabled. This signals, that the device needs to be
	 * operated from the broadcast device and is a placeholder for
	 * the cpu local device.
	 */
	if (!tick_device_is_functional(dev)) {
		dev->event_handler = tick_handle_periodic;
		tick_device_setup_broadcast_func(dev);
		cpumask_set_cpu(cpu, tick_broadcast_mask);
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
			tick_broadcast_start_periodic(bc);
		else
			tick_broadcast_setup_oneshot(bc, false);
		ret = 1;
	} else {
		/*
		 * Clear the broadcast bit for this cpu if the
		 * device is not power state affected.
		 */
		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);
		else
			tick_device_setup_broadcast_func(dev);

		/*
		 * Clear the broadcast bit if the CPU is not in
		 * periodic broadcast on state.
		 */
		if (!cpumask_test_cpu(cpu, tick_broadcast_on))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_ONESHOT:
			/*
			 * If the system is in oneshot mode we can
			 * unconditionally clear the oneshot mask bit,
			 * because the CPU is running and therefore
			 * not in an idle state which causes the power
			 * state affected device to stop. Let the
			 * caller initialize the device.
			 */
			tick_broadcast_clear_oneshot(cpu);
			ret = 0;
			break;

		case TICKDEV_MODE_PERIODIC:
			/*
			 * If the system is in periodic mode, check
			 * whether the broadcast device can be
			 * switched off now.
			 */
			if (cpumask_empty(tick_broadcast_mask) && bc)
				clockevents_shutdown(bc);
			/*
			 * If we kept the cpu in the broadcast mask,
			 * tell the caller to leave the per cpu device
			 * in shutdown state. The periodic interrupt
			 * is delivered by the broadcast device, if
			 * the broadcast device exists and is not
			 * hrtimer based.
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
				ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
			break;
		default:
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
	return ret;
}

int tick_receive_broadcast(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	struct clock_event_device *evt = td->evtdev;

	if (!evt)
		return -ENODEV;

	if (!evt->event_handler)
		return -EINVAL;

	evt->event_handler(evt);
	return 0;
}
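
/*
 * Note (sketch, not part of this file): the dev->broadcast() method used
 * by tick_do_broadcast() below is normally the architecture's
 * tick_broadcast() implementation, which sends a timer IPI to the target
 * CPUs; the IPI handler on the receiving side then calls
 * tick_receive_broadcast() above. On arm64, for example, this is roughly:
 *
 *	void tick_broadcast(const struct cpumask *mask)
 *	{
 *		smp_cross_call(mask, IPI_TIMER);
 *	}
 *
 *	// and in the IPI handler:
 *	case IPI_TIMER:
 *		tick_receive_broadcast();
 *		break;
 */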

/*
 * Broadcast the event to the cpus, which are set in the mask (mangled).
 */
static bool tick_do_broadcast(struct cpumask *mask)
{
	int cpu = smp_processor_id();
	struct tick_device *td;
	bool local = false;

	/*
	 * Check, if the current cpu is in the mask
	 */
	if (cpumask_test_cpu(cpu, mask)) {
		struct clock_event_device *bc = tick_broadcast_device.evtdev;

		cpumask_clear_cpu(cpu, mask);
		/*
		 * We only run the local handler, if the broadcast
		 * device is not hrtimer based. Otherwise we run into
		 * a hrtimer recursion.
		 *
		 * local timer_interrupt()
		 *   local_handler()
		 *     expire_hrtimers()
		 *       bc_handler()
		 *         local_handler()
		 *           expire_hrtimers()
		 */
		local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
	}

	if (!cpumask_empty(mask)) {
		/*
		 * It might be necessary to actually check whether the devices
		 * have different broadcast functions. For now, just use the
		 * one of the first device. This works as long as we have this
		 * misfeature only on x86 (lapic)
		 */
		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
		td->evtdev->broadcast(mask);
	}
	return local;
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static bool tick_do_periodic_broadcast(void)
{
	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
	return tick_do_broadcast(tmpmask);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);

	/* Handle spurious interrupts gracefully */
	if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
		raw_spin_unlock(&tick_broadcast_lock);
		return;
	}

	bc_local = tick_do_periodic_broadcast();

	if (clockevent_state_oneshot(dev)) {
		ktime_t next = ktime_add_ns(dev->next_event, TICK_NSEC);

		clockevents_program_event(dev, next, true);
	}
	raw_spin_unlock(&tick_broadcast_lock);

	/*
	 * We run the handler of the local cpu after dropping
	 * tick_broadcast_lock because the handler might deadlock when
	 * trying to switch to oneshot mode.
	 */
	if (bc_local)
		td->evtdev->event_handler(td->evtdev);
}

/**
 * tick_broadcast_control - Enable/disable or force broadcast mode
 * @mode:	The selected broadcast mode
 *
 * Called when the system enters a state where affected tick devices
 * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
 */
void tick_broadcast_control(enum tick_broadcast_mode mode)
{
	struct clock_event_device *bc, *dev;
	struct tick_device *td;
	int cpu, bc_stopped;
	unsigned long flags;

	/* Protects also the local clockevent device. */
	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	td = this_cpu_ptr(&tick_cpu_device);
	dev = td->evtdev;

	/*
	 * Is the device not affected by the powerstate ?
	 */
	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
		goto out;

	if (!tick_device_is_functional(dev))
		goto out;

	cpu = smp_processor_id();
	bc = tick_broadcast_device.evtdev;
	bc_stopped = cpumask_empty(tick_broadcast_mask);

	switch (mode) {
	case TICK_BROADCAST_FORCE:
		tick_broadcast_forced = 1;
		fallthrough;
	case TICK_BROADCAST_ON:
		cpumask_set_cpu(cpu, tick_broadcast_on);
		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
			/*
			 * Only shutdown the cpu local device, if:
			 *
			 * - the broadcast device exists
			 * - the broadcast device is not a hrtimer based one
			 * - the broadcast device is in periodic mode to
			 *   avoid a hiccup during switch to oneshot mode
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
			    tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				clockevents_shutdown(dev);
		}
		break;

	case TICK_BROADCAST_OFF:
		if (tick_broadcast_forced)
			break;
		cpumask_clear_cpu(cpu, tick_broadcast_on);
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
			if (tick_broadcast_device.mode ==
			    TICKDEV_MODE_PERIODIC)
				tick_setup_periodic(dev, 0);
		}
		break;
	}

	if (bc) {
		if (cpumask_empty(tick_broadcast_mask)) {
			if (!bc_stopped)
				clockevents_shutdown(bc);
		} else if (bc_stopped) {
			if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				tick_broadcast_start_periodic(bc);
			else
				tick_broadcast_setup_oneshot(bc, false);
		}
	}
out:
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
EXPORT_SYMBOL_GPL(tick_broadcast_control);
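
/*
 * Usage sketch (callers live outside this file): code normally does not
 * invoke tick_broadcast_control() directly but uses the inline wrappers
 * from <linux/tick.h>, e.g. when a cpuidle/ACPI driver detects a C-state
 * which stops the CPU local timer:
 *
 *	tick_broadcast_enable();	// TICK_BROADCAST_ON for this CPU
 *	...
 *	tick_broadcast_disable();	// TICK_BROADCAST_OFF again
 *
 * tick_broadcast_force() maps to TICK_BROADCAST_FORCE and, as documented
 * above, cannot be undone.
 */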

/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
	if (!broadcast)
		dev->event_handler = tick_handle_periodic;
	else
		dev->event_handler = tick_handle_periodic_broadcast;
}

#ifdef CONFIG_HOTPLUG_CPU
static void tick_shutdown_broadcast(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
		if (bc && cpumask_empty(tick_broadcast_mask))
			clockevents_shutdown(bc);
	}
}

/*
 * Remove a CPU from broadcasting
 */
void tick_broadcast_offline(unsigned int cpu)
{
	raw_spin_lock(&tick_broadcast_lock);
	cpumask_clear_cpu(cpu, tick_broadcast_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_on);
	tick_broadcast_oneshot_offline(cpu);
	tick_shutdown_broadcast();
	raw_spin_unlock(&tick_broadcast_lock);
}

#endif

void tick_suspend_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;
	if (bc)
		clockevents_shutdown(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * This is called from tick_resume_local() on a resuming CPU. That's
 * called from the core resume function, tick_unfreeze() and the magic XEN
 * resume hackery.
 *
 * In none of these cases the broadcast device mode can change and the
 * bit of the resuming CPU in the broadcast mask is safe as well.
 */
bool tick_resume_check_broadcast(void)
{
	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
		return false;
	else
		return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
}

void tick_resume_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;

	if (bc) {
		clockevents_tick_resume(bc);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_PERIODIC:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_broadcast_start_periodic(bc);
			break;
		case TICKDEV_MODE_ONESHOT:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_resume_broadcast_oneshot(bc);
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_TICK_ONESHOT

static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly;

/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
	return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
noinstr int tick_check_broadcast_expired(void)
{
#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
	return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask));
#else
	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
#endif
}

/*
 * Set broadcast interrupt affinity
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
					const struct cpumask *cpumask)
{
	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
		return;

	if (cpumask_equal(bc->cpumask, cpumask))
		return;

	bc->cpumask = cpumask;
	irq_set_affinity(bc->irq, bc->cpumask);
}

static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
				     ktime_t expires)
{
	if (!clockevent_state_oneshot(bc))
		clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);

	clockevents_program_event(bc, expires, 1);
	tick_broadcast_set_affinity(bc, cpumask_of(cpu));
}

static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
	clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
	if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
		struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

		/*
		 * We might be in the middle of switching over from
		 * periodic to oneshot. If the CPU has not yet
		 * switched over, leave the device alone.
		 */
		if (td->mode == TICKDEV_MODE_ONESHOT) {
			clockevents_switch_state(td->evtdev,
					      CLOCK_EVT_STATE_ONESHOT);
		}
	}
}

/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td;
	ktime_t now, next_event;
	int cpu, next_cpu = 0;
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);
	dev->next_event = KTIME_MAX;
	next_event = KTIME_MAX;
	cpumask_clear(tmpmask);
	now = ktime_get();
	/* Find all expired events */
	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
		/*
		 * Required for !SMP because for_each_cpu() unconditionally
		 * reports CPU0 as set on UP kernels.
		 */
		if (!IS_ENABLED(CONFIG_SMP) &&
		    cpumask_empty(tick_broadcast_oneshot_mask))
			break;

		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev->next_event <= now) {
			cpumask_set_cpu(cpu, tmpmask);
			/*
			 * Mark the remote cpu in the pending mask, so
			 * it can avoid reprogramming the cpu local
			 * timer in tick_broadcast_oneshot_control().
			 */
			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
		} else if (td->evtdev->next_event < next_event) {
			next_event = td->evtdev->next_event;
			next_cpu = cpu;
		}
	}

	/*
	 * Remove the current cpu from the pending mask. The event is
	 * delivered immediately in tick_do_broadcast() !
	 */
	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

	/* Take care of enforced broadcast requests */
	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
	cpumask_clear(tick_broadcast_force_mask);

	/*
	 * Sanity check. Catch the case where we try to broadcast to
	 * offline cpus.
	 */
	if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
		cpumask_and(tmpmask, tmpmask, cpu_online_mask);

	/*
	 * Wakeup the cpus which have an expired event.
	 */
	bc_local = tick_do_broadcast(tmpmask);

	/*
	 * Two reasons for reprogram:
	 *
	 * - The global event did not expire any CPU local
	 * events. This happens in dyntick mode, as the maximum PIT
	 * delta is quite small.
	 *
	 * - There are pending events on sleeping CPUs which were not
	 * in the event mask
	 */
	if (next_event != KTIME_MAX)
		tick_broadcast_set_event(dev, next_cpu, next_event);

	raw_spin_unlock(&tick_broadcast_lock);

	if (bc_local) {
		td = this_cpu_ptr(&tick_cpu_device);
		td->evtdev->event_handler(td->evtdev);
	}
}

static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
	if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return 0;
	if (bc->next_event == KTIME_MAX)
		return 0;
	return bc->bound_on == cpu ? -EBUSY : 0;
}
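
/*
 * Background note (the implementation lives in
 * kernel/time/tick-broadcast-hrtimer.c): the CLOCK_EVT_FEAT_HRTIMER case
 * above covers the hrtimer based pseudo broadcast device which is used
 * when no real broadcast hardware exists. That device is driven by a
 * hrtimer on one CPU, recorded in bc->bound_on, and that CPU must not
 * enter a deep idle state, which is why -EBUSY is returned for it.
 */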

static void broadcast_shutdown_local(struct clock_event_device *bc,
				     struct clock_event_device *dev)
{
	/*
	 * For hrtimer based broadcasting we cannot shutdown the cpu
	 * local device if our own event is the first one to expire or
	 * if we own the broadcast timer.
	 */
	if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
		if (broadcast_needs_cpu(bc, smp_processor_id()))
			return;
		if (dev->next_event < bc->next_event)
			return;
	}
	clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
}

static int ___tick_broadcast_oneshot_control(enum tick_broadcast_state state,
					     struct tick_device *td,
					     int cpu)
{
	struct clock_event_device *bc, *dev = td->evtdev;
	int ret = 0;
	ktime_t now;

	raw_spin_lock(&tick_broadcast_lock);
	bc = tick_broadcast_device.evtdev;

	if (state == TICK_BROADCAST_ENTER) {
		/*
		 * If the current CPU owns the hrtimer broadcast
		 * mechanism, it cannot go deep idle and we do not add
		 * the CPU to the broadcast mask. We don't have to go
		 * through the EXIT path as the local timer is not
		 * shutdown.
		 */
		ret = broadcast_needs_cpu(bc, cpu);
		if (ret)
			goto out;

		/*
		 * If the broadcast device is in periodic mode, we
		 * return.
		 */
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
			/* If it is a hrtimer based broadcast, return busy */
			if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
				ret = -EBUSY;
			goto out;
		}

		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));

			/* Conditionally shut down the local timer. */
			broadcast_shutdown_local(bc, dev);

			/*
			 * We only reprogram the broadcast timer if we
			 * did not mark ourself in the force mask and
			 * if the cpu local event is earlier than the
			 * broadcast event. If the current CPU is in
			 * the force mask, then we are going to be
			 * woken by the IPI right away; we return
			 * busy, so the CPU does not try to go deep
			 * idle.
			 */
			if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
				ret = -EBUSY;
			} else if (dev->next_event < bc->next_event) {
				tick_broadcast_set_event(bc, cpu, dev->next_event);
				/*
				 * In case of hrtimer broadcasts the
				 * programming might have moved the
				 * timer to this cpu. If yes, remove
				 * us from the broadcast mask and
				 * return busy.
				 */
				ret = broadcast_needs_cpu(bc, cpu);
				if (ret) {
					cpumask_clear_cpu(cpu,
						tick_broadcast_oneshot_mask);
				}
			}
		}
	} else {
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
			clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
			/*
			 * The cpu which was handling the broadcast
			 * timer marked this cpu in the broadcast
			 * pending mask and fired the broadcast
			 * IPI. So we are going to handle the expired
			 * event anyway via the broadcast IPI
			 * handler. No need to reprogram the timer
			 * with an already expired event.
			 */
			if (cpumask_test_and_clear_cpu(cpu,
				       tick_broadcast_pending_mask))
				goto out;

			/*
			 * Bail out if there is no next event.
			 */
			if (dev->next_event == KTIME_MAX)
				goto out;
			/*
			 * If the pending bit is not set, then we are
			 * either the CPU handling the broadcast
			 * interrupt or we got woken by something else.
			 *
			 * We are no longer in the broadcast mask, so
			 * if the cpu local expiry time is already
			 * reached, we would reprogram the cpu local
			 * timer with an already expired event.
			 *
			 * This can lead to a ping-pong when we return
			 * to idle and therefore rearm the broadcast
			 * timer before the cpu local timer was able
			 * to fire. This happens because the forced
			 * reprogramming makes sure that the event
			 * will happen in the future and depending on
			 * the min_delta setting this might be far
			 * enough out that the ping-pong starts.
			 *
			 * If the cpu local next_event has expired
			 * then we know that the broadcast timer
			 * next_event has expired as well and
			 * broadcast is about to be handled. So we
			 * avoid reprogramming and enforce that the
			 * broadcast handler, which did not run yet,
			 * will invoke the cpu local handler.
			 *
			 * We cannot call the handler directly from
			 * here, because we might be in a NOHZ phase
			 * and we did not go through the irq_enter()
			 * nohz fixups.
			 */
			now = ktime_get();
			if (dev->next_event <= now) {
				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
				goto out;
			}
			/*
			 * We got woken by something else. Reprogram
			 * the cpu local timer device.
			 */
			tick_program_event(dev->next_event, 1);
		}
	}
out:
	raw_spin_unlock(&tick_broadcast_lock);
	return ret;
}

static int tick_oneshot_wakeup_control(enum tick_broadcast_state state,
				       struct tick_device *td,
				       int cpu)
{
	struct clock_event_device *dev, *wd;

	dev = td->evtdev;
	if (td->mode != TICKDEV_MODE_ONESHOT)
		return -EINVAL;

	wd = tick_get_oneshot_wakeup_device(cpu);
	if (!wd)
		return -ENODEV;

	switch (state) {
	case TICK_BROADCAST_ENTER:
		clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
		clockevents_switch_state(wd, CLOCK_EVT_STATE_ONESHOT);
		clockevents_program_event(wd, dev->next_event, 1);
		break;
	case TICK_BROADCAST_EXIT:
		/* We may have transitioned to oneshot mode while idle */
		if (clockevent_get_state(wd) != CLOCK_EVT_STATE_ONESHOT)
			return -ENODEV;
	}

	return 0;
}

int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	int cpu = smp_processor_id();

	if (!tick_oneshot_wakeup_control(state, td, cpu))
		return 0;

	if (tick_broadcast_device.evtdev)
		return ___tick_broadcast_oneshot_control(state, td, cpu);

	/*
	 * If there is no broadcast or wakeup device, tell the caller not
	 * to go into deep idle.
	 */
	return -EBUSY;
}
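
/*
 * Usage sketch (callers live outside this file): the idle path reaches
 * __tick_broadcast_oneshot_control() via tick_broadcast_oneshot_control()
 * and the <linux/tick.h> wrappers, roughly:
 *
 *	if (tick_broadcast_enter())	// TICK_BROADCAST_ENTER
 *		// -EBUSY: this CPU must not enter a deep idle state,
 *		// pick a shallower C-state instead
 *	...
 *	tick_broadcast_exit();		// TICK_BROADCAST_EXIT after wakeup
 */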

/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}

static void tick_broadcast_init_next_event(struct cpumask *mask,
					   ktime_t expires)
{
	struct tick_device *td;
	int cpu;

	for_each_cpu(cpu, mask) {
		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev)
			td->evtdev->next_event = expires;
	}
}

static inline ktime_t tick_get_next_period(void)
{
	ktime_t next;

	/*
	 * Protect against concurrent updates (store/load tearing on
	 * 32bit). It does not matter if the time is already in the
	 * past. The broadcast device which is about to be programmed will
	 * fire in any case.
	 */
	raw_spin_lock(&jiffies_lock);
	next = tick_next_period;
	raw_spin_unlock(&jiffies_lock);
	return next;
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 * @bc:			the clock event device to set up as broadcast device
 * @from_periodic:	true when the tick mode is switched from periodic
 *			to oneshot
 */
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc,
					 bool from_periodic)
{
	int cpu = smp_processor_id();
	ktime_t nexttick = 0;

	if (!bc)
		return;

	/*
	 * When the broadcast device was switched to oneshot by the first
	 * CPU handling the NOHZ change, the other CPUs will reach this
	 * code via hrtimer_run_queues() -> tick_check_oneshot_change()
	 * too. Set up the broadcast device only once!
	 */
	if (bc->event_handler == tick_handle_oneshot_broadcast) {
		/*
		 * The CPU which switched from periodic to oneshot mode
		 * set the broadcast oneshot bit for all other CPUs which
		 * are in the general (periodic) broadcast mask to ensure
		 * that CPUs which wait for the periodic broadcast are
		 * woken up.
		 *
		 * Clear the bit for the local CPU as the set bit would
		 * prevent the first tick_broadcast_enter() after this CPU
		 * switched to oneshot state to program the broadcast
		 * device.
		 *
		 * This code can also be reached via tick_broadcast_control(),
		 * but this cannot avoid the tick_broadcast_clear_oneshot()
		 * as that would break the periodic to oneshot transition of
		 * secondary CPUs. But that's harmless as the below only
		 * clears already cleared bits.
		 */
		tick_broadcast_clear_oneshot(cpu);
		return;
	}


	bc->event_handler = tick_handle_oneshot_broadcast;
	bc->next_event = KTIME_MAX;

	/*
	 * When the tick mode is switched from periodic to oneshot it must
	 * be ensured that CPUs which are waiting for periodic broadcast
	 * get their wake-up at the next tick. This is achieved by ORing
	 * tick_broadcast_mask into tick_broadcast_oneshot_mask.
	 *
	 * For other callers, e.g. broadcast device replacement,
	 * tick_broadcast_oneshot_mask must not be touched as this would
	 * set bits for CPUs which are already NOHZ, but not idle. Their
	 * next tick_broadcast_enter() would observe the bit set and fail
	 * to update the expiry time and the broadcast event device.
	 */
	if (from_periodic) {
		cpumask_copy(tmpmask, tick_broadcast_mask);
		/* Remove the local CPU as it is obviously not idle */
		cpumask_clear_cpu(cpu, tmpmask);
		cpumask_or(tick_broadcast_oneshot_mask, tick_broadcast_oneshot_mask, tmpmask);

		/*
		 * Ensure that the oneshot broadcast handler will wake the
		 * CPUs which are still waiting for periodic broadcast.
		 */
		nexttick = tick_get_next_period();
		tick_broadcast_init_next_event(tmpmask, nexttick);

		/*
		 * If the underlying broadcast clock event device is
		 * already in oneshot state, then there is nothing to do.
		 * The device was already armed for the next tick
		 * in tick_handle_periodic_broadcast().
		 */
		if (clockevent_state_oneshot(bc))
			return;
	}

	/*
	 * When switching from periodic to oneshot mode arm the broadcast
	 * device for the next tick.
	 *
	 * If the broadcast device has been replaced in oneshot mode and
	 * the oneshot broadcast mask is not empty, then arm it to expire
	 * immediately in order to reevaluate the next expiring timer.
	 * @nexttick is 0 and therefore in the past which will cause the
	 * clockevent code to force an event.
	 *
	 * For both cases the programming can be avoided when the oneshot
	 * broadcast mask is empty.
	 *
	 * tick_broadcast_set_event() implicitly switches the broadcast
	 * device to oneshot state.
	 */
	if (!cpumask_empty(tick_broadcast_oneshot_mask))
		tick_broadcast_set_event(bc, cpu, nexttick);
}

/*
 * Select oneshot operating mode for the broadcast device
 */
void tick_broadcast_switch_to_oneshot(void)
{
	struct clock_event_device *bc;
	enum tick_device_mode oldmode;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	oldmode = tick_broadcast_device.mode;
	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
	bc = tick_broadcast_device.evtdev;
	if (bc)
		tick_broadcast_setup_oneshot(bc, oldmode == TICKDEV_MODE_PERIODIC);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_HOTPLUG_CPU
void hotplug_cpu__broadcast_tick_pull(int deadcpu)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	bc = tick_broadcast_device.evtdev;

	if (bc && broadcast_needs_cpu(bc, deadcpu)) {
		/*
		 * If the broadcast force bit of the current CPU is set,
		 * then the current CPU has not yet reprogrammed the local
		 * timer device to avoid a ping-pong race. See
		 * ___tick_broadcast_oneshot_control().
		 *
		 * If the broadcast device is hrtimer based then
		 * programming the broadcast event below does not have any
		 * effect because the local clockevent device is not
		 * running and not programmed because the broadcast event
		 * is not earlier than the pending event of the local clock
		 * event device. As a consequence all CPUs waiting for a
		 * broadcast event are stuck forever.
		 *
		 * Detect this condition and reprogram the cpu local timer
		 * device to avoid the starvation.
		 */
		if (tick_check_broadcast_expired()) {
			struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

			cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask);
			tick_program_event(td->evtdev->next_event, 1);
		}

		/* This moves the broadcast assignment to this CPU: */
		clockevents_program_event(bc, bc->next_event, 1);
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Remove a dying CPU from broadcasting
 */
static void tick_broadcast_oneshot_offline(unsigned int cpu)
{
	if (tick_get_oneshot_wakeup_device(cpu))
		tick_set_oneshot_wakeup_device(NULL, cpu);

	/*
	 * Clear the broadcast masks for the dead cpu, but do not stop
	 * the broadcast device!
	 */
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
}
#endif

/*
 * Check, whether the broadcast device is in one shot mode
 */
int tick_broadcast_oneshot_active(void)
{
	return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

/*
 * Check whether the broadcast device supports oneshot.
 */
bool tick_broadcast_oneshot_available(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}

#else
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return -EBUSY;

	return 0;
}
#endif

void __init tick_broadcast_init(void)
{
	zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
	zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
	zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}