
TOMOYO Linux Cross Reference
Linux/kernel/smp.c


  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * Generic helpers for smp ipi calls
  4  *
  5  * (C) Jens Axboe <jens.axboe@oracle.com> 2008
  6  */
  7 
  8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  9 
 10 #include <linux/irq_work.h>
 11 #include <linux/rcupdate.h>
 12 #include <linux/rculist.h>
 13 #include <linux/kernel.h>
 14 #include <linux/export.h>
 15 #include <linux/percpu.h>
 16 #include <linux/init.h>
 17 #include <linux/interrupt.h>
 18 #include <linux/gfp.h>
 19 #include <linux/smp.h>
 20 #include <linux/cpu.h>
 21 #include <linux/sched.h>
 22 #include <linux/sched/idle.h>
 23 #include <linux/hypervisor.h>
 24 #include <linux/sched/clock.h>
 25 #include <linux/nmi.h>
 26 #include <linux/sched/debug.h>
 27 #include <linux/jump_label.h>
 28 #include <linux/string_choices.h>
 29 
 30 #include <trace/events/ipi.h>
 31 #define CREATE_TRACE_POINTS
 32 #include <trace/events/csd.h>
 33 #undef CREATE_TRACE_POINTS
 34 
 35 #include "smpboot.h"
 36 #include "sched/smp.h"
 37 
 38 #define CSD_TYPE(_csd)  ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
 39 
 40 struct call_function_data {
 41         call_single_data_t      __percpu *csd;
 42         cpumask_var_t           cpumask;
 43         cpumask_var_t           cpumask_ipi;
 44 };
 45 
 46 static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
 47 
 48 static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
 49 
 50 static DEFINE_PER_CPU(atomic_t, trigger_backtrace) = ATOMIC_INIT(1);
 51 
 52 static void __flush_smp_call_function_queue(bool warn_cpu_offline);
 53 
 54 int smpcfd_prepare_cpu(unsigned int cpu)
 55 {
 56         struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
 57 
 58         if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
 59                                      cpu_to_node(cpu)))
 60                 return -ENOMEM;
 61         if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
 62                                      cpu_to_node(cpu))) {
 63                 free_cpumask_var(cfd->cpumask);
 64                 return -ENOMEM;
 65         }
 66         cfd->csd = alloc_percpu(call_single_data_t);
 67         if (!cfd->csd) {
 68                 free_cpumask_var(cfd->cpumask);
 69                 free_cpumask_var(cfd->cpumask_ipi);
 70                 return -ENOMEM;
 71         }
 72 
 73         return 0;
 74 }
 75 
 76 int smpcfd_dead_cpu(unsigned int cpu)
 77 {
 78         struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
 79 
 80         free_cpumask_var(cfd->cpumask);
 81         free_cpumask_var(cfd->cpumask_ipi);
 82         free_percpu(cfd->csd);
 83         return 0;
 84 }
 85 
 86 int smpcfd_dying_cpu(unsigned int cpu)
 87 {
 88         /*
 89          * The IPIs for the smp-call-function callbacks queued by other
 90          * CPUs might arrive late, either due to hardware latencies or
 91          * because this CPU disabled interrupts (inside stop-machine)
 92          * before the IPIs were sent. So flush out any pending callbacks
 93          * explicitly (without waiting for the IPIs to arrive), to
 94          * ensure that the outgoing CPU doesn't go offline with work
 95          * still pending.
 96          */
 97         __flush_smp_call_function_queue(false);
 98         irq_work_run();
 99         return 0;
100 }
101 
102 void __init call_function_init(void)
103 {
104         int i;
105 
106         for_each_possible_cpu(i)
107                 init_llist_head(&per_cpu(call_single_queue, i));
108 
109         smpcfd_prepare_cpu(smp_processor_id());
110 }
111 
112 static __always_inline void
113 send_call_function_single_ipi(int cpu)
114 {
115         if (call_function_single_prep_ipi(cpu)) {
116                 trace_ipi_send_cpu(cpu, _RET_IP_,
117                                    generic_smp_call_function_single_interrupt);
118                 arch_send_call_function_single_ipi(cpu);
119         }
120 }
121 
122 static __always_inline void
123 send_call_function_ipi_mask(struct cpumask *mask)
124 {
125         trace_ipi_send_cpumask(mask, _RET_IP_,
126                                generic_smp_call_function_single_interrupt);
127         arch_send_call_function_ipi_mask(mask);
128 }
129 
130 static __always_inline void
131 csd_do_func(smp_call_func_t func, void *info, call_single_data_t *csd)
132 {
133         trace_csd_function_entry(func, csd);
134         func(info);
135         trace_csd_function_exit(func, csd);
136 }
137 
138 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
139 
140 static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug_enabled);
141 
142 /*
143  * Parse the csdlock_debug= kernel boot parameter.
144  *
145  * If you need to restore the old "ext" value that once provided
146  * additional debugging information, reapply the following commits:
147  *
148  * de7b09ef658d ("locking/csd_lock: Prepare more CSD lock debugging")
149  * a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging")
150  */
151 static int __init csdlock_debug(char *str)
152 {
153         int ret;
154         unsigned int val = 0;
155 
156         ret = get_option(&str, &val);
157         if (ret) {
158                 if (val)
159                         static_branch_enable(&csdlock_debug_enabled);
160                 else
161                         static_branch_disable(&csdlock_debug_enabled);
162         }
163 
164         return 1;
165 }
166 __setup("csdlock_debug=", csdlock_debug);
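/*
 * Editorial note (not part of upstream smp.c): with CONFIG_CSD_LOCK_WAIT_DEBUG
 * built in, this facility is toggled from the kernel command line, e.g.
 *
 *   csdlock_debug=1      enable CSD lock wait debugging
 *   csdlock_debug=0      disable it (the built-in default follows
 *                        CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT)
 *
 * The value is parsed by get_option() above; any non-zero value enables the
 * static branch, zero disables it.
 */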
167 
168 static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
169 static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
170 static DEFINE_PER_CPU(void *, cur_csd_info);
171 
172 static ulong csd_lock_timeout = 5000;  /* CSD lock timeout in milliseconds. */
173 module_param(csd_lock_timeout, ulong, 0444);
174 static int panic_on_ipistall;  /* CSD panic timeout in milliseconds, 300000 for five minutes. */
175 module_param(panic_on_ipistall, int, 0444);
176 
177 static atomic_t csd_bug_count = ATOMIC_INIT(0);
178 
179 /* Record current CSD work for current CPU, NULL to erase. */
180 static void __csd_lock_record(call_single_data_t *csd)
181 {
182         if (!csd) {
183                 smp_mb(); /* NULL cur_csd after unlock. */
184                 __this_cpu_write(cur_csd, NULL);
185                 return;
186         }
187         __this_cpu_write(cur_csd_func, csd->func);
188         __this_cpu_write(cur_csd_info, csd->info);
189         smp_wmb(); /* func and info before csd. */
190         __this_cpu_write(cur_csd, csd);
191         smp_mb(); /* Update cur_csd before function call. */
192                   /* Or before unlock, as the case may be. */
193 }
194 
195 static __always_inline void csd_lock_record(call_single_data_t *csd)
196 {
197         if (static_branch_unlikely(&csdlock_debug_enabled))
198                 __csd_lock_record(csd);
199 }
200 
201 static int csd_lock_wait_getcpu(call_single_data_t *csd)
202 {
203         unsigned int csd_type;
204 
205         csd_type = CSD_TYPE(csd);
206         if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
207                 return csd->node.dst; /* Other CSD_TYPE_ values might not have ->dst. */
208         return -1;
209 }
210 
211 /*
212  * Complain if too much time spent waiting.  Note that only
213  * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
214  * so waiting on other types gets much less information.
215  */
216 static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
217 {
218         int cpu = -1;
219         int cpux;
220         bool firsttime;
221         u64 ts2, ts_delta;
222         call_single_data_t *cpu_cur_csd;
223         unsigned int flags = READ_ONCE(csd->node.u_flags);
224         unsigned long long csd_lock_timeout_ns = csd_lock_timeout * NSEC_PER_MSEC;
225 
226         if (!(flags & CSD_FLAG_LOCK)) {
227                 if (!unlikely(*bug_id))
228                         return true;
229                 cpu = csd_lock_wait_getcpu(csd);
230                 pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
231                          *bug_id, raw_smp_processor_id(), cpu);
232                 return true;
233         }
234 
235         ts2 = sched_clock();
236         /* How long since we last checked for a stuck CSD lock.*/
237         ts_delta = ts2 - *ts1;
238         if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0))
239                 return false;
240 
241         firsttime = !*bug_id;
242         if (firsttime)
243                 *bug_id = atomic_inc_return(&csd_bug_count);
244         cpu = csd_lock_wait_getcpu(csd);
245         if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
246                 cpux = 0;
247         else
248                 cpux = cpu;
249         cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
250         /* How long since this CSD lock was stuck. */
251         ts_delta = ts2 - ts0;
252         pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
253                  firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts_delta,
254                  cpu, csd->func, csd->info);
255         /*
256          * If the CSD lock is still stuck after 5 minutes, it is unlikely
257          * to become unstuck. Use a signed comparison to avoid triggering
258          * on underflows when the TSC is out of sync between sockets.
259          */
260         BUG_ON(panic_on_ipistall > 0 && (s64)ts_delta > ((s64)panic_on_ipistall * NSEC_PER_MSEC));
261         if (cpu_cur_csd && csd != cpu_cur_csd) {
262                 pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
263                          *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
264                          READ_ONCE(per_cpu(cur_csd_info, cpux)));
265         } else {
266                 pr_alert("\tcsd: CSD lock (#%d) %s.\n",
267                          *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
268         }
269         if (cpu >= 0) {
270                 if (atomic_cmpxchg_acquire(&per_cpu(trigger_backtrace, cpu), 1, 0))
271                         dump_cpu_task(cpu);
272                 if (!cpu_cur_csd) {
273                         pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
274                         arch_send_call_function_single_ipi(cpu);
275                 }
276         }
277         if (firsttime)
278                 dump_stack();
279         *ts1 = ts2;
280 
281         return false;
282 }
283 
284 /*
285  * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
286  *
287  * For non-synchronous ipi calls the csd can still be in use by the
288  * previous function call. For multi-cpu calls it's even more interesting
289  * as we'll have to ensure no other cpu is observing our csd.
290  */
291 static void __csd_lock_wait(call_single_data_t *csd)
292 {
293         int bug_id = 0;
294         u64 ts0, ts1;
295 
296         ts1 = ts0 = sched_clock();
297         for (;;) {
298                 if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id))
299                         break;
300                 cpu_relax();
301         }
302         smp_acquire__after_ctrl_dep();
303 }
304 
305 static __always_inline void csd_lock_wait(call_single_data_t *csd)
306 {
307         if (static_branch_unlikely(&csdlock_debug_enabled)) {
308                 __csd_lock_wait(csd);
309                 return;
310         }
311 
312         smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
313 }
314 #else
315 static void csd_lock_record(call_single_data_t *csd)
316 {
317 }
318 
319 static __always_inline void csd_lock_wait(call_single_data_t *csd)
320 {
321         smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
322 }
323 #endif
324 
325 static __always_inline void csd_lock(call_single_data_t *csd)
326 {
327         csd_lock_wait(csd);
328         csd->node.u_flags |= CSD_FLAG_LOCK;
329 
330         /*
331          * prevent CPU from reordering the above assignment
332          * to ->flags with any subsequent assignments to other
333          * fields of the specified call_single_data_t structure:
334          */
335         smp_wmb();
336 }
337 
338 static __always_inline void csd_unlock(call_single_data_t *csd)
339 {
340         WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK));
341 
342         /*
343          * ensure we're all done before releasing data:
344          */
345         smp_store_release(&csd->node.u_flags, 0);
346 }
347 
348 static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
349 
350 void __smp_call_single_queue(int cpu, struct llist_node *node)
351 {
352         /*
353          * We have to check the type of the CSD before queueing it, because
354          * once queued it can have its flags cleared by
355          *   flush_smp_call_function_queue()
356          * even if we haven't sent the smp_call IPI yet (e.g. the stopper
357          * executes migration_cpu_stop() on the remote CPU).
358          */
359         if (trace_csd_queue_cpu_enabled()) {
360                 call_single_data_t *csd;
361                 smp_call_func_t func;
362 
363                 csd = container_of(node, call_single_data_t, node.llist);
364                 func = CSD_TYPE(csd) == CSD_TYPE_TTWU ?
365                         sched_ttwu_pending : csd->func;
366 
367                 trace_csd_queue_cpu(cpu, _RET_IP_, func, csd);
368         }
369 
370         /*
371          * The list addition should be visible to the target CPU when it pops
372          * the head of the list to pull the entry off it in the IPI handler
373          * because of normal cache coherency rules implied by the underlying
374          * llist ops.
375          *
376  * If IPIs can go out of order with respect to the cache coherency
377  * protocol in an architecture, sufficient synchronisation should be
378  * added to arch code to make it appear to obey cache coherency WRT
379          * locking and barrier primitives. Generic code isn't really
380          * equipped to do the right thing...
381          */
382         if (llist_add(node, &per_cpu(call_single_queue, cpu)))
383                 send_call_function_single_ipi(cpu);
384 }
385 
386 /*
387  * Insert a previously allocated call_single_data_t element
388  * for execution on the given CPU. data must already have
389  * ->func, ->info, and ->flags set.
390  */
391 static int generic_exec_single(int cpu, call_single_data_t *csd)
392 {
393         if (cpu == smp_processor_id()) {
394                 smp_call_func_t func = csd->func;
395                 void *info = csd->info;
396                 unsigned long flags;
397 
398                 /*
399                  * We can unlock early even for the synchronous on-stack case,
400                  * since we're doing this from the same CPU..
401                  */
402                 csd_lock_record(csd);
403                 csd_unlock(csd);
404                 local_irq_save(flags);
405                 csd_do_func(func, info, NULL);
406                 csd_lock_record(NULL);
407                 local_irq_restore(flags);
408                 return 0;
409         }
410 
411         if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
412                 csd_unlock(csd);
413                 return -ENXIO;
414         }
415 
416         __smp_call_single_queue(cpu, &csd->node.llist);
417 
418         return 0;
419 }
420 
421 /**
422  * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
423  *
424  * Invoked by arch to handle an IPI for call function single.
425  * Must be called with interrupts disabled.
426  */
427 void generic_smp_call_function_single_interrupt(void)
428 {
429         __flush_smp_call_function_queue(true);
430 }
431 
432 /**
433  * __flush_smp_call_function_queue - Flush pending smp-call-function callbacks
434  *
435  * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
436  *                    offline CPU. Skip this check if set to 'false'.
437  *
438  * Flush any pending smp-call-function callbacks queued on this CPU. This is
439  * invoked by the generic IPI handler, as well as by a CPU about to go offline,
440  * to ensure that all pending IPI callbacks are run before it goes completely
441  * offline.
442  *
443  * Loop through the call_single_queue and run all the queued callbacks.
444  * Must be called with interrupts disabled.
445  */
446 static void __flush_smp_call_function_queue(bool warn_cpu_offline)
447 {
448         call_single_data_t *csd, *csd_next;
449         struct llist_node *entry, *prev;
450         struct llist_head *head;
451         static bool warned;
452         atomic_t *tbt;
453 
454         lockdep_assert_irqs_disabled();
455 
456         /* Allow waiters to send backtrace NMI from here onwards */
457         tbt = this_cpu_ptr(&trigger_backtrace);
458         atomic_set_release(tbt, 1);
459 
460         head = this_cpu_ptr(&call_single_queue);
461         entry = llist_del_all(head);
462         entry = llist_reverse_order(entry);
463 
464         /* There shouldn't be any pending callbacks on an offline CPU. */
465         if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
466                      !warned && entry != NULL)) {
467                 warned = true;
468                 WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
469 
470                 /*
471                  * We don't have to use the _safe() variant here
472                  * because we are not invoking the IPI handlers yet.
473                  */
474                 llist_for_each_entry(csd, entry, node.llist) {
475                         switch (CSD_TYPE(csd)) {
476                         case CSD_TYPE_ASYNC:
477                         case CSD_TYPE_SYNC:
478                         case CSD_TYPE_IRQ_WORK:
479                                 pr_warn("IPI callback %pS sent to offline CPU\n",
480                                         csd->func);
481                                 break;
482 
483                         case CSD_TYPE_TTWU:
484                                 pr_warn("IPI task-wakeup sent to offline CPU\n");
485                                 break;
486 
487                         default:
488                                 pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
489                                         CSD_TYPE(csd));
490                                 break;
491                         }
492                 }
493         }
494 
495         /*
496          * First; run all SYNC callbacks, people are waiting for us.
497          */
498         prev = NULL;
499         llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
500                 /* Do we wait until *after* callback? */
501                 if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
502                         smp_call_func_t func = csd->func;
503                         void *info = csd->info;
504 
505                         if (prev) {
506                                 prev->next = &csd_next->node.llist;
507                         } else {
508                                 entry = &csd_next->node.llist;
509                         }
510 
511                         csd_lock_record(csd);
512                         csd_do_func(func, info, csd);
513                         csd_unlock(csd);
514                         csd_lock_record(NULL);
515                 } else {
516                         prev = &csd->node.llist;
517                 }
518         }
519 
520         if (!entry)
521                 return;
522 
523         /*
524          * Second; run all !SYNC callbacks.
525          */
526         prev = NULL;
527         llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
528                 int type = CSD_TYPE(csd);
529 
530                 if (type != CSD_TYPE_TTWU) {
531                         if (prev) {
532                                 prev->next = &csd_next->node.llist;
533                         } else {
534                                 entry = &csd_next->node.llist;
535                         }
536 
537                         if (type == CSD_TYPE_ASYNC) {
538                                 smp_call_func_t func = csd->func;
539                                 void *info = csd->info;
540 
541                                 csd_lock_record(csd);
542                                 csd_unlock(csd);
543                                 csd_do_func(func, info, csd);
544                                 csd_lock_record(NULL);
545                         } else if (type == CSD_TYPE_IRQ_WORK) {
546                                 irq_work_single(csd);
547                         }
548 
549                 } else {
550                         prev = &csd->node.llist;
551                 }
552         }
553 
554         /*
555          * Third; only CSD_TYPE_TTWU is left, issue those.
556          */
557         if (entry) {
558                 csd = llist_entry(entry, typeof(*csd), node.llist);
559                 csd_do_func(sched_ttwu_pending, entry, csd);
560         }
561 }
562 
563 
564 /**
565  * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
566  *                                 from task context (idle, migration thread)
567  *
568  * When TIF_POLLING_NRFLAG is supported and a CPU is in idle and has it
569  * set, then remote CPUs can avoid sending IPIs and wake the idle CPU by
570  * setting TIF_NEED_RESCHED. The idle task on the woken up CPU has to
571  * handle queued SMP function calls before scheduling.
572  *
573  * The migration thread has to ensure that a possibly pending wakeup has
574  * been handled before it migrates a task.
575  */
576 void flush_smp_call_function_queue(void)
577 {
578         unsigned int was_pending;
579         unsigned long flags;
580 
581         if (llist_empty(this_cpu_ptr(&call_single_queue)))
582                 return;
583 
584         local_irq_save(flags);
585         /* Get the already pending soft interrupts for RT enabled kernels */
586         was_pending = local_softirq_pending();
587         __flush_smp_call_function_queue(true);
588         if (local_softirq_pending())
589                 do_softirq_post_smp_call_flush(was_pending);
590 
591         local_irq_restore(flags);
592 }
593 
594 /*
595  * smp_call_function_single - Run a function on a specific CPU
596  * @func: The function to run. This must be fast and non-blocking.
597  * @info: An arbitrary pointer to pass to the function.
598  * @wait: If true, wait until function has completed on other CPUs.
599  *
600  * Returns 0 on success, else a negative status code.
601  */
602 int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
603                              int wait)
604 {
605         call_single_data_t *csd;
606         call_single_data_t csd_stack = {
607                 .node = { .u_flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC, },
608         };
609         int this_cpu;
610         int err;
611 
612         /*
613          * prevent preemption and reschedule on another processor,
614          * as well as CPU removal
615          */
616         this_cpu = get_cpu();
617 
618         /*
619          * Can deadlock when called with interrupts disabled.
620  * We allow CPUs that are not yet online though, as no one else can
621          * send smp call function interrupt to this cpu and as such deadlocks
622          * can't happen.
623          */
624         WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
625                      && !oops_in_progress);
626 
627         /*
628          * When @wait we can deadlock when we interrupt between llist_add() and
629  * arch_send_call_function_ipi*(); when !@wait we can deadlock on
630  * csd_lock() because the interrupt context uses the same csd
631          * storage.
632          */
633         WARN_ON_ONCE(!in_task());
634 
635         csd = &csd_stack;
636         if (!wait) {
637                 csd = this_cpu_ptr(&csd_data);
638                 csd_lock(csd);
639         }
640 
641         csd->func = func;
642         csd->info = info;
643 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
644         csd->node.src = smp_processor_id();
645         csd->node.dst = cpu;
646 #endif
647 
648         err = generic_exec_single(cpu, csd);
649 
650         if (wait)
651                 csd_lock_wait(csd);
652 
653         put_cpu();
654 
655         return err;
656 }
657 EXPORT_SYMBOL(smp_call_function_single);
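/*
 * Illustrative usage sketch (editorial addition, not part of kernel/smp.c):
 * run a fast, non-blocking callback on one CPU and wait for it to finish.
 * The callback, the chosen CPU number and the helper names are hypothetical.
 */
static void example_read_cpu_id(void *info)
{
        /* Runs on the target CPU, in IPI context with interrupts disabled. */
        *(int *)info = raw_smp_processor_id();
}

static int __maybe_unused example_single_call(void)
{
        int id = -1;

        /* wait != 0: do not return until the callback has completed on CPU 1. */
        return smp_call_function_single(1, example_read_cpu_id, &id, 1);
}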
658 
659 /**
660  * smp_call_function_single_async() - Run an asynchronous function on a
661  *                               specific CPU.
662  * @cpu: The CPU to run on.
663  * @csd: Pre-allocated and setup data structure
664  *
665  * Like smp_call_function_single(), but the call is asynchronous and
666  * can thus be done from contexts with disabled interrupts.
667  *
668  * The caller passes its own pre-allocated data structure
669  * (i.e. embedded in an object) and is responsible for synchronizing it
670  * such that the IPIs performed on the @csd are strictly serialized.
671  *
672  * If the function is called with a csd that has not yet been
673  * processed by a previous call to smp_call_function_single_async(),
674  * it will return immediately with -EBUSY, indicating that the csd
675  * object is still in use.
676  *
677  * NOTE: Be careful, there is unfortunately no current debugging facility to
678  * validate the correctness of this serialization.
679  *
680  * Return: %0 on success or negative errno value on error
681  */
682 int smp_call_function_single_async(int cpu, call_single_data_t *csd)
683 {
684         int err = 0;
685 
686         preempt_disable();
687 
688         if (csd->node.u_flags & CSD_FLAG_LOCK) {
689                 err = -EBUSY;
690                 goto out;
691         }
692 
693         csd->node.u_flags = CSD_FLAG_LOCK;
694         smp_wmb();
695 
696         err = generic_exec_single(cpu, csd);
697 
698 out:
699         preempt_enable();
700 
701         return err;
702 }
703 EXPORT_SYMBOL_GPL(smp_call_function_single_async);
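/*
 * Illustrative usage sketch (editorial addition): an asynchronous call using a
 * caller-owned call_single_data_t. The csd must not be resubmitted until the
 * previous call has been processed, otherwise -EBUSY is returned (see above).
 * All names here are hypothetical.
 */
static void example_async_callback(void *info)
{
        /* Runs on the target CPU from the IPI handler; must not sleep. */
}

static call_single_data_t example_async_csd = {
        .func = example_async_callback,
};

static int __maybe_unused example_async_call(int cpu)
{
        /* May be called with interrupts disabled; does not wait. */
        return smp_call_function_single_async(cpu, &example_async_csd);
}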
704 
705 /*
706  * smp_call_function_any - Run a function on any of the given cpus
707  * @mask: The mask of cpus it can run on.
708  * @func: The function to run. This must be fast and non-blocking.
709  * @info: An arbitrary pointer to pass to the function.
710  * @wait: If true, wait until function has completed.
711  *
712  * Returns 0 on success, else a negative status code (if no cpus were online).
713  *
714  * Selection preference:
715  *      1) current cpu if in @mask
716  *      2) any cpu of current node if in @mask
717  *      3) any other online cpu in @mask
718  */
719 int smp_call_function_any(const struct cpumask *mask,
720                           smp_call_func_t func, void *info, int wait)
721 {
722         unsigned int cpu;
723         const struct cpumask *nodemask;
724         int ret;
725 
726         /* Try for same CPU (cheapest) */
727         cpu = get_cpu();
728         if (cpumask_test_cpu(cpu, mask))
729                 goto call;
730 
731         /* Try for same node. */
732         nodemask = cpumask_of_node(cpu_to_node(cpu));
733         for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
734              cpu = cpumask_next_and(cpu, nodemask, mask)) {
735                 if (cpu_online(cpu))
736                         goto call;
737         }
738 
739         /* Any online will do: smp_call_function_single handles nr_cpu_ids. */
740         cpu = cpumask_any_and(mask, cpu_online_mask);
741 call:
742         ret = smp_call_function_single(cpu, func, info, wait);
743         put_cpu();
744         return ret;
745 }
746 EXPORT_SYMBOL_GPL(smp_call_function_any);
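/*
 * Illustrative sketch (editorial addition): run a callback on whichever online
 * CPU of a NUMA node is cheapest to reach, preferring the current CPU as
 * documented above. example_func and the node id are supplied by the caller.
 */
static int __maybe_unused example_call_on_node(int nid, smp_call_func_t example_func,
                                               void *info)
{
        return smp_call_function_any(cpumask_of_node(nid), example_func, info, 1);
}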
747 
748 /*
749  * Flags to be used as scf_flags argument of smp_call_function_many_cond().
750  *
751  * %SCF_WAIT:           Wait until function execution is completed
752  * %SCF_RUN_LOCAL:      Run also locally if local cpu is set in cpumask
753  */
754 #define SCF_WAIT        (1U << 0)
755 #define SCF_RUN_LOCAL   (1U << 1)
756 
757 static void smp_call_function_many_cond(const struct cpumask *mask,
758                                         smp_call_func_t func, void *info,
759                                         unsigned int scf_flags,
760                                         smp_cond_func_t cond_func)
761 {
762         int cpu, last_cpu, this_cpu = smp_processor_id();
763         struct call_function_data *cfd;
764         bool wait = scf_flags & SCF_WAIT;
765         int nr_cpus = 0;
766         bool run_remote = false;
767         bool run_local = false;
768 
769         lockdep_assert_preemption_disabled();
770 
771         /*
772          * Can deadlock when called with interrupts disabled.
773  * We allow CPUs that are not yet online though, as no one else can
774          * send smp call function interrupt to this cpu and as such deadlocks
775          * can't happen.
776          */
777         if (cpu_online(this_cpu) && !oops_in_progress &&
778             !early_boot_irqs_disabled)
779                 lockdep_assert_irqs_enabled();
780 
781         /*
782          * When @wait we can deadlock when we interrupt between llist_add() and
783  * arch_send_call_function_ipi*(); when !@wait we can deadlock on
784  * csd_lock() because the interrupt context uses the same csd
785          * storage.
786          */
787         WARN_ON_ONCE(!in_task());
788 
789         /* Check if we need local execution. */
790         if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask))
791                 run_local = true;
792 
793         /* Check if we need remote execution, i.e., any CPU excluding this one. */
794         cpu = cpumask_first_and(mask, cpu_online_mask);
795         if (cpu == this_cpu)
796                 cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
797         if (cpu < nr_cpu_ids)
798                 run_remote = true;
799 
800         if (run_remote) {
801                 cfd = this_cpu_ptr(&cfd_data);
802                 cpumask_and(cfd->cpumask, mask, cpu_online_mask);
803                 __cpumask_clear_cpu(this_cpu, cfd->cpumask);
804 
805                 cpumask_clear(cfd->cpumask_ipi);
806                 for_each_cpu(cpu, cfd->cpumask) {
807                         call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
808 
809                         if (cond_func && !cond_func(cpu, info)) {
810                                 __cpumask_clear_cpu(cpu, cfd->cpumask);
811                                 continue;
812                         }
813 
814                         csd_lock(csd);
815                         if (wait)
816                                 csd->node.u_flags |= CSD_TYPE_SYNC;
817                         csd->func = func;
818                         csd->info = info;
819 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
820                         csd->node.src = smp_processor_id();
821                         csd->node.dst = cpu;
822 #endif
823                         trace_csd_queue_cpu(cpu, _RET_IP_, func, csd);
824 
825                         if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) {
826                                 __cpumask_set_cpu(cpu, cfd->cpumask_ipi);
827                                 nr_cpus++;
828                                 last_cpu = cpu;
829                         }
830                 }
831 
832                 /*
833                  * Choose the most efficient way to send an IPI. Note that the
834                  * number of CPUs might be zero due to concurrent changes to the
835                  * provided mask.
836                  */
837                 if (nr_cpus == 1)
838                         send_call_function_single_ipi(last_cpu);
839                 else if (likely(nr_cpus > 1))
840                         send_call_function_ipi_mask(cfd->cpumask_ipi);
841         }
842 
843         if (run_local && (!cond_func || cond_func(this_cpu, info))) {
844                 unsigned long flags;
845 
846                 local_irq_save(flags);
847                 csd_do_func(func, info, NULL);
848                 local_irq_restore(flags);
849         }
850 
851         if (run_remote && wait) {
852                 for_each_cpu(cpu, cfd->cpumask) {
853                         call_single_data_t *csd;
854 
855                         csd = per_cpu_ptr(cfd->csd, cpu);
856                         csd_lock_wait(csd);
857                 }
858         }
859 }
860 
861 /**
862  * smp_call_function_many(): Run a function on a set of CPUs.
863  * @mask: The set of cpus to run on (only runs on online subset).
864  * @func: The function to run. This must be fast and non-blocking.
865  * @info: An arbitrary pointer to pass to the function.
866  * @wait: If true, wait (atomically) until @func has completed on the
867  *        other CPUs. Note that this wrapper never runs @func on the
868  *        calling CPU even if it is set in @mask; the local CPU is
869  *        skipped (see smp_call_function_many_cond() and %SCF_RUN_LOCAL).
870  *
871  * If @wait is true, then returns once @func has returned.
872  *
873  * You must not call this function with disabled interrupts or from a
874  * hardware interrupt handler or from a bottom half handler. Preemption
875  * must be disabled when calling this function.
876  */
877 void smp_call_function_many(const struct cpumask *mask,
878                             smp_call_func_t func, void *info, bool wait)
879 {
880         smp_call_function_many_cond(mask, func, info, wait * SCF_WAIT, NULL);
881 }
882 EXPORT_SYMBOL(smp_call_function_many);
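/*
 * Illustrative sketch (editorial addition): invoke a callback on every online
 * CPU in a caller-supplied mask except the local one, with preemption disabled
 * as required by the comment above. Names are hypothetical.
 */
static void __maybe_unused example_call_mask(const struct cpumask *example_mask,
                                             smp_call_func_t example_func, void *info)
{
        preempt_disable();
        smp_call_function_many(example_mask, example_func, info, true);
        preempt_enable();
}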
883 
884 /**
885  * smp_call_function(): Run a function on all other CPUs.
886  * @func: The function to run. This must be fast and non-blocking.
887  * @info: An arbitrary pointer to pass to the function.
888  * @wait: If true, wait (atomically) until function has completed
889  *        on other CPUs.
890  *
891  * This function does not return a value.
892  *
893  * If @wait is true, it returns once @func has returned on all other
894  * CPUs; otherwise it returns just before the target CPUs call @func.
895  *
896  * You must not call this function with disabled interrupts or from a
897  * hardware interrupt handler or from a bottom half handler.
898  */
899 void smp_call_function(smp_call_func_t func, void *info, int wait)
900 {
901         preempt_disable();
902         smp_call_function_many(cpu_online_mask, func, info, wait);
903         preempt_enable();
904 }
905 EXPORT_SYMBOL(smp_call_function);
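/*
 * Illustrative sketch (editorial addition): broadcast a fast, non-blocking
 * callback to all other online CPUs and wait for completion. The callback is
 * hypothetical.
 */
static void example_sync_point(void *unused)
{
        /* Per-CPU work; runs in IPI context with interrupts disabled. */
}

static void __maybe_unused example_broadcast(void)
{
        smp_call_function(example_sync_point, NULL, 1);
}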
906 
907 /* Setup configured maximum number of CPUs to activate */
908 unsigned int setup_max_cpus = NR_CPUS;
909 EXPORT_SYMBOL(setup_max_cpus);
910 
911 
912 /*
913  * Setup routine for controlling SMP activation
914  *
915  * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
916  * activation entirely (the MPS table probe still happens, though).
917  *
918  * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
919  * greater than 0, limits the maximum number of CPUs activated in
920  * SMP mode to <NUM>.
921  */
922 
923 void __weak __init arch_disable_smp_support(void) { }
924 
925 static int __init nosmp(char *str)
926 {
927         setup_max_cpus = 0;
928         arch_disable_smp_support();
929 
930         return 0;
931 }
932 
933 early_param("nosmp", nosmp);
934 
935 /* This is the hard limit. */
936 static int __init nrcpus(char *str)
937 {
938         int nr_cpus;
939 
940         if (get_option(&str, &nr_cpus) && nr_cpus > 0 && nr_cpus < nr_cpu_ids)
941                 set_nr_cpu_ids(nr_cpus);
942 
943         return 0;
944 }
945 
946 early_param("nr_cpus", nrcpus);
947 
948 static int __init maxcpus(char *str)
949 {
950         get_option(&str, &setup_max_cpus);
951         if (setup_max_cpus == 0)
952                 arch_disable_smp_support();
953 
954         return 0;
955 }
956 
957 early_param("maxcpus", maxcpus);
958 
959 #if (NR_CPUS > 1) && !defined(CONFIG_FORCE_NR_CPUS)
960 /* Setup number of possible processor ids */
961 unsigned int nr_cpu_ids __read_mostly = NR_CPUS;
962 EXPORT_SYMBOL(nr_cpu_ids);
963 #endif
964 
965 /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
966 void __init setup_nr_cpu_ids(void)
967 {
968         set_nr_cpu_ids(find_last_bit(cpumask_bits(cpu_possible_mask), NR_CPUS) + 1);
969 }
970 
971 /* Called by boot processor to activate the rest. */
972 void __init smp_init(void)
973 {
974         int num_nodes, num_cpus;
975 
976         idle_threads_init();
977         cpuhp_threads_init();
978 
979         pr_info("Bringing up secondary CPUs ...\n");
980 
981         bringup_nonboot_cpus(setup_max_cpus);
982 
983         num_nodes = num_online_nodes();
984         num_cpus  = num_online_cpus();
985         pr_info("Brought up %d node%s, %d CPU%s\n",
986                 num_nodes, str_plural(num_nodes), num_cpus, str_plural(num_cpus));
987 
988         /* Any cleanup work */
989         smp_cpus_done(setup_max_cpus);
990 }
991 
992 /*
993  * on_each_cpu_cond(): Call a function on each processor for which
994  * the supplied function cond_func returns true, optionally waiting
995  * for all the required CPUs to finish. This may include the local
996  * processor.
997  * @cond_func:  A callback function that is passed a cpu id and
998  *              the info parameter. The function is called
999  *              with preemption disabled. The function should
1000  *              return a boolean value indicating whether to IPI
1001  *              the specified CPU.
1002  * @func:       The function to run on all applicable CPUs.
1003  *              This must be fast and non-blocking.
1004  * @info:       An arbitrary pointer to pass to both functions.
1005  * @wait:       If true, wait (atomically) until function has
1006  *              completed on other CPUs.
1007  *
1008  * Preemption is disabled to protect against CPUs going offline but not online.
1009  * CPUs going online during the call will not be seen or sent an IPI.
1010  *
1011  * You must not call this function with disabled interrupts or
1012  * from a hardware interrupt handler or from a bottom half handler.
1013  */
1014 void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
1015                            void *info, bool wait, const struct cpumask *mask)
1016 {
1017         unsigned int scf_flags = SCF_RUN_LOCAL;
1018 
1019         if (wait)
1020                 scf_flags |= SCF_WAIT;
1021 
1022         preempt_disable();
1023         smp_call_function_many_cond(mask, func, info, scf_flags, cond_func);
1024         preempt_enable();
1025 }
1026 EXPORT_SYMBOL(on_each_cpu_cond_mask);
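/*
 * Illustrative sketch (editorial addition): IPI only those online CPUs for
 * which a predicate returns true, then wait for the callbacks to finish.
 * Both callbacks are hypothetical.
 */
static bool example_cpu_needs_work(int cpu, void *info)
{
        /* Called with preemption disabled; decides whether @cpu gets an IPI. */
        return true;
}

static void example_do_work(void *info)
{
        /* Runs on each selected CPU, possibly including the local one. */
}

static void __maybe_unused example_conditional_call(void)
{
        on_each_cpu_cond_mask(example_cpu_needs_work, example_do_work,
                              NULL, true, cpu_online_mask);
}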
1027 
1028 static void do_nothing(void *unused)
1029 {
1030 }
1031 
1032 /**
1033  * kick_all_cpus_sync - Force all cpus out of idle
1034  *
1035  * Used to synchronize the update of pm_idle function pointer. It's
1036  * called after the pointer is updated and returns after the dummy
1037  * callback function has been executed on all cpus. The execution of
1038  * the function can only happen on the remote cpus after they have
1039  * left the idle function which had been called via pm_idle function
1040  * pointer. So it's guaranteed that nothing uses the previous pointer
1041  * anymore.
1042  */
1043 void kick_all_cpus_sync(void)
1044 {
1045         /* Make sure the change is visible before we kick the cpus */
1046         smp_mb();
1047         smp_call_function(do_nothing, NULL, 1);
1048 }
1049 EXPORT_SYMBOL_GPL(kick_all_cpus_sync);
1050 
1051 /**
1052  * wake_up_all_idle_cpus - break all cpus out of idle
1053  * wake_up_all_idle_cpus() tries to break all CPUs out of the idle state,
1054  * including CPUs that are polling in idle; CPUs that are not idle are
1055  * left alone.
1056  */
1057 void wake_up_all_idle_cpus(void)
1058 {
1059         int cpu;
1060 
1061         for_each_possible_cpu(cpu) {
1062                 preempt_disable();
1063                 if (cpu != smp_processor_id() && cpu_online(cpu))
1064                         wake_up_if_idle(cpu);
1065                 preempt_enable();
1066         }
1067 }
1068 EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);
1069 
1070 /**
1071  * struct smp_call_on_cpu_struct - Call a function on a specific CPU
1072  * @work: &work_struct
1073  * @done: &completion to signal
1074  * @func: function to call
1075  * @data: function's data argument
1076  * @ret: return value from @func
1077  * @cpu: target CPU (%-1 for any CPU)
1078  *
1079  * Used to call a function on a specific cpu and wait for it to return.
1080  * Optionally make sure the call is done on a specified physical cpu via vcpu
1081  * pinning in order to support virtualized environments.
1082  */
1083 struct smp_call_on_cpu_struct {
1084         struct work_struct      work;
1085         struct completion       done;
1086         int                     (*func)(void *);
1087         void                    *data;
1088         int                     ret;
1089         int                     cpu;
1090 };
1091 
1092 static void smp_call_on_cpu_callback(struct work_struct *work)
1093 {
1094         struct smp_call_on_cpu_struct *sscs;
1095 
1096         sscs = container_of(work, struct smp_call_on_cpu_struct, work);
1097         if (sscs->cpu >= 0)
1098                 hypervisor_pin_vcpu(sscs->cpu);
1099         sscs->ret = sscs->func(sscs->data);
1100         if (sscs->cpu >= 0)
1101                 hypervisor_pin_vcpu(-1);
1102 
1103         complete(&sscs->done);
1104 }
1105 
1106 int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
1107 {
1108         struct smp_call_on_cpu_struct sscs = {
1109                 .done = COMPLETION_INITIALIZER_ONSTACK(sscs.done),
1110                 .func = func,
1111                 .data = par,
1112                 .cpu  = phys ? cpu : -1,
1113         };
1114 
1115         INIT_WORK_ONSTACK(&sscs.work, smp_call_on_cpu_callback);
1116 
1117         if (cpu >= nr_cpu_ids || !cpu_online(cpu))
1118                 return -ENXIO;
1119 
1120         queue_work_on(cpu, system_wq, &sscs.work);
1121         wait_for_completion(&sscs.done);
1122         destroy_work_on_stack(&sscs.work);
1123 
1124         return sscs.ret;
1125 }
1126 EXPORT_SYMBOL_GPL(smp_call_on_cpu);
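/*
 * Illustrative sketch (editorial addition): run a function that may sleep on a
 * specific CPU via the workqueue-based helper above. phys=false means no
 * hypervisor vCPU pinning is requested. Names are hypothetical.
 */
static int example_sleepable_op(void *data)
{
        /* Runs from a kworker bound to the chosen CPU; sleeping is allowed. */
        return 0;
}

static int __maybe_unused example_call_on_cpu2(void)
{
        return smp_call_on_cpu(2, example_sleepable_op, NULL, false);
}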
1127 
