Linux/arch/powerpc/lib/qspinlock.c

  1 // SPDX-License-Identifier: GPL-2.0-or-later
  2 #include <linux/bug.h>
  3 #include <linux/compiler.h>
  4 #include <linux/export.h>
  5 #include <linux/percpu.h>
  6 #include <linux/processor.h>
  7 #include <linux/smp.h>
  8 #include <linux/topology.h>
  9 #include <linux/sched/clock.h>
 10 #include <asm/qspinlock.h>
 11 #include <asm/paravirt.h>
 12 
 13 #define MAX_NODES       4
 14 
 15 struct qnode {
 16         struct qnode    *next;
 17         struct qspinlock *lock;
 18         int             cpu;
 19         u8              sleepy; /* 1 if the previous vCPU was preempted or
 20                                  * if the previous node was sleepy */
 21         u8              locked; /* 1 if lock acquired */
 22 };
 23 
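    /*
     * Editor's note (not in the original source): each CPU carries MAX_NODES
     * queue nodes because it can be queued on several locks at once, one per
     * context that may nest -- presumably task, soft IRQ, hard IRQ and NMI,
     * the same scheme used for the generic qspinlock MCS nodes.
     */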
 24 struct qnodes {
 25         int             count;
 26         struct qnode nodes[MAX_NODES];
 27 };
 28 
 29 /* Tuning parameters */
 30 static int steal_spins __read_mostly = (1 << 5);
 31 static int remote_steal_spins __read_mostly = (1 << 2);
 32 #if _Q_SPIN_TRY_LOCK_STEAL == 1
 33 static const bool maybe_stealers = true;
 34 #else
 35 static bool maybe_stealers __read_mostly = true;
 36 #endif
 37 static int head_spins __read_mostly = (1 << 8);
 38 
 39 static bool pv_yield_owner __read_mostly = true;
 40 static bool pv_yield_allow_steal __read_mostly = false;
 41 static bool pv_spin_on_preempted_owner __read_mostly = false;
 42 static bool pv_sleepy_lock __read_mostly = true;
 43 static bool pv_sleepy_lock_sticky __read_mostly = false;
 44 static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
 45 static int pv_sleepy_lock_factor __read_mostly = 256;
 46 static bool pv_yield_prev __read_mostly = true;
 47 static bool pv_yield_sleepy_owner __read_mostly = true;
 48 static bool pv_prod_head __read_mostly = false;
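    /*
     * Editor's note (not in the original source): all of the tunables above
     * can be changed at run time through the qspl_* debugfs files created by
     * spinlock_debugfs_init() at the bottom of this file.
     */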
 49 
 50 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
 51 static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);
 52 
 53 #if _Q_SPIN_SPEC_BARRIER == 1
 54 #define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
 55 #else
 56 #define spec_barrier() do { } while (0)
 57 #endif
 58 
 59 static __always_inline bool recently_sleepy(void)
 60 {
 61         /* pv_sleepy_lock is true when this is called */
 62         if (pv_sleepy_lock_interval_ns) {
 63                 u64 seen = this_cpu_read(sleepy_lock_seen_clock);
 64 
 65                 if (seen) {
 66                         u64 delta = sched_clock() - seen;
 67                         if (delta < pv_sleepy_lock_interval_ns)
 68                                 return true;
 69                         this_cpu_write(sleepy_lock_seen_clock, 0);
 70                 }
 71         }
 72 
 73         return false;
 74 }
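    /*
     * Editor's summary (not in the original source): "sleepy" is a paravirt
     * heuristic. When a preempted lock owner is observed, seen_sleepy_owner()
     * sets _Q_SLEEPY_VAL in the lock word and/or records a per-CPU timestamp;
     * while a lock is considered sleepy, the spin limits returned by the
     * helpers below are scaled up by pv_sleepy_lock_factor.
     */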
 75 
 76 static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
 77 {
 78         if (paravirt && sleepy)
 79                 return steal_spins * pv_sleepy_lock_factor;
 80         else
 81                 return steal_spins;
 82 }
 83 
 84 static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
 85 {
 86         if (paravirt && sleepy)
 87                 return remote_steal_spins * pv_sleepy_lock_factor;
 88         else
 89                 return remote_steal_spins;
 90 }
 91 
 92 static __always_inline int get_head_spins(bool paravirt, bool sleepy)
 93 {
 94         if (paravirt && sleepy)
 95                 return head_spins * pv_sleepy_lock_factor;
 96         else
 97                 return head_spins;
 98 }
 99 
100 static inline u32 encode_tail_cpu(int cpu)
101 {
102         return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
103 }
104 
105 static inline int decode_tail_cpu(u32 val)
106 {
107         return (val >> _Q_TAIL_CPU_OFFSET) - 1;
108 }
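    /*
     * Editor's illustration (not in the original source): the tail field
     * stores cpu + 1 so that an encoded tail is never zero, which lets
     * (val & _Q_TAIL_CPU_MASK) == 0 mean "no CPU queued". The two helpers
     * are inverses: decode_tail_cpu(encode_tail_cpu(cpu)) == cpu.
     */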
109 
110 static inline int get_owner_cpu(u32 val)
111 {
112         return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
113 }
114 
115 /*
116  * Try to acquire the lock if it was not already locked. If the tail matches
117  * mytail then clear it, otherwise leave it unchanged. Return previous value.
118  *
119  * This is used by the head of the queue to acquire the lock and clean up
120  * its tail if it was the last one queued.
121  */
122 static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
123 {
124         u32 newval = queued_spin_encode_locked_val();
125         u32 prev, tmp;
126 
127         asm volatile(
128 "1:     lwarx   %0,0,%2,%7      # trylock_clean_tail                    \n"
129         /* This test is necessary if there could be stealers */
130 "       andi.   %1,%0,%5                                                \n"
131 "       bne     3f                                                      \n"
132         /* Test whether the lock tail == mytail */
133 "       and     %1,%0,%6                                                \n"
134 "       cmpw    0,%1,%3                                                 \n"
135         /* Merge the new locked value */
136 "       or      %1,%1,%4                                                \n"
137 "       bne     2f                                                      \n"
138         /* If the lock tail matched, then clear it, otherwise leave it. */
139 "       andc    %1,%1,%6                                                \n"
140 "2:     stwcx.  %1,0,%2                                                 \n"
141 "       bne-    1b                                                      \n"
142 "\t"    PPC_ACQUIRE_BARRIER "                                           \n"
143 "3:                                                                     \n"
144         : "=&r" (prev), "=&r" (tmp)
145         : "r" (&lock->val), "r"(tail), "r" (newval),
146           "i" (_Q_LOCKED_VAL),
147           "r" (_Q_TAIL_CPU_MASK),
148           "i" (_Q_SPIN_EH_HINT)
149         : "cr0", "memory");
150 
151         return prev;
152 }
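    /*
     * Editor's illustration (not in the original source): ignoring the
     * larx/stcx. details, the sequence above behaves roughly like this
     * compare-and-swap loop:
     *
     *	for (;;) {
     *		u32 old = READ_ONCE(lock->val);
     *
     *		if (old & _Q_LOCKED_VAL)
     *			return old;	// locked: no store, caller keeps waiting
     *		if ((old & _Q_TAIL_CPU_MASK) == tail)
     *			new = newval;	// we were last queued: clear the tail
     *		else
     *			new = (old & _Q_TAIL_CPU_MASK) | newval; // keep the tail
     *		if (cmpxchg_acquire(&lock->val, old, new) == old)
     *			return old;
     *	}
     */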
153 
154 /*
155  * Publish our tail, replacing previous tail. Return previous value.
156  *
157  * This provides a release barrier for publishing the node; it pairs with the
158  * acquire barrier in get_tail_qnode() when the next CPU finds this tail
159  * value.
160  */
161 static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
162 {
163         u32 prev, tmp;
164 
165         kcsan_release();
166 
167         asm volatile(
168 "\t"    PPC_RELEASE_BARRIER "                                           \n"
169 "1:     lwarx   %0,0,%2         # publish_tail_cpu                      \n"
170 "       andc    %1,%0,%4                                                \n"
171 "       or      %1,%1,%3                                                \n"
172 "       stwcx.  %1,0,%2                                                 \n"
173 "       bne-    1b                                                      \n"
174         : "=&r" (prev), "=&r"(tmp)
175         : "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
176         : "cr0", "memory");
177 
178         return prev;
179 }
180 
181 static __always_inline u32 set_mustq(struct qspinlock *lock)
182 {
183         u32 prev;
184 
185         asm volatile(
186 "1:     lwarx   %0,0,%1         # set_mustq                             \n"
187 "       or      %0,%0,%2                                                \n"
188 "       stwcx.  %0,0,%1                                                 \n"
189 "       bne-    1b                                                      \n"
190         : "=&r" (prev)
191         : "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
192         : "cr0", "memory");
193 
194         return prev;
195 }
196 
197 static __always_inline u32 clear_mustq(struct qspinlock *lock)
198 {
199         u32 prev;
200 
201         asm volatile(
202 "1:     lwarx   %0,0,%1         # clear_mustq                           \n"
203 "       andc    %0,%0,%2                                                \n"
204 "       stwcx.  %0,0,%1                                                 \n"
205 "       bne-    1b                                                      \n"
206         : "=&r" (prev)
207         : "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
208         : "cr0", "memory");
209 
210         return prev;
211 }
212 
213 static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
214 {
215         u32 prev;
216         u32 new = old | _Q_SLEEPY_VAL;
217 
218         BUG_ON(!(old & _Q_LOCKED_VAL));
219         BUG_ON(old & _Q_SLEEPY_VAL);
220 
221         asm volatile(
222 "1:     lwarx   %0,0,%1         # try_set_sleepy                        \n"
223 "       cmpw    0,%0,%2                                                 \n"
224 "       bne-    2f                                                      \n"
225 "       stwcx.  %3,0,%1                                                 \n"
226 "       bne-    1b                                                      \n"
227 "2:                                                                     \n"
228         : "=&r" (prev)
229         : "r" (&lock->val), "r"(old), "r" (new)
230         : "cr0", "memory");
231 
232         return likely(prev == old);
233 }
234 
235 static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
236 {
237         if (pv_sleepy_lock) {
238                 if (pv_sleepy_lock_interval_ns)
239                         this_cpu_write(sleepy_lock_seen_clock, sched_clock());
240                 if (!(val & _Q_SLEEPY_VAL))
241                         try_set_sleepy(lock, val);
242         }
243 }
244 
245 static __always_inline void seen_sleepy_lock(void)
246 {
247         if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
248                 this_cpu_write(sleepy_lock_seen_clock, sched_clock());
249 }
250 
251 static __always_inline void seen_sleepy_node(void)
252 {
253         if (pv_sleepy_lock) {
254                 if (pv_sleepy_lock_interval_ns)
255                         this_cpu_write(sleepy_lock_seen_clock, sched_clock());
256                 /* Don't set sleepy because we likely have a stale val */
257         }
258 }
259 
260 static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu)
261 {
262         struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu);
263         int idx;
264 
265         /*
266          * After publishing the new tail and finding a previous tail in the
267          * previous val (which is the control dependency), this barrier
268          * orders the release barrier in publish_tail_cpu performed by the
269          * last CPU, with subsequently looking at its qnode structures
270          * after the barrier.
271          */
272         smp_acquire__after_ctrl_dep();
273 
274         for (idx = 0; idx < MAX_NODES; idx++) {
275                 struct qnode *qnode = &qnodesp->nodes[idx];
276                 if (qnode->lock == lock)
277                         return qnode;
278         }
279 
280         BUG();
281 }
282 
283 /* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
284 static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
285 {
286         int owner;
287         u32 yield_count;
288         bool preempted = false;
289 
290         BUG_ON(!(val & _Q_LOCKED_VAL));
291 
292         if (!paravirt)
293                 goto relax;
294 
295         if (!pv_yield_owner)
296                 goto relax;
297 
298         owner = get_owner_cpu(val);
299         yield_count = yield_count_of(owner);
300 
301         if ((yield_count & 1) == 0)
302                 goto relax; /* owner vcpu is running */
303 
304         spin_end();
305 
306         seen_sleepy_owner(lock, val);
307         preempted = true;
308 
309         /*
310          * Read the lock word after sampling the yield count. On the other side
311          * there may be a wmb because the yield count update is done by the
312          * hypervisor preemption and the value update by the OS; however, this
313          * ordering might reduce the chance of out of order accesses and
314          * improve the heuristic.
315          */
316         smp_rmb();
317 
318         if (READ_ONCE(lock->val) == val) {
319                 if (mustq)
320                         clear_mustq(lock);
321                 yield_to_preempted(owner, yield_count);
322                 if (mustq)
323                         set_mustq(lock);
324                 spin_begin();
325 
326                 /* Don't relax if we yielded. Maybe we should? */
327                 return preempted;
328         }
329         spin_begin();
330 relax:
331         spin_cpu_relax();
332 
333         return preempted;
334 }
335 
336 /* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
337 static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
338 {
339         return __yield_to_locked_owner(lock, val, paravirt, false);
340 }
341 
342 /* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
343 static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
344 {
345         bool mustq = false;
346 
347         if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
348                 mustq = true;
349 
350         return __yield_to_locked_owner(lock, val, paravirt, mustq);
351 }
352 
353 static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt)
354 {
355         struct qnode *next;
356         int owner;
357 
358         if (!paravirt)
359                 return;
360         if (!pv_yield_sleepy_owner)
361                 return;
362 
363         next = READ_ONCE(node->next);
364         if (!next)
365                 return;
366 
367         if (next->sleepy)
368                 return;
369 
370         owner = get_owner_cpu(val);
371         if (vcpu_is_preempted(owner))
372                 next->sleepy = 1;
373 }
374 
375 /* Called inside spin_begin() */
376 static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
377 {
378         u32 yield_count;
379         bool preempted = false;
380 
381         if (!paravirt)
382                 goto relax;
383 
384         if (!pv_yield_sleepy_owner)
385                 goto yield_prev;
386 
387         /*
388          * If the previous waiter was preempted it might not be able to
389          * propagate sleepy to us, so check the lock in that case too.
390          */
391         if (node->sleepy || vcpu_is_preempted(prev_cpu)) {
392                 u32 val = READ_ONCE(lock->val);
393 
394                 if (val & _Q_LOCKED_VAL) {
395                         if (node->next && !node->next->sleepy) {
396                                 /*
397                                  * Propagate sleepy to next waiter. Only if
398                                  * owner is preempted, which allows the queue
399                                  * to become "non-sleepy" if vCPU preemption
400                                  * ceases to occur, even if the lock remains
401                                  * highly contended.
402                                  */
403                                 if (vcpu_is_preempted(get_owner_cpu(val)))
404                                         node->next->sleepy = 1;
405                         }
406 
407                         preempted = yield_to_locked_owner(lock, val, paravirt);
408                         if (preempted)
409                                 return preempted;
410                 }
411                 node->sleepy = false;
412         }
413 
414 yield_prev:
415         if (!pv_yield_prev)
416                 goto relax;
417 
418         yield_count = yield_count_of(prev_cpu);
419         if ((yield_count & 1) == 0)
420                 goto relax; /* owner vcpu is running */
421 
422         spin_end();
423 
424         preempted = true;
425         seen_sleepy_node();
426 
427         smp_rmb(); /* See __yield_to_locked_owner comment */
428 
429         if (!READ_ONCE(node->locked)) {
430                 yield_to_preempted(prev_cpu, yield_count);
431                 spin_begin();
432                 return preempted;
433         }
434         spin_begin();
435 
436 relax:
437         spin_cpu_relax();
438 
439         return preempted;
440 }
441 
442 static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
443 {
444         if (iters >= get_steal_spins(paravirt, sleepy))
445                 return true;
446 
447         if (IS_ENABLED(CONFIG_NUMA) &&
448             (iters >= get_remote_steal_spins(paravirt, sleepy))) {
449                 int cpu = get_owner_cpu(val);
450                 if (numa_node_id() != cpu_to_node(cpu))
451                         return true;
452         }
453         return false;
454 }
455 
456 static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
457 {
458         bool seen_preempted = false;
459         bool sleepy = false;
460         int iters = 0;
461         u32 val;
462 
463         if (!steal_spins) {
464                 /* XXX: should spin_on_preempted_owner do anything here? */
465                 return false;
466         }
467 
468         /* Attempt to steal the lock */
469         spin_begin();
470         do {
471                 bool preempted = false;
472 
473                 val = READ_ONCE(lock->val);
474                 if (val & _Q_MUST_Q_VAL)
475                         break;
476                 spec_barrier();
477 
478                 if (unlikely(!(val & _Q_LOCKED_VAL))) {
479                         spin_end();
480                         if (__queued_spin_trylock_steal(lock))
481                                 return true;
482                         spin_begin();
483                 } else {
484                         preempted = yield_to_locked_owner(lock, val, paravirt);
485                 }
486 
487                 if (paravirt && pv_sleepy_lock) {
488                         if (!sleepy) {
489                                 if (val & _Q_SLEEPY_VAL) {
490                                         seen_sleepy_lock();
491                                         sleepy = true;
492                                 } else if (recently_sleepy()) {
493                                         sleepy = true;
494                                 }
495                         }
496                         if (pv_sleepy_lock_sticky && seen_preempted &&
497                             !(val & _Q_SLEEPY_VAL)) {
498                                 if (try_set_sleepy(lock, val))
499                                         val |= _Q_SLEEPY_VAL;
500                         }
501                 }
502 
503                 if (preempted) {
504                         seen_preempted = true;
505                         sleepy = true;
506                         if (!pv_spin_on_preempted_owner)
507                                 iters++;
508                         /*
509                          * With pv_spin_on_preempted_owner, don't increase iters
510                          * while the owner is preempted -- we won't interfere
511                          * with it by definition. This could introduce some
512                          * latency issue if we continually observe preempted
513                          * owners, but hopefully that's a rare corner case of
514                          * a badly oversubscribed system.
515                          */
516                 } else {
517                         iters++;
518                 }
519         } while (!steal_break(val, iters, paravirt, sleepy));
520 
521         spin_end();
522 
523         return false;
524 }
525 
526 static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
527 {
528         struct qnodes *qnodesp;
529         struct qnode *next, *node;
530         u32 val, old, tail;
531         bool seen_preempted = false;
532         bool sleepy = false;
533         bool mustq = false;
534         int idx;
535         int iters = 0;
536 
537         BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
538 
539         qnodesp = this_cpu_ptr(&qnodes);
540         if (unlikely(qnodesp->count >= MAX_NODES)) {
541                 spec_barrier();
542                 while (!queued_spin_trylock(lock))
543                         cpu_relax();
544                 return;
545         }
546 
547         idx = qnodesp->count++;
548         /*
549          * Ensure that we increment the head node->count before initialising
550          * the actual node. If the compiler is kind enough to reorder these
551          * stores, then an IRQ could overwrite our assignments.
552          */
553         barrier();
554         node = &qnodesp->nodes[idx];
555         node->next = NULL;
556         node->lock = lock;
557         node->cpu = smp_processor_id();
558         node->sleepy = 0;
559         node->locked = 0;
560 
561         tail = encode_tail_cpu(node->cpu);
562 
563         /*
564          * Assign all attributes of a node before it can be published.
565          * Issues an lwsync, serving as a release barrier, as well as a
566          * compiler barrier.
567          */
568         old = publish_tail_cpu(lock, tail);
569 
570         /*
571          * If there was a previous node; link it and wait until reaching the
572          * head of the waitqueue.
573          */
574         if (old & _Q_TAIL_CPU_MASK) {
575                 int prev_cpu = decode_tail_cpu(old);
576                 struct qnode *prev = get_tail_qnode(lock, prev_cpu);
577 
578                 /* Link @node into the waitqueue. */
579                 WRITE_ONCE(prev->next, node);
580 
581                 /* Wait for mcs node lock to be released */
582                 spin_begin();
583                 while (!READ_ONCE(node->locked)) {
584                         spec_barrier();
585 
586                         if (yield_to_prev(lock, node, prev_cpu, paravirt))
587                                 seen_preempted = true;
588                 }
589                 spec_barrier();
590                 spin_end();
591 
592                 smp_rmb(); /* acquire barrier for the mcs lock */
593 
594                 /*
595                  * Generic qspinlocks have this prefetch here, but it seems
596                  * like it could cause additional line transitions because
597                  * the waiter will keep loading from it.
598                  */
599                 if (_Q_SPIN_PREFETCH_NEXT) {
600                         next = READ_ONCE(node->next);
601                         if (next)
602                                 prefetchw(next);
603                 }
604         }
605 
606         /* We're at the head of the waitqueue, wait for the lock. */
607 again:
608         spin_begin();
609         for (;;) {
610                 bool preempted;
611 
612                 val = READ_ONCE(lock->val);
613                 if (!(val & _Q_LOCKED_VAL))
614                         break;
615                 spec_barrier();
616 
617                 if (paravirt && pv_sleepy_lock && maybe_stealers) {
618                         if (!sleepy) {
619                                 if (val & _Q_SLEEPY_VAL) {
620                                         seen_sleepy_lock();
621                                         sleepy = true;
622                                 } else if (recently_sleepy()) {
623                                         sleepy = true;
624                                 }
625                         }
626                         if (pv_sleepy_lock_sticky && seen_preempted &&
627                             !(val & _Q_SLEEPY_VAL)) {
628                                 if (try_set_sleepy(lock, val))
629                                         val |= _Q_SLEEPY_VAL;
630                         }
631                 }
632 
633                 propagate_sleepy(node, val, paravirt);
634                 preempted = yield_head_to_locked_owner(lock, val, paravirt);
635                 if (!maybe_stealers)
636                         continue;
637 
638                 if (preempted)
639                         seen_preempted = true;
640 
641                 if (paravirt && preempted) {
642                         sleepy = true;
643 
644                         if (!pv_spin_on_preempted_owner)
645                                 iters++;
646                 } else {
647                         iters++;
648                 }
649 
650                 if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
651                         mustq = true;
652                         set_mustq(lock);
653                         val |= _Q_MUST_Q_VAL;
654                 }
655         }
656         spec_barrier();
657         spin_end();
658 
659         /* If we're the last queued, must clean up the tail. */
660         old = trylock_clean_tail(lock, tail);
661         if (unlikely(old & _Q_LOCKED_VAL)) {
662                 BUG_ON(!maybe_stealers);
663                 goto again; /* Can only be true if maybe_stealers. */
664         }
665 
666         if ((old & _Q_TAIL_CPU_MASK) == tail)
667                 goto release; /* We were the tail, no next. */
668 
669         /* There is a next, must wait for node->next != NULL (MCS protocol) */
670         next = READ_ONCE(node->next);
671         if (!next) {
672                 spin_begin();
673                 while (!(next = READ_ONCE(node->next)))
674                         cpu_relax();
675                 spin_end();
676         }
677         spec_barrier();
678 
679         /*
680          * Unlock the next mcs waiter node. Release barrier is not required
681          * here because the acquirer is only accessing the lock word, and
682          * the acquire barrier we took the lock with orders that update vs
683          * this store to locked. The corresponding barrier is the smp_rmb()
684          * acquire barrier for mcs lock, above.
685          */
686         if (paravirt && pv_prod_head) {
687                 int next_cpu = next->cpu;
688                 WRITE_ONCE(next->locked, 1);
689                 if (_Q_SPIN_MISO)
690                         asm volatile("miso" ::: "memory");
691                 if (vcpu_is_preempted(next_cpu))
692                         prod_cpu(next_cpu);
693         } else {
694                 WRITE_ONCE(next->locked, 1);
695                 if (_Q_SPIN_MISO)
696                         asm volatile("miso" ::: "memory");
697         }
698 
699 release:
700         /*
701          * Clear the lock before releasing the node, as another CPU might see stale
702          * values if an interrupt occurs after we increment qnodesp->count
703          * but before node->lock is initialized. The barrier ensures that
704          * there are no further stores to the node after it has been released.
705          */
706         node->lock = NULL;
707         barrier();
708         qnodesp->count--;
709 }
710 
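    /*
     * Editor's summary (not in the original source): the slow path first
     * tries to "steal" the lock by spinning on the lock word without queuing
     * (bounded by steal_spins / remote_steal_spins), then joins the MCS
     * queue, spins on its own node->locked until the previous waiter hands
     * over, and finally, as queue head, spins on the lock word itself; after
     * head_spins iterations it sets _Q_MUST_Q_VAL to shut out stealers. On
     * shared-processor guests the pv_* options additionally yield the vCPU
     * to preempted owners or predecessors instead of spinning.
     */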
711 void queued_spin_lock_slowpath(struct qspinlock *lock)
712 {
713         /*
714          * This looks funny, but it induces the compiler to inline both
715          * sides of the branch rather than share code as when the condition
716          * is passed as the paravirt argument to the functions.
717          */
718         if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
719                 if (try_to_steal_lock(lock, true)) {
720                         spec_barrier();
721                         return;
722                 }
723                 queued_spin_lock_mcs_queue(lock, true);
724         } else {
725                 if (try_to_steal_lock(lock, false)) {
726                         spec_barrier();
727                         return;
728                 }
729                 queued_spin_lock_mcs_queue(lock, false);
730         }
731 }
732 EXPORT_SYMBOL(queued_spin_lock_slowpath);
733 
734 #ifdef CONFIG_PARAVIRT_SPINLOCKS
735 void pv_spinlocks_init(void)
736 {
737 }
738 #endif
739 
740 #include <linux/debugfs.h>
741 static int steal_spins_set(void *data, u64 val)
742 {
743 #if _Q_SPIN_TRY_LOCK_STEAL == 1
744         /* MAYBE_STEAL remains true */
745         steal_spins = val;
746 #else
747         static DEFINE_MUTEX(lock);
748 
749         /*
750          * The lock slow path has a !maybe_stealers case that can assume
751          * the head of queue will not see concurrent waiters. That waiter
752          * is unsafe in the presence of stealers, so must keep them away
753          * from one another.
754          */
755 
756         mutex_lock(&lock);
757         if (val && !steal_spins) {
758                 maybe_stealers = true;
759                 /* wait for queue head waiter to go away */
760                 synchronize_rcu();
761                 steal_spins = val;
762         } else if (!val && steal_spins) {
763                 steal_spins = val;
764                 /* wait for all possible stealers to go away */
765                 synchronize_rcu();
766                 maybe_stealers = false;
767         } else {
768                 steal_spins = val;
769         }
770         mutex_unlock(&lock);
771 #endif
772 
773         return 0;
774 }
775 
776 static int steal_spins_get(void *data, u64 *val)
777 {
778         *val = steal_spins;
779 
780         return 0;
781 }
782 
783 DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");
784 
785 static int remote_steal_spins_set(void *data, u64 val)
786 {
787         remote_steal_spins = val;
788 
789         return 0;
790 }
791 
792 static int remote_steal_spins_get(void *data, u64 *val)
793 {
794         *val = remote_steal_spins;
795 
796         return 0;
797 }
798 
799 DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");
800 
801 static int head_spins_set(void *data, u64 val)
802 {
803         head_spins = val;
804 
805         return 0;
806 }
807 
808 static int head_spins_get(void *data, u64 *val)
809 {
810         *val = head_spins;
811 
812         return 0;
813 }
814 
815 DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");
816 
817 static int pv_yield_owner_set(void *data, u64 val)
818 {
819         pv_yield_owner = !!val;
820 
821         return 0;
822 }
823 
824 static int pv_yield_owner_get(void *data, u64 *val)
825 {
826         *val = pv_yield_owner;
827 
828         return 0;
829 }
830 
831 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
832 
833 static int pv_yield_allow_steal_set(void *data, u64 val)
834 {
835         pv_yield_allow_steal = !!val;
836 
837         return 0;
838 }
839 
840 static int pv_yield_allow_steal_get(void *data, u64 *val)
841 {
842         *val = pv_yield_allow_steal;
843 
844         return 0;
845 }
846 
847 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");
848 
849 static int pv_spin_on_preempted_owner_set(void *data, u64 val)
850 {
851         pv_spin_on_preempted_owner = !!val;
852 
853         return 0;
854 }
855 
856 static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
857 {
858         *val = pv_spin_on_preempted_owner;
859 
860         return 0;
861 }
862 
863 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");
864 
865 static int pv_sleepy_lock_set(void *data, u64 val)
866 {
867         pv_sleepy_lock = !!val;
868 
869         return 0;
870 }
871 
872 static int pv_sleepy_lock_get(void *data, u64 *val)
873 {
874         *val = pv_sleepy_lock;
875 
876         return 0;
877 }
878 
879 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");
880 
881 static int pv_sleepy_lock_sticky_set(void *data, u64 val)
882 {
883         pv_sleepy_lock_sticky = !!val;
884 
885         return 0;
886 }
887 
888 static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
889 {
890         *val = pv_sleepy_lock_sticky;
891 
892         return 0;
893 }
894 
895 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");
896 
897 static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
898 {
899         pv_sleepy_lock_interval_ns = val;
900 
901         return 0;
902 }
903 
904 static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
905 {
906         *val = pv_sleepy_lock_interval_ns;
907 
908         return 0;
909 }
910 
911 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");
912 
913 static int pv_sleepy_lock_factor_set(void *data, u64 val)
914 {
915         pv_sleepy_lock_factor = val;
916 
917         return 0;
918 }
919 
920 static int pv_sleepy_lock_factor_get(void *data, u64 *val)
921 {
922         *val = pv_sleepy_lock_factor;
923 
924         return 0;
925 }
926 
927 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");
928 
929 static int pv_yield_prev_set(void *data, u64 val)
930 {
931         pv_yield_prev = !!val;
932 
933         return 0;
934 }
935 
936 static int pv_yield_prev_get(void *data, u64 *val)
937 {
938         *val = pv_yield_prev;
939 
940         return 0;
941 }
942 
943 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
944 
945 static int pv_yield_sleepy_owner_set(void *data, u64 val)
946 {
947         pv_yield_sleepy_owner = !!val;
948 
949         return 0;
950 }
951 
952 static int pv_yield_sleepy_owner_get(void *data, u64 *val)
953 {
954         *val = pv_yield_sleepy_owner;
955 
956         return 0;
957 }
958 
959 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n");
960 
961 static int pv_prod_head_set(void *data, u64 val)
962 {
963         pv_prod_head = !!val;
964 
965         return 0;
966 }
967 
968 static int pv_prod_head_get(void *data, u64 *val)
969 {
970         *val = pv_prod_head;
971 
972         return 0;
973 }
974 
975 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");
976 
977 static __init int spinlock_debugfs_init(void)
978 {
979         debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
980         debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
981         debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
982         if (is_shared_processor()) {
983                 debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
984                 debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
985                 debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
986                 debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
987                 debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
988                 debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
989                 debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
990                 debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
991                 debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner);
992                 debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
993         }
994 
995         return 0;
996 }
997 device_initcall(spinlock_debugfs_init);
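    /*
     * Editor's note (not in the original source): with debugfs mounted at
     * the usual location and arch_debugfs_dir being the "powerpc" directory,
     * these knobs would appear as, e.g.:
     *
     *	cat /sys/kernel/debug/powerpc/qspl_steal_spins
     *	echo 64 > /sys/kernel/debug/powerpc/qspl_steal_spins
     */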
998 
