/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_SEQLOCK_H
#define __LINUX_SEQLOCK_H

/*
 * seqcount_t / seqlock_t - a reader-writer consistency mechanism with
 * lockless readers (read-only retry loops), and no writer starvation.
 *
 * See Documentation/locking/seqlock.rst
 *
 * Copyrights:
 * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
 * - Sequence counters with associated locks, (C) 2020 Linutronix GmbH
 */

#include <linux/compiler.h>
#include <linux/kcsan-checks.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/preempt.h>
#include <linux/seqlock_types.h>
#include <linux/spinlock.h>

#include <asm/processor.h>

/*
 * The seqlock seqcount_t interface does not prescribe a precise sequence of
 * read begin/retry/end. For readers, typically there is a call to
 * read_seqcount_begin() and read_seqcount_retry(), however, there are more
 * esoteric cases which do not follow this pattern.
 *
 * As a consequence, we take the following best-effort approach for raw usage
 * via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
 * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
 * atomics; if there is a matching read_seqcount_retry() call, no following
 * memory operations are considered atomic. Usage of the seqlock_t interface
 * is not affected.
 */
#define KCSAN_SEQLOCK_REGION_MAX 1000

static inline void __seqcount_init(seqcount_t *s, const char *name,
                                   struct lock_class_key *key)
{
        /*
         * Make sure we are not reinitializing a held lock:
         */
        lockdep_init_map(&s->dep_map, name, key, 0);
        s->sequence = 0;
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC

# define SEQCOUNT_DEP_MAP_INIT(lockname) \
                .dep_map = { .name = #lockname }

/**
 * seqcount_init() - runtime initializer for seqcount_t
 * @s: Pointer to the seqcount_t instance
 */
# define seqcount_init(s) \
        do { \
                static struct lock_class_key __key; \
                __seqcount_init((s), #s, &__key); \
        } while (0)

static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
{
        seqcount_t *l = (seqcount_t *)s;
        unsigned long flags;

        local_irq_save(flags);
        seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_);
        seqcount_release(&l->dep_map, _RET_IP_);
        local_irq_restore(flags);
}

#else
# define SEQCOUNT_DEP_MAP_INIT(lockname)
# define seqcount_init(s) __seqcount_init(s, NULL, NULL)
# define seqcount_lockdep_reader_access(x)
#endif

/**
 * SEQCNT_ZERO() - static initializer for seqcount_t
 * @name: Name of the seqcount_t instance
 */
#define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }

/*
 * Sequence counters with associated locks (seqcount_LOCKNAME_t)
 *
 * A sequence counter which associates the lock used for writer
 * serialization at initialization time. This enables lockdep to validate
 * that the write side critical section is properly serialized.
 *
 * For associated locks which do not implicitly disable preemption,
 * preemption protection is enforced in the write side function.
 *
 * Lockdep is never used in any of the raw write variants.
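 *
 * A minimal declaration and initialization sketch, assuming the writer
 * serialization is a spinlock (the "foo" structure and its members are
 * hypothetical, not part of this API)::
 *
 *	struct foo {
 *		spinlock_t lock;
 *		seqcount_spinlock_t seq;
 *		u64 value;
 *	};
 *
 *	static void foo_init(struct foo *f)
 *	{
 *		spin_lock_init(&f->lock);
 *		seqcount_spinlock_init(&f->seq, &f->lock);
 *	}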
 *
 * See Documentation/locking/seqlock.rst
 */

/*
 * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated
 * @seqcount: The real sequence counter
 * @lock: Pointer to the associated lock
 *
 * A plain sequence counter with external writer synchronization by
 * LOCKNAME @lock. The lock is associated to the sequence counter in the
 * static initializer or init function. This enables lockdep to validate
 * that the write side critical section is properly serialized.
 *
 * LOCKNAME: raw_spinlock, spinlock, rwlock or mutex
 */

/*
 * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t
 * @s: Pointer to the seqcount_LOCKNAME_t instance
 * @lock: Pointer to the associated lock
 */

#define seqcount_LOCKNAME_init(s, _lock, lockname) \
        do { \
                seqcount_##lockname##_t *____s = (s); \
                seqcount_init(&____s->seqcount); \
                __SEQ_LOCK(____s->lock = (_lock)); \
        } while (0)

#define seqcount_raw_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, raw_spinlock)
#define seqcount_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, spinlock)
#define seqcount_rwlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, rwlock)
#define seqcount_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, mutex)

/*
 * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers
 * seqprop_LOCKNAME_*() - Property accessors for seqcount_LOCKNAME_t
 *
 * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t
 * @locktype: LOCKNAME canonical C data type
 * @preemptible: preemptibility of above locktype
 * @lockbase: prefix for associated lock/unlock
 */
#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \
static __always_inline seqcount_t * \
__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \
{ \
        return &s->seqcount; \
} \
\
static __always_inline const seqcount_t * \
__seqprop_##lockname##_const_ptr(const seqcount_##lockname##_t *s) \
{ \
        return &s->seqcount; \
} \
\
static __always_inline unsigned \
__seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \
{ \
        unsigned seq = READ_ONCE(s->seqcount.sequence); \
\
        if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \
                return seq; \
\
        if (preemptible && unlikely(seq & 1)) { \
                __SEQ_LOCK(lockbase##_lock(s->lock)); \
                __SEQ_LOCK(lockbase##_unlock(s->lock)); \
\
                /* \
                 * Re-read the sequence counter since the (possibly \
                 * preempted) writer made progress. \
                 */ \
                seq = READ_ONCE(s->seqcount.sequence); \
        } \
\
        return seq; \
} \
\
static __always_inline bool \
__seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \
{ \
        if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \
                return preemptible; \
\
        /* PREEMPT_RT relies on the above LOCK+UNLOCK */ \
        return false; \
} \
\
static __always_inline void \
__seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \
{ \
        __SEQ_LOCK(lockdep_assert_held(s->lock)); \
}

/*
 * __seqprop() for seqcount_t
 */

static inline seqcount_t *__seqprop_ptr(seqcount_t *s)
{
        return s;
}

static inline const seqcount_t *__seqprop_const_ptr(const seqcount_t *s)
{
        return s;
}

static inline unsigned __seqprop_sequence(const seqcount_t *s)
{
        return READ_ONCE(s->sequence);
}

static inline bool __seqprop_preemptible(const seqcount_t *s)
{
        return false;
}

static inline void __seqprop_assert(const seqcount_t *s)
{
        lockdep_assert_preemption_disabled();
}

#define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT)

SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin)
SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin)
SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read)
SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
#undef SEQCOUNT_LOCKNAME

/*
 * SEQCOUNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
 * @name: Name of the seqcount_LOCKNAME_t instance
 * @lock: Pointer to the associated LOCKNAME
 */

#define SEQCOUNT_LOCKNAME_ZERO(seq_name, assoc_lock) { \
        .seqcount = SEQCNT_ZERO(seq_name.seqcount), \
        __SEQ_LOCK(.lock = (assoc_lock)) \
}

#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)
#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)
#define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)
#define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)
#define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)

#define __seqprop_case(s, lockname, prop) \
        seqcount_##lockname##_t: __seqprop_##lockname##_##prop

#define __seqprop(s, prop) _Generic(*(s), \
        seqcount_t: __seqprop_##prop, \
        __seqprop_case((s), raw_spinlock, prop), \
        __seqprop_case((s), spinlock, prop), \
        __seqprop_case((s), rwlock, prop), \
        __seqprop_case((s), mutex, prop))

#define seqprop_ptr(s) __seqprop(s, ptr)(s)
#define seqprop_const_ptr(s) __seqprop(s, const_ptr)(s)
#define seqprop_sequence(s) __seqprop(s, sequence)(s)
#define seqprop_preemptible(s) __seqprop(s, preemptible)(s)
#define seqprop_assert(s) __seqprop(s, assert)(s)

/**
 * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided before actually loading any of the variables that are to be
 * protected in this critical section.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
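 *
 * An illustrative sketch where the caller supplies the ordering itself,
 * which makes the loop equivalent to one using raw_read_seqcount_begin()
 * (all "foo_*" names are hypothetical)::
 *
 *	unsigned seq;
 *	u64 a, b;
 *
 *	do {
 *		seq = __read_seqcount_begin(&foo_seq);
 *		smp_rmb();	// order the counter load before the data loads
 *		a = foo_a;
 *		b = foo_b;
 *	} while (read_seqcount_retry(&foo_seq, seq));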
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define __read_seqcount_begin(s) \
({ \
        unsigned __seq; \
\
        while ((__seq = seqprop_sequence(s)) & 1) \
                cpu_relax(); \
\
        kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \
        __seq; \
})

/**
 * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define raw_read_seqcount_begin(s) \
({ \
        unsigned _seq = __read_seqcount_begin(s); \
\
        smp_rmb(); \
        _seq; \
})

/**
 * read_seqcount_begin() - begin a seqcount_t read critical section
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define read_seqcount_begin(s) \
({ \
        seqcount_lockdep_reader_access(seqprop_const_ptr(s)); \
        raw_read_seqcount_begin(s); \
})

/**
 * raw_read_seqcount() - read the raw seqcount_t counter value
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * raw_read_seqcount opens a read critical section of the given
 * seqcount_t, without any lockdep checking, and without checking or
 * masking the sequence counter LSB. Calling code is responsible for
 * handling that.
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define raw_read_seqcount(s) \
({ \
        unsigned __seq = seqprop_sequence(s); \
\
        smp_rmb(); \
        kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \
        __seq; \
})

/**
 * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
 *                        lockdep and w/o counter stabilization
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * raw_seqcount_begin opens a read critical section of the given
 * seqcount_t. Unlike read_seqcount_begin(), this function will not wait
 * for the count to stabilize. If a writer is active when it begins, it
 * will fail the read_seqcount_retry() at the end of the read critical
 * section instead of stabilizing at the beginning of it.
 *
 * Use this only in special kernel hot paths where the read section is
 * small and has a high probability of success through other external
 * means. It will save a single branching instruction.
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define raw_seqcount_begin(s) \
({ \
        /* \
         * If the counter is odd, let read_seqcount_retry() fail \
         * by decrementing the counter. \
         */ \
        raw_read_seqcount(s) & ~1; \
})

/**
 * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 * @start: count, from read_seqcount_begin()
 *
 * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided before actually loading any of the variables that are to be
 * protected in this critical section.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
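 *
 * Conversely to __read_seqcount_begin(), here the caller must order the
 * data loads before the counter re-read. A hedged sketch, again with
 * hypothetical "foo_*" names::
 *
 *	unsigned seq;
 *	u64 a, b;
 *
 *	do {
 *		seq = raw_read_seqcount_begin(&foo_seq);
 *		a = foo_a;
 *		b = foo_b;
 *		smp_rmb();	// order the data loads before the retry check
 *	} while (__read_seqcount_retry(&foo_seq, seq));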
 *
 * Return: true if a read section retry is required, else false
 */
#define __read_seqcount_retry(s, start) \
        do___read_seqcount_retry(seqprop_const_ptr(s), start)

static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start)
{
        kcsan_atomic_next(0);
        return unlikely(READ_ONCE(s->sequence) != start);
}

/**
 * read_seqcount_retry() - end a seqcount_t read critical section
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 * @start: count, from read_seqcount_begin()
 *
 * read_seqcount_retry closes the read critical section of given
 * seqcount_t. If the critical section was invalid, it must be ignored
 * (and typically retried).
 *
 * Return: true if a read section retry is required, else false
 */
#define read_seqcount_retry(s, start) \
        do_read_seqcount_retry(seqprop_const_ptr(s), start)

static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start)
{
        smp_rmb();
        return do___read_seqcount_retry(s, start);
}

/**
 * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * Context: check write_seqcount_begin()
 */
#define raw_write_seqcount_begin(s) \
do { \
        if (seqprop_preemptible(s)) \
                preempt_disable(); \
\
        do_raw_write_seqcount_begin(seqprop_ptr(s)); \
} while (0)

static inline void do_raw_write_seqcount_begin(seqcount_t *s)
{
        kcsan_nestable_atomic_begin();
        s->sequence++;
        smp_wmb();
}

/**
 * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * Context: check write_seqcount_end()
 */
#define raw_write_seqcount_end(s) \
do { \
        do_raw_write_seqcount_end(seqprop_ptr(s)); \
\
        if (seqprop_preemptible(s)) \
                preempt_enable(); \
} while (0)

static inline void do_raw_write_seqcount_end(seqcount_t *s)
{
        smp_wmb();
        s->sequence++;
        kcsan_nestable_atomic_end();
}

/**
 * write_seqcount_begin_nested() - start a seqcount_t write section with
 *                                 custom lockdep nesting level
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 * @subclass: lockdep nesting level
 *
 * See Documentation/locking/lockdep-design.rst
 * Context: check write_seqcount_begin()
 */
#define write_seqcount_begin_nested(s, subclass) \
do { \
        seqprop_assert(s); \
\
        if (seqprop_preemptible(s)) \
                preempt_disable(); \
\
        do_write_seqcount_begin_nested(seqprop_ptr(s), subclass); \
} while (0)

static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass)
{
        seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
        do_raw_write_seqcount_begin(s);
}

/**
 * write_seqcount_begin() - start a seqcount_t write side critical section
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * Context: sequence counter write side sections must be serialized and
 * non-preemptible. Preemption will be automatically disabled if and
 * only if the seqcount write serialization lock is associated, and
 * preemptible. If readers can be invoked from hardirq or softirq
 * context, interrupts or bottom halves must be respectively disabled.
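 *
 * A minimal writer/reader pairing sketch, assuming a seqcount_spinlock_t
 * "foo_seq" initialized against a spinlock "foo_lock" with
 * seqcount_spinlock_init(); all "foo_*" names are hypothetical::
 *
 *	void foo_update(u64 a, u64 b)
 *	{
 *		spin_lock(&foo_lock);
 *		write_seqcount_begin(&foo_seq);
 *		foo_a = a;
 *		foo_b = b;
 *		write_seqcount_end(&foo_seq);
 *		spin_unlock(&foo_lock);
 *	}
 *
 *	u64 foo_read(void)
 *	{
 *		unsigned int seq;
 *		u64 a, b;
 *
 *		do {
 *			seq = read_seqcount_begin(&foo_seq);
 *			a = foo_a;
 *			b = foo_b;
 *		} while (read_seqcount_retry(&foo_seq, seq));
 *
 *		return a + b;
 *	}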
 */
#define write_seqcount_begin(s) \
do { \
        seqprop_assert(s); \
\
        if (seqprop_preemptible(s)) \
                preempt_disable(); \
\
        do_write_seqcount_begin(seqprop_ptr(s)); \
} while (0)

static inline void do_write_seqcount_begin(seqcount_t *s)
{
        do_write_seqcount_begin_nested(s, 0);
}

/**
 * write_seqcount_end() - end a seqcount_t write side critical section
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * Context: Preemption will be automatically re-enabled if and only if
 * the seqcount write serialization lock is associated, and preemptible.
 */
#define write_seqcount_end(s) \
do { \
        do_write_seqcount_end(seqprop_ptr(s)); \
\
        if (seqprop_preemptible(s)) \
                preempt_enable(); \
} while (0)

static inline void do_write_seqcount_end(seqcount_t *s)
{
        seqcount_release(&s->dep_map, _RET_IP_);
        do_raw_write_seqcount_end(s);
}

/**
 * raw_write_seqcount_barrier() - do a seqcount_t write barrier
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * This can be used to provide an ordering guarantee instead of the usual
 * consistency guarantee. It is one wmb cheaper, because it can collapse
 * the two back-to-back wmb()s.
 *
 * Note that writes surrounding the barrier should be declared atomic (e.g.
 * via WRITE_ONCE): a) to ensure the writes become visible to other threads
 * atomically, avoiding compiler optimizations; b) to document which writes are
 * meant to propagate to the reader critical section. This is necessary because
 * neither writes before nor after the barrier are enclosed in a seq-writer
 * critical section that would ensure readers are aware of ongoing writes::
 *
 *	seqcount_t seq;
 *	bool X = true, Y = false;
 *
 *	void read(void)
 *	{
 *		bool x, y;
 *
 *		do {
 *			int s = read_seqcount_begin(&seq);
 *
 *			x = X; y = Y;
 *
 *		} while (read_seqcount_retry(&seq, s));
 *
 *		BUG_ON(!x && !y);
 *	}
 *
 *	void write(void)
 *	{
 *		WRITE_ONCE(Y, true);
 *
 *		raw_write_seqcount_barrier(seq);
 *
 *		WRITE_ONCE(X, false);
 *	}
 */
#define raw_write_seqcount_barrier(s) \
        do_raw_write_seqcount_barrier(seqprop_ptr(s))

static inline void do_raw_write_seqcount_barrier(seqcount_t *s)
{
        kcsan_nestable_atomic_begin();
        s->sequence++;
        smp_wmb();
        s->sequence++;
        kcsan_nestable_atomic_end();
}

/**
 * write_seqcount_invalidate() - invalidate in-progress seqcount_t read
 *                               side operations
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * After write_seqcount_invalidate, no seqcount_t read side operations
 * will complete successfully and see data older than this.
 */
#define write_seqcount_invalidate(s) \
        do_write_seqcount_invalidate(seqprop_ptr(s))

static inline void do_write_seqcount_invalidate(seqcount_t *s)
{
        smp_wmb();
        kcsan_nestable_atomic_begin();
        s->sequence += 2;
        kcsan_nestable_atomic_end();
}

/*
 * Latch sequence counters (seqcount_latch_t)
 *
 * A sequence counter variant where the counter even/odd value is used to
 * switch between two copies of protected data. This allows the read path,
 * typically NMIs, to safely interrupt the write side critical section.
 *
 * As the write sections are fully preemptible, no special handling for
 * PREEMPT_RT is needed.
 */
typedef struct {
        seqcount_t seqcount;
} seqcount_latch_t;

/**
 * SEQCNT_LATCH_ZERO() - static initializer for seqcount_latch_t
 * @seq_name: Name of the seqcount_latch_t instance
 */
#define SEQCNT_LATCH_ZERO(seq_name) { \
        .seqcount = SEQCNT_ZERO(seq_name.seqcount), \
}

/**
 * seqcount_latch_init() - runtime initializer for seqcount_latch_t
 * @s: Pointer to the seqcount_latch_t instance
 */
#define seqcount_latch_init(s) seqcount_init(&(s)->seqcount)

/**
 * raw_read_seqcount_latch() - pick even/odd latch data copy
 * @s: Pointer to seqcount_latch_t
 *
 * See raw_write_seqcount_latch() for details and a full reader/writer
 * usage example.
 *
 * Return: sequence counter raw value. Use the lowest bit as an index for
 * picking which data copy to read. The full counter must then be checked
 * with raw_read_seqcount_latch_retry().
 */
static __always_inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s)
{
        /*
         * Pairs with the first smp_wmb() in raw_write_seqcount_latch().
         * Due to the dependent load, a full smp_rmb() is not needed.
         */
        return READ_ONCE(s->seqcount.sequence);
}

/**
 * raw_read_seqcount_latch_retry() - end a seqcount_latch_t read section
 * @s: Pointer to seqcount_latch_t
 * @start: count, from raw_read_seqcount_latch()
 *
 * Return: true if a read section retry is required, else false
 */
static __always_inline int
raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
{
        smp_rmb();
        return unlikely(READ_ONCE(s->seqcount.sequence) != start);
}

/**
 * raw_write_seqcount_latch() - redirect latch readers to even/odd copy
 * @s: Pointer to seqcount_latch_t
 *
 * The latch technique is a multiversion concurrency control method that allows
 * queries during non-atomic modifications. If you can guarantee queries never
 * interrupt the modification -- e.g. the concurrency is strictly between CPUs
 * -- you most likely do not need this.
 *
 * Where the traditional RCU/lockless data structures rely on atomic
 * modifications to ensure queries observe either the old or the new state the
 * latch allows the same for non-atomic updates. The trade-off is doubling the
 * cost of storage; we have to maintain two copies of the entire data
 * structure.
 *
 * Very simply put: we first modify one copy and then the other. This ensures
 * there is always one copy in a stable state, ready to give us an answer.
 *
 * The basic form is a data structure like::
 *
 *	struct latch_struct {
 *		seqcount_latch_t seq;
 *		struct data_struct data[2];
 *	};
 *
 * Where a modification, which is assumed to be externally serialized, does the
 * following::
 *
 *	void latch_modify(struct latch_struct *latch, ...)
 *	{
 *		smp_wmb();	// Ensure that the last data[1] update is visible
 *		latch->seq.sequence++;
 *		smp_wmb();	// Ensure that the seqcount update is visible
 *
 *		modify(latch->data[0], ...);
 *
 *		smp_wmb();	// Ensure that the data[0] update is visible
 *		latch->seq.sequence++;
 *		smp_wmb();	// Ensure that the seqcount update is visible
 *
 *		modify(latch->data[1], ...);
 *	}
 *
 * The query will have a form like::
 *
 *	struct entry *latch_query(struct latch_struct *latch, ...)
 *	{
 *		struct entry *entry;
 *		unsigned seq, idx;
 *
 *		do {
 *			seq = raw_read_seqcount_latch(&latch->seq);
 *
 *			idx = seq & 0x01;
 *			entry = data_query(latch->data[idx], ...);
 *
 *		// This includes needed smp_rmb()
 *		} while (raw_read_seqcount_latch_retry(&latch->seq, seq));
 *
 *		return entry;
 *	}
 *
 * So during the modification, queries are first redirected to data[1]. Then we
 * modify data[0]. When that is complete, we redirect queries back to data[0]
 * and we can modify data[1].
 *
 * NOTE:
 *
 *	The non-requirement for atomic modifications does _NOT_ include
 *	the publishing of new entries in the case where data is a dynamic
 *	data structure.
 *
 *	An iteration might start in data[0] and get suspended long enough
 *	to miss an entire modification sequence, once it resumes it might
 *	observe the new entry.
 *
 * NOTE2:
 *
 *	When data is a dynamic data structure; one should use regular RCU
 *	patterns to manage the lifetimes of the objects within.
 */
static inline void raw_write_seqcount_latch(seqcount_latch_t *s)
{
        smp_wmb();	/* prior stores before incrementing "sequence" */
        s->seqcount.sequence++;
        smp_wmb();	/* increment "sequence" before following stores */
}

#define __SEQLOCK_UNLOCKED(lockname) \
        { \
                .seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \
                .lock = __SPIN_LOCK_UNLOCKED(lockname) \
        }

/**
 * seqlock_init() - dynamic initializer for seqlock_t
 * @sl: Pointer to the seqlock_t instance
 */
#define seqlock_init(sl) \
        do { \
                spin_lock_init(&(sl)->lock); \
                seqcount_spinlock_init(&(sl)->seqcount, &(sl)->lock); \
        } while (0)

/**
 * DEFINE_SEQLOCK(sl) - Define a statically allocated seqlock_t
 * @sl: Name of the seqlock_t instance
 */
#define DEFINE_SEQLOCK(sl) \
        seqlock_t sl = __SEQLOCK_UNLOCKED(sl)

/**
 * read_seqbegin() - start a seqlock_t read side critical section
 * @sl: Pointer to seqlock_t
 *
 * Return: count, to be passed to read_seqretry()
 */
static inline unsigned read_seqbegin(const seqlock_t *sl)
{
        unsigned ret = read_seqcount_begin(&sl->seqcount);

        kcsan_atomic_next(0);  /* non-raw usage, assume closing read_seqretry() */
        kcsan_flat_atomic_begin();
        return ret;
}

/**
 * read_seqretry() - end a seqlock_t read side section
 * @sl: Pointer to seqlock_t
 * @start: count, from read_seqbegin()
 *
 * read_seqretry closes the read side critical section of given seqlock_t.
 * If the critical section was invalid, it must be ignored (and typically
 * retried).
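 *
 * A typical lockless reader retry loop, sketched against a hypothetical
 * DEFINE_SEQLOCK(foo_seqlock) protecting hypothetical "foo_a" and "foo_b"::
 *
 *	unsigned seq;
 *	u64 a, b;
 *
 *	do {
 *		seq = read_seqbegin(&foo_seqlock);
 *		a = foo_a;
 *		b = foo_b;
 *	} while (read_seqretry(&foo_seqlock, seq));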
 *
 * Return: true if a read section retry is required, else false
 */
static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
{
        /*
         * Assume not nested: read_seqretry() may be called multiple times when
         * completing a read critical section.
         */
        kcsan_flat_atomic_end();

        return read_seqcount_retry(&sl->seqcount, start);
}

/*
 * For all seqlock_t write side functions, use the internal
 * do_write_seqcount_begin() instead of generic write_seqcount_begin().
 * This way, no redundant lockdep_assert_held() checks are added.
 */

/**
 * write_seqlock() - start a seqlock_t write side critical section
 * @sl: Pointer to seqlock_t
 *
 * write_seqlock opens a write side critical section for the given
 * seqlock_t. It also implicitly acquires the spinlock_t embedded inside
 * that sequential lock. All seqlock_t write side sections are thus
 * automatically serialized and non-preemptible.
 *
 * Context: if the seqlock_t read section, or other write side critical
 * sections, can be invoked from hardirq or softirq contexts, use the
 * _irqsave or _bh variants of this function instead.
 */
static inline void write_seqlock(seqlock_t *sl)
{
        spin_lock(&sl->lock);
        do_write_seqcount_begin(&sl->seqcount.seqcount);
}

/**
 * write_sequnlock() - end a seqlock_t write side critical section
 * @sl: Pointer to seqlock_t
 *
 * write_sequnlock closes the (serialized and non-preemptible) write side
 * critical section of given seqlock_t.
 */
static inline void write_sequnlock(seqlock_t *sl)
{
        do_write_seqcount_end(&sl->seqcount.seqcount);
        spin_unlock(&sl->lock);
}

/**
 * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * _bh variant of write_seqlock(). Use only if the read side section, or
 * other write side sections, can be invoked from softirq contexts.
 */
static inline void write_seqlock_bh(seqlock_t *sl)
{
        spin_lock_bh(&sl->lock);
        do_write_seqcount_begin(&sl->seqcount.seqcount);
}

/**
 * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * write_sequnlock_bh closes the serialized, non-preemptible, and
 * softirqs-disabled, seqlock_t write side critical section opened with
 * write_seqlock_bh().
 */
static inline void write_sequnlock_bh(seqlock_t *sl)
{
        do_write_seqcount_end(&sl->seqcount.seqcount);
        spin_unlock_bh(&sl->lock);
}

/**
 * write_seqlock_irq() - start a non-interruptible seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * _irq variant of write_seqlock(). Use only if the read side section, or
 * other write sections, can be invoked from hardirq contexts.
 */
static inline void write_seqlock_irq(seqlock_t *sl)
{
        spin_lock_irq(&sl->lock);
        do_write_seqcount_begin(&sl->seqcount.seqcount);
}

/**
 * write_sequnlock_irq() - end a non-interruptible seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * write_sequnlock_irq closes the serialized and non-interruptible
 * seqlock_t write side section opened with write_seqlock_irq().
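 *
 * A minimal pairing sketch ("foo_seqlock", "foo_value" and "new_value" are
 * hypothetical)::
 *
 *	write_seqlock_irq(&foo_seqlock);
 *	foo_value = new_value;
 *	write_sequnlock_irq(&foo_seqlock);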
 */
static inline void write_sequnlock_irq(seqlock_t *sl)
{
        do_write_seqcount_end(&sl->seqcount.seqcount);
        spin_unlock_irq(&sl->lock);
}

static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
{
        unsigned long flags;

        spin_lock_irqsave(&sl->lock, flags);
        do_write_seqcount_begin(&sl->seqcount.seqcount);
        return flags;
}

/**
 * write_seqlock_irqsave() - start a non-interruptible seqlock_t write
 *                           section
 * @lock: Pointer to seqlock_t
 * @flags: Stack-allocated storage for saving caller's local interrupt
 *         state, to be passed to write_sequnlock_irqrestore().
 *
 * _irqsave variant of write_seqlock(). Use it only if the read side
 * section, or other write sections, can be invoked from hardirq context.
 */
#define write_seqlock_irqsave(lock, flags) \
        do { flags = __write_seqlock_irqsave(lock); } while (0)

/**
 * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write
 *                                section
 * @sl: Pointer to seqlock_t
 * @flags: Caller's saved interrupt state, from write_seqlock_irqsave()
 *
 * write_sequnlock_irqrestore closes the serialized and non-interruptible
 * seqlock_t write section previously opened with write_seqlock_irqsave().
 */
static inline void
write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
{
        do_write_seqcount_end(&sl->seqcount.seqcount);
        spin_unlock_irqrestore(&sl->lock, flags);
}

/**
 * read_seqlock_excl() - begin a seqlock_t locking reader section
 * @sl: Pointer to seqlock_t
 *
 * read_seqlock_excl opens a seqlock_t locking reader critical section. A
 * locking reader exclusively locks out *both* other writers *and* other
 * locking readers, but it does not update the embedded sequence number.
 *
 * Locking readers act like a normal spin_lock()/spin_unlock().
 *
 * Context: if the seqlock_t write section, *or other read sections*, can
 * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
 * variant of this function instead.
 *
 * The opened read section must be closed with read_sequnlock_excl().
 */
static inline void read_seqlock_excl(seqlock_t *sl)
{
        spin_lock(&sl->lock);
}

/**
 * read_sequnlock_excl() - end a seqlock_t locking reader critical section
 * @sl: Pointer to seqlock_t
 */
static inline void read_sequnlock_excl(seqlock_t *sl)
{
        spin_unlock(&sl->lock);
}

/**
 * read_seqlock_excl_bh() - start a seqlock_t locking reader section with
 *                          softirqs disabled
 * @sl: Pointer to seqlock_t
 *
 * _bh variant of read_seqlock_excl(). Use this variant only if the
 * seqlock_t write side section, *or other read sections*, can be invoked
 * from softirq contexts.
 */
static inline void read_seqlock_excl_bh(seqlock_t *sl)
{
        spin_lock_bh(&sl->lock);
}

/**
 * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking
 *                            reader section
 * @sl: Pointer to seqlock_t
 */
static inline void read_sequnlock_excl_bh(seqlock_t *sl)
{
        spin_unlock_bh(&sl->lock);
}

/**
 * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking
 *                           reader section
 * @sl: Pointer to seqlock_t
 *
 * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t
 * write side section, *or other read sections*, can be invoked from a
 * hardirq context.
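 *
 * A minimal locking reader sketch ("foo_seqlock" and "foo_value" are
 * hypothetical)::
 *
 *	read_seqlock_excl_irq(&foo_seqlock);
 *	value = foo_value;
 *	read_sequnlock_excl_irq(&foo_seqlock);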
 */
static inline void read_seqlock_excl_irq(seqlock_t *sl)
{
        spin_lock_irq(&sl->lock);
}

/**
 * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t
 *                             locking reader section
 * @sl: Pointer to seqlock_t
 */
static inline void read_sequnlock_excl_irq(seqlock_t *sl)
{
        spin_unlock_irq(&sl->lock);
}

static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
{
        unsigned long flags;

        spin_lock_irqsave(&sl->lock, flags);
        return flags;
}

/**
 * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t
 *                               locking reader section
 * @lock: Pointer to seqlock_t
 * @flags: Stack-allocated storage for saving caller's local interrupt
 *         state, to be passed to read_sequnlock_excl_irqrestore().
 *
 * _irqsave variant of read_seqlock_excl(). Use this only if the seqlock_t
 * write side section, *or other read sections*, can be invoked from a
 * hardirq context.
 */
#define read_seqlock_excl_irqsave(lock, flags) \
        do { flags = __read_seqlock_excl_irqsave(lock); } while (0)

/**
 * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t
 *                                    locking reader section
 * @sl: Pointer to seqlock_t
 * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave()
 */
static inline void
read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
{
        spin_unlock_irqrestore(&sl->lock, flags);
}

/**
 * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader
 * @lock: Pointer to seqlock_t
 * @seq: Marker and return parameter. If the passed value is even, the
 * reader will become a *lockless* seqlock_t reader as in read_seqbegin().
 * If the passed value is odd, the reader will become a *locking* reader
 * as in read_seqlock_excl(). In the first call to this function, the
 * caller *must* initialize and pass an even value to @seq; this way, a
 * lockless read can be optimistically tried first.
 *
 * read_seqbegin_or_lock is an API designed to optimistically try a normal
 * lockless seqlock_t read section first. If an odd counter is found, the
 * lockless read trial has failed, and the next read iteration transforms
 * itself into a full seqlock_t locking reader.
 *
 * This is typically used to avoid seqlock_t lockless readers starvation
 * (too many retry loops) in the case of a sharp spike in write side
 * activity.
 *
 * Context: if the seqlock_t write section, *or other read sections*, can
 * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
 * variant of this function instead.
 *
 * Check Documentation/locking/seqlock.rst for template example code.
 *
 * Return: the encountered sequence counter value, through the @seq
 * parameter, which is overloaded as a return parameter. This returned
 * value must be checked with need_seqretry(). If the read section needs to
 * be retried, this returned value must also be passed as the @seq
 * parameter of the next read_seqbegin_or_lock() iteration.
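 *
 * An illustrative calling pattern which explicitly forces the second pass
 * to be a locking reader (a sketch only; "foo_seqlock" and the read of the
 * protected data are hypothetical placeholders)::
 *
 *	int seq = 0;
 *
 * retry:
 *	read_seqbegin_or_lock(&foo_seqlock, &seq);
 *
 *	// ... read the protected data ...
 *
 *	if (need_seqretry(&foo_seqlock, seq)) {
 *		seq = 1;	// odd marker: take the lock on the next pass
 *		goto retry;
 *	}
 *	done_seqretry(&foo_seqlock, seq);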
 */
static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
{
        if (!(*seq & 1))	/* Even */
                *seq = read_seqbegin(lock);
        else			/* Odd */
                read_seqlock_excl(lock);
}

/**
 * need_seqretry() - validate seqlock_t "locking or lockless" read section
 * @lock: Pointer to seqlock_t
 * @seq: sequence count, from read_seqbegin_or_lock()
 *
 * Return: true if a read section retry is required, false otherwise
 */
static inline int need_seqretry(seqlock_t *lock, int seq)
{
        return !(seq & 1) && read_seqretry(lock, seq);
}

/**
 * done_seqretry() - end seqlock_t "locking or lockless" reader section
 * @lock: Pointer to seqlock_t
 * @seq: count, from read_seqbegin_or_lock()
 *
 * done_seqretry finishes the seqlock_t read side critical section started
 * with read_seqbegin_or_lock() and validated by need_seqretry().
 */
static inline void done_seqretry(seqlock_t *lock, int seq)
{
        if (seq & 1)
                read_sequnlock_excl(lock);
}

/**
 * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or
 *                                   a non-interruptible locking reader
 * @lock: Pointer to seqlock_t
 * @seq: Marker and return parameter. Check read_seqbegin_or_lock().
 *
 * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if
 * the seqlock_t write section, *or other read sections*, can be invoked
 * from hardirq context.
 *
 * Note: Interrupts will be disabled only for "locking reader" mode.
 *
 * Return:
 *
 *   1. The saved local interrupts state in case of a locking reader, to
 *      be passed to done_seqretry_irqrestore().
 *
 *   2. The encountered sequence counter value, returned through @seq
 *      overloaded as a return parameter. Check read_seqbegin_or_lock().
 */
static inline unsigned long
read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
{
        unsigned long flags = 0;

        if (!(*seq & 1))	/* Even */
                *seq = read_seqbegin(lock);
        else			/* Odd */
                read_seqlock_excl_irqsave(lock, flags);

        return flags;
}

/**
 * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a
 *                              non-interruptible locking reader section
 * @lock: Pointer to seqlock_t
 * @seq: Count, from read_seqbegin_or_lock_irqsave()
 * @flags: Caller's saved local interrupt state in case of a locking
 *         reader, also from read_seqbegin_or_lock_irqsave()
 *
 * This is the _irqrestore variant of done_seqretry(). The read section
 * must've been opened with read_seqbegin_or_lock_irqsave(), and validated
 * by need_seqretry().
 */
static inline void
done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
{
        if (seq & 1)
                read_sequnlock_excl_irqrestore(lock, flags);
}
#endif /* __LINUX_SEQLOCK_H */