TOMOYO Linux Cross Reference
Linux/lib/vdso/getrandom.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include <linux/cache.h>
#include <linux/kernel.h>
#include <linux/time64.h>
#include <vdso/datapage.h>
#include <vdso/getrandom.h>
#include <asm/vdso/getrandom.h>
#include <asm/vdso/vsyscall.h>
#include <asm/unaligned.h>
#include <uapi/linux/mman.h>

#define MEMCPY_AND_ZERO_SRC(type, dst, src, len) do {                           \
        while (len >= sizeof(type)) {                                           \
                __put_unaligned_t(type, __get_unaligned_t(type, src), dst);     \
                __put_unaligned_t(type, 0, src);                                \
                dst += sizeof(type);                                            \
                src += sizeof(type);                                            \
                len -= sizeof(type);                                            \
        }                                                                       \
} while (0)

static void memcpy_and_zero_src(void *dst, void *src, size_t len)
{
        if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
                if (IS_ENABLED(CONFIG_64BIT))
                        MEMCPY_AND_ZERO_SRC(u64, dst, src, len);
                MEMCPY_AND_ZERO_SRC(u32, dst, src, len);
                MEMCPY_AND_ZERO_SRC(u16, dst, src, len);
        }
        MEMCPY_AND_ZERO_SRC(u8, dst, src, len);
}
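
/*
 * Illustrative sketch (not part of this file): the helper above is what lets batch bytes be
 * handed out without leaving a second copy behind. After
 *
 *	u8 out[32];
 *	memcpy_and_zero_src(out, state->batch + state->pos, sizeof(out));
 *
 * out holds the random bytes and the consumed region of the batch is zeroed, so a later
 * disclosure of the state memory cannot reveal bytes already returned to the caller.
 */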

/**
 * __cvdso_getrandom_data - Generic vDSO implementation of getrandom() syscall.
 * @rng_info:           Describes state of kernel RNG, memory shared with kernel.
 * @buffer:             Destination buffer to fill with random bytes.
 * @len:                Size of @buffer in bytes.
 * @flags:              Zero or more GRND_* flags.
 * @opaque_state:       Pointer to an opaque state area.
 * @opaque_len:         Length of opaque state area.
 *
 * This implements a "fast key erasure" RNG using ChaCha20, in the same way that the kernel's
 * getrandom() syscall does. It periodically reseeds its key from the kernel's RNG, at the same
 * schedule that the kernel's RNG is reseeded. If the kernel's RNG is not ready, then this always
 * calls into the syscall.
 *
 * If @buffer, @len, and @flags are 0, and @opaque_len is ~0UL, then @opaque_state is populated
 * with a struct vgetrandom_opaque_params and the function returns 0; if it does not return 0,
 * this function should not be used.
 *
 * @opaque_state *must* be allocated by calling mmap(2) using the mmap_prot and mmap_flags fields
 * from the struct vgetrandom_opaque_params, and states must not straddle pages. Unless external
 * locking is used, one state must be allocated per thread, as it is not safe to call this function
 * concurrently with the same @opaque_state. However, it is safe to call this using the same
 * @opaque_state that is shared between main code and signal handling code, within the same thread.
 *
 * Returns:     The number of random bytes written to @buffer, or a negative value indicating an error.
 */
static __always_inline ssize_t
__cvdso_getrandom_data(const struct vdso_rng_data *rng_info, void *buffer, size_t len,
                       unsigned int flags, void *opaque_state, size_t opaque_len)
{
        ssize_t ret = min_t(size_t, INT_MAX & PAGE_MASK /* = MAX_RW_COUNT */, len);
        struct vgetrandom_state *state = opaque_state;
        size_t batch_len, nblocks, orig_len = len;
        bool in_use, have_retried = false;
        unsigned long current_generation;
        void *orig_buffer = buffer;
        u32 counter[2] = { 0 };

        if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags)) {
                *(struct vgetrandom_opaque_params *)opaque_state = (struct vgetrandom_opaque_params) {
                        .size_of_opaque_state = sizeof(*state),
                        .mmap_prot = PROT_READ | PROT_WRITE,
                        .mmap_flags = MAP_DROPPABLE | MAP_ANONYMOUS
                };
                return 0;
        }

        /* The state must not straddle a page, since pages can be zeroed at any time. */
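        /*
         * (The state lives in a MAP_DROPPABLE mapping, whose pages the kernel may reclaim and
         * zero under memory pressure. Since @rng_info->generation is never 0, a wiped
         * @state->generation is caught by the generation checks below, but only if the entire
         * state is zeroed together, hence this single-page requirement.)
         */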
        if (unlikely(((unsigned long)opaque_state & ~PAGE_MASK) + sizeof(*state) > PAGE_SIZE))
                return -EFAULT;

        /* Handle unexpected flags by falling back to the kernel. */
        if (unlikely(flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)))
                goto fallback_syscall;

        /* If the caller passes the wrong size, which might happen due to CRIU, fall back. */
        if (unlikely(opaque_len != sizeof(*state)))
                goto fallback_syscall;

        /*
         * If the kernel's RNG is not yet ready, then it's not possible to provide random bytes from
         * userspace, because A) the various @flags require this to block, or not, depending on
         * various factors unavailable to userspace, and B) the kernel's behavior before the RNG is
         * ready is to reseed from the entropy pool at every invocation.
         */
        if (unlikely(!READ_ONCE(rng_info->is_ready)))
                goto fallback_syscall;

        /*
         * This condition is checked after @rng_info->is_ready, because before the kernel's RNG is
         * initialized, the @flags parameter may require this to block or return an error, even when
         * len is zero.
         */
        if (unlikely(!len))
                return 0;

        /*
         * @state->in_use is basic reentrancy protection against this running in a signal handler
         * with the same @opaque_state, but obviously not atomic wrt multiple CPUs or more than one
         * level of reentrancy. If a signal interrupts this after reading @state->in_use, but before
         * writing @state->in_use, there is still no race, because the signal handler will run to
         * its completion before returning execution.
         */
        in_use = READ_ONCE(state->in_use);
        if (unlikely(in_use))
                /* The syscall simply fills the buffer and does not touch @state, so fall back. */
                goto fallback_syscall;
        WRITE_ONCE(state->in_use, true);

retry_generation:
        /*
         * @rng_info->generation must always be read here, as it serializes @state->key with the
         * kernel's RNG reseeding schedule.
         */
        current_generation = READ_ONCE(rng_info->generation);

        /*
         * If @state->generation doesn't match the kernel RNG's generation, then it means the
         * kernel's RNG has reseeded, and so @state->key is reseeded as well.
         */
        if (unlikely(state->generation != current_generation)) {
                /*
                 * Write the generation before filling the key, in case of fork. If there is a fork
                 * just after this line, the parent and child will get different random bytes from
                 * the syscall, which is good. However, were this line to occur after the getrandom
                 * syscall, then both child and parent could have the same bytes and the same
                 * generation counter, so the fork would not be detected. Therefore, write
                 * @state->generation before the call to the getrandom syscall.
                 */
                WRITE_ONCE(state->generation, current_generation);

                /*
                 * Prevent the syscall from being reordered wrt current_generation. Pairs with the
                 * smp_store_release(&_vdso_rng_data.generation) in random.c.
                 */
                smp_rmb();

                /* Reseed @state->key using fresh bytes from the kernel. */
                if (getrandom_syscall(state->key, sizeof(state->key), 0) != sizeof(state->key)) {
                        /*
                         * If the syscall failed to refresh the key, then @state->key is now
                         * invalid, so invalidate the generation so that it is not used again, and
                         * fall back to using the syscall entirely.
                         */
                        WRITE_ONCE(state->generation, 0);

                        /*
                         * Set @state->in_use to false only after the last write to @state in the
                         * line above.
                         */
                        WRITE_ONCE(state->in_use, false);

                        goto fallback_syscall;
                }

                /*
                 * Set @state->pos to beyond the end of the batch, so that the batch is refilled
                 * using the new key.
                 */
                state->pos = sizeof(state->batch);
        }

        /* Set @len to the total number of bytes this call may write to @buffer, computed above as @ret. */
        len = ret;
more_batch:
        /*
         * First use bytes out of @state->batch, which may have been filled by the last call to this
         * function.
         */
        batch_len = min_t(size_t, sizeof(state->batch) - state->pos, len);
        if (batch_len) {
                /* Zeroing at the same time as memcpying helps preserve forward secrecy. */
                memcpy_and_zero_src(buffer, state->batch + state->pos, batch_len);
                state->pos += batch_len;
                buffer += batch_len;
                len -= batch_len;
        }

        if (!len) {
                /* Prevent the loop from being reordered wrt ->generation. */
                barrier();

                /*
                 * Since @rng_info->generation will never be 0, re-read @state->generation, rather
                 * than using the local current_generation variable, to learn whether a fork
                 * occurred or if @state was zeroed due to memory pressure. Primarily, though, this
                 * indicates whether the kernel's RNG has reseeded, in which case generate a new key
                 * and start over.
                 */
                if (unlikely(READ_ONCE(state->generation) != READ_ONCE(rng_info->generation))) {
                        /*
                         * Prevent this from looping forever in case of low memory or racing with a
                         * user force-reseeding the kernel's RNG using the ioctl.
                         */
                        if (have_retried) {
                                WRITE_ONCE(state->in_use, false);
                                goto fallback_syscall;
                        }

                        have_retried = true;
                        buffer = orig_buffer;
                        goto retry_generation;
                }

                /*
                 * Set @state->in_use to false only when there will be no more reads or writes of
                 * @state.
                 */
                WRITE_ONCE(state->in_use, false);
                return ret;
        }

        /* Generate blocks of RNG output directly into @buffer while there's enough room left. */
        nblocks = len / CHACHA_BLOCK_SIZE;
        if (nblocks) {
                __arch_chacha20_blocks_nostack(buffer, state->key, counter, nblocks);
                buffer += nblocks * CHACHA_BLOCK_SIZE;
                len -= nblocks * CHACHA_BLOCK_SIZE;
        }
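
        /*
         * (@counter is advanced by __arch_chacha20_blocks_nostack(), so the batch refill below
         * continues the same keystream rather than regenerating the blocks just written to
         * @buffer.)
         */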

        BUILD_BUG_ON(sizeof(state->batch_key) % CHACHA_BLOCK_SIZE != 0);

        /* Refill the batch and overwrite the key, in order to preserve forward secrecy. */
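        /*
         * (@state->batch_key is a union in vdso/getrandom.h overlaying both @state->batch and
         * @state->key, so this one call refills the batch and replaces the key in place,
         * destroying the old key.)
         */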
        __arch_chacha20_blocks_nostack(state->batch_key, state->key, counter,
                                       sizeof(state->batch_key) / CHACHA_BLOCK_SIZE);

        /* Since the batch was just refilled, set the position back to 0 to indicate a full batch. */
        state->pos = 0;
        goto more_batch;

fallback_syscall:
        return getrandom_syscall(orig_buffer, orig_len, flags);
}
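
/*
 * Illustrative calling sequence from userspace (a sketch, not part of this file; vgetrandom()
 * stands for a wrapper around the architecture's exported vDSO symbol):
 *
 *	struct vgetrandom_opaque_params params;
 *	void *state;
 *	char buf[256];
 *
 *	// Query the state size and required mmap parameters (the special form documented above).
 *	if (vgetrandom(NULL, 0, 0, &params, ~0UL) != 0)
 *		return use_getrandom_syscall_instead(); // hypothetical fallback helper
 *
 *	// Allocate one state per thread; mmap's page-aligned return avoids straddling pages.
 *	state = mmap(NULL, params.size_of_opaque_state, params.mmap_prot, params.mmap_flags, -1, 0);
 *
 *	// Draw random bytes; this falls back to the syscall internally when necessary.
 *	vgetrandom(buf, sizeof(buf), 0, state, params.size_of_opaque_state);
 */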

static __always_inline ssize_t
__cvdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
{
        return __cvdso_getrandom_data(__arch_get_vdso_rng_data(), buffer, len, flags, opaque_state, opaque_len);
}
