~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/x86/kernel/shstk.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  * shstk.c - Intel shadow stack support
  4  *
  5  * Copyright (c) 2021, Intel Corporation.
  6  * Yu-cheng Yu <yu-cheng.yu@intel.com>
  7  */
  8 
  9 #include <linux/sched.h>
 10 #include <linux/bitops.h>
 11 #include <linux/types.h>
 12 #include <linux/mm.h>
 13 #include <linux/mman.h>
 14 #include <linux/slab.h>
 15 #include <linux/uaccess.h>
 16 #include <linux/sched/signal.h>
 17 #include <linux/compat.h>
 18 #include <linux/sizes.h>
 19 #include <linux/user.h>
 20 #include <linux/syscalls.h>
 21 #include <asm/msr.h>
 22 #include <asm/fpu/xstate.h>
 23 #include <asm/fpu/types.h>
 24 #include <asm/shstk.h>
 25 #include <asm/special_insns.h>
 26 #include <asm/fpu/api.h>
 27 #include <asm/prctl.h>
 28 
/* Size in bytes of one shadow stack slot; the SSP is stepped by this amount. */
#define SS_FRAME_SIZE 8
 30 
 31 static bool features_enabled(unsigned long features)
 32 {
 33         return current->thread.features & features;
 34 }
 35 
 36 static void features_set(unsigned long features)
 37 {
 38         current->thread.features |= features;
 39 }
 40 
 41 static void features_clr(unsigned long features)
 42 {
 43         current->thread.features &= ~features;
 44 }
 45 
 46 /*
 47  * Create a restore token on the shadow stack.  A token is always 8-byte
 48  * and aligned to 8.
 49  */
 50 static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
 51 {
 52         unsigned long addr;
 53 
 54         /* Token must be aligned */
 55         if (!IS_ALIGNED(ssp, 8))
 56                 return -EINVAL;
 57 
 58         addr = ssp - SS_FRAME_SIZE;
 59 
 60         /*
 61          * SSP is aligned, so reserved bits and mode bit are a zero, just mark
 62          * the token 64-bit.
 63          */
 64         ssp |= BIT(0);
 65 
 66         if (write_user_shstk_64((u64 __user *)addr, (u64)ssp))
 67                 return -EFAULT;
 68 
 69         if (token_addr)
 70                 *token_addr = addr;
 71 
 72         return 0;
 73 }
 74 
 75 /*
 76  * VM_SHADOW_STACK will have a guard page. This helps userspace protect
 77  * itself from attacks. The reasoning is as follows:
 78  *
 79  * The shadow stack pointer(SSP) is moved by CALL, RET, and INCSSPQ. The
 80  * INCSSP instruction can increment the shadow stack pointer. It is the
 81  * shadow stack analog of an instruction like:
 82  *
 83  *   addq $0x80, %rsp
 84  *
 85  * However, there is one important difference between an ADD on %rsp
 86  * and INCSSP. In addition to modifying SSP, INCSSP also reads from the
 87  * memory of the first and last elements that were "popped". It can be
 88  * thought of as acting like this:
 89  *
 90  * READ_ONCE(ssp);       // read+discard top element on stack
 91  * ssp += nr_to_pop * 8; // move the shadow stack
 92  * READ_ONCE(ssp-8);     // read+discard last popped stack element
 93  *
 94  * The maximum distance INCSSP can move the SSP is 2040 bytes, before
 95  * it would read the memory. Therefore a single page gap will be enough
 96  * to prevent any operation from shifting the SSP to an adjacent stack,
 97  * since it would have to land in the gap at least once, causing a
 98  * fault.
 99  */
100 static unsigned long alloc_shstk(unsigned long addr, unsigned long size,
101                                  unsigned long token_offset, bool set_res_tok)
102 {
103         int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_ABOVE4G;
104         struct mm_struct *mm = current->mm;
105         unsigned long mapped_addr, unused;
106 
107         if (addr)
108                 flags |= MAP_FIXED_NOREPLACE;
109 
110         mmap_write_lock(mm);
111         mapped_addr = do_mmap(NULL, addr, size, PROT_READ, flags,
112                               VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
113         mmap_write_unlock(mm);
114 
115         if (!set_res_tok || IS_ERR_VALUE(mapped_addr))
116                 goto out;
117 
118         if (create_rstor_token(mapped_addr + token_offset, NULL)) {
119                 vm_munmap(mapped_addr, size);
120                 return -EINVAL;
121         }
122 
123 out:
124         return mapped_addr;
125 }
126 
127 static unsigned long adjust_shstk_size(unsigned long size)
128 {
129         if (size)
130                 return PAGE_ALIGN(size);
131 
132         return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
133 }
134 
135 static void unmap_shadow_stack(u64 base, u64 size)
136 {
137         int r;
138 
139         r = vm_munmap(base, size);
140 
141         /*
142          * mmap_write_lock_killable() failed with -EINTR. This means
143          * the process is about to die and have it's MM cleaned up.
144          * This task shouldn't ever make it back to userspace. In this
145          * case it is ok to leak a shadow stack, so just exit out.
146          */
147         if (r == -EINTR)
148                 return;
149 
150         /*
151          * For all other types of vm_munmap() failure, either the
152          * system is out of memory or there is bug.
153          */
154         WARN_ON_ONCE(r);
155 }
156 
157 static int shstk_setup(void)
158 {
159         struct thread_shstk *shstk = &current->thread.shstk;
160         unsigned long addr, size;
161 
162         /* Already enabled */
163         if (features_enabled(ARCH_SHSTK_SHSTK))
164                 return 0;
165 
166         /* Also not supported for 32 bit */
167         if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || in_ia32_syscall())
168                 return -EOPNOTSUPP;
169 
170         size = adjust_shstk_size(0);
171         addr = alloc_shstk(0, size, 0, false);
172         if (IS_ERR_VALUE(addr))
173                 return PTR_ERR((void *)addr);
174 
175         fpregs_lock_and_load();
176         wrmsrl(MSR_IA32_PL3_SSP, addr + size);
177         wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
178         fpregs_unlock();
179 
180         shstk->base = addr;
181         shstk->size = size;
182         features_set(ARCH_SHSTK_SHSTK);
183 
184         return 0;
185 }
186 
187 void reset_thread_features(void)
188 {
189         memset(&current->thread.shstk, 0, sizeof(struct thread_shstk));
190         current->thread.features = 0;
191         current->thread.features_locked = 0;
192 }
193 
194 unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags,
195                                        unsigned long stack_size)
196 {
197         struct thread_shstk *shstk = &tsk->thread.shstk;
198         unsigned long addr, size;
199 
200         /*
201          * If shadow stack is not enabled on the new thread, skip any
202          * switch to a new shadow stack.
203          */
204         if (!features_enabled(ARCH_SHSTK_SHSTK))
205                 return 0;
206 
207         /*
208          * For CLONE_VFORK the child will share the parents shadow stack.
209          * Make sure to clear the internal tracking of the thread shadow
210          * stack so the freeing logic run for child knows to leave it alone.
211          */
212         if (clone_flags & CLONE_VFORK) {
213                 shstk->base = 0;
214                 shstk->size = 0;
215                 return 0;
216         }
217 
218         /*
219          * For !CLONE_VM the child will use a copy of the parents shadow
220          * stack.
221          */
222         if (!(clone_flags & CLONE_VM))
223                 return 0;
224 
225         size = adjust_shstk_size(stack_size);
226         addr = alloc_shstk(0, size, 0, false);
227         if (IS_ERR_VALUE(addr))
228                 return addr;
229 
230         shstk->base = addr;
231         shstk->size = size;
232 
233         return addr + size;
234 }
235 
236 static unsigned long get_user_shstk_addr(void)
237 {
238         unsigned long long ssp;
239 
240         fpregs_lock_and_load();
241 
242         rdmsrl(MSR_IA32_PL3_SSP, ssp);
243 
244         fpregs_unlock();
245 
246         return ssp;
247 }
248 
249 #define SHSTK_DATA_BIT BIT(63)
250 
251 static int put_shstk_data(u64 __user *addr, u64 data)
252 {
253         if (WARN_ON_ONCE(data & SHSTK_DATA_BIT))
254                 return -EINVAL;
255 
256         /*
257          * Mark the high bit so that the sigframe can't be processed as a
258          * return address.
259          */
260         if (write_user_shstk_64(addr, data | SHSTK_DATA_BIT))
261                 return -EFAULT;
262         return 0;
263 }
264 
265 static int get_shstk_data(unsigned long *data, unsigned long __user *addr)
266 {
267         unsigned long ldata;
268 
269         if (unlikely(get_user(ldata, addr)))
270                 return -EFAULT;
271 
272         if (!(ldata & SHSTK_DATA_BIT))
273                 return -EINVAL;
274 
275         *data = ldata & ~SHSTK_DATA_BIT;
276 
277         return 0;
278 }
279 
280 static int shstk_push_sigframe(unsigned long *ssp)
281 {
282         unsigned long target_ssp = *ssp;
283 
284         /* Token must be aligned */
285         if (!IS_ALIGNED(target_ssp, 8))
286                 return -EINVAL;
287 
288         *ssp -= SS_FRAME_SIZE;
289         if (put_shstk_data((void __user *)*ssp, target_ssp))
290                 return -EFAULT;
291 
292         return 0;
293 }
294 
295 static int shstk_pop_sigframe(unsigned long *ssp)
296 {
297         struct vm_area_struct *vma;
298         unsigned long token_addr;
299         bool need_to_check_vma;
300         int err = 1;
301 
302         /*
303          * It is possible for the SSP to be off the end of a shadow stack by 4
304          * or 8 bytes. If the shadow stack is at the start of a page or 4 bytes
305          * before it, it might be this case, so check that the address being
306          * read is actually shadow stack.
307          */
308         if (!IS_ALIGNED(*ssp, 8))
309                 return -EINVAL;
310 
311         need_to_check_vma = PAGE_ALIGN(*ssp) == *ssp;
312 
313         if (need_to_check_vma)
314                 mmap_read_lock_killable(current->mm);
315 
316         err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp);
317         if (unlikely(err))
318                 goto out_err;
319 
320         if (need_to_check_vma) {
321                 vma = find_vma(current->mm, *ssp);
322                 if (!vma || !(vma->vm_flags & VM_SHADOW_STACK)) {
323                         err = -EFAULT;
324                         goto out_err;
325                 }
326 
327                 mmap_read_unlock(current->mm);
328         }
329 
330         /* Restore SSP aligned? */
331         if (unlikely(!IS_ALIGNED(token_addr, 8)))
332                 return -EINVAL;
333 
334         /* SSP in userspace? */
335         if (unlikely(token_addr >= TASK_SIZE_MAX))
336                 return -EINVAL;
337 
338         *ssp = token_addr;
339 
340         return 0;
341 out_err:
342         if (need_to_check_vma)
343                 mmap_read_unlock(current->mm);
344         return err;
345 }
346 
347 int setup_signal_shadow_stack(struct ksignal *ksig)
348 {
349         void __user *restorer = ksig->ka.sa.sa_restorer;
350         unsigned long ssp;
351         int err;
352 
353         if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
354             !features_enabled(ARCH_SHSTK_SHSTK))
355                 return 0;
356 
357         if (!restorer)
358                 return -EINVAL;
359 
360         ssp = get_user_shstk_addr();
361         if (unlikely(!ssp))
362                 return -EINVAL;
363 
364         err = shstk_push_sigframe(&ssp);
365         if (unlikely(err))
366                 return err;
367 
368         /* Push restorer address */
369         ssp -= SS_FRAME_SIZE;
370         err = write_user_shstk_64((u64 __user *)ssp, (u64)restorer);
371         if (unlikely(err))
372                 return -EFAULT;
373 
374         fpregs_lock_and_load();
375         wrmsrl(MSR_IA32_PL3_SSP, ssp);
376         fpregs_unlock();
377 
378         return 0;
379 }
380 
381 int restore_signal_shadow_stack(void)
382 {
383         unsigned long ssp;
384         int err;
385 
386         if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
387             !features_enabled(ARCH_SHSTK_SHSTK))
388                 return 0;
389 
390         ssp = get_user_shstk_addr();
391         if (unlikely(!ssp))
392                 return -EINVAL;
393 
394         err = shstk_pop_sigframe(&ssp);
395         if (unlikely(err))
396                 return err;
397 
398         fpregs_lock_and_load();
399         wrmsrl(MSR_IA32_PL3_SSP, ssp);
400         fpregs_unlock();
401 
402         return 0;
403 }
404 
405 void shstk_free(struct task_struct *tsk)
406 {
407         struct thread_shstk *shstk = &tsk->thread.shstk;
408 
409         if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
410             !features_enabled(ARCH_SHSTK_SHSTK))
411                 return;
412 
413         /*
414          * When fork() with CLONE_VM fails, the child (tsk) already has a
415          * shadow stack allocated, and exit_thread() calls this function to
416          * free it.  In this case the parent (current) and the child share
417          * the same mm struct.
418          */
419         if (!tsk->mm || tsk->mm != current->mm)
420                 return;
421 
422         /*
423          * If shstk->base is NULL, then this task is not managing its
424          * own shadow stack (CLONE_VFORK). So skip freeing it.
425          */
426         if (!shstk->base)
427                 return;
428 
429         /*
430          * shstk->base is NULL for CLONE_VFORK child tasks, and so is
431          * normal. But size = 0 on a shstk->base is not normal and
432          * indicated an attempt to free the thread shadow stack twice.
433          * Warn about it.
434          */
435         if (WARN_ON(!shstk->size))
436                 return;
437 
438         unmap_shadow_stack(shstk->base, shstk->size);
439 
440         shstk->size = 0;
441 }
442 
443 static int wrss_control(bool enable)
444 {
445         u64 msrval;
446 
447         if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
448                 return -EOPNOTSUPP;
449 
450         /*
451          * Only enable WRSS if shadow stack is enabled. If shadow stack is not
452          * enabled, WRSS will already be disabled, so don't bother clearing it
453          * when disabling.
454          */
455         if (!features_enabled(ARCH_SHSTK_SHSTK))
456                 return -EPERM;
457 
458         /* Already enabled/disabled? */
459         if (features_enabled(ARCH_SHSTK_WRSS) == enable)
460                 return 0;
461 
462         fpregs_lock_and_load();
463         rdmsrl(MSR_IA32_U_CET, msrval);
464 
465         if (enable) {
466                 features_set(ARCH_SHSTK_WRSS);
467                 msrval |= CET_WRSS_EN;
468         } else {
469                 features_clr(ARCH_SHSTK_WRSS);
470                 if (!(msrval & CET_WRSS_EN))
471                         goto unlock;
472 
473                 msrval &= ~CET_WRSS_EN;
474         }
475 
476         wrmsrl(MSR_IA32_U_CET, msrval);
477 
478 unlock:
479         fpregs_unlock();
480 
481         return 0;
482 }
483 
484 static int shstk_disable(void)
485 {
486         if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
487                 return -EOPNOTSUPP;
488 
489         /* Already disabled? */
490         if (!features_enabled(ARCH_SHSTK_SHSTK))
491                 return 0;
492 
493         fpregs_lock_and_load();
494         /* Disable WRSS too when disabling shadow stack */
495         wrmsrl(MSR_IA32_U_CET, 0);
496         wrmsrl(MSR_IA32_PL3_SSP, 0);
497         fpregs_unlock();
498 
499         shstk_free(current);
500         features_clr(ARCH_SHSTK_SHSTK | ARCH_SHSTK_WRSS);
501 
502         return 0;
503 }
504 
505 SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
506 {
507         bool set_tok = flags & SHADOW_STACK_SET_TOKEN;
508         unsigned long aligned_size;
509 
510         if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
511                 return -EOPNOTSUPP;
512 
513         if (flags & ~SHADOW_STACK_SET_TOKEN)
514                 return -EINVAL;
515 
516         /* If there isn't space for a token */
517         if (set_tok && size < 8)
518                 return -ENOSPC;
519 
520         if (addr && addr < SZ_4G)
521                 return -ERANGE;
522 
523         /*
524          * An overflow would result in attempting to write the restore token
525          * to the wrong location. Not catastrophic, but just return the right
526          * error code and block it.
527          */
528         aligned_size = PAGE_ALIGN(size);
529         if (aligned_size < size)
530                 return -EOVERFLOW;
531 
532         return alloc_shstk(addr, aligned_size, size, set_tok);
533 }
534 
535 long shstk_prctl(struct task_struct *task, int option, unsigned long arg2)
536 {
537         unsigned long features = arg2;
538 
539         if (option == ARCH_SHSTK_STATUS) {
540                 return put_user(task->thread.features, (unsigned long __user *)arg2);
541         }
542 
543         if (option == ARCH_SHSTK_LOCK) {
544                 task->thread.features_locked |= features;
545                 return 0;
546         }
547 
548         /* Only allow via ptrace */
549         if (task != current) {
550                 if (option == ARCH_SHSTK_UNLOCK && IS_ENABLED(CONFIG_CHECKPOINT_RESTORE)) {
551                         task->thread.features_locked &= ~features;
552                         return 0;
553                 }
554                 return -EINVAL;
555         }
556 
557         /* Do not allow to change locked features */
558         if (features & task->thread.features_locked)
559                 return -EPERM;
560 
561         /* Only support enabling/disabling one feature at a time. */
562         if (hweight_long(features) > 1)
563                 return -EINVAL;
564 
565         if (option == ARCH_SHSTK_DISABLE) {
566                 if (features & ARCH_SHSTK_WRSS)
567                         return wrss_control(false);
568                 if (features & ARCH_SHSTK_SHSTK)
569                         return shstk_disable();
570                 return -EINVAL;
571         }
572 
573         /* Handle ARCH_SHSTK_ENABLE */
574         if (features & ARCH_SHSTK_SHSTK)
575                 return shstk_setup();
576         if (features & ARCH_SHSTK_WRSS)
577                 return wrss_control(true);
578         return -EINVAL;
579 }
580 
581 int shstk_update_last_frame(unsigned long val)
582 {
583         unsigned long ssp;
584 
585         if (!features_enabled(ARCH_SHSTK_SHSTK))
586                 return 0;
587 
588         ssp = get_user_shstk_addr();
589         return write_user_shstk_64((u64 __user *)ssp, (u64)val);
590 }
591 
592 bool shstk_is_enabled(void)
593 {
594         return features_enabled(ARCH_SHSTK_SHSTK);
595 }
596 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php