~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/x86/kernel/cpu/sgx/virt.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  * Device driver to expose SGX enclave memory to KVM guests.
  4  *
  5  * Copyright(c) 2021 Intel Corporation.
  6  */
  7 
  8 #include <linux/miscdevice.h>
  9 #include <linux/mm.h>
 10 #include <linux/mman.h>
 11 #include <linux/sched/mm.h>
 12 #include <linux/sched/signal.h>
 13 #include <linux/slab.h>
 14 #include <linux/xarray.h>
 15 #include <asm/sgx.h>
 16 #include <uapi/asm/sgx.h>
 17 
 18 #include "encls.h"
 19 #include "sgx.h"
 20 
 21 struct sgx_vepc {
 22         struct xarray page_array;
 23         struct mutex lock;
 24 };
 25 
 26 /*
 27  * Temporary SECS pages that cannot be EREMOVE'd due to having child in other
 28  * virtual EPC instances, and the lock to protect it.
 29  */
 30 static struct mutex zombie_secs_pages_lock;
 31 static struct list_head zombie_secs_pages;
 32 
 33 static int __sgx_vepc_fault(struct sgx_vepc *vepc,
 34                             struct vm_area_struct *vma, unsigned long addr)
 35 {
 36         struct sgx_epc_page *epc_page;
 37         unsigned long index, pfn;
 38         int ret;
 39 
 40         WARN_ON(!mutex_is_locked(&vepc->lock));
 41 
 42         /* Calculate index of EPC page in virtual EPC's page_array */
 43         index = vma->vm_pgoff + PFN_DOWN(addr - vma->vm_start);
 44 
 45         epc_page = xa_load(&vepc->page_array, index);
 46         if (epc_page)
 47                 return 0;
 48 
 49         epc_page = sgx_alloc_epc_page(vepc, false);
 50         if (IS_ERR(epc_page))
 51                 return PTR_ERR(epc_page);
 52 
 53         ret = xa_err(xa_store(&vepc->page_array, index, epc_page, GFP_KERNEL));
 54         if (ret)
 55                 goto err_free;
 56 
 57         pfn = PFN_DOWN(sgx_get_epc_phys_addr(epc_page));
 58 
 59         ret = vmf_insert_pfn(vma, addr, pfn);
 60         if (ret != VM_FAULT_NOPAGE) {
 61                 ret = -EFAULT;
 62                 goto err_delete;
 63         }
 64 
 65         return 0;
 66 
 67 err_delete:
 68         xa_erase(&vepc->page_array, index);
 69 err_free:
 70         sgx_free_epc_page(epc_page);
 71         return ret;
 72 }
 73 
 74 static vm_fault_t sgx_vepc_fault(struct vm_fault *vmf)
 75 {
 76         struct vm_area_struct *vma = vmf->vma;
 77         struct sgx_vepc *vepc = vma->vm_private_data;
 78         int ret;
 79 
 80         mutex_lock(&vepc->lock);
 81         ret = __sgx_vepc_fault(vepc, vma, vmf->address);
 82         mutex_unlock(&vepc->lock);
 83 
 84         if (!ret)
 85                 return VM_FAULT_NOPAGE;
 86 
 87         if (ret == -EBUSY && (vmf->flags & FAULT_FLAG_ALLOW_RETRY)) {
 88                 mmap_read_unlock(vma->vm_mm);
 89                 return VM_FAULT_RETRY;
 90         }
 91 
 92         return VM_FAULT_SIGBUS;
 93 }
 94 
 95 static const struct vm_operations_struct sgx_vepc_vm_ops = {
 96         .fault = sgx_vepc_fault,
 97 };
 98 
 99 static int sgx_vepc_mmap(struct file *file, struct vm_area_struct *vma)
100 {
101         struct sgx_vepc *vepc = file->private_data;
102 
103         if (!(vma->vm_flags & VM_SHARED))
104                 return -EINVAL;
105 
106         vma->vm_ops = &sgx_vepc_vm_ops;
107         /* Don't copy VMA in fork() */
108         vm_flags_set(vma, VM_PFNMAP | VM_IO | VM_DONTDUMP | VM_DONTCOPY);
109         vma->vm_private_data = vepc;
110 
111         return 0;
112 }
113 
/*
 * Run EREMOVE on a previously guest-owned EPC page so that it can go back to
 * the general EPC page pool.
 *
 * The guest cannot be trusted to have left the page in a good state, hence
 * EREMOVE is executed unconditionally.  If the guest did EREMOVE the page
 * properly, the extra EREMOVE is harmless.
 *
 * Return: the result of __eremove().
 */
static int sgx_vepc_remove_page(struct sgx_epc_page *epc_page)
{
	void *epc_addr = sgx_get_epc_virt_addr(epc_page);

	return __eremove(epc_addr);
}
127 
128 static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
129 {
130         int ret = sgx_vepc_remove_page(epc_page);
131         if (ret) {
132                 /*
133                  * Only SGX_CHILD_PRESENT is expected, which is because of
134                  * EREMOVE'ing an SECS still with child, in which case it can
135                  * be handled by EREMOVE'ing the SECS again after all pages in
136                  * virtual EPC have been EREMOVE'd. See comments in below in
137                  * sgx_vepc_release().
138                  *
139                  * The user of virtual EPC (KVM) needs to guarantee there's no
140                  * logical processor is still running in the enclave in guest,
141                  * otherwise EREMOVE will get SGX_ENCLAVE_ACT which cannot be
142                  * handled here.
143                  */
144                 WARN_ONCE(ret != SGX_CHILD_PRESENT, EREMOVE_ERROR_MESSAGE,
145                           ret, ret);
146                 return ret;
147         }
148 
149         sgx_free_epc_page(epc_page);
150         return 0;
151 }
152 
153 static long sgx_vepc_remove_all(struct sgx_vepc *vepc)
154 {
155         struct sgx_epc_page *entry;
156         unsigned long index;
157         long failures = 0;
158 
159         xa_for_each(&vepc->page_array, index, entry) {
160                 int ret = sgx_vepc_remove_page(entry);
161                 if (ret) {
162                         if (ret == SGX_CHILD_PRESENT) {
163                                 /* The page is a SECS, userspace will retry.  */
164                                 failures++;
165                         } else {
166                                 /*
167                                  * Report errors due to #GP or SGX_ENCLAVE_ACT; do not
168                                  * WARN, as userspace can induce said failures by
169                                  * calling the ioctl concurrently on multiple vEPCs or
170                                  * while one or more CPUs is running the enclave.  Only
171                                  * a #PF on EREMOVE indicates a kernel/hardware issue.
172                                  */
173                                 WARN_ON_ONCE(encls_faulted(ret) &&
174                                              ENCLS_TRAPNR(ret) != X86_TRAP_GP);
175                                 return -EBUSY;
176                         }
177                 }
178                 cond_resched();
179         }
180 
181         /*
182          * Return the number of SECS pages that failed to be removed, so
183          * userspace knows that it has to retry.
184          */
185         return failures;
186 }
187 
188 static int sgx_vepc_release(struct inode *inode, struct file *file)
189 {
190         struct sgx_vepc *vepc = file->private_data;
191         struct sgx_epc_page *epc_page, *tmp, *entry;
192         unsigned long index;
193 
194         LIST_HEAD(secs_pages);
195 
196         xa_for_each(&vepc->page_array, index, entry) {
197                 /*
198                  * Remove all normal, child pages.  sgx_vepc_free_page()
199                  * will fail if EREMOVE fails, but this is OK and expected on
200                  * SECS pages.  Those can only be EREMOVE'd *after* all their
201                  * child pages. Retries below will clean them up.
202                  */
203                 if (sgx_vepc_free_page(entry))
204                         continue;
205 
206                 xa_erase(&vepc->page_array, index);
207                 cond_resched();
208         }
209 
210         /*
211          * Retry EREMOVE'ing pages.  This will clean up any SECS pages that
212          * only had children in this 'epc' area.
213          */
214         xa_for_each(&vepc->page_array, index, entry) {
215                 epc_page = entry;
216                 /*
217                  * An EREMOVE failure here means that the SECS page still
218                  * has children.  But, since all children in this 'sgx_vepc'
219                  * have been removed, the SECS page must have a child on
220                  * another instance.
221                  */
222                 if (sgx_vepc_free_page(epc_page))
223                         list_add_tail(&epc_page->list, &secs_pages);
224 
225                 xa_erase(&vepc->page_array, index);
226                 cond_resched();
227         }
228 
229         /*
230          * SECS pages are "pinned" by child pages, and "unpinned" once all
231          * children have been EREMOVE'd.  A child page in this instance
232          * may have pinned an SECS page encountered in an earlier release(),
233          * creating a zombie.  Since some children were EREMOVE'd above,
234          * try to EREMOVE all zombies in the hopes that one was unpinned.
235          */
236         mutex_lock(&zombie_secs_pages_lock);
237         list_for_each_entry_safe(epc_page, tmp, &zombie_secs_pages, list) {
238                 /*
239                  * Speculatively remove the page from the list of zombies,
240                  * if the page is successfully EREMOVE'd it will be added to
241                  * the list of free pages.  If EREMOVE fails, throw the page
242                  * on the local list, which will be spliced on at the end.
243                  */
244                 list_del(&epc_page->list);
245 
246                 if (sgx_vepc_free_page(epc_page))
247                         list_add_tail(&epc_page->list, &secs_pages);
248                 cond_resched();
249         }
250 
251         if (!list_empty(&secs_pages))
252                 list_splice_tail(&secs_pages, &zombie_secs_pages);
253         mutex_unlock(&zombie_secs_pages_lock);
254 
255         xa_destroy(&vepc->page_array);
256         kfree(vepc);
257 
258         return 0;
259 }
260 
261 static int sgx_vepc_open(struct inode *inode, struct file *file)
262 {
263         struct sgx_vepc *vepc;
264 
265         vepc = kzalloc(sizeof(struct sgx_vepc), GFP_KERNEL);
266         if (!vepc)
267                 return -ENOMEM;
268         mutex_init(&vepc->lock);
269         xa_init(&vepc->page_array);
270 
271         file->private_data = vepc;
272 
273         return 0;
274 }
275 
276 static long sgx_vepc_ioctl(struct file *file,
277                            unsigned int cmd, unsigned long arg)
278 {
279         struct sgx_vepc *vepc = file->private_data;
280 
281         switch (cmd) {
282         case SGX_IOC_VEPC_REMOVE_ALL:
283                 if (arg)
284                         return -EINVAL;
285                 return sgx_vepc_remove_all(vepc);
286 
287         default:
288                 return -ENOTTY;
289         }
290 }
291 
292 static const struct file_operations sgx_vepc_fops = {
293         .owner          = THIS_MODULE,
294         .open           = sgx_vepc_open,
295         .unlocked_ioctl = sgx_vepc_ioctl,
296         .compat_ioctl   = sgx_vepc_ioctl,
297         .release        = sgx_vepc_release,
298         .mmap           = sgx_vepc_mmap,
299 };
300 
301 static struct miscdevice sgx_vepc_dev = {
302         .minor          = MISC_DYNAMIC_MINOR,
303         .name           = "sgx_vepc",
304         .nodename       = "sgx_vepc",
305         .fops           = &sgx_vepc_fops,
306 };
307 
308 int __init sgx_vepc_init(void)
309 {
310         /* SGX virtualization requires KVM to work */
311         if (!cpu_feature_enabled(X86_FEATURE_VMX))
312                 return -ENODEV;
313 
314         INIT_LIST_HEAD(&zombie_secs_pages);
315         mutex_init(&zombie_secs_pages_lock);
316 
317         return misc_register(&sgx_vepc_dev);
318 }
319 
320 /**
321  * sgx_virt_ecreate() - Run ECREATE on behalf of guest
322  * @pageinfo:   Pointer to PAGEINFO structure
323  * @secs:       Userspace pointer to SECS page
324  * @trapnr:     trap number injected to guest in case of ECREATE error
325  *
326  * Run ECREATE on behalf of guest after KVM traps ECREATE for the purpose
327  * of enforcing policies of guest's enclaves, and return the trap number
328  * which should be injected to guest in case of any ECREATE error.
329  *
330  * Return:
331  * -  0:        ECREATE was successful.
332  * - <0:        on error.
333  */
334 int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
335                      int *trapnr)
336 {
337         int ret;
338 
339         /*
340          * @secs is an untrusted, userspace-provided address.  It comes from
341          * KVM and is assumed to be a valid pointer which points somewhere in
342          * userspace.  This can fault and call SGX or other fault handlers when
343          * userspace mapping @secs doesn't exist.
344          *
345          * Add a WARN() to make sure @secs is already valid userspace pointer
346          * from caller (KVM), who should already have handled invalid pointer
347          * case (for instance, made by malicious guest).  All other checks,
348          * such as alignment of @secs, are deferred to ENCLS itself.
349          */
350         if (WARN_ON_ONCE(!access_ok(secs, PAGE_SIZE)))
351                 return -EINVAL;
352 
353         __uaccess_begin();
354         ret = __ecreate(pageinfo, (void *)secs);
355         __uaccess_end();
356 
357         if (encls_faulted(ret)) {
358                 *trapnr = ENCLS_TRAPNR(ret);
359                 return -EFAULT;
360         }
361 
362         /* ECREATE doesn't return an error code, it faults or succeeds. */
363         WARN_ON_ONCE(ret);
364         return 0;
365 }
366 EXPORT_SYMBOL_GPL(sgx_virt_ecreate);
367 
368 static int __sgx_virt_einit(void __user *sigstruct, void __user *token,
369                             void __user *secs)
370 {
371         int ret;
372 
373         /*
374          * Make sure all userspace pointers from caller (KVM) are valid.
375          * All other checks deferred to ENCLS itself.  Also see comment
376          * for @secs in sgx_virt_ecreate().
377          */
378 #define SGX_EINITTOKEN_SIZE     304
379         if (WARN_ON_ONCE(!access_ok(sigstruct, sizeof(struct sgx_sigstruct)) ||
380                          !access_ok(token, SGX_EINITTOKEN_SIZE) ||
381                          !access_ok(secs, PAGE_SIZE)))
382                 return -EINVAL;
383 
384         __uaccess_begin();
385         ret = __einit((void *)sigstruct, (void *)token, (void *)secs);
386         __uaccess_end();
387 
388         return ret;
389 }
390 
391 /**
392  * sgx_virt_einit() - Run EINIT on behalf of guest
393  * @sigstruct:          Userspace pointer to SIGSTRUCT structure
394  * @token:              Userspace pointer to EINITTOKEN structure
395  * @secs:               Userspace pointer to SECS page
396  * @lepubkeyhash:       Pointer to guest's *virtual* SGX_LEPUBKEYHASH MSR values
397  * @trapnr:             trap number injected to guest in case of EINIT error
398  *
399  * Run EINIT on behalf of guest after KVM traps EINIT. If SGX_LC is available
400  * in host, SGX driver may rewrite the hardware values at wish, therefore KVM
401  * needs to update hardware values to guest's virtual MSR values in order to
402  * ensure EINIT is executed with expected hardware values.
403  *
404  * Return:
405  * -  0:        EINIT was successful.
406  * - <0:        on error.
407  */
408 int sgx_virt_einit(void __user *sigstruct, void __user *token,
409                    void __user *secs, u64 *lepubkeyhash, int *trapnr)
410 {
411         int ret;
412 
413         if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) {
414                 ret = __sgx_virt_einit(sigstruct, token, secs);
415         } else {
416                 preempt_disable();
417 
418                 sgx_update_lepubkeyhash(lepubkeyhash);
419 
420                 ret = __sgx_virt_einit(sigstruct, token, secs);
421                 preempt_enable();
422         }
423 
424         /* Propagate up the error from the WARN_ON_ONCE in __sgx_virt_einit() */
425         if (ret == -EINVAL)
426                 return ret;
427 
428         if (encls_faulted(ret)) {
429                 *trapnr = ENCLS_TRAPNR(ret);
430                 return -EFAULT;
431         }
432 
433         return ret;
434 }
435 EXPORT_SYMBOL_GPL(sgx_virt_einit);
436 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php