~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/testing/selftests/mm/uffd-common.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * Userfaultfd tests util functions
  4  *
  5  * Copyright (C) 2015-2023  Red Hat, Inc.
  6  */
  7 
  8 #include "uffd-common.h"
  9 
 10 #define BASE_PMD_ADDR ((void *)(1UL << 30))
 11 
 12 volatile bool test_uffdio_copy_eexist = true;
 13 unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
 14 char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap;
 15 int uffd = -1, uffd_flags, finished, *pipefd, test_type;
 16 bool map_shared;
 17 bool test_uffdio_wp = true;
 18 unsigned long long *count_verify;
 19 uffd_test_ops_t *uffd_test_ops;
 20 uffd_test_case_ops_t *uffd_test_case_ops;
 21 atomic_bool ready_for_fork;
 22 
 23 static int uffd_mem_fd_create(off_t mem_size, bool hugetlb)
 24 {
 25         unsigned int memfd_flags = 0;
 26         int mem_fd;
 27 
 28         if (hugetlb)
 29                 memfd_flags = MFD_HUGETLB;
 30         mem_fd = memfd_create("uffd-test", memfd_flags);
 31         if (mem_fd < 0)
 32                 err("memfd_create");
 33         if (ftruncate(mem_fd, mem_size))
 34                 err("ftruncate");
 35         if (fallocate(mem_fd,
 36                       FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
 37                       mem_size))
 38                 err("fallocate");
 39 
 40         return mem_fd;
 41 }
 42 
 43 static void anon_release_pages(char *rel_area)
 44 {
 45         if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED))
 46                 err("madvise(MADV_DONTNEED) failed");
 47 }
 48 
 49 static int anon_allocate_area(void **alloc_area, bool is_src)
 50 {
 51         *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
 52                            MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 53         if (*alloc_area == MAP_FAILED) {
 54                 *alloc_area = NULL;
 55                 return -errno;
 56         }
 57         return 0;
 58 }
 59 
 60 static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
 61 {
 62 }
 63 
 64 static void hugetlb_release_pages(char *rel_area)
 65 {
 66         if (!map_shared) {
 67                 if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED))
 68                         err("madvise(MADV_DONTNEED) failed");
 69         } else {
 70                 if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE))
 71                         err("madvise(MADV_REMOVE) failed");
 72         }
 73 }
 74 
 75 static int hugetlb_allocate_area(void **alloc_area, bool is_src)
 76 {
 77         off_t size = nr_pages * page_size;
 78         off_t offset = is_src ? 0 : size;
 79         void *area_alias = NULL;
 80         char **alloc_area_alias;
 81         int mem_fd = uffd_mem_fd_create(size * 2, true);
 82 
 83         *alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE,
 84                            (map_shared ? MAP_SHARED : MAP_PRIVATE) |
 85                            (is_src ? 0 : MAP_NORESERVE),
 86                            mem_fd, offset);
 87         if (*alloc_area == MAP_FAILED) {
 88                 *alloc_area = NULL;
 89                 return -errno;
 90         }
 91 
 92         if (map_shared) {
 93                 area_alias = mmap(NULL, size, PROT_READ | PROT_WRITE,
 94                                   MAP_SHARED, mem_fd, offset);
 95                 if (area_alias == MAP_FAILED)
 96                         return -errno;
 97         }
 98 
 99         if (is_src) {
100                 alloc_area_alias = &area_src_alias;
101         } else {
102                 alloc_area_alias = &area_dst_alias;
103         }
104         if (area_alias)
105                 *alloc_area_alias = area_alias;
106 
107         close(mem_fd);
108         return 0;
109 }
110 
111 static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
112 {
113         if (!map_shared)
114                 return;
115 
116         *start = (unsigned long) area_dst_alias + offset;
117 }
118 
119 static void shmem_release_pages(char *rel_area)
120 {
121         if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE))
122                 err("madvise(MADV_REMOVE) failed");
123 }
124 
125 static int shmem_allocate_area(void **alloc_area, bool is_src)
126 {
127         void *area_alias = NULL;
128         size_t bytes = nr_pages * page_size, hpage_size = read_pmd_pagesize();
129         unsigned long offset = is_src ? 0 : bytes;
130         char *p = NULL, *p_alias = NULL;
131         int mem_fd = uffd_mem_fd_create(bytes * 2, false);
132 
133         /* TODO: clean this up.  Use a static addr is ugly */
134         p = BASE_PMD_ADDR;
135         if (!is_src)
136                 /* src map + alias + interleaved hpages */
137                 p += 2 * (bytes + hpage_size);
138         p_alias = p;
139         p_alias += bytes;
140         p_alias += hpage_size;  /* Prevent src/dst VMA merge */
141 
142         *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
143                            mem_fd, offset);
144         if (*alloc_area == MAP_FAILED) {
145                 *alloc_area = NULL;
146                 return -errno;
147         }
148         if (*alloc_area != p)
149                 err("mmap of memfd failed at %p", p);
150 
151         area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
152                           mem_fd, offset);
153         if (area_alias == MAP_FAILED) {
154                 munmap(*alloc_area, bytes);
155                 *alloc_area = NULL;
156                 return -errno;
157         }
158         if (area_alias != p_alias)
159                 err("mmap of anonymous memory failed at %p", p_alias);
160 
161         if (is_src)
162                 area_src_alias = area_alias;
163         else
164                 area_dst_alias = area_alias;
165 
166         close(mem_fd);
167         return 0;
168 }
169 
170 static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset)
171 {
172         *start = (unsigned long)area_dst_alias + offset;
173 }
174 
175 static void shmem_check_pmd_mapping(void *p, int expect_nr_hpages)
176 {
177         if (!check_huge_shmem(area_dst_alias, expect_nr_hpages,
178                               read_pmd_pagesize()))
179                 err("Did not find expected %d number of hugepages",
180                     expect_nr_hpages);
181 }
182 
183 struct uffd_test_ops anon_uffd_test_ops = {
184         .allocate_area = anon_allocate_area,
185         .release_pages = anon_release_pages,
186         .alias_mapping = noop_alias_mapping,
187         .check_pmd_mapping = NULL,
188 };
189 
190 struct uffd_test_ops shmem_uffd_test_ops = {
191         .allocate_area = shmem_allocate_area,
192         .release_pages = shmem_release_pages,
193         .alias_mapping = shmem_alias_mapping,
194         .check_pmd_mapping = shmem_check_pmd_mapping,
195 };
196 
197 struct uffd_test_ops hugetlb_uffd_test_ops = {
198         .allocate_area = hugetlb_allocate_area,
199         .release_pages = hugetlb_release_pages,
200         .alias_mapping = hugetlb_alias_mapping,
201         .check_pmd_mapping = NULL,
202 };
203 
204 void uffd_stats_report(struct uffd_args *args, int n_cpus)
205 {
206         int i;
207         unsigned long long miss_total = 0, wp_total = 0, minor_total = 0;
208 
209         for (i = 0; i < n_cpus; i++) {
210                 miss_total += args[i].missing_faults;
211                 wp_total += args[i].wp_faults;
212                 minor_total += args[i].minor_faults;
213         }
214 
215         printf("userfaults: ");
216         if (miss_total) {
217                 printf("%llu missing (", miss_total);
218                 for (i = 0; i < n_cpus; i++)
219                         printf("%lu+", args[i].missing_faults);
220                 printf("\b) ");
221         }
222         if (wp_total) {
223                 printf("%llu wp (", wp_total);
224                 for (i = 0; i < n_cpus; i++)
225                         printf("%lu+", args[i].wp_faults);
226                 printf("\b) ");
227         }
228         if (minor_total) {
229                 printf("%llu minor (", minor_total);
230                 for (i = 0; i < n_cpus; i++)
231                         printf("%lu+", args[i].minor_faults);
232                 printf("\b)");
233         }
234         printf("\n");
235 }
236 
237 int userfaultfd_open(uint64_t *features)
238 {
239         struct uffdio_api uffdio_api;
240 
241         uffd = uffd_open(UFFD_FLAGS);
242         if (uffd < 0)
243                 return -1;
244         uffd_flags = fcntl(uffd, F_GETFD, NULL);
245 
246         uffdio_api.api = UFFD_API;
247         uffdio_api.features = *features;
248         if (ioctl(uffd, UFFDIO_API, &uffdio_api))
249                 /* Probably lack of CAP_PTRACE? */
250                 return -1;
251         if (uffdio_api.api != UFFD_API)
252                 err("UFFDIO_API error: %" PRIu64, (uint64_t)uffdio_api.api);
253 
254         *features = uffdio_api.features;
255         return 0;
256 }
257 
258 static inline void munmap_area(void **area)
259 {
260         if (*area)
261                 if (munmap(*area, nr_pages * page_size))
262                         err("munmap");
263 
264         *area = NULL;
265 }
266 
267 void uffd_test_ctx_clear(void)
268 {
269         size_t i;
270 
271         if (pipefd) {
272                 for (i = 0; i < nr_cpus * 2; ++i) {
273                         if (close(pipefd[i]))
274                                 err("close pipefd");
275                 }
276                 free(pipefd);
277                 pipefd = NULL;
278         }
279 
280         if (count_verify) {
281                 free(count_verify);
282                 count_verify = NULL;
283         }
284 
285         if (uffd != -1) {
286                 if (close(uffd))
287                         err("close uffd");
288                 uffd = -1;
289         }
290 
291         munmap_area((void **)&area_src);
292         munmap_area((void **)&area_src_alias);
293         munmap_area((void **)&area_dst);
294         munmap_area((void **)&area_dst_alias);
295         munmap_area((void **)&area_remap);
296 }
297 
298 int uffd_test_ctx_init(uint64_t features, const char **errmsg)
299 {
300         unsigned long nr, cpu;
301         int ret;
302 
303         if (uffd_test_case_ops && uffd_test_case_ops->pre_alloc) {
304                 ret = uffd_test_case_ops->pre_alloc(errmsg);
305                 if (ret)
306                         return ret;
307         }
308 
309         ret = uffd_test_ops->allocate_area((void **)&area_src, true);
310         ret |= uffd_test_ops->allocate_area((void **)&area_dst, false);
311         if (ret) {
312                 if (errmsg)
313                         *errmsg = "memory allocation failed";
314                 return ret;
315         }
316 
317         if (uffd_test_case_ops && uffd_test_case_ops->post_alloc) {
318                 ret = uffd_test_case_ops->post_alloc(errmsg);
319                 if (ret)
320                         return ret;
321         }
322 
323         ret = userfaultfd_open(&features);
324         if (ret) {
325                 if (errmsg)
326                         *errmsg = "possible lack of priviledge";
327                 return ret;
328         }
329 
330         count_verify = malloc(nr_pages * sizeof(unsigned long long));
331         if (!count_verify)
332                 err("count_verify");
333 
334         for (nr = 0; nr < nr_pages; nr++) {
335                 *area_mutex(area_src, nr) =
336                         (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
337                 count_verify[nr] = *area_count(area_src, nr) = 1;
338                 /*
339                  * In the transition between 255 to 256, powerpc will
340                  * read out of order in my_bcmp and see both bytes as
341                  * zero, so leave a placeholder below always non-zero
342                  * after the count, to avoid my_bcmp to trigger false
343                  * positives.
344                  */
345                 *(area_count(area_src, nr) + 1) = 1;
346         }
347 
348         /*
349          * After initialization of area_src, we must explicitly release pages
350          * for area_dst to make sure it's fully empty.  Otherwise we could have
351          * some area_dst pages be errornously initialized with zero pages,
352          * hence we could hit memory corruption later in the test.
353          *
354          * One example is when THP is globally enabled, above allocate_area()
355          * calls could have the two areas merged into a single VMA (as they
356          * will have the same VMA flags so they're mergeable).  When we
357          * initialize the area_src above, it's possible that some part of
358          * area_dst could have been faulted in via one huge THP that will be
359          * shared between area_src and area_dst.  It could cause some of the
360          * area_dst won't be trapped by missing userfaults.
361          *
362          * This release_pages() will guarantee even if that happened, we'll
363          * proactively split the thp and drop any accidentally initialized
364          * pages within area_dst.
365          */
366         uffd_test_ops->release_pages(area_dst);
367 
368         pipefd = malloc(sizeof(int) * nr_cpus * 2);
369         if (!pipefd)
370                 err("pipefd");
371         for (cpu = 0; cpu < nr_cpus; cpu++)
372                 if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK))
373                         err("pipe");
374 
375         return 0;
376 }
377 
378 void wp_range(int ufd, __u64 start, __u64 len, bool wp)
379 {
380         struct uffdio_writeprotect prms;
381 
382         /* Write protection page faults */
383         prms.range.start = start;
384         prms.range.len = len;
385         /* Undo write-protect, do wakeup after that */
386         prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
387 
388         if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms))
389                 err("clear WP failed: address=0x%"PRIx64, (uint64_t)start);
390 }
391 
392 static void continue_range(int ufd, __u64 start, __u64 len, bool wp)
393 {
394         struct uffdio_continue req;
395         int ret;
396 
397         req.range.start = start;
398         req.range.len = len;
399         req.mode = 0;
400         if (wp)
401                 req.mode |= UFFDIO_CONTINUE_MODE_WP;
402 
403         if (ioctl(ufd, UFFDIO_CONTINUE, &req))
404                 err("UFFDIO_CONTINUE failed for address 0x%" PRIx64,
405                     (uint64_t)start);
406 
407         /*
408          * Error handling within the kernel for continue is subtly different
409          * from copy or zeropage, so it may be a source of bugs. Trigger an
410          * error (-EEXIST) on purpose, to verify doing so doesn't cause a BUG.
411          */
412         req.mapped = 0;
413         ret = ioctl(ufd, UFFDIO_CONTINUE, &req);
414         if (ret >= 0 || req.mapped != -EEXIST)
415                 err("failed to exercise UFFDIO_CONTINUE error handling, ret=%d, mapped=%" PRId64,
416                     ret, (int64_t) req.mapped);
417 }
418 
419 int uffd_read_msg(int ufd, struct uffd_msg *msg)
420 {
421         int ret = read(uffd, msg, sizeof(*msg));
422 
423         if (ret != sizeof(*msg)) {
424                 if (ret < 0) {
425                         if (errno == EAGAIN || errno == EINTR)
426                                 return 1;
427                         err("blocking read error");
428                 } else {
429                         err("short read");
430                 }
431         }
432 
433         return 0;
434 }
435 
436 void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args)
437 {
438         unsigned long offset;
439 
440         if (msg->event != UFFD_EVENT_PAGEFAULT)
441                 err("unexpected msg event %u", msg->event);
442 
443         if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
444                 /* Write protect page faults */
445                 wp_range(uffd, msg->arg.pagefault.address, page_size, false);
446                 args->wp_faults++;
447         } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) {
448                 uint8_t *area;
449                 int b;
450 
451                 /*
452                  * Minor page faults
453                  *
454                  * To prove we can modify the original range for testing
455                  * purposes, we're going to bit flip this range before
456                  * continuing.
457                  *
458                  * Note that this requires all minor page fault tests operate on
459                  * area_dst (non-UFFD-registered) and area_dst_alias
460                  * (UFFD-registered).
461                  */
462 
463                 area = (uint8_t *)(area_dst +
464                                    ((char *)msg->arg.pagefault.address -
465                                     area_dst_alias));
466                 for (b = 0; b < page_size; ++b)
467                         area[b] = ~area[b];
468                 continue_range(uffd, msg->arg.pagefault.address, page_size,
469                                args->apply_wp);
470                 args->minor_faults++;
471         } else {
472                 /*
473                  * Missing page faults.
474                  *
475                  * Here we force a write check for each of the missing mode
476                  * faults.  It's guaranteed because the only threads that
477                  * will trigger uffd faults are the locking threads, and
478                  * their first instruction to touch the missing page will
479                  * always be pthread_mutex_lock().
480                  *
481                  * Note that here we relied on an NPTL glibc impl detail to
482                  * always read the lock type at the entry of the lock op
483                  * (pthread_mutex_t.__data.__type, offset 0x10) before
484                  * doing any locking operations to guarantee that.  It's
485                  * actually not good to rely on this impl detail because
486                  * logically a pthread-compatible lib can implement the
487                  * locks without types and we can fail when linking with
488                  * them.  However since we used to find bugs with this
489                  * strict check we still keep it around.  Hopefully this
490                  * could be a good hint when it fails again.  If one day
491                  * it'll break on some other impl of glibc we'll revisit.
492                  */
493                 if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
494                         err("unexpected write fault");
495 
496                 offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
497                 offset &= ~(page_size-1);
498 
499                 if (copy_page(uffd, offset, args->apply_wp))
500                         args->missing_faults++;
501         }
502 }
503 
504 void *uffd_poll_thread(void *arg)
505 {
506         struct uffd_args *args = (struct uffd_args *)arg;
507         unsigned long cpu = args->cpu;
508         struct pollfd pollfd[2];
509         struct uffd_msg msg;
510         struct uffdio_register uffd_reg;
511         int ret;
512         char tmp_chr;
513 
514         if (!args->handle_fault)
515                 args->handle_fault = uffd_handle_page_fault;
516 
517         pollfd[0].fd = uffd;
518         pollfd[0].events = POLLIN;
519         pollfd[1].fd = pipefd[cpu*2];
520         pollfd[1].events = POLLIN;
521 
522         ready_for_fork = true;
523 
524         for (;;) {
525                 ret = poll(pollfd, 2, -1);
526                 if (ret <= 0) {
527                         if (errno == EINTR || errno == EAGAIN)
528                                 continue;
529                         err("poll error: %d", ret);
530                 }
531                 if (pollfd[1].revents) {
532                         if (!(pollfd[1].revents & POLLIN))
533                                 err("pollfd[1].revents %d", pollfd[1].revents);
534                         if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
535                                 err("read pipefd error");
536                         break;
537                 }
538                 if (!(pollfd[0].revents & POLLIN))
539                         err("pollfd[0].revents %d", pollfd[0].revents);
540                 if (uffd_read_msg(uffd, &msg))
541                         continue;
542                 switch (msg.event) {
543                 default:
544                         err("unexpected msg event %u\n", msg.event);
545                         break;
546                 case UFFD_EVENT_PAGEFAULT:
547                         args->handle_fault(&msg, args);
548                         break;
549                 case UFFD_EVENT_FORK:
550                         close(uffd);
551                         uffd = msg.arg.fork.ufd;
552                         pollfd[0].fd = uffd;
553                         break;
554                 case UFFD_EVENT_REMOVE:
555                         uffd_reg.range.start = msg.arg.remove.start;
556                         uffd_reg.range.len = msg.arg.remove.end -
557                                 msg.arg.remove.start;
558                         if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
559                                 err("remove failure");
560                         break;
561                 case UFFD_EVENT_REMAP:
562                         area_remap = area_dst;  /* save for later unmap */
563                         area_dst = (char *)(unsigned long)msg.arg.remap.to;
564                         break;
565                 }
566         }
567 
568         return NULL;
569 }
570 
571 static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
572                             unsigned long offset)
573 {
574         uffd_test_ops->alias_mapping(&uffdio_copy->dst,
575                                      uffdio_copy->len,
576                                      offset);
577         if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
578                 /* real retval in ufdio_copy.copy */
579                 if (uffdio_copy->copy != -EEXIST)
580                         err("UFFDIO_COPY retry error: %"PRId64,
581                             (int64_t)uffdio_copy->copy);
582         } else {
583                 err("UFFDIO_COPY retry unexpected: %"PRId64,
584                     (int64_t)uffdio_copy->copy);
585         }
586 }
587 
588 static void wake_range(int ufd, unsigned long addr, unsigned long len)
589 {
590         struct uffdio_range uffdio_wake;
591 
592         uffdio_wake.start = addr;
593         uffdio_wake.len = len;
594 
595         if (ioctl(ufd, UFFDIO_WAKE, &uffdio_wake))
596                 fprintf(stderr, "error waking %lu\n",
597                         addr), exit(1);
598 }
599 
600 int __copy_page(int ufd, unsigned long offset, bool retry, bool wp)
601 {
602         struct uffdio_copy uffdio_copy;
603 
604         if (offset >= nr_pages * page_size)
605                 err("unexpected offset %lu\n", offset);
606         uffdio_copy.dst = (unsigned long) area_dst + offset;
607         uffdio_copy.src = (unsigned long) area_src + offset;
608         uffdio_copy.len = page_size;
609         if (wp)
610                 uffdio_copy.mode = UFFDIO_COPY_MODE_WP;
611         else
612                 uffdio_copy.mode = 0;
613         uffdio_copy.copy = 0;
614         if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
615                 /* real retval in ufdio_copy.copy */
616                 if (uffdio_copy.copy != -EEXIST)
617                         err("UFFDIO_COPY error: %"PRId64,
618                             (int64_t)uffdio_copy.copy);
619                 wake_range(ufd, uffdio_copy.dst, page_size);
620         } else if (uffdio_copy.copy != page_size) {
621                 err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy);
622         } else {
623                 if (test_uffdio_copy_eexist && retry) {
624                         test_uffdio_copy_eexist = false;
625                         retry_copy_page(ufd, &uffdio_copy, offset);
626                 }
627                 return 1;
628         }
629         return 0;
630 }
631 
/*
 * Copy one page at @offset via __copy_page() with the retry step disabled.
 * Returns whatever __copy_page() returns.
 */
int copy_page(int ufd, unsigned long offset, bool wp)
{
	const bool retry = false;

	return __copy_page(ufd, offset, retry, wp);
}
636 
637 int move_page(int ufd, unsigned long offset, unsigned long len)
638 {
639         struct uffdio_move uffdio_move;
640 
641         if (offset + len > nr_pages * page_size)
642                 err("unexpected offset %lu and length %lu\n", offset, len);
643         uffdio_move.dst = (unsigned long) area_dst + offset;
644         uffdio_move.src = (unsigned long) area_src + offset;
645         uffdio_move.len = len;
646         uffdio_move.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES;
647         uffdio_move.move = 0;
648         if (ioctl(ufd, UFFDIO_MOVE, &uffdio_move)) {
649                 /* real retval in uffdio_move.move */
650                 if (uffdio_move.move != -EEXIST)
651                         err("UFFDIO_MOVE error: %"PRId64,
652                             (int64_t)uffdio_move.move);
653                 wake_range(ufd, uffdio_move.dst, len);
654         } else if (uffdio_move.move != len) {
655                 err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move);
656         } else
657                 return 1;
658         return 0;
659 }
660 
661 int uffd_open_dev(unsigned int flags)
662 {
663         int fd, uffd;
664 
665         fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
666         if (fd < 0)
667                 return fd;
668         uffd = ioctl(fd, USERFAULTFD_IOC_NEW, flags);
669         close(fd);
670 
671         return uffd;
672 }
673 
/*
 * Obtain a userfaultfd through the userfaultfd system call with @flags.
 * Returns the syscall() result converted to int.
 */
int uffd_open_sys(unsigned int flags)
{
	long rc = syscall(__NR_userfaultfd, flags);

	return rc;
}
678 
/*
 * Open a userfaultfd with @flags: try the system call first and fall back
 * to /dev/userfaultfd when the system call returns a negative value.
 * Returns the result of whichever attempt was made last.
 */
int uffd_open(unsigned int flags)
{
	int fd = uffd_open_sys(flags);

	if (fd >= 0)
		return fd;

	return uffd_open_dev(flags);
}
688 
689 int uffd_get_features(uint64_t *features)
690 {
691         struct uffdio_api uffdio_api = { .api = UFFD_API, .features = 0 };
692         /*
693          * This should by default work in most kernels; the feature list
694          * will be the same no matter what we pass in here.
695          */
696         int fd = uffd_open(UFFD_USER_MODE_ONLY);
697 
698         if (fd < 0)
699                 /* Maybe the kernel is older than user-only mode? */
700                 fd = uffd_open(0);
701 
702         if (fd < 0)
703                 return fd;
704 
705         if (ioctl(fd, UFFDIO_API, &uffdio_api)) {
706                 close(fd);
707                 return -errno;
708         }
709 
710         *features = uffdio_api.features;
711         close(fd);
712 
713         return 0;
714 }
715 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php