Linux/tools/testing/selftests/mm/cow.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * COW (Copy On Write) tests.
 *
 * Copyright 2022, Red Hat, Inc.
 *
 * Author(s): David Hildenbrand <david@redhat.com>
 */
#define _GNU_SOURCE
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <assert.h>
#include <linux/mman.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <linux/memfd.h>

#include "local_config.h"
#ifdef LOCAL_CONFIG_HAVE_LIBURING
#include <liburing.h>
#endif /* LOCAL_CONFIG_HAVE_LIBURING */

#include "../../../../mm/gup_test.h"
#include "../kselftest.h"
#include "vm_util.h"
#include "thp_settings.h"

static size_t pagesize;
static int pagemap_fd;
static size_t pmdsize;
static int nr_thpsizes;
static size_t thpsizes[20];
static int nr_hugetlbsizes;
static size_t hugetlbsizes[10];
static int gup_fd;
static bool has_huge_zeropage;

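/* Convert a size (in bytes) into a page order: log2(size / pagesize). */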
static int sz2ord(size_t size)
{
        return __builtin_ctzll(size / pagesize);
}

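/* Detect all THP sizes supported by the kernel, in ascending order. */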
static int detect_thp_sizes(size_t sizes[], int max)
{
        int count = 0;
        unsigned long orders;
        size_t kb;
        int i;

        /* thp not supported at all. */
        if (!pmdsize)
                return 0;

        orders = 1UL << sz2ord(pmdsize);
        orders |= thp_supported_orders();

        for (i = 0; orders && count < max; i++) {
                if (!(orders & (1UL << i)))
                        continue;
                orders &= ~(1UL << i);
                kb = (pagesize >> 10) << i;
                sizes[count++] = kb * 1024;
                ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb);
        }

        return count;
}

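/* Check via sysfs whether the huge zeropage is enabled. */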
static void detect_huge_zeropage(void)
{
        int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
                      O_RDONLY);
        size_t enabled = 0;
        char buf[15];
        int ret;

        if (fd < 0)
                return;

        ret = pread(fd, buf, sizeof(buf), 0);
        if (ret > 0 && ret < sizeof(buf)) {
                buf[ret] = 0;

                enabled = strtoul(buf, NULL, 10);
                if (enabled == 1) {
                        has_huge_zeropage = true;
                        ksft_print_msg("[INFO] huge zeropage is enabled\n");
                }
        }

        close(fd);
}

static bool range_is_swapped(void *addr, size_t size)
{
        for (; size; addr += pagesize, size -= pagesize)
                if (!pagemap_is_swapped(pagemap_fd, addr))
                        return false;
        return true;
}

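/*
 * Pipes used to synchronize parent and child: each side writes a single
 * byte once it is ready and the other side waits for it.
 */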
struct comm_pipes {
        int child_ready[2];
        int parent_ready[2];
};

static int setup_comm_pipes(struct comm_pipes *comm_pipes)
{
        if (pipe(comm_pipes->child_ready) < 0)
                return -errno;
        if (pipe(comm_pipes->parent_ready) < 0) {
                close(comm_pipes->child_ready[0]);
                close(comm_pipes->child_ready[1]);
                return -errno;
        }

        return 0;
}

static void close_comm_pipes(struct comm_pipes *comm_pipes)
{
        close(comm_pipes->child_ready[0]);
        close(comm_pipes->child_ready[1]);
        close(comm_pipes->parent_ready[0]);
        close(comm_pipes->parent_ready[1]);
}

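/*
 * Child: back up the memory content, wait until the parent modified its
 * mapping, then check whether we still see the old (pre-COW) content.
 */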
static int child_memcmp_fn(char *mem, size_t size,
                           struct comm_pipes *comm_pipes)
{
        char *old = malloc(size);
        char buf;

        /* Backup the original content. */
        memcpy(old, mem, size);

        /* Wait until the parent modified the page. */
        write(comm_pipes->child_ready[1], "", 1);
        while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
                ;

        /* See if we still read the old values. */
        return memcmp(old, mem, size);
}

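/*
 * Child: take a R/O pin via vmsplice(), unmap the memory, and verify that
 * the pipe still yields the old content after the parent's modification.
 */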
static int child_vmsplice_memcmp_fn(char *mem, size_t size,
                                    struct comm_pipes *comm_pipes)
{
        struct iovec iov = {
                .iov_base = mem,
                .iov_len = size,
        };
        ssize_t cur, total, transferred;
        char *old, *new;
        int fds[2];
        char buf;

        old = malloc(size);
        new = malloc(size);

        /* Backup the original content. */
        memcpy(old, mem, size);

        if (pipe(fds) < 0)
                return -errno;

        /* Trigger a read-only pin. */
        transferred = vmsplice(fds[1], &iov, 1, 0);
        if (transferred < 0)
                return -errno;
        if (transferred == 0)
                return -EINVAL;

        /* Unmap it from our page tables. */
        if (munmap(mem, size) < 0)
                return -errno;

        /* Wait until the parent modified it. */
        write(comm_pipes->child_ready[1], "", 1);
        while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
                ;

        /* See if we still read the old values via the pipe. */
        for (total = 0; total < transferred; total += cur) {
                cur = read(fds[0], new + total, transferred - total);
                if (cur < 0)
                        return -errno;
        }

        return memcmp(old, new, transferred);
}

typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);

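/*
 * Fork a child running @fn, modify the memory in the parent (optionally
 * after an mprotect() R/O + R/W cycle), and let the child verify that the
 * parent's modification did not leak into it.
 */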
static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
                child_fn fn, bool xfail)
{
        struct comm_pipes comm_pipes;
        char buf;
        int ret;

        ret = setup_comm_pipes(&comm_pipes);
        if (ret) {
                ksft_test_result_fail("pipe() failed\n");
                return;
        }

        ret = fork();
        if (ret < 0) {
                ksft_test_result_fail("fork() failed\n");
                goto close_comm_pipes;
        } else if (!ret) {
                exit(fn(mem, size, &comm_pipes));
        }

        while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
                ;

        if (do_mprotect) {
                /*
                 * mprotect() optimizations might try avoiding
                 * write-faults by directly mapping pages writable.
                 */
                ret = mprotect(mem, size, PROT_READ);
                ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
                if (ret) {
                        ksft_test_result_fail("mprotect() failed\n");
                        write(comm_pipes.parent_ready[1], "", 1);
                        wait(&ret);
                        goto close_comm_pipes;
                }
        }

        /* Modify the page. */
        memset(mem, 0xff, size);
        write(comm_pipes.parent_ready[1], "", 1);

        wait(&ret);
        if (WIFEXITED(ret))
                ret = WEXITSTATUS(ret);
        else
                ret = -EINVAL;

        if (!ret) {
                ksft_test_result_pass("No leak from parent into child\n");
        } else if (xfail) {
                /*
                 * With hugetlb, some vmsplice() tests are currently expected to
                 * fail because (a) harder to fix and (b) nobody really cares.
                 * Flag them as expected failure for now.
                 */
                ksft_test_result_xfail("Leak from parent into child\n");
        } else {
                ksft_test_result_fail("Leak from parent into child\n");
        }
close_comm_pipes:
        close_comm_pipes(&comm_pipes);
}

static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb)
{
        do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false);
}

static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb)
{
        do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false);
}

static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb)
{
        do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn,
                              is_hugetlb);
}

static void test_vmsplice_in_child_mprotect(char *mem, size_t size,
                bool is_hugetlb)
{
        do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn,
                              is_hugetlb);
}

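/*
 * Take a R/O pin via vmsplice() in the parent (before or after fork()),
 * unmap the range, let the child modify its mapping, and verify that the
 * pipe still yields the old content.
 */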
static void do_test_vmsplice_in_parent(char *mem, size_t size,
                                       bool before_fork, bool xfail)
{
        struct iovec iov = {
                .iov_base = mem,
                .iov_len = size,
        };
        ssize_t cur, total, transferred;
        struct comm_pipes comm_pipes;
        char *old, *new;
        int ret, fds[2];
        char buf;

        old = malloc(size);
        new = malloc(size);

        memcpy(old, mem, size);

        ret = setup_comm_pipes(&comm_pipes);
        if (ret) {
                ksft_test_result_fail("pipe() failed\n");
                goto free;
        }

        if (pipe(fds) < 0) {
                ksft_test_result_fail("pipe() failed\n");
                goto close_comm_pipes;
        }

        if (before_fork) {
                transferred = vmsplice(fds[1], &iov, 1, 0);
                if (transferred <= 0) {
                        ksft_test_result_fail("vmsplice() failed\n");
                        goto close_pipe;
                }
        }

        ret = fork();
        if (ret < 0) {
                ksft_test_result_fail("fork() failed\n");
                goto close_pipe;
        } else if (!ret) {
                write(comm_pipes.child_ready[1], "", 1);
                while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
                        ;
                /* Modify page content in the child. */
                memset(mem, 0xff, size);
                exit(0);
        }

        if (!before_fork) {
                transferred = vmsplice(fds[1], &iov, 1, 0);
                if (transferred <= 0) {
                        ksft_test_result_fail("vmsplice() failed\n");
                        wait(&ret);
                        goto close_pipe;
                }
        }

        while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
                ;
        if (munmap(mem, size) < 0) {
                ksft_test_result_fail("munmap() failed\n");
                goto close_pipe;
        }
        write(comm_pipes.parent_ready[1], "", 1);

        /* Wait until the child is done writing. */
        wait(&ret);
        if (!WIFEXITED(ret)) {
                ksft_test_result_fail("wait() failed\n");
                goto close_pipe;
        }

        /* See if we still read the old values. */
        for (total = 0; total < transferred; total += cur) {
                cur = read(fds[0], new + total, transferred - total);
                if (cur < 0) {
                        ksft_test_result_fail("read() failed\n");
                        goto close_pipe;
                }
        }

        if (!memcmp(old, new, transferred)) {
                ksft_test_result_pass("No leak from child into parent\n");
        } else if (xfail) {
                /*
                 * With hugetlb, some vmsplice() tests are currently expected to
                 * fail because (a) harder to fix and (b) nobody really cares.
                 * Flag them as expected failure for now.
                 */
                ksft_test_result_xfail("Leak from child into parent\n");
        } else {
                ksft_test_result_fail("Leak from child into parent\n");
        }
close_pipe:
        close(fds[0]);
        close(fds[1]);
close_comm_pipes:
        close_comm_pipes(&comm_pipes);
free:
        free(old);
        free(new);
}

static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb)
{
        do_test_vmsplice_in_parent(mem, size, true, is_hugetlb);
}

static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb)
{
        do_test_vmsplice_in_parent(mem, size, false, is_hugetlb);
}

#ifdef LOCAL_CONFIG_HAVE_LIBURING
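/*
 * Register the range as an io_uring fixed buffer (a longterm R/W pin),
 * map it R/O or fork(), then modify it and verify via a fixed-buffer write
 * to a temporary file that the pin observes the modification.
 */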
static void do_test_iouring(char *mem, size_t size, bool use_fork)
{
        struct comm_pipes comm_pipes;
        struct io_uring_cqe *cqe;
        struct io_uring_sqe *sqe;
        struct io_uring ring;
        ssize_t cur, total;
        struct iovec iov;
        char *buf, *tmp;
        int ret, fd;
        FILE *file;

        ret = setup_comm_pipes(&comm_pipes);
        if (ret) {
                ksft_test_result_fail("pipe() failed\n");
                return;
        }

        file = tmpfile();
        if (!file) {
                ksft_test_result_fail("tmpfile() failed\n");
                goto close_comm_pipes;
        }
        fd = fileno(file);
        assert(fd);

        tmp = malloc(size);
        if (!tmp) {
                ksft_test_result_fail("malloc() failed\n");
                goto close_file;
        }

        /* Skip on errors, as we might just lack kernel support. */
        ret = io_uring_queue_init(1, &ring, 0);
        if (ret < 0) {
                ksft_test_result_skip("io_uring_queue_init() failed\n");
                goto free_tmp;
        }

        /*
         * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
         * | FOLL_LONGTERM the range.
         *
         * Skip on errors, as we might just lack kernel support or might not
         * have sufficient MEMLOCK permissions.
         */
        iov.iov_base = mem;
        iov.iov_len = size;
        ret = io_uring_register_buffers(&ring, &iov, 1);
        if (ret) {
                ksft_test_result_skip("io_uring_register_buffers() failed\n");
                goto queue_exit;
        }

        if (use_fork) {
                /*
                 * fork() and keep the child alive until we're done. Note that
                 * we expect the pinned page to not get shared with the child.
                 */
                ret = fork();
                if (ret < 0) {
                        ksft_test_result_fail("fork() failed\n");
                        goto unregister_buffers;
                } else if (!ret) {
                        write(comm_pipes.child_ready[1], "", 1);
                        while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
                                ;
                        exit(0);
                }

                while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
                        ;
        } else {
                /*
                 * Map the page R/O into the page table. Enable softdirty
                 * tracking to stop the page from getting mapped R/W immediately
                 * again by mprotect() optimizations. Note that we don't have an
                 * easy way to test if that worked (the pagemap does not export
                 * if the page is mapped R/O vs. R/W).
                 */
                ret = mprotect(mem, size, PROT_READ);
                clear_softdirty();
                ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
                if (ret) {
                        ksft_test_result_fail("mprotect() failed\n");
                        goto unregister_buffers;
                }
        }

        /*
         * Modify the page and write page content as observed by the fixed
         * buffer pin to the file so we can verify it.
         */
        memset(mem, 0xff, size);
        sqe = io_uring_get_sqe(&ring);
        if (!sqe) {
                ksft_test_result_fail("io_uring_get_sqe() failed\n");
                goto quit_child;
        }
        io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);

        ret = io_uring_submit(&ring);
        if (ret < 0) {
                ksft_test_result_fail("io_uring_submit() failed\n");
                goto quit_child;
        }

        ret = io_uring_wait_cqe(&ring, &cqe);
        if (ret < 0) {
                ksft_test_result_fail("io_uring_wait_cqe() failed\n");
                goto quit_child;
        }

        if (cqe->res != size) {
                ksft_test_result_fail("write_fixed failed\n");
                goto quit_child;
        }
        io_uring_cqe_seen(&ring, cqe);

        /* Read back the file content to the temporary buffer. */
        total = 0;
        while (total < size) {
                cur = pread(fd, tmp + total, size - total, total);
                if (cur < 0) {
                        ksft_test_result_fail("pread() failed\n");
                        goto quit_child;
                }
                total += cur;
        }

        /* Finally, check if we read what we expected. */
        ksft_test_result(!memcmp(mem, tmp, size),
                         "Longterm R/W pin is reliable\n");

quit_child:
        if (use_fork) {
                write(comm_pipes.parent_ready[1], "", 1);
                wait(&ret);
        }
unregister_buffers:
        io_uring_unregister_buffers(&ring);
queue_exit:
        io_uring_queue_exit(&ring);
free_tmp:
        free(tmp);
close_file:
        fclose(file);
close_comm_pipes:
        close_comm_pipes(&comm_pipes);
}

static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb)
{
        do_test_iouring(mem, size, false);
}

static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb)
{
        do_test_iouring(mem, size, true);
}

#endif /* LOCAL_CONFIG_HAVE_LIBURING */

enum ro_pin_test {
        RO_PIN_TEST,
        RO_PIN_TEST_SHARED,
        RO_PIN_TEST_PREVIOUSLY_SHARED,
        RO_PIN_TEST_RO_EXCLUSIVE,
};

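/*
 * Prepare the mapping/sharing state described by @test, take a longterm
 * R/O pin via the gup_test ioctls, modify the page, and verify that the
 * pin observes the new content (i.e., unsharing happened at pin time).
 */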
static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
                           bool fast)
{
        struct pin_longterm_test args;
        struct comm_pipes comm_pipes;
        char *tmp, buf;
        __u64 tmp_val;
        int ret;

        if (gup_fd < 0) {
                ksft_test_result_skip("gup_test not available\n");
                return;
        }

        tmp = malloc(size);
        if (!tmp) {
                ksft_test_result_fail("malloc() failed\n");
                return;
        }

        ret = setup_comm_pipes(&comm_pipes);
        if (ret) {
                ksft_test_result_fail("pipe() failed\n");
                goto free_tmp;
        }

        switch (test) {
        case RO_PIN_TEST:
                break;
        case RO_PIN_TEST_SHARED:
        case RO_PIN_TEST_PREVIOUSLY_SHARED:
                /*
                 * Share the pages with our child. As the pages are not pinned,
                 * this should just work.
                 */
                ret = fork();
                if (ret < 0) {
                        ksft_test_result_fail("fork() failed\n");
                        goto close_comm_pipes;
                } else if (!ret) {
                        write(comm_pipes.child_ready[1], "", 1);
                        while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
                                ;
                        exit(0);
                }

                /* Wait until our child is ready. */
                while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
                        ;

                if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
                        /*
                         * Tell the child to quit now and wait until it quit.
                         * The pages should now be mapped R/O into our page
                         * tables, but they are no longer shared.
                         */
                        write(comm_pipes.parent_ready[1], "", 1);
                        wait(&ret);
                        if (!WIFEXITED(ret))
                                ksft_print_msg("[INFO] wait() failed\n");
                }
                break;
        case RO_PIN_TEST_RO_EXCLUSIVE:
                /*
                 * Map the page R/O into the page table. Enable softdirty
                 * tracking to stop the page from getting mapped R/W immediately
                 * again by mprotect() optimizations. Note that we don't have an
                 * easy way to test if that worked (the pagemap does not export
                 * if the page is mapped R/O vs. R/W).
                 */
                ret = mprotect(mem, size, PROT_READ);
                clear_softdirty();
                ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
                if (ret) {
                        ksft_test_result_fail("mprotect() failed\n");
                        goto close_comm_pipes;
                }
                break;
        default:
                assert(false);
        }

        /* Take a R/O pin. This should trigger unsharing. */
        args.addr = (__u64)(uintptr_t)mem;
        args.size = size;
        args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
        ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
        if (ret) {
                if (errno == EINVAL)
                        ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
                else
                        ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
                goto wait;
        }

        /* Modify the page. */
        memset(mem, 0xff, size);

        /*
         * Read back the content via the pin to the temporary buffer and
         * test if we observed the modification.
         */
        tmp_val = (__u64)(uintptr_t)tmp;
        ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
        if (ret)
                ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
        else
                ksft_test_result(!memcmp(mem, tmp, size),
                                 "Longterm R/O pin is reliable\n");

        ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
        if (ret)
                ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
wait:
        switch (test) {
        case RO_PIN_TEST_SHARED:
                write(comm_pipes.parent_ready[1], "", 1);
                wait(&ret);
                if (!WIFEXITED(ret))
                        ksft_print_msg("[INFO] wait() failed\n");
                break;
        default:
                break;
        }
close_comm_pipes:
        close_comm_pipes(&comm_pipes);
free_tmp:
        free(tmp);
}

static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
{
        do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
}

static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
{
        do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
}

static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size,
                bool is_hugetlb)
{
        do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
}

static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size,
                bool is_hugetlb)
{
        do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
}

static void test_ro_pin_on_ro_exclusive(char *mem, size_t size,
                bool is_hugetlb)
{
        do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
}

static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size,
                bool is_hugetlb)
{
        do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
}

typedef void (*test_fn)(char *mem, size_t size, bool hugetlb);

static void do_run_with_base_page(test_fn fn, bool swapout)
{
        char *mem;
        int ret;

        mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (mem == MAP_FAILED) {
                ksft_test_result_fail("mmap() failed\n");
                return;
        }

        ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
        /* Ignore failure if MADV_NOHUGEPAGE is not supported by this kernel. */
        if (ret && errno != EINVAL) {
                ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
                goto munmap;
        }

        /* Populate a base page. */
        memset(mem, 0, pagesize);

        if (swapout) {
                madvise(mem, pagesize, MADV_PAGEOUT);
                if (!pagemap_is_swapped(pagemap_fd, mem)) {
                        ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
                        goto munmap;
                }
        }

        fn(mem, pagesize, false);
munmap:
        munmap(mem, pagesize);
}

static void run_with_base_page(test_fn fn, const char *desc)
{
        ksft_print_msg("[RUN] %s ... with base page\n", desc);
        do_run_with_base_page(fn, false);
}

static void run_with_base_page_swap(test_fn fn, const char *desc)
{
        ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc);
        do_run_with_base_page(fn, true);
}

enum thp_run {
        THP_RUN_PMD,
        THP_RUN_PMD_SWAPOUT,
        THP_RUN_PTE,
        THP_RUN_PTE_SWAPOUT,
        THP_RUN_SINGLE_PTE,
        THP_RUN_SINGLE_PTE_SWAPOUT,
        THP_RUN_PARTIAL_MREMAP,
        THP_RUN_PARTIAL_SHARED,
};

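/*
 * Populate a THP at a naturally-aligned address, transform it into the
 * state described by @thp_run (PTE-mapped, single PTE, partially
 * mremap()'ed, partially shared, swapped out, ...), then run @fn on it.
 */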
static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
{
        char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
        size_t size, mmap_size, mremap_size;
        int ret;

        /* For alignment purposes, we need twice the thp size. */
        mmap_size = 2 * thpsize;
        mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (mmap_mem == MAP_FAILED) {
                ksft_test_result_fail("mmap() failed\n");
                return;
        }

        /* We need a THP-aligned memory area. */
        mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));

        ret = madvise(mem, thpsize, MADV_HUGEPAGE);
        if (ret) {
                ksft_test_result_fail("MADV_HUGEPAGE failed\n");
                goto munmap;
        }

        /*
         * Try to populate a THP. Touch the first sub-page and test if
         * we get the last sub-page populated automatically.
         */
        mem[0] = 0;
        if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
                ksft_test_result_skip("Did not get a THP populated\n");
                goto munmap;
        }
        memset(mem, 0, thpsize);

        size = thpsize;
        switch (thp_run) {
        case THP_RUN_PMD:
        case THP_RUN_PMD_SWAPOUT:
                assert(thpsize == pmdsize);
                break;
        case THP_RUN_PTE:
        case THP_RUN_PTE_SWAPOUT:
                /*
                 * Trigger PTE-mapping the THP by temporarily mapping a single
                 * subpage R/O. This is a noop if the THP is not pmdsize (and
                 * therefore already PTE-mapped).
                 */
                ret = mprotect(mem + pagesize, pagesize, PROT_READ);
                if (ret) {
                        ksft_test_result_fail("mprotect() failed\n");
                        goto munmap;
                }
                ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
                if (ret) {
                        ksft_test_result_fail("mprotect() failed\n");
                        goto munmap;
                }
                break;
        case THP_RUN_SINGLE_PTE:
        case THP_RUN_SINGLE_PTE_SWAPOUT:
                /*
                 * Discard all but a single subpage of that PTE-mapped THP. What
                 * remains is a single PTE mapping a single subpage.
                 */
                ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
                if (ret) {
                        ksft_test_result_fail("MADV_DONTNEED failed\n");
                        goto munmap;
                }
                size = pagesize;
                break;
        case THP_RUN_PARTIAL_MREMAP:
                /*
                 * Remap half of the THP. We need some new memory location
                 * for that.
                 */
                mremap_size = thpsize / 2;
                mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
                                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
                if (mremap_mem == MAP_FAILED) {
                        ksft_test_result_fail("mmap() failed\n");
                        goto munmap;
                }
                tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
                             MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
                if (tmp != mremap_mem) {
                        ksft_test_result_fail("mremap() failed\n");
                        goto munmap;
                }
                size = mremap_size;
                break;
        case THP_RUN_PARTIAL_SHARED:
                /*
                 * Share the first page of the THP with a child and quit the
                 * child. This will result in some parts of the THP never
                 * having been shared.
                 */
                ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
                if (ret) {
                        ksft_test_result_fail("MADV_DONTFORK failed\n");
                        goto munmap;
                }
                ret = fork();
                if (ret < 0) {
                        ksft_test_result_fail("fork() failed\n");
                        goto munmap;
                } else if (!ret) {
                        exit(0);
                }
                wait(&ret);
                /* Allow for sharing all pages again. */
                ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
                if (ret) {
                        ksft_test_result_fail("MADV_DOFORK failed\n");
                        goto munmap;
                }
                break;
        default:
                assert(false);
        }

        switch (thp_run) {
        case THP_RUN_PMD_SWAPOUT:
        case THP_RUN_PTE_SWAPOUT:
        case THP_RUN_SINGLE_PTE_SWAPOUT:
                madvise(mem, size, MADV_PAGEOUT);
                if (!range_is_swapped(mem, size)) {
                        ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
                        goto munmap;
                }
                break;
        default:
                break;
        }

        fn(mem, size, false);
munmap:
        munmap(mmap_mem, mmap_size);
        if (mremap_mem != MAP_FAILED)
                munmap(mremap_mem, mremap_size);
}

static void run_with_thp(test_fn fn, const char *desc, size_t size)
{
        ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n",
                desc, size / 1024);
        do_run_with_thp(fn, THP_RUN_PMD, size);
}

static void run_with_thp_swap(test_fn fn, const char *desc, size_t size)
{
        ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n",
                desc, size / 1024);
        do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size);
}

static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size)
{
        ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n",
                desc, size / 1024);
        do_run_with_thp(fn, THP_RUN_PTE, size);
}

static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size)
{
        ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n",
                desc, size / 1024);
        do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size);
}

static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size)
{
        ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n",
                desc, size / 1024);
        do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size);
}

static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size)
{
        ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n",
                desc, size / 1024);
        do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size);
}

static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size)
{
        ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n",
                desc, size / 1024);
        do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size);
}

static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size)
{
        ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n",
                desc, size / 1024);
        do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size);
}

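/*
 * Run @fn on a freshly mmap()'ed hugetlb page, with the hugetlb size
 * encoded into the mmap() flags via MAP_HUGE_SHIFT.
 */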
static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
{
        int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
        char *mem, *dummy;

        ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc,
                       hugetlbsize / 1024);

        flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;

        mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
        if (mem == MAP_FAILED) {
                ksft_test_result_skip("need more free huge pages\n");
                return;
        }

        /* Populate a huge page. */
        memset(mem, 0, hugetlbsize);

        /*
         * We need a total of two hugetlb pages to handle COW/unsharing
         * properly, otherwise we might get zapped by a SIGBUS.
         */
        dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
        if (dummy == MAP_FAILED) {
                ksft_test_result_skip("need more free huge pages\n");
                goto munmap;
        }
        munmap(dummy, hugetlbsize);

        fn(mem, hugetlbsize, true);
munmap:
        munmap(mem, hugetlbsize);
}

struct test_case {
        const char *desc;
        test_fn fn;
};

/*
 * Test cases that are specific to anonymous pages: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_test_cases[] = {
        /*
         * Basic COW tests for fork() without any GUP. If we fail to break COW,
         * either the child can observe modifications by the parent or the
         * other way around.
         */
        {
                "Basic COW after fork()",
                test_cow_in_parent,
        },
        /*
         * Basic test, but do an additional mprotect(PROT_READ)+
         * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
         */
        {
                "Basic COW after fork() with mprotect() optimization",
                test_cow_in_parent_mprotect,
        },
        /*
         * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
         * we fail to break COW, the child observes modifications by the parent.
         * This is CVE-2020-29374 reported by Jann Horn.
         */
        {
                "vmsplice() + unmap in child",
                test_vmsplice_in_child,
        },
        /*
         * vmsplice() test, but do an additional mprotect(PROT_READ)+
         * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
         */
        {
                "vmsplice() + unmap in child with mprotect() optimization",
                test_vmsplice_in_child_mprotect,
        },
        /*
         * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
         * fork(); modify in the child. If we fail to break COW, the parent
         * observes modifications by the child.
         */
        {
                "vmsplice() before fork(), unmap in parent after fork()",
                test_vmsplice_before_fork,
        },
        /*
         * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
         * child. If we fail to break COW, the parent observes modifications by
         * the child.
         */
        {
                "vmsplice() + unmap in parent after fork()",
                test_vmsplice_after_fork,
        },
#ifdef LOCAL_CONFIG_HAVE_LIBURING
        /*
         * Take a R/W longterm pin and then map the page R/O into the page
         * table to trigger a write fault on next access. When modifying the
         * page, the page content must be visible via the pin.
         */
        {
                "R/O-mapping a page registered as iouring fixed buffer",
                test_iouring_ro,
        },
        /*
         * Take a R/W longterm pin and then fork() a child. When modifying the
         * page, the page content must be visible via the pin. We expect the
         * pinned page to not get shared with the child.
         */
        {
                "fork() with an iouring fixed buffer",
                test_iouring_fork,
        },

#endif /* LOCAL_CONFIG_HAVE_LIBURING */
        /*
         * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
         * When modifying the page via the page table, the page content change
         * must be visible via the pin.
         */
        {
                "R/O GUP pin on R/O-mapped shared page",
                test_ro_pin_on_shared,
        },
        /* Same as above, but using GUP-fast. */
        {
                "R/O GUP-fast pin on R/O-mapped shared page",
                test_ro_fast_pin_on_shared,
        },
        /*
         * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
         * was previously shared. When modifying the page via the page table,
         * the page content change must be visible via the pin.
         */
        {
                "R/O GUP pin on R/O-mapped previously-shared page",
                test_ro_pin_on_ro_previously_shared,
        },
        /* Same as above, but using GUP-fast. */
        {
                "R/O GUP-fast pin on R/O-mapped previously-shared page",
                test_ro_fast_pin_on_ro_previously_shared,
        },
        /*
         * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
         * When modifying the page via the page table, the page content change
         * must be visible via the pin.
         */
        {
                "R/O GUP pin on R/O-mapped exclusive page",
                test_ro_pin_on_ro_exclusive,
        },
        /* Same as above, but using GUP-fast. */
        {
                "R/O GUP-fast pin on R/O-mapped exclusive page",
                test_ro_fast_pin_on_ro_exclusive,
        },
};

static void run_anon_test_case(struct test_case const *test_case)
{
        int i;

        run_with_base_page(test_case->fn, test_case->desc);
        run_with_base_page_swap(test_case->fn, test_case->desc);
        for (i = 0; i < nr_thpsizes; i++) {
                size_t size = thpsizes[i];
                struct thp_settings settings = *thp_current_settings();

                settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER;
                settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS;
                thp_push_settings(&settings);

                if (size == pmdsize) {
                        run_with_thp(test_case->fn, test_case->desc, size);
                        run_with_thp_swap(test_case->fn, test_case->desc, size);
                }

                run_with_pte_mapped_thp(test_case->fn, test_case->desc, size);
                run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size);
                run_with_single_pte_of_thp(test_case->fn, test_case->desc, size);
                run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size);
                run_with_partial_mremap_thp(test_case->fn, test_case->desc, size);
                run_with_partial_shared_thp(test_case->fn, test_case->desc, size);

                thp_pop_settings();
        }
        for (i = 0; i < nr_hugetlbsizes; i++)
                run_with_hugetlb(test_case->fn, test_case->desc,
                                 hugetlbsizes[i]);
}

static void run_anon_test_cases(void)
{
        int i;

        ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");

        for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
                run_anon_test_case(&anon_test_cases[i]);
}

static int tests_per_anon_test_case(void)
{
        int tests = 2 + nr_hugetlbsizes;

        tests += 6 * nr_thpsizes;
        if (pmdsize)
                tests += 2;
        return tests;
}

enum anon_thp_collapse_test {
        ANON_THP_COLLAPSE_UNSHARED,
        ANON_THP_COLLAPSE_FULLY_SHARED,
        ANON_THP_COLLAPSE_LOWER_SHARED,
        ANON_THP_COLLAPSE_UPPER_SHARED,
};

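/*
 * PTE-map a THP, optionally COW-share (parts of) it with a child, collapse
 * it again using MADV_COLLAPSE, then modify it in the parent and let the
 * child check for a COW leak.
 */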
static void do_test_anon_thp_collapse(char *mem, size_t size,
                                      enum anon_thp_collapse_test test)
{
        struct comm_pipes comm_pipes;
        char buf;
        int ret;

        ret = setup_comm_pipes(&comm_pipes);
        if (ret) {
                ksft_test_result_fail("pipe() failed\n");
                return;
        }

        /*
         * Trigger PTE-mapping the THP by temporarily mapping a single subpage
         * R/O, such that we can try collapsing it later.
         */
        ret = mprotect(mem + pagesize, pagesize, PROT_READ);
        if (ret) {
                ksft_test_result_fail("mprotect() failed\n");
                goto close_comm_pipes;
        }
        ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
        if (ret) {
                ksft_test_result_fail("mprotect() failed\n");
                goto close_comm_pipes;
        }

        switch (test) {
        case ANON_THP_COLLAPSE_UNSHARED:
                /* Collapse before actually COW-sharing the page. */
                ret = madvise(mem, size, MADV_COLLAPSE);
                if (ret) {
                        ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
                                              strerror(errno));
                        goto close_comm_pipes;
                }
                break;
        case ANON_THP_COLLAPSE_FULLY_SHARED:
                /* COW-share the full PTE-mapped THP. */
                break;
        case ANON_THP_COLLAPSE_LOWER_SHARED:
                /* Don't COW-share the upper part of the THP. */
                ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
                if (ret) {
                        ksft_test_result_fail("MADV_DONTFORK failed\n");
                        goto close_comm_pipes;
                }
                break;
        case ANON_THP_COLLAPSE_UPPER_SHARED:
                /* Don't COW-share the lower part of the THP. */
                ret = madvise(mem, size / 2, MADV_DONTFORK);
                if (ret) {
                        ksft_test_result_fail("MADV_DONTFORK failed\n");
                        goto close_comm_pipes;
                }
                break;
        default:
                assert(false);
        }

        ret = fork();
        if (ret < 0) {
                ksft_test_result_fail("fork() failed\n");
                goto close_comm_pipes;
        } else if (!ret) {
                switch (test) {
                case ANON_THP_COLLAPSE_UNSHARED:
                case ANON_THP_COLLAPSE_FULLY_SHARED:
                        exit(child_memcmp_fn(mem, size, &comm_pipes));
                        break;
                case ANON_THP_COLLAPSE_LOWER_SHARED:
                        exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
                        break;
                case ANON_THP_COLLAPSE_UPPER_SHARED:
                        exit(child_memcmp_fn(mem + size / 2, size / 2,
                                             &comm_pipes));
                        break;
                default:
                        assert(false);
                }
        }

        while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
                ;

        switch (test) {
        case ANON_THP_COLLAPSE_UNSHARED:
                break;
        case ANON_THP_COLLAPSE_UPPER_SHARED:
        case ANON_THP_COLLAPSE_LOWER_SHARED:
                /*
                 * Revert MADV_DONTFORK such that we merge the VMAs and are
                 * able to actually collapse.
                 */
                ret = madvise(mem, size, MADV_DOFORK);
                if (ret) {
                        ksft_test_result_fail("MADV_DOFORK failed\n");
                        write(comm_pipes.parent_ready[1], "", 1);
                        wait(&ret);
                        goto close_comm_pipes;
                }
                /* FALLTHROUGH */
        case ANON_THP_COLLAPSE_FULLY_SHARED:
                /* Collapse before anyone modified the COW-shared page. */
                ret = madvise(mem, size, MADV_COLLAPSE);
                if (ret) {
                        ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
                                              strerror(errno));
                        write(comm_pipes.parent_ready[1], "", 1);
                        wait(&ret);
                        goto close_comm_pipes;
                }
                break;
        default:
                assert(false);
        }

        /* Modify the page. */
        memset(mem, 0xff, size);
        write(comm_pipes.parent_ready[1], "", 1);

        wait(&ret);
        if (WIFEXITED(ret))
                ret = WEXITSTATUS(ret);
        else
                ret = -EINVAL;

        ksft_test_result(!ret, "No leak from parent into child\n");
close_comm_pipes:
        close_comm_pipes(&comm_pipes);
}

static void test_anon_thp_collapse_unshared(char *mem, size_t size,
                bool is_hugetlb)
{
        assert(!is_hugetlb);
        do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
}

static void test_anon_thp_collapse_fully_shared(char *mem, size_t size,
                bool is_hugetlb)
{
        assert(!is_hugetlb);
        do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
}

static void test_anon_thp_collapse_lower_shared(char *mem, size_t size,
                bool is_hugetlb)
{
        assert(!is_hugetlb);
        do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
}

static void test_anon_thp_collapse_upper_shared(char *mem, size_t size,
                bool is_hugetlb)
{
        assert(!is_hugetlb);
        do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
}

/*
 * Test cases that are specific to anonymous THP: pages in private mappings
 * that may get shared via COW during fork().
 */
static const struct test_case anon_thp_test_cases[] = {
        /*
         * Basic COW test for fork() without any GUP when collapsing a THP
         * before fork().
         *
         * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place
         * collapse") might easily get COW handling wrong when not collapsing
         * exclusivity information properly.
         */
        {
                "Basic COW after fork() when collapsing before fork()",
                test_anon_thp_collapse_unshared,
        },
        /* Basic COW test, but collapse after COW-sharing a full THP. */
        {
                "Basic COW after fork() when collapsing after fork() (fully shared)",
                test_anon_thp_collapse_fully_shared,
        },
        /*
         * Basic COW test, but collapse after COW-sharing the lower half of a
         * THP.
         */
        {
                "Basic COW after fork() when collapsing after fork() (lower shared)",
                test_anon_thp_collapse_lower_shared,
        },
        /*
         * Basic COW test, but collapse after COW-sharing the upper half of a
         * THP.
         */
        {
                "Basic COW after fork() when collapsing after fork() (upper shared)",
                test_anon_thp_collapse_upper_shared,
        },
};

static void run_anon_thp_test_cases(void)
{
        int i;

        if (!pmdsize)
                return;

        ksft_print_msg("[INFO] Anonymous THP tests\n");

        for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
                struct test_case const *test_case = &anon_thp_test_cases[i];

                ksft_print_msg("[RUN] %s\n", test_case->desc);
                do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize);
        }
}

static int tests_per_anon_thp_test_case(void)
{
        return pmdsize ? 1 : 0;
}

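/*
 * Test cases for non-anonymous memory: @mem is a writable private mapping
 * and @smem a second, R/O mapping of the same underlying page(s).
 */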
typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);

static void test_cow(char *mem, const char *smem, size_t size)
{
        char *old = malloc(size);

        /* Backup the original content. */
        memcpy(old, smem, size);

        /* Modify the page. */
        memset(mem, 0xff, size);

        /* See if we still read the old values via the other mapping. */
        ksft_test_result(!memcmp(smem, old, size),
                         "Other mapping not modified\n");
        free(old);
}

static void test_ro_pin(char *mem, const char *smem, size_t size)
{
        do_test_ro_pin(mem, size, RO_PIN_TEST, false);
}

static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
{
        do_test_ro_pin(mem, size, RO_PIN_TEST, true);
}

static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
{
        char *mem, *smem, tmp;

        ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc);

        mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANON, -1, 0);
        if (mem == MAP_FAILED) {
                ksft_test_result_fail("mmap() failed\n");
                return;
        }

        smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
        if (smem == MAP_FAILED) {
1486                 ksft_test_result_fail("mmap() failed\n");
1487                 goto munmap;
1488         }
1489 
1490         /* Read from the page to populate the shared zeropage. */
1491         tmp = *mem + *smem;
1492         asm volatile("" : "+r" (tmp));
1493 
1494         fn(mem, smem, pagesize);
1495 munmap:
1496         munmap(mem, pagesize);
1497         if (smem != MAP_FAILED)
1498                 munmap(smem, pagesize);
1499 }
1500 
1501 static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
1502 {
1503         char *mem, *smem, *mmap_mem, *mmap_smem, tmp;
1504         size_t mmap_size;
1505         int ret;
1506 
1507         ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc);
1508 
1509         if (!has_huge_zeropage) {
1510                 ksft_test_result_skip("Huge zeropage not enabled\n");
1511                 return;
1512         }
1513 
1514         /* For alignment purposes, we need twice the thp size. */
1515         mmap_size = 2 * pmdsize;
1516         mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
1517                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1518         if (mmap_mem == MAP_FAILED) {
1519                 ksft_test_result_fail("mmap() failed\n");
1520                 return;
1521         }
1522         mmap_smem = mmap(NULL, mmap_size, PROT_READ,
1523                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1524         if (mmap_smem == MAP_FAILED) {
1525                 ksft_test_result_fail("mmap() failed\n");
1526                 goto munmap;
1527         }
1528 
1529         /* We need a THP-aligned memory area. */
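             /*
              * Rounds up to the next pmdsize boundary; e.g. with a 2 MiB PMD,
              * mmap_mem == 0x7f0000100000 yields mem == 0x7f0000200000.
              */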
1530         mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1));
1531         smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1));
1532 
1533         ret = madvise(mem, pmdsize, MADV_HUGEPAGE);
1534         ret |= madvise(smem, pmdsize, MADV_HUGEPAGE);
1535         if (ret) {
1536                 ksft_test_result_fail("MADV_HUGEPAGE failed\n");
1537                 goto munmap;
1538         }
1539 
1540         /*
1541          * Read from the memory to populate the huge shared zeropage. Read from
1542          * the first sub-page and test if we get another sub-page populated
1543          * automatically.
1544          */
1545         tmp = *mem + *smem;
1546         asm volatile("" : "+r" (tmp));
1547         if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
1548             !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
1549                 ksft_test_result_skip("Did not get THPs populated\n");
1550                 goto munmap;
1551         }
1552 
1553         fn(mem, smem, pmdsize);
1554 munmap:
1555         munmap(mmap_mem, mmap_size);
1556         if (mmap_smem != MAP_FAILED)
1557                 munmap(mmap_smem, mmap_size);
1558 }
1559 
1560 static void run_with_memfd(non_anon_test_fn fn, const char *desc)
1561 {
1562         char *mem, *smem, tmp;
1563         int fd;
1564 
1565         ksft_print_msg("[RUN] %s ... with memfd\n", desc);
1566 
1567         fd = memfd_create("test", 0);
1568         if (fd < 0) {
1569                 ksft_test_result_fail("memfd_create() failed\n");
1570                 return;
1571         }
1572 
1573         /* File consists of a single page filled with zeroes. */
1574         if (fallocate(fd, 0, 0, pagesize)) {
1575                 ksft_test_result_fail("fallocate() failed\n");
1576                 goto close;
1577         }
1578 
1579         /* Create a private mapping of the memfd. */
1580         mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1581         if (mem == MAP_FAILED) {
1582                 ksft_test_result_fail("mmap() failed\n");
1583                 goto close;
1584         }
1585         smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
1586         if (smem == MAP_FAILED) {
1587                 ksft_test_result_fail("mmap() failed\n");
1588                 goto munmap;
1589         }
1590 
1591         /* Fault the page in. */
1592         tmp = *mem + *smem;
1593         asm volatile("" : "+r" (tmp));
1594 
1595         fn(mem, smem, pagesize);
1596 munmap:
1597         munmap(mem, pagesize);
1598         if (smem != MAP_FAILED)
1599                 munmap(smem, pagesize);
1600 close:
1601         close(fd);
1602 }
1603 
1604 static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
1605 {
1606         char *mem, *smem, tmp;
1607         FILE *file;
1608         int fd;
1609 
1610         ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
1611 
1612         file = tmpfile();
1613         if (!file) {
1614                 ksft_test_result_fail("tmpfile() failed\n");
1615                 return;
1616         }
1617 
1618         fd = fileno(file);
1619         if (fd < 0) {
1620                 ksft_test_result_skip("fileno() failed\n");
1621                 goto close;
1622         }
1623 
1624         /* File consists of a single page filled with zeroes. */
1625         if (fallocate(fd, 0, 0, pagesize)) {
1626                 ksft_test_result_fail("fallocate() failed\n");
1627                 goto close;
1628         }
1629 
1630         /* Create a private mapping of the tmpfile. */
1631         mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1632         if (mem == MAP_FAILED) {
1633                 ksft_test_result_fail("mmap() failed\n");
1634                 goto close;
1635         }
1636         smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
1637         if (smem == MAP_FAILED) {
1638                 ksft_test_result_fail("mmap() failed\n");
1639                 goto munmap;
1640         }
1641 
1642         /* Fault the page in. */
1643         tmp = *mem + *smem;
1644         asm volatile("" : "+r" (tmp));
1645 
1646         fn(mem, smem, pagesize);
1647 munmap:
1648         munmap(mem, pagesize);
1649         if (smem != MAP_FAILED)
1650                 munmap(smem, pagesize);
1651 close:
1652         fclose(file);
1653 }
1654 
1655 static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
1656                                    size_t hugetlbsize)
1657 {
1658         int flags = MFD_HUGETLB;
1659         char *mem, *smem, tmp;
1660         int fd;
1661 
1662         ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
1663                        hugetlbsize / 1024);
1664 
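             /* The huge page size is passed as its log2 in the MFD_HUGE_* flag bits. */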
1665         flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
1666 
1667         fd = memfd_create("test", flags);
1668         if (fd < 0) {
1669                 ksft_test_result_skip("memfd_create() failed\n");
1670                 return;
1671         }
1672 
1673         /* File consists of a single huge page filled with zeroes. */
1674         if (fallocate(fd, 0, 0, hugetlbsize)) {
1675                 ksft_test_result_skip("need more free huge pages\n");
1676                 goto close;
1677         }
1678 
1679         /* Create a private mapping of the memfd. */
1680         mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
1681                    0);
1682         if (mem == MAP_FAILED) {
1683                 ksft_test_result_skip("need more free huge pages\n");
1684                 goto close;
1685         }
1686         smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
1687         if (smem == MAP_FAILED) {
1688                 ksft_test_result_fail("mmap() failed\n");
1689                 goto munmap;
1690         }
1691 
1692         /* Fault the page in. */
1693         tmp = *mem + *smem;
1694         asm volatile("" : "+r" (tmp));
1695 
1696         fn(mem, smem, hugetlbsize);
1697 munmap:
1698         munmap(mem, hugetlbsize);
1699         if (smem != MAP_FAILED)
1700                 munmap(smem, hugetlbsize);
1701 close:
1702         close(fd);
1703 }
1704 
1705 struct non_anon_test_case {
1706         const char *desc;
1707         non_anon_test_fn fn;
1708 };
1709 
1710 /*
1711  * Test cases that target any pages in private mappings that are not anonymous:
1712  * pages that may get shared via COW independently of fork(). This includes
1713  * the shared zeropage(s), pagecache pages, ...
1714  */
1715 static const struct non_anon_test_case non_anon_test_cases[] = {
1716         /*
1717          * Basic COW test without any GUP. If we miss to break COW, changes are
1718          * visible via other private/shared mappings.
1719          */
1720         {
1721                 "Basic COW",
1722                 test_cow,
1723         },
1724         /*
1725          * Take a R/O longterm pin. When modifying the page via the page table,
1726          * the page content change must be visible via the pin.
1727          */
1728         {
1729                 "R/O longterm GUP pin",
1730                 test_ro_pin,
1731         },
1732         /* Same as above, but using GUP-fast. */
1733         {
1734                 "R/O longterm GUP-fast pin",
1735                 test_ro_fast_pin,
1736         },
1737 };
1738 
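     /*
      * Run a single test case against every supported non-anonymous backing:
      * shared zeropage, memfd, tmpfile, the huge zeropage (THP only) and one
      * hugetlb memfd per detected huge page size.
      */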
1739 static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
1740 {
1741         int i;
1742 
1743         run_with_zeropage(test_case->fn, test_case->desc);
1744         run_with_memfd(test_case->fn, test_case->desc);
1745         run_with_tmpfile(test_case->fn, test_case->desc);
1746         if (pmdsize)
1747                 run_with_huge_zeropage(test_case->fn, test_case->desc);
1748         for (i = 0; i < nr_hugetlbsizes; i++)
1749                 run_with_memfd_hugetlb(test_case->fn, test_case->desc,
1750                                        hugetlbsizes[i]);
1751 }
1752 
1753 static void run_non_anon_test_cases(void)
1754 {
1755         int i;
1756 
1757         ksft_print_msg("[INFO] Non-anonymous memory tests in private mappings\n");
1758 
1759         for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
1760                 run_non_anon_test_case(&non_anon_test_cases[i]);
1761 }
1762 
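     /*
      * Keep in sync with run_non_anon_test_case(): three base runs, plus the
      * huge zeropage run with THP, plus one run per detected hugetlb size.
      */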
1763 static int tests_per_non_anon_test_case(void)
1764 {
1765         int tests = 3 + nr_hugetlbsizes;
1766 
1767         if (pmdsize)
1768                 tests += 1;
1769         return tests;
1770 }
1771 
1772 int main(int argc, char **argv)
1773 {
1774         int err;
1775         struct thp_settings default_settings;
1776 
1777         ksft_print_header();
1778 
1779         pagesize = getpagesize();
1780         pmdsize = read_pmd_pagesize();
1781         if (pmdsize) {
1782                 /* Only if THP is supported. */
1783                 thp_read_settings(&default_settings);
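                     /*
                      * Let the PMD order inherit the global "enabled" setting
                      * for the test run; the saved settings are restored on
                      * exit below.
                      */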
1784                 default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT;
1785                 thp_save_settings();
1786                 thp_push_settings(&default_settings);
1787 
1788                 ksft_print_msg("[INFO] detected PMD size: %zu KiB\n",
1789                                pmdsize / 1024);
1790                 nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes));
1791         }
1792         nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
1793                                                     ARRAY_SIZE(hugetlbsizes));
1794         detect_huge_zeropage();
1795 
1796         ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
1797                       ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
1798                       ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());
1799 
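             /*
              * gup_test is optional (CONFIG_GUP_TEST); tests that rely on it
              * are expected to skip when it is unavailable, while pagemap is
              * mandatory.
              */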
1800         gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
1801         pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
1802         if (pagemap_fd < 0)
1803                 ksft_exit_fail_msg("opening pagemap failed\n");
1804 
1805         run_anon_test_cases();
1806         run_anon_thp_test_cases();
1807         run_non_anon_test_cases();
1808 
1809         if (pmdsize) {
1810                 /* Only if THP is supported. */
1811                 thp_restore_settings();
1812         }
1813 
1814         err = ksft_get_fail_cnt();
1815         if (err)
1816                 ksft_exit_fail_msg("%d out of %d tests failed\n",
1817                                    err, ksft_test_num());
1818         ksft_exit_pass();
1819 }
1820 
