Linux/tools/testing/selftests/kvm/rseq_test.c


// SPDX-License-Identifier: GPL-2.0-only

/*
 * Include rseq.c without _GNU_SOURCE defined, before including any headers, so
 * that rseq.c is compiled with its own configuration, not KVM selftests' config.
 */
#undef _GNU_SOURCE
#include "../rseq/rseq.c"
#define _GNU_SOURCE

#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <syscall.h>
#include <sys/ioctl.h>
#include <sys/sysinfo.h>
#include <asm/barrier.h>
#include <linux/atomic.h>
#include <linux/rseq.h>
#include <linux/unistd.h>

#include "kvm_util.h"
#include "processor.h"
#include "test_util.h"
#include "ucall_common.h"

/*
 * Any bug related to task migration is likely to be timing-dependent; perform
 * a large number of migrations to reduce the odds of a false negative.
 */
#define NR_TASK_MIGRATIONS 100000

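/*
 * State shared between the vCPU (main) thread and the migration thread: the
 * set of CPUs this task may run on, the min/max usable CPUs within that set,
 * and a flag the migration thread sets once all migrations have been done.
 */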
static pthread_t migration_thread;
static cpu_set_t possible_mask;
static int min_cpu, max_cpu;
static bool done;

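/*
 * Sequence count used like a seqcount: the migration thread makes it odd
 * before changing the vCPU thread's affinity and even again afterwards, so
 * the reader in main() can detect and retry across in-flight migrations.
 */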
static atomic_t seq_cnt;

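/*
 * The guest does nothing except ping-pong with the host; every GUEST_SYNC
 * exits to userspace so that main() gets a chance to compare the rseq CPU ID
 * against getcpu() between migrations.
 */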
static void guest_code(void)
{
        for (;;)
                GUEST_SYNC(0);
}

static int next_cpu(int cpu)
{
        /*
         * Advance to the next CPU, skipping those that weren't in the original
         * affinity set.  Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's
         * data storage is considered opaque.  Note, if this task is pinned to
         * a small set of discontiguous CPUs, e.g. 2 and 1023, this loop will
         * burn a lot of cycles and the test will take longer than normal to
         * complete.
         */
        do {
                cpu++;
                if (cpu > max_cpu) {
                        cpu = min_cpu;
                        TEST_ASSERT(CPU_ISSET(cpu, &possible_mask),
                                    "Min CPU = %d must always be usable", cpu);
                        break;
                }
        } while (!CPU_ISSET(cpu, &possible_mask));

        return cpu;
}

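/*
 * Worker thread that bounces the vCPU thread (identified by rseq_tid) between
 * the usable CPUs via sched_setaffinity(), bracketing each move with seq_cnt
 * updates so the reader can tell when a migration may have raced with it.
 */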
static void *migration_worker(void *__rseq_tid)
{
        pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid;
        cpu_set_t allowed_mask;
        int r, i, cpu;

        CPU_ZERO(&allowed_mask);

        for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) {
                CPU_SET(cpu, &allowed_mask);

                /*
                 * Bump the sequence count twice to allow the reader to detect
                 * that a migration may have occurred in between rseq and sched
                 * CPU ID reads.  An odd sequence count indicates a migration
                 * is in-progress, while a completely different count indicates
                 * a migration occurred since the count was last read.
                 */
                atomic_inc(&seq_cnt);

                /*
                 * Ensure the odd count is visible while getcpu() isn't
                 * stable, i.e. while changing affinity is in-progress.
                 */
                smp_wmb();
                r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask);
                TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
                            errno, strerror(errno));
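                /*
                 * Ensure the affinity change is ordered before the count goes
                 * back to an even value, pairing with the reader's smp_rmb();
                 * an unchanged, even snapshot then implies a stable CPU ID.
                 */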
                smp_wmb();
                atomic_inc(&seq_cnt);

                CPU_CLR(cpu, &allowed_mask);

                /*
                 * Wait 1-10us before proceeding to the next iteration and,
                 * more specifically, before bumping seq_cnt again.  A delay
                 * is needed on three fronts:
                 *
                 *  1. To allow sched_setaffinity() to prompt migration before
                 *     ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME
                 *     (or TIF_NEED_RESCHED, which indirectly leads to handling
                 *     NOTIFY_RESUME) is handled in KVM context.
                 *
                 *     If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters
                 *     the guest, the guest will trigger an IO/MMIO exit all
                 *     the way to userspace and the TIF flags will be handled
                 *     by the generic "exit to userspace" logic, not by KVM.
                 *     The exit to userspace is necessary to give the test a
                 *     chance to check the rseq CPU ID (see #2).
                 *
                 *     Alternatively, guest_code() could include an instruction
                 *     to trigger an exit that is handled by KVM, but any such
                 *     exit requires architecture specific code.
                 *
                 *  2. To let ioctl(KVM_RUN) make its way back to the test
                 *     before the next round of migration.  The test's check on
                 *     the rseq CPU ID must wait for migration to complete in
                 *     order to avoid false positives, thus any kernel rseq bug
                 *     will be missed if the next migration starts before the
                 *     check completes.
                 *
                 *  3. To ensure the read-side makes efficient forward progress,
                 *     e.g. if getcpu() involves a syscall.  Stalling the read-side
                 *     means the test will spend more time waiting for getcpu()
                 *     to stabilize and less time trying to hit the timing-dependent
                 *     bug.
                 *
                 * Because any bug in this area is likely to be timing-dependent,
                 * run with a range of delays at 1us intervals from 1us to 10us
                 * as a best effort to avoid tuning the test to the point where
                 * it can hit _only_ the original bug and not detect future
                 * regressions.
                 *
                 * The original bug can reproduce with a delay up to ~500us on
                 * x86-64, but starts to require more iterations to reproduce
                 * as the delay creeps above ~10us, and the average runtime of
                 * each iteration obviously increases as well.  Cap the delay
                 * at 10us to keep test runtime reasonable while minimizing
                 * potential coverage loss.
                 *
                 * The lower bound for reproducing the bug is likely below 1us,
                 * e.g. failures occur on x86-64 with nanosleep(0), but at that
                 * point the overhead of the syscall likely dominates the delay.
                 * Use usleep() for simplicity and to avoid unnecessary kernel
                 * dependencies.
                 */
                usleep((i % 10) + 1);
        }
        done = true;
        return NULL;
}

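/*
 * Determine the lowest and highest usable CPUs in this task's affinity mask,
 * and require at least two so that migration is actually possible.
 */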
static void calc_min_max_cpu(void)
{
        int i, cnt, nproc;

        TEST_REQUIRE(CPU_COUNT(&possible_mask) >= 2);

        /*
         * CPU_SET doesn't provide a FOR_EACH helper, so get the min/max CPU
         * that this task is affined to in order to reduce the time spent
         * querying unusable CPUs, e.g. if this task is pinned to a small
         * percentage of total CPUs.
         */
        nproc = get_nprocs_conf();
        min_cpu = -1;
        max_cpu = -1;
        cnt = 0;

        for (i = 0; i < nproc; i++) {
                if (!CPU_ISSET(i, &possible_mask))
                        continue;
                if (min_cpu == -1)
                        min_cpu = i;
                max_cpu = i;
                cnt++;
        }

        __TEST_REQUIRE(cnt >= 2,
                       "Only one usable CPU, task migration not possible");
}

static void help(const char *name)
{
        puts("");
        printf("usage: %s [-h] [-u]\n", name);
        printf(" -u: Don't sanity check the number of successful KVM_RUNs\n");
        puts("");
        exit(0);
}

int main(int argc, char *argv[])
{
        bool skip_sanity_check = false;
        int r, i, snapshot;
        struct kvm_vm *vm;
        struct kvm_vcpu *vcpu;
        u32 cpu, rseq_cpu;
        int opt;

        while ((opt = getopt(argc, argv, "hu")) != -1) {
                switch (opt) {
                case 'u':
                        skip_sanity_check = true;
                        break;
                case 'h':
                default:
                        help(argv[0]);
                        break;
                }
        }

        r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
        TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
                    strerror(errno));

        calc_min_max_cpu();

        r = rseq_register_current_thread();
        TEST_ASSERT(!r, "rseq_register_current_thread failed, errno = %d (%s)",
                    errno, strerror(errno));

        /*
         * Create and run a dummy VM that immediately exits to userspace via
         * GUEST_SYNC, while concurrently migrating the process by setting its
         * CPU affinity.
         */
        vm = vm_create_with_one_vcpu(&vcpu, guest_code);

        pthread_create(&migration_thread, NULL, migration_worker,
                       (void *)(unsigned long)syscall(SYS_gettid));

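        /*
         * Enter the guest repeatedly until the migration thread has finished
         * all NR_TASK_MIGRATIONS rounds; each GUEST_SYNC exit is an
         * opportunity to verify that rseq and sched agree on the CPU.
         */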
        for (i = 0; !done; i++) {
                vcpu_run(vcpu);
                TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
                            "Guest failed?");

                /*
                 * Verify rseq's CPU matches sched's CPU.  Ensure migration
                 * doesn't occur between getcpu() and reading the rseq cpu_id
                 * by rereading both if the sequence count changes, or if the
                 * count is odd (migration in-progress).
                 */
                do {
                        /*
                         * Drop bit 0 to force a mismatch if the count is odd,
                         * i.e. if a migration is in-progress.
                         */
                        snapshot = atomic_read(&seq_cnt) & ~1;

                        /*
                         * Ensure calling getcpu() and reading rseq.cpu_id complete
                         * in a single "no migration" window, i.e. are not reordered
                         * across the seq_cnt reads.
                         */
                        smp_rmb();
                        r = sys_getcpu(&cpu, NULL);
                        TEST_ASSERT(!r, "getcpu failed, errno = %d (%s)",
                                    errno, strerror(errno));
                        rseq_cpu = rseq_current_cpu_raw();
                        smp_rmb();
                } while (snapshot != atomic_read(&seq_cnt));

                TEST_ASSERT(rseq_cpu == cpu,
                            "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu);
        }

        /*
         * Sanity check that the test was able to enter the guest a reasonable
         * number of times, e.g. didn't get stalled too often/long waiting for
         * getcpu() to stabilize.  A 2:1 migration:KVM_RUN ratio is a fairly
         * conservative ratio on x86-64, which can do _more_ KVM_RUNs than
         * migrations given the 1us+ delay in the migration task.
         *
         * The ratio can also be skewed on systems with large low power mode
         * wakeup latency, where the scheduler quite often fails to wake the
         * target CPU before the vCPU thread is scheduled onto another CPU.
         */
        TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2),
                    "Only performed %d KVM_RUNs, task stalled too much?\n\n"
                    "  Try disabling deep sleep states to reduce CPU wakeup latency,\n"
                    "  e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n"
                    "  or run with -u to disable this sanity check.", i);

        pthread_join(migration_thread, NULL);

        kvm_vm_free(vm);

        rseq_unregister_current_thread();

        return 0;
}
