~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/kernel/futex/syscalls.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-or-later
  2 
  3 #include <linux/syscalls.h>
  4 #include <linux/time_namespace.h>
  5 
  6 #include "futex.h"
  7 
  8 /*
  9  * Support for robust futexes: the kernel cleans up held futexes at
 10  * thread exit time.
 11  *
 12  * Implementation: user-space maintains a per-thread list of locks it
 13  * is holding. Upon do_exit(), the kernel carefully walks this list,
 14  * and marks all locks that are owned by this thread with the
 15  * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
 16  * always manipulated with the lock held, so the list is private and
 17  * per-thread. Userspace also maintains a per-thread 'list_op_pending'
 18  * field, to allow the kernel to clean up if the thread dies after
 19  * acquiring the lock, but just before it could have added itself to
 20  * the list. There can only be one such pending lock.
 21  */
 22 
 23 /**
 24  * sys_set_robust_list() - Set the robust-futex list head of a task
 25  * @head:       pointer to the list-head
 26  * @len:        length of the list-head, as userspace expects
 27  */
 28 SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
 29                 size_t, len)
 30 {
 31         /*
 32          * The kernel knows only one size for now:
 33          */
 34         if (unlikely(len != sizeof(*head)))
 35                 return -EINVAL;
 36 
 37         current->robust_list = head;
 38 
 39         return 0;
 40 }
 41 
 42 /**
 43  * sys_get_robust_list() - Get the robust-futex list head of a task
 44  * @pid:        pid of the process [zero for current task]
 45  * @head_ptr:   pointer to a list-head pointer, the kernel fills it in
 46  * @len_ptr:    pointer to a length field, the kernel fills in the header size
 47  */
 48 SYSCALL_DEFINE3(get_robust_list, int, pid,
 49                 struct robust_list_head __user * __user *, head_ptr,
 50                 size_t __user *, len_ptr)
 51 {
 52         struct robust_list_head __user *head;
 53         unsigned long ret;
 54         struct task_struct *p;
 55 
 56         rcu_read_lock();
 57 
 58         ret = -ESRCH;
 59         if (!pid)
 60                 p = current;
 61         else {
 62                 p = find_task_by_vpid(pid);
 63                 if (!p)
 64                         goto err_unlock;
 65         }
 66 
 67         ret = -EPERM;
 68         if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
 69                 goto err_unlock;
 70 
 71         head = p->robust_list;
 72         rcu_read_unlock();
 73 
 74         if (put_user(sizeof(*head), len_ptr))
 75                 return -EFAULT;
 76         return put_user(head, head_ptr);
 77 
 78 err_unlock:
 79         rcu_read_unlock();
 80 
 81         return ret;
 82 }
 83 
 84 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 85                 u32 __user *uaddr2, u32 val2, u32 val3)
 86 {
 87         unsigned int flags = futex_to_flags(op);
 88         int cmd = op & FUTEX_CMD_MASK;
 89 
 90         if (flags & FLAGS_CLOCKRT) {
 91                 if (cmd != FUTEX_WAIT_BITSET &&
 92                     cmd != FUTEX_WAIT_REQUEUE_PI &&
 93                     cmd != FUTEX_LOCK_PI2)
 94                         return -ENOSYS;
 95         }
 96 
 97         switch (cmd) {
 98         case FUTEX_WAIT:
 99                 val3 = FUTEX_BITSET_MATCH_ANY;
100                 fallthrough;
101         case FUTEX_WAIT_BITSET:
102                 return futex_wait(uaddr, flags, val, timeout, val3);
103         case FUTEX_WAKE:
104                 val3 = FUTEX_BITSET_MATCH_ANY;
105                 fallthrough;
106         case FUTEX_WAKE_BITSET:
107                 return futex_wake(uaddr, flags, val, val3);
108         case FUTEX_REQUEUE:
109                 return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, NULL, 0);
110         case FUTEX_CMP_REQUEUE:
111                 return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 0);
112         case FUTEX_WAKE_OP:
113                 return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
114         case FUTEX_LOCK_PI:
115                 flags |= FLAGS_CLOCKRT;
116                 fallthrough;
117         case FUTEX_LOCK_PI2:
118                 return futex_lock_pi(uaddr, flags, timeout, 0);
119         case FUTEX_UNLOCK_PI:
120                 return futex_unlock_pi(uaddr, flags);
121         case FUTEX_TRYLOCK_PI:
122                 return futex_lock_pi(uaddr, flags, NULL, 1);
123         case FUTEX_WAIT_REQUEUE_PI:
124                 val3 = FUTEX_BITSET_MATCH_ANY;
125                 return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
126                                              uaddr2);
127         case FUTEX_CMP_REQUEUE_PI:
128                 return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 1);
129         }
130         return -ENOSYS;
131 }
132 
133 static __always_inline bool futex_cmd_has_timeout(u32 cmd)
134 {
135         switch (cmd) {
136         case FUTEX_WAIT:
137         case FUTEX_LOCK_PI:
138         case FUTEX_LOCK_PI2:
139         case FUTEX_WAIT_BITSET:
140         case FUTEX_WAIT_REQUEUE_PI:
141                 return true;
142         }
143         return false;
144 }
145 
146 static __always_inline int
147 futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
148 {
149         if (!timespec64_valid(ts))
150                 return -EINVAL;
151 
152         *t = timespec64_to_ktime(*ts);
153         if (cmd == FUTEX_WAIT)
154                 *t = ktime_add_safe(ktime_get(), *t);
155         else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
156                 *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
157         return 0;
158 }
159 
160 SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
161                 const struct __kernel_timespec __user *, utime,
162                 u32 __user *, uaddr2, u32, val3)
163 {
164         int ret, cmd = op & FUTEX_CMD_MASK;
165         ktime_t t, *tp = NULL;
166         struct timespec64 ts;
167 
168         if (utime && futex_cmd_has_timeout(cmd)) {
169                 if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
170                         return -EFAULT;
171                 if (get_timespec64(&ts, utime))
172                         return -EFAULT;
173                 ret = futex_init_timeout(cmd, op, &ts, &t);
174                 if (ret)
175                         return ret;
176                 tp = &t;
177         }
178 
179         return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
180 }
181 
182 /**
183  * futex_parse_waitv - Parse a waitv array from userspace
184  * @futexv:     Kernel side list of waiters to be filled
185  * @uwaitv:     Userspace list to be parsed
186  * @nr_futexes: Length of futexv
187  * @wake:       Wake to call when futex is woken
188  * @wake_data:  Data for the wake handler
189  *
190  * Return: Error code on failure, 0 on success
191  */
192 int futex_parse_waitv(struct futex_vector *futexv,
193                       struct futex_waitv __user *uwaitv,
194                       unsigned int nr_futexes, futex_wake_fn *wake,
195                       void *wake_data)
196 {
197         struct futex_waitv aux;
198         unsigned int i;
199 
200         for (i = 0; i < nr_futexes; i++) {
201                 unsigned int flags;
202 
203                 if (copy_from_user(&aux, &uwaitv[i], sizeof(aux)))
204                         return -EFAULT;
205 
206                 if ((aux.flags & ~FUTEX2_VALID_MASK) || aux.__reserved)
207                         return -EINVAL;
208 
209                 flags = futex2_to_flags(aux.flags);
210                 if (!futex_flags_valid(flags))
211                         return -EINVAL;
212 
213                 if (!futex_validate_input(flags, aux.val))
214                         return -EINVAL;
215 
216                 futexv[i].w.flags = flags;
217                 futexv[i].w.val = aux.val;
218                 futexv[i].w.uaddr = aux.uaddr;
219                 futexv[i].q = futex_q_init;
220                 futexv[i].q.wake = wake;
221                 futexv[i].q.wake_data = wake_data;
222         }
223 
224         return 0;
225 }
226 
227 static int futex2_setup_timeout(struct __kernel_timespec __user *timeout,
228                                 clockid_t clockid, struct hrtimer_sleeper *to)
229 {
230         int flag_clkid = 0, flag_init = 0;
231         struct timespec64 ts;
232         ktime_t time;
233         int ret;
234 
235         if (!timeout)
236                 return 0;
237 
238         if (clockid == CLOCK_REALTIME) {
239                 flag_clkid = FLAGS_CLOCKRT;
240                 flag_init = FUTEX_CLOCK_REALTIME;
241         }
242 
243         if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
244                 return -EINVAL;
245 
246         if (get_timespec64(&ts, timeout))
247                 return -EFAULT;
248 
249         /*
250          * Since there's no opcode for futex_waitv, use
251          * FUTEX_WAIT_BITSET that uses absolute timeout as well
252          */
253         ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
254         if (ret)
255                 return ret;
256 
257         futex_setup_timer(&time, to, flag_clkid, 0);
258         return 0;
259 }
260 
261 static inline void futex2_destroy_timeout(struct hrtimer_sleeper *to)
262 {
263         hrtimer_cancel(&to->timer);
264         destroy_hrtimer_on_stack(&to->timer);
265 }
266 
267 /**
268  * sys_futex_waitv - Wait on a list of futexes
269  * @waiters:    List of futexes to wait on
270  * @nr_futexes: Length of futexv
271  * @flags:      Flag for timeout (monotonic/realtime)
272  * @timeout:    Optional absolute timeout.
273  * @clockid:    Clock to be used for the timeout, realtime or monotonic.
274  *
275  * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes
276  * if a futex_wake() is performed at any uaddr. The syscall returns immediately
277  * if any waiter has *uaddr != val. *timeout is an optional timeout value for
278  * the operation. Each waiter has individual flags. The `flags` argument for
279  * the syscall should be used solely for specifying the timeout as realtime, if
280  * needed. Flags for private futexes, sizes, etc. should be used on the
281  * individual flags of each waiter.
282  *
283  * Returns the array index of one of the woken futexes. No further information
284  * is provided: any number of other futexes may also have been woken by the
285  * same event, and if more than one futex was woken, the retrned index may
286  * refer to any one of them. (It is not necessaryily the futex with the
287  * smallest index, nor the one most recently woken, nor...)
288  */
289 
290 SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
291                 unsigned int, nr_futexes, unsigned int, flags,
292                 struct __kernel_timespec __user *, timeout, clockid_t, clockid)
293 {
294         struct hrtimer_sleeper to;
295         struct futex_vector *futexv;
296         int ret;
297 
298         /* This syscall supports no flags for now */
299         if (flags)
300                 return -EINVAL;
301 
302         if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
303                 return -EINVAL;
304 
305         if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to)))
306                 return ret;
307 
308         futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL);
309         if (!futexv) {
310                 ret = -ENOMEM;
311                 goto destroy_timer;
312         }
313 
314         ret = futex_parse_waitv(futexv, waiters, nr_futexes, futex_wake_mark,
315                                 NULL);
316         if (!ret)
317                 ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL);
318 
319         kfree(futexv);
320 
321 destroy_timer:
322         if (timeout)
323                 futex2_destroy_timeout(&to);
324         return ret;
325 }
326 
327 /*
328  * sys_futex_wake - Wake a number of futexes
329  * @uaddr:      Address of the futex(es) to wake
330  * @mask:       bitmask
331  * @nr:         Number of the futexes to wake
332  * @flags:      FUTEX2 flags
333  *
334  * Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the
335  * futex2 family of calls.
336  */
337 
338 SYSCALL_DEFINE4(futex_wake,
339                 void __user *, uaddr,
340                 unsigned long, mask,
341                 int, nr,
342                 unsigned int, flags)
343 {
344         if (flags & ~FUTEX2_VALID_MASK)
345                 return -EINVAL;
346 
347         flags = futex2_to_flags(flags);
348         if (!futex_flags_valid(flags))
349                 return -EINVAL;
350 
351         if (!futex_validate_input(flags, mask))
352                 return -EINVAL;
353 
354         return futex_wake(uaddr, FLAGS_STRICT | flags, nr, mask);
355 }
356 
357 /*
358  * sys_futex_wait - Wait on a futex
359  * @uaddr:      Address of the futex to wait on
360  * @val:        Value of @uaddr
361  * @mask:       bitmask
362  * @flags:      FUTEX2 flags
363  * @timeout:    Optional absolute timeout
364  * @clockid:    Clock to be used for the timeout, realtime or monotonic
365  *
366  * Identical to the traditional FUTEX_WAIT_BITSET op, except it is part of the
367  * futex2 familiy of calls.
368  */
369 
370 SYSCALL_DEFINE6(futex_wait,
371                 void __user *, uaddr,
372                 unsigned long, val,
373                 unsigned long, mask,
374                 unsigned int, flags,
375                 struct __kernel_timespec __user *, timeout,
376                 clockid_t, clockid)
377 {
378         struct hrtimer_sleeper to;
379         int ret;
380 
381         if (flags & ~FUTEX2_VALID_MASK)
382                 return -EINVAL;
383 
384         flags = futex2_to_flags(flags);
385         if (!futex_flags_valid(flags))
386                 return -EINVAL;
387 
388         if (!futex_validate_input(flags, val) ||
389             !futex_validate_input(flags, mask))
390                 return -EINVAL;
391 
392         if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to)))
393                 return ret;
394 
395         ret = __futex_wait(uaddr, flags, val, timeout ? &to : NULL, mask);
396 
397         if (timeout)
398                 futex2_destroy_timeout(&to);
399 
400         return ret;
401 }
402 
403 /*
404  * sys_futex_requeue - Requeue a waiter from one futex to another
405  * @waiters:    array describing the source and destination futex
406  * @flags:      unused
407  * @nr_wake:    number of futexes to wake
408  * @nr_requeue: number of futexes to requeue
409  *
410  * Identical to the traditional FUTEX_CMP_REQUEUE op, except it is part of the
411  * futex2 family of calls.
412  */
413 
414 SYSCALL_DEFINE4(futex_requeue,
415                 struct futex_waitv __user *, waiters,
416                 unsigned int, flags,
417                 int, nr_wake,
418                 int, nr_requeue)
419 {
420         struct futex_vector futexes[2];
421         u32 cmpval;
422         int ret;
423 
424         if (flags)
425                 return -EINVAL;
426 
427         if (!waiters)
428                 return -EINVAL;
429 
430         ret = futex_parse_waitv(futexes, waiters, 2, futex_wake_mark, NULL);
431         if (ret)
432                 return ret;
433 
434         cmpval = futexes[0].w.val;
435 
436         return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags,
437                              u64_to_user_ptr(futexes[1].w.uaddr), futexes[1].w.flags,
438                              nr_wake, nr_requeue, &cmpval, 0);
439 }
440 
441 #ifdef CONFIG_COMPAT
442 COMPAT_SYSCALL_DEFINE2(set_robust_list,
443                 struct compat_robust_list_head __user *, head,
444                 compat_size_t, len)
445 {
446         if (unlikely(len != sizeof(*head)))
447                 return -EINVAL;
448 
449         current->compat_robust_list = head;
450 
451         return 0;
452 }
453 
454 COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
455                         compat_uptr_t __user *, head_ptr,
456                         compat_size_t __user *, len_ptr)
457 {
458         struct compat_robust_list_head __user *head;
459         unsigned long ret;
460         struct task_struct *p;
461 
462         rcu_read_lock();
463 
464         ret = -ESRCH;
465         if (!pid)
466                 p = current;
467         else {
468                 p = find_task_by_vpid(pid);
469                 if (!p)
470                         goto err_unlock;
471         }
472 
473         ret = -EPERM;
474         if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
475                 goto err_unlock;
476 
477         head = p->compat_robust_list;
478         rcu_read_unlock();
479 
480         if (put_user(sizeof(*head), len_ptr))
481                 return -EFAULT;
482         return put_user(ptr_to_compat(head), head_ptr);
483 
484 err_unlock:
485         rcu_read_unlock();
486 
487         return ret;
488 }
489 #endif /* CONFIG_COMPAT */
490 
491 #ifdef CONFIG_COMPAT_32BIT_TIME
492 SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
493                 const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
494                 u32, val3)
495 {
496         int ret, cmd = op & FUTEX_CMD_MASK;
497         ktime_t t, *tp = NULL;
498         struct timespec64 ts;
499 
500         if (utime && futex_cmd_has_timeout(cmd)) {
501                 if (get_old_timespec32(&ts, utime))
502                         return -EFAULT;
503                 ret = futex_init_timeout(cmd, op, &ts, &t);
504                 if (ret)
505                         return ret;
506                 tp = &t;
507         }
508 
509         return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
510 }
511 #endif /* CONFIG_COMPAT_32BIT_TIME */
512 
513 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php