Linux/io_uring/register.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Code related to the io_uring_register() syscall
 *
 * Copyright (C) 2023 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/refcount.h>
#include <linux/bits.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/nospec.h>
#include <linux/compat.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io_uring.h"
#include "opdef.h"
#include "tctx.h"
#include "rsrc.h"
#include "sqpoll.h"
#include "register.h"
#include "cancel.h"
#include "kbuf.h"
#include "napi.h"
#include "eventfd.h"

#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
                                 IORING_REGISTER_LAST + IORING_OP_LAST)

static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
                           unsigned nr_args)
{
        struct io_uring_probe *p;
        size_t size;
        int i, ret;

        if (nr_args > IORING_OP_LAST)
                nr_args = IORING_OP_LAST;

        size = struct_size(p, ops, nr_args);
        p = kzalloc(size, GFP_KERNEL);
        if (!p)
                return -ENOMEM;

        ret = -EFAULT;
        if (copy_from_user(p, arg, size))
                goto out;
        ret = -EINVAL;
        if (memchr_inv(p, 0, size))
                goto out;

        p->last_op = IORING_OP_LAST - 1;

        for (i = 0; i < nr_args; i++) {
                p->ops[i].op = i;
                if (io_uring_op_supported(i))
                        p->ops[i].flags = IO_URING_OP_SUPPORTED;
        }
        p->ops_len = i;

        ret = 0;
        if (copy_to_user(arg, p, size))
                ret = -EFAULT;
out:
        kfree(p);
        return ret;
}
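
/*
 * Usage sketch (not part of this file): userspace can probe which opcodes
 * the running kernel supports by passing a *zeroed* io_uring_probe sized
 * for up to 256 ops (larger nr_args is rejected by the register switch,
 * and io_probe() clamps to IORING_OP_LAST). Illustrative only; assumes a
 * ring fd from io_uring_setup(), and error handling is abbreviated.
 */
#if 0 /* illustrative userspace example, not compiled with the kernel */
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>

static void probe_ops(int ring_fd)
{
        size_t len = sizeof(struct io_uring_probe) +
                     256 * sizeof(struct io_uring_probe_op);
        /* io_probe() rejects non-zero input via memchr_inv(), so calloc() */
        struct io_uring_probe *p = calloc(1, len);

        if (!p)
                return;
        if (syscall(__NR_io_uring_register, ring_fd,
                    IORING_REGISTER_PROBE, p, 256) == 0) {
                for (int i = 0; i < p->ops_len; i++)
                        if (p->ops[i].flags & IO_URING_OP_SUPPORTED)
                                printf("opcode %u supported\n", p->ops[i].op);
        }
        free(p);
}
#endif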

int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
{
        const struct cred *creds;

        creds = xa_erase(&ctx->personalities, id);
        if (creds) {
                put_cred(creds);
                return 0;
        }

        return -EINVAL;
}

static int io_register_personality(struct io_ring_ctx *ctx)
{
        const struct cred *creds;
        u32 id;
        int ret;

        creds = get_current_cred();

        ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
                        XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
        if (ret < 0) {
                put_cred(creds);
                return ret;
        }
        return id;
}
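
/*
 * Usage sketch (not part of this file): IORING_REGISTER_PERSONALITY takes
 * no argument and returns a positive id for the caller's current
 * credentials; a later SQE can run with them via sqe->personality.
 * Illustrative only; assumes the liburing wrappers for the register call.
 */
#if 0 /* illustrative userspace example */
#include <liburing.h>

static int save_current_creds(struct io_uring *ring)
{
        /* returns the personality id (>= 0) or a negative errno */
        int id = io_uring_register_personality(ring);

        if (id < 0)
                return id;
        /* ...later, per request: sqe->personality = id; ... */
        /* and drop it again when no longer needed: */
        io_uring_unregister_personality(ring, id);
        return 0;
}
#endif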

static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
                                           void __user *arg, unsigned int nr_args)
{
        struct io_uring_restriction *res;
        size_t size;
        int i, ret;

        /* Restrictions allowed only if rings started disabled */
        if (!(ctx->flags & IORING_SETUP_R_DISABLED))
                return -EBADFD;

        /* We allow only a single restrictions registration */
        if (ctx->restrictions.registered)
                return -EBUSY;

        if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
                return -EINVAL;

        size = array_size(nr_args, sizeof(*res));
        if (size == SIZE_MAX)
                return -EOVERFLOW;

        res = memdup_user(arg, size);
        if (IS_ERR(res))
                return PTR_ERR(res);

        ret = 0;

        for (i = 0; i < nr_args; i++) {
                switch (res[i].opcode) {
                case IORING_RESTRICTION_REGISTER_OP:
                        if (res[i].register_op >= IORING_REGISTER_LAST) {
                                ret = -EINVAL;
                                goto out;
                        }

                        __set_bit(res[i].register_op,
                                  ctx->restrictions.register_op);
                        break;
                case IORING_RESTRICTION_SQE_OP:
                        if (res[i].sqe_op >= IORING_OP_LAST) {
                                ret = -EINVAL;
                                goto out;
                        }

                        __set_bit(res[i].sqe_op, ctx->restrictions.sqe_op);
                        break;
                case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
                        ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags;
                        break;
                case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
                        ctx->restrictions.sqe_flags_required = res[i].sqe_flags;
                        break;
                default:
                        ret = -EINVAL;
                        goto out;
                }
        }

out:
        /* Reset all restrictions if an error happened */
        if (ret != 0)
                memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
        else
                ctx->restrictions.registered = true;

        kfree(res);
        return ret;
}
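
/*
 * Usage sketch (not part of this file): restrictions can only be installed
 * while the ring is still disabled (IORING_SETUP_R_DISABLED), and only once.
 * This illustrative raw-syscall fragment allows a single SQE opcode and a
 * single register opcode; error handling is abbreviated.
 */
#if 0 /* illustrative userspace example */
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>

static int restrict_ring(int ring_fd)
{
        struct io_uring_restriction res[2];

        memset(res, 0, sizeof(res));
        /* only allow READV submissions... */
        res[0].opcode = IORING_RESTRICTION_SQE_OP;
        res[0].sqe_op = IORING_OP_READV;
        /* ...and only allow the register op needed to enable the ring */
        res[1].opcode = IORING_RESTRICTION_REGISTER_OP;
        res[1].register_op = IORING_REGISTER_ENABLE_RINGS;

        return syscall(__NR_io_uring_register, ring_fd,
                       IORING_REGISTER_RESTRICTIONS, res, 2);
}
#endif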

static int io_register_enable_rings(struct io_ring_ctx *ctx)
{
        if (!(ctx->flags & IORING_SETUP_R_DISABLED))
                return -EBADFD;

        if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) {
                WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
                /*
                 * Lazy activation attempts would fail if the ring was polled
                 * before submitter_task is set.
                 */
                if (wq_has_sleeper(&ctx->poll_wq))
                        io_activate_pollwq(ctx);
        }

        if (ctx->restrictions.registered)
                ctx->restricted = 1;

        ctx->flags &= ~IORING_SETUP_R_DISABLED;
        if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait))
                wake_up(&ctx->sq_data->wait);
        return 0;
}
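
/*
 * Usage sketch (not part of this file): a ring created with
 * IORING_SETUP_R_DISABLED accepts no submissions until it is enabled; any
 * restrictions registered beforehand take effect at that point. arg and
 * nr_args must both be zero. Illustrative only.
 */
#if 0 /* illustrative userspace example */
#include <stddef.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>

static int enable_ring(int ring_fd)
{
        return syscall(__NR_io_uring_register, ring_fd,
                       IORING_REGISTER_ENABLE_RINGS, NULL, 0);
}
#endif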

static __cold int __io_register_iowq_aff(struct io_ring_ctx *ctx,
                                         cpumask_var_t new_mask)
{
        int ret;

        if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
                ret = io_wq_cpu_affinity(current->io_uring, new_mask);
        } else {
                mutex_unlock(&ctx->uring_lock);
                ret = io_sqpoll_wq_cpu_affinity(ctx, new_mask);
                mutex_lock(&ctx->uring_lock);
        }

        return ret;
}

static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx,
                                       void __user *arg, unsigned len)
{
        cpumask_var_t new_mask;
        int ret;

        if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
                return -ENOMEM;

        cpumask_clear(new_mask);
        if (len > cpumask_size())
                len = cpumask_size();

#ifdef CONFIG_COMPAT
        if (in_compat_syscall())
                ret = compat_get_bitmap(cpumask_bits(new_mask),
                                        (const compat_ulong_t __user *)arg,
                                        len * 8 /* CHAR_BIT */);
        else
#endif
                ret = copy_from_user(new_mask, arg, len);

        if (ret) {
                free_cpumask_var(new_mask);
                return -EFAULT;
        }

        ret = __io_register_iowq_aff(ctx, new_mask);
        free_cpumask_var(new_mask);
        return ret;
}

static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
{
        return __io_register_iowq_aff(ctx, NULL);
}
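
/*
 * Usage sketch (not part of this file): IORING_REGISTER_IOWQ_AFF takes a
 * CPU bitmask and its byte length in nr_args; a NULL arg is rejected here,
 * so clearing is done with IORING_UNREGISTER_IOWQ_AFF instead. Illustrative
 * only, pinning io-wq workers to CPU 0.
 */
#if 0 /* illustrative userspace example */
#define _GNU_SOURCE
#include <sched.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>

static int pin_iowq_to_cpu0(int ring_fd)
{
        cpu_set_t mask;

        CPU_ZERO(&mask);
        CPU_SET(0, &mask);
        /* nr_args is the mask length in bytes */
        return syscall(__NR_io_uring_register, ring_fd,
                       IORING_REGISTER_IOWQ_AFF, &mask, sizeof(mask));
}
#endif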

static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
                                               void __user *arg)
        __must_hold(&ctx->uring_lock)
{
        struct io_tctx_node *node;
        struct io_uring_task *tctx = NULL;
        struct io_sq_data *sqd = NULL;
        __u32 new_count[2];
        int i, ret;

        if (copy_from_user(new_count, arg, sizeof(new_count)))
                return -EFAULT;
        for (i = 0; i < ARRAY_SIZE(new_count); i++)
                if (new_count[i] > INT_MAX)
                        return -EINVAL;

        if (ctx->flags & IORING_SETUP_SQPOLL) {
                sqd = ctx->sq_data;
                if (sqd) {
                        /*
                         * Observe the correct sqd->lock -> ctx->uring_lock
                         * ordering. Fine to drop uring_lock here; we hold
                         * a ref to the ctx.
                         */
                        refcount_inc(&sqd->refs);
                        mutex_unlock(&ctx->uring_lock);
                        mutex_lock(&sqd->lock);
                        mutex_lock(&ctx->uring_lock);
                        if (sqd->thread)
                                tctx = sqd->thread->io_uring;
                }
        } else {
                tctx = current->io_uring;
        }

        BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));

        for (i = 0; i < ARRAY_SIZE(new_count); i++)
                if (new_count[i])
                        ctx->iowq_limits[i] = new_count[i];
        ctx->iowq_limits_set = true;

        if (tctx && tctx->io_wq) {
                ret = io_wq_max_workers(tctx->io_wq, new_count);
                if (ret)
                        goto err;
        } else {
                memset(new_count, 0, sizeof(new_count));
        }

        if (sqd) {
                mutex_unlock(&ctx->uring_lock);
                mutex_unlock(&sqd->lock);
                io_put_sq_data(sqd);
                mutex_lock(&ctx->uring_lock);
        }

        if (copy_to_user(arg, new_count, sizeof(new_count)))
                return -EFAULT;

        /* that's it for SQPOLL, only the SQPOLL task creates requests */
        if (sqd)
                return 0;

        /* now propagate the restriction to all registered users */
        list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
                tctx = node->task->io_uring;
                if (WARN_ON_ONCE(!tctx->io_wq))
                        continue;

                for (i = 0; i < ARRAY_SIZE(new_count); i++)
                        new_count[i] = ctx->iowq_limits[i];
                /* ignore errors, it always returns zero anyway */
                (void)io_wq_max_workers(tctx->io_wq, new_count);
        }
        return 0;
err:
        if (sqd) {
                mutex_unlock(&ctx->uring_lock);
                mutex_unlock(&sqd->lock);
                io_put_sq_data(sqd);
                mutex_lock(&ctx->uring_lock);
        }
        return ret;
}
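
/*
 * Usage sketch (not part of this file): IORING_REGISTER_IOWQ_MAX_WORKERS
 * takes a two-element __u32 array, [0] capping bounded and [1] unbounded
 * io-wq workers. A zero entry leaves that limit unchanged, and the previous
 * limits are written back into the array on return. Illustrative only.
 */
#if 0 /* illustrative userspace example */
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>

static int cap_iowq_workers(int ring_fd, unsigned bounded, unsigned unbounded)
{
        __u32 counts[2] = { bounded, unbounded };
        int ret;

        /* nr_args must be exactly 2 */
        ret = syscall(__NR_io_uring_register, ring_fd,
                      IORING_REGISTER_IOWQ_MAX_WORKERS, counts, 2);
        /* on success, counts[] now holds the previous limits */
        return ret;
}
#endif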

static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
                               void __user *arg, unsigned nr_args)
        __releases(ctx->uring_lock)
        __acquires(ctx->uring_lock)
{
        int ret;

        /*
         * We don't quiesce the refs for register anymore and so it can't be
         * dying as we're holding a file ref here.
         */
        if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs)))
                return -ENXIO;

        if (ctx->submitter_task && ctx->submitter_task != current)
                return -EEXIST;

        if (ctx->restricted) {
                opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
                if (!test_bit(opcode, ctx->restrictions.register_op))
                        return -EACCES;
        }

        switch (opcode) {
        case IORING_REGISTER_BUFFERS:
                ret = -EFAULT;
                if (!arg)
                        break;
                ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
                break;
        case IORING_UNREGISTER_BUFFERS:
                ret = -EINVAL;
                if (arg || nr_args)
                        break;
                ret = io_sqe_buffers_unregister(ctx);
                break;
        case IORING_REGISTER_FILES:
                ret = -EFAULT;
                if (!arg)
                        break;
                ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
                break;
        case IORING_UNREGISTER_FILES:
                ret = -EINVAL;
                if (arg || nr_args)
                        break;
                ret = io_sqe_files_unregister(ctx);
                break;
        case IORING_REGISTER_FILES_UPDATE:
                ret = io_register_files_update(ctx, arg, nr_args);
                break;
        case IORING_REGISTER_EVENTFD:
                ret = -EINVAL;
                if (nr_args != 1)
                        break;
                ret = io_eventfd_register(ctx, arg, 0);
                break;
        case IORING_REGISTER_EVENTFD_ASYNC:
                ret = -EINVAL;
                if (nr_args != 1)
                        break;
                ret = io_eventfd_register(ctx, arg, 1);
                break;
        case IORING_UNREGISTER_EVENTFD:
                ret = -EINVAL;
                if (arg || nr_args)
                        break;
                ret = io_eventfd_unregister(ctx);
                break;
        case IORING_REGISTER_PROBE:
                ret = -EINVAL;
                if (!arg || nr_args > 256)
                        break;
                ret = io_probe(ctx, arg, nr_args);
                break;
        case IORING_REGISTER_PERSONALITY:
                ret = -EINVAL;
                if (arg || nr_args)
                        break;
                ret = io_register_personality(ctx);
                break;
        case IORING_UNREGISTER_PERSONALITY:
                ret = -EINVAL;
                if (arg)
                        break;
                ret = io_unregister_personality(ctx, nr_args);
                break;
        case IORING_REGISTER_ENABLE_RINGS:
                ret = -EINVAL;
                if (arg || nr_args)
                        break;
                ret = io_register_enable_rings(ctx);
                break;
        case IORING_REGISTER_RESTRICTIONS:
                ret = io_register_restrictions(ctx, arg, nr_args);
                break;
        case IORING_REGISTER_FILES2:
                ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE);
                break;
        case IORING_REGISTER_FILES_UPDATE2:
                ret = io_register_rsrc_update(ctx, arg, nr_args,
                                              IORING_RSRC_FILE);
                break;
        case IORING_REGISTER_BUFFERS2:
                ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER);
                break;
        case IORING_REGISTER_BUFFERS_UPDATE:
                ret = io_register_rsrc_update(ctx, arg, nr_args,
                                              IORING_RSRC_BUFFER);
                break;
        case IORING_REGISTER_IOWQ_AFF:
                ret = -EINVAL;
                if (!arg || !nr_args)
                        break;
                ret = io_register_iowq_aff(ctx, arg, nr_args);
                break;
        case IORING_UNREGISTER_IOWQ_AFF:
                ret = -EINVAL;
                if (arg || nr_args)
                        break;
                ret = io_unregister_iowq_aff(ctx);
                break;
        case IORING_REGISTER_IOWQ_MAX_WORKERS:
                ret = -EINVAL;
                if (!arg || nr_args != 2)
                        break;
                ret = io_register_iowq_max_workers(ctx, arg);
                break;
        case IORING_REGISTER_RING_FDS:
                ret = io_ringfd_register(ctx, arg, nr_args);
                break;
        case IORING_UNREGISTER_RING_FDS:
                ret = io_ringfd_unregister(ctx, arg, nr_args);
                break;
        case IORING_REGISTER_PBUF_RING:
                ret = -EINVAL;
                if (!arg || nr_args != 1)
                        break;
                ret = io_register_pbuf_ring(ctx, arg);
                break;
        case IORING_UNREGISTER_PBUF_RING:
                ret = -EINVAL;
                if (!arg || nr_args != 1)
                        break;
                ret = io_unregister_pbuf_ring(ctx, arg);
                break;
        case IORING_REGISTER_SYNC_CANCEL:
                ret = -EINVAL;
                if (!arg || nr_args != 1)
                        break;
                ret = io_sync_cancel(ctx, arg);
                break;
        case IORING_REGISTER_FILE_ALLOC_RANGE:
                ret = -EINVAL;
                if (!arg || nr_args)
                        break;
                ret = io_register_file_alloc_range(ctx, arg);
                break;
        case IORING_REGISTER_PBUF_STATUS:
                ret = -EINVAL;
                if (!arg || nr_args != 1)
                        break;
                ret = io_register_pbuf_status(ctx, arg);
                break;
        case IORING_REGISTER_NAPI:
                ret = -EINVAL;
                if (!arg || nr_args != 1)
                        break;
                ret = io_register_napi(ctx, arg);
                break;
        case IORING_UNREGISTER_NAPI:
                ret = -EINVAL;
                if (nr_args != 1)
                        break;
                ret = io_unregister_napi(ctx, arg);
                break;
        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
                void __user *, arg, unsigned int, nr_args)
{
        struct io_ring_ctx *ctx;
        long ret = -EBADF;
        struct file *file;
        bool use_registered_ring;

        use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
        opcode &= ~IORING_REGISTER_USE_REGISTERED_RING;

        if (opcode >= IORING_REGISTER_LAST)
                return -EINVAL;

        if (use_registered_ring) {
                /*
                 * Ring fd has been registered via IORING_REGISTER_RING_FDS;
                 * we need only dereference our task private array to find it.
                 */
                struct io_uring_task *tctx = current->io_uring;

                if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
                        return -EINVAL;
                fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
                file = tctx->registered_rings[fd];
                if (unlikely(!file))
                        return -EBADF;
        } else {
                file = fget(fd);
                if (unlikely(!file))
                        return -EBADF;
                ret = -EOPNOTSUPP;
                if (!io_is_uring_fops(file))
                        goto out_fput;
        }

        ctx = file->private_data;

        mutex_lock(&ctx->uring_lock);
        ret = __io_uring_register(ctx, opcode, arg, nr_args);
        mutex_unlock(&ctx->uring_lock);
        trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, ret);
out_fput:
        if (!use_registered_ring)
                fput(file);
        return ret;
}

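/*
 * Usage sketch (not part of this file): after IORING_REGISTER_RING_FDS, a
 * caller may OR IORING_REGISTER_USE_REGISTERED_RING into the opcode and pass
 * the registered offset instead of a real fd, skipping the fget()/fput()
 * pair above. Illustrative only; error handling is abbreviated.
 */
#if 0 /* illustrative userspace example */
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>

static int probe_via_registered_ring(unsigned reg_offset,
                                     struct io_uring_probe *p)
{
        /* "fd" is the index into the task's registered-ring array */
        return syscall(__NR_io_uring_register, reg_offset,
                       IORING_REGISTER_PROBE | IORING_REGISTER_USE_REGISTERED_RING,
                       p, 256);
}
#endif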
