// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
/* Per-request command data for shutdown; @how is loaded from sqe->len. */
struct io_shutdown {
	struct file *file;
	int how;
};

/*
 * NOTE(review): the users of io_accept, io_socket, io_connect, io_bind and
 * io_listen are not in this part of the file; field meanings below are
 * presumably what the names suggest - confirm against the handlers.
 */
struct io_accept {
	struct file *file;
	struct sockaddr __user *addr;
	int __user *addr_len;
	int flags;
	int iou_flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_socket {
	struct file *file;
	int domain;
	int type;
	int protocol;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_connect {
	struct file *file;
	struct sockaddr __user *addr;
	int addr_len;
	bool in_progress;
	bool seen_econnaborted;
};

struct io_bind {
	struct file *file;
	int addr_len;
};

struct io_listen {
	struct file *file;
	int backlog;
};

/*
 * Per-request command data shared by the send/recv family of opcodes
 * (send, sendmsg, recv, recvmsg and the zerocopy send variants).
 */
struct io_sr_msg {
	struct file *file;
	/* user pointer from sqe->addr: a msghdr for *msg ops, else a buffer */
	union {
		struct compat_msghdr __user *umsg_compat;
		struct user_msghdr __user *umsg;
		void __user *buf;
	};
	int len;
	/* bytes already transferred by earlier partial attempts */
	unsigned done_io;
	/* MSG_* flags, from sqe->msg_flags plus internally added bits */
	unsigned msg_flags;
	/* consecutive multishot recv retries, capped by MULTISHOT_MAX_RETRY */
	unsigned nr_multishot_loops;
	/* IORING_RECVSEND_* / IORING_RECV_* flags, from sqe->ioprio */
	u16 flags;
	/* initialised and used only by !msg send variants */
	u16 addr_len;
	/* copy of req->buf_index, restored before each multishot/bundle retry */
	u16 buf_group;
	void __user *addr;
	/* saved msg_control_user, re-applied on every sendmsg issue */
	void __user *msg_control;
	/* used only for send zerocopy */
	struct io_kiocb *notif;
};

/*
 * Number of times we'll try and do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY 32

/*
 * Prepare a shutdown request: reject SQEs with unused fields set, load
 * ->how from sqe->len and force the request to be issued asynchronously.
 */
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

/*
 * Issue a shutdown request. Returns -ENOTSOCK if req->file is not a
 * socket, otherwise stores the __sys_shutdown_sock() result as the
 * request result and returns IOU_OK.
 */
int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	/* prep set REQ_F_FORCE_ASYNC, so a nonblocking issue is unexpected */
	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

/*
 * A short transfer is only retried when the caller asked for MSG_WAITALL
 * and the socket is SOCK_STREAM or SOCK_SEQPACKET.
 */
static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

/* Free the iovec array owned by @kmsg, if any, and reset its bookkeeping. */
static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
	if (kmsg->free_iov) {
		kfree(kmsg->free_iov);
		kmsg->free_iov_nr = 0;
		kmsg->free_iov = NULL;
	}
}

/*
 * Try to return the request's async msghdr to the ctx netmsg cache. On
 * success the request no longer owns async data; any iovec stays attached
 * to the cached header and is KASAN-poisoned while it sits in the cache.
 */
static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;
	struct iovec *iov;

	/* can't recycle, ensure we free the iovec if we have one */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_netmsg_iovec_free(hdr);
		return;
	}

	/* Let normal cleanup path reap it if we fail adding to the cache */
	iov = hdr->free_iov;
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
		if (iov)
			kasan_mempool_poison_object(iov);
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

/*
 * Attach an io_async_msghdr to @req, preferring an entry from the ctx
 * netmsg cache. A cached entry may still carry an iovec from a previous
 * user; it is unpoisoned and REQ_F_NEED_CLEANUP is set so it gets freed.
 * Returns the header, or NULL if a fresh allocation failed.
 */
static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *hdr;

	hdr = io_alloc_cache_get(&ctx->netmsg_cache);
	if (hdr) {
		if (hdr->free_iov) {
			kasan_mempool_unpoison_object(hdr->free_iov,
				hdr->free_iov_nr * sizeof(struct iovec));
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		req->flags |= REQ_F_ASYNC_DATA;
		req->async_data = hdr;
		return hdr;
	}

	/* cache miss: a zero return means req->async_data was allocated */
	if (!io_alloc_async_data(req)) {
		hdr = req->async_data;
		hdr->free_iov_nr = 0;
		hdr->free_iov = NULL;
		return hdr;
	}
	return NULL;
}

/*
 * assign new iovec to kmsg, if we need to
 *
 * A non-NULL @iov becomes kmsg->free_iov (any previous one is freed) and
 * the request is marked REQ_F_NEED_CLEANUP. Always returns 0.
 */
static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			     struct iovec *iov)
{
	if (iov) {
		req->flags |= REQ_F_NEED_CLEANUP;
		kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs;
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		kmsg->free_iov = iov;
	}
	return 0;
}

/*
 * Reset per-iteration state before the next multishot/bundle pass: clear
 * REQ_F_BL_EMPTY and the progress counter, and restore the buffer group
 * into req->buf_index.
 */
static inline void io_mshot_prep_retry(struct io_kiocb *req,
				       struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	req->flags &= ~REQ_F_BL_EMPTY;
	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
	req->buf_index = sr->buf_group;
}

#ifdef CONFIG_COMPAT
/*
 * Compat variant of io_msg_copy_hdr(): copy the compat_msghdr from
 * userspace into @msg. With provided buffers only the length of the
 * (at most one) iovec is read into sr->len; otherwise the iovec array is
 * imported into iomsg->msg.msg_iter. Returns 0 or a negative error.
 */
static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	struct iovec *iov;
	int ret, nr_segs;

	/* reuse a previously allocated iovec if there is one */
	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
	} else {
		iov = &iomsg->fast_iov;
		nr_segs = 1;
	}

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;

	uiov = compat_ptr(msg->msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
		}

		return 0;
	}

	ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
				nr_segs, &iov, &iomsg->msg.msg_iter, true);
	if (unlikely(ret < 0))
		return ret;

	return io_net_vec_assign(req, iomsg, iov);
}
#endif

/*
 * Copy the user_msghdr fields from userspace into @msg. With provided
 * buffers only the length of the (at most one) iovec is read into
 * sr->len; otherwise the iovec array is imported into
 * iomsg->msg.msg_iter. Returns 0 or a negative error.
 */
static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct iovec *iov;
	int ret, nr_segs;

	/* reuse a previously allocated iovec if there is one */
	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
	} else {
		iov = &iomsg->fast_iov;
		nr_segs = 1;
	}

	if (!user_access_begin(sr->umsg, sizeof(*sr->umsg)))
		return -EFAULT;

	/* every unsafe_get_user() failure exits via ua_end with -EFAULT */
	ret = -EFAULT;
	unsafe_get_user(msg->msg_name, &sr->umsg->msg_name, ua_end);
	unsafe_get_user(msg->msg_namelen, &sr->umsg->msg_namelen, ua_end);
	unsafe_get_user(msg->msg_iov, &sr->umsg->msg_iov, ua_end);
	unsafe_get_user(msg->msg_iovlen, &sr->umsg->msg_iovlen, ua_end);
	unsafe_get_user(msg->msg_control, &sr->umsg->msg_control, ua_end);
	unsafe_get_user(msg->msg_controllen, &sr->umsg->msg_controllen, ua_end);
	msg->msg_flags = 0;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			ret = -EINVAL;
			goto ua_end;
		} else {
			/* we only need the length for provided buffers */
			if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
				goto ua_end;
			unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len,
					ua_end);
			sr->len = iov->iov_len;
		}
		ret = 0;
ua_end:
		user_access_end();
		return ret;
	}

	user_access_end();
	ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs,
				&iov, &iomsg->msg.msg_iter, false);
	if (unlikely(ret < 0))
		return ret;

	return io_net_vec_assign(req, iomsg, iov);
}

/*
 * Build the kernel msghdr for a sendmsg request from the user (or compat)
 * msghdr. On the native path the user control pointer is also stashed in
 * sr->msg_control. Returns 0 or a negative error.
 */
static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

#ifdef CONFIG_COMPAT
	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
		if (unlikely(ret))
			return ret;

		return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
	}
#endif

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
	if (unlikely(ret))
		return ret;

	ret = __copy_msghdr(&iomsg->msg, &msg, NULL);

	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = iomsg->msg.msg_control_user;
	return ret;
}

/* Cleanup hook: free any iovec still attached to the async msghdr. */
void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	io_netmsg_iovec_free(io);
}

/*
 * Set up the kernel msghdr for a plain (non-msg) send: no control data,
 * an optional destination address copied in from sr->addr, and - unless a
 * provided buffer will be selected at issue time - the user buffer
 * imported as the source iterator. Returns 0 or a negative error.
 */
static int io_send_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	int ret;

	kmsg->msg.msg_name = NULL;
	kmsg->msg.msg_namelen = 0;
	kmsg->msg.msg_control = NULL;
	kmsg->msg.msg_controllen = 0;
	kmsg->msg.msg_ubuf = NULL;

	if (sr->addr) {
		ret = move_addr_to_kernel(sr->addr, sr->addr_len, &kmsg->addr);
		if (unlikely(ret < 0))
			return ret;
		kmsg->msg.msg_name = &kmsg->addr;
		kmsg->msg.msg_namelen = sr->addr_len;
	}
	if (!io_do_buffer_select(req)) {
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret < 0))
			return ret;
	}
	return 0;
}

/*
 * Allocate the async msghdr and fill it in for either a plain send
 * (@is_msg == 0) or a sendmsg-style request. Returns -ENOMEM if no
 * header could be allocated, else the setup result.
 */
static int io_sendmsg_prep_setup(struct io_kiocb *req, int is_msg)
{
	struct io_async_msghdr *kmsg;
	int ret;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;
	if (!is_msg)
		return io_send_setup(req);
	ret = io_sendmsg_copy_hdr(req, kmsg);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

/* sqe->ioprio flags accepted by send and sendmsg */
#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)

/*
 * Prepare a send or sendmsg request from the SQE. Returns -EINVAL for
 * unsupported flag or field combinations, otherwise the result of
 * io_sendmsg_prep_setup().
 */
int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (req->opcode == IORING_OP_SEND) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		sr->addr_len = READ_ONCE(sqe->addr_len);
	} else if (sqe->addr2 || sqe->file_index) {
		return -EINVAL;
	}

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~SENDMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	/* bundles: plain send only, provided buffers required */
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_SENDMSG)
			return -EINVAL;
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		sr->msg_flags |= MSG_WAITALL;
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}
	/* with provided buffers, a non-zero sqe->len is rejected */
	if (req->flags & REQ_F_BUFFER_SELECT && sr->len)
		return -EINVAL;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG);
}

/*
 * Completion-time cleanup: clear REQ_F_NEED_CLEANUP and try to recycle
 * the async msghdr.
 */
static void io_req_msg_cleanup(struct io_kiocb *req,
			       unsigned int issue_flags)
{
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
}

/*
 * For bundle completions, we need to figure out how many segments we consumed.
 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
 * the segments, then it's a trivial question to answer. If we have residual
 * data in the iter, then loop the segments to figure out how much we
 * transferred.
 *
 * @ret is the transfer result; returns the number of buffers it covers.
 */
static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
{
	struct iovec *iov;
	int nbufs;

	/* no data is always zero segments, and a ubuf is always 1 segment */
	if (ret <= 0)
		return 0;
	if (iter_is_ubuf(&kmsg->msg.msg_iter))
		return 1;

	iov = kmsg->free_iov;
	if (!iov)
		iov = &kmsg->fast_iov;

	/* if all data was transferred, it's basic pointer math */
	if (!iov_iter_count(&kmsg->msg.msg_iter))
		return iter_iov(&kmsg->msg.msg_iter) - iov;

	/* short transfer, count segments */
	nbufs = 0;
	do {
		int this_len = min_t(int, iov[nbufs].iov_len, ret);

		nbufs++;
		ret -= this_len;
	} while (ret);

	return nbufs;
}

/*
 * Finishes io_send.
 *
 * Puts back the consumed provided buffer(s). Returns true if the request
 * is finished (result set, *ret = IOU_OK), or false if a bundle send
 * posted an intermediate CQE and should run again.
 */
static inline bool io_send_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	bool bundle_finished = *ret <= 0;
	unsigned int cflags;

	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
		cflags = io_put_kbuf(req, issue_flags);
		goto finish;
	}

	cflags = io_put_kbufs(req, io_bundle_nbufs(kmsg, *ret), issue_flags);

	if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
		goto finish;

	/*
	 * Fill CQE for this send and see if we should keep trying to
	 * send on this socket.
	 */
	if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		io_mshot_prep_retry(req, kmsg);
		return false;
	}

	/* Otherwise stop bundle and use the current result. */
finish:
	io_req_set_res(req, *ret, cflags);
	*ret = IOU_OK;
	return true;
}

/*
 * Issue a sendmsg request. Returns -ENOTSOCK for a non-socket file and
 * -EAGAIN when the request must be retried (poll-first, nonblocking
 * -EAGAIN, or a partial MSG_WAITALL transfer on a stream/seqpacket
 * socket). Otherwise the result - including bytes from earlier partial
 * attempts - is stored on the request and IOU_OK is returned.
 */
int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	/* restore the control pointer saved at prep time */
	kmsg->msg.msg_control_user = sr->msg_control;

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret > 0 && io_net_retry(sock, flags)) {
			/* partial send: drop control data for the retry */
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	io_req_msg_cleanup(req, issue_flags);
	/* report earlier partial progress instead of a late error */
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

/*
 * Issue a plain send request, optionally selecting provided buffers (one,
 * or several for a bundle). Returns -ENOTSOCK for a non-socket file,
 * -EAGAIN when the request must be retried, a negative error from buffer
 * selection, or the value left in ret by io_send_finish().
 */
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;

retry_bundle:
	if (io_do_buffer_select(req)) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.max_len = INT_MAX,
			.nr_iovs = 1,
		};

		/* prefer an iovec array we already own */
		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode = KBUF_MODE_FREE;
		}

		if (!(sr->flags & IORING_RECVSEND_BUNDLE))
			arg.nr_iovs = 1;
		else
			arg.mode |= KBUF_MODE_EXPAND;

		ret = io_buffers_select(req, &arg, issue_flags);
		if (unlikely(ret < 0))
			return ret;

		sr->len = arg.out_len;
		iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, arg.iovs, ret,
				arg.out_len);
		/* selection handed back a new array: take ownership of it */
		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
	}

	/*
	 * If MSG_WAITALL is set, or this is a bundle send, then we need
	 * the full amount. If just bundle is set, if we do a short send
	 * then we complete the bundle sequence rather than continue on.
	 */
	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
	kmsg->msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &kmsg->msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			/* partial send: advance past what was sent and retry */
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* report earlier partial progress instead of a late error */
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	if (!io_send_finish(req, &ret, kmsg, issue_flags))
		goto retry_bundle;

	io_req_msg_cleanup(req, issue_flags);
	return ret;
}

/*
 * For multishot recvmsg with provided buffers, check that the per-buffer
 * header (io_uring_recvmsg_out + name + control) does not overflow an int
 * and record the name/control lengths in @iomsg. Returns 0, or -EOVERFLOW
 * on a negative @namelen or an overflowing sum.
 */
static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
			  (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
		int hdr;

		if (unlikely(namelen < 0))
			return -EOVERFLOW;
		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
					namelen, &hdr))
			return -EOVERFLOW;
		if (check_add_overflow(hdr, controllen, &hdr))
			return -EOVERFLOW;

		iomsg->namelen = namelen;
		iomsg->controllen = controllen;
		return 0;
	}

	return 0;
}

/*
 * Build the kernel msghdr for a recvmsg request from the user (or compat)
 * msghdr, then run the multishot header checks. Returns 0 or a negative
 * error.
 */
static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct user_msghdr msg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

#ifdef CONFIG_COMPAT
	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
		if (unlikely(ret))
			return ret;

		ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
		if (unlikely(ret))
			return ret;

		return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
						cmsg.msg_controllen);
	}
#endif

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
	if (unlikely(ret))
		return ret;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (unlikely(ret))
		return ret;

	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
					msg.msg_controllen);
}

/*
 * Allocate the async msghdr and fill it in for recv (no name or control
 * data, user buffer imported unless a provided buffer will be selected)
 * or for recvmsg (header copied from userspace). Returns 0, -ENOMEM, or
 * a negative error from the import/copy.
 */
static int io_recvmsg_prep_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg;
	int ret;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_RECV) {
		kmsg->msg.msg_name = NULL;
		kmsg->msg.msg_namelen = 0;
		kmsg->msg.msg_control = NULL;
		kmsg->msg.msg_get_inq = 1;
		kmsg->msg.msg_controllen = 0;
		kmsg->msg.msg_iocb = NULL;
		kmsg->msg.msg_ubuf = NULL;

		if (!io_do_buffer_select(req)) {
			ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
					  &kmsg->msg.msg_iter);
			if (unlikely(ret))
				return ret;
		}
		return 0;
	}

	ret = io_recvmsg_copy_hdr(req, kmsg);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

/* sqe->ioprio flags accepted by recv and recvmsg */
#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
			IORING_RECVSEND_BUNDLE)

/*
 * Prepare a recv or recvmsg request from the SQE. Returns -EINVAL for
 * unsupported flag or field combinations, otherwise the result of
 * io_recvmsg_prep_setup().
 */
int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~RECVMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (req->flags & REQ_F_BUFFER_SELECT) {
		/*
		 * Store the buffer group for this multishot receive separately,
		 * as if we end up doing an io-wq based issue that selects a
		 * buffer, it has to be committed immediately and that will
		 * clear ->buf_list. This means we lose the link to the buffer
		 * list, and the eventual buffer put on completion then cannot
		 * restore it.
		 */
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}
	/* multishot: provided buffers required, no MSG_WAITALL, recv len 0 */
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}
	/* bundles are not supported for recvmsg */
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_RECVMSG)
			return -EINVAL;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->nr_multishot_loops = 0;
	return io_recvmsg_prep_setup(req);
}

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 *
 * On a true return, *ret holds the value the issue handler should return.
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  bool mshot_finished, unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	unsigned int cflags = 0;

	if (kmsg->msg.msg_inq > 0)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		cflags |= io_put_kbufs(req, io_bundle_nbufs(kmsg, *ret),
				      issue_flags);
		/* bundle with no more immediate buffers, we're done */
		if (req->flags & REQ_F_BL_EMPTY)
			goto finish;
	} else {
		cflags |= io_put_kbuf(req, issue_flags);
	}

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
	    io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;

		io_mshot_prep_retry(req, kmsg);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
				return false;
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			mshot_retry_ret = IOU_REQUEUE;
		}
		if (issue_flags & IO_URING_F_MULTISHOT)
			*ret = mshot_retry_ret;
		else
			*ret = -EAGAIN;
		return true;
	}

	/* Finish the request / stop multishot. */
finish:
	io_req_set_res(req, *ret, cflags);

	if (issue_flags & IO_URING_F_MULTISHOT)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	io_req_msg_cleanup(req, issue_flags);
	return true;
}

/*
 * Reserve the header area (io_uring_recvmsg_out + name + control) at the
 * front of the selected buffer for a multishot recvmsg. On success *buf
 * and *len describe the remaining payload area, and sr->buf keeps the
 * start of the buffer for the later header copy. Returns -EFAULT if the
 * buffer is smaller than the header.
 */
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		/* control data occupies the tail of the header area */
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

/* On-stack staging area for the multishot header plus the source address. */
struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

/*
 * Do one multishot recvmsg into the prepared buffer and copy the
 * io_uring_recvmsg_out header (plus as much of the source address as
 * fits) to the start of the buffer. *finished is set when the receive
 * returned <= 0 or the header copy faulted. Returns a negative error, or
 * the number of buffer bytes used (header area plus payload).
 */
static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	/* report the full length, but only count what fits in the buffer */
	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	/* copy at most kmsg->namelen bytes of the address */
	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 *      "fromlen shall refer to the value before truncation.."
	 *                      1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

/*
 * Issue a recvmsg request, single-shot or multishot. Returns -ENOTSOCK
 * for a non-socket file, -ENOBUFS if no provided buffer is available,
 * -EAGAIN or IOU_ISSUE_SKIP_COMPLETE when the request must wait, or the
 * value left in ret by io_recv_finish().
 */
int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
	}

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	/* fold in earlier partial progress; recycle the buffer if unused */
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

	return ret;
}

/*
 * Select provided buffer(s) for a recv and set up kmsg->msg.msg_iter.
 * A bundle issued with the ring locked peeks multiple buffers; otherwise
 * a single buffer is selected. Returns 0, -ENOBUFS, or a negative error
 * from the peek/import.
 */
static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			      size_t *len, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	/*
	 * If the ring isn't locked, then don't use the peek interface
	 * to grab multiple buffers as we will lock/unlock between
	 * this selection and posting the buffers.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    sr->flags & IORING_RECVSEND_BUNDLE) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.nr_iovs = 1,
			.mode = KBUF_MODE_EXPAND,
		};

		/* prefer an iovec array we already own */
		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode |= KBUF_MODE_FREE;
		}

		/* known pending bytes bound how much buffer space to grab */
		if (kmsg->msg.msg_inq > 0)
			arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);

		ret = io_buffers_peek(req, &arg);
		if (unlikely(ret < 0))
			return ret;

		/* special case 1 vec, can be a fast path */
		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			sr->len = arg.iovs[0].iov_len;
			goto map_ubuf;
		}
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
				arg.out_len);
		/* the peek handed back a new array: take ownership of it */
		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
	} else {
		void __user *buf;

		*len = sr->len;
		buf = io_buffer_select(req, len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
		sr->len = *len;
map_ubuf:
		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}

/*
 * Issue a recv request, single-shot or multishot, optionally with
 * provided buffers/bundles. Returns -ENOTSOCK for a non-socket file,
 * -EAGAIN or IOU_ISSUE_SKIP_COMPLETE when the request must wait, or the
 * value left in ret by io_recv_finish().
 */
int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;
	bool mshot_finished;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
		if (unlikely(ret)) {
			/* fail the request; out_free is inside the else-if below */
			kmsg->msg.msg_inq = -1;
			goto out_free;
		}
		sr->buf = NULL;
	}

	kmsg->msg.msg_flags = 0;
	kmsg->msg.msg_inq = -1;

	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = sock_recvmsg(sock, &kmsg->msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			/* partial receive: advance past what arrived and retry */
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	mshot_finished = ret <= 0;
	/* fold in earlier partial progress; recycle the buffer if unused */
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

	return ret;
}

/*
 * Cleanup hook for zerocopy sends: free any attached iovec and flush the
 * notification request if one is still held.
 */
void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io = req->async_data;

	if (req_has_async_data(req))
		io_netmsg_iovec_free(io);
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

/* sqe->ioprio flags accepted by the zerocopy send opcodes */
#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)

/*
 * Prepare a zerocopy send (IORING_OP_SEND_ZC) or the sendmsg variant.
 * Allocates the notification request, validates the flags, resolves an
 * optional fixed buffer and loads the remaining SQE fields. Returns
 * -EINVAL, -ENOMEM or -EFAULT on failure, otherwise the result of
 * io_sendmsg_prep_setup().
 */
int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	zc->done_io = 0;
	req->flags |= REQ_F_POLL_NO_LAZY;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	/* from here on io_send_zc_cleanup() must run to flush the notif */
	req->flags |= REQ_F_NEED_CLEANUP;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;
		}
	}

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		unsigned idx = READ_ONCE(sqe->buf_index);

		if (unlikely(idx >= ctx->nr_user_bufs))
			return -EFAULT;
		idx = array_index_nospec(idx, ctx->nr_user_bufs);
		req->imu = READ_ONCE(ctx->user_bufs[idx]);
		io_req_set_rsrc_node(notif, ctx, 0);
	}

	if (req->opcode == IORING_OP_SEND_ZC) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		zc->addr_len = READ_ONCE(sqe->addr_len);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		/* fixed buffers are rejected for the sendmsg variant */
		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
			return -EINVAL;
	}

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG_ZC);
}

/*
 * skb fill callback that downgrades the skb from managed frags and then
 * uses the generic zerocopy fill.
 */
static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return zerocopy_fill_skb_from_iter(skb, from, length);
}

/*
 * skb fill callback for a bvec-backed iterator: append up to @length
 * bytes from @from to @skb as page frags, then advance the iterator and
 * update the skb length/truesize accounting. An skb with no frags yet is
 * marked SKBFL_MANAGED_FRAG_REFS; an skb that already has frags but is
 * not managed falls back to the generic zerocopy fill. Returns 0, or
 * -EMSGSIZE if data remained once MAX_SKB_FRAGS was reached.
 */
static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	/* advance the source iterator past what was attached */
	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;
	return ret;
}

static int io_send_zc_import(struct io_kiocb *req, struct io_async_msghdr *kmsg)
{
1337 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1338 int ret; 1339 1340 if (sr->flags & IORING_RECVSEND_FIXED_BUF) { 1341 ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter, req->imu, 1342 (u64)(uintptr_t)sr->buf, sr->len); 1343 if (unlikely(ret)) 1344 return ret; 1345 kmsg->msg.sg_from_iter = io_sg_from_iter; 1346 } else { 1347 ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); 1348 if (unlikely(ret)) 1349 return ret; 1350 ret = io_notif_account_mem(sr->notif, sr->len); 1351 if (unlikely(ret)) 1352 return ret; 1353 kmsg->msg.sg_from_iter = io_sg_from_iter_iovec; 1354 } 1355 1356 return ret; 1357 } 1358 1359 int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) 1360 { 1361 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); 1362 struct io_async_msghdr *kmsg = req->async_data; 1363 struct socket *sock; 1364 unsigned msg_flags; 1365 int ret, min_ret = 0; 1366 1367 sock = sock_from_file(req->file); 1368 if (unlikely(!sock)) 1369 return -ENOTSOCK; 1370 if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags)) 1371 return -EOPNOTSUPP; 1372 1373 if (!(req->flags & REQ_F_POLLED) && 1374 (zc->flags & IORING_RECVSEND_POLL_FIRST)) 1375 return -EAGAIN; 1376 1377 if (!zc->done_io) { 1378 ret = io_send_zc_import(req, kmsg); 1379 if (unlikely(ret)) 1380 return ret; 1381 } 1382 1383 msg_flags = zc->msg_flags; 1384 if (issue_flags & IO_URING_F_NONBLOCK) 1385 msg_flags |= MSG_DONTWAIT; 1386 if (msg_flags & MSG_WAITALL) 1387 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1388 msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; 1389 1390 kmsg->msg.msg_flags = msg_flags; 1391 kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg; 1392 ret = sock_sendmsg(sock, &kmsg->msg); 1393 1394 if (unlikely(ret < min_ret)) { 1395 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1396 return -EAGAIN; 1397 1398 if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) { 1399 zc->len -= ret; 1400 zc->buf += ret; 1401 zc->done_io += ret; 1402 
req->flags |= REQ_F_BL_NO_RECYCLE; 1403 return -EAGAIN; 1404 } 1405 if (ret == -ERESTARTSYS) 1406 ret = -EINTR; 1407 req_set_fail(req); 1408 } 1409 1410 if (ret >= 0) 1411 ret += zc->done_io; 1412 else if (zc->done_io) 1413 ret = zc->done_io; 1414 1415 /* 1416 * If we're in io-wq we can't rely on tw ordering guarantees, defer 1417 * flushing notif to io_send_zc_cleanup() 1418 */ 1419 if (!(issue_flags & IO_URING_F_UNLOCKED)) { 1420 io_notif_flush(zc->notif); 1421 io_req_msg_cleanup(req, 0); 1422 } 1423 io_req_set_res(req, ret, IORING_CQE_F_MORE); 1424 return IOU_OK; 1425 } 1426 1427 int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) 1428 { 1429 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1430 struct io_async_msghdr *kmsg = req->async_data; 1431 struct socket *sock; 1432 unsigned flags; 1433 int ret, min_ret = 0; 1434 1435 sock = sock_from_file(req->file); 1436 if (unlikely(!sock)) 1437 return -ENOTSOCK; 1438 if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags)) 1439 return -EOPNOTSUPP; 1440 1441 if (!(req->flags & REQ_F_POLLED) && 1442 (sr->flags & IORING_RECVSEND_POLL_FIRST)) 1443 return -EAGAIN; 1444 1445 flags = sr->msg_flags; 1446 if (issue_flags & IO_URING_F_NONBLOCK) 1447 flags |= MSG_DONTWAIT; 1448 if (flags & MSG_WAITALL) 1449 min_ret = iov_iter_count(&kmsg->msg.msg_iter); 1450 1451 kmsg->msg.msg_control_user = sr->msg_control; 1452 kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg; 1453 kmsg->msg.sg_from_iter = io_sg_from_iter_iovec; 1454 ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); 1455 1456 if (unlikely(ret < min_ret)) { 1457 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1458 return -EAGAIN; 1459 1460 if (ret > 0 && io_net_retry(sock, flags)) { 1461 sr->done_io += ret; 1462 req->flags |= REQ_F_BL_NO_RECYCLE; 1463 return -EAGAIN; 1464 } 1465 if (ret == -ERESTARTSYS) 1466 ret = -EINTR; 1467 req_set_fail(req); 1468 } 1469 1470 if (ret >= 0) 1471 ret += sr->done_io; 1472 else if (sr->done_io) 1473 ret = 
sr->done_io; 1474 1475 /* 1476 * If we're in io-wq we can't rely on tw ordering guarantees, defer 1477 * flushing notif to io_send_zc_cleanup() 1478 */ 1479 if (!(issue_flags & IO_URING_F_UNLOCKED)) { 1480 io_notif_flush(sr->notif); 1481 io_req_msg_cleanup(req, 0); 1482 } 1483 io_req_set_res(req, ret, IORING_CQE_F_MORE); 1484 return IOU_OK; 1485 } 1486 1487 void io_sendrecv_fail(struct io_kiocb *req) 1488 { 1489 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); 1490 1491 if (sr->done_io) 1492 req->cqe.res = sr->done_io; 1493 1494 if ((req->flags & REQ_F_NEED_CLEANUP) && 1495 (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC)) 1496 req->cqe.flags |= IORING_CQE_F_MORE; 1497 } 1498 1499 #define ACCEPT_FLAGS (IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \ 1500 IORING_ACCEPT_POLL_FIRST) 1501 1502 int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1503 { 1504 struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); 1505 1506 if (sqe->len || sqe->buf_index) 1507 return -EINVAL; 1508 1509 accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1510 accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 1511 accept->flags = READ_ONCE(sqe->accept_flags); 1512 accept->nofile = rlimit(RLIMIT_NOFILE); 1513 accept->iou_flags = READ_ONCE(sqe->ioprio); 1514 if (accept->iou_flags & ~ACCEPT_FLAGS) 1515 return -EINVAL; 1516 1517 accept->file_slot = READ_ONCE(sqe->file_index); 1518 if (accept->file_slot) { 1519 if (accept->flags & SOCK_CLOEXEC) 1520 return -EINVAL; 1521 if (accept->iou_flags & IORING_ACCEPT_MULTISHOT && 1522 accept->file_slot != IORING_FILE_INDEX_ALLOC) 1523 return -EINVAL; 1524 } 1525 if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1526 return -EINVAL; 1527 if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK)) 1528 accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1529 if (accept->iou_flags & IORING_ACCEPT_MULTISHOT) 1530 req->flags |= REQ_F_APOLL_MULTISHOT; 
1531 if (accept->iou_flags & IORING_ACCEPT_DONTWAIT) 1532 req->flags |= REQ_F_NOWAIT; 1533 return 0; 1534 } 1535 1536 int io_accept(struct io_kiocb *req, unsigned int issue_flags) 1537 { 1538 struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); 1539 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1540 bool fixed = !!accept->file_slot; 1541 struct proto_accept_arg arg = { 1542 .flags = force_nonblock ? O_NONBLOCK : 0, 1543 }; 1544 struct file *file; 1545 unsigned cflags; 1546 int ret, fd; 1547 1548 if (!(req->flags & REQ_F_POLLED) && 1549 accept->iou_flags & IORING_ACCEPT_POLL_FIRST) 1550 return -EAGAIN; 1551 1552 retry: 1553 if (!fixed) { 1554 fd = __get_unused_fd_flags(accept->flags, accept->nofile); 1555 if (unlikely(fd < 0)) 1556 return fd; 1557 } 1558 arg.err = 0; 1559 arg.is_empty = -1; 1560 file = do_accept(req->file, &arg, accept->addr, accept->addr_len, 1561 accept->flags); 1562 if (IS_ERR(file)) { 1563 if (!fixed) 1564 put_unused_fd(fd); 1565 ret = PTR_ERR(file); 1566 if (ret == -EAGAIN && force_nonblock && 1567 !(accept->iou_flags & IORING_ACCEPT_DONTWAIT)) { 1568 /* 1569 * if it's multishot and polled, we don't need to 1570 * return EAGAIN to arm the poll infra since it 1571 * has already been done 1572 */ 1573 if (issue_flags & IO_URING_F_MULTISHOT) 1574 return IOU_ISSUE_SKIP_COMPLETE; 1575 return ret; 1576 } 1577 if (ret == -ERESTARTSYS) 1578 ret = -EINTR; 1579 req_set_fail(req); 1580 } else if (!fixed) { 1581 fd_install(fd, file); 1582 ret = fd; 1583 } else { 1584 ret = io_fixed_fd_install(req, issue_flags, file, 1585 accept->file_slot); 1586 } 1587 1588 cflags = 0; 1589 if (!arg.is_empty) 1590 cflags |= IORING_CQE_F_SOCK_NONEMPTY; 1591 1592 if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { 1593 io_req_set_res(req, ret, cflags); 1594 return IOU_OK; 1595 } 1596 1597 if (ret < 0) 1598 return ret; 1599 if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) { 1600 if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1) 1601 
goto retry; 1602 if (issue_flags & IO_URING_F_MULTISHOT) 1603 return IOU_ISSUE_SKIP_COMPLETE; 1604 return -EAGAIN; 1605 } 1606 1607 io_req_set_res(req, ret, cflags); 1608 return IOU_STOP_MULTISHOT; 1609 } 1610 1611 int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1612 { 1613 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); 1614 1615 if (sqe->addr || sqe->rw_flags || sqe->buf_index) 1616 return -EINVAL; 1617 1618 sock->domain = READ_ONCE(sqe->fd); 1619 sock->type = READ_ONCE(sqe->off); 1620 sock->protocol = READ_ONCE(sqe->len); 1621 sock->file_slot = READ_ONCE(sqe->file_index); 1622 sock->nofile = rlimit(RLIMIT_NOFILE); 1623 1624 sock->flags = sock->type & ~SOCK_TYPE_MASK; 1625 if (sock->file_slot && (sock->flags & SOCK_CLOEXEC)) 1626 return -EINVAL; 1627 if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1628 return -EINVAL; 1629 return 0; 1630 } 1631 1632 int io_socket(struct io_kiocb *req, unsigned int issue_flags) 1633 { 1634 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket); 1635 bool fixed = !!sock->file_slot; 1636 struct file *file; 1637 int ret, fd; 1638 1639 if (!fixed) { 1640 fd = __get_unused_fd_flags(sock->flags, sock->nofile); 1641 if (unlikely(fd < 0)) 1642 return fd; 1643 } 1644 file = __sys_socket_file(sock->domain, sock->type, sock->protocol); 1645 if (IS_ERR(file)) { 1646 if (!fixed) 1647 put_unused_fd(fd); 1648 ret = PTR_ERR(file); 1649 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 1650 return -EAGAIN; 1651 if (ret == -ERESTARTSYS) 1652 ret = -EINTR; 1653 req_set_fail(req); 1654 } else if (!fixed) { 1655 fd_install(fd, file); 1656 ret = fd; 1657 } else { 1658 ret = io_fixed_fd_install(req, issue_flags, file, 1659 sock->file_slot); 1660 } 1661 io_req_set_res(req, ret, 0); 1662 return IOU_OK; 1663 } 1664 1665 int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1666 { 1667 struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect); 1668 struct 
io_async_msghdr *io; 1669 1670 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) 1671 return -EINVAL; 1672 1673 conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1674 conn->addr_len = READ_ONCE(sqe->addr2); 1675 conn->in_progress = conn->seen_econnaborted = false; 1676 1677 io = io_msg_alloc_async(req); 1678 if (unlikely(!io)) 1679 return -ENOMEM; 1680 1681 return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr); 1682 } 1683 1684 int io_connect(struct io_kiocb *req, unsigned int issue_flags) 1685 { 1686 struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect); 1687 struct io_async_msghdr *io = req->async_data; 1688 unsigned file_flags; 1689 int ret; 1690 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 1691 1692 file_flags = force_nonblock ? O_NONBLOCK : 0; 1693 1694 ret = __sys_connect_file(req->file, &io->addr, connect->addr_len, 1695 file_flags); 1696 if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED) 1697 && force_nonblock) { 1698 if (ret == -EINPROGRESS) { 1699 connect->in_progress = true; 1700 } else if (ret == -ECONNABORTED) { 1701 if (connect->seen_econnaborted) 1702 goto out; 1703 connect->seen_econnaborted = true; 1704 } 1705 return -EAGAIN; 1706 } 1707 if (connect->in_progress) { 1708 /* 1709 * At least bluetooth will return -EBADFD on a re-connect 1710 * attempt, and it's (supposedly) also valid to get -EISCONN 1711 * which means the previous result is good. For both of these, 1712 * grab the sock_error() and use that for the completion. 
1713 */ 1714 if (ret == -EBADFD || ret == -EISCONN) 1715 ret = sock_error(sock_from_file(req->file)->sk); 1716 } 1717 if (ret == -ERESTARTSYS) 1718 ret = -EINTR; 1719 out: 1720 if (ret < 0) 1721 req_set_fail(req); 1722 io_req_msg_cleanup(req, issue_flags); 1723 io_req_set_res(req, ret, 0); 1724 return IOU_OK; 1725 } 1726 1727 int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1728 { 1729 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind); 1730 struct sockaddr __user *uaddr; 1731 struct io_async_msghdr *io; 1732 1733 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) 1734 return -EINVAL; 1735 1736 uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr)); 1737 bind->addr_len = READ_ONCE(sqe->addr2); 1738 1739 io = io_msg_alloc_async(req); 1740 if (unlikely(!io)) 1741 return -ENOMEM; 1742 return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr); 1743 } 1744 1745 int io_bind(struct io_kiocb *req, unsigned int issue_flags) 1746 { 1747 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind); 1748 struct io_async_msghdr *io = req->async_data; 1749 struct socket *sock; 1750 int ret; 1751 1752 sock = sock_from_file(req->file); 1753 if (unlikely(!sock)) 1754 return -ENOTSOCK; 1755 1756 ret = __sys_bind_socket(sock, &io->addr, bind->addr_len); 1757 if (ret < 0) 1758 req_set_fail(req); 1759 io_req_set_res(req, ret, 0); 1760 return 0; 1761 } 1762 1763 int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 1764 { 1765 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen); 1766 1767 if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2) 1768 return -EINVAL; 1769 1770 listen->backlog = READ_ONCE(sqe->len); 1771 return 0; 1772 } 1773 1774 int io_listen(struct io_kiocb *req, unsigned int issue_flags) 1775 { 1776 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen); 1777 struct socket *sock; 1778 int ret; 1779 1780 sock = sock_from_file(req->file); 1781 if 
(unlikely(!sock)) 1782 return -ENOTSOCK; 1783 1784 ret = __sys_listen_socket(sock, listen->backlog); 1785 if (ret < 0) 1786 req_set_fail(req); 1787 io_req_set_res(req, ret, 0); 1788 return 0; 1789 } 1790 1791 void io_netmsg_cache_free(const void *entry) 1792 { 1793 struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry; 1794 1795 if (kmsg->free_iov) { 1796 kasan_mempool_unpoison_object(kmsg->free_iov, 1797 kmsg->free_iov_nr * sizeof(struct iovec)); 1798 io_netmsg_iovec_free(kmsg); 1799 } 1800 kfree(kmsg); 1801 } 1802 #endif 1803
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.