// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "../kernel/futex/futex.h"
#include "io_uring.h"
#include "alloc_cache.h"
#include "futex.h"

struct io_futex {
	struct file *file;
	union {
		u32 __user *uaddr;
		struct futex_waitv __user *uwaitv;
	};
	unsigned long futex_val;
	unsigned long futex_mask;
	unsigned long futexv_owned;
	u32 futex_flags;
	unsigned int futex_nr;
	bool futexv_unqueued;
};

struct io_futex_data {
	struct futex_q q;
	struct io_kiocb *req;
};

#define IO_FUTEX_ALLOC_CACHE_MAX	32

bool io_futex_cache_init(struct io_ring_ctx *ctx)
{
	return io_alloc_cache_init(&ctx->futex_cache, IO_FUTEX_ALLOC_CACHE_MAX,
				sizeof(struct io_futex_data));
}

void io_futex_cache_free(struct io_ring_ctx *ctx)
{
	io_alloc_cache_free(&ctx->futex_cache, kfree);
}

static void __io_futex_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
	req->async_data = NULL;
	hlist_del_init(&req->hash_node);
	io_req_task_complete(req, ts);
}

static void io_futex_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
	struct io_futex_data *ifd = req->async_data;
	struct io_ring_ctx *ctx = req->ctx;

	io_tw_lock(ctx, ts);
	if (!io_alloc_cache_put(&ctx->futex_cache, ifd))
		kfree(ifd);
	__io_futex_complete(req, ts);
}

static void io_futexv_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	struct futex_vector *futexv = req->async_data;

	io_tw_lock(req->ctx, ts);

	if (!iof->futexv_unqueued) {
		int res;

		res = futex_unqueue_multiple(futexv, iof->futex_nr);
		if (res != -1)
			io_req_set_res(req, res, 0);
	}

	kfree(req->async_data);
	req->flags &= ~REQ_F_ASYNC_DATA;
	__io_futex_complete(req, ts);
}

static bool io_futexv_claim(struct io_futex *iof)
{
	if (test_bit(0, &iof->futexv_owned) ||
	    test_and_set_bit_lock(0, &iof->futexv_owned))
		return false;
	return true;
}

static bool __io_futex_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
{
	/* futex wake already done or in progress */
	if (req->opcode == IORING_OP_FUTEX_WAIT) {
		struct io_futex_data *ifd = req->async_data;

		if (!futex_unqueue(&ifd->q))
			return false;
		req->io_task_work.func = io_futex_complete;
	} else {
		struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);

		if (!io_futexv_claim(iof))
			return false;
		req->io_task_work.func = io_futexv_complete;
	}

	hlist_del_init(&req->hash_node);
	io_req_set_res(req, -ECANCELED, 0);
	io_req_task_work_add(req);
	return true;
}

int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		    unsigned int issue_flags)
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	int nr = 0;

	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED))
		return -ENOENT;

	io_ring_submit_lock(ctx, issue_flags);
	hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) {
		if (req->cqe.user_data != cd->data &&
		    !(cd->flags & IORING_ASYNC_CANCEL_ANY))
			continue;
		if (__io_futex_cancel(ctx, req))
			nr++;
		if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
			break;
	}
	io_ring_submit_unlock(ctx, issue_flags);

	if (nr)
		return nr;

	return -ENOENT;
}

bool io_futex_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
			 bool cancel_all)
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;

	lockdep_assert_held(&ctx->uring_lock);

	hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) {
		if (!io_match_task_safe(req, task, cancel_all))
			continue;
		hlist_del_init(&req->hash_node);
		__io_futex_cancel(ctx, req);
		found = true;
	}

	return found;
}

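/*
 * Prep for the single-futex ops (IORING_OP_FUTEX_WAIT/WAKE): the futex
 * address is passed in sqe->addr, the value in sqe->addr2, the mask in
 * sqe->addr3, and the FUTEX2_* flags in sqe->fd. sqe->len,
 * sqe->futex_flags, sqe->buf_index and sqe->file_index must be zero.
 */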
int io_futex_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	u32 flags;

	if (unlikely(sqe->len || sqe->futex_flags || sqe->buf_index ||
		     sqe->file_index))
		return -EINVAL;

	iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	iof->futex_val = READ_ONCE(sqe->addr2);
	iof->futex_mask = READ_ONCE(sqe->addr3);
	flags = READ_ONCE(sqe->fd);

	if (flags & ~FUTEX2_VALID_MASK)
		return -EINVAL;

	iof->futex_flags = futex2_to_flags(flags);
	if (!futex_flags_valid(iof->futex_flags))
		return -EINVAL;

	if (!futex_validate_input(iof->futex_flags, iof->futex_val) ||
	    !futex_validate_input(iof->futex_flags, iof->futex_mask))
		return -EINVAL;

	return 0;
}

static void io_futex_wakev_fn(struct wake_q_head *wake_q, struct futex_q *q)
{
	struct io_kiocb *req = q->wake_data;
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);

	if (!io_futexv_claim(iof))
		return;
	if (unlikely(!__futex_wake_mark(q)))
		return;

	io_req_set_res(req, 0, 0);
	req->io_task_work.func = io_futexv_complete;
	io_req_task_work_add(req);
}

int io_futexv_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	struct futex_vector *futexv;
	int ret;

	/* No flags or mask supported for waitv */
	if (unlikely(sqe->fd || sqe->buf_index || sqe->file_index ||
		     sqe->addr2 || sqe->futex_flags || sqe->addr3))
		return -EINVAL;

	iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	iof->futex_nr = READ_ONCE(sqe->len);
	if (!iof->futex_nr || iof->futex_nr > FUTEX_WAITV_MAX)
		return -EINVAL;

	futexv = kcalloc(iof->futex_nr, sizeof(*futexv), GFP_KERNEL);
	if (!futexv)
		return -ENOMEM;

	ret = futex_parse_waitv(futexv, iof->uwaitv, iof->futex_nr,
				io_futex_wakev_fn, req);
	if (ret) {
		kfree(futexv);
		return ret;
	}

	iof->futexv_owned = 0;
	iof->futexv_unqueued = 0;
	req->flags |= REQ_F_ASYNC_DATA;
	req->async_data = futexv;
	return 0;
}

static void io_futex_wake_fn(struct wake_q_head *wake_q, struct futex_q *q)
{
	struct io_futex_data *ifd = container_of(q, struct io_futex_data, q);
	struct io_kiocb *req = ifd->req;

	if (unlikely(!__futex_wake_mark(q)))
		return;

	io_req_set_res(req, 0, 0);
	req->io_task_work.func = io_futex_complete;
	io_req_task_work_add(req);
}

static struct io_futex_data *io_alloc_ifd(struct io_ring_ctx *ctx)
{
	struct io_futex_data *ifd;

	ifd = io_alloc_cache_get(&ctx->futex_cache);
	if (ifd)
		return ifd;

	return kmalloc(sizeof(struct io_futex_data), GFP_NOWAIT);
}

int io_futexv_wait(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	struct futex_vector *futexv = req->async_data;
	struct io_ring_ctx *ctx = req->ctx;
	int ret, woken = -1;

	io_ring_submit_lock(ctx, issue_flags);

	ret = futex_wait_multiple_setup(futexv, iof->futex_nr, &woken);

	/*
	 * Error case, ret is < 0. Mark the request as failed.
	 */
	if (unlikely(ret < 0)) {
		io_ring_submit_unlock(ctx, issue_flags);
		req_set_fail(req);
		io_req_set_res(req, ret, 0);
		kfree(futexv);
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
		return IOU_OK;
	}

	/*
	 * 0 return means that we successfully set up the waiters, and that
	 * nobody triggered a wakeup while we were doing so. If the wakeup
	 * happened post setup, the task_work will be run post this issue and
	 * under the submission lock. 1 means we got woken while setting up,
	 * let that side do the completion. Note that
	 * futex_wait_multiple_setup() will have unqueued all the futexes in
	 * this case. Mark us as having done that already, since this is
	 * different from normal wakeup.
	 */
	if (!ret) {
		/*
		 * If futex_wait_multiple_setup() returns 0 for a
		 * successful setup, then the task state will not be
		 * runnable. This is fine for the sync syscall, as
		 * it'll be blocking unless we already got one of the
		 * futexes woken, but it obviously won't work for an
		 * async invocation. Mark us runnable again.
		 */
		__set_current_state(TASK_RUNNING);
		hlist_add_head(&req->hash_node, &ctx->futex_list);
	} else {
		iof->futexv_unqueued = 1;
		if (woken != -1)
			io_req_set_res(req, woken, 0);
	}

	io_ring_submit_unlock(ctx, issue_flags);
	return IOU_ISSUE_SKIP_COMPLETE;
}

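/*
 * Wait on a single futex: a futex_q is set up with io_futex_wake_fn as
 * the wake callback and the request is added to ctx->futex_list, which
 * keeps it discoverable for cancelation. A wakeup runs the callback,
 * which arms task_work to post the completion.
 */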
int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_futex_data *ifd = NULL;
	struct futex_hash_bucket *hb;
	int ret;

	if (!iof->futex_mask) {
		ret = -EINVAL;
		goto done;
	}

	io_ring_submit_lock(ctx, issue_flags);
	ifd = io_alloc_ifd(ctx);
	if (!ifd) {
		ret = -ENOMEM;
		goto done_unlock;
	}

	req->async_data = ifd;
	ifd->q = futex_q_init;
	ifd->q.bitset = iof->futex_mask;
	ifd->q.wake = io_futex_wake_fn;
	ifd->req = req;

	ret = futex_wait_setup(iof->uaddr, iof->futex_val, iof->futex_flags,
			       &ifd->q, &hb);
	if (!ret) {
		hlist_add_head(&req->hash_node, &ctx->futex_list);
		io_ring_submit_unlock(ctx, issue_flags);

		futex_queue(&ifd->q, hb);
		return IOU_ISSUE_SKIP_COMPLETE;
	}

done_unlock:
	io_ring_submit_unlock(ctx, issue_flags);
done:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	kfree(ifd);
	return IOU_OK;
}

int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	int ret;

	/*
	 * Strict flags - ensure that waking 0 futexes yields a 0 result.
	 * See commit 43adf8449510 ("futex: FLAGS_STRICT") for details.
	 */
	ret = futex_wake(iof->uaddr, FLAGS_STRICT | iof->futex_flags,
			 iof->futex_val, iof->futex_mask);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
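For reference, a minimal userspace sketch of driving these opcodes. It assumes
liburing 2.5 or newer (which added io_uring_prep_futex_wait() and
io_uring_prep_futex_wake()) and uapi headers recent enough to define
FUTEX2_SIZE_U32; variable names are illustrative and error handling is omitted.

#include <linux/futex.h>
#include <liburing.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t futex_word = 0;	/* the 32-bit futex word */
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	struct io_uring ring;
	int i;

	io_uring_queue_init(8, &ring, 0);

	/* Queue an async wait: sleeps as long as futex_word == 0 */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_futex_wait(sqe, &futex_word, 0, FUTEX_BITSET_MATCH_ANY,
				 FUTEX2_SIZE_U32, 0);
	sqe->user_data = 1;
	io_uring_submit(&ring);

	/* Normally another thread: update the word, then wake one waiter */
	futex_word = 1;
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_futex_wake(sqe, &futex_word, 1, FUTEX_BITSET_MATCH_ANY,
				 FUTEX2_SIZE_U32, 0);
	sqe->user_data = 2;
	io_uring_submit(&ring);

	/* Reap both completions */
	for (i = 0; i < 2; i++) {
		io_uring_wait_cqe(&ring, &cqe);
		printf("user_data=%llu res=%d\n",
		       (unsigned long long)cqe->user_data, cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_queue_exit(&ring);
	return 0;
}

The wait CQE posts res 0 once the wake lands (io_futex_wake_fn sets it), and
the wake CQE's res is the number of waiters woken; 0 is a valid result there
thanks to the FLAGS_STRICT handling in io_futex_wake() above.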