// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem read subrequest result collection, assessment and
 * retrying.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
	netfs_reset_iter(subreq);
	WARN_ON_ONCE(subreq->len - subreq->transferred != iov_iter_count(&subreq->io_iter));
	iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
	if (subreq->start + subreq->transferred >= subreq->rreq->i_size)
		__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
}

/*
 * Flush, mark and unlock a folio that's now completely read.  If we want to
 * cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it
 * dirty and let writeback handle it.
 */
static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
				    struct netfs_io_request *rreq,
				    struct folio_queue *folioq,
				    int slot)
{
	struct netfs_folio *finfo;
	struct folio *folio = folioq_folio(folioq, slot);

	flush_dcache_folio(folio);
	folio_mark_uptodate(folio);

	if (!test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
		finfo = netfs_folio_info(folio);
		if (finfo) {
			trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
			if (finfo->netfs_group)
				folio_change_private(folio, finfo->netfs_group);
			else
				folio_detach_private(folio);
			kfree(finfo);
		}

		if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
			if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
				folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
				folio_mark_dirty(folio);
			}
		} else {
			trace_netfs_folio(folio, netfs_folio_trace_read_done);
		}
	} else {
		// TODO: Use of PG_private_2 is deprecated.
		if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
			netfs_pgpriv2_mark_copy_to_cache(subreq, rreq, folioq, slot);
	}

	if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
		if (folio->index == rreq->no_unlock_folio &&
		    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
			_debug("no unlock");
		} else {
			trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
			folio_unlock(folio);
		}
	}

	folioq_clear(folioq, slot);
}

/*
 * Unlock any folios that are now completely read.  Returns true if the
 * subrequest is removed from the list.
 */
static bool netfs_consume_read_data(struct netfs_io_subrequest *subreq, bool was_async)
{
	struct netfs_io_subrequest *prev, *next;
	struct netfs_io_request *rreq = subreq->rreq;
	struct folio_queue *folioq = subreq->curr_folioq;
	size_t avail, prev_donated, next_donated, fsize, part, excess;
	loff_t fpos, start;
	loff_t fend;
	int slot = subreq->curr_folioq_slot;

	if (WARN(subreq->transferred > subreq->len,
		 "Subreq overread: R%x[%x] %zu > %zu",
		 rreq->debug_id, subreq->debug_index,
		 subreq->transferred, subreq->len))
		subreq->transferred = subreq->len;

next_folio:
	fsize = PAGE_SIZE << subreq->curr_folio_order;
	fpos = round_down(subreq->start + subreq->consumed, fsize);
	fend = fpos + fsize;

	if (WARN_ON_ONCE(!folioq) ||
	    WARN_ON_ONCE(!folioq_folio(folioq, slot)) ||
	    WARN_ON_ONCE(folioq_folio(folioq, slot)->index != fpos / PAGE_SIZE)) {
		pr_err("R=%08x[%x] s=%llx-%llx ctl=%zx/%zx/%zx sl=%u\n",
		       rreq->debug_id, subreq->debug_index,
		       subreq->start, subreq->start + subreq->transferred - 1,
		       subreq->consumed, subreq->transferred, subreq->len,
		       slot);
		if (folioq) {
			struct folio *folio = folioq_folio(folioq, slot);

			pr_err("folioq: orders=%02x%02x%02x%02x\n",
			       folioq->orders[0], folioq->orders[1],
			       folioq->orders[2], folioq->orders[3]);
			if (folio)
				pr_err("folio: %llx-%llx ix=%llx o=%u qo=%u\n",
				       fpos, fend - 1, folio_pos(folio), folio_order(folio),
				       folioq_folio_order(folioq, slot));
		}
	}

donation_changed:
	/* Try to consume the current folio if we've hit or passed the end of
	 * it.  There's a possibility that this subreq doesn't start at the
	 * beginning of the folio, in which case we need to donate to/from the
	 * preceding subreq.
	 *
	 * We also need to include any potential donation back from the
	 * following subreq.
	 */
	prev_donated = READ_ONCE(subreq->prev_donated);
	next_donated = READ_ONCE(subreq->next_donated);
	if (prev_donated || next_donated) {
		spin_lock_bh(&rreq->lock);
		prev_donated = subreq->prev_donated;
		next_donated = subreq->next_donated;
		subreq->start -= prev_donated;
		subreq->len += prev_donated;
		subreq->transferred += prev_donated;
		prev_donated = subreq->prev_donated = 0;
		if (subreq->transferred == subreq->len) {
			subreq->len += next_donated;
			subreq->transferred += next_donated;
			next_donated = subreq->next_donated = 0;
		}
		trace_netfs_sreq(subreq, netfs_sreq_trace_add_donations);
		spin_unlock_bh(&rreq->lock);
	}

	avail = subreq->transferred;
	if (avail == subreq->len)
		avail += next_donated;
	start = subreq->start;
	if (subreq->consumed == 0) {
		start -= prev_donated;
		avail += prev_donated;
	} else {
		start += subreq->consumed;
		avail -= subreq->consumed;
	}
	part = umin(avail, fsize);

	trace_netfs_progress(subreq, start, avail, part);

	if (start + avail >= fend) {
		if (fpos == start) {
			/* Flush, unlock and mark for caching any folio we've just read. */
			subreq->consumed = fend - subreq->start;
			netfs_unlock_read_folio(subreq, rreq, folioq, slot);
			folioq_mark2(folioq, slot);
			if (subreq->consumed >= subreq->len)
				goto remove_subreq;
		} else if (fpos < start) {
			excess = fend - subreq->start;

			spin_lock_bh(&rreq->lock);
			/* If we complete first on a folio split with the
			 * preceding subreq, donate to that subreq - otherwise
			 * we get the responsibility.
			 */
			if (subreq->prev_donated != prev_donated) {
				spin_unlock_bh(&rreq->lock);
				goto donation_changed;
			}

			if (list_is_first(&subreq->rreq_link, &rreq->subrequests)) {
				spin_unlock_bh(&rreq->lock);
				pr_err("Can't donate prior to front\n");
				goto bad;
			}

			prev = list_prev_entry(subreq, rreq_link);
			WRITE_ONCE(prev->next_donated, prev->next_donated + excess);
			subreq->start += excess;
			subreq->len -= excess;
			subreq->transferred -= excess;
			trace_netfs_donate(rreq, subreq, prev, excess,
					   netfs_trace_donate_tail_to_prev);
			trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);

			if (subreq->consumed >= subreq->len)
				goto remove_subreq_locked;
			spin_unlock_bh(&rreq->lock);
		} else {
			pr_err("fpos > start\n");
			goto bad;
		}

		/* Advance the rolling buffer to the next folio. */
		slot++;
		if (slot >= folioq_nr_slots(folioq)) {
			slot = 0;
			folioq = folioq->next;
			subreq->curr_folioq = folioq;
		}
		subreq->curr_folioq_slot = slot;
		if (folioq && folioq_folio(folioq, slot))
			subreq->curr_folio_order = folioq->orders[slot];
		if (!was_async)
			cond_resched();
		goto next_folio;
	}

	/* Deal with partial progress. */
	if (subreq->transferred < subreq->len)
		return false;

	/* Donate the remaining downloaded data to one of the neighbouring
	 * subrequests.  Note that we may race with them doing the same thing.
	 */
	spin_lock_bh(&rreq->lock);

	if (subreq->prev_donated != prev_donated ||
	    subreq->next_donated != next_donated) {
		spin_unlock_bh(&rreq->lock);
		cond_resched();
		goto donation_changed;
	}

	/* Deal with the trickiest case: that this subreq is in the middle of a
	 * folio, not touching either edge, but finishes first.  In such a
	 * case, we donate to the previous subreq, if there is one, so that the
	 * donation is only handled when that completes - and remove this
	 * subreq from the list.
	 *
	 * If the previous subreq finished first, we will have acquired their
	 * donation and should be able to unlock folios and donate onwards.
	 */
	if (!subreq->consumed &&
	    !prev_donated &&
	    !list_is_first(&subreq->rreq_link, &rreq->subrequests)) {
		prev = list_prev_entry(subreq, rreq_link);
		WRITE_ONCE(prev->next_donated, prev->next_donated + subreq->len);
		subreq->start += subreq->len;
		subreq->len = 0;
		subreq->transferred = 0;
		trace_netfs_donate(rreq, subreq, prev, subreq->len,
				   netfs_trace_donate_to_prev);
		trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);
		goto remove_subreq_locked;
	}

	/* If we can't donate down the chain, donate up the chain instead. */
	excess = subreq->len - subreq->consumed + next_donated;

	if (!subreq->consumed)
		excess += prev_donated;

	if (list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
		rreq->prev_donated = excess;
		trace_netfs_donate(rreq, subreq, NULL, excess,
				   netfs_trace_donate_to_deferred_next);
	} else {
		next = list_next_entry(subreq, rreq_link);
		WRITE_ONCE(next->prev_donated, excess);
		trace_netfs_donate(rreq, subreq, next, excess,
				   netfs_trace_donate_to_next);
	}
	trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_next);
	subreq->len = subreq->consumed;
	subreq->transferred = subreq->consumed;
	goto remove_subreq_locked;

remove_subreq:
	spin_lock_bh(&rreq->lock);
remove_subreq_locked:
	subreq->consumed = subreq->len;
	list_del(&subreq->rreq_link);
	spin_unlock_bh(&rreq->lock);
	netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_consumed);
	return true;

bad:
	/* Errr... prev and next both donated to us, but insufficient to finish
	 * the folio.
	 */
	printk("R=%08x[%x] s=%llx-%llx %zx/%zx/%zx\n",
	       rreq->debug_id, subreq->debug_index,
	       subreq->start, subreq->start + subreq->transferred - 1,
	       subreq->consumed, subreq->transferred, subreq->len);
	printk("folio: %llx-%llx\n", fpos, fend - 1);
	printk("donated: prev=%zx next=%zx\n", prev_donated, next_donated);
	printk("s=%llx av=%zx part=%zx\n", start, avail, part);
	BUG();
}

/*
 * Do page flushing and suchlike after DIO.
 */
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;
	unsigned int i;

	/* Collect unbuffered reads and direct reads, adding up the transfer
	 * sizes until we find the first short or failed subrequest.
	 */
	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		rreq->transferred += subreq->transferred;

		if (subreq->transferred < subreq->len ||
		    test_bit(NETFS_SREQ_FAILED, &subreq->flags)) {
			rreq->error = subreq->error;
			break;
		}
	}

	if (rreq->origin == NETFS_DIO_READ) {
		for (i = 0; i < rreq->direct_bv_count; i++) {
			flush_dcache_page(rreq->direct_bv[i].bv_page);
			// TODO: cifs marks pages in the destination buffer
			// dirty under some circumstances after a read.  Do we
			// need to do that too?
			set_page_dirty(rreq->direct_bv[i].bv_page);
		}
	}

	if (rreq->iocb) {
		rreq->iocb->ki_pos += rreq->transferred;
		if (rreq->iocb->ki_complete)
			rreq->iocb->ki_complete(
				rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
	}
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
	if (rreq->origin == NETFS_DIO_READ)
		inode_dio_end(rreq->inode);
}

/*
 * Assess the state of a read request and decide what to do next.
 *
 * Note that we're in normal kernel thread context at this point, possibly
 * running on a workqueue.
 */
static void netfs_rreq_assess(struct netfs_io_request *rreq)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_assess);

	//netfs_rreq_is_still_valid(rreq);

	if (test_and_clear_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags)) {
		netfs_retry_reads(rreq);
		return;
	}

	if (rreq->origin == NETFS_DIO_READ ||
	    rreq->origin == NETFS_READ_GAPS)
		netfs_rreq_assess_dio(rreq);
	task_io_account_read(rreq->transferred);

	trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
	wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);

	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
	netfs_clear_subrequests(rreq, false);
	netfs_unlock_abandoned_read_pages(rreq);
	if (unlikely(test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)))
		netfs_pgpriv2_write_to_the_cache(rreq);
}

void netfs_read_termination_worker(struct work_struct *work)
{
	struct netfs_io_request *rreq =
		container_of(work, struct netfs_io_request, work);
	netfs_see_request(rreq, netfs_rreq_trace_see_work);
	netfs_rreq_assess(rreq);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_work_complete);
}

/*
 * Handle the completion of all outstanding I/O operations on a read request.
 * We inherit a ref from the caller.
 */
void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async)
{
	if (!was_async)
		return netfs_rreq_assess(rreq);
	if (!work_pending(&rreq->work)) {
		netfs_get_request(rreq, netfs_rreq_trace_get_work);
		if (!queue_work(system_unbound_wq, &rreq->work))
			netfs_put_request(rreq, was_async, netfs_rreq_trace_put_work_nq);
	}
}

/**
 * netfs_read_subreq_progress - Note progress of a read operation.
 * @subreq: The read request that has terminated.
 * @was_async: True if we're in an asynchronous context.
 *
 * This tells the read side of netfs lib that a contributory I/O operation has
 * made some progress and that it may be possible to unlock some folios.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 *
 * If @was_async is true, the caller might be in a softirq or interrupt
 * context and we can't sleep.
 */
void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
				bool was_async)
{
	struct netfs_io_request *rreq = subreq->rreq;

	trace_netfs_sreq(subreq, netfs_sreq_trace_progress);

	if (subreq->transferred > subreq->consumed &&
	    (rreq->origin == NETFS_READAHEAD ||
	     rreq->origin == NETFS_READPAGE ||
	     rreq->origin == NETFS_READ_FOR_WRITE)) {
		netfs_consume_read_data(subreq, was_async);
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}
}
EXPORT_SYMBOL(netfs_read_subreq_progress);

/**
 * netfs_read_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O request that has terminated.
 * @error: Error code indicating type of completion.
 * @was_async: The termination was asynchronous
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates the outcome of the operation through @error, supplying
 * 0 to indicate a successful or retryable transfer (if NETFS_SREQ_NEED_RETRY
 * is set) or a negative error code.  The helper will look after reissuing I/O
 * operations as appropriate and writing downloaded data to the cache.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 *
 * If @was_async is true, the caller might be in a softirq or interrupt
 * context and we can't sleep.
 */
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
				  int error, bool was_async)
{
	struct netfs_io_request *rreq = subreq->rreq;

	switch (subreq->source) {
	case NETFS_READ_FROM_CACHE:
		netfs_stat(&netfs_n_rh_read_done);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_stat(&netfs_n_rh_download_done);
		break;
	default:
		break;
	}

	if (rreq->origin != NETFS_DIO_READ) {
		/* Collect buffered reads.
		 *
		 * If the read completed validly short, then we can clear the
		 * tail before going on to unlock the folios.
		 */
		if (error == 0 && subreq->transferred < subreq->len &&
		    (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags) ||
		     test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags))) {
			netfs_clear_unread(subreq);
			subreq->transferred = subreq->len;
			trace_netfs_sreq(subreq, netfs_sreq_trace_clear);
		}
		if (subreq->transferred > subreq->consumed &&
		    (rreq->origin == NETFS_READAHEAD ||
		     rreq->origin == NETFS_READPAGE ||
		     rreq->origin == NETFS_READ_FOR_WRITE)) {
			netfs_consume_read_data(subreq, was_async);
			__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
		}
		rreq->transferred += subreq->transferred;
	}

	/* Deal with retry requests, short reads and errors.  If we retry
	 * but don't make progress, we abandon the attempt.
	 */
	if (!error && subreq->transferred < subreq->len) {
		if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof);
		} else {
			trace_netfs_sreq(subreq, netfs_sreq_trace_short);
			if (subreq->transferred > subreq->consumed) {
				__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
				__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
				set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
			} else if (!__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
				__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
				set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
			} else {
				__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
				error = -ENODATA;
			}
		}
	}

	subreq->error = error;
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	if (unlikely(error < 0)) {
		trace_netfs_failure(rreq, subreq, error, netfs_fail_read);
		if (subreq->source == NETFS_READ_FROM_CACHE) {
			netfs_stat(&netfs_n_rh_read_failed);
		} else {
			netfs_stat(&netfs_n_rh_download_failed);
			set_bit(NETFS_RREQ_FAILED, &rreq->flags);
			rreq->error = subreq->error;
		}
	}

	if (atomic_dec_and_test(&rreq->nr_outstanding))
		netfs_rreq_terminated(rreq, was_async);

	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_read_subreq_terminated);
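
/*
 * Usage illustration (editor's sketch, not part of the file above): one
 * plausible way a network filesystem's transport completion path might hand a
 * finished subrequest back to netfslib through the
 * netfs_read_subreq_terminated() API documented above.  The "myfs_call"
 * structure, its fields and myfs_read_done() are hypothetical; only the netfs
 * call, flags and fields are real.
 */
#if 0	/* illustrative only */
#include <linux/netfs.h>

struct myfs_call {
	struct netfs_io_subrequest *subreq;	/* Subrequest this RPC serves */
	size_t	bytes_received;			/* Data copied into the buffer */
	bool	hit_eof;			/* Server reported end of file */
	int	error;				/* 0 or a negative errno */
};

static void myfs_read_done(struct myfs_call *call)
{
	struct netfs_io_subrequest *subreq = call->subreq;

	/* Record how much data actually landed in the output buffer before
	 * reporting back, as the kernel-doc above requires.
	 */
	subreq->transferred += call->bytes_received;

	/* A validly short read (e.g. at EOF) may have its tail cleared by
	 * netfslib rather than being retried.
	 */
	if (call->hit_eof)
		__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);

	/* This may run from the transport's softirq context, so pass
	 * was_async = true and let netfslib defer the heavy lifting.
	 */
	netfs_read_subreq_terminated(subreq, call->error, true);
}
#endif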