
TOMOYO Linux Cross Reference
Linux/fs/netfs/write_collect.c

  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /* Network filesystem write subrequest result collection, assessment
  3  * and retrying.
  4  *
  5  * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
  6  * Written by David Howells (dhowells@redhat.com)
  7  */
  8 
  9 #include <linux/export.h>
 10 #include <linux/fs.h>
 11 #include <linux/mm.h>
 12 #include <linux/pagemap.h>
 13 #include <linux/slab.h>
 14 #include "internal.h"
 15 
 16 /* Notes made in the collector */
 17 #define HIT_PENDING             0x01    /* A front op was still pending */
 18 #define SOME_EMPTY              0x02    /* One or more streams are empty */
 19 #define ALL_EMPTY               0x04    /* All streams are empty */
 20 #define MAYBE_DISCONTIG         0x08    /* A front op may be discontiguous (rounded to PAGE_SIZE) */
 21 #define NEED_REASSESS           0x10    /* Need to loop round and reassess */
 22 #define REASSESS_DISCONTIG      0x20    /* Reassess discontiguity if contiguity advances */
 23 #define MADE_PROGRESS           0x40    /* Made progress cleaning up a stream or the folio set */
 24 #define BUFFERED                0x80    /* The pagecache needs cleaning up */
 25 #define NEED_RETRY              0x100   /* A front op requests retrying */
 26 #define SAW_FAILURE             0x200   /* At least one stream hit a permanent failure */
 27 
 28 /*
 29  * Successful completion of write of a folio to the server and/or cache.  Note
 30  * that we are not allowed to lock the folio here on pain of deadlocking with
 31  * truncate.
 32  */
 33 int netfs_folio_written_back(struct folio *folio)
 34 {
 35         enum netfs_folio_trace why = netfs_folio_trace_clear;
 36         struct netfs_inode *ictx = netfs_inode(folio->mapping->host);
 37         struct netfs_folio *finfo;
 38         struct netfs_group *group = NULL;
 39         int gcount = 0;
 40 
 41         if ((finfo = netfs_folio_info(folio))) {
 42                 /* Streaming writes cannot be redirtied whilst under writeback,
 43                  * so discard the streaming record.
 44                  */
 45                 unsigned long long fend;
 46 
 47                 fend = folio_pos(folio) + finfo->dirty_offset + finfo->dirty_len;
 48                 if (fend > ictx->zero_point)
 49                         ictx->zero_point = fend;
 50 
 51                 folio_detach_private(folio);
 52                 group = finfo->netfs_group;
 53                 gcount++;
 54                 kfree(finfo);
 55                 why = netfs_folio_trace_clear_s;
 56                 goto end_wb;
 57         }
 58 
 59         if ((group = netfs_folio_group(folio))) {
 60                 if (group == NETFS_FOLIO_COPY_TO_CACHE) {
 61                         why = netfs_folio_trace_clear_cc;
 62                         folio_detach_private(folio);
 63                         goto end_wb;
 64                 }
 65 
 66                 /* Need to detach the group pointer if the page didn't get
 67                  * redirtied.  If it has been redirtied, then it must be within
 68                  * the same group.
 69                  */
 70                 why = netfs_folio_trace_redirtied;
 71                 if (!folio_test_dirty(folio)) {
 72                         folio_detach_private(folio);
 73                         gcount++;
 74                         why = netfs_folio_trace_clear_g;
 75                 }
 76         }
 77 
 78 end_wb:
 79         trace_netfs_folio(folio, why);
 80         folio_end_writeback(folio);
 81         return gcount;
 82 }
 83 
 84 /*
 85  * Get hold of a folio we have under writeback.  We don't want to take a
 86  * ref on it.
 87  */
 88 static struct folio *netfs_writeback_lookup_folio(struct netfs_io_request *wreq, loff_t pos)
 89 {
 90         XA_STATE(xas, &wreq->mapping->i_pages, pos / PAGE_SIZE);
 91         struct folio *folio;
 92 
 93         rcu_read_lock();
 94 
 95         for (;;) {
 96                 xas_reset(&xas);
 97                 folio = xas_load(&xas);
 98                 if (xas_retry(&xas, folio))
 99                         continue;
100 
101                 if (!folio || xa_is_value(folio))
102                         kdebug("R=%08x: folio %lx (%llx) not present",
103                                wreq->debug_id, xas.xa_index, pos / PAGE_SIZE);
104                 BUG_ON(!folio || xa_is_value(folio));
105 
106                 if (folio == xas_reload(&xas))
107                         break;
108         }
109 
110         rcu_read_unlock();
111 
112         if (WARN_ONCE(!folio_test_writeback(folio),
113                       "R=%08x: folio %lx is not under writeback\n",
114                       wreq->debug_id, folio->index)) {
115                 trace_netfs_folio(folio, netfs_folio_trace_not_under_wback);
116         }
117         return folio;
118 }
119 
120 /*
121  * Unlock any folios we've finished with.
122  */
123 static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
124                                           unsigned long long collected_to,
125                                           unsigned int *notes)
126 {
127         for (;;) {
128                 struct folio *folio;
129                 struct netfs_folio *finfo;
130                 unsigned long long fpos, fend;
131                 size_t fsize, flen;
132 
133                 folio = netfs_writeback_lookup_folio(wreq, wreq->cleaned_to);
134 
135                 fpos = folio_pos(folio);
136                 fsize = folio_size(folio);
137                 finfo = netfs_folio_info(folio);
138                 flen = finfo ? finfo->dirty_offset + finfo->dirty_len : fsize;
139 
140                 fend = min_t(unsigned long long, fpos + flen, wreq->i_size);
141 
142                 trace_netfs_collect_folio(wreq, folio, fend, collected_to);
143 
144                 if (fpos + fsize > wreq->contiguity) {
145                         trace_netfs_collect_contig(wreq, fpos + fsize,
146                                                    netfs_contig_trace_unlock);
147                         wreq->contiguity = fpos + fsize;
148                 }
149 
150                 /* Unlock any folio we've transferred all of. */
151                 if (collected_to < fend)
152                         break;
153 
154                 wreq->nr_group_rel += netfs_folio_written_back(folio);
155                 wreq->cleaned_to = fpos + fsize;
156                 *notes |= MADE_PROGRESS;
157 
158                 if (fpos + fsize >= collected_to)
159                         break;
160         }
161 }
162 
163 /*
164  * Perform retries on the streams that need it.
165  */
166 static void netfs_retry_write_stream(struct netfs_io_request *wreq,
167                                      struct netfs_io_stream *stream)
168 {
169         struct list_head *next;
170 
171         _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
172 
173         if (list_empty(&stream->subrequests))
174                 return;
175 
176         if (stream->source == NETFS_UPLOAD_TO_SERVER &&
177             wreq->netfs_ops->retry_request)
178                 wreq->netfs_ops->retry_request(wreq, stream);
179 
180         if (unlikely(stream->failed))
181                 return;
182 
183         /* If there's no renegotiation to do, just resend each subreq marked for retry. */
184         if (!stream->prepare_write) {
185                 struct netfs_io_subrequest *subreq;
186 
187                 list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
188                         if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
189                                 break;
190                         if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
191                                 __set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
192                                 netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
193                                 netfs_reissue_write(stream, subreq);
194                         }
195                 }
196                 return;
197         }
198 
199         next = stream->subrequests.next;
200 
201         do {
202                 struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp;
203                 unsigned long long start, len;
204                 size_t part;
205                 bool boundary = false;
206 
207                 /* Go through the stream and find the next span of contiguous
208                  * data that we then rejig (cifs, for example, needs the wsize
209                  * renegotiating) and reissue.
210                  */
211                 from = list_entry(next, struct netfs_io_subrequest, rreq_link);
212                 to = from;
213                 start = from->start + from->transferred;
214                 len   = from->len   - from->transferred;
215 
216                 if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
217                     !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
218                         return;
219 
220                 list_for_each_continue(next, &stream->subrequests) {
221                         subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
222                         if (subreq->start + subreq->transferred != start + len ||
223                             test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
224                             !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
225                                 break;
226                         to = subreq;
227                         len += to->len;
228                 }
229 
230                 /* Work through the sublist. */
231                 subreq = from;
232                 list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
233                         if (!len)
234                                 break;
235                         /* Renegotiate max_len (wsize) */
236                         trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
237                         __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
238                         __set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
239                         stream->prepare_write(subreq);
240 
241                         part = min(len, subreq->max_len);
242                         subreq->len = part;
243                         subreq->start = start;
244                         subreq->transferred = 0;
245                         len -= part;
246                         start += part;
247                         if (len && subreq == to &&
248                             __test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags))
249                                 boundary = true;
250 
251                         netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
252                         netfs_reissue_write(stream, subreq);
253                         if (subreq == to)
254                                 break;
255                 }
256 
257                 /* If we managed to use fewer subreqs, we can discard the
258                  * excess; if we used the same number, then we're done.
259                  */
260                 if (!len) {
261                         if (subreq == to)
262                                 continue;
263                         list_for_each_entry_safe_from(subreq, tmp,
264                                                       &stream->subrequests, rreq_link) {
265                                 trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
266                                 list_del(&subreq->rreq_link);
267                                 netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
268                                 if (subreq == to)
269                                         break;
270                         }
271                         continue;
272                 }
273 
274                 /* We ran out of subrequests, so we need to allocate some more
275                  * and insert them after.
276                  */
277                 do {
278                         subreq = netfs_alloc_subrequest(wreq);
279                         subreq->source          = to->source;
280                         subreq->start           = start;
281                         subreq->max_len         = len;
282                         subreq->max_nr_segs     = INT_MAX;
283                         subreq->debug_index     = atomic_inc_return(&wreq->subreq_counter);
284                         subreq->stream_nr       = to->stream_nr;
285                         __set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
286 
287                         trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
288                                              refcount_read(&subreq->ref),
289                                              netfs_sreq_trace_new);
290                         netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
291 
292                         list_add(&subreq->rreq_link, &to->rreq_link);
293                         to = list_next_entry(to, rreq_link);
294                         trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
295 
296                         switch (stream->source) {
297                         case NETFS_UPLOAD_TO_SERVER:
298                                 netfs_stat(&netfs_n_wh_upload);
299                                 subreq->max_len = min(len, wreq->wsize);
300                                 break;
301                         case NETFS_WRITE_TO_CACHE:
302                                 netfs_stat(&netfs_n_wh_write);
303                                 break;
304                         default:
305                                 WARN_ON_ONCE(1);
306                         }
307 
308                         stream->prepare_write(subreq);
309 
310                         part = min(len, subreq->max_len);
311                         subreq->len = subreq->transferred + part;
312                         len -= part;
313                         start += part;
314                         if (!len && boundary) {
315                                 __set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
316                                 boundary = false;
317                         }
318 
319                         netfs_reissue_write(stream, subreq);
320                         if (!len)
321                                 break;
322 
323                 } while (len);
324 
325         } while (!list_is_head(next, &stream->subrequests));
326 }
327 
328 /*
329  * Perform retries on the streams that need it.  If we're doing content
330  * encryption and the server copy changed due to a third-party write, we may
331  * need to do an RMW cycle and also rewrite the data to the cache.
332  */
333 static void netfs_retry_writes(struct netfs_io_request *wreq)
334 {
335         struct netfs_io_subrequest *subreq;
336         struct netfs_io_stream *stream;
337         int s;
338 
339         /* Wait for all outstanding I/O to quiesce before performing retries as
340          * we may need to renegotiate the I/O sizes.
341          */
342         for (s = 0; s < NR_IO_STREAMS; s++) {
343                 stream = &wreq->io_streams[s];
344                 if (!stream->active)
345                         continue;
346 
347                 list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
348                         wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
349                                     TASK_UNINTERRUPTIBLE);
350                 }
351         }
352 
353         // TODO: Enc: Fetch changed partial pages
354         // TODO: Enc: Reencrypt content if needed.
355         // TODO: Enc: Wind back transferred point.
356         // TODO: Enc: Mark cache pages for retry.
357 
358         for (s = 0; s < NR_IO_STREAMS; s++) {
359                 stream = &wreq->io_streams[s];
360                 if (stream->need_retry) {
361                         stream->need_retry = false;
362                         netfs_retry_write_stream(wreq, stream);
363                 }
364         }
365 }
366 
367 /*
368  * Collect and assess the results of various write subrequests.  We may need to
369  * retry some of the results - or even do an RMW cycle for content crypto.
370  *
371  * Note that we have a number of parallel, overlapping lists of subrequests,
372  * one to the server and one to the local cache for example, which may not be
373  * the same size or starting position and may not even correspond in boundary
374  * alignment.
375  */
376 static void netfs_collect_write_results(struct netfs_io_request *wreq)
377 {
378         struct netfs_io_subrequest *front, *remove;
379         struct netfs_io_stream *stream;
380         unsigned long long collected_to;
381         unsigned int notes;
382         int s;
383 
384         _enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
385         trace_netfs_collect(wreq);
386         trace_netfs_rreq(wreq, netfs_rreq_trace_collect);
387 
388 reassess_streams:
389         smp_rmb();
390         collected_to = ULLONG_MAX;
391         if (wreq->origin == NETFS_WRITEBACK)
392                 notes = ALL_EMPTY | BUFFERED | MAYBE_DISCONTIG;
393         else if (wreq->origin == NETFS_WRITETHROUGH)
394                 notes = ALL_EMPTY | BUFFERED;
395         else
396                 notes = ALL_EMPTY;
397 
398         /* Remove completed subrequests from the front of the streams and
399          * advance the completion point on each stream.  We stop when we hit
400          * something that's in progress.  The issuer thread may be adding stuff
401          * to the tail whilst we're doing this.
402          *
403          * We must not, however, merge in discontiguities that span whole
404          * folios that aren't under writeback.  This is made more complicated
405          * by the folios in the gap being of unpredictable sizes - if they even
406          * exist - but we don't want to look them up.
407          */
408         for (s = 0; s < NR_IO_STREAMS; s++) {
409                 loff_t rstart, rend;
410 
411                 stream = &wreq->io_streams[s];
412                 /* Read active flag before list pointers */
413                 if (!smp_load_acquire(&stream->active))
414                         continue;
415 
416                 front = stream->front;
417                 while (front) {
418                         trace_netfs_collect_sreq(wreq, front);
419                         //_debug("sreq [%x] %llx %zx/%zx",
420                         //       front->debug_index, front->start, front->transferred, front->len);
421 
422                         /* Stall if there may be a discontinuity. */
423                         rstart = round_down(front->start, PAGE_SIZE);
424                         if (rstart > wreq->contiguity) {
425                                 if (wreq->contiguity > stream->collected_to) {
426                                         trace_netfs_collect_gap(wreq, stream,
427                                                                 wreq->contiguity, 'D');
428                                         stream->collected_to = wreq->contiguity;
429                                 }
430                                 notes |= REASSESS_DISCONTIG;
431                                 break;
432                         }
433                         rend = round_up(front->start + front->len, PAGE_SIZE);
434                         if (rend > wreq->contiguity) {
435                                 trace_netfs_collect_contig(wreq, rend,
436                                                            netfs_contig_trace_collect);
437                                 wreq->contiguity = rend;
438                                 if (notes & REASSESS_DISCONTIG)
439                                         notes |= NEED_REASSESS;
440                         }
441                         notes &= ~MAYBE_DISCONTIG;
442 
443                         /* Stall if the front is still undergoing I/O. */
444                         if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) {
445                                 notes |= HIT_PENDING;
446                                 break;
447                         }
448                         smp_rmb(); /* Read counters after I-P flag. */
449 
450                         if (stream->failed) {
451                                 stream->collected_to = front->start + front->len;
452                                 notes |= MADE_PROGRESS | SAW_FAILURE;
453                                 goto cancel;
454                         }
455                         if (front->start + front->transferred > stream->collected_to) {
456                                 stream->collected_to = front->start + front->transferred;
457                                 stream->transferred = stream->collected_to - wreq->start;
458                                 notes |= MADE_PROGRESS;
459                         }
460                         if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
461                                 stream->failed = true;
462                                 stream->error = front->error;
463                                 if (stream->source == NETFS_UPLOAD_TO_SERVER)
464                                         mapping_set_error(wreq->mapping, front->error);
465                                 notes |= NEED_REASSESS | SAW_FAILURE;
466                                 break;
467                         }
468                         if (front->transferred < front->len) {
469                                 stream->need_retry = true;
470                                 notes |= NEED_RETRY | MADE_PROGRESS;
471                                 break;
472                         }
473 
474                 cancel:
475                         /* Remove if completely consumed. */
476                         spin_lock(&wreq->lock);
477 
478                         remove = front;
479                         list_del_init(&front->rreq_link);
480                         front = list_first_entry_or_null(&stream->subrequests,
481                                                          struct netfs_io_subrequest, rreq_link);
482                         stream->front = front;
483                         if (!front) {
484                                 unsigned long long jump_to = atomic64_read(&wreq->issued_to);
485 
486                                 if (stream->collected_to < jump_to) {
487                                         trace_netfs_collect_gap(wreq, stream, jump_to, 'A');
488                                         stream->collected_to = jump_to;
489                                 }
490                         }
491 
492                         spin_unlock(&wreq->lock);
493                         netfs_put_subrequest(remove, false,
494                                              notes & SAW_FAILURE ?
495                                              netfs_sreq_trace_put_cancel :
496                                              netfs_sreq_trace_put_done);
497                 }
498 
499                 if (front)
500                         notes &= ~ALL_EMPTY;
501                 else
502                         notes |= SOME_EMPTY;
503 
504                 if (stream->collected_to < collected_to)
505                         collected_to = stream->collected_to;
506         }
507 
508         if (collected_to != ULLONG_MAX && collected_to > wreq->collected_to)
509                 wreq->collected_to = collected_to;
510 
511         /* If we have an empty stream, we need to jump it forward over any gap,
512          * otherwise the collection point will never advance.
513          *
514          * Note that the issuer always adds to the stream with the lowest
515          * so-far submitted start, so if we see two consecutive subreqs in one
516          * stream with nothing between them in another stream, then the second
517          * stream has a gap that can be jumped.
518          */
519         if (notes & SOME_EMPTY) {
520                 unsigned long long jump_to = wreq->start + READ_ONCE(wreq->submitted);
521 
522                 for (s = 0; s < NR_IO_STREAMS; s++) {
523                         stream = &wreq->io_streams[s];
524                         if (stream->active &&
525                             stream->front &&
526                             stream->front->start < jump_to)
527                                 jump_to = stream->front->start;
528                 }
529 
530                 for (s = 0; s < NR_IO_STREAMS; s++) {
531                         stream = &wreq->io_streams[s];
532                         if (stream->active &&
533                             !stream->front &&
534                             stream->collected_to < jump_to) {
535                                 trace_netfs_collect_gap(wreq, stream, jump_to, 'B');
536                                 stream->collected_to = jump_to;
537                         }
538                 }
539         }
540 
541         for (s = 0; s < NR_IO_STREAMS; s++) {
542                 stream = &wreq->io_streams[s];
543                 if (stream->active)
544                         trace_netfs_collect_stream(wreq, stream);
545         }
546 
547         trace_netfs_collect_state(wreq, wreq->collected_to, notes);
548 
549         /* Unlock any folios that we have now finished with. */
550         if (notes & BUFFERED) {
551                 unsigned long long clean_to = min(wreq->collected_to, wreq->contiguity);
552 
553                 if (wreq->cleaned_to < clean_to)
554                         netfs_writeback_unlock_folios(wreq, clean_to, &notes);
555         } else {
556                 wreq->cleaned_to = wreq->collected_to;
557         }
558 
559         // TODO: Discard encryption buffers
560 
561         /* If all streams are discontiguous with the last folio we cleared, we
562          * may need to skip a set of folios.
563          */
564         if ((notes & (MAYBE_DISCONTIG | ALL_EMPTY)) == MAYBE_DISCONTIG) {
565                 unsigned long long jump_to = ULLONG_MAX;
566 
567                 for (s = 0; s < NR_IO_STREAMS; s++) {
568                         stream = &wreq->io_streams[s];
569                         if (stream->active && stream->front &&
570                             stream->front->start < jump_to)
571                                 jump_to = stream->front->start;
572                 }
573 
574                 trace_netfs_collect_contig(wreq, jump_to, netfs_contig_trace_jump);
575                 wreq->contiguity = jump_to;
576                 wreq->cleaned_to = jump_to;
577                 wreq->collected_to = jump_to;
578                 for (s = 0; s < NR_IO_STREAMS; s++) {
579                         stream = &wreq->io_streams[s];
580                         if (stream->collected_to < jump_to)
581                                 stream->collected_to = jump_to;
582                 }
583                 //cond_resched();
584                 notes |= MADE_PROGRESS;
585                 goto reassess_streams;
586         }
587 
588         if (notes & NEED_RETRY)
589                 goto need_retry;
590         if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
591                 trace_netfs_rreq(wreq, netfs_rreq_trace_unpause);
592                 clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags);
593                 wake_up_bit(&wreq->flags, NETFS_RREQ_PAUSE);
594         }
595 
596         if (notes & NEED_REASSESS) {
597                 //cond_resched();
598                 goto reassess_streams;
599         }
600         if (notes & MADE_PROGRESS) {
601                 //cond_resched();
602                 goto reassess_streams;
603         }
604 
605 out:
606         netfs_put_group_many(wreq->group, wreq->nr_group_rel);
607         wreq->nr_group_rel = 0;
608         _leave(" = %x", notes);
609         return;
610 
611 need_retry:
612         /* Okay...  We're going to have to retry one or both streams.  Note
613          * that any partially completed op will have had any wholly transferred
614          * folios removed from it.
615          */
616         _debug("retry");
617         netfs_retry_writes(wreq);
618         goto out;
619 }
620 
621 /*
622  * Perform the collection of subrequests, folios and encryption buffers.
623  */
624 void netfs_write_collection_worker(struct work_struct *work)
625 {
626         struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work);
627         struct netfs_inode *ictx = netfs_inode(wreq->inode);
628         size_t transferred;
629         int s;
630 
631         _enter("R=%x", wreq->debug_id);
632 
633         netfs_see_request(wreq, netfs_rreq_trace_see_work);
634         if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) {
635                 netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
636                 return;
637         }
638 
639         netfs_collect_write_results(wreq);
640 
641         /* We're done when the app thread has finished posting subreqs and all
642          * the queues in all the streams are empty.
643          */
644         if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) {
645                 netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
646                 return;
647         }
648         smp_rmb(); /* Read ALL_QUEUED before lists. */
649 
650         transferred = LONG_MAX;
651         for (s = 0; s < NR_IO_STREAMS; s++) {
652                 struct netfs_io_stream *stream = &wreq->io_streams[s];
653                 if (!stream->active)
654                         continue;
655                 if (!list_empty(&stream->subrequests)) {
656                         netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
657                         return;
658                 }
659                 if (stream->transferred < transferred)
660                         transferred = stream->transferred;
661         }
662 
663         /* Okay, declare that all I/O is complete. */
664         wreq->transferred = transferred;
665         trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
666 
667         if (wreq->io_streams[1].active &&
668             wreq->io_streams[1].failed) {
669                 /* Cache write failure doesn't prevent writeback completion
670                  * unless we're in disconnected mode.
671                  */
672                 ictx->ops->invalidate_cache(wreq);
673         }
674 
675         if (wreq->cleanup)
676                 wreq->cleanup(wreq);
677 
678         if (wreq->origin == NETFS_DIO_WRITE &&
679             wreq->mapping->nrpages) {
680                 /* mmap may have got underfoot and we may now have folios
681                  * locally covering the region we just wrote.  Attempt to
682          * discard the folios, but leave in place any that were modified locally.
683                  * ->write_iter() is prevented from interfering by the DIO
684                  * counter.
685                  */
686                 pgoff_t first = wreq->start >> PAGE_SHIFT;
687                 pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
688                 invalidate_inode_pages2_range(wreq->mapping, first, last);
689         }
690 
691         if (wreq->origin == NETFS_DIO_WRITE)
692                 inode_dio_end(wreq->inode);
693 
694         _debug("finished");
695         trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
696         clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
697         wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
698 
699         if (wreq->iocb) {
700                 size_t written = min(wreq->transferred, wreq->len);
701                 wreq->iocb->ki_pos += written;
702                 if (wreq->iocb->ki_complete)
703                         wreq->iocb->ki_complete(
704                                 wreq->iocb, wreq->error ? wreq->error : written);
705                 wreq->iocb = VFS_PTR_POISON;
706         }
707 
708         netfs_clear_subrequests(wreq, false);
709         netfs_put_request(wreq, false, netfs_rreq_trace_put_work_complete);
710 }
711 
712 /*
713  * Wake the collection work item.
714  */
715 void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async)
716 {
717         if (!work_pending(&wreq->work)) {
718                 netfs_get_request(wreq, netfs_rreq_trace_get_work);
719                 if (!queue_work(system_unbound_wq, &wreq->work))
720                         netfs_put_request(wreq, was_async, netfs_rreq_trace_put_work_nq);
721         }
722 }
723 
724 /**
725  * netfs_write_subrequest_terminated - Note the termination of a write operation.
726  * @_op: The I/O subrequest that has terminated.
727  * @transferred_or_error: The amount of data transferred or an error code.
728  * @was_async: The termination was asynchronous
729  *
730  * This tells the library that a contributory write I/O operation has
731  * terminated, one way or another, and that it should collect the results.
732  *
733  * The caller indicates in @transferred_or_error the outcome of the operation,
734  * supplying a positive value to indicate the number of bytes transferred or a
735  * negative error code.  The library will look after reissuing I/O operations
736  * as appropriate and writing downloaded data to the cache.
737  *
738  * If @was_async is true, the caller might be running in softirq or interrupt
739  * context and we can't sleep.
740  *
741  * When this is called, ownership of the subrequest is transferred back to the
742  * library, along with a ref.
743  *
744  * Note that %_op is a void* so that the function can be passed to
745  * kiocb::term_func without the need for a casting wrapper.
746  */
747 void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
748                                        bool was_async)
749 {
750         struct netfs_io_subrequest *subreq = _op;
751         struct netfs_io_request *wreq = subreq->rreq;
752         struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
753 
754         _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
755 
756         switch (subreq->source) {
757         case NETFS_UPLOAD_TO_SERVER:
758                 netfs_stat(&netfs_n_wh_upload_done);
759                 break;
760         case NETFS_WRITE_TO_CACHE:
761                 netfs_stat(&netfs_n_wh_write_done);
762                 break;
763         case NETFS_INVALID_WRITE:
764                 break;
765         default:
766                 BUG();
767         }
768 
769         if (IS_ERR_VALUE(transferred_or_error)) {
770                 subreq->error = transferred_or_error;
771                 if (subreq->error == -EAGAIN)
772                         set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
773                 else
774                         set_bit(NETFS_SREQ_FAILED, &subreq->flags);
775                 trace_netfs_failure(wreq, subreq, transferred_or_error, netfs_fail_write);
776 
777                 switch (subreq->source) {
778                 case NETFS_WRITE_TO_CACHE:
779                         netfs_stat(&netfs_n_wh_write_failed);
780                         break;
781                 case NETFS_UPLOAD_TO_SERVER:
782                         netfs_stat(&netfs_n_wh_upload_failed);
783                         break;
784                 default:
785                         break;
786                 }
787                 trace_netfs_rreq(wreq, netfs_rreq_trace_set_pause);
788                 set_bit(NETFS_RREQ_PAUSE, &wreq->flags);
789         } else {
790                 if (WARN(transferred_or_error > subreq->len - subreq->transferred,
791                          "Subreq excess write: R=%x[%x] %zd > %zu - %zu",
792                          wreq->debug_id, subreq->debug_index,
793                          transferred_or_error, subreq->len, subreq->transferred))
794                         transferred_or_error = subreq->len - subreq->transferred;
795 
796                 subreq->error = 0;
797                 subreq->transferred += transferred_or_error;
798 
799                 if (subreq->transferred < subreq->len)
800                         set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
801         }
802 
803         trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
804 
805         clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
806         wake_up_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS);
807 
808         /* If we are at the head of the queue, wake up the collector,
809          * transferring a ref to it if we were the ones to do so.
810          */
811         if (list_is_first(&subreq->rreq_link, &stream->subrequests))
812                 netfs_wake_write_collector(wreq, was_async);
813 
814         netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
815 }
816 EXPORT_SYMBOL(netfs_write_subrequest_terminated);
817 
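
Usage note (not part of the file above): netfs_write_subrequest_terminated() is the exported hook that a network filesystem calls from its own write-completion path to hand a subrequest's result back to netfslib. The fragment below is a minimal, hypothetical sketch of that call; the names struct myfs_io and myfs_write_done are invented for illustration and do not exist in the kernel.

#include <linux/netfs.h>

/* Backend-private per-I/O state; only the subrequest pointer matters here. */
struct myfs_io {
        struct netfs_io_subrequest *subreq;
};

/* Completion callback of a hypothetical backend: report the outcome (bytes
 * written, or a negative errno) to netfslib.  -EAGAIN or a short write marks
 * the subrequest for retry; other errors mark it as failed.  Ownership of the
 * subrequest, along with a ref, passes back to the library.
 */
static void myfs_write_done(struct myfs_io *io, ssize_t result, bool was_async)
{
        netfs_write_subrequest_terminated(io->subreq, result, was_async);
}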
