~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/mpage.c

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /fs/mpage.c (Architecture m68k) and /fs/mpage.c (Architecture i386)


  1 // SPDX-License-Identifier: GPL-2.0                 1 // SPDX-License-Identifier: GPL-2.0
  2 /*                                                  2 /*
  3  * fs/mpage.c                                       3  * fs/mpage.c
  4  *                                                  4  *
  5  * Copyright (C) 2002, Linus Torvalds.              5  * Copyright (C) 2002, Linus Torvalds.
  6  *                                                  6  *
  7  * Contains functions related to preparing and      7  * Contains functions related to preparing and submitting BIOs which contain
  8  * multiple pagecache pages.                        8  * multiple pagecache pages.
  9  *                                                  9  *
 10  * 15May2002    Andrew Morton                      10  * 15May2002    Andrew Morton
 11  *              Initial version                    11  *              Initial version
 12  * 27Jun2002    axboe@suse.de                      12  * 27Jun2002    axboe@suse.de
 13  *              use bio_add_page() to build bi     13  *              use bio_add_page() to build bio's just the right size
 14  */                                                14  */
 15                                                    15 
 16 #include <linux/kernel.h>                          16 #include <linux/kernel.h>
 17 #include <linux/export.h>                          17 #include <linux/export.h>
 18 #include <linux/mm.h>                              18 #include <linux/mm.h>
 19 #include <linux/kdev_t.h>                          19 #include <linux/kdev_t.h>
 20 #include <linux/gfp.h>                             20 #include <linux/gfp.h>
 21 #include <linux/bio.h>                             21 #include <linux/bio.h>
 22 #include <linux/fs.h>                              22 #include <linux/fs.h>
 23 #include <linux/buffer_head.h>                     23 #include <linux/buffer_head.h>
 24 #include <linux/blkdev.h>                          24 #include <linux/blkdev.h>
 25 #include <linux/highmem.h>                         25 #include <linux/highmem.h>
 26 #include <linux/prefetch.h>                        26 #include <linux/prefetch.h>
 27 #include <linux/mpage.h>                           27 #include <linux/mpage.h>
 28 #include <linux/mm_inline.h>                       28 #include <linux/mm_inline.h>
 29 #include <linux/writeback.h>                       29 #include <linux/writeback.h>
 30 #include <linux/backing-dev.h>                     30 #include <linux/backing-dev.h>
 31 #include <linux/pagevec.h>                         31 #include <linux/pagevec.h>
 32 #include "internal.h"                              32 #include "internal.h"
 33                                                    33 
 34 /*                                                 34 /*
 35  * I/O completion handler for multipage BIOs.      35  * I/O completion handler for multipage BIOs.
 36  *                                                 36  *
 37  * The mpage code never puts partial pages int     37  * The mpage code never puts partial pages into a BIO (except for end-of-file).
 38  * If a page does not map to a contiguous run      38  * If a page does not map to a contiguous run of blocks then it simply falls
 39  * back to block_read_full_folio().                39  * back to block_read_full_folio().
 40  *                                                 40  *
 41  * Why is this?  If a page's completion depend     41  * Why is this?  If a page's completion depends on a number of different BIOs
 42  * which can complete in any order (or at the      42  * which can complete in any order (or at the same time) then determining the
 43  * status of that page is hard.  See end_buffe     43  * status of that page is hard.  See end_buffer_async_read() for the details.
 44  * There is no point in duplicating all that c     44  * There is no point in duplicating all that complexity.
 45  */                                                45  */
 46 static void mpage_read_end_io(struct bio *bio)     46 static void mpage_read_end_io(struct bio *bio)
 47 {                                                  47 {
 48         struct folio_iter fi;                      48         struct folio_iter fi;
 49         int err = blk_status_to_errno(bio->bi_     49         int err = blk_status_to_errno(bio->bi_status);
 50                                                    50 
 51         bio_for_each_folio_all(fi, bio)            51         bio_for_each_folio_all(fi, bio)
 52                 folio_end_read(fi.folio, err =     52                 folio_end_read(fi.folio, err == 0);
 53                                                    53 
 54         bio_put(bio);                              54         bio_put(bio);
 55 }                                                  55 }
 56                                                    56 
 57 static void mpage_write_end_io(struct bio *bio     57 static void mpage_write_end_io(struct bio *bio)
 58 {                                                  58 {
 59         struct folio_iter fi;                      59         struct folio_iter fi;
 60         int err = blk_status_to_errno(bio->bi_     60         int err = blk_status_to_errno(bio->bi_status);
 61                                                    61 
 62         bio_for_each_folio_all(fi, bio) {          62         bio_for_each_folio_all(fi, bio) {
 63                 if (err)                           63                 if (err)
 64                         mapping_set_error(fi.f     64                         mapping_set_error(fi.folio->mapping, err);
 65                 folio_end_writeback(fi.folio);     65                 folio_end_writeback(fi.folio);
 66         }                                          66         }
 67                                                    67 
 68         bio_put(bio);                              68         bio_put(bio);
 69 }                                                  69 }
 70                                                    70 
 71 static struct bio *mpage_bio_submit_read(struc     71 static struct bio *mpage_bio_submit_read(struct bio *bio)
 72 {                                                  72 {
 73         bio->bi_end_io = mpage_read_end_io;        73         bio->bi_end_io = mpage_read_end_io;
 74         guard_bio_eod(bio);                        74         guard_bio_eod(bio);
 75         submit_bio(bio);                           75         submit_bio(bio);
 76         return NULL;                               76         return NULL;
 77 }                                                  77 }
 78                                                    78 
 79 static struct bio *mpage_bio_submit_write(stru     79 static struct bio *mpage_bio_submit_write(struct bio *bio)
 80 {                                                  80 {
 81         bio->bi_end_io = mpage_write_end_io;       81         bio->bi_end_io = mpage_write_end_io;
 82         guard_bio_eod(bio);                        82         guard_bio_eod(bio);
 83         submit_bio(bio);                           83         submit_bio(bio);
 84         return NULL;                               84         return NULL;
 85 }                                                  85 }
 86                                                    86 
 87 /*                                                 87 /*
 88  * support function for mpage_readahead.  The      88  * support function for mpage_readahead.  The fs supplied get_block might
 89  * return an up to date buffer.  This is used      89  * return an up to date buffer.  This is used to map that buffer into
 90  * the page, which allows read_folio to avoid      90  * the page, which allows read_folio to avoid triggering a duplicate call
 91  * to get_block.                                   91  * to get_block.
 92  *                                                 92  *
 93  * The idea is to avoid adding buffers to page     93  * The idea is to avoid adding buffers to pages that don't already have
 94  * them.  So when the buffer is up to date and     94  * them.  So when the buffer is up to date and the page size == block size,
 95  * this marks the page up to date instead of a     95  * this marks the page up to date instead of adding new buffers.
 96  */                                                96  */
 97 static void map_buffer_to_folio(struct folio *     97 static void map_buffer_to_folio(struct folio *folio, struct buffer_head *bh,
 98                 int page_block)                    98                 int page_block)
 99 {                                                  99 {
100         struct inode *inode = folio->mapping->    100         struct inode *inode = folio->mapping->host;
101         struct buffer_head *page_bh, *head;       101         struct buffer_head *page_bh, *head;
102         int block = 0;                            102         int block = 0;
103                                                   103 
104         head = folio_buffers(folio);              104         head = folio_buffers(folio);
105         if (!head) {                              105         if (!head) {
106                 /*                                106                 /*
107                  * don't make any buffers if t    107                  * don't make any buffers if there is only one buffer on
108                  * the folio and the folio jus    108                  * the folio and the folio just needs to be set up to date
109                  */                               109                  */
110                 if (inode->i_blkbits == PAGE_S    110                 if (inode->i_blkbits == PAGE_SHIFT &&
111                     buffer_uptodate(bh)) {        111                     buffer_uptodate(bh)) {
112                         folio_mark_uptodate(fo    112                         folio_mark_uptodate(folio);
113                         return;                   113                         return;
114                 }                                 114                 }
115                 head = create_empty_buffers(fo    115                 head = create_empty_buffers(folio, i_blocksize(inode), 0);
116         }                                         116         }
117                                                   117 
118         page_bh = head;                           118         page_bh = head;
119         do {                                      119         do {
120                 if (block == page_block) {        120                 if (block == page_block) {
121                         page_bh->b_state = bh-    121                         page_bh->b_state = bh->b_state;
122                         page_bh->b_bdev = bh->    122                         page_bh->b_bdev = bh->b_bdev;
123                         page_bh->b_blocknr = b    123                         page_bh->b_blocknr = bh->b_blocknr;
124                         break;                    124                         break;
125                 }                                 125                 }
126                 page_bh = page_bh->b_this_page    126                 page_bh = page_bh->b_this_page;
127                 block++;                          127                 block++;
128         } while (page_bh != head);                128         } while (page_bh != head);
129 }                                                 129 }
130                                                   130 
131 struct mpage_readpage_args {                      131 struct mpage_readpage_args {
132         struct bio *bio;                          132         struct bio *bio;
133         struct folio *folio;                      133         struct folio *folio;
134         unsigned int nr_pages;                    134         unsigned int nr_pages;
135         bool is_readahead;                        135         bool is_readahead;
136         sector_t last_block_in_bio;               136         sector_t last_block_in_bio;
137         struct buffer_head map_bh;                137         struct buffer_head map_bh;
138         unsigned long first_logical_block;        138         unsigned long first_logical_block;
139         get_block_t *get_block;                   139         get_block_t *get_block;
140 };                                                140 };
141                                                   141 
142 /*                                                142 /*
143  * This is the worker routine which does all t    143  * This is the worker routine which does all the work of mapping the disk
144  * blocks and constructs largest possible bios    144  * blocks and constructs largest possible bios, submits them for IO if the
145  * blocks are not contiguous on the disk.         145  * blocks are not contiguous on the disk.
146  *                                                146  *
147  * We pass a buffer_head back and forth and us    147  * We pass a buffer_head back and forth and use its buffer_mapped() flag to
148  * represent the validity of its disk mapping     148  * represent the validity of its disk mapping and to decide when to do the next
149  * get_block() call.                              149  * get_block() call.
150  */                                               150  */
151 static struct bio *do_mpage_readpage(struct mp    151 static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
152 {                                                 152 {
153         struct folio *folio = args->folio;        153         struct folio *folio = args->folio;
154         struct inode *inode = folio->mapping->    154         struct inode *inode = folio->mapping->host;
155         const unsigned blkbits = inode->i_blkb    155         const unsigned blkbits = inode->i_blkbits;
156         const unsigned blocks_per_page = PAGE_    156         const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
157         const unsigned blocksize = 1 << blkbit    157         const unsigned blocksize = 1 << blkbits;
158         struct buffer_head *map_bh = &args->ma    158         struct buffer_head *map_bh = &args->map_bh;
159         sector_t block_in_file;                   159         sector_t block_in_file;
160         sector_t last_block;                      160         sector_t last_block;
161         sector_t last_block_in_file;              161         sector_t last_block_in_file;
162         sector_t first_block;                     162         sector_t first_block;
163         unsigned page_block;                      163         unsigned page_block;
164         unsigned first_hole = blocks_per_page;    164         unsigned first_hole = blocks_per_page;
165         struct block_device *bdev = NULL;         165         struct block_device *bdev = NULL;
166         int length;                               166         int length;
167         int fully_mapped = 1;                     167         int fully_mapped = 1;
168         blk_opf_t opf = REQ_OP_READ;              168         blk_opf_t opf = REQ_OP_READ;
169         unsigned nblocks;                         169         unsigned nblocks;
170         unsigned relative_block;                  170         unsigned relative_block;
171         gfp_t gfp = mapping_gfp_constraint(fol    171         gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL);
172                                                   172 
173         /* MAX_BUF_PER_PAGE, for example */       173         /* MAX_BUF_PER_PAGE, for example */
174         VM_BUG_ON_FOLIO(folio_test_large(folio    174         VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
175                                                   175 
176         if (args->is_readahead) {                 176         if (args->is_readahead) {
177                 opf |= REQ_RAHEAD;                177                 opf |= REQ_RAHEAD;
178                 gfp |= __GFP_NORETRY | __GFP_N    178                 gfp |= __GFP_NORETRY | __GFP_NOWARN;
179         }                                         179         }
180                                                   180 
181         if (folio_buffers(folio))                 181         if (folio_buffers(folio))
182                 goto confused;                    182                 goto confused;
183                                                   183 
184         block_in_file = (sector_t)folio->index    184         block_in_file = (sector_t)folio->index << (PAGE_SHIFT - blkbits);
185         last_block = block_in_file + args->nr_    185         last_block = block_in_file + args->nr_pages * blocks_per_page;
186         last_block_in_file = (i_size_read(inod    186         last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
187         if (last_block > last_block_in_file)      187         if (last_block > last_block_in_file)
188                 last_block = last_block_in_fil    188                 last_block = last_block_in_file;
189         page_block = 0;                           189         page_block = 0;
190                                                   190 
191         /*                                        191         /*
192          * Map blocks using the result from th    192          * Map blocks using the result from the previous get_blocks call first.
193          */                                       193          */
194         nblocks = map_bh->b_size >> blkbits;      194         nblocks = map_bh->b_size >> blkbits;
195         if (buffer_mapped(map_bh) &&              195         if (buffer_mapped(map_bh) &&
196                         block_in_file > args->    196                         block_in_file > args->first_logical_block &&
197                         block_in_file < (args-    197                         block_in_file < (args->first_logical_block + nblocks)) {
198                 unsigned map_offset = block_in    198                 unsigned map_offset = block_in_file - args->first_logical_block;
199                 unsigned last = nblocks - map_    199                 unsigned last = nblocks - map_offset;
200                                                   200 
201                 first_block = map_bh->b_blockn    201                 first_block = map_bh->b_blocknr + map_offset;
202                 for (relative_block = 0; ; rel    202                 for (relative_block = 0; ; relative_block++) {
203                         if (relative_block ==     203                         if (relative_block == last) {
204                                 clear_buffer_m    204                                 clear_buffer_mapped(map_bh);
205                                 break;            205                                 break;
206                         }                         206                         }
207                         if (page_block == bloc    207                         if (page_block == blocks_per_page)
208                                 break;            208                                 break;
209                         page_block++;             209                         page_block++;
210                         block_in_file++;          210                         block_in_file++;
211                 }                                 211                 }
212                 bdev = map_bh->b_bdev;            212                 bdev = map_bh->b_bdev;
213         }                                         213         }
214                                                   214 
215         /*                                        215         /*
216          * Then do more get_blocks calls until    216          * Then do more get_blocks calls until we are done with this folio.
217          */                                       217          */
218         map_bh->b_folio = folio;                  218         map_bh->b_folio = folio;
219         while (page_block < blocks_per_page) {    219         while (page_block < blocks_per_page) {
220                 map_bh->b_state = 0;              220                 map_bh->b_state = 0;
221                 map_bh->b_size = 0;               221                 map_bh->b_size = 0;
222                                                   222 
223                 if (block_in_file < last_block    223                 if (block_in_file < last_block) {
224                         map_bh->b_size = (last    224                         map_bh->b_size = (last_block-block_in_file) << blkbits;
225                         if (args->get_block(in    225                         if (args->get_block(inode, block_in_file, map_bh, 0))
226                                 goto confused;    226                                 goto confused;
227                         args->first_logical_bl    227                         args->first_logical_block = block_in_file;
228                 }                                 228                 }
229                                                   229 
230                 if (!buffer_mapped(map_bh)) {     230                 if (!buffer_mapped(map_bh)) {
231                         fully_mapped = 0;         231                         fully_mapped = 0;
232                         if (first_hole == bloc    232                         if (first_hole == blocks_per_page)
233                                 first_hole = p    233                                 first_hole = page_block;
234                         page_block++;             234                         page_block++;
235                         block_in_file++;          235                         block_in_file++;
236                         continue;                 236                         continue;
237                 }                                 237                 }
238                                                   238 
239                 /* some filesystems will copy     239                 /* some filesystems will copy data into the page during
240                  * the get_block call, in whic    240                  * the get_block call, in which case we don't want to
241                  * read it again.  map_buffer_    241                  * read it again.  map_buffer_to_folio copies the data
242                  * we just collected from get_    242                  * we just collected from get_block into the folio's buffers
243                  * so read_folio doesn't have     243                  * so read_folio doesn't have to repeat the get_block call
244                  */                               244                  */
245                 if (buffer_uptodate(map_bh)) {    245                 if (buffer_uptodate(map_bh)) {
246                         map_buffer_to_folio(fo    246                         map_buffer_to_folio(folio, map_bh, page_block);
247                         goto confused;            247                         goto confused;
248                 }                                 248                 }
249                                                   249         
250                 if (first_hole != blocks_per_p    250                 if (first_hole != blocks_per_page)
251                         goto confused;            251                         goto confused;          /* hole -> non-hole */
252                                                   252 
253                 /* Contiguous blocks? */          253                 /* Contiguous blocks? */
254                 if (!page_block)                  254                 if (!page_block)
255                         first_block = map_bh->    255                         first_block = map_bh->b_blocknr;
256                 else if (first_block + page_bl    256                 else if (first_block + page_block != map_bh->b_blocknr)
257                         goto confused;            257                         goto confused;
258                 nblocks = map_bh->b_size >> bl    258                 nblocks = map_bh->b_size >> blkbits;
259                 for (relative_block = 0; ; rel    259                 for (relative_block = 0; ; relative_block++) {
260                         if (relative_block ==     260                         if (relative_block == nblocks) {
261                                 clear_buffer_m    261                                 clear_buffer_mapped(map_bh);
262                                 break;            262                                 break;
263                         } else if (page_block     263                         } else if (page_block == blocks_per_page)
264                                 break;            264                                 break;
265                         page_block++;             265                         page_block++;
266                         block_in_file++;          266                         block_in_file++;
267                 }                                 267                 }
268                 bdev = map_bh->b_bdev;            268                 bdev = map_bh->b_bdev;
269         }                                         269         }
270                                                   270 
271         if (first_hole != blocks_per_page) {      271         if (first_hole != blocks_per_page) {
272                 folio_zero_segment(folio, firs    272                 folio_zero_segment(folio, first_hole << blkbits, PAGE_SIZE);
273                 if (first_hole == 0) {            273                 if (first_hole == 0) {
274                         folio_mark_uptodate(fo    274                         folio_mark_uptodate(folio);
275                         folio_unlock(folio);      275                         folio_unlock(folio);
276                         goto out;                 276                         goto out;
277                 }                                 277                 }
278         } else if (fully_mapped) {                278         } else if (fully_mapped) {
279                 folio_set_mappedtodisk(folio);    279                 folio_set_mappedtodisk(folio);
280         }                                         280         }
281                                                   281 
282         /*                                        282         /*
283          * This folio will go to BIO.  Do we n    283          * This folio will go to BIO.  Do we need to send this BIO off first?
284          */                                       284          */
285         if (args->bio && (args->last_block_in_    285         if (args->bio && (args->last_block_in_bio != first_block - 1))
286                 args->bio = mpage_bio_submit_r    286                 args->bio = mpage_bio_submit_read(args->bio);
287                                                   287 
288 alloc_new:                                        288 alloc_new:
289         if (args->bio == NULL) {                  289         if (args->bio == NULL) {
290                 args->bio = bio_alloc(bdev, bi    290                 args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), opf,
291                                       gfp);       291                                       gfp);
292                 if (args->bio == NULL)            292                 if (args->bio == NULL)
293                         goto confused;            293                         goto confused;
294                 args->bio->bi_iter.bi_sector =    294                 args->bio->bi_iter.bi_sector = first_block << (blkbits - 9);
295         }                                         295         }
296                                                   296 
297         length = first_hole << blkbits;           297         length = first_hole << blkbits;
298         if (!bio_add_folio(args->bio, folio, l    298         if (!bio_add_folio(args->bio, folio, length, 0)) {
299                 args->bio = mpage_bio_submit_r    299                 args->bio = mpage_bio_submit_read(args->bio);
300                 goto alloc_new;                   300                 goto alloc_new;
301         }                                         301         }
302                                                   302 
303         relative_block = block_in_file - args-    303         relative_block = block_in_file - args->first_logical_block;
304         nblocks = map_bh->b_size >> blkbits;      304         nblocks = map_bh->b_size >> blkbits;
305         if ((buffer_boundary(map_bh) && relati    305         if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
306             (first_hole != blocks_per_page))      306             (first_hole != blocks_per_page))
307                 args->bio = mpage_bio_submit_r    307                 args->bio = mpage_bio_submit_read(args->bio);
308         else                                      308         else
309                 args->last_block_in_bio = firs    309                 args->last_block_in_bio = first_block + blocks_per_page - 1;
310 out:                                              310 out:
311         return args->bio;                         311         return args->bio;
312                                                   312 
313 confused:                                         313 confused:
314         if (args->bio)                            314         if (args->bio)
315                 args->bio = mpage_bio_submit_r    315                 args->bio = mpage_bio_submit_read(args->bio);
316         if (!folio_test_uptodate(folio))          316         if (!folio_test_uptodate(folio))
317                 block_read_full_folio(folio, a    317                 block_read_full_folio(folio, args->get_block);
318         else                                      318         else
319                 folio_unlock(folio);              319                 folio_unlock(folio);
320         goto out;                                 320         goto out;
321 }                                                 321 }
322                                                   322 
323 /**                                               323 /**
324  * mpage_readahead - start reads against pages    324  * mpage_readahead - start reads against pages
325  * @rac: Describes which pages to read.           325  * @rac: Describes which pages to read.
326  * @get_block: The filesystem's block mapper f    326  * @get_block: The filesystem's block mapper function.
327  *                                                327  *
328  * This function walks the pages and the block    328  * This function walks the pages and the blocks within each page, building and
329  * emitting large BIOs.                           329  * emitting large BIOs.
330  *                                                330  *
331  * If anything unusual happens, such as:          331  * If anything unusual happens, such as:
332  *                                                332  *
333  * - encountering a page which has buffers        333  * - encountering a page which has buffers
334  * - encountering a page which has a non-hole     334  * - encountering a page which has a non-hole after a hole
335  * - encountering a page with non-contiguous b    335  * - encountering a page with non-contiguous blocks
336  *                                                336  *
337  * then this code just gives up and calls the     337  * then this code just gives up and calls the buffer_head-based read function.
338  * It does handle a page which has holes at th    338  * It does handle a page which has holes at the end - that is a common case:
339  * the end-of-file on blocksize < PAGE_SIZE se    339  * the end-of-file on blocksize < PAGE_SIZE setups.
340  *                                                340  *
341  * BH_Boundary explanation:                       341  * BH_Boundary explanation:
342  *                                                342  *
343  * There is a problem.  The mpage read code as    343  * There is a problem.  The mpage read code assembles several pages, gets all
344  * their disk mappings, and then submits them     344  * their disk mappings, and then submits them all.  That's fine, but obtaining
345  * the disk mappings may require I/O.  Reads o    345  * the disk mappings may require I/O.  Reads of indirect blocks, for example.
346  *                                                346  *
347  * So an mpage read of the first 16 blocks of     347  * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
348  * submitted in the following order:              348  * submitted in the following order:
349  *                                                349  *
350  *      12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15     350  *      12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
351  *                                                351  *
352  * because the indirect block has to be read t    352  * because the indirect block has to be read to get the mappings of blocks
353  * 13,14,15,16.  Obviously, this impacts perfo    353  * 13,14,15,16.  Obviously, this impacts performance.
354  *                                                354  *
355  * So what we do it to allow the filesystem's     355  * So what we do it to allow the filesystem's get_block() function to set
356  * BH_Boundary when it maps block 11.  BH_Boun    356  * BH_Boundary when it maps block 11.  BH_Boundary says: mapping of the block
357  * after this one will require I/O against a b    357  * after this one will require I/O against a block which is probably close to
358  * this one.  So you should push what I/O you     358  * this one.  So you should push what I/O you have currently accumulated.
359  *                                                359  *
360  * This all causes the disk requests to be iss    360  * This all causes the disk requests to be issued in the correct order.
361  */                                               361  */
362 void mpage_readahead(struct readahead_control     362 void mpage_readahead(struct readahead_control *rac, get_block_t get_block)
363 {                                                 363 {
364         struct folio *folio;                      364         struct folio *folio;
365         struct mpage_readpage_args args = {       365         struct mpage_readpage_args args = {
366                 .get_block = get_block,           366                 .get_block = get_block,
367                 .is_readahead = true,             367                 .is_readahead = true,
368         };                                        368         };
369                                                   369 
370         while ((folio = readahead_folio(rac)))    370         while ((folio = readahead_folio(rac))) {
371                 prefetchw(&folio->flags);         371                 prefetchw(&folio->flags);
372                 args.folio = folio;               372                 args.folio = folio;
373                 args.nr_pages = readahead_coun    373                 args.nr_pages = readahead_count(rac);
374                 args.bio = do_mpage_readpage(&    374                 args.bio = do_mpage_readpage(&args);
375         }                                         375         }
376         if (args.bio)                             376         if (args.bio)
377                 mpage_bio_submit_read(args.bio    377                 mpage_bio_submit_read(args.bio);
378 }                                                 378 }
379 EXPORT_SYMBOL(mpage_readahead);                   379 EXPORT_SYMBOL(mpage_readahead);
380                                                   380 
381 /*                                                381 /*
382  * This isn't called much at all                  382  * This isn't called much at all
383  */                                               383  */
384 int mpage_read_folio(struct folio *folio, get_    384 int mpage_read_folio(struct folio *folio, get_block_t get_block)
385 {                                                 385 {
386         struct mpage_readpage_args args = {       386         struct mpage_readpage_args args = {
387                 .folio = folio,                   387                 .folio = folio,
388                 .nr_pages = 1,                    388                 .nr_pages = 1,
389                 .get_block = get_block,           389                 .get_block = get_block,
390         };                                        390         };
391                                                   391 
392         args.bio = do_mpage_readpage(&args);      392         args.bio = do_mpage_readpage(&args);
393         if (args.bio)                             393         if (args.bio)
394                 mpage_bio_submit_read(args.bio    394                 mpage_bio_submit_read(args.bio);
395         return 0;                                 395         return 0;
396 }                                                 396 }
397 EXPORT_SYMBOL(mpage_read_folio);                  397 EXPORT_SYMBOL(mpage_read_folio);
398                                                   398 
399 /*                                                399 /*
400  * Writing is not so simple.                      400  * Writing is not so simple.
401  *                                                401  *
402  * If the page has buffers then they will be u    402  * If the page has buffers then they will be used for obtaining the disk
403  * mapping.  We only support pages which are f    403  * mapping.  We only support pages which are fully mapped-and-dirty, with a
404  * special case for pages which are unmapped a    404  * special case for pages which are unmapped at the end: end-of-file.
405  *                                                405  *
406  * If the page has no buffers (preferred) then    406  * If the page has no buffers (preferred) then the page is mapped here.
407  *                                                407  *
408  * If all blocks are found to be contiguous th    408  * If all blocks are found to be contiguous then the page can go into the
409  * BIO.  Otherwise fall back to the mapping's     409  * BIO.  Otherwise fall back to the mapping's writepage().
410  *                                                410  * 
411  * FIXME: This code wants an estimate of how m    411  * FIXME: This code wants an estimate of how many pages are still to be
412  * written, so it can intelligently allocate a    412  * written, so it can intelligently allocate a suitably-sized BIO.  For now,
413  * just allocate full-size (16-page) BIOs.        413  * just allocate full-size (16-page) BIOs.
414  */                                               414  */
415                                                   415 
416 struct mpage_data {                               416 struct mpage_data {
417         struct bio *bio;                          417         struct bio *bio;
418         sector_t last_block_in_bio;               418         sector_t last_block_in_bio;
419         get_block_t *get_block;                   419         get_block_t *get_block;
420 };                                                420 };
421                                                   421 
422 /*                                                422 /*
423  * We have our BIO, so we can now mark the buf    423  * We have our BIO, so we can now mark the buffers clean.  Make
424  * sure to only clean buffers which we know we    424  * sure to only clean buffers which we know we'll be writing.
425  */                                               425  */
426 static void clean_buffers(struct folio *folio,    426 static void clean_buffers(struct folio *folio, unsigned first_unmapped)
427 {                                                 427 {
428         unsigned buffer_counter = 0;              428         unsigned buffer_counter = 0;
429         struct buffer_head *bh, *head = folio_    429         struct buffer_head *bh, *head = folio_buffers(folio);
430                                                   430 
431         if (!head)                                431         if (!head)
432                 return;                           432                 return;
433         bh = head;                                433         bh = head;
434                                                   434 
435         do {                                      435         do {
436                 if (buffer_counter++ == first_    436                 if (buffer_counter++ == first_unmapped)
437                         break;                    437                         break;
438                 clear_buffer_dirty(bh);           438                 clear_buffer_dirty(bh);
439                 bh = bh->b_this_page;             439                 bh = bh->b_this_page;
440         } while (bh != head);                     440         } while (bh != head);
441                                                   441 
442         /*                                        442         /*
443          * we cannot drop the bh if the page i    443          * we cannot drop the bh if the page is not uptodate or a concurrent
444          * read_folio would fail to serialize     444          * read_folio would fail to serialize with the bh and it would read from
445          * disk before we reach the platter.      445          * disk before we reach the platter.
446          */                                       446          */
447         if (buffer_heads_over_limit && folio_t    447         if (buffer_heads_over_limit && folio_test_uptodate(folio))
448                 try_to_free_buffers(folio);       448                 try_to_free_buffers(folio);
449 }                                                 449 }
450                                                   450 
451 static int __mpage_writepage(struct folio *fol    451 static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
452                       void *data)                 452                       void *data)
453 {                                                 453 {
454         struct mpage_data *mpd = data;            454         struct mpage_data *mpd = data;
455         struct bio *bio = mpd->bio;               455         struct bio *bio = mpd->bio;
456         struct address_space *mapping = folio-    456         struct address_space *mapping = folio->mapping;
457         struct inode *inode = mapping->host;      457         struct inode *inode = mapping->host;
458         const unsigned blkbits = inode->i_blkb    458         const unsigned blkbits = inode->i_blkbits;
459         const unsigned blocks_per_page = PAGE_    459         const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
460         sector_t last_block;                      460         sector_t last_block;
461         sector_t block_in_file;                   461         sector_t block_in_file;
462         sector_t first_block;                     462         sector_t first_block;
463         unsigned page_block;                      463         unsigned page_block;
464         unsigned first_unmapped = blocks_per_p    464         unsigned first_unmapped = blocks_per_page;
465         struct block_device *bdev = NULL;         465         struct block_device *bdev = NULL;
466         int boundary = 0;                         466         int boundary = 0;
467         sector_t boundary_block = 0;              467         sector_t boundary_block = 0;
468         struct block_device *boundary_bdev = N    468         struct block_device *boundary_bdev = NULL;
469         size_t length;                            469         size_t length;
470         struct buffer_head map_bh;                470         struct buffer_head map_bh;
471         loff_t i_size = i_size_read(inode);       471         loff_t i_size = i_size_read(inode);
472         int ret = 0;                              472         int ret = 0;
473         struct buffer_head *head = folio_buffe    473         struct buffer_head *head = folio_buffers(folio);
474                                                   474 
475         if (head) {                               475         if (head) {
476                 struct buffer_head *bh = head;    476                 struct buffer_head *bh = head;
477                                                   477 
478                 /* If they're all mapped and d    478                 /* If they're all mapped and dirty, do it */
479                 page_block = 0;                   479                 page_block = 0;
480                 do {                              480                 do {
481                         BUG_ON(buffer_locked(b    481                         BUG_ON(buffer_locked(bh));
482                         if (!buffer_mapped(bh)    482                         if (!buffer_mapped(bh)) {
483                                 /*                483                                 /*
484                                  * unmapped di    484                                  * unmapped dirty buffers are created by
485                                  * block_dirty    485                                  * block_dirty_folio -> mmapped data
486                                  */               486                                  */
487                                 if (buffer_dir    487                                 if (buffer_dirty(bh))
488                                         goto c    488                                         goto confused;
489                                 if (first_unma    489                                 if (first_unmapped == blocks_per_page)
490                                         first_    490                                         first_unmapped = page_block;
491                                 continue;         491                                 continue;
492                         }                         492                         }
493                                                   493 
494                         if (first_unmapped !=     494                         if (first_unmapped != blocks_per_page)
495                                 goto confused;    495                                 goto confused;  /* hole -> non-hole */
496                                                   496 
497                         if (!buffer_dirty(bh)     497                         if (!buffer_dirty(bh) || !buffer_uptodate(bh))
498                                 goto confused;    498                                 goto confused;
499                         if (page_block) {         499                         if (page_block) {
500                                 if (bh->b_bloc    500                                 if (bh->b_blocknr != first_block + page_block)
501                                         goto c    501                                         goto confused;
502                         } else {                  502                         } else {
503                                 first_block =     503                                 first_block = bh->b_blocknr;
504                         }                         504                         }
505                         page_block++;             505                         page_block++;
506                         boundary = buffer_boun    506                         boundary = buffer_boundary(bh);
507                         if (boundary) {           507                         if (boundary) {
508                                 boundary_block    508                                 boundary_block = bh->b_blocknr;
509                                 boundary_bdev     509                                 boundary_bdev = bh->b_bdev;
510                         }                         510                         }
511                         bdev = bh->b_bdev;        511                         bdev = bh->b_bdev;
512                 } while ((bh = bh->b_this_page    512                 } while ((bh = bh->b_this_page) != head);
513                                                   513 
514                 if (first_unmapped)               514                 if (first_unmapped)
515                         goto page_is_mapped;      515                         goto page_is_mapped;
516                                                   516 
517                 /*                                517                 /*
518                  * Page has buffers, but they     518                  * Page has buffers, but they are all unmapped. The page was
519                  * created by pagein or read o    519                  * created by pagein or read over a hole which was handled by
520                  * block_read_full_folio().  I    520                  * block_read_full_folio().  If this address_space is also
521                  * using mpage_readahead then     521                  * using mpage_readahead then this can rarely happen.
522                  */                               522                  */
523                 goto confused;                    523                 goto confused;
524         }                                         524         }
525                                                   525 
526         /*                                        526         /*
527          * The page has no buffers: map it to     527          * The page has no buffers: map it to disk
528          */                                       528          */
529         BUG_ON(!folio_test_uptodate(folio));      529         BUG_ON(!folio_test_uptodate(folio));
530         block_in_file = (sector_t)folio->index    530         block_in_file = (sector_t)folio->index << (PAGE_SHIFT - blkbits);
531         /*                                        531         /*
532          * Whole page beyond EOF? Skip allocat    532          * Whole page beyond EOF? Skip allocating blocks to avoid leaking
533          * space.                                 533          * space.
534          */                                       534          */
535         if (block_in_file >= (i_size + (1 << b    535         if (block_in_file >= (i_size + (1 << blkbits) - 1) >> blkbits)
536                 goto page_is_mapped;              536                 goto page_is_mapped;
537         last_block = (i_size - 1) >> blkbits;     537         last_block = (i_size - 1) >> blkbits;
538         map_bh.b_folio = folio;                   538         map_bh.b_folio = folio;
539         for (page_block = 0; page_block < bloc    539         for (page_block = 0; page_block < blocks_per_page; ) {
540                                                   540 
541                 map_bh.b_state = 0;               541                 map_bh.b_state = 0;
542                 map_bh.b_size = 1 << blkbits;     542                 map_bh.b_size = 1 << blkbits;
543                 if (mpd->get_block(inode, bloc    543                 if (mpd->get_block(inode, block_in_file, &map_bh, 1))
544                         goto confused;            544                         goto confused;
545                 if (!buffer_mapped(&map_bh))      545                 if (!buffer_mapped(&map_bh))
546                         goto confused;            546                         goto confused;
547                 if (buffer_new(&map_bh))          547                 if (buffer_new(&map_bh))
548                         clean_bdev_bh_alias(&m    548                         clean_bdev_bh_alias(&map_bh);
549                 if (buffer_boundary(&map_bh))     549                 if (buffer_boundary(&map_bh)) {
550                         boundary_block = map_b    550                         boundary_block = map_bh.b_blocknr;
551                         boundary_bdev = map_bh    551                         boundary_bdev = map_bh.b_bdev;
552                 }                                 552                 }
553                 if (page_block) {                 553                 if (page_block) {
554                         if (map_bh.b_blocknr !    554                         if (map_bh.b_blocknr != first_block + page_block)
555                                 goto confused;    555                                 goto confused;
556                 } else {                          556                 } else {
557                         first_block = map_bh.b    557                         first_block = map_bh.b_blocknr;
558                 }                                 558                 }
559                 page_block++;                     559                 page_block++;
560                 boundary = buffer_boundary(&ma    560                 boundary = buffer_boundary(&map_bh);
561                 bdev = map_bh.b_bdev;             561                 bdev = map_bh.b_bdev;
562                 if (block_in_file == last_bloc    562                 if (block_in_file == last_block)
563                         break;                    563                         break;
564                 block_in_file++;                  564                 block_in_file++;
565         }                                         565         }
566         BUG_ON(page_block == 0);                  566         BUG_ON(page_block == 0);
567                                                   567 
568         first_unmapped = page_block;              568         first_unmapped = page_block;
569                                                   569 
570 page_is_mapped:                                   570 page_is_mapped:
571         /* Don't bother writing beyond EOF, tr    571         /* Don't bother writing beyond EOF, truncate will discard the folio */
572         if (folio_pos(folio) >= i_size)           572         if (folio_pos(folio) >= i_size)
573                 goto confused;                    573                 goto confused;
574         length = folio_size(folio);               574         length = folio_size(folio);
575         if (folio_pos(folio) + length > i_size    575         if (folio_pos(folio) + length > i_size) {
576                 /*                                576                 /*
577                  * The page straddles i_size.     577                  * The page straddles i_size.  It must be zeroed out on each
578                  * and every writepage invocat    578                  * and every writepage invocation because it may be mmapped.
579                  * "A file is mapped in multip    579                  * "A file is mapped in multiples of the page size.  For a file
580                  * that is not a multiple of t    580                  * that is not a multiple of the page size, the remaining memory
581                  * is zeroed when mapped, and     581                  * is zeroed when mapped, and writes to that region are not
582                  * written out to the file."      582                  * written out to the file."
583                  */                               583                  */
584                 length = i_size - folio_pos(fo    584                 length = i_size - folio_pos(folio);
585                 folio_zero_segment(folio, leng    585                 folio_zero_segment(folio, length, folio_size(folio));
586         }                                         586         }
587                                                   587 
588         /*                                        588         /*
589          * This page will go to BIO.  Do we ne    589          * This page will go to BIO.  Do we need to send this BIO off first?
590          */                                       590          */
591         if (bio && mpd->last_block_in_bio != f    591         if (bio && mpd->last_block_in_bio != first_block - 1)
592                 bio = mpage_bio_submit_write(b    592                 bio = mpage_bio_submit_write(bio);
593                                                   593 
594 alloc_new:                                        594 alloc_new:
595         if (bio == NULL) {                        595         if (bio == NULL) {
596                 bio = bio_alloc(bdev, BIO_MAX_    596                 bio = bio_alloc(bdev, BIO_MAX_VECS,
597                                 REQ_OP_WRITE |    597                                 REQ_OP_WRITE | wbc_to_write_flags(wbc),
598                                 GFP_NOFS);        598                                 GFP_NOFS);
599                 bio->bi_iter.bi_sector = first    599                 bio->bi_iter.bi_sector = first_block << (blkbits - 9);
600                 wbc_init_bio(wbc, bio);           600                 wbc_init_bio(wbc, bio);
601                 bio->bi_write_hint = inode->i_    601                 bio->bi_write_hint = inode->i_write_hint;
602         }                                         602         }
603                                                   603 
604         /*                                        604         /*
605          * Must try to add the page before mar    605          * Must try to add the page before marking the buffer clean or
606          * the confused fail path above (OOM)     606          * the confused fail path above (OOM) will be very confused when
607          * it finds all bh marked clean (i.e.     607          * it finds all bh marked clean (i.e. it will not write anything)
608          */                                       608          */
609         wbc_account_cgroup_owner(wbc, &folio->    609         wbc_account_cgroup_owner(wbc, &folio->page, folio_size(folio));
610         length = first_unmapped << blkbits;       610         length = first_unmapped << blkbits;
611         if (!bio_add_folio(bio, folio, length,    611         if (!bio_add_folio(bio, folio, length, 0)) {
612                 bio = mpage_bio_submit_write(b    612                 bio = mpage_bio_submit_write(bio);
613                 goto alloc_new;                   613                 goto alloc_new;
614         }                                         614         }
615                                                   615 
616         clean_buffers(folio, first_unmapped);     616         clean_buffers(folio, first_unmapped);
617                                                   617 
618         BUG_ON(folio_test_writeback(folio));      618         BUG_ON(folio_test_writeback(folio));
619         folio_start_writeback(folio);             619         folio_start_writeback(folio);
620         folio_unlock(folio);                      620         folio_unlock(folio);
621         if (boundary || (first_unmapped != blo    621         if (boundary || (first_unmapped != blocks_per_page)) {
622                 bio = mpage_bio_submit_write(b    622                 bio = mpage_bio_submit_write(bio);
623                 if (boundary_block) {             623                 if (boundary_block) {
624                         write_boundary_block(b    624                         write_boundary_block(boundary_bdev,
625                                         bounda    625                                         boundary_block, 1 << blkbits);
626                 }                                 626                 }
627         } else {                                  627         } else {
628                 mpd->last_block_in_bio = first    628                 mpd->last_block_in_bio = first_block + blocks_per_page - 1;
629         }                                         629         }
630         goto out;                                 630         goto out;
631                                                   631 
632 confused:                                         632 confused:
633         if (bio)                                  633         if (bio)
634                 bio = mpage_bio_submit_write(b    634                 bio = mpage_bio_submit_write(bio);
635                                                   635 
636         /*                                        636         /*
637          * The caller has a ref on the inode,     637          * The caller has a ref on the inode, so *mapping is stable
638          */                                       638          */
639         ret = block_write_full_folio(folio, wb    639         ret = block_write_full_folio(folio, wbc, mpd->get_block);
640         mapping_set_error(mapping, ret);          640         mapping_set_error(mapping, ret);
641 out:                                              641 out:
642         mpd->bio = bio;                           642         mpd->bio = bio;
643         return ret;                               643         return ret;
644 }                                                 644 }
645                                                   645 
646 /**                                               646 /**
647  * mpage_writepages - walk the list of dirty p    647  * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
648  * @mapping: address space structure to write     648  * @mapping: address space structure to write
649  * @wbc: subtract the number of written pages     649  * @wbc: subtract the number of written pages from *@wbc->nr_to_write
650  * @get_block: the filesystem's block mapper f    650  * @get_block: the filesystem's block mapper function.
651  *                                                651  *
652  * This is a library function, which implement    652  * This is a library function, which implements the writepages()
653  * address_space_operation.                       653  * address_space_operation.
654  */                                               654  */
655 int                                               655 int
656 mpage_writepages(struct address_space *mapping    656 mpage_writepages(struct address_space *mapping,
657                 struct writeback_control *wbc,    657                 struct writeback_control *wbc, get_block_t get_block)
658 {                                                 658 {
659         struct mpage_data mpd = {                 659         struct mpage_data mpd = {
660                 .get_block      = get_block,      660                 .get_block      = get_block,
661         };                                        661         };
662         struct blk_plug plug;                     662         struct blk_plug plug;
663         int ret;                                  663         int ret;
664                                                   664 
665         blk_start_plug(&plug);                    665         blk_start_plug(&plug);
666         ret = write_cache_pages(mapping, wbc,     666         ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
667         if (mpd.bio)                              667         if (mpd.bio)
668                 mpage_bio_submit_write(mpd.bio    668                 mpage_bio_submit_write(mpd.bio);
669         blk_finish_plug(&plug);                   669         blk_finish_plug(&plug);
670         return ret;                               670         return ret;
671 }                                                 671 }
672 EXPORT_SYMBOL(mpage_writepages);                  672 EXPORT_SYMBOL(mpage_writepages);
673                                                   673 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php