1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Squashfs - a compressed read only filesystem for Linux 4 * 5 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 6 * Phillip Lougher <phillip@squashfs.org.uk> 7 * 8 * file.c 9 */ 10 11 /* 12 * This file contains code for handling regular files. A regular file 13 * consists of a sequence of contiguous compressed blocks, and/or a 14 * compressed fragment block (tail-end packed block). The compressed size 15 * of each datablock is stored in a block list contained within the 16 * file inode (itself stored in one or more compressed metadata blocks). 17 * 18 * To speed up access to datablocks when reading 'large' files (256 Mbytes or 19 * larger), the code implements an index cache that caches the mapping from 20 * block index to datablock location on disk. 21 * 22 * The index cache allows Squashfs to handle large files (up to 1.75 TiB) while 23 * retaining a simple and space-efficient block list on disk. The cache 24 * is split into slots, caching up to eight 224 GiB files (128 KiB blocks). 25 * Larger files use multiple slots, with 1.75 TiB files using all 8 slots. 26 * The index cache is designed to be memory efficient, and by default uses 27 * 16 KiB. 28 */ 29 30 #include <linux/fs.h> 31 #include <linux/vfs.h> 32 #include <linux/kernel.h> 33 #include <linux/slab.h> 34 #include <linux/string.h> 35 #include <linux/pagemap.h> 36 #include <linux/mutex.h> 37 38 #include "squashfs_fs.h" 39 #include "squashfs_fs_sb.h" 40 #include "squashfs_fs_i.h" 41 #include "squashfs.h" 42 #include "page_actor.h" 43 44 /* 45 * Locate cache slot in range [offset, index] for specified inode. If 46 * there's more than one return the slot closest to index. 47 */ 48 static struct meta_index *locate_meta_index(struct inode *inode, int offset, 49 int index) 50 { 51 struct meta_index *meta = NULL; 52 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 53 int i; 54 55 mutex_lock(&msblk->meta_index_mutex); 56 57 TRACE("locate_meta_index: index %d, offset %d\n", index, offset); 58 59 if (msblk->meta_index == NULL) 60 goto not_allocated; 61 62 for (i = 0; i < SQUASHFS_META_SLOTS; i++) { 63 if (msblk->meta_index[i].inode_number == inode->i_ino && 64 msblk->meta_index[i].offset >= offset && 65 msblk->meta_index[i].offset <= index && 66 msblk->meta_index[i].locked == 0) { 67 TRACE("locate_meta_index: entry %d, offset %d\n", i, 68 msblk->meta_index[i].offset); 69 meta = &msblk->meta_index[i]; 70 offset = meta->offset; 71 } 72 } 73 74 if (meta) 75 meta->locked = 1; 76 77 not_allocated: 78 mutex_unlock(&msblk->meta_index_mutex); 79 80 return meta; 81 } 82 83 84 /* 85 * Find and initialise an empty cache slot for index offset. 86 */ 87 static struct meta_index *empty_meta_index(struct inode *inode, int offset, 88 int skip) 89 { 90 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 91 struct meta_index *meta = NULL; 92 int i; 93 94 mutex_lock(&msblk->meta_index_mutex); 95 96 TRACE("empty_meta_index: offset %d, skip %d\n", offset, skip); 97 98 if (msblk->meta_index == NULL) { 99 /* 100 * First time cache index has been used, allocate and 101 * initialise. The cache index could be allocated at 102 * mount time but doing it here means it is allocated only 103 * if a 'large' file is read. 104 */ 105 msblk->meta_index = kcalloc(SQUASHFS_META_SLOTS, 106 sizeof(*(msblk->meta_index)), GFP_KERNEL); 107 if (msblk->meta_index == NULL) { 108 ERROR("Failed to allocate meta_index\n"); 109 goto failed; 110 } 111 for (i = 0; i < SQUASHFS_META_SLOTS; i++) { 112 msblk->meta_index[i].inode_number = 0; 113 msblk->meta_index[i].locked = 0; 114 } 115 msblk->next_meta_index = 0; 116 } 117 118 for (i = SQUASHFS_META_SLOTS; i && 119 msblk->meta_index[msblk->next_meta_index].locked; i--) 120 msblk->next_meta_index = (msblk->next_meta_index + 1) % 121 SQUASHFS_META_SLOTS; 122 123 if (i == 0) { 124 TRACE("empty_meta_index: failed!\n"); 125 goto failed; 126 } 127 128 TRACE("empty_meta_index: returned meta entry %d, %p\n", 129 msblk->next_meta_index, 130 &msblk->meta_index[msblk->next_meta_index]); 131 132 meta = &msblk->meta_index[msblk->next_meta_index]; 133 msblk->next_meta_index = (msblk->next_meta_index + 1) % 134 SQUASHFS_META_SLOTS; 135 136 meta->inode_number = inode->i_ino; 137 meta->offset = offset; 138 meta->skip = skip; 139 meta->entries = 0; 140 meta->locked = 1; 141 142 failed: 143 mutex_unlock(&msblk->meta_index_mutex); 144 return meta; 145 } 146 147 148 static void release_meta_index(struct inode *inode, struct meta_index *meta) 149 { 150 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 151 mutex_lock(&msblk->meta_index_mutex); 152 meta->locked = 0; 153 mutex_unlock(&msblk->meta_index_mutex); 154 } 155 156 157 /* 158 * Read the next n blocks from the block list, starting from 159 * metadata block <start_block, offset>. 160 */ 161 static long long read_indexes(struct super_block *sb, int n, 162 u64 *start_block, int *offset) 163 { 164 int err, i; 165 long long block = 0; 166 __le32 *blist = kmalloc(PAGE_SIZE, GFP_KERNEL); 167 168 if (blist == NULL) { 169 ERROR("read_indexes: Failed to allocate block_list\n"); 170 return -ENOMEM; 171 } 172 173 while (n) { 174 int blocks = min_t(int, n, PAGE_SIZE >> 2); 175 176 err = squashfs_read_metadata(sb, blist, start_block, 177 offset, blocks << 2); 178 if (err < 0) { 179 ERROR("read_indexes: reading block [%llx:%x]\n", 180 *start_block, *offset); 181 goto failure; 182 } 183 184 for (i = 0; i < blocks; i++) { 185 int size = squashfs_block_size(blist[i]); 186 if (size < 0) { 187 err = size; 188 goto failure; 189 } 190 block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size); 191 } 192 n -= blocks; 193 } 194 195 kfree(blist); 196 return block; 197 198 failure: 199 kfree(blist); 200 return err; 201 } 202 203 204 /* 205 * Each cache index slot has SQUASHFS_META_ENTRIES, each of which 206 * can cache one index -> datablock/blocklist-block mapping. We wish 207 * to distribute these over the length of the file, entry[0] maps index x, 208 * entry[1] maps index x + skip, entry[2] maps index x + 2 * skip, and so on. 209 * The larger the file, the greater the skip factor. The skip factor is 210 * limited to the size of the metadata cache (SQUASHFS_CACHED_BLKS) to ensure 211 * the number of metadata blocks that need to be read fits into the cache. 212 * If the skip factor is limited in this way then the file will use multiple 213 * slots. 214 */ 215 static inline int calculate_skip(u64 blocks) 216 { 217 u64 skip = blocks / ((SQUASHFS_META_ENTRIES + 1) 218 * SQUASHFS_META_INDEXES); 219 return min((u64) SQUASHFS_CACHED_BLKS - 1, skip + 1); 220 } 221 222 223 /* 224 * Search and grow the index cache for the specified inode, returning the 225 * on-disk locations of the datablock and block list metadata block 226 * <index_block, index_offset> for index (scaled to nearest cache index). 227 */ 228 static int fill_meta_index(struct inode *inode, int index, 229 u64 *index_block, int *index_offset, u64 *data_block) 230 { 231 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 232 int skip = calculate_skip(i_size_read(inode) >> msblk->block_log); 233 int offset = 0; 234 struct meta_index *meta; 235 struct meta_entry *meta_entry; 236 u64 cur_index_block = squashfs_i(inode)->block_list_start; 237 int cur_offset = squashfs_i(inode)->offset; 238 u64 cur_data_block = squashfs_i(inode)->start; 239 int err, i; 240 241 /* 242 * Scale index to cache index (cache slot entry) 243 */ 244 index /= SQUASHFS_META_INDEXES * skip; 245 246 while (offset < index) { 247 meta = locate_meta_index(inode, offset + 1, index); 248 249 if (meta == NULL) { 250 meta = empty_meta_index(inode, offset + 1, skip); 251 if (meta == NULL) 252 goto all_done; 253 } else { 254 offset = index < meta->offset + meta->entries ? index : 255 meta->offset + meta->entries - 1; 256 meta_entry = &meta->meta_entry[offset - meta->offset]; 257 cur_index_block = meta_entry->index_block + 258 msblk->inode_table; 259 cur_offset = meta_entry->offset; 260 cur_data_block = meta_entry->data_block; 261 TRACE("get_meta_index: offset %d, meta->offset %d, " 262 "meta->entries %d\n", offset, meta->offset, 263 meta->entries); 264 TRACE("get_meta_index: index_block 0x%llx, offset 0x%x" 265 " data_block 0x%llx\n", cur_index_block, 266 cur_offset, cur_data_block); 267 } 268 269 /* 270 * If necessary grow cache slot by reading block list. Cache 271 * slot is extended up to index or to the end of the slot, in 272 * which case further slots will be used. 273 */ 274 for (i = meta->offset + meta->entries; i <= index && 275 i < meta->offset + SQUASHFS_META_ENTRIES; i++) { 276 int blocks = skip * SQUASHFS_META_INDEXES; 277 long long res = read_indexes(inode->i_sb, blocks, 278 &cur_index_block, &cur_offset); 279 280 if (res < 0) { 281 if (meta->entries == 0) 282 /* 283 * Don't leave an empty slot on read 284 * error allocated to this inode... 285 */ 286 meta->inode_number = 0; 287 err = res; 288 goto failed; 289 } 290 291 cur_data_block += res; 292 meta_entry = &meta->meta_entry[i - meta->offset]; 293 meta_entry->index_block = cur_index_block - 294 msblk->inode_table; 295 meta_entry->offset = cur_offset; 296 meta_entry->data_block = cur_data_block; 297 meta->entries++; 298 offset++; 299 } 300 301 TRACE("get_meta_index: meta->offset %d, meta->entries %d\n", 302 meta->offset, meta->entries); 303 304 release_meta_index(inode, meta); 305 } 306 307 all_done: 308 *index_block = cur_index_block; 309 *index_offset = cur_offset; 310 *data_block = cur_data_block; 311 312 /* 313 * Scale cache index (cache slot entry) to index 314 */ 315 return offset * SQUASHFS_META_INDEXES * skip; 316 317 failed: 318 release_meta_index(inode, meta); 319 return err; 320 } 321 322 323 /* 324 * Get the on-disk location and compressed size of the datablock 325 * specified by index. Fill_meta_index() does most of the work. 326 */ 327 static int read_blocklist(struct inode *inode, int index, u64 *block) 328 { 329 u64 start; 330 long long blks; 331 int offset; 332 __le32 size; 333 int res = fill_meta_index(inode, index, &start, &offset, block); 334 335 TRACE("read_blocklist: res %d, index %d, start 0x%llx, offset" 336 " 0x%x, block 0x%llx\n", res, index, start, offset, 337 *block); 338 339 if (res < 0) 340 return res; 341 342 /* 343 * res contains the index of the mapping returned by fill_meta_index(), 344 * this will likely be less than the desired index (because the 345 * meta_index cache works at a higher granularity). Read any 346 * extra block indexes needed. 347 */ 348 if (res < index) { 349 blks = read_indexes(inode->i_sb, index - res, &start, &offset); 350 if (blks < 0) 351 return (int) blks; 352 *block += blks; 353 } 354 355 /* 356 * Read length of block specified by index. 357 */ 358 res = squashfs_read_metadata(inode->i_sb, &size, &start, &offset, 359 sizeof(size)); 360 if (res < 0) 361 return res; 362 return squashfs_block_size(size); 363 } 364 365 void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, int offset, int avail) 366 { 367 int copied; 368 void *pageaddr; 369 370 pageaddr = kmap_atomic(page); 371 copied = squashfs_copy_data(pageaddr, buffer, offset, avail); 372 memset(pageaddr + copied, 0, PAGE_SIZE - copied); 373 kunmap_atomic(pageaddr); 374 375 flush_dcache_page(page); 376 if (copied == avail) 377 SetPageUptodate(page); 378 } 379 380 /* Copy data into page cache */ 381 void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, 382 int bytes, int offset) 383 { 384 struct inode *inode = page->mapping->host; 385 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 386 int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; 387 int start_index = page->index & ~mask, end_index = start_index | mask; 388 389 /* 390 * Loop copying datablock into pages. As the datablock likely covers 391 * many PAGE_SIZE pages (default block size is 128 KiB) explicitly 392 * grab the pages from the page cache, except for the page that we've 393 * been called to fill. 394 */ 395 for (i = start_index; i <= end_index && bytes > 0; i++, 396 bytes -= PAGE_SIZE, offset += PAGE_SIZE) { 397 struct page *push_page; 398 int avail = buffer ? min_t(int, bytes, PAGE_SIZE) : 0; 399 400 TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); 401 402 push_page = (i == page->index) ? page : 403 grab_cache_page_nowait(page->mapping, i); 404 405 if (!push_page) 406 continue; 407 408 if (PageUptodate(push_page)) 409 goto skip_page; 410 411 squashfs_fill_page(push_page, buffer, offset, avail); 412 skip_page: 413 unlock_page(push_page); 414 if (i != page->index) 415 put_page(push_page); 416 } 417 } 418 419 /* Read datablock stored packed inside a fragment (tail-end packed block) */ 420 static int squashfs_readpage_fragment(struct page *page, int expected) 421 { 422 struct inode *inode = page->mapping->host; 423 struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, 424 squashfs_i(inode)->fragment_block, 425 squashfs_i(inode)->fragment_size); 426 int res = buffer->error; 427 428 if (res) 429 ERROR("Unable to read page, block %llx, size %x\n", 430 squashfs_i(inode)->fragment_block, 431 squashfs_i(inode)->fragment_size); 432 else 433 squashfs_copy_cache(page, buffer, expected, 434 squashfs_i(inode)->fragment_offset); 435 436 squashfs_cache_put(buffer); 437 return res; 438 } 439 440 static int squashfs_readpage_sparse(struct page *page, int expected) 441 { 442 squashfs_copy_cache(page, NULL, expected, 0); 443 return 0; 444 } 445 446 static int squashfs_read_folio(struct file *file, struct folio *folio) 447 { 448 struct page *page = &folio->page; 449 struct inode *inode = page->mapping->host; 450 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 451 int index = page->index >> (msblk->block_log - PAGE_SHIFT); 452 int file_end = i_size_read(inode) >> msblk->block_log; 453 int expected = index == file_end ? 454 (i_size_read(inode) & (msblk->block_size - 1)) : 455 msblk->block_size; 456 int res = 0; 457 void *pageaddr; 458 459 TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", 460 page->index, squashfs_i(inode)->start); 461 462 if (page->index >= ((i_size_read(inode) + PAGE_SIZE - 1) >> 463 PAGE_SHIFT)) 464 goto out; 465 466 if (index < file_end || squashfs_i(inode)->fragment_block == 467 SQUASHFS_INVALID_BLK) { 468 u64 block = 0; 469 470 res = read_blocklist(inode, index, &block); 471 if (res < 0) 472 goto out; 473 474 if (res == 0) 475 res = squashfs_readpage_sparse(page, expected); 476 else 477 res = squashfs_readpage_block(page, block, res, expected); 478 } else 479 res = squashfs_readpage_fragment(page, expected); 480 481 if (!res) 482 return 0; 483 484 out: 485 pageaddr = kmap_atomic(page); 486 memset(pageaddr, 0, PAGE_SIZE); 487 kunmap_atomic(pageaddr); 488 flush_dcache_page(page); 489 if (res == 0) 490 SetPageUptodate(page); 491 unlock_page(page); 492 493 return res; 494 } 495 496 static int squashfs_readahead_fragment(struct page **page, 497 unsigned int pages, unsigned int expected) 498 { 499 struct inode *inode = page[0]->mapping->host; 500 struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, 501 squashfs_i(inode)->fragment_block, 502 squashfs_i(inode)->fragment_size); 503 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 504 unsigned int n, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; 505 int error = buffer->error; 506 507 if (error) 508 goto out; 509 510 expected += squashfs_i(inode)->fragment_offset; 511 512 for (n = 0; n < pages; n++) { 513 unsigned int base = (page[n]->index & mask) << PAGE_SHIFT; 514 unsigned int offset = base + squashfs_i(inode)->fragment_offset; 515 516 if (expected > offset) { 517 unsigned int avail = min_t(unsigned int, expected - 518 offset, PAGE_SIZE); 519 520 squashfs_fill_page(page[n], buffer, offset, avail); 521 } 522 523 unlock_page(page[n]); 524 put_page(page[n]); 525 } 526 527 out: 528 squashfs_cache_put(buffer); 529 return error; 530 } 531 532 static void squashfs_readahead(struct readahead_control *ractl) 533 { 534 struct inode *inode = ractl->mapping->host; 535 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 536 size_t mask = (1UL << msblk->block_log) - 1; 537 unsigned short shift = msblk->block_log - PAGE_SHIFT; 538 loff_t start = readahead_pos(ractl) & ~mask; 539 size_t len = readahead_length(ractl) + readahead_pos(ractl) - start; 540 struct squashfs_page_actor *actor; 541 unsigned int nr_pages = 0; 542 struct page **pages; 543 int i; 544 loff_t file_end = i_size_read(inode) >> msblk->block_log; 545 unsigned int max_pages = 1UL << shift; 546 547 readahead_expand(ractl, start, (len | mask) + 1); 548 549 pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL); 550 if (!pages) 551 return; 552 553 for (;;) { 554 pgoff_t index; 555 int res, bsize; 556 u64 block = 0; 557 unsigned int expected; 558 struct page *last_page; 559 560 expected = start >> msblk->block_log == file_end ? 561 (i_size_read(inode) & (msblk->block_size - 1)) : 562 msblk->block_size; 563 564 max_pages = (expected + PAGE_SIZE - 1) >> PAGE_SHIFT; 565 566 nr_pages = __readahead_batch(ractl, pages, max_pages); 567 if (!nr_pages) 568 break; 569 570 if (readahead_pos(ractl) >= i_size_read(inode)) 571 goto skip_pages; 572 573 index = pages[0]->index >> shift; 574 575 if ((pages[nr_pages - 1]->index >> shift) != index) 576 goto skip_pages; 577 578 if (index == file_end && squashfs_i(inode)->fragment_block != 579 SQUASHFS_INVALID_BLK) { 580 res = squashfs_readahead_fragment(pages, nr_pages, 581 expected); 582 if (res) 583 goto skip_pages; 584 continue; 585 } 586 587 bsize = read_blocklist(inode, index, &block); 588 if (bsize == 0) 589 goto skip_pages; 590 591 actor = squashfs_page_actor_init_special(msblk, pages, nr_pages, 592 expected); 593 if (!actor) 594 goto skip_pages; 595 596 res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); 597 598 last_page = squashfs_page_actor_free(actor); 599 600 if (res == expected) { 601 int bytes; 602 603 /* Last page (if present) may have trailing bytes not filled */ 604 bytes = res % PAGE_SIZE; 605 if (index == file_end && bytes && last_page) 606 memzero_page(last_page, bytes, 607 PAGE_SIZE - bytes); 608 609 for (i = 0; i < nr_pages; i++) { 610 flush_dcache_page(pages[i]); 611 SetPageUptodate(pages[i]); 612 } 613 } 614 615 for (i = 0; i < nr_pages; i++) { 616 unlock_page(pages[i]); 617 put_page(pages[i]); 618 } 619 } 620 621 kfree(pages); 622 return; 623 624 skip_pages: 625 for (i = 0; i < nr_pages; i++) { 626 unlock_page(pages[i]); 627 put_page(pages[i]); 628 } 629 kfree(pages); 630 } 631 632 const struct address_space_operations squashfs_aops = { 633 .read_folio = squashfs_read_folio, 634 .readahead = squashfs_readahead 635 }; 636
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.