1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * This file is part of UBIFS. 4 * 5 * Copyright (C) 2006-2008 Nokia Corporation. 6 * 7 * Authors: Adrian Hunter 8 * Artem Bityutskiy (Битюцкий Артём) 9 */ 10 11 /* 12 * This file contains journal replay code. It runs when the file-system is being 13 * mounted and requires no locking. 14 * 15 * The larger is the journal, the longer it takes to scan it, so the longer it 16 * takes to mount UBIFS. This is why the journal has limited size which may be 17 * changed depending on the system requirements. But a larger journal gives 18 * faster I/O speed because it writes the index less frequently. So this is a 19 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the 20 * larger is the journal, the more memory its index may consume. 21 */ 22 23 #include "ubifs.h" 24 #include <linux/list_sort.h> 25 #include <crypto/hash.h> 26 27 /** 28 * struct replay_entry - replay list entry. 29 * @lnum: logical eraseblock number of the node 30 * @offs: node offset 31 * @len: node length 32 * @hash: node hash 33 * @deletion: non-zero if this entry corresponds to a node deletion 34 * @sqnum: node sequence number 35 * @list: links the replay list 36 * @key: node key 37 * @nm: directory entry name 38 * @old_size: truncation old size 39 * @new_size: truncation new size 40 * 41 * The replay process first scans all buds and builds the replay list, then 42 * sorts the replay list in nodes sequence number order, and then inserts all 43 * the replay entries to the TNC. 44 */ 45 struct replay_entry { 46 int lnum; 47 int offs; 48 int len; 49 u8 hash[UBIFS_HASH_ARR_SZ]; 50 unsigned int deletion:1; 51 unsigned long long sqnum; 52 struct list_head list; 53 union ubifs_key key; 54 union { 55 struct fscrypt_name nm; 56 struct { 57 loff_t old_size; 58 loff_t new_size; 59 }; 60 }; 61 }; 62 63 /** 64 * struct bud_entry - entry in the list of buds to replay. 65 * @list: next bud in the list 66 * @bud: bud description object 67 * @sqnum: reference node sequence number 68 * @free: free bytes in the bud 69 * @dirty: dirty bytes in the bud 70 */ 71 struct bud_entry { 72 struct list_head list; 73 struct ubifs_bud *bud; 74 unsigned long long sqnum; 75 int free; 76 int dirty; 77 }; 78 79 /** 80 * set_bud_lprops - set free and dirty space used by a bud. 81 * @c: UBIFS file-system description object 82 * @b: bud entry which describes the bud 83 * 84 * This function makes sure the LEB properties of bud @b are set correctly 85 * after the replay. Returns zero in case of success and a negative error code 86 * in case of failure. 87 */ 88 static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) 89 { 90 const struct ubifs_lprops *lp; 91 int err = 0, dirty; 92 93 ubifs_get_lprops(c); 94 95 lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); 96 if (IS_ERR(lp)) { 97 err = PTR_ERR(lp); 98 goto out; 99 } 100 101 dirty = lp->dirty; 102 if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 103 /* 104 * The LEB was added to the journal with a starting offset of 105 * zero which means the LEB must have been empty. The LEB 106 * property values should be @lp->free == @c->leb_size and 107 * @lp->dirty == 0, but that is not the case. The reason is that 108 * the LEB had been garbage collected before it became the bud, 109 * and there was no commit in between. The garbage collector 110 * resets the free and dirty space without recording it 111 * anywhere except lprops, so if there was no commit then 112 * lprops does not have that information. 113 * 114 * We do not need to adjust free space because the scan has told 115 * us the exact value which is recorded in the replay entry as 116 * @b->free. 117 * 118 * However we do need to subtract from the dirty space the 119 * amount of space that the garbage collector reclaimed, which 120 * is the whole LEB minus the amount of space that was free. 121 */ 122 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 123 lp->free, lp->dirty); 124 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 125 lp->free, lp->dirty); 126 dirty -= c->leb_size - lp->free; 127 /* 128 * If the replay order was perfect the dirty space would now be 129 * zero. The order is not perfect because the journal heads 130 * race with each other. This is not a problem but is does mean 131 * that the dirty space may temporarily exceed c->leb_size 132 * during the replay. 133 */ 134 if (dirty != 0) 135 dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty", 136 b->bud->lnum, lp->free, lp->dirty, b->free, 137 b->dirty); 138 } 139 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, 140 lp->flags | LPROPS_TAKEN, 0); 141 if (IS_ERR(lp)) { 142 err = PTR_ERR(lp); 143 goto out; 144 } 145 146 /* Make sure the journal head points to the latest bud */ 147 err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, 148 b->bud->lnum, c->leb_size - b->free); 149 150 out: 151 ubifs_release_lprops(c); 152 return err; 153 } 154 155 /** 156 * set_buds_lprops - set free and dirty space for all replayed buds. 157 * @c: UBIFS file-system description object 158 * 159 * This function sets LEB properties for all replayed buds. Returns zero in 160 * case of success and a negative error code in case of failure. 161 */ 162 static int set_buds_lprops(struct ubifs_info *c) 163 { 164 struct bud_entry *b; 165 int err; 166 167 list_for_each_entry(b, &c->replay_buds, list) { 168 err = set_bud_lprops(c, b); 169 if (err) 170 return err; 171 } 172 173 return 0; 174 } 175 176 /** 177 * trun_remove_range - apply a replay entry for a truncation to the TNC. 178 * @c: UBIFS file-system description object 179 * @r: replay entry of truncation 180 */ 181 static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) 182 { 183 unsigned min_blk, max_blk; 184 union ubifs_key min_key, max_key; 185 ino_t ino; 186 187 min_blk = r->new_size / UBIFS_BLOCK_SIZE; 188 if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) 189 min_blk += 1; 190 191 max_blk = r->old_size / UBIFS_BLOCK_SIZE; 192 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) 193 max_blk -= 1; 194 195 ino = key_inum(c, &r->key); 196 197 data_key_init(c, &min_key, ino, min_blk); 198 data_key_init(c, &max_key, ino, max_blk); 199 200 return ubifs_tnc_remove_range(c, &min_key, &max_key); 201 } 202 203 /** 204 * inode_still_linked - check whether inode in question will be re-linked. 205 * @c: UBIFS file-system description object 206 * @rino: replay entry to test 207 * 208 * O_TMPFILE files can be re-linked, this means link count goes from 0 to 1. 209 * This case needs special care, otherwise all references to the inode will 210 * be removed upon the first replay entry of an inode with link count 0 211 * is found. 212 */ 213 static bool inode_still_linked(struct ubifs_info *c, struct replay_entry *rino) 214 { 215 struct replay_entry *r; 216 217 ubifs_assert(c, rino->deletion); 218 ubifs_assert(c, key_type(c, &rino->key) == UBIFS_INO_KEY); 219 220 /* 221 * Find the most recent entry for the inode behind @rino and check 222 * whether it is a deletion. 223 */ 224 list_for_each_entry_reverse(r, &c->replay_list, list) { 225 ubifs_assert(c, r->sqnum >= rino->sqnum); 226 if (key_inum(c, &r->key) == key_inum(c, &rino->key) && 227 key_type(c, &r->key) == UBIFS_INO_KEY) 228 return r->deletion == 0; 229 230 } 231 232 ubifs_assert(c, 0); 233 return false; 234 } 235 236 /** 237 * apply_replay_entry - apply a replay entry to the TNC. 238 * @c: UBIFS file-system description object 239 * @r: replay entry to apply 240 * 241 * Apply a replay entry to the TNC. 242 */ 243 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 244 { 245 int err; 246 247 dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ", 248 r->lnum, r->offs, r->len, r->deletion, r->sqnum); 249 250 if (is_hash_key(c, &r->key)) { 251 if (r->deletion) 252 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 253 else 254 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 255 r->len, r->hash, &r->nm); 256 } else { 257 if (r->deletion) 258 switch (key_type(c, &r->key)) { 259 case UBIFS_INO_KEY: 260 { 261 ino_t inum = key_inum(c, &r->key); 262 263 if (inode_still_linked(c, r)) { 264 err = 0; 265 break; 266 } 267 268 err = ubifs_tnc_remove_ino(c, inum); 269 break; 270 } 271 case UBIFS_TRUN_KEY: 272 err = trun_remove_range(c, r); 273 break; 274 default: 275 err = ubifs_tnc_remove(c, &r->key); 276 break; 277 } 278 else 279 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, 280 r->len, r->hash); 281 if (err) 282 return err; 283 284 if (c->need_recovery) 285 err = ubifs_recover_size_accum(c, &r->key, r->deletion, 286 r->new_size); 287 } 288 289 return err; 290 } 291 292 /** 293 * replay_entries_cmp - compare 2 replay entries. 294 * @priv: UBIFS file-system description object 295 * @a: first replay entry 296 * @b: second replay entry 297 * 298 * This is a comparios function for 'list_sort()' which compares 2 replay 299 * entries @a and @b by comparing their sequence number. Returns %1 if @a has 300 * greater sequence number and %-1 otherwise. 301 */ 302 static int replay_entries_cmp(void *priv, const struct list_head *a, 303 const struct list_head *b) 304 { 305 struct ubifs_info *c = priv; 306 struct replay_entry *ra, *rb; 307 308 cond_resched(); 309 if (a == b) 310 return 0; 311 312 ra = list_entry(a, struct replay_entry, list); 313 rb = list_entry(b, struct replay_entry, list); 314 ubifs_assert(c, ra->sqnum != rb->sqnum); 315 if (ra->sqnum > rb->sqnum) 316 return 1; 317 return -1; 318 } 319 320 /** 321 * apply_replay_list - apply the replay list to the TNC. 322 * @c: UBIFS file-system description object 323 * 324 * Apply all entries in the replay list to the TNC. Returns zero in case of 325 * success and a negative error code in case of failure. 326 */ 327 static int apply_replay_list(struct ubifs_info *c) 328 { 329 struct replay_entry *r; 330 int err; 331 332 list_sort(c, &c->replay_list, &replay_entries_cmp); 333 334 list_for_each_entry(r, &c->replay_list, list) { 335 cond_resched(); 336 337 err = apply_replay_entry(c, r); 338 if (err) 339 return err; 340 } 341 342 return 0; 343 } 344 345 /** 346 * destroy_replay_list - destroy the replay. 347 * @c: UBIFS file-system description object 348 * 349 * Destroy the replay list. 350 */ 351 static void destroy_replay_list(struct ubifs_info *c) 352 { 353 struct replay_entry *r, *tmp; 354 355 list_for_each_entry_safe(r, tmp, &c->replay_list, list) { 356 if (is_hash_key(c, &r->key)) 357 kfree(fname_name(&r->nm)); 358 list_del(&r->list); 359 kfree(r); 360 } 361 } 362 363 /** 364 * insert_node - insert a node to the replay list 365 * @c: UBIFS file-system description object 366 * @lnum: node logical eraseblock number 367 * @offs: node offset 368 * @len: node length 369 * @hash: node hash 370 * @key: node key 371 * @sqnum: sequence number 372 * @deletion: non-zero if this is a deletion 373 * @used: number of bytes in use in a LEB 374 * @old_size: truncation old size 375 * @new_size: truncation new size 376 * 377 * This function inserts a scanned non-direntry node to the replay list. The 378 * replay list contains @struct replay_entry elements, and we sort this list in 379 * sequence number order before applying it. The replay list is applied at the 380 * very end of the replay process. Since the list is sorted in sequence number 381 * order, the older modifications are applied first. This function returns zero 382 * in case of success and a negative error code in case of failure. 383 */ 384 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 385 const u8 *hash, union ubifs_key *key, 386 unsigned long long sqnum, int deletion, int *used, 387 loff_t old_size, loff_t new_size) 388 { 389 struct replay_entry *r; 390 391 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 392 393 if (key_inum(c, key) >= c->highest_inum) 394 c->highest_inum = key_inum(c, key); 395 396 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 397 if (!r) 398 return -ENOMEM; 399 400 if (!deletion) 401 *used += ALIGN(len, 8); 402 r->lnum = lnum; 403 r->offs = offs; 404 r->len = len; 405 ubifs_copy_hash(c, hash, r->hash); 406 r->deletion = !!deletion; 407 r->sqnum = sqnum; 408 key_copy(c, key, &r->key); 409 r->old_size = old_size; 410 r->new_size = new_size; 411 412 list_add_tail(&r->list, &c->replay_list); 413 return 0; 414 } 415 416 /** 417 * insert_dent - insert a directory entry node into the replay list. 418 * @c: UBIFS file-system description object 419 * @lnum: node logical eraseblock number 420 * @offs: node offset 421 * @len: node length 422 * @hash: node hash 423 * @key: node key 424 * @name: directory entry name 425 * @nlen: directory entry name length 426 * @sqnum: sequence number 427 * @deletion: non-zero if this is a deletion 428 * @used: number of bytes in use in a LEB 429 * 430 * This function inserts a scanned directory entry node or an extended 431 * attribute entry to the replay list. Returns zero in case of success and a 432 * negative error code in case of failure. 433 */ 434 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 435 const u8 *hash, union ubifs_key *key, 436 const char *name, int nlen, unsigned long long sqnum, 437 int deletion, int *used) 438 { 439 struct replay_entry *r; 440 char *nbuf; 441 442 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 443 if (key_inum(c, key) >= c->highest_inum) 444 c->highest_inum = key_inum(c, key); 445 446 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 447 if (!r) 448 return -ENOMEM; 449 450 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 451 if (!nbuf) { 452 kfree(r); 453 return -ENOMEM; 454 } 455 456 if (!deletion) 457 *used += ALIGN(len, 8); 458 r->lnum = lnum; 459 r->offs = offs; 460 r->len = len; 461 ubifs_copy_hash(c, hash, r->hash); 462 r->deletion = !!deletion; 463 r->sqnum = sqnum; 464 key_copy(c, key, &r->key); 465 fname_len(&r->nm) = nlen; 466 memcpy(nbuf, name, nlen); 467 nbuf[nlen] = '\0'; 468 fname_name(&r->nm) = nbuf; 469 470 list_add_tail(&r->list, &c->replay_list); 471 return 0; 472 } 473 474 /** 475 * ubifs_validate_entry - validate directory or extended attribute entry node. 476 * @c: UBIFS file-system description object 477 * @dent: the node to validate 478 * 479 * This function validates directory or extended attribute entry node @dent. 480 * Returns zero if the node is all right and a %-EINVAL if not. 481 */ 482 int ubifs_validate_entry(struct ubifs_info *c, 483 const struct ubifs_dent_node *dent) 484 { 485 int key_type = key_type_flash(c, dent->key); 486 int nlen = le16_to_cpu(dent->nlen); 487 488 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || 489 dent->type >= UBIFS_ITYPES_CNT || 490 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || 491 (key_type == UBIFS_XENT_KEY && strnlen(dent->name, nlen) != nlen) || 492 le64_to_cpu(dent->inum) > MAX_INUM) { 493 ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ? 494 "directory entry" : "extended attribute entry"); 495 return -EINVAL; 496 } 497 498 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { 499 ubifs_err(c, "bad key type %d", key_type); 500 return -EINVAL; 501 } 502 503 return 0; 504 } 505 506 /** 507 * is_last_bud - check if the bud is the last in the journal head. 508 * @c: UBIFS file-system description object 509 * @bud: bud description object 510 * 511 * This function checks if bud @bud is the last bud in its journal head. This 512 * information is then used by 'replay_bud()' to decide whether the bud can 513 * have corruptions or not. Indeed, only last buds can be corrupted by power 514 * cuts. Returns %1 if this is the last bud, and %0 if not. 515 */ 516 static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) 517 { 518 struct ubifs_jhead *jh = &c->jheads[bud->jhead]; 519 struct ubifs_bud *next; 520 uint32_t data; 521 int err; 522 523 if (list_is_last(&bud->list, &jh->buds_list)) 524 return 1; 525 526 /* 527 * The following is a quirk to make sure we work correctly with UBIFS 528 * images used with older UBIFS. 529 * 530 * Normally, the last bud will be the last in the journal head's list 531 * of bud. However, there is one exception if the UBIFS image belongs 532 * to older UBIFS. This is fairly unlikely: one would need to use old 533 * UBIFS, then have a power cut exactly at the right point, and then 534 * try to mount this image with new UBIFS. 535 * 536 * The exception is: it is possible to have 2 buds A and B, A goes 537 * before B, and B is the last, bud B is contains no data, and bud A is 538 * corrupted at the end. The reason is that in older versions when the 539 * journal code switched the next bud (from A to B), it first added a 540 * log reference node for the new bud (B), and only after this it 541 * synchronized the write-buffer of current bud (A). But later this was 542 * changed and UBIFS started to always synchronize the write-buffer of 543 * the bud (A) before writing the log reference for the new bud (B). 544 * 545 * But because older UBIFS always synchronized A's write-buffer before 546 * writing to B, we can recognize this exceptional situation but 547 * checking the contents of bud B - if it is empty, then A can be 548 * treated as the last and we can recover it. 549 * 550 * TODO: remove this piece of code in a couple of years (today it is 551 * 16.05.2011). 552 */ 553 next = list_entry(bud->list.next, struct ubifs_bud, list); 554 if (!list_is_last(&next->list, &jh->buds_list)) 555 return 0; 556 557 err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); 558 if (err) 559 return 0; 560 561 return data == 0xFFFFFFFF; 562 } 563 564 /* authenticate_sleb_hash is split out for stack usage */ 565 static int noinline_for_stack 566 authenticate_sleb_hash(struct ubifs_info *c, 567 struct shash_desc *log_hash, u8 *hash) 568 { 569 SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm); 570 571 hash_desc->tfm = c->hash_tfm; 572 573 ubifs_shash_copy_state(c, log_hash, hash_desc); 574 return crypto_shash_final(hash_desc, hash); 575 } 576 577 /** 578 * authenticate_sleb - authenticate one scan LEB 579 * @c: UBIFS file-system description object 580 * @sleb: the scan LEB to authenticate 581 * @log_hash: 582 * @is_last: if true, this is the last LEB 583 * 584 * This function iterates over the buds of a single LEB authenticating all buds 585 * with the authentication nodes on this LEB. Authentication nodes are written 586 * after some buds and contain a HMAC covering the authentication node itself 587 * and the buds between the last authentication node and the current 588 * authentication node. It can happen that the last buds cannot be authenticated 589 * because a powercut happened when some nodes were written but not the 590 * corresponding authentication node. This function returns the number of nodes 591 * that could be authenticated or a negative error code. 592 */ 593 static int authenticate_sleb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, 594 struct shash_desc *log_hash, int is_last) 595 { 596 int n_not_auth = 0; 597 struct ubifs_scan_node *snod; 598 int n_nodes = 0; 599 int err; 600 u8 hash[UBIFS_HASH_ARR_SZ]; 601 u8 hmac[UBIFS_HMAC_ARR_SZ]; 602 603 if (!ubifs_authenticated(c)) 604 return sleb->nodes_cnt; 605 606 list_for_each_entry(snod, &sleb->nodes, list) { 607 608 n_nodes++; 609 610 if (snod->type == UBIFS_AUTH_NODE) { 611 struct ubifs_auth_node *auth = snod->node; 612 613 err = authenticate_sleb_hash(c, log_hash, hash); 614 if (err) 615 goto out; 616 617 err = crypto_shash_tfm_digest(c->hmac_tfm, hash, 618 c->hash_len, hmac); 619 if (err) 620 goto out; 621 622 err = ubifs_check_hmac(c, auth->hmac, hmac); 623 if (err) { 624 err = -EPERM; 625 goto out; 626 } 627 n_not_auth = 0; 628 } else { 629 err = crypto_shash_update(log_hash, snod->node, 630 snod->len); 631 if (err) 632 goto out; 633 n_not_auth++; 634 } 635 } 636 637 /* 638 * A powercut can happen when some nodes were written, but not yet 639 * the corresponding authentication node. This may only happen on 640 * the last bud though. 641 */ 642 if (n_not_auth) { 643 if (is_last) { 644 dbg_mnt("%d unauthenticated nodes found on LEB %d, Ignoring them", 645 n_not_auth, sleb->lnum); 646 err = 0; 647 } else { 648 dbg_mnt("%d unauthenticated nodes found on non-last LEB %d", 649 n_not_auth, sleb->lnum); 650 err = -EPERM; 651 } 652 } else { 653 err = 0; 654 } 655 out: 656 return err ? err : n_nodes - n_not_auth; 657 } 658 659 /** 660 * replay_bud - replay a bud logical eraseblock. 661 * @c: UBIFS file-system description object 662 * @b: bud entry which describes the bud 663 * 664 * This function replays bud @bud, recovers it if needed, and adds all nodes 665 * from this bud to the replay list. Returns zero in case of success and a 666 * negative error code in case of failure. 667 */ 668 static int replay_bud(struct ubifs_info *c, struct bud_entry *b) 669 { 670 int is_last = is_last_bud(c, b->bud); 671 int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; 672 int n_nodes, n = 0; 673 struct ubifs_scan_leb *sleb; 674 struct ubifs_scan_node *snod; 675 676 dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", 677 lnum, b->bud->jhead, offs, is_last); 678 679 if (c->need_recovery && is_last) 680 /* 681 * Recover only last LEBs in the journal heads, because power 682 * cuts may cause corruptions only in these LEBs, because only 683 * these LEBs could possibly be written to at the power cut 684 * time. 685 */ 686 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); 687 else 688 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 689 if (IS_ERR(sleb)) 690 return PTR_ERR(sleb); 691 692 n_nodes = authenticate_sleb(c, sleb, b->bud->log_hash, is_last); 693 if (n_nodes < 0) { 694 err = n_nodes; 695 goto out; 696 } 697 698 ubifs_shash_copy_state(c, b->bud->log_hash, 699 c->jheads[b->bud->jhead].log_hash); 700 701 /* 702 * The bud does not have to start from offset zero - the beginning of 703 * the 'lnum' LEB may contain previously committed data. One of the 704 * things we have to do in replay is to correctly update lprops with 705 * newer information about this LEB. 706 * 707 * At this point lprops thinks that this LEB has 'c->leb_size - offs' 708 * bytes of free space because it only contain information about 709 * committed data. 710 * 711 * But we know that real amount of free space is 'c->leb_size - 712 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and 713 * 'sleb->endpt' is used by bud data. We have to correctly calculate 714 * how much of these data are dirty and update lprops with this 715 * information. 716 * 717 * The dirt in that LEB region is comprised of padding nodes, deletion 718 * nodes, truncation nodes and nodes which are obsoleted by subsequent 719 * nodes in this LEB. So instead of calculating clean space, we 720 * calculate used space ('used' variable). 721 */ 722 723 list_for_each_entry(snod, &sleb->nodes, list) { 724 u8 hash[UBIFS_HASH_ARR_SZ]; 725 int deletion = 0; 726 727 cond_resched(); 728 729 if (snod->sqnum >= SQNUM_WATERMARK) { 730 ubifs_err(c, "file system's life ended"); 731 goto out_dump; 732 } 733 734 ubifs_node_calc_hash(c, snod->node, hash); 735 736 if (snod->sqnum > c->max_sqnum) 737 c->max_sqnum = snod->sqnum; 738 739 switch (snod->type) { 740 case UBIFS_INO_NODE: 741 { 742 struct ubifs_ino_node *ino = snod->node; 743 loff_t new_size = le64_to_cpu(ino->size); 744 745 if (le32_to_cpu(ino->nlink) == 0) 746 deletion = 1; 747 err = insert_node(c, lnum, snod->offs, snod->len, hash, 748 &snod->key, snod->sqnum, deletion, 749 &used, 0, new_size); 750 break; 751 } 752 case UBIFS_DATA_NODE: 753 { 754 struct ubifs_data_node *dn = snod->node; 755 loff_t new_size = le32_to_cpu(dn->size) + 756 key_block(c, &snod->key) * 757 UBIFS_BLOCK_SIZE; 758 759 err = insert_node(c, lnum, snod->offs, snod->len, hash, 760 &snod->key, snod->sqnum, deletion, 761 &used, 0, new_size); 762 break; 763 } 764 case UBIFS_DENT_NODE: 765 case UBIFS_XENT_NODE: 766 { 767 struct ubifs_dent_node *dent = snod->node; 768 769 err = ubifs_validate_entry(c, dent); 770 if (err) 771 goto out_dump; 772 773 err = insert_dent(c, lnum, snod->offs, snod->len, hash, 774 &snod->key, dent->name, 775 le16_to_cpu(dent->nlen), snod->sqnum, 776 !le64_to_cpu(dent->inum), &used); 777 break; 778 } 779 case UBIFS_TRUN_NODE: 780 { 781 struct ubifs_trun_node *trun = snod->node; 782 loff_t old_size = le64_to_cpu(trun->old_size); 783 loff_t new_size = le64_to_cpu(trun->new_size); 784 union ubifs_key key; 785 786 /* Validate truncation node */ 787 if (old_size < 0 || old_size > c->max_inode_sz || 788 new_size < 0 || new_size > c->max_inode_sz || 789 old_size <= new_size) { 790 ubifs_err(c, "bad truncation node"); 791 goto out_dump; 792 } 793 794 /* 795 * Create a fake truncation key just to use the same 796 * functions which expect nodes to have keys. 797 */ 798 trun_key_init(c, &key, le32_to_cpu(trun->inum)); 799 err = insert_node(c, lnum, snod->offs, snod->len, hash, 800 &key, snod->sqnum, 1, &used, 801 old_size, new_size); 802 break; 803 } 804 case UBIFS_AUTH_NODE: 805 break; 806 default: 807 ubifs_err(c, "unexpected node type %d in bud LEB %d:%d", 808 snod->type, lnum, snod->offs); 809 err = -EINVAL; 810 goto out_dump; 811 } 812 if (err) 813 goto out; 814 815 n++; 816 if (n == n_nodes) 817 break; 818 } 819 820 ubifs_assert(c, ubifs_search_bud(c, lnum)); 821 ubifs_assert(c, sleb->endpt - offs >= used); 822 ubifs_assert(c, sleb->endpt % c->min_io_size == 0); 823 824 b->dirty = sleb->endpt - offs - used; 825 b->free = c->leb_size - sleb->endpt; 826 dbg_mnt("bud LEB %d replied: dirty %d, free %d", 827 lnum, b->dirty, b->free); 828 829 out: 830 ubifs_scan_destroy(sleb); 831 return err; 832 833 out_dump: 834 ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs); 835 ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); 836 ubifs_scan_destroy(sleb); 837 return -EINVAL; 838 } 839 840 /** 841 * replay_buds - replay all buds. 842 * @c: UBIFS file-system description object 843 * 844 * This function returns zero in case of success and a negative error code in 845 * case of failure. 846 */ 847 static int replay_buds(struct ubifs_info *c) 848 { 849 struct bud_entry *b; 850 int err; 851 unsigned long long prev_sqnum = 0; 852 853 list_for_each_entry(b, &c->replay_buds, list) { 854 err = replay_bud(c, b); 855 if (err) 856 return err; 857 858 ubifs_assert(c, b->sqnum > prev_sqnum); 859 prev_sqnum = b->sqnum; 860 } 861 862 return 0; 863 } 864 865 /** 866 * destroy_bud_list - destroy the list of buds to replay. 867 * @c: UBIFS file-system description object 868 */ 869 static void destroy_bud_list(struct ubifs_info *c) 870 { 871 struct bud_entry *b; 872 873 while (!list_empty(&c->replay_buds)) { 874 b = list_entry(c->replay_buds.next, struct bud_entry, list); 875 list_del(&b->list); 876 kfree(b); 877 } 878 } 879 880 /** 881 * add_replay_bud - add a bud to the list of buds to replay. 882 * @c: UBIFS file-system description object 883 * @lnum: bud logical eraseblock number to replay 884 * @offs: bud start offset 885 * @jhead: journal head to which this bud belongs 886 * @sqnum: reference node sequence number 887 * 888 * This function returns zero in case of success and a negative error code in 889 * case of failure. 890 */ 891 static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 892 unsigned long long sqnum) 893 { 894 struct ubifs_bud *bud; 895 struct bud_entry *b; 896 int err; 897 898 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); 899 900 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); 901 if (!bud) 902 return -ENOMEM; 903 904 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); 905 if (!b) { 906 err = -ENOMEM; 907 goto out; 908 } 909 910 bud->lnum = lnum; 911 bud->start = offs; 912 bud->jhead = jhead; 913 bud->log_hash = ubifs_hash_get_desc(c); 914 if (IS_ERR(bud->log_hash)) { 915 err = PTR_ERR(bud->log_hash); 916 goto out; 917 } 918 919 ubifs_shash_copy_state(c, c->log_hash, bud->log_hash); 920 921 ubifs_add_bud(c, bud); 922 923 b->bud = bud; 924 b->sqnum = sqnum; 925 list_add_tail(&b->list, &c->replay_buds); 926 927 return 0; 928 out: 929 kfree(bud); 930 kfree(b); 931 932 return err; 933 } 934 935 /** 936 * validate_ref - validate a reference node. 937 * @c: UBIFS file-system description object 938 * @ref: the reference node to validate 939 * 940 * This function returns %1 if a bud reference already exists for the LEB. %0 is 941 * returned if the reference node is new, otherwise %-EINVAL is returned if 942 * validation failed. 943 */ 944 static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) 945 { 946 struct ubifs_bud *bud; 947 int lnum = le32_to_cpu(ref->lnum); 948 unsigned int offs = le32_to_cpu(ref->offs); 949 unsigned int jhead = le32_to_cpu(ref->jhead); 950 951 /* 952 * ref->offs may point to the end of LEB when the journal head points 953 * to the end of LEB and we write reference node for it during commit. 954 * So this is why we require 'offs > c->leb_size'. 955 */ 956 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || 957 lnum < c->main_first || offs > c->leb_size || 958 offs & (c->min_io_size - 1)) 959 return -EINVAL; 960 961 /* Make sure we have not already looked at this bud */ 962 bud = ubifs_search_bud(c, lnum); 963 if (bud) { 964 if (bud->jhead == jhead && bud->start <= offs) 965 return 1; 966 ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs); 967 return -EINVAL; 968 } 969 970 return 0; 971 } 972 973 /** 974 * replay_log_leb - replay a log logical eraseblock. 975 * @c: UBIFS file-system description object 976 * @lnum: log logical eraseblock to replay 977 * @offs: offset to start replaying from 978 * @sbuf: scan buffer 979 * 980 * This function replays a log LEB and returns zero in case of success, %1 if 981 * this is the last LEB in the log, and a negative error code in case of 982 * failure. 983 */ 984 static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) 985 { 986 int err; 987 struct ubifs_scan_leb *sleb; 988 struct ubifs_scan_node *snod; 989 const struct ubifs_cs_node *node; 990 991 dbg_mnt("replay log LEB %d:%d", lnum, offs); 992 sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery); 993 if (IS_ERR(sleb)) { 994 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) 995 return PTR_ERR(sleb); 996 /* 997 * Note, the below function will recover this log LEB only if 998 * it is the last, because unclean reboots can possibly corrupt 999 * only the tail of the log. 1000 */ 1001 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 1002 if (IS_ERR(sleb)) 1003 return PTR_ERR(sleb); 1004 } 1005 1006 if (sleb->nodes_cnt == 0) { 1007 err = 1; 1008 goto out; 1009 } 1010 1011 node = sleb->buf; 1012 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); 1013 if (c->cs_sqnum == 0) { 1014 /* 1015 * This is the first log LEB we are looking at, make sure that 1016 * the first node is a commit start node. Also record its 1017 * sequence number so that UBIFS can determine where the log 1018 * ends, because all nodes which were have higher sequence 1019 * numbers. 1020 */ 1021 if (snod->type != UBIFS_CS_NODE) { 1022 ubifs_err(c, "first log node at LEB %d:%d is not CS node", 1023 lnum, offs); 1024 goto out_dump; 1025 } 1026 if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 1027 ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu", 1028 lnum, offs, 1029 (unsigned long long)le64_to_cpu(node->cmt_no), 1030 c->cmt_no); 1031 goto out_dump; 1032 } 1033 1034 c->cs_sqnum = le64_to_cpu(node->ch.sqnum); 1035 dbg_mnt("commit start sqnum %llu", c->cs_sqnum); 1036 1037 err = ubifs_shash_init(c, c->log_hash); 1038 if (err) 1039 goto out; 1040 1041 err = ubifs_shash_update(c, c->log_hash, node, UBIFS_CS_NODE_SZ); 1042 if (err < 0) 1043 goto out; 1044 } 1045 1046 if (snod->sqnum < c->cs_sqnum) { 1047 /* 1048 * This means that we reached end of log and now 1049 * look to the older log data, which was already 1050 * committed but the eraseblock was not erased (UBIFS 1051 * only un-maps it). So this basically means we have to 1052 * exit with "end of log" code. 1053 */ 1054 err = 1; 1055 goto out; 1056 } 1057 1058 /* Make sure the first node sits at offset zero of the LEB */ 1059 if (snod->offs != 0) { 1060 ubifs_err(c, "first node is not at zero offset"); 1061 goto out_dump; 1062 } 1063 1064 list_for_each_entry(snod, &sleb->nodes, list) { 1065 cond_resched(); 1066 1067 if (snod->sqnum >= SQNUM_WATERMARK) { 1068 ubifs_err(c, "file system's life ended"); 1069 goto out_dump; 1070 } 1071 1072 if (snod->sqnum < c->cs_sqnum) { 1073 ubifs_err(c, "bad sqnum %llu, commit sqnum %llu", 1074 snod->sqnum, c->cs_sqnum); 1075 goto out_dump; 1076 } 1077 1078 if (snod->sqnum > c->max_sqnum) 1079 c->max_sqnum = snod->sqnum; 1080 1081 switch (snod->type) { 1082 case UBIFS_REF_NODE: { 1083 const struct ubifs_ref_node *ref = snod->node; 1084 1085 err = validate_ref(c, ref); 1086 if (err == 1) 1087 break; /* Already have this bud */ 1088 if (err) 1089 goto out_dump; 1090 1091 err = ubifs_shash_update(c, c->log_hash, ref, 1092 UBIFS_REF_NODE_SZ); 1093 if (err) 1094 goto out; 1095 1096 err = add_replay_bud(c, le32_to_cpu(ref->lnum), 1097 le32_to_cpu(ref->offs), 1098 le32_to_cpu(ref->jhead), 1099 snod->sqnum); 1100 if (err) 1101 goto out; 1102 1103 break; 1104 } 1105 case UBIFS_CS_NODE: 1106 /* Make sure it sits at the beginning of LEB */ 1107 if (snod->offs != 0) { 1108 ubifs_err(c, "unexpected node in log"); 1109 goto out_dump; 1110 } 1111 break; 1112 default: 1113 ubifs_err(c, "unexpected node in log"); 1114 goto out_dump; 1115 } 1116 } 1117 1118 if (sleb->endpt || c->lhead_offs >= c->leb_size) { 1119 c->lhead_lnum = lnum; 1120 c->lhead_offs = sleb->endpt; 1121 } 1122 1123 err = !sleb->endpt; 1124 out: 1125 ubifs_scan_destroy(sleb); 1126 return err; 1127 1128 out_dump: 1129 ubifs_err(c, "log error detected while replaying the log at LEB %d:%d", 1130 lnum, offs + snod->offs); 1131 ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); 1132 ubifs_scan_destroy(sleb); 1133 return -EINVAL; 1134 } 1135 1136 /** 1137 * take_ihead - update the status of the index head in lprops to 'taken'. 1138 * @c: UBIFS file-system description object 1139 * 1140 * This function returns the amount of free space in the index head LEB or a 1141 * negative error code. 1142 */ 1143 static int take_ihead(struct ubifs_info *c) 1144 { 1145 const struct ubifs_lprops *lp; 1146 int err, free; 1147 1148 ubifs_get_lprops(c); 1149 1150 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); 1151 if (IS_ERR(lp)) { 1152 err = PTR_ERR(lp); 1153 goto out; 1154 } 1155 1156 free = lp->free; 1157 1158 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 1159 lp->flags | LPROPS_TAKEN, 0); 1160 if (IS_ERR(lp)) { 1161 err = PTR_ERR(lp); 1162 goto out; 1163 } 1164 1165 err = free; 1166 out: 1167 ubifs_release_lprops(c); 1168 return err; 1169 } 1170 1171 /** 1172 * ubifs_replay_journal - replay journal. 1173 * @c: UBIFS file-system description object 1174 * 1175 * This function scans the journal, replays and cleans it up. It makes sure all 1176 * memory data structures related to uncommitted journal are built (dirty TNC 1177 * tree, tree of buds, modified lprops, etc). 1178 */ 1179 int ubifs_replay_journal(struct ubifs_info *c) 1180 { 1181 int err, lnum, free; 1182 1183 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1184 1185 /* Update the status of the index head in lprops to 'taken' */ 1186 free = take_ihead(c); 1187 if (free < 0) 1188 return free; /* Error code */ 1189 1190 if (c->ihead_offs != c->leb_size - free) { 1191 ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum, 1192 c->ihead_offs); 1193 return -EINVAL; 1194 } 1195 1196 dbg_mnt("start replaying the journal"); 1197 c->replaying = 1; 1198 lnum = c->ltail_lnum = c->lhead_lnum; 1199 1200 do { 1201 err = replay_log_leb(c, lnum, 0, c->sbuf); 1202 if (err == 1) { 1203 if (lnum != c->lhead_lnum) 1204 /* We hit the end of the log */ 1205 break; 1206 1207 /* 1208 * The head of the log must always start with the 1209 * "commit start" node on a properly formatted UBIFS. 1210 * But we found no nodes at all, which means that 1211 * something went wrong and we cannot proceed mounting 1212 * the file-system. 1213 */ 1214 ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted", 1215 lnum, 0); 1216 err = -EINVAL; 1217 } 1218 if (err) 1219 goto out; 1220 lnum = ubifs_next_log_lnum(c, lnum); 1221 } while (lnum != c->ltail_lnum); 1222 1223 err = replay_buds(c); 1224 if (err) 1225 goto out; 1226 1227 err = apply_replay_list(c); 1228 if (err) 1229 goto out; 1230 1231 err = set_buds_lprops(c); 1232 if (err) 1233 goto out; 1234 1235 /* 1236 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable 1237 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs 1238 * depend on it. This means we have to initialize it to make sure 1239 * budgeting works properly. 1240 */ 1241 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1242 c->bi.uncommitted_idx *= c->max_idx_node_sz; 1243 1244 ubifs_assert(c, c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1245 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu", 1246 c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1247 (unsigned long)c->highest_inum); 1248 out: 1249 destroy_replay_list(c); 1250 destroy_bud_list(c); 1251 c->replaying = 0; 1252 return err; 1253 } 1254
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.