1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_ag.h" 14 #include "xfs_inode.h" 15 #include "xfs_errortag.h" 16 #include "xfs_error.h" 17 #include "xfs_icache.h" 18 #include "xfs_trans.h" 19 #include "xfs_ialloc.h" 20 #include "xfs_dir2.h" 21 #include "xfs_health.h" 22 23 #include <linux/iversion.h> 24 25 /* 26 * If we are doing readahead on an inode buffer, we might be in log recovery 27 * reading an inode allocation buffer that hasn't yet been replayed, and hence 28 * has not had the inode cores stamped into it. Hence for readahead, the buffer 29 * may be potentially invalid. 30 * 31 * If the readahead buffer is invalid, we need to mark it with an error and 32 * clear the DONE status of the buffer so that a followup read will re-read it 33 * from disk. We don't report the error otherwise to avoid warnings during log 34 * recovery and we don't get unnecessary panics on debug kernels. We use EIO here 35 * because all we want to do is say readahead failed; there is no-one to report 36 * the error to, so this will distinguish it from a non-ra verifier failure. 37 * Changes to this readahead error behaviour also need to be reflected in 38 * xfs_dquot_buf_readahead_verify(). 39 */ 40 static void 41 xfs_inode_buf_verify( 42 struct xfs_buf *bp, 43 bool readahead) 44 { 45 struct xfs_mount *mp = bp->b_mount; 46 int i; 47 int ni; 48 49 /* 50 * Validate the magic number and version of every inode in the buffer 51 */ 52 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; 53 for (i = 0; i < ni; i++) { 54 struct xfs_dinode *dip; 55 xfs_agino_t unlinked_ino; 56 int di_ok; 57 58 dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); 59 unlinked_ino = be32_to_cpu(dip->di_next_unlinked); 60 di_ok = xfs_verify_magic16(bp, dip->di_magic) && 61 xfs_dinode_good_version(mp, dip->di_version) && 62 xfs_verify_agino_or_null(bp->b_pag, unlinked_ino); 63 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 64 XFS_ERRTAG_ITOBP_INOTOBP))) { 65 if (readahead) { 66 bp->b_flags &= ~XBF_DONE; 67 xfs_buf_ioerror(bp, -EIO); 68 return; 69 } 70 71 #ifdef DEBUG 72 xfs_alert(mp, 73 "bad inode magic/vsn daddr %lld #%d (magic=%x)", 74 (unsigned long long)xfs_buf_daddr(bp), i, 75 be16_to_cpu(dip->di_magic)); 76 #endif 77 xfs_buf_verifier_error(bp, -EFSCORRUPTED, 78 __func__, dip, sizeof(*dip), 79 NULL); 80 return; 81 } 82 } 83 } 84 85 86 static void 87 xfs_inode_buf_read_verify( 88 struct xfs_buf *bp) 89 { 90 xfs_inode_buf_verify(bp, false); 91 } 92 93 static void 94 xfs_inode_buf_readahead_verify( 95 struct xfs_buf *bp) 96 { 97 xfs_inode_buf_verify(bp, true); 98 } 99 100 static void 101 xfs_inode_buf_write_verify( 102 struct xfs_buf *bp) 103 { 104 xfs_inode_buf_verify(bp, false); 105 } 106 107 const struct xfs_buf_ops xfs_inode_buf_ops = { 108 .name = "xfs_inode", 109 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 110 cpu_to_be16(XFS_DINODE_MAGIC) }, 111 .verify_read = xfs_inode_buf_read_verify, 112 .verify_write = xfs_inode_buf_write_verify, 113 }; 114 115 const struct xfs_buf_ops xfs_inode_buf_ra_ops = { 116 .name = "xfs_inode_ra", 117 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 118 cpu_to_be16(XFS_DINODE_MAGIC) }, 119 .verify_read = xfs_inode_buf_readahead_verify, 120 .verify_write = xfs_inode_buf_write_verify, 121 }; 122 123 124 /* 125 * This routine is called to map an inode to the buffer containing the on-disk 126 * version of the inode. It returns a pointer to the buffer containing the 127 * on-disk inode in the bpp parameter. 128 */ 129 int 130 xfs_imap_to_bp( 131 struct xfs_mount *mp, 132 struct xfs_trans *tp, 133 struct xfs_imap *imap, 134 struct xfs_buf **bpp) 135 { 136 int error; 137 138 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, 139 imap->im_len, XBF_UNMAPPED, bpp, &xfs_inode_buf_ops); 140 if (xfs_metadata_is_sick(error)) 141 xfs_agno_mark_sick(mp, xfs_daddr_to_agno(mp, imap->im_blkno), 142 XFS_SICK_AG_INODES); 143 return error; 144 } 145 146 static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts) 147 { 148 struct timespec64 tv; 149 uint32_t n; 150 151 tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n)); 152 tv.tv_nsec = n; 153 154 return tv; 155 } 156 157 /* Convert an ondisk timestamp to an incore timestamp. */ 158 struct timespec64 159 xfs_inode_from_disk_ts( 160 struct xfs_dinode *dip, 161 const xfs_timestamp_t ts) 162 { 163 struct timespec64 tv; 164 struct xfs_legacy_timestamp *lts; 165 166 if (xfs_dinode_has_bigtime(dip)) 167 return xfs_inode_decode_bigtime(be64_to_cpu(ts)); 168 169 lts = (struct xfs_legacy_timestamp *)&ts; 170 tv.tv_sec = (int)be32_to_cpu(lts->t_sec); 171 tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec); 172 173 return tv; 174 } 175 176 int 177 xfs_inode_from_disk( 178 struct xfs_inode *ip, 179 struct xfs_dinode *from) 180 { 181 struct inode *inode = VFS_I(ip); 182 int error; 183 xfs_failaddr_t fa; 184 185 ASSERT(ip->i_cowfp == NULL); 186 187 fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from); 188 if (fa) { 189 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from, 190 sizeof(*from), fa); 191 return -EFSCORRUPTED; 192 } 193 194 /* 195 * First get the permanent information that is needed to allocate an 196 * inode. If the inode is unused, mode is zero and we shouldn't mess 197 * with the uninitialized part of it. 198 */ 199 if (!xfs_has_v3inodes(ip->i_mount)) 200 ip->i_flushiter = be16_to_cpu(from->di_flushiter); 201 inode->i_generation = be32_to_cpu(from->di_gen); 202 inode->i_mode = be16_to_cpu(from->di_mode); 203 if (!inode->i_mode) 204 return 0; 205 206 /* 207 * Convert v1 inodes immediately to v2 inode format as this is the 208 * minimum inode version format we support in the rest of the code. 209 * They will also be unconditionally written back to disk as v2 inodes. 210 */ 211 if (unlikely(from->di_version == 1)) { 212 set_nlink(inode, be16_to_cpu(from->di_onlink)); 213 ip->i_projid = 0; 214 } else { 215 set_nlink(inode, be32_to_cpu(from->di_nlink)); 216 ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 | 217 be16_to_cpu(from->di_projid_lo); 218 } 219 220 i_uid_write(inode, be32_to_cpu(from->di_uid)); 221 i_gid_write(inode, be32_to_cpu(from->di_gid)); 222 223 /* 224 * Time is signed, so need to convert to signed 32 bit before 225 * storing in inode timestamp which may be 64 bit. Otherwise 226 * a time before epoch is converted to a time long after epoch 227 * on 64 bit systems. 228 */ 229 inode_set_atime_to_ts(inode, 230 xfs_inode_from_disk_ts(from, from->di_atime)); 231 inode_set_mtime_to_ts(inode, 232 xfs_inode_from_disk_ts(from, from->di_mtime)); 233 inode_set_ctime_to_ts(inode, 234 xfs_inode_from_disk_ts(from, from->di_ctime)); 235 236 ip->i_disk_size = be64_to_cpu(from->di_size); 237 ip->i_nblocks = be64_to_cpu(from->di_nblocks); 238 ip->i_extsize = be32_to_cpu(from->di_extsize); 239 ip->i_forkoff = from->di_forkoff; 240 ip->i_diflags = be16_to_cpu(from->di_flags); 241 ip->i_next_unlinked = be32_to_cpu(from->di_next_unlinked); 242 243 if (from->di_dmevmask || from->di_dmstate) 244 xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS); 245 246 if (xfs_has_v3inodes(ip->i_mount)) { 247 inode_set_iversion_queried(inode, 248 be64_to_cpu(from->di_changecount)); 249 ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime); 250 ip->i_diflags2 = be64_to_cpu(from->di_flags2); 251 ip->i_cowextsize = be32_to_cpu(from->di_cowextsize); 252 } 253 254 error = xfs_iformat_data_fork(ip, from); 255 if (error) 256 return error; 257 if (from->di_forkoff) { 258 error = xfs_iformat_attr_fork(ip, from); 259 if (error) 260 goto out_destroy_data_fork; 261 } 262 if (xfs_is_reflink_inode(ip)) 263 xfs_ifork_init_cow(ip); 264 return 0; 265 266 out_destroy_data_fork: 267 xfs_idestroy_fork(&ip->i_df); 268 return error; 269 } 270 271 /* Convert an incore timestamp to an ondisk timestamp. */ 272 static inline xfs_timestamp_t 273 xfs_inode_to_disk_ts( 274 struct xfs_inode *ip, 275 const struct timespec64 tv) 276 { 277 struct xfs_legacy_timestamp *lts; 278 xfs_timestamp_t ts; 279 280 if (xfs_inode_has_bigtime(ip)) 281 return cpu_to_be64(xfs_inode_encode_bigtime(tv)); 282 283 lts = (struct xfs_legacy_timestamp *)&ts; 284 lts->t_sec = cpu_to_be32(tv.tv_sec); 285 lts->t_nsec = cpu_to_be32(tv.tv_nsec); 286 287 return ts; 288 } 289 290 static inline void 291 xfs_inode_to_disk_iext_counters( 292 struct xfs_inode *ip, 293 struct xfs_dinode *to) 294 { 295 if (xfs_inode_has_large_extent_counts(ip)) { 296 to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df)); 297 to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_af)); 298 /* 299 * We might be upgrading the inode to use larger extent counters 300 * than was previously used. Hence zero the unused field. 301 */ 302 to->di_nrext64_pad = cpu_to_be16(0); 303 } else { 304 to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df)); 305 to->di_anextents = cpu_to_be16(xfs_ifork_nextents(&ip->i_af)); 306 } 307 } 308 309 void 310 xfs_inode_to_disk( 311 struct xfs_inode *ip, 312 struct xfs_dinode *to, 313 xfs_lsn_t lsn) 314 { 315 struct inode *inode = VFS_I(ip); 316 317 to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 318 to->di_onlink = 0; 319 320 to->di_format = xfs_ifork_format(&ip->i_df); 321 to->di_uid = cpu_to_be32(i_uid_read(inode)); 322 to->di_gid = cpu_to_be32(i_gid_read(inode)); 323 to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff); 324 to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16); 325 326 to->di_atime = xfs_inode_to_disk_ts(ip, inode_get_atime(inode)); 327 to->di_mtime = xfs_inode_to_disk_ts(ip, inode_get_mtime(inode)); 328 to->di_ctime = xfs_inode_to_disk_ts(ip, inode_get_ctime(inode)); 329 to->di_nlink = cpu_to_be32(inode->i_nlink); 330 to->di_gen = cpu_to_be32(inode->i_generation); 331 to->di_mode = cpu_to_be16(inode->i_mode); 332 333 to->di_size = cpu_to_be64(ip->i_disk_size); 334 to->di_nblocks = cpu_to_be64(ip->i_nblocks); 335 to->di_extsize = cpu_to_be32(ip->i_extsize); 336 to->di_forkoff = ip->i_forkoff; 337 to->di_aformat = xfs_ifork_format(&ip->i_af); 338 to->di_flags = cpu_to_be16(ip->i_diflags); 339 340 if (xfs_has_v3inodes(ip->i_mount)) { 341 to->di_version = 3; 342 to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); 343 to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime); 344 to->di_flags2 = cpu_to_be64(ip->i_diflags2); 345 to->di_cowextsize = cpu_to_be32(ip->i_cowextsize); 346 to->di_ino = cpu_to_be64(ip->i_ino); 347 to->di_lsn = cpu_to_be64(lsn); 348 memset(to->di_pad2, 0, sizeof(to->di_pad2)); 349 uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); 350 to->di_v3_pad = 0; 351 } else { 352 to->di_version = 2; 353 to->di_flushiter = cpu_to_be16(ip->i_flushiter); 354 memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad)); 355 } 356 357 xfs_inode_to_disk_iext_counters(ip, to); 358 } 359 360 static xfs_failaddr_t 361 xfs_dinode_verify_fork( 362 struct xfs_dinode *dip, 363 struct xfs_mount *mp, 364 int whichfork) 365 { 366 xfs_extnum_t di_nextents; 367 xfs_extnum_t max_extents; 368 mode_t mode = be16_to_cpu(dip->di_mode); 369 uint32_t fork_size = XFS_DFORK_SIZE(dip, mp, whichfork); 370 uint32_t fork_format = XFS_DFORK_FORMAT(dip, whichfork); 371 372 di_nextents = xfs_dfork_nextents(dip, whichfork); 373 374 /* 375 * For fork types that can contain local data, check that the fork 376 * format matches the size of local data contained within the fork. 377 */ 378 if (whichfork == XFS_DATA_FORK) { 379 /* 380 * A directory small enough to fit in the inode must be stored 381 * in local format. The directory sf <-> extents conversion 382 * code updates the directory size accordingly. Directories 383 * being truncated have zero size and are not subject to this 384 * check. 385 */ 386 if (S_ISDIR(mode)) { 387 if (dip->di_size && 388 be64_to_cpu(dip->di_size) <= fork_size && 389 fork_format != XFS_DINODE_FMT_LOCAL) 390 return __this_address; 391 } 392 393 /* 394 * A symlink with a target small enough to fit in the inode can 395 * be stored in extents format if xattrs were added (thus 396 * converting the data fork from shortform to remote format) 397 * and then removed. 398 */ 399 if (S_ISLNK(mode)) { 400 if (be64_to_cpu(dip->di_size) <= fork_size && 401 fork_format != XFS_DINODE_FMT_EXTENTS && 402 fork_format != XFS_DINODE_FMT_LOCAL) 403 return __this_address; 404 } 405 406 /* 407 * For all types, check that when the size says the fork should 408 * be in extent or btree format, the inode isn't claiming to be 409 * in local format. 410 */ 411 if (be64_to_cpu(dip->di_size) > fork_size && 412 fork_format == XFS_DINODE_FMT_LOCAL) 413 return __this_address; 414 } 415 416 switch (fork_format) { 417 case XFS_DINODE_FMT_LOCAL: 418 /* 419 * No local regular files yet. 420 */ 421 if (S_ISREG(mode) && whichfork == XFS_DATA_FORK) 422 return __this_address; 423 if (di_nextents) 424 return __this_address; 425 break; 426 case XFS_DINODE_FMT_EXTENTS: 427 if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork)) 428 return __this_address; 429 break; 430 case XFS_DINODE_FMT_BTREE: 431 max_extents = xfs_iext_max_nextents( 432 xfs_dinode_has_large_extent_counts(dip), 433 whichfork); 434 if (di_nextents > max_extents) 435 return __this_address; 436 break; 437 default: 438 return __this_address; 439 } 440 return NULL; 441 } 442 443 static xfs_failaddr_t 444 xfs_dinode_verify_forkoff( 445 struct xfs_dinode *dip, 446 struct xfs_mount *mp) 447 { 448 if (!dip->di_forkoff) 449 return NULL; 450 451 switch (dip->di_format) { 452 case XFS_DINODE_FMT_DEV: 453 if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3)) 454 return __this_address; 455 break; 456 case XFS_DINODE_FMT_LOCAL: /* fall through ... */ 457 case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ 458 case XFS_DINODE_FMT_BTREE: 459 if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3)) 460 return __this_address; 461 break; 462 default: 463 return __this_address; 464 } 465 return NULL; 466 } 467 468 static xfs_failaddr_t 469 xfs_dinode_verify_nrext64( 470 struct xfs_mount *mp, 471 struct xfs_dinode *dip) 472 { 473 if (xfs_dinode_has_large_extent_counts(dip)) { 474 if (!xfs_has_large_extent_counts(mp)) 475 return __this_address; 476 if (dip->di_nrext64_pad != 0) 477 return __this_address; 478 } else if (dip->di_version >= 3) { 479 if (dip->di_v3_pad != 0) 480 return __this_address; 481 } 482 483 return NULL; 484 } 485 486 xfs_failaddr_t 487 xfs_dinode_verify( 488 struct xfs_mount *mp, 489 xfs_ino_t ino, 490 struct xfs_dinode *dip) 491 { 492 xfs_failaddr_t fa; 493 uint16_t mode; 494 uint16_t flags; 495 uint64_t flags2; 496 uint64_t di_size; 497 xfs_extnum_t nextents; 498 xfs_extnum_t naextents; 499 xfs_filblks_t nblocks; 500 501 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) 502 return __this_address; 503 504 /* Verify v3 integrity information first */ 505 if (dip->di_version >= 3) { 506 if (!xfs_has_v3inodes(mp)) 507 return __this_address; 508 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 509 XFS_DINODE_CRC_OFF)) 510 return __this_address; 511 if (be64_to_cpu(dip->di_ino) != ino) 512 return __this_address; 513 if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) 514 return __this_address; 515 } 516 517 /* 518 * Historical note: xfsprogs in the 3.2 era set up its incore inodes to 519 * have di_nlink track the link count, even if the actual filesystem 520 * only supported V1 inodes (i.e. di_onlink). When writing out the 521 * ondisk inode, it would set both the ondisk di_nlink and di_onlink to 522 * the the incore di_nlink value, which is why we cannot check for 523 * di_nlink==0 on a V1 inode. V2/3 inodes would get written out with 524 * di_onlink==0, so we can check that. 525 */ 526 if (dip->di_version >= 2) { 527 if (dip->di_onlink) 528 return __this_address; 529 } 530 531 /* don't allow invalid i_size */ 532 di_size = be64_to_cpu(dip->di_size); 533 if (di_size & (1ULL << 63)) 534 return __this_address; 535 536 mode = be16_to_cpu(dip->di_mode); 537 if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) 538 return __this_address; 539 540 /* 541 * No zero-length symlinks/dirs unless they're unlinked and hence being 542 * inactivated. 543 */ 544 if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) { 545 if (dip->di_version > 1) { 546 if (dip->di_nlink) 547 return __this_address; 548 } else { 549 if (dip->di_onlink) 550 return __this_address; 551 } 552 } 553 554 fa = xfs_dinode_verify_nrext64(mp, dip); 555 if (fa) 556 return fa; 557 558 nextents = xfs_dfork_data_extents(dip); 559 naextents = xfs_dfork_attr_extents(dip); 560 nblocks = be64_to_cpu(dip->di_nblocks); 561 562 /* Fork checks carried over from xfs_iformat_fork */ 563 if (mode && nextents + naextents > nblocks) 564 return __this_address; 565 566 if (nextents + naextents == 0 && nblocks != 0) 567 return __this_address; 568 569 if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents) 570 return __this_address; 571 572 if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) 573 return __this_address; 574 575 flags = be16_to_cpu(dip->di_flags); 576 577 if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) 578 return __this_address; 579 580 /* check for illegal values of forkoff */ 581 fa = xfs_dinode_verify_forkoff(dip, mp); 582 if (fa) 583 return fa; 584 585 /* Do we have appropriate data fork formats for the mode? */ 586 switch (mode & S_IFMT) { 587 case S_IFIFO: 588 case S_IFCHR: 589 case S_IFBLK: 590 case S_IFSOCK: 591 if (dip->di_format != XFS_DINODE_FMT_DEV) 592 return __this_address; 593 break; 594 case S_IFREG: 595 case S_IFLNK: 596 case S_IFDIR: 597 fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK); 598 if (fa) 599 return fa; 600 break; 601 case 0: 602 /* Uninitialized inode ok. */ 603 break; 604 default: 605 return __this_address; 606 } 607 608 if (dip->di_forkoff) { 609 fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK); 610 if (fa) 611 return fa; 612 } else { 613 /* 614 * If there is no fork offset, this may be a freshly-made inode 615 * in a new disk cluster, in which case di_aformat is zeroed. 616 * Otherwise, such an inode must be in EXTENTS format; this goes 617 * for freed inodes as well. 618 */ 619 switch (dip->di_aformat) { 620 case 0: 621 case XFS_DINODE_FMT_EXTENTS: 622 break; 623 default: 624 return __this_address; 625 } 626 if (naextents) 627 return __this_address; 628 } 629 630 /* extent size hint validation */ 631 fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize), 632 mode, flags); 633 if (fa) 634 return fa; 635 636 /* only version 3 or greater inodes are extensively verified here */ 637 if (dip->di_version < 3) 638 return NULL; 639 640 flags2 = be64_to_cpu(dip->di_flags2); 641 642 /* don't allow reflink/cowextsize if we don't have reflink */ 643 if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && 644 !xfs_has_reflink(mp)) 645 return __this_address; 646 647 /* only regular files get reflink */ 648 if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG) 649 return __this_address; 650 651 /* don't let reflink and realtime mix */ 652 if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) 653 return __this_address; 654 655 /* COW extent size hint validation */ 656 fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize), 657 mode, flags, flags2); 658 if (fa) 659 return fa; 660 661 /* bigtime iflag can only happen on bigtime filesystems */ 662 if (xfs_dinode_has_bigtime(dip) && 663 !xfs_has_bigtime(mp)) 664 return __this_address; 665 666 return NULL; 667 } 668 669 void 670 xfs_dinode_calc_crc( 671 struct xfs_mount *mp, 672 struct xfs_dinode *dip) 673 { 674 uint32_t crc; 675 676 if (dip->di_version < 3) 677 return; 678 679 ASSERT(xfs_has_crc(mp)); 680 crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize, 681 XFS_DINODE_CRC_OFF); 682 dip->di_crc = xfs_end_cksum(crc); 683 } 684 685 /* 686 * Validate di_extsize hint. 687 * 688 * 1. Extent size hint is only valid for directories and regular files. 689 * 2. FS_XFLAG_EXTSIZE is only valid for regular files. 690 * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories. 691 * 4. Hint cannot be larger than MAXTEXTLEN. 692 * 5. Can be changed on directories at any time. 693 * 6. Hint value of 0 turns off hints, clears inode flags. 694 * 7. Extent size must be a multiple of the appropriate block size. 695 * For realtime files, this is the rt extent size. 696 * 8. For non-realtime files, the extent size hint must be limited 697 * to half the AG size to avoid alignment extending the extent beyond the 698 * limits of the AG. 699 */ 700 xfs_failaddr_t 701 xfs_inode_validate_extsize( 702 struct xfs_mount *mp, 703 uint32_t extsize, 704 uint16_t mode, 705 uint16_t flags) 706 { 707 bool rt_flag; 708 bool hint_flag; 709 bool inherit_flag; 710 uint32_t extsize_bytes; 711 uint32_t blocksize_bytes; 712 713 rt_flag = (flags & XFS_DIFLAG_REALTIME); 714 hint_flag = (flags & XFS_DIFLAG_EXTSIZE); 715 inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); 716 extsize_bytes = XFS_FSB_TO_B(mp, extsize); 717 718 /* 719 * This comment describes a historic gap in this verifier function. 720 * 721 * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this 722 * function has never checked that the extent size hint is an integer 723 * multiple of the realtime extent size. Since we allow users to set 724 * this combination on non-rt filesystems /and/ to change the rt 725 * extent size when adding a rt device to a filesystem, the net effect 726 * is that users can configure a filesystem anticipating one rt 727 * geometry and change their minds later. Directories do not use the 728 * extent size hint, so this is harmless for them. 729 * 730 * If a directory with a misaligned extent size hint is allowed to 731 * propagate that hint into a new regular realtime file, the result 732 * is that the inode cluster buffer verifier will trigger a corruption 733 * shutdown the next time it is run, because the verifier has always 734 * enforced the alignment rule for regular files. 735 * 736 * Because we allow administrators to set a new rt extent size when 737 * adding a rt section, we cannot add a check to this verifier because 738 * that will result a new source of directory corruption errors when 739 * reading an existing filesystem. Instead, we rely on callers to 740 * decide when alignment checks are appropriate, and fix things up as 741 * needed. 742 */ 743 744 if (rt_flag) 745 blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize); 746 else 747 blocksize_bytes = mp->m_sb.sb_blocksize; 748 749 if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode))) 750 return __this_address; 751 752 if (hint_flag && !S_ISREG(mode)) 753 return __this_address; 754 755 if (inherit_flag && !S_ISDIR(mode)) 756 return __this_address; 757 758 if ((hint_flag || inherit_flag) && extsize == 0) 759 return __this_address; 760 761 /* free inodes get flags set to zero but extsize remains */ 762 if (mode && !(hint_flag || inherit_flag) && extsize != 0) 763 return __this_address; 764 765 if (extsize_bytes % blocksize_bytes) 766 return __this_address; 767 768 if (extsize > XFS_MAX_BMBT_EXTLEN) 769 return __this_address; 770 771 if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) 772 return __this_address; 773 774 return NULL; 775 } 776 777 /* 778 * Validate di_cowextsize hint. 779 * 780 * 1. CoW extent size hint can only be set if reflink is enabled on the fs. 781 * The inode does not have to have any shared blocks, but it must be a v3. 782 * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files; 783 * for a directory, the hint is propagated to new files. 784 * 3. Can be changed on files & directories at any time. 785 * 4. Hint value of 0 turns off hints, clears inode flags. 786 * 5. Extent size must be a multiple of the appropriate block size. 787 * 6. The extent size hint must be limited to half the AG size to avoid 788 * alignment extending the extent beyond the limits of the AG. 789 */ 790 xfs_failaddr_t 791 xfs_inode_validate_cowextsize( 792 struct xfs_mount *mp, 793 uint32_t cowextsize, 794 uint16_t mode, 795 uint16_t flags, 796 uint64_t flags2) 797 { 798 bool rt_flag; 799 bool hint_flag; 800 uint32_t cowextsize_bytes; 801 802 rt_flag = (flags & XFS_DIFLAG_REALTIME); 803 hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); 804 cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize); 805 806 if (hint_flag && !xfs_has_reflink(mp)) 807 return __this_address; 808 809 if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) 810 return __this_address; 811 812 if (hint_flag && cowextsize == 0) 813 return __this_address; 814 815 /* free inodes get flags set to zero but cowextsize remains */ 816 if (mode && !hint_flag && cowextsize != 0) 817 return __this_address; 818 819 if (hint_flag && rt_flag) 820 return __this_address; 821 822 if (cowextsize_bytes % mp->m_sb.sb_blocksize) 823 return __this_address; 824 825 if (cowextsize > XFS_MAX_BMBT_EXTLEN) 826 return __this_address; 827 828 if (cowextsize > mp->m_sb.sb_agblocks / 2) 829 return __this_address; 830 831 return NULL; 832 } 833
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.