// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_iwalk.h"
#include "xfs_itable.h"
#include "xfs_error.h"
#include "xfs_icache.h"
#include "xfs_health.h"
#include "xfs_trans.h"

/*
 * Bulk Stat
 * =========
 *
 * Use the inode walking functions to fill out struct xfs_bulkstat for every
 * allocated inode, then pass the stat information to some externally provided
 * iteration function.
 */

/*
 * State shared between a bulkstat entry point and the per-inode worker.  The
 * entry points in this file allocate @buf once per call and the worker
 * overwrites it for each inode before handing it to @formatter.
 */
struct xfs_bstat_chunk {
	bulkstat_one_fmt_pf	formatter;	/* consumes each filled-in record */
	struct xfs_ibulk	*breq;		/* request being serviced (cursor, flags) */
	struct xfs_bulkstat	*buf;		/* scratch record, reused per inode */
};

/*
 * Fill out the bulkstat info for a single inode and report it somewhere.
 *
 * bc->breq->startino is effectively the inode cursor as we walk through the
 * filesystem.  Therefore, we update it any time we need to move the cursor
 * forward, regardless of whether or not we're sending any bstat information
 * back to userspace.  If the inode is internal metadata, or has been freed
 * out from under us, we simply keep going.
 *
 * However, if any other type of error happens we want to stop right where we
 * are so that userspace will call back with the exact number of the bad inode
 * and we can send back an error code.
 *
 * Note that if the formatter tells us there's no space left in the buffer
 * (-ECANCELED) we move the cursor forward and abort the walk.
 *
 * Returns 0 on success; -EINVAL or -ENOENT (cursor advanced) when the inode is
 * internal, private, or could not be looked up; -ECANCELED (cursor advanced)
 * when the formatter asked us to stop; or any other negative errno with the
 * cursor left untouched.
 */
STATIC int
xfs_bulkstat_one_int(
	struct xfs_mount	*mp,
	struct mnt_idmap	*idmap,
	struct xfs_trans	*tp,
	xfs_ino_t		ino,
	struct xfs_bstat_chunk	*bc)
{
	struct user_namespace	*sb_userns = mp->m_super->s_user_ns;
	struct xfs_inode	*ip;		/* incore inode pointer */
	struct inode		*inode;
	struct xfs_bulkstat	*buf = bc->buf;
	xfs_extnum_t		nextents;
	int			error = -EINVAL;	/* result for internal inodes */
	vfsuid_t		vfsuid;
	vfsgid_t		vfsgid;

	/* Internal inodes are never reported; skip them but move the cursor. */
	if (xfs_internal_inum(mp, ino))
		goto out_advance;

	error = xfs_iget(mp, tp, ino,
			 (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
			 XFS_ILOCK_SHARED, &ip);
	/* A missing or invalid inode is skipped; anything else stops here. */
	if (error == -ENOENT || error == -EINVAL)
		goto out_advance;
	if (error)
		goto out;

	/*
	 * From here on, every exit path must drop XFS_ILOCK_SHARED and release
	 * the inode with xfs_irele().
	 */

	/* Reload the incore unlinked list to avoid failure in inodegc. */
	if (xfs_inode_unlinked_incomplete(ip)) {
		error = xfs_inode_reload_unlinked_bucket(tp, ip);
		if (error) {
			/* Shut down and bail out without advancing the cursor. */
			xfs_iunlock(ip, XFS_ILOCK_SHARED);
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			xfs_irele(ip);
			return error;
		}
	}

	ASSERT(ip != NULL);
	ASSERT(ip->i_imap.im_blkno != 0);
	inode = VFS_I(ip);
	vfsuid = i_uid_into_vfsuid(idmap, inode);
	vfsgid = i_gid_into_vfsgid(idmap, inode);

	/* If this is a private inode, don't leak its details to userspace. */
	if (IS_PRIVATE(inode)) {
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		xfs_irele(ip);
		error = -EINVAL;
		goto out_advance;
	}

	/*
	 * xfs_iget returns the following without needing
	 * further change.
	 */
	buf->bs_projectid = ip->i_projid;
	buf->bs_ino = ino;
	buf->bs_uid = from_kuid(sb_userns, vfsuid_into_kuid(vfsuid));
	buf->bs_gid = from_kgid(sb_userns, vfsgid_into_kgid(vfsgid));
	buf->bs_size = ip->i_disk_size;

	/* Fields taken from the VFS inode. */
	buf->bs_nlink = inode->i_nlink;
	buf->bs_atime = inode_get_atime_sec(inode);
	buf->bs_atime_nsec = inode_get_atime_nsec(inode);
	buf->bs_mtime = inode_get_mtime_sec(inode);
	buf->bs_mtime_nsec = inode_get_mtime_nsec(inode);
	buf->bs_ctime = inode_get_ctime_sec(inode);
	buf->bs_ctime_nsec = inode_get_ctime_nsec(inode);
	buf->bs_gen = inode->i_generation;
	buf->bs_mode = inode->i_mode;

	buf->bs_xflags = xfs_ip2xflags(ip);
	buf->bs_extsize_blks = ip->i_extsize;

	/*
	 * Without XFS_IBULK_NREXT64 the data fork extent count is clamped to
	 * XFS_MAX_EXTCNT_DATA_FORK_SMALL and stored in bs_extents; with it the
	 * unclamped count goes into bs_extents64.
	 */
	nextents = xfs_ifork_nextents(&ip->i_df);
	if (!(bc->breq->flags & XFS_IBULK_NREXT64))
		buf->bs_extents = min(nextents, XFS_MAX_EXTCNT_DATA_FORK_SMALL);
	else
		buf->bs_extents64 = nextents;

	xfs_bulkstat_health(ip, buf);
	buf->bs_aextents = xfs_ifork_nextents(&ip->i_af);
	buf->bs_forkoff = xfs_inode_fork_boff(ip);
	buf->bs_version = XFS_BULKSTAT_VERSION_V5;

	/*
	 * Birth time and CoW extent size hint are only filled in when
	 * xfs_has_v3inodes() is true.
	 *
	 * NOTE(review): bs_cowextsize_blks is written only when
	 * XFS_DIFLAG2_COWEXTSIZE is set and buf is not cleared in this
	 * function; since xfs_bulkstat() reuses one buf for the whole walk,
	 * confirm a value left by a previous inode cannot be reported here.
	 */
	if (xfs_has_v3inodes(mp)) {
		buf->bs_btime = ip->i_crtime.tv_sec;
		buf->bs_btime_nsec = ip->i_crtime.tv_nsec;
		if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
			buf->bs_cowextsize_blks = ip->i_cowextsize;
	}

	/*
	 * Device number, block size and block count depend on the data fork
	 * format.
	 *
	 * NOTE(review): there is no default case, so any other if_format value
	 * leaves bs_rdev/bs_blksize/bs_blocks as they were in buf - confirm no
	 * other format can reach this point.
	 */
	switch (ip->i_df.if_format) {
	case XFS_DINODE_FMT_DEV:
		buf->bs_rdev = sysv_encode_dev(inode->i_rdev);
		buf->bs_blksize = BLKDEV_IOSIZE;
		buf->bs_blocks = 0;
		break;
	case XFS_DINODE_FMT_LOCAL:
		buf->bs_rdev = 0;
		buf->bs_blksize = mp->m_sb.sb_blocksize;
		buf->bs_blocks = 0;
		break;
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		buf->bs_rdev = 0;
		buf->bs_blksize = mp->m_sb.sb_blocksize;
		buf->bs_blocks = ip->i_nblocks + ip->i_delayed_blks;
		break;
	}

	/* Drop the lock and the inode before calling the formatter. */
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	xfs_irele(ip);

	error = bc->formatter(bc->breq, buf);
	/* -ECANCELED still advances the cursor; other errors do not. */
	if (error == -ECANCELED)
		goto out_advance;
	if (error)
		goto out;

out_advance:
	/*
	 * Advance the cursor to the inode that comes after the one we just
	 * looked at.  We want the caller to move along if the bulkstat
	 * information was copied successfully; if we tried to grab the inode
	 * but it's no longer allocated; or if it's internal metadata.
	 */
	bc->breq->startino = ino + 1;
out:
	return error;
}

/*
 * Bulkstat a single inode.
 *
 * Stats the inode numbered breq->startino and passes the record to @formatter.
 * The request is expected to ask for exactly one record (breq->icount == 1).
 *
 * Returns 0 on success (including when the formatter returned -ECANCELED),
 * -EINVAL if the request did not come through &nop_mnt_idmap, -ENOMEM if the
 * scratch record cannot be allocated, or the error from the empty transaction
 * allocation or from xfs_bulkstat_one_int().
 */
int
xfs_bulkstat_one(
	struct xfs_ibulk	*breq,
	bulkstat_one_fmt_pf	formatter)
{
	struct xfs_bstat_chunk	bc = {
		.formatter	= formatter,
		.breq		= breq,
	};
	struct xfs_trans	*tp;
	int			error;

	if (breq->idmap != &nop_mnt_idmap) {
		xfs_warn_ratelimited(breq->mp,
			"bulkstat not supported inside of idmapped mounts.");
		return -EINVAL;
	}

	ASSERT(breq->icount == 1);

	/* Zeroed scratch record; freed at "out" on every path below. */
	bc.buf = kzalloc(sizeof(struct xfs_bulkstat),
			GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!bc.buf)
		return -ENOMEM;

	/*
	 * Grab an empty transaction so that we can use its recursive buffer
	 * locking abilities to detect cycles in the inobt without deadlocking.
	 */
	error = xfs_trans_alloc_empty(breq->mp, &tp);
	if (error)
		goto out;

	error = xfs_bulkstat_one_int(breq->mp, breq->idmap, tp,
			breq->startino, &bc);
	xfs_trans_cancel(tp);
out:
	kfree(bc.buf);

	/*
	 * If we reported one inode to userspace then we abort because we hit
	 * the end of the buffer.  Don't leak that back to userspace.
	 */
	if (error == -ECANCELED)
		error = 0;

	return error;
}

/*
 * Per-inode callback passed to xfs_iwalk() by xfs_bulkstat().  @data is the
 * struct xfs_bstat_chunk for the walk.  -ENOENT and -EINVAL from the worker
 * are turned into 0; every other result is returned unchanged.
 */
static int
xfs_bulkstat_iwalk(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_ino_t		ino,
	void			*data)
{
	struct xfs_bstat_chunk	*bc = data;
	int			error;

	error = xfs_bulkstat_one_int(mp, bc->breq->idmap, tp, ino, data);
	/* bulkstat just skips over missing inodes */
	if (error == -ENOENT || error == -EINVAL)
		return 0;
	return error;
}

/*
 * Check the incoming startino parameter.
 *
 * We allow any inode value that could map to physical space inside the
 * filesystem because if there are no inodes there, bulkstat moves on to the
 * next chunk.  In other words, the magic agino value of zero takes us to the
 * first chunk in the AG, and an agino value past the end of the AG takes us to
 * the first chunk in the next AG.
 *
 * Therefore we can end early if the requested inode is beyond the end of the
 * filesystem or doesn't map properly.
 *
 * Returns true when the AG number derived from @startino is at or beyond
 * sb_agcount, or when @startino does not survive being split into
 * (agno, agino) and recombined.
 */
static inline bool
xfs_bulkstat_already_done(
	struct xfs_mount	*mp,
	xfs_ino_t		startino)
{
	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, startino);
	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, startino);

	return agno >= mp->m_sb.sb_agcount ||
	       startino != XFS_AGINO_TO_INO(mp, agno, agino);
}

/*
 * Return stat information in bulk (by-inode) for the filesystem.
 *
 * Walks inodes starting at breq->startino via xfs_iwalk(), asking for up to
 * breq->icount of them, and passes each record to @formatter.  When
 * XFS_IBULK_SAME_AG is set in breq->flags, XFS_IWALK_SAME_AG is passed to the
 * walk.
 *
 * Returns 0 if the start cursor is already past the end of the filesystem or
 * if breq->ocount is non-zero when the walk ends; -EINVAL if the request did
 * not come through &nop_mnt_idmap; -ENOMEM if the scratch record cannot be
 * allocated; otherwise the error from the transaction allocation or the walk.
 */
int
xfs_bulkstat(
	struct xfs_ibulk	*breq,
	bulkstat_one_fmt_pf	formatter)
{
	struct xfs_bstat_chunk	bc = {
		.formatter	= formatter,
		.breq		= breq,
	};
	struct xfs_trans	*tp;
	unsigned int		iwalk_flags = 0;
	int			error;

	if (breq->idmap != &nop_mnt_idmap) {
		xfs_warn_ratelimited(breq->mp,
			"bulkstat not supported inside of idmapped mounts.");
		return -EINVAL;
	}
	if (xfs_bulkstat_already_done(breq->mp, breq->startino))
		return 0;

	/* One zeroed scratch record shared by every inode in the walk. */
	bc.buf = kzalloc(sizeof(struct xfs_bulkstat),
			GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!bc.buf)
		return -ENOMEM;

	/*
	 * Grab an empty transaction so that we can use its recursive buffer
	 * locking abilities to detect cycles in the inobt without deadlocking.
	 */
	error = xfs_trans_alloc_empty(breq->mp, &tp);
	if (error)
		goto out;

	if (breq->flags & XFS_IBULK_SAME_AG)
		iwalk_flags |= XFS_IWALK_SAME_AG;

	error = xfs_iwalk(breq->mp, tp, breq->startino, iwalk_flags,
			xfs_bulkstat_iwalk, breq->icount, &bc);
	xfs_trans_cancel(tp);
out:
	kfree(bc.buf);

	/*
	 * We found some inodes, so clear the error status and return them.
	 * The startino cursor will point directly at the inode that triggered
	 * any error that occurred, so on the next call the error will be
	 * triggered again and propagated to userspace as there will be no
	 * formatted inodes in the buffer.
	 */
	if (breq->ocount > 0)
		error = 0;

	return error;
}

/*
 * Convert bulkstat (v5) to bstat (v1).
 *
 * @bs1 is cleared and then filled field by field from @bstat.  Extent size
 * hints are converted with XFS_FSB_TO_B(), the project id is split into
 * bs_projid_lo/bs_projid_hi, and the DMAPI fields are always reported as zero.
 */
void
xfs_bulkstat_to_bstat(
	struct xfs_mount		*mp,
	struct xfs_bstat		*bs1,
	const struct xfs_bulkstat	*bstat)
{
	/* memset is needed here because of padding holes in the structure. */
	memset(bs1, 0, sizeof(struct xfs_bstat));
	bs1->bs_ino = bstat->bs_ino;
	bs1->bs_mode = bstat->bs_mode;
	bs1->bs_nlink = bstat->bs_nlink;
	bs1->bs_uid = bstat->bs_uid;
	bs1->bs_gid = bstat->bs_gid;
	bs1->bs_rdev = bstat->bs_rdev;
	bs1->bs_blksize = bstat->bs_blksize;
	bs1->bs_size = bstat->bs_size;
	bs1->bs_atime.tv_sec = bstat->bs_atime;
	bs1->bs_mtime.tv_sec = bstat->bs_mtime;
	bs1->bs_ctime.tv_sec = bstat->bs_ctime;
	bs1->bs_atime.tv_nsec = bstat->bs_atime_nsec;
	bs1->bs_mtime.tv_nsec = bstat->bs_mtime_nsec;
	bs1->bs_ctime.tv_nsec = bstat->bs_ctime_nsec;
	bs1->bs_blocks = bstat->bs_blocks;
	bs1->bs_xflags = bstat->bs_xflags;
	bs1->bs_extsize = XFS_FSB_TO_B(mp, bstat->bs_extsize_blks);
	bs1->bs_extents = bstat->bs_extents;
	bs1->bs_gen = bstat->bs_gen;
	/* Project id: low 16 bits here, remaining high bits in bs_projid_hi. */
	bs1->bs_projid_lo = bstat->bs_projectid & 0xFFFF;
	bs1->bs_forkoff = bstat->bs_forkoff;
	bs1->bs_projid_hi = bstat->bs_projectid >> 16;
	bs1->bs_sick = bstat->bs_sick;
	bs1->bs_checked = bstat->bs_checked;
	bs1->bs_cowextsize = XFS_FSB_TO_B(mp, bstat->bs_cowextsize_blks);
	bs1->bs_dmevmask = 0;
	bs1->bs_dmstate = 0;
	bs1->bs_aextents = bstat->bs_aextents;
}

/* State handed to xfs_inumbers_walk() through the walk's data pointer. */
struct xfs_inumbers_chunk {
	inumbers_fmt_pf		formatter;	/* consumes each inode group record */
	struct xfs_ibulk	*breq;		/* request being serviced (cursor) */
};

/*
 * INUMBERS
 * ========
 * This is how we export inode btree records to userspace, so that XFS tools
 * can figure out where inodes are allocated.
 */

/*
 * Format the inode group structure and report it somewhere.
 *
 * Similar to xfs_bulkstat_one_int, breq->startino is the inode cursor as we
 * walk through the filesystem so we move it forward unless there was a runtime
 * error.  If the formatter tells us the buffer is now full (-ECANCELED) we
 * also move the cursor forward and abort the walk.
 *
 * Returns the formatter's result: 0 or -ECANCELED with the cursor advanced to
 * the start of the next inode chunk, or another negative errno with the
 * cursor untouched.
 */
STATIC int
xfs_inumbers_walk(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	const struct xfs_inobt_rec_incore *irec,
	void			*data)
{
	/* Build the v5 record from the inobt record. */
	struct xfs_inumbers	inogrp = {
		.xi_startino	= XFS_AGINO_TO_INO(mp, agno, irec->ir_startino),
		.xi_alloccount	= irec->ir_count - irec->ir_freecount,
		.xi_allocmask	= ~irec->ir_free,
		.xi_version	= XFS_INUMBERS_VERSION_V5,
	};
	struct xfs_inumbers_chunk *ic = data;
	int			error;

	error = ic->formatter(ic->breq, &inogrp);
	if (error && error != -ECANCELED)
		return error;

	/* Success or full buffer: the next call resumes at the next chunk. */
	ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) +
			XFS_INODES_PER_CHUNK;
	return error;
}

/*
 * Return inode number table for the filesystem.
 *
 * Walks inode btree records starting at breq->startino via xfs_inobt_walk(),
 * asking for up to breq->icount of them, and passes each one to @formatter.
 *
 * Returns 0 if the start cursor is already past the end of the filesystem or
 * if breq->ocount is non-zero when the walk ends; otherwise the error from the
 * transaction allocation or the walk.
 */
int
xfs_inumbers(
	struct xfs_ibulk	*breq,
	inumbers_fmt_pf		formatter)
{
	struct xfs_inumbers_chunk ic = {
		.formatter	= formatter,
		.breq		= breq,
	};
	struct xfs_trans	*tp;
	int			error = 0;

	if (xfs_bulkstat_already_done(breq->mp, breq->startino))
		return 0;

	/*
	 * Grab an empty transaction so that we can use its recursive buffer
	 * locking abilities to detect cycles in the inobt without deadlocking.
	 */
	error = xfs_trans_alloc_empty(breq->mp, &tp);
	if (error)
		goto out;

	/*
	 * NOTE(review): breq->flags is forwarded to the walk as-is, whereas
	 * xfs_bulkstat() translates XFS_IBULK_SAME_AG into XFS_IWALK_SAME_AG
	 * explicitly - confirm the two flag namespaces agree.
	 */
	error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags,
			xfs_inumbers_walk, breq->icount, &ic);
	xfs_trans_cancel(tp);
out:

	/*
	 * We found some inode groups, so clear the error status and return
	 * them.  The startino cursor will point directly at the inode that
	 * triggered any error that occurred, so on the next call the error
	 * will be triggered again and propagated to userspace as there will be
	 * no formatted inode groups in the buffer.
	 */
	if (breq->ocount > 0)
		error = 0;

	return error;
}

/*
 * Convert an inumbers (v5) struct to a inogrp (v1) struct.
 *
 * @ig1 is cleared, then the start inode, allocated count and allocation mask
 * are copied from @ig.
 */
void
xfs_inumbers_to_inogrp(
	struct xfs_inogrp		*ig1,
	const struct xfs_inumbers	*ig)
{
	/* memset is needed here because of padding holes in the structure. */
	memset(ig1, 0, sizeof(struct xfs_inogrp));
	ig1->xi_startino = ig->xi_startino;
	ig1->xi_alloccount = ig->xi_alloccount;
	ig1->xi_allocmask = ig->xi_allocmask;
}
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.