~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/xfs/scrub/ialloc_repair.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-or-later
  2 /*
  3  * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
  4  * Author: Darrick J. Wong <djwong@kernel.org>
  5  */
  6 #include "xfs.h"
  7 #include "xfs_fs.h"
  8 #include "xfs_shared.h"
  9 #include "xfs_format.h"
 10 #include "xfs_trans_resv.h"
 11 #include "xfs_mount.h"
 12 #include "xfs_defer.h"
 13 #include "xfs_btree.h"
 14 #include "xfs_btree_staging.h"
 15 #include "xfs_bit.h"
 16 #include "xfs_log_format.h"
 17 #include "xfs_trans.h"
 18 #include "xfs_sb.h"
 19 #include "xfs_inode.h"
 20 #include "xfs_alloc.h"
 21 #include "xfs_ialloc.h"
 22 #include "xfs_ialloc_btree.h"
 23 #include "xfs_icache.h"
 24 #include "xfs_rmap.h"
 25 #include "xfs_rmap_btree.h"
 26 #include "xfs_log.h"
 27 #include "xfs_trans_priv.h"
 28 #include "xfs_error.h"
 29 #include "xfs_health.h"
 30 #include "xfs_ag.h"
 31 #include "scrub/xfs_scrub.h"
 32 #include "scrub/scrub.h"
 33 #include "scrub/common.h"
 34 #include "scrub/btree.h"
 35 #include "scrub/trace.h"
 36 #include "scrub/repair.h"
 37 #include "scrub/bitmap.h"
 38 #include "scrub/agb_bitmap.h"
 39 #include "scrub/xfile.h"
 40 #include "scrub/xfarray.h"
 41 #include "scrub/newbt.h"
 42 #include "scrub/reap.h"
 43 
 44 /*
 45  * Inode Btree Repair
 46  * ==================
 47  *
 48  * A quick refresher of inode btrees on a v5 filesystem:
 49  *
 50  * - Inode records are read into memory in units of 'inode clusters'.  However
 51  *   many inodes fit in a cluster buffer is the smallest number of inodes that
 52  *   can be allocated or freed.  Clusters are never smaller than one fs block
 53  *   though they can span multiple blocks.  The size (in fs blocks) is
 54  *   computed with xfs_icluster_size_fsb().  The fs block alignment of a
 55  *   cluster is computed with xfs_ialloc_cluster_alignment().
 56  *
 57  * - Each inode btree record can describe a single 'inode chunk'.  The chunk
 58  *   size is defined to be 64 inodes.  If sparse inodes are enabled, every
 59  *   inobt record must be aligned to the chunk size; if not, every record must
 60  *   be aligned to the start of a cluster.  It is possible to construct an XFS
 61  *   geometry where one inobt record maps to multiple inode clusters; it is
 62  *   also possible to construct a geometry where multiple inobt records map to
 63  *   different parts of one inode cluster.
 64  *
 65  * - If sparse inodes are not enabled, the smallest unit of allocation for
 66  *   inode records is enough to contain one inode chunk's worth of inodes.
 67  *
 68  * - If sparse inodes are enabled, the holemask field will be active.  Each
 69  *   bit of the holemask represents 4 potential inodes; if set, the
 70  *   corresponding space does *not* contain inodes and must be left alone.
 71  *   Clusters cannot be smaller than 4 inodes.  The smallest unit of allocation
 72  *   of inode records is one inode cluster.
 73  *
 74  * So what's the rebuild algorithm?
 75  *
 76  * Iterate the reverse mapping records looking for OWN_INODES and OWN_INOBT
 77  * records.  The OWN_INOBT records are the old inode btree blocks and will be
 78  * cleared out after we've rebuilt the tree.  Each possible inode cluster
 79  * within an OWN_INODES record will be read in; for each possible inobt record
 80  * associated with that cluster, compute the freemask calculated from the
 81  * i_mode data in the inode chunk.  For sparse inodes the holemask will be
 82  * calculated by creating the properly aligned inobt record and punching out
 83  * any chunk that's missing.  Inode allocations and frees grab the AGI first,
 84  * so repair protects itself from concurrent access by locking the AGI.
 85  *
 86  * Once we've reconstructed all the inode records, we can create new inode
 87  * btree roots and reload the btrees.  We rebuild both inode trees at the same
 88  * time because they have the same rmap owner and it would be more complex to
 89  * figure out if the other tree isn't in need of a rebuild and which OWN_INOBT
 90  * blocks it owns.  We have all the data we need to build both, so dump
 91  * everything and start over.
 92  *
 93  * We use the prefix 'xrep_ibt' because we rebuild both inode btrees at once.
 94  */
 95 
/* Repair context for rebuilding both inode btrees of one AG. */
struct xrep_ibt {
	/* Record under construction. */
	struct xfs_inobt_rec_incore	rie;

	/* new inobt information */
	struct xrep_newbt	new_inobt;

	/* new finobt information */
	struct xrep_newbt	new_finobt;

	/* Old inode btree blocks we found in the rmap. */
	struct xagb_bitmap	old_iallocbt_blocks;

	/* Reconstructed inode records. */
	struct xfarray		*inode_records;

	/* Scrub context for this repair run. */
	struct xfs_scrub	*sc;

	/* Number of inodes assigned disk space. */
	unsigned int		icount;

	/* Number of inodes in use. */
	unsigned int		iused;

	/* Number of finobt records needed. */
	unsigned int		finobt_recs;

	/* get_records()'s position in the inode record array. */
	xfarray_idx_t		array_cur;
};
126 
127 /*
128  * Is this inode in use?  If the inode is in memory we can tell from i_mode,
129  * otherwise we have to check di_mode in the on-disk buffer.  We only care
130  * that the high (i.e. non-permission) bits of _mode are zero.  This should be
131  * safe because repair keeps all AG headers locked until the end, and process
132  * trying to perform an inode allocation/free must lock the AGI.
133  *
134  * @cluster_ag_base is the inode offset of the cluster within the AG.
135  * @cluster_bp is the cluster buffer.
136  * @cluster_index is the inode offset within the inode cluster.
137  */
STATIC int
xrep_ibt_check_ifree(
	struct xrep_ibt		*ri,
	xfs_agino_t		cluster_ag_base,
	struct xfs_buf		*cluster_bp,
	unsigned int		cluster_index,
	bool			*inuse)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_dinode	*dip;
	xfs_ino_t		fsino;
	xfs_agino_t		agino;
	xfs_agnumber_t		agno = ri->sc->sa.pag->pag_agno;
	unsigned int		cluster_buf_base;
	unsigned int		offset;
	int			error;

	agino = cluster_ag_base + cluster_index;
	fsino = XFS_AGINO_TO_INO(mp, agno, agino);

	/* Inode uncached or half assembled, read disk buffer */
	cluster_buf_base = XFS_INO_TO_OFFSET(mp, cluster_ag_base);
	offset = (cluster_buf_base + cluster_index) * mp->m_sb.sb_inodesize;
	/* The computed offset must land inside the cluster buffer. */
	if (offset >= BBTOB(cluster_bp->b_length))
		return -EFSCORRUPTED;
	dip = xfs_buf_offset(cluster_bp, offset);
	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)
		return -EFSCORRUPTED;

	/* v3 inodes stamp the inumber on disk; it must match this location. */
	if (dip->di_version >= 3 && be64_to_cpu(dip->di_ino) != fsino)
		return -EFSCORRUPTED;

	/* Will the in-core inode tell us if it's in use? */
	error = xchk_inode_is_allocated(sc, agino, inuse);
	if (!error)
		return 0;

	/* Fall back to the ondisk mode: nonzero i_mode means in use. */
	*inuse = dip->di_mode != 0;
	return 0;
}
179 
180 /* Stash the accumulated inobt record for rebuilding. */
STATIC int
xrep_ibt_stash(
	struct xrep_ibt		*ri)
{
	int			error = 0;

	if (xchk_should_terminate(ri->sc, &error))
		return error;

	/* Recompute the free count from the free mask before validating. */
	ri->rie.ir_freecount = xfs_inobt_rec_freecount(&ri->rie);
	if (xfs_inobt_check_irec(ri->sc->sa.pag, &ri->rie) != NULL)
		return -EFSCORRUPTED;

	/* Records with free inodes will also be loaded into the new finobt. */
	if (ri->rie.ir_freecount > 0)
		ri->finobt_recs++;

	trace_xrep_ibt_found(ri->sc->mp, ri->sc->sa.pag->pag_agno, &ri->rie);

	error = xfarray_append(ri->inode_records, &ri->rie);
	if (error)
		return error;

	/* Reset the accumulator so the next chunk starts a fresh record. */
	ri->rie.ir_startino = NULLAGINO;
	return 0;
}
206 
207 /*
208  * Given an extent of inodes and an inode cluster buffer, calculate the
209  * location of the corresponding inobt record (creating it if necessary),
210  * then update the parts of the holemask and freemask of that record that
211  * correspond to the inode extent we were given.
212  *
213  * @cluster_ir_startino is the AG inode number of an inobt record that we're
214  * proposing to create for this inode cluster.  If sparse inodes are enabled,
215  * we must round down to a chunk boundary to find the actual sparse record.
216  * @cluster_bp is the buffer of the inode cluster.
217  * @nr_inodes is the number of inodes to check from the cluster.
218  */
STATIC int
xrep_ibt_cluster_record(
	struct xrep_ibt		*ri,
	xfs_agino_t		cluster_ir_startino,
	struct xfs_buf		*cluster_bp,
	unsigned int		nr_inodes)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_agino_t		ir_startino;
	unsigned int		cluster_base;
	unsigned int		cluster_index;
	int			error = 0;

	/*
	 * With sparse inodes, the actual inobt record starts at the chunk
	 * boundary below this cluster; cluster_base is the cluster's inode
	 * offset within that record.
	 */
	ir_startino = cluster_ir_startino;
	if (xfs_has_sparseinodes(mp))
		ir_startino = rounddown(ir_startino, XFS_INODES_PER_CHUNK);
	cluster_base = cluster_ir_startino - ir_startino;

	/*
	 * If the accumulated inobt record doesn't map this cluster, add it to
	 * the list and reset it.
	 */
	if (ri->rie.ir_startino != NULLAGINO &&
	    ri->rie.ir_startino + XFS_INODES_PER_CHUNK <= ir_startino) {
		error = xrep_ibt_stash(ri);
		if (error)
			return error;
	}

	/* Start a new record: all free, all holes, until proven otherwise. */
	if (ri->rie.ir_startino == NULLAGINO) {
		ri->rie.ir_startino = ir_startino;
		ri->rie.ir_free = XFS_INOBT_ALL_FREE;
		ri->rie.ir_holemask = 0xFFFF;
		ri->rie.ir_count = 0;
	}

	/* Record the whole cluster. */
	ri->icount += nr_inodes;
	ri->rie.ir_count += nr_inodes;
	/* Clear the hole bits covering this cluster's portion of the chunk. */
	ri->rie.ir_holemask &= ~xfs_inobt_maskn(
				cluster_base / XFS_INODES_PER_HOLEMASK_BIT,
				nr_inodes / XFS_INODES_PER_HOLEMASK_BIT);

	/* Which inodes within this cluster are free? */
	for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) {
		bool		inuse = false;

		error = xrep_ibt_check_ifree(ri, cluster_ir_startino,
				cluster_bp, cluster_index, &inuse);
		if (error)
			return error;
		if (!inuse)
			continue;
		ri->iused++;
		ri->rie.ir_free &= ~XFS_INOBT_MASK(cluster_base +
						   cluster_index);
	}
	return 0;
}
279 
280 /*
281  * For each inode cluster covering the physical extent recorded by the rmapbt,
282  * we must calculate the properly aligned startino of that cluster, then
283  * iterate each cluster to fill in used and filled masks appropriately.  We
284  * then use the (startino, used, filled) information to construct the
285  * appropriate inode records.
286  */
STATIC int
xrep_ibt_process_cluster(
	struct xrep_ibt		*ri,
	xfs_agblock_t		cluster_bno)
{
	struct xfs_imap		imap;
	struct xfs_buf		*cluster_bp;
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_agino_t		cluster_ag_base;
	xfs_agino_t		irec_index;
	unsigned int		nr_inodes;
	int			error;

	/* Each inobt record examines at most one chunk's worth of inodes. */
	nr_inodes = min_t(unsigned int, igeo->inodes_per_cluster,
			XFS_INODES_PER_CHUNK);

	/*
	 * Grab the inode cluster buffer.  This is safe to do with a broken
	 * inobt because imap_to_bp directly maps the buffer without touching
	 * either inode btree.
	 */
	imap.im_blkno = XFS_AGB_TO_DADDR(mp, sc->sa.pag->pag_agno, cluster_bno);
	imap.im_len = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
	imap.im_boffset = 0;
	error = xfs_imap_to_bp(mp, sc->tp, &imap, &cluster_bp);
	if (error)
		return error;

	/*
	 * Record the contents of each possible inobt record mapping this
	 * cluster.
	 */
	cluster_ag_base = XFS_AGB_TO_AGINO(mp, cluster_bno);
	for (irec_index = 0;
	     irec_index < igeo->inodes_per_cluster;
	     irec_index += XFS_INODES_PER_CHUNK) {
		error = xrep_ibt_cluster_record(ri,
				cluster_ag_base + irec_index, cluster_bp,
				nr_inodes);
		if (error)
			break;

	}

	/* Release the cluster buffer whether or not we succeeded. */
	xfs_trans_brelse(sc->tp, cluster_bp);
	return error;
}
336 
337 /* Check for any obvious conflicts in the inode chunk extent. */
STATIC int
xrep_ibt_check_inode_ext(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_agino_t		agino;
	enum xbtree_recpacking	outcome;
	int			error;

	/* Inode records must be within the AG. */
	if (!xfs_verify_agbext(sc->sa.pag, agbno, len))
		return -EFSCORRUPTED;

	/* The entire record must align to the inode cluster size. */
	if (!IS_ALIGNED(agbno, igeo->blocks_per_cluster) ||
	    !IS_ALIGNED(agbno + len, igeo->blocks_per_cluster))
		return -EFSCORRUPTED;

	/*
	 * The entire record must also adhere to the inode cluster alignment
	 * size if sparse inodes are not enabled.
	 */
	if (!xfs_has_sparseinodes(mp) &&
	    (!IS_ALIGNED(agbno, igeo->cluster_align) ||
	     !IS_ALIGNED(agbno + len, igeo->cluster_align)))
		return -EFSCORRUPTED;

	/*
	 * On a sparse inode fs, this cluster could be part of a sparse chunk.
	 * Sparse clusters must be aligned to sparse chunk alignment.
	 */
	if (xfs_has_sparseinodes(mp) && mp->m_sb.sb_spino_align &&
	    (!IS_ALIGNED(agbno, mp->m_sb.sb_spino_align) ||
	     !IS_ALIGNED(agbno + len, mp->m_sb.sb_spino_align)))
		return -EFSCORRUPTED;

	/* Make sure the entire range of blocks are valid AG inodes. */
	agino = XFS_AGB_TO_AGINO(mp, agbno);
	if (!xfs_verify_agino(sc->sa.pag, agino))
		return -EFSCORRUPTED;

	/* The last inode of the extent must be valid too. */
	agino = XFS_AGB_TO_AGINO(mp, agbno + len) - 1;
	if (!xfs_verify_agino(sc->sa.pag, agino))
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	error = xfs_alloc_has_records(sc->sa.bno_cur, agbno, len, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	return 0;
}
395 
396 /* Found a fragment of the old inode btrees; dispose of them later. */
397 STATIC int
398 xrep_ibt_record_old_btree_blocks(
399         struct xrep_ibt                 *ri,
400         const struct xfs_rmap_irec      *rec)
401 {
402         if (!xfs_verify_agbext(ri->sc->sa.pag, rec->rm_startblock,
403                                 rec->rm_blockcount))
404                 return -EFSCORRUPTED;
405 
406         return xagb_bitmap_set(&ri->old_iallocbt_blocks, rec->rm_startblock,
407                         rec->rm_blockcount);
408 }
409 
410 /* Record extents that belong to inode cluster blocks. */
STATIC int
xrep_ibt_record_inode_blocks(
	struct xrep_ibt			*ri,
	const struct xfs_rmap_irec	*rec)
{
	struct xfs_mount		*mp = ri->sc->mp;
	struct xfs_ino_geometry		*igeo = M_IGEO(mp);
	xfs_agblock_t			cluster_base;
	int				error;

	/* Reject extents that cannot possibly be inode clusters. */
	error = xrep_ibt_check_inode_ext(ri->sc, rec->rm_startblock,
			rec->rm_blockcount);
	if (error)
		return error;

	trace_xrep_ibt_walk_rmap(mp, ri->sc->sa.pag->pag_agno,
			rec->rm_startblock, rec->rm_blockcount, rec->rm_owner,
			rec->rm_offset, rec->rm_flags);

	/*
	 * Record the free/hole masks for each inode cluster that could be
	 * mapped by this rmap record.
	 */
	for (cluster_base = 0;
	     cluster_base < rec->rm_blockcount;
	     cluster_base += igeo->blocks_per_cluster) {
		error = xrep_ibt_process_cluster(ri,
				rec->rm_startblock + cluster_base);
		if (error)
			return error;
	}

	return 0;
}
445 
446 STATIC int
447 xrep_ibt_walk_rmap(
448         struct xfs_btree_cur            *cur,
449         const struct xfs_rmap_irec      *rec,
450         void                            *priv)
451 {
452         struct xrep_ibt                 *ri = priv;
453         int                             error = 0;
454 
455         if (xchk_should_terminate(ri->sc, &error))
456                 return error;
457 
458         switch (rec->rm_owner) {
459         case XFS_RMAP_OWN_INOBT:
460                 return xrep_ibt_record_old_btree_blocks(ri, rec);
461         case XFS_RMAP_OWN_INODES:
462                 return xrep_ibt_record_inode_blocks(ri, rec);
463         }
464         return 0;
465 }
466 
467 /*
468  * Iterate all reverse mappings to find the inodes (OWN_INODES) and the inode
469  * btrees (OWN_INOBT).  Figure out if we have enough free space to reconstruct
470  * the inode btrees.  The caller must clean up the lists if anything goes
471  * wrong.
472  */
STATIC int
xrep_ibt_find_inodes(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	int			error;

	/* No record is being accumulated yet. */
	ri->rie.ir_startino = NULLAGINO;

	/* Collect all reverse mappings for inode blocks. */
	xrep_ag_btcur_init(sc, &sc->sa);
	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_ibt_walk_rmap, ri);
	xchk_ag_btcur_free(&sc->sa);
	if (error)
		return error;

	/* If we have a record ready to go, add it to the array. */
	if (ri->rie.ir_startino != NULLAGINO)
		return xrep_ibt_stash(ri);

	return 0;
}
495 
496 /* Update the AGI counters. */
STATIC int
xrep_ibt_reset_counters(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_agi		*agi = sc->sa.agi_bp->b_addr;
	/* Free count is derived from the totals gathered during the scan. */
	unsigned int		freecount = ri->icount - ri->iused;

	/* Trigger inode count recalculation */
	xfs_force_summary_recalc(sc->mp);

	/*
	 * The AGI header contains extra information related to the inode
	 * btrees, so we must update those fields here.
	 */
	agi->agi_count = cpu_to_be32(ri->icount);
	agi->agi_freecount = cpu_to_be32(freecount);
	xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp,
			   XFS_AGI_COUNT | XFS_AGI_FREECOUNT);

	/* Reinitialize with the values we just logged. */
	return xrep_reinit_pagi(sc);
}
520 
521 /* Retrieve finobt data for bulk load. */
STATIC int
xrep_fibt_get_records(
	struct xfs_btree_cur		*cur,
	unsigned int			idx,
	struct xfs_btree_block		*block,
	unsigned int			nr_wanted,
	void				*priv)
{
	struct xfs_inobt_rec_incore	*irec = &cur->bc_rec.i;
	struct xrep_ibt			*ri = priv;
	union xfs_btree_rec		*block_rec;
	unsigned int			loaded;
	int				error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		/*
		 * The finobt only tracks records with free inodes, so skip
		 * over any stashed record whose free count is zero.
		 */
		do {
			error = xfarray_load(ri->inode_records,
					ri->array_cur++, irec);
		} while (error == 0 && xfs_inobt_rec_freecount(irec) == 0);
		if (error)
			return error;

		/* Format the record we left in bc_rec into the new block. */
		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}
550 
551 /* Retrieve inobt data for bulk load. */
STATIC int
xrep_ibt_get_records(
	struct xfs_btree_cur		*cur,
	unsigned int			idx,
	struct xfs_btree_block		*block,
	unsigned int			nr_wanted,
	void				*priv)
{
	struct xfs_inobt_rec_incore	*irec = &cur->bc_rec.i;
	struct xrep_ibt			*ri = priv;
	union xfs_btree_rec		*block_rec;
	unsigned int			loaded;
	int				error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		/* The inobt gets every stashed record, in array order. */
		error = xfarray_load(ri->inode_records, ri->array_cur++, irec);
		if (error)
			return error;

		/* Format the record we left in bc_rec into the new block. */
		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}
577 
578 /* Feed one of the new inobt blocks to the bulk loader. */
579 STATIC int
580 xrep_ibt_claim_block(
581         struct xfs_btree_cur    *cur,
582         union xfs_btree_ptr     *ptr,
583         void                    *priv)
584 {
585         struct xrep_ibt         *ri = priv;
586 
587         return xrep_newbt_claim_block(cur, &ri->new_inobt, ptr);
588 }
589 
590 /* Feed one of the new finobt blocks to the bulk loader. */
591 STATIC int
592 xrep_fibt_claim_block(
593         struct xfs_btree_cur    *cur,
594         union xfs_btree_ptr     *ptr,
595         void                    *priv)
596 {
597         struct xrep_ibt         *ri = priv;
598 
599         return xrep_newbt_claim_block(cur, &ri->new_finobt, ptr);
600 }
601 
602 /* Make sure the records do not overlap in inumber address space. */
STATIC int
xrep_ibt_check_overlap(
	struct xrep_ibt			*ri)
{
	struct xfs_inobt_rec_incore	irec;
	xfarray_idx_t			cur;
	xfs_agino_t			next_agino = 0;
	int				error = 0;

	foreach_xfarray_idx(ri->inode_records, cur) {
		if (xchk_should_terminate(ri->sc, &error))
			return error;

		error = xfarray_load(ri->inode_records, cur, &irec);
		if (error)
			return error;

		/*
		 * Records were collected in increasing agino order, so each
		 * record must start at or after the end of the previous one.
		 */
		if (irec.ir_startino < next_agino)
			return -EFSCORRUPTED;

		next_agino = irec.ir_startino + XFS_INODES_PER_CHUNK;
	}

	return error;
}
628 
629 /* Build new inode btrees and dispose of the old one. */
630 STATIC int
631 xrep_ibt_build_new_trees(
632         struct xrep_ibt         *ri)
633 {
634         struct xfs_scrub        *sc = ri->sc;
635         struct xfs_btree_cur    *ino_cur;
636         struct xfs_btree_cur    *fino_cur = NULL;
637         xfs_fsblock_t           fsbno;
638         bool                    need_finobt;
639         int                     error;
640 
641         need_finobt = xfs_has_finobt(sc->mp);
642 
643         /*
644          * Create new btrees for staging all the inobt records we collected
645          * earlier.  The records were collected in order of increasing agino,
646          * so we do not have to sort them.  Ensure there are no overlapping
647          * records.
648          */
649         error = xrep_ibt_check_overlap(ri);
650         if (error)
651                 return error;
652 
653         /*
654          * The new inode btrees will not be rooted in the AGI until we've
655          * successfully rebuilt the tree.
656          *
657          * Start by setting up the inobt staging cursor.
658          */
659         fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
660                         XFS_IBT_BLOCK(sc->mp)),
661         xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT, fsbno,
662                         XFS_AG_RESV_NONE);
663         ri->new_inobt.bload.claim_block = xrep_ibt_claim_block;
664         ri->new_inobt.bload.get_records = xrep_ibt_get_records;
665 
666         ino_cur = xfs_inobt_init_cursor(sc->sa.pag, NULL, NULL);
667         xfs_btree_stage_afakeroot(ino_cur, &ri->new_inobt.afake);
668         error = xfs_btree_bload_compute_geometry(ino_cur, &ri->new_inobt.bload,
669                         xfarray_length(ri->inode_records));
670         if (error)
671                 goto err_inocur;
672 
673         /* Set up finobt staging cursor. */
674         if (need_finobt) {
675                 enum xfs_ag_resv_type   resv = XFS_AG_RESV_METADATA;
676 
677                 if (sc->mp->m_finobt_nores)
678                         resv = XFS_AG_RESV_NONE;
679 
680                 fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
681                                 XFS_FIBT_BLOCK(sc->mp)),
682                 xrep_newbt_init_ag(&ri->new_finobt, sc, &XFS_RMAP_OINFO_INOBT,
683                                 fsbno, resv);
684                 ri->new_finobt.bload.claim_block = xrep_fibt_claim_block;
685                 ri->new_finobt.bload.get_records = xrep_fibt_get_records;
686 
687                 fino_cur = xfs_finobt_init_cursor(sc->sa.pag, NULL, NULL);
688                 xfs_btree_stage_afakeroot(fino_cur, &ri->new_finobt.afake);
689                 error = xfs_btree_bload_compute_geometry(fino_cur,
690                                 &ri->new_finobt.bload, ri->finobt_recs);
691                 if (error)
692                         goto err_finocur;
693         }
694 
695         /* Last chance to abort before we start committing fixes. */
696         if (xchk_should_terminate(sc, &error))
697                 goto err_finocur;
698 
699         /* Reserve all the space we need to build the new btrees. */
700         error = xrep_newbt_alloc_blocks(&ri->new_inobt,
701                         ri->new_inobt.bload.nr_blocks);
702         if (error)
703                 goto err_finocur;
704 
705         if (need_finobt) {
706                 error = xrep_newbt_alloc_blocks(&ri->new_finobt,
707                                 ri->new_finobt.bload.nr_blocks);
708                 if (error)
709                         goto err_finocur;
710         }
711 
712         /* Add all inobt records. */
713         ri->array_cur = XFARRAY_CURSOR_INIT;
714         error = xfs_btree_bload(ino_cur, &ri->new_inobt.bload, ri);
715         if (error)
716                 goto err_finocur;
717 
718         /* Add all finobt records. */
719         if (need_finobt) {
720                 ri->array_cur = XFARRAY_CURSOR_INIT;
721                 error = xfs_btree_bload(fino_cur, &ri->new_finobt.bload, ri);
722                 if (error)
723                         goto err_finocur;
724         }
725 
726         /*
727          * Install the new btrees in the AG header.  After this point the old
728          * btrees are no longer accessible and the new trees are live.
729          */
730         xfs_inobt_commit_staged_btree(ino_cur, sc->tp, sc->sa.agi_bp);
731         xfs_btree_del_cursor(ino_cur, 0);
732 
733         if (fino_cur) {
734                 xfs_inobt_commit_staged_btree(fino_cur, sc->tp, sc->sa.agi_bp);
735                 xfs_btree_del_cursor(fino_cur, 0);
736         }
737 
738         /* Reset the AGI counters now that we've changed the inode roots. */
739         error = xrep_ibt_reset_counters(ri);
740         if (error)
741                 goto err_finobt;
742 
743         /* Free unused blocks and bitmap. */
744         if (need_finobt) {
745                 error = xrep_newbt_commit(&ri->new_finobt);
746                 if (error)
747                         goto err_inobt;
748         }
749         error = xrep_newbt_commit(&ri->new_inobt);
750         if (error)
751                 return error;
752 
753         return xrep_roll_ag_trans(sc);
754 
755 err_finocur:
756         if (need_finobt)
757                 xfs_btree_del_cursor(fino_cur, error);
758 err_inocur:
759         xfs_btree_del_cursor(ino_cur, error);
760 err_finobt:
761         if (need_finobt)
762                 xrep_newbt_cancel(&ri->new_finobt);
763 err_inobt:
764         xrep_newbt_cancel(&ri->new_inobt);
765         return error;
766 }
767 
768 /*
769  * Now that we've logged the roots of the new btrees, invalidate all of the
770  * old blocks and free them.
771  */
772 STATIC int
773 xrep_ibt_remove_old_trees(
774         struct xrep_ibt         *ri)
775 {
776         struct xfs_scrub        *sc = ri->sc;
777         int                     error;
778 
779         /*
780          * Free the old inode btree blocks if they're not in use.  It's ok to
781          * reap with XFS_AG_RESV_NONE even if the finobt had a per-AG
782          * reservation because we reset the reservation before releasing the
783          * AGI and AGF header buffer locks.
784          */
785         error = xrep_reap_agblocks(sc, &ri->old_iallocbt_blocks,
786                         &XFS_RMAP_OINFO_INOBT, XFS_AG_RESV_NONE);
787         if (error)
788                 return error;
789 
790         /*
791          * If the finobt is enabled and has a per-AG reservation, make sure we
792          * reinitialize the per-AG reservations.
793          */
794         if (xfs_has_finobt(sc->mp) && !sc->mp->m_finobt_nores)
795                 sc->flags |= XREP_RESET_PERAG_RESV;
796 
797         return 0;
798 }
799 
800 /* Repair both inode btrees. */
801 int
802 xrep_iallocbt(
803         struct xfs_scrub        *sc)
804 {
805         struct xrep_ibt         *ri;
806         struct xfs_mount        *mp = sc->mp;
807         char                    *descr;
808         xfs_agino_t             first_agino, last_agino;
809         int                     error = 0;
810 
811         /* We require the rmapbt to rebuild anything. */
812         if (!xfs_has_rmapbt(mp))
813                 return -EOPNOTSUPP;
814 
815         ri = kzalloc(sizeof(struct xrep_ibt), XCHK_GFP_FLAGS);
816         if (!ri)
817                 return -ENOMEM;
818         ri->sc = sc;
819 
820         /* We rebuild both inode btrees. */
821         sc->sick_mask = XFS_SICK_AG_INOBT | XFS_SICK_AG_FINOBT;
822 
823         /* Set up enough storage to handle an AG with nothing but inodes. */
824         xfs_agino_range(mp, sc->sa.pag->pag_agno, &first_agino, &last_agino);
825         last_agino /= XFS_INODES_PER_CHUNK;
826         descr = xchk_xfile_ag_descr(sc, "inode index records");
827         error = xfarray_create(descr, last_agino,
828                         sizeof(struct xfs_inobt_rec_incore),
829                         &ri->inode_records);
830         kfree(descr);
831         if (error)
832                 goto out_ri;
833 
834         /* Collect the inode data and find the old btree blocks. */
835         xagb_bitmap_init(&ri->old_iallocbt_blocks);
836         error = xrep_ibt_find_inodes(ri);
837         if (error)
838                 goto out_bitmap;
839 
840         /* Rebuild the inode indexes. */
841         error = xrep_ibt_build_new_trees(ri);
842         if (error)
843                 goto out_bitmap;
844 
845         /* Kill the old tree. */
846         error = xrep_ibt_remove_old_trees(ri);
847         if (error)
848                 goto out_bitmap;
849 
850 out_bitmap:
851         xagb_bitmap_destroy(&ri->old_iallocbt_blocks);
852         xfarray_destroy(ri->inode_records);
853 out_ri:
854         kfree(ri);
855         return error;
856 }
857 
858 /* Make sure both btrees are ok after we've rebuilt them. */
859 int
860 xrep_revalidate_iallocbt(
861         struct xfs_scrub        *sc)
862 {
863         __u32                   old_type = sc->sm->sm_type;
864         int                     error;
865 
866         /*
867          * We must update sm_type temporarily so that the tree-to-tree cross
868          * reference checks will work in the correct direction, and also so
869          * that tracing will report correctly if there are more errors.
870          */
871         sc->sm->sm_type = XFS_SCRUB_TYPE_INOBT;
872         error = xchk_iallocbt(sc);
873         if (error)
874                 goto out;
875 
876         if (xfs_has_finobt(sc->mp)) {
877                 sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT;
878                 error = xchk_iallocbt(sc);
879         }
880 
881 out:
882         sc->sm->sm_type = old_type;
883         return error;
884 }
885 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php