~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/xfs/scrub/attr_repair.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-or-later
  2 /*
  3  * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
  4  * Author: Darrick J. Wong <djwong@kernel.org>
  5  */
  6 #include "xfs.h"
  7 #include "xfs_fs.h"
  8 #include "xfs_shared.h"
  9 #include "xfs_format.h"
 10 #include "xfs_trans_resv.h"
 11 #include "xfs_mount.h"
 12 #include "xfs_defer.h"
 13 #include "xfs_btree.h"
 14 #include "xfs_bit.h"
 15 #include "xfs_log_format.h"
 16 #include "xfs_trans.h"
 17 #include "xfs_sb.h"
 18 #include "xfs_inode.h"
 19 #include "xfs_da_format.h"
 20 #include "xfs_da_btree.h"
 21 #include "xfs_dir2.h"
 22 #include "xfs_attr.h"
 23 #include "xfs_attr_leaf.h"
 24 #include "xfs_attr_sf.h"
 25 #include "xfs_attr_remote.h"
 26 #include "xfs_bmap.h"
 27 #include "xfs_bmap_util.h"
 28 #include "xfs_exchmaps.h"
 29 #include "xfs_exchrange.h"
 30 #include "xfs_acl.h"
 31 #include "xfs_parent.h"
 32 #include "scrub/xfs_scrub.h"
 33 #include "scrub/scrub.h"
 34 #include "scrub/common.h"
 35 #include "scrub/trace.h"
 36 #include "scrub/repair.h"
 37 #include "scrub/tempfile.h"
 38 #include "scrub/tempexch.h"
 39 #include "scrub/xfile.h"
 40 #include "scrub/xfarray.h"
 41 #include "scrub/xfblob.h"
 42 #include "scrub/attr.h"
 43 #include "scrub/reap.h"
 44 #include "scrub/attr_repair.h"
 45 
 46 /*
 47  * Extended Attribute Repair
 48  * =========================
 49  *
 50  * We repair extended attributes by reading the attr leaf blocks looking for
 51  * attribute entries that look salvageable (name passes verifiers, value can
 52  * be retrieved, etc).  Each extended attribute worth salvaging is stashed in
 53  * memory, and the stashed entries are periodically replayed into a temporary
 54  * file to constrain memory use.  Batching the construction of the temporary
 55  * extended attribute structure in this fashion reduces lock cycling of the
 56  * file being repaired and the temporary file.
 57  *
 58  * When salvaging completes, the remaining stashed attributes are replayed to
 59  * the temporary file.  An atomic file contents exchange is used to commit the
 60  * new xattr blocks to the file being repaired.  This will disrupt attrmulti
 61  * cursors.
 62  */
 63 
 64 struct xrep_xattr_key {
            /*
             * In-memory index record for one salvaged xattr.  The name and
             * value bytes are kept in the xattr_blobs store of struct
             * xrep_xattr; this record only carries the cookies, flags and
             * lengths needed to load them back for insertion.
             */

 65         /* Cookie for retrieval of the xattr name. */
 66         xfblob_cookie           name_cookie;
 67 
 68         /* Cookie for retrieval of the xattr value. */
 69         xfblob_cookie           value_cookie;
 70 
 71         /* XFS_ATTR_* flags, masked with XFS_ATTR_NSP_ONDISK_MASK when stashed */
 72         int                     flags;
 73 
 74         /*
             * Length of the value and name.  For non-parent-pointer attrs the
             * stashed name length is the length up to the first NUL byte.
             */
 75         uint32_t                valuelen;
 76         uint16_t                namelen;
 77 };
 78 
 79 /*
 80  * Stash up to 8 pages of attrs in xattr_records/xattr_blobs before we write
 81  * them to the temp file.
     *
     * This is a soft threshold: xrep_xattr_want_flush_stashed() asks for a
     * flush once the combined byte count of the two stores exceeds it, and
     * only when flushing is currently permitted.
 82  */
 83 #define XREP_XATTR_MAX_STASH_BYTES      (PAGE_SIZE * 8)
 84 
 85 struct xrep_xattr {
            /* Scrub context that this repair state belongs to. */
 86         struct xfs_scrub        *sc;
 87 
 88         /* Information for exchanging attr fork mappings at the end. */
 89         struct xrep_tempexch    tx;
 90 
 91         /* xattr keys: one struct xrep_xattr_key per stashed attribute */
 92         struct xfarray          *xattr_records;
 93 
 94         /* xattr names and values, retrieved via the cookies in the keys */
 95         struct xfblob           *xattr_blobs;
 96 
 97         /* Number of attributes that we are salvaging (one per stashed key). */
 98         unsigned long long      attrs_found;
 99 
100         /* Can we flush stashed attrs to the tempfile? */
101         bool                    can_flush;
102 
103         /* Did the live update fail, and hence the repair is now out of date? */
104         bool                    live_update_aborted;
105 
106         /* Lock protecting parent pointer updates (held to inspect pptr_recs) */
107         struct mutex            lock;
108 
109         /* Fixed-size array of xrep_xattr_pptr structures. */
110         struct xfarray          *pptr_recs;
111 
112         /* Blobs containing parent pointer names. */
113         struct xfblob           *pptr_names;
114 
115         /* Hook to capture parent pointer updates. */
116         struct xfs_dir_hook     dhook;
117 
118         /* Scratch buffer for capturing parent pointers. */
119         struct xfs_da_args      pptr_args;
120 
121         /* Name buffer; namebuf provides MAXNAMELEN bytes of storage. */
122         struct xfs_name         xname;
123         char                    namebuf[MAXNAMELEN];
124 };
125 
126 /* xrep_xattr_pptr.action value: create a parent pointer in the tempfile. */
127 #define XREP_XATTR_PPTR_ADD     (1)
128 
129 /* xrep_xattr_pptr.action value: remove a parent pointer from the tempfile. */
130 #define XREP_XATTR_PPTR_REMOVE  (2)
131 
132 /*
     * A stashed parent pointer update.  Going by the field comments in struct
     * xrep_xattr, these records are kept in pptr_recs and the names they refer
     * to in pptr_names.
     */
133 struct xrep_xattr_pptr {
134         /* Cookie for retrieval of the pptr name. */
135         xfblob_cookie           name_cookie;
136 
137         /* Parent pointer record. */
138         struct xfs_parent_rec   pptr_rec;
139 
140         /* Length of the pptr name. */
141         uint8_t                 namelen;
142 
143         /* XREP_XATTR_PPTR_{ADD,REMOVE} */
144         uint8_t                 action;
145 };
146 
147 /* Set up to recreate the extended attributes. */
148 int
149 xrep_setup_xattr(
150         struct xfs_scrub        *sc)
151 {
152         if (xfs_has_parent(sc->mp))
153                 xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
154 
155         return xrep_tempfile_create(sc, S_IFREG);
156 }
157 
158 /*
159  * Decide if we want to salvage this attribute.  We don't bother with
160  * incomplete or oversized keys or values.  The @value parameter can be null
161  * for remote attrs.
162  */
163 STATIC int
164 xrep_xattr_want_salvage(
165         struct xrep_xattr       *rx,
166         unsigned int            attr_flags,
167         const void              *name,
168         int                     namelen,
169         const void              *value,
170         int                     valuelen)
171 {
172         if (attr_flags & XFS_ATTR_INCOMPLETE)
173                 return false;
174         if (namelen > XATTR_NAME_MAX || namelen <= 0)
175                 return false;
176         if (!xfs_attr_namecheck(attr_flags, name, namelen))
177                 return false;
178         if (valuelen > XATTR_SIZE_MAX || valuelen < 0)
179                 return false;
180         if (attr_flags & XFS_ATTR_PARENT)
181                 return xfs_parent_valuecheck(rx->sc->mp, value, valuelen);
182 
183         return true;
184 }
185 
186 /* Allocate an in-core record to hold xattrs while we rebuild the xattr data. */
187 STATIC int
188 xrep_xattr_salvage_key(
189         struct xrep_xattr       *rx,
190         int                     flags,
191         unsigned char           *name,
192         int                     namelen,
193         unsigned char           *value,
194         int                     valuelen)
195 {
196         struct xrep_xattr_key   key = {
197                 .valuelen       = valuelen,
198                 .flags          = flags & XFS_ATTR_NSP_ONDISK_MASK,
199         };
200         unsigned int            i = 0;
201         int                     error = 0;
202 
203         if (xchk_should_terminate(rx->sc, &error))
204                 return error;
205 
206         /*
207          * Truncate the name to the first character that would trip namecheck.
208          * If we no longer have a name after that, ignore this attribute.
209          */
210         if (flags & XFS_ATTR_PARENT) {
211                 key.namelen = namelen;
212 
213                 trace_xrep_xattr_salvage_pptr(rx->sc->ip, flags, name,
214                                 key.namelen, value, valuelen);
215         } else {
216                 while (i < namelen && name[i] != 0)
217                         i++;
218                 if (i == 0)
219                         return 0;
220                 key.namelen = i;
221 
222                 trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name,
223                                 key.namelen, valuelen);
224         }
225 
226         error = xfblob_store(rx->xattr_blobs, &key.name_cookie, name,
227                         key.namelen);
228         if (error)
229                 return error;
230 
231         error = xfblob_store(rx->xattr_blobs, &key.value_cookie, value,
232                         key.valuelen);
233         if (error)
234                 return error;
235 
236         error = xfarray_append(rx->xattr_records, &key);
237         if (error)
238                 return error;
239 
240         rx->attrs_found++;
241         return 0;
242 }
243 
244 /*
245  * Record a shortform extended attribute key & value for later reinsertion
246  * into the inode.
247  */
248 STATIC int
249 xrep_xattr_salvage_sf_attr(
250         struct xrep_xattr               *rx,
251         struct xfs_attr_sf_hdr          *hdr,
252         struct xfs_attr_sf_entry        *sfe)
253 {
254         struct xfs_scrub                *sc = rx->sc;
255         struct xchk_xattr_buf           *ab = sc->buf;
256         unsigned char                   *name = sfe->nameval;
257         unsigned char                   *value = &sfe->nameval[sfe->namelen];
258 
259         if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)name - (char *)hdr,
260                         sfe->namelen))
261                 return 0;
262 
263         if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)value - (char *)hdr,
264                         sfe->valuelen))
265                 return 0;
266 
267         if (!xrep_xattr_want_salvage(rx, sfe->flags, sfe->nameval,
268                         sfe->namelen, value, sfe->valuelen))
269                 return 0;
270 
271         return xrep_xattr_salvage_key(rx, sfe->flags, sfe->nameval,
272                         sfe->namelen, value, sfe->valuelen);
273 }
274 
275 /*
276  * Record a local format extended attribute key & value for later reinsertion
277  * into the inode.
278  */
279 STATIC int
280 xrep_xattr_salvage_local_attr(
281         struct xrep_xattr               *rx,
282         struct xfs_attr_leaf_entry      *ent,
283         unsigned int                    nameidx,
284         const char                      *buf_end,
285         struct xfs_attr_leaf_name_local *lentry)
286 {
287         struct xchk_xattr_buf           *ab = rx->sc->buf;
288         unsigned char                   *value;
289         unsigned int                    valuelen;
290         unsigned int                    namesize;
291 
292         /*
293          * Decode the leaf local entry format.  If something seems wrong, we
294          * junk the attribute.
295          */
296         value = &lentry->nameval[lentry->namelen];
297         valuelen = be16_to_cpu(lentry->valuelen);
298         namesize = xfs_attr_leaf_entsize_local(lentry->namelen, valuelen);
299         if ((char *)lentry + namesize > buf_end)
300                 return 0;
301         if (!xrep_xattr_want_salvage(rx, ent->flags, lentry->nameval,
302                         lentry->namelen, value, valuelen))
303                 return 0;
304         if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
305                 return 0;
306 
307         /* Try to save this attribute. */
308         return xrep_xattr_salvage_key(rx, ent->flags, lentry->nameval,
309                         lentry->namelen, value, valuelen);
310 }
311 
312 /*
313  * Record a remote format extended attribute key & value for later reinsertion
314  * into the inode.
     *
     * @nameidx is the entry's name offset within the leaf block, @buf_end points
     * just past the end of the block, @ent_idx is the entry's index in the leaf
     * and @leaf_bp is the buffer holding the leaf.  Entries that fail the bounds,
     * zero-length, salvage-filter or usedmap checks are skipped by returning 0.
     * -EFSBADCRC and -EFSCORRUPTED from the value lookup/read (or from stashing)
     * are also turned into 0; any other error is returned to the caller.
315  */
316 STATIC int
317 xrep_xattr_salvage_remote_attr(
318         struct xrep_xattr               *rx,
319         struct xfs_attr_leaf_entry      *ent,
320         unsigned int                    nameidx,
321         const char                      *buf_end,
322         struct xfs_attr_leaf_name_remote *rentry,
323         unsigned int                    ent_idx,
324         struct xfs_buf                  *leaf_bp)
325 {
326         struct xchk_xattr_buf           *ab = rx->sc->buf;
327         struct xfs_da_args              args = {
328                 .trans                  = rx->sc->tp,
329                 .dp                     = rx->sc->ip,
330                 .index                  = ent_idx,
331                 .geo                    = rx->sc->mp->m_attr_geo,
332                 .owner                  = rx->sc->ip->i_ino,
333                 .attr_filter            = ent->flags & XFS_ATTR_NSP_ONDISK_MASK,
334                 .namelen                = rentry->namelen,
335                 .name                   = rentry->name,
336                 .value                  = ab->value,
337                 .valuelen               = be32_to_cpu(rentry->valuelen),
338         };
339         unsigned int                    namesize;
340         int                             error;
341 
342         /*
343          * Decode the leaf remote entry format.  If something seems wrong, we
344          * junk the attribute.  Note that we should never find a zero-length
345          * remote attribute value.
346          */
347         namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
348         if ((char *)rentry + namesize > buf_end)
349                 return 0;
            /* The value is not in memory yet, so the filter gets a NULL value. */
350         if (args.valuelen == 0 ||
351             !xrep_xattr_want_salvage(rx, ent->flags, rentry->name,
352                         rentry->namelen, NULL, args.valuelen))
353                 return 0;
354         if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
355                 return 0;
356 
357         /*
358          * Enlarge the buffer (if needed) to hold the value that we're trying
359          * to salvage from the old extended attribute data.
             * An -ENOMEM failure is reported to the caller as -EDEADLOCK.
             * NOTE(review): args.value was initialized from ab->value above; if
             * the setup call can replace ab->value, args.value would need to be
             * refreshed -- confirm against xchk_setup_xattr_buf().
360          */
361         error = xchk_setup_xattr_buf(rx->sc, args.valuelen);
362         if (error == -ENOMEM)
363                 error = -EDEADLOCK;
364         if (error)
365                 return error;
366 
367         /* Look up the remote value and stash it for reconstruction. */
368         error = xfs_attr3_leaf_getvalue(leaf_bp, &args);
            /* A zero remote block number means there is nothing to read. */
369         if (error || args.rmtblkno == 0)
370                 goto err_free;
371 
372         error = xfs_attr_rmtval_get(&args);
373         if (error)
374                 goto err_free;
375 
376         /* Try to save this attribute. */
377         error = xrep_xattr_salvage_key(rx, ent->flags, rentry->name,
378                         rentry->namelen, ab->value, args.valuelen);
            /* Despite its name, this label releases nothing; it only filters errors. */
379 err_free:
380         /* remote value was garbage, junk it */
381         if (error == -EFSBADCRC || error == -EFSCORRUPTED)
382                 error = 0;
383         return error;
384 }
385 
386 /* Extract every xattr key that we can from this attr fork block. */
387 STATIC int
388 xrep_xattr_recover_leaf(
389         struct xrep_xattr               *rx,
390         struct xfs_buf                  *bp)
391 {
392         struct xfs_attr3_icleaf_hdr     leafhdr;
393         struct xfs_scrub                *sc = rx->sc;
394         struct xfs_mount                *mp = sc->mp;
395         struct xfs_attr_leafblock       *leaf;
396         struct xfs_attr_leaf_name_local *lentry;
397         struct xfs_attr_leaf_name_remote *rentry;
398         struct xfs_attr_leaf_entry      *ent;
399         struct xfs_attr_leaf_entry      *entries;
400         struct xchk_xattr_buf           *ab = rx->sc->buf;
401         char                            *buf_end;
402         size_t                          off;
403         unsigned int                    nameidx;
404         unsigned int                    hdrsize;
405         int                             i;
406         int                             error = 0;
407 
408         bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize);
409 
410         /* Check the leaf header */
411         leaf = bp->b_addr;
412         xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
413         hdrsize = xfs_attr3_leaf_hdr_size(leaf);
414         xchk_xattr_set_map(sc, ab->usedmap, 0, hdrsize);
415         entries = xfs_attr3_leaf_entryp(leaf);
416 
417         buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
418         for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) {
419                 if (xchk_should_terminate(sc, &error))
420                         return error;
421 
422                 /* Skip key if it conflicts with something else? */
423                 off = (char *)ent - (char *)leaf;
424                 if (!xchk_xattr_set_map(sc, ab->usedmap, off,
425                                 sizeof(xfs_attr_leaf_entry_t)))
426                         continue;
427 
428                 /* Check the name information. */
429                 nameidx = be16_to_cpu(ent->nameidx);
430                 if (nameidx < leafhdr.firstused ||
431                     nameidx >= mp->m_attr_geo->blksize)
432                         continue;
433 
434                 if (ent->flags & XFS_ATTR_LOCAL) {
435                         lentry = xfs_attr3_leaf_name_local(leaf, i);
436                         error = xrep_xattr_salvage_local_attr(rx, ent, nameidx,
437                                         buf_end, lentry);
438                 } else {
439                         rentry = xfs_attr3_leaf_name_remote(leaf, i);
440                         error = xrep_xattr_salvage_remote_attr(rx, ent, nameidx,
441                                         buf_end, rentry, i, bp);
442                 }
443                 if (error)
444                         return error;
445         }
446 
447         return 0;
448 }
449 
450 /* Try to recover shortform attrs. */
451 STATIC int
452 xrep_xattr_recover_sf(
453         struct xrep_xattr               *rx)
454 {
455         struct xfs_scrub                *sc = rx->sc;
456         struct xchk_xattr_buf           *ab = sc->buf;
457         struct xfs_attr_sf_hdr          *hdr;
458         struct xfs_attr_sf_entry        *sfe;
459         struct xfs_attr_sf_entry        *next;
460         struct xfs_ifork                *ifp;
461         unsigned char                   *end;
462         int                             i;
463         int                             error = 0;
464 
465         ifp = xfs_ifork_ptr(rx->sc->ip, XFS_ATTR_FORK);
466         hdr = ifp->if_data;
467 
468         bitmap_zero(ab->usedmap, ifp->if_bytes);
469         end = (unsigned char *)ifp->if_data + ifp->if_bytes;
470         xchk_xattr_set_map(sc, ab->usedmap, 0, sizeof(*hdr));
471 
472         sfe = xfs_attr_sf_firstentry(hdr);
473         if ((unsigned char *)sfe > end)
474                 return 0;
475 
476         for (i = 0; i < hdr->count; i++) {
477                 if (xchk_should_terminate(sc, &error))
478                         return error;
479 
480                 next = xfs_attr_sf_nextentry(sfe);
481                 if ((unsigned char *)next > end)
482                         break;
483 
484                 if (xchk_xattr_set_map(sc, ab->usedmap,
485                                 (char *)sfe - (char *)hdr,
486                                 sizeof(struct xfs_attr_sf_entry))) {
487                         /*
488                          * No conflicts with the sf entry; let's save this
489                          * attribute.
490                          */
491                         error = xrep_xattr_salvage_sf_attr(rx, hdr, sfe);
492                         if (error)
493                                 return error;
494                 }
495 
496                 sfe = next;
497         }
498 
499         return 0;
500 }
501 
502 /*
503  * Try to return a buffer of xattr data for a given physical extent.
504  *
505  * Because the buffer cache get function complains if it finds a buffer
506  * matching the block number but not matching the length, we must be careful to
507  * look for incore buffers (up to the maximum length of a remote value) that
508  * could be hiding anywhere in the physical range.  If we find an incore
509  * buffer, we can pass that to the caller.  Optionally, read a single block and
510  * pass that back.
511  *
512  * Note the subtlety that remote attr value blocks for which there is no incore
513  * buffer will be passed to the callback one block at a time.  These buffers
514  * will not have any ops attached and must be staled to prevent aliasing with
515  * multiblock buffers once we drop the ILOCK.
516  */
517 STATIC int
518 xrep_xattr_find_buf(
519         struct xfs_mount        *mp,
520         xfs_fsblock_t           fsbno,
521         xfs_extlen_t            max_len,
522         bool                    can_read,
523         struct xfs_buf          **bpp)
524 {
525         struct xrep_bufscan     scan = {
526                 .daddr          = XFS_FSB_TO_DADDR(mp, fsbno),
527                 .max_sectors    = xrep_bufscan_max_sectors(mp, max_len),
528                 .daddr_step     = XFS_FSB_TO_BB(mp, 1),
529         };
530         struct xfs_buf          *bp;
531 
532         while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
533                 *bpp = bp;
534                 return 0;
535         }
536 
537         if (!can_read) {
538                 *bpp = NULL;
539                 return 0;
540         }
541 
542         return xfs_buf_read(mp->m_ddev_targp, scan.daddr, XFS_FSB_TO_BB(mp, 1),
543                         XBF_TRYLOCK, bpp, NULL);
544 }
545 
546 /*
547  * Deal with a buffer that we found during our walk of the attr fork.
548  *
549  * Attribute leaf and node blocks are simple -- they're a single block, so we
550  * can walk them one at a time and we never have to worry about discontiguous
551  * multiblock buffers like we do for directories.
552  *
553  * Unfortunately, remote attr blocks add a lot of complexity here.  Each disk
554  * block is totally self contained, in the sense that the v5 header provides no
555  * indication that there could be more data in the next block.  The incore
556  * buffers can span multiple blocks, though they never cross extent records.
557  * However, they don't necessarily start or end on an extent record boundary.
558  * Therefore, we need a special buffer find function to walk the buffer cache
559  * for us.
560  *
561  * The caller must hold the ILOCK on the file being repaired.  We use
562  * XBF_TRYLOCK here to skip any locked buffer on the assumption that we don't
563  * own the block and don't want to hang the system on a potentially garbage
564  * buffer.
     *
     * On success, *@actual_len is set to the length (in fs blocks) of the buffer
     * that was examined.  If the buffer lookup fails, the error is returned and
     * *@actual_len is left untouched.
565  */
566 STATIC int
567 xrep_xattr_recover_block(
568         struct xrep_xattr       *rx,
569         xfs_dablk_t             dabno,
570         xfs_fsblock_t           fsbno,
571         xfs_extlen_t            max_len,
572         xfs_extlen_t            *actual_len)
573 {
574         struct xfs_da_blkinfo   *info;
575         struct xfs_buf          *bp;
576         int                     error;
577 
578         error = xrep_xattr_find_buf(rx->sc->mp, fsbno, max_len, true, &bp);
579         if (error)
580                 return error;
581         info = bp->b_addr;
582         *actual_len = XFS_BB_TO_FSB(rx->sc->mp, bp->b_length);
583 
584         trace_xrep_xattr_recover_leafblock(rx->sc->ip, dabno,
585                         be16_to_cpu(info->magic));
586 
587         /*
588          * If the buffer has the right magic number for an attr leaf block and
589          * passes a structure check (we don't care about checksums), salvage
590          * as much as we can from the block.  Blocks that fail any of these
             * tests are skipped without error.
             */
591         if (info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC) &&
592             xrep_buf_verify_struct(bp, &xfs_attr3_leaf_buf_ops) &&
593             xfs_attr3_leaf_header_check(bp, rx->sc->ip->i_ino) == NULL)
594                 error = xrep_xattr_recover_leaf(rx, bp);
595 
596         /*
597          * If the buffer didn't already have buffer ops set, it was read in by
598          * the _find_buf function and could very well be /part/ of a multiblock
599          * remote block.  Mark it stale so that it doesn't hang around in
600          * memory to cause problems.
601          */
602         if (bp->b_ops == NULL)
603                 xfs_buf_stale(bp);
604 
605         xfs_buf_relse(bp);
606         return error;
607 }
608 
609 /*
     * Insert one xattr key/value.
     *
     * Loads the stashed name and value for @key from rx->xattr_blobs into the
     * scrub buffer, frees each blob as soon as it has been loaded, and then sets
     * the attribute on the temporary file.  An -EEXIST result from the set
     * operation is treated as success; other errors are returned.
     */
610 STATIC int
611 xrep_xattr_insert_rec(
612         struct xrep_xattr               *rx,
613         const struct xrep_xattr_key     *key)
614 {
615         struct xfs_da_args              args = {
616                 .dp                     = rx->sc->tempip,
617                 .attr_filter            = key->flags,
618                 .namelen                = key->namelen,
619                 .valuelen               = key->valuelen,
620                 .owner                  = rx->sc->ip->i_ino,
621                 .geo                    = rx->sc->mp->m_attr_geo,
622                 .whichfork              = XFS_ATTR_FORK,
623                 .op_flags               = XFS_DA_OP_OKNOENT,
624         };
625         struct xchk_xattr_buf           *ab = rx->sc->buf;
626         int                             error;
627 
628         /*
629          * Grab pointers to the scrub buffer so that we can use them to insert
630          * attrs into the temp file.
631          */
632         args.name = ab->name;
633         args.value = ab->value;
634 
635         /*
636          * The attribute name is stored near the end of the in-core buffer,
637          * though we reserve one more byte to ensure null termination.
638          */
639         ab->name[XATTR_NAME_MAX] = 0;
640 
            /* Each blob is released right after it has been copied out. */
641         error = xfblob_load(rx->xattr_blobs, key->name_cookie, ab->name,
642                         key->namelen);
643         if (error)
644                 return error;
645 
646         error = xfblob_free(rx->xattr_blobs, key->name_cookie);
647         if (error)
648                 return error;
649 
650         error = xfblob_load(rx->xattr_blobs, key->value_cookie, args.value,
651                         key->valuelen);
652         if (error)
653                 return error;
654 
655         error = xfblob_free(rx->xattr_blobs, key->value_cookie);
656         if (error)
657                 return error;
658 
            /* Terminate the name right after its last stashed byte. */
659         ab->name[key->namelen] = 0;
660 
            /* Parent pointer attrs additionally get XFS_DA_OP_LOGGED. */
661         if (key->flags & XFS_ATTR_PARENT) {
662                 trace_xrep_xattr_insert_pptr(rx->sc->tempip, key->flags,
663                                 ab->name, key->namelen, ab->value,
664                                 key->valuelen);
665                 args.op_flags |= XFS_DA_OP_LOGGED;
666         } else {
667                 trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags,
668                                 ab->name, key->namelen, key->valuelen);
669         }
670 
671         /*
672          * xfs_attr_set creates and commits its own transaction.  If the attr
673          * already exists, we'll just drop it during the rebuild.
674          */
675         xfs_attr_sethash(&args);
676         error = xfs_attr_set(&args, XFS_ATTRUPDATE_CREATE, false);
677         if (error == -EEXIST)
678                 error = 0;
679 
680         return error;
681 }
682 
683 /*
684  * Periodically flush salvaged attributes to the temporary file.  This is done
685  * to reduce the memory requirements of the xattr rebuild because files can
686  * contain millions of attributes.
     *
     * Returns 0 with a fresh scrub transaction allocated and the ILOCK of the
     * file being repaired retaken, or a negative errno.
     * NOTE(review): the error returns below leave this function with the ILOCK
     * dropped and, inside the insert loop, with the tempfile IOLOCK still held;
     * presumably scrub teardown copes with that state -- confirm.
687  */
688 STATIC int
689 xrep_xattr_flush_stashed(
690         struct xrep_xattr       *rx)
691 {
692         xfarray_idx_t           array_cur;
693         int                     error;
694 
695         /*
696          * Entering this function, the scrub context has a reference to the
697          * inode being repaired, the temporary file, and a scrub transaction
698          * that we use during xattr salvaging to avoid livelocking if there
699          * are cycles in the xattr structures.  We hold ILOCK_EXCL on the
700          * inode being repaired, though it is not ijoined to the scrub
701          * transaction.
702          *
703          * To constrain kernel memory use, we occasionally flush salvaged
704          * xattrs from the xfarray and xfblob structures into the temporary
705          * file in preparation for exchanging the xattr structures at the end.
706          * Updating the temporary file requires a transaction, so we commit the
707          * scrub transaction and drop the ILOCK so that xfs_attr_set can
708          * allocate whatever transaction it wants.
709          *
710          * We still hold IOLOCK_EXCL on the inode being repaired, which
711          * prevents anyone from modifying the damaged xattr data while we
712          * repair it.
713          */
714         error = xrep_trans_commit(rx->sc);
715         if (error)
716                 return error;
717         xchk_iunlock(rx->sc, XFS_ILOCK_EXCL);
718 
719         /*
720          * Take the IOLOCK of the temporary file while we modify xattrs.  This
721          * isn't strictly required because the temporary file is never revealed
722          * to userspace, but we follow the same locking rules.  We still hold
723          * sc->ip's IOLOCK.
724          */
725         error = xrep_tempfile_iolock_polled(rx->sc);
726         if (error)
727                 return error;
728 
729         /* Add all the salvaged attrs to the temporary file. */
730         foreach_xfarray_idx(rx->xattr_records, array_cur) {
731                 struct xrep_xattr_key   key;
732 
733                 error = xfarray_load(rx->xattr_records, array_cur, &key);
734                 if (error)
735                         return error;
736 
737                 error = xrep_xattr_insert_rec(rx, &key);
738                 if (error)
739                         return error;
740         }
741 
742         /* Empty out both arrays now that we've added the entries. */
743         xfarray_truncate(rx->xattr_records);
744         xfblob_truncate(rx->xattr_blobs);
745 
746         xrep_tempfile_iounlock(rx->sc);
747 
748         /* Recreate the salvage transaction and relock the inode. */
749         error = xchk_trans_alloc(rx->sc, 0);
750         if (error)
751                 return error;
752         xchk_ilock(rx->sc, XFS_ILOCK_EXCL);
753         return 0;
754 }
755 
756 /* Decide if we've stashed too much xattr data in memory. */
757 static inline bool
758 xrep_xattr_want_flush_stashed(
759         struct xrep_xattr       *rx)
760 {
761         unsigned long long      bytes;
762 
763         if (!rx->can_flush)
764                 return false;
765 
766         bytes = xfarray_bytes(rx->xattr_records) +
767                 xfblob_bytes(rx->xattr_blobs);
768         return bytes > XREP_XATTR_MAX_STASH_BYTES;
769 }
770 
771 /*
772  * Did we observe rename changing parent pointer xattrs while we were flushing
773  * salvaged attrs?
774  */
775 static inline bool
776 xrep_xattr_saw_pptr_conflict(
777         struct xrep_xattr       *rx)
778 {
779         bool                    ret;
780 
781         ASSERT(rx->can_flush);
782 
783         if (!xfs_has_parent(rx->sc->mp))
784                 return false;
785 
786         xfs_assert_ilocked(rx->sc->ip, XFS_ILOCK_EXCL);
787 
788         mutex_lock(&rx->lock);
789         ret = xfarray_bytes(rx->pptr_recs) > 0;
790         mutex_unlock(&rx->lock);
791 
792         return ret;
793 }
794 
795 /*
796  * Reset the entire repair state back to initial conditions, now that we've
797  * detected a parent pointer update to the attr structure while we were
798  * flushing salvaged attrs.  See the locking notes in dir_repair.c for more
799  * information on why this is all necessary.
800  */
801 STATIC int
802 xrep_xattr_full_reset(
803         struct xrep_xattr       *rx)
804 {
805         struct xfs_scrub        *sc = rx->sc;
806         struct xfs_attr_sf_hdr  *hdr;
807         struct xfs_ifork        *ifp = &sc->tempip->i_af;
808         int                     error;
809 
810         trace_xrep_xattr_full_reset(sc->ip, sc->tempip);
811 
812         /* The temporary file's data fork had better not be in btree format. */
813         if (sc->tempip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
814                 ASSERT(0);
815                 return -EIO;
816         }
817 
818         /*
819          * We begin in transaction context with sc->ip ILOCKed but not joined
820          * to the transaction.  To reset to the initial state, we must hold
821          * sc->ip's ILOCK to prevent rename from updating parent pointer
822          * information and the tempfile's ILOCK to clear its contents.
823          */
824         xchk_iunlock(rx->sc, XFS_ILOCK_EXCL);
825         xrep_tempfile_ilock_both(sc);
826         xfs_trans_ijoin(sc->tp, sc->ip, 0);
827         xfs_trans_ijoin(sc->tp, sc->tempip, 0);
828 
829         /*
830          * Free all the blocks of the attr fork of the temp file, and reset
831          * it back to local format.
832          */
833         if (xfs_ifork_has_extents(&sc->tempip->i_af)) {
834                 error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
835                 if (error)
836                         return error;
837 
838                 ASSERT(ifp->if_bytes == 0);
839                 ifp->if_format = XFS_DINODE_FMT_LOCAL;
840                 xfs_idata_realloc(sc->tempip, sizeof(*hdr), XFS_ATTR_FORK);
841         }
842 
843         /* Reinitialize the attr fork to an empty shortform structure. */
844         hdr = ifp->if_data;
845         memset(hdr, 0, sizeof(*hdr));
846         hdr->totsize = cpu_to_be16(sizeof(*hdr));
847         xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE | XFS_ILOG_ADATA);
848 
849         /*
850          * Roll this transaction to commit our reset ondisk.  The tempfile
851          * should no longer be joined to the transaction, so we drop its ILOCK.
852          * This should leave us in transaction context with sc->ip ILOCKed but
853          * not joined to the transaction.
854          */
855         error = xrep_roll_trans(sc);
856         if (error)
857                 return error;
858         xrep_tempfile_iunlock(sc);
859 
860         /*
861          * Erase any accumulated parent pointer updates now that we've erased
862          * the tempfile's attr fork.  We're resetting the entire repair state
863          * back to where we were initially, except now we won't flush salvaged
864          * xattrs until the very end.
865          */
866         mutex_lock(&rx->lock);
867         xfarray_truncate(rx->pptr_recs);
868         xfblob_truncate(rx->pptr_names);
869         mutex_unlock(&rx->lock);
870 
871         rx->can_flush = false;
872         rx->attrs_found = 0;
873 
874         ASSERT(xfarray_bytes(rx->xattr_records) == 0);
875         ASSERT(xfblob_bytes(rx->xattr_blobs) == 0);
876         return 0;
877 }
878 
879 /* Extract as many attribute keys and values as we can. */
880 STATIC int
881 xrep_xattr_recover(
882         struct xrep_xattr       *rx)
883 {
884         struct xfs_bmbt_irec    got;
885         struct xfs_scrub        *sc = rx->sc;
886         struct xfs_da_geometry  *geo = sc->mp->m_attr_geo;
887         xfs_fileoff_t           offset;
888         xfs_extlen_t            len;
889         xfs_dablk_t             dabno;
890         int                     nmap;
891         int                     error;
892 
893 restart:
894         /*
895          * Iterate each xattr leaf block in the attr fork to scan them for any
896          * attributes that we might salvage.
897          */
898         for (offset = 0;
899              offset < XFS_MAX_FILEOFF;
900              offset = got.br_startoff + got.br_blockcount) {
901                 nmap = 1;
902                 error = xfs_bmapi_read(sc->ip, offset, XFS_MAX_FILEOFF - offset,
903                                 &got, &nmap, XFS_BMAPI_ATTRFORK);
904                 if (error)
905                         return error;
906                 if (nmap != 1)
907                         return -EFSCORRUPTED;
908                 if (!xfs_bmap_is_written_extent(&got))
909                         continue;
910 
911                 for (dabno = round_up(got.br_startoff, geo->fsbcount);
912                      dabno < got.br_startoff + got.br_blockcount;
913                      dabno += len) {
914                         xfs_fileoff_t   curr_offset = dabno - got.br_startoff;
915                         xfs_extlen_t    maxlen;
916 
917                         if (xchk_should_terminate(rx->sc, &error))
918                                 return error;
919 
920                         maxlen = min_t(xfs_filblks_t, INT_MAX,
921                                         got.br_blockcount - curr_offset);
922                         error = xrep_xattr_recover_block(rx, dabno,
923                                         curr_offset + got.br_startblock,
924                                         maxlen, &len);
925                         if (error)
926                                 return error;
927 
928                         if (xrep_xattr_want_flush_stashed(rx)) {
929                                 error = xrep_xattr_flush_stashed(rx);
930                                 if (error)
931                                         return error;
932 
933                                 if (xrep_xattr_saw_pptr_conflict(rx)) {
934                                         error = xrep_xattr_full_reset(rx);
935                                         if (error)
936                                                 return error;
937 
938                                         goto restart;
939                                 }
940                         }
941                 }
942         }
943 
944         return 0;
945 }
946 
947 /*
948  * Reset the extended attribute fork to a state where we can start re-adding
949  * the salvaged attributes.
950  */
951 STATIC int
952 xrep_xattr_fork_remove(
953         struct xfs_scrub        *sc,
954         struct xfs_inode        *ip)
955 {
956         struct xfs_attr_sf_hdr  *hdr;
957         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK);
958 
959         /*
960          * If the data fork is in btree format, we can't change di_forkoff
961          * because we could run afoul of the rule that the data fork isn't
962          * supposed to be in btree format if there's enough space in the fork
963          * that it could have used extents format.  Instead, reinitialize the
964          * attr fork to have a shortform structure with zero attributes.
965          */
966         if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
967                 ifp->if_format = XFS_DINODE_FMT_LOCAL;
968                 hdr = xfs_idata_realloc(ip, (int)sizeof(*hdr) - ifp->if_bytes,
969                                 XFS_ATTR_FORK);
970                 hdr->count = 0;
971                 hdr->totsize = cpu_to_be16(sizeof(*hdr));
972                 xfs_trans_log_inode(sc->tp, ip,
973                                 XFS_ILOG_CORE | XFS_ILOG_ADATA);
974                 return 0;
975         }
976 
977         /* If we still have attr fork extents, something's wrong. */
978         if (ifp->if_nextents != 0) {
979                 struct xfs_iext_cursor  icur;
980                 struct xfs_bmbt_irec    irec;
981                 unsigned int            i = 0;
982 
983                 xfs_emerg(sc->mp,
984         "inode 0x%llx attr fork still has %llu attr extents, format %d?!",
985                                 ip->i_ino, ifp->if_nextents, ifp->if_format);
986                 for_each_xfs_iext(ifp, &icur, &irec) {
987                         xfs_err(sc->mp,
988         "[%u]: startoff %llu startblock %llu blockcount %llu state %u",
989                                         i++, irec.br_startoff,
990                                         irec.br_startblock, irec.br_blockcount,
991                                         irec.br_state);
992                 }
993                 ASSERT(0);
994                 return -EFSCORRUPTED;
995         }
996 
997         xfs_attr_fork_remove(ip, sc->tp);
998         return 0;
999 }
1000 
1001 /*
1002  * Free all the attribute fork blocks of the file being repaired and delete the
1003  * fork.  The caller must ILOCK the scrub file and join it to the transaction.
1004  * This function returns with the inode joined to a clean transaction.
1005  */
1006 int
1007 xrep_xattr_reset_fork(
1008         struct xfs_scrub        *sc)
1009 {
1010         int                     error;
1011 
1012         trace_xrep_xattr_reset_fork(sc->ip, sc->ip);
1013 
1014         /* Unmap all the attr blocks. */
1015         if (xfs_ifork_has_extents(&sc->ip->i_af)) {
1016                 error = xrep_reap_ifork(sc, sc->ip, XFS_ATTR_FORK);
1017                 if (error)
1018                         return error;
1019         }
1020 
1021         error = xrep_xattr_fork_remove(sc, sc->ip);
1022         if (error)
1023                 return error;
1024 
1025         return xfs_trans_roll_inode(&sc->tp, sc->ip);
1026 }
1027 
1028 /*
1029  * Free all the attribute fork blocks of the temporary file and delete the attr
1030  * fork.  The caller must ILOCK the tempfile and join it to the transaction.
1031  * This function returns with the inode joined to a clean scrub transaction.
1032  */
1033 int
1034 xrep_xattr_reset_tempfile_fork(
1035         struct xfs_scrub        *sc)
1036 {
1037         int                     error;
1038 
1039         trace_xrep_xattr_reset_fork(sc->ip, sc->tempip);
1040 
1041         /*
1042          * Wipe out the attr fork of the temp file so that regular inode
1043          * inactivation won't trip over the corrupt attr fork.
1044          */
1045         if (xfs_ifork_has_extents(&sc->tempip->i_af)) {
1046                 error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
1047                 if (error)
1048                         return error;
1049         }
1050 
1051         return xrep_xattr_fork_remove(sc, sc->tempip);
1052 }
1053 
1054 /*
1055  * Find all the extended attributes for this inode by scraping them out of the
1056  * attribute key blocks by hand, and flushing them into the temp file.
1057  * When we're done, free the staging memory before exchanging the xattr
1058  * structures to reduce memory usage.
1059  */
1060 STATIC int
1061 xrep_xattr_salvage_attributes(
1062         struct xrep_xattr       *rx)
1063 {
1064         struct xfs_inode        *ip = rx->sc->ip;
1065         int                     error;
1066 
1067         /* Short format xattrs are easy! */
1068         if (rx->sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL) {
1069                 error = xrep_xattr_recover_sf(rx);
1070                 if (error)
1071                         return error;
1072 
1073                 return xrep_xattr_flush_stashed(rx);
1074         }
1075 
1076         /*
1077          * For non-inline xattr structures, the salvage function scans the
1078          * buffer cache looking for potential attr leaf blocks.  The scan
1079          * requires the ability to lock any buffer found and runs independently
1080          * of any transaction <-> buffer item <-> buffer linkage.  Therefore,
1081          * roll the transaction to ensure there are no buffers joined.  We hold
1082          * the ILOCK independently of the transaction.
1083          */
1084         error = xfs_trans_roll(&rx->sc->tp);
1085         if (error)
1086                 return error;
1087 
1088         error = xfs_iread_extents(rx->sc->tp, ip, XFS_ATTR_FORK);
1089         if (error)
1090                 return error;
1091 
1092         error = xrep_xattr_recover(rx);
1093         if (error)
1094                 return error;
1095 
1096         return xrep_xattr_flush_stashed(rx);
1097 }
1098 
1099 /*
1100  * Add this stashed incore parent pointer to the temporary file.  The caller
1101  * must hold the tempdir's IOLOCK, must not hold any ILOCKs, and must not be in
1102  * transaction context.
1103  */
1104 STATIC int
1105 xrep_xattr_replay_pptr_update(
1106         struct xrep_xattr               *rx,
1107         const struct xfs_name           *xname,
1108         struct xrep_xattr_pptr          *pptr)
1109 {
1110         struct xfs_scrub                *sc = rx->sc;
1111         int                             error;
1112 
1113         switch (pptr->action) {
1114         case XREP_XATTR_PPTR_ADD:
1115                 /* Create parent pointer. */
1116                 trace_xrep_xattr_replay_parentadd(sc->tempip, xname,
1117                                 &pptr->pptr_rec);
1118 
1119                 error = xfs_parent_set(sc->tempip, sc->ip->i_ino, xname,
1120                                 &pptr->pptr_rec, &rx->pptr_args);
1121                 ASSERT(error != -EEXIST);
1122                 return error;
1123         case XREP_XATTR_PPTR_REMOVE:
1124                 /* Remove parent pointer. */
1125                 trace_xrep_xattr_replay_parentremove(sc->tempip, xname,
1126                                 &pptr->pptr_rec);
1127 
1128                 error = xfs_parent_unset(sc->tempip, sc->ip->i_ino, xname,
1129                                 &pptr->pptr_rec, &rx->pptr_args);
1130                 ASSERT(error != -ENOATTR);
1131                 return error;
1132         }
1133 
1134         ASSERT(0);
1135         return -EIO;
1136 }
1137 
1138 /*
1139  * Flush stashed parent pointer updates that have been recorded by the scanner.
1140  * This is done to reduce the memory requirements of the xattr rebuild, since
1141  * files can have a lot of hardlinks and the fs can be busy.
1142  *
1143  * Caller must not hold transactions or ILOCKs.  Caller must hold the tempfile
1144  * IOLOCK.
1145  */
1146 STATIC int
1147 xrep_xattr_replay_pptr_updates(
1148         struct xrep_xattr       *rx)
1149 {
1150         xfarray_idx_t           array_cur;
1151         int                     error;
1152 
1153         mutex_lock(&rx->lock);
1154         foreach_xfarray_idx(rx->pptr_recs, array_cur) {
1155                 struct xrep_xattr_pptr  pptr;
1156 
1157                 error = xfarray_load(rx->pptr_recs, array_cur, &pptr);
1158                 if (error)
1159                         goto out_unlock;
1160 
1161                 error = xfblob_loadname(rx->pptr_names, pptr.name_cookie,
1162                                 &rx->xname, pptr.namelen);
1163                 if (error)
1164                         goto out_unlock;
1165                 mutex_unlock(&rx->lock);
1166 
1167                 error = xrep_xattr_replay_pptr_update(rx, &rx->xname, &pptr);
1168                 if (error)
1169                         return error;
1170 
1171                 mutex_lock(&rx->lock);
1172         }
1173 
1174         /* Empty out both arrays now that we've added the entries. */
1175         xfarray_truncate(rx->pptr_recs);
1176         xfblob_truncate(rx->pptr_names);
1177         mutex_unlock(&rx->lock);
1178         return 0;
1179 out_unlock:
1180         mutex_unlock(&rx->lock);
1181         return error;
1182 }
1183 
1184 /*
1185  * Remember that we want to create a parent pointer in the tempfile.  These
1186  * stashed actions will be replayed later.
1187  */
1188 STATIC int
1189 xrep_xattr_stash_parentadd(
1190         struct xrep_xattr       *rx,
1191         const struct xfs_name   *name,
1192         const struct xfs_inode  *dp)
1193 {
1194         struct xrep_xattr_pptr  pptr = {
1195                 .action         = XREP_XATTR_PPTR_ADD,
1196                 .namelen        = name->len,
1197         };
1198         int                     error;
1199 
1200         trace_xrep_xattr_stash_parentadd(rx->sc->tempip, dp, name);
1201 
1202         xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
1203         error = xfblob_storename(rx->pptr_names, &pptr.name_cookie, name);
1204         if (error)
1205                 return error;
1206 
1207         return xfarray_append(rx->pptr_recs, &pptr);
1208 }
1209 
1210 /*
1211  * Remember that we want to remove a parent pointer from the tempfile.  These
1212  * stashed actions will be replayed later.
1213  */
1214 STATIC int
1215 xrep_xattr_stash_parentremove(
1216         struct xrep_xattr       *rx,
1217         const struct xfs_name   *name,
1218         const struct xfs_inode  *dp)
1219 {
1220         struct xrep_xattr_pptr  pptr = {
1221                 .action         = XREP_XATTR_PPTR_REMOVE,
1222                 .namelen        = name->len,
1223         };
1224         int                     error;
1225 
1226         trace_xrep_xattr_stash_parentremove(rx->sc->tempip, dp, name);
1227 
1228         xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
1229         error = xfblob_storename(rx->pptr_names, &pptr.name_cookie, name);
1230         if (error)
1231                 return error;
1232 
1233         return xfarray_append(rx->pptr_recs, &pptr);
1234 }
1235 
1236 /*
1237  * Capture dirent updates being made by other threads.  We will have to replay
1238  * the parent pointer updates before exchanging attr forks.
1239  */
1240 STATIC int
1241 xrep_xattr_live_dirent_update(
1242         struct notifier_block           *nb,
1243         unsigned long                   action,
1244         void                            *data)
1245 {
1246         struct xfs_dir_update_params    *p = data;
1247         struct xrep_xattr               *rx;
1248         struct xfs_scrub                *sc;
1249         int                             error;
1250 
1251         rx = container_of(nb, struct xrep_xattr, dhook.dirent_hook.nb);
1252         sc = rx->sc;
1253 
1254         /*
1255          * This thread updated a dirent that points to the file that we're
1256          * repairing, so stash the update for replay against the temporary
1257          * file.
1258          */
1259         if (p->ip->i_ino != sc->ip->i_ino)
1260                 return NOTIFY_DONE;
1261 
1262         mutex_lock(&rx->lock);
1263         if (p->delta > 0)
1264                 error = xrep_xattr_stash_parentadd(rx, p->name, p->dp);
1265         else
1266                 error = xrep_xattr_stash_parentremove(rx, p->name, p->dp);
1267         if (error)
1268                 rx->live_update_aborted = true;
1269         mutex_unlock(&rx->lock);
1270         return NOTIFY_DONE;
1271 }
1272 
1273 /*
1274  * Prepare both inodes' attribute forks for an exchange.  Promote the tempfile
1275  * from short format to leaf format, and if the file being repaired has a short
1276  * format attr fork, turn it into an empty extent list.
1277  */
1278 STATIC int
1279 xrep_xattr_swap_prep(
1280         struct xfs_scrub        *sc,
1281         bool                    temp_local,
1282         bool                    ip_local)
1283 {
1284         int                     error;
1285 
1286         /*
1287          * If the tempfile's attributes are in shortform format, convert that
1288          * to a single leaf extent so that we can use the atomic mapping
1289          * exchange.
1290          */
1291         if (temp_local) {
1292                 struct xfs_da_args      args = {
1293                         .dp             = sc->tempip,
1294                         .geo            = sc->mp->m_attr_geo,
1295                         .whichfork      = XFS_ATTR_FORK,
1296                         .trans          = sc->tp,
1297                         .total          = 1,
1298                         .owner          = sc->ip->i_ino,
1299                 };
1300 
1301                 error = xfs_attr_shortform_to_leaf(&args);
1302                 if (error)
1303                         return error;
1304 
1305                 /*
1306                  * Roll the deferred log items to get us back to a clean
1307                  * transaction.
1308                  */
1309                 error = xfs_defer_finish(&sc->tp);
1310                 if (error)
1311                         return error;
1312         }
1313 
1314         /*
1315          * If the file being repaired had a shortform attribute fork, convert
1316          * that to an empty extent list in preparation for the atomic mapping
1317          * exchange.
1318          */
1319         if (ip_local) {
1320                 struct xfs_ifork        *ifp;
1321 
1322                 ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1323 
1324                 xfs_idestroy_fork(ifp);
1325                 ifp->if_format = XFS_DINODE_FMT_EXTENTS;
1326                 ifp->if_nextents = 0;
1327                 ifp->if_bytes = 0;
1328                 ifp->if_data = NULL;
1329                 ifp->if_height = 0;
1330 
1331                 xfs_trans_log_inode(sc->tp, sc->ip,
1332                                 XFS_ILOG_CORE | XFS_ILOG_ADATA);
1333         }
1334 
1335         return 0;
1336 }
1337 
1338 /* Exchange the temporary file's attribute fork with the one being repaired. */
1339 int
1340 xrep_xattr_swap(
1341         struct xfs_scrub        *sc,
1342         struct xrep_tempexch    *tx)
1343 {
1344         bool                    ip_local, temp_local;
1345         int                     error = 0;
1346 
1347         ip_local = sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
1348         temp_local = sc->tempip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
1349 
1350         /*
1351          * If the both files have a local format attr fork and the rebuilt
1352          * xattr data would fit in the repaired file's attr fork, just copy
1353          * the contents from the tempfile and declare ourselves done.
1354          */
1355         if (ip_local && temp_local) {
1356                 int     forkoff;
1357                 int     newsize;
1358 
1359                 newsize = xfs_attr_sf_totsize(sc->tempip);
1360                 forkoff = xfs_attr_shortform_bytesfit(sc->ip, newsize);
1361                 if (forkoff > 0) {
1362                         sc->ip->i_forkoff = forkoff;
1363                         xrep_tempfile_copyout_local(sc, XFS_ATTR_FORK);
1364                         return 0;
1365                 }
1366         }
1367 
1368         /* Otherwise, make sure both attr forks are in block-mapping mode. */
1369         error = xrep_xattr_swap_prep(sc, temp_local, ip_local);
1370         if (error)
1371                 return error;
1372 
1373         return xrep_tempexch_contents(sc, tx);
1374 }
1375 
1376 /*
1377  * Finish replaying stashed parent pointer updates, allocate a transaction for
1378  * exchanging extent mappings, and take the ILOCKs of both files before we
1379  * commit the new extended attribute structure.
1380  */
1381 STATIC int
1382 xrep_xattr_finalize_tempfile(
1383         struct xrep_xattr       *rx)
1384 {
1385         struct xfs_scrub        *sc = rx->sc;
1386         int                     error;
1387 
1388         if (!xfs_has_parent(sc->mp))
1389                 return xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx);
1390 
1391         /*
1392          * Repair relies on the ILOCK to quiesce all possible xattr updates.
1393          * Replay all queued parent pointer updates into the tempfile before
1394          * exchanging the contents, even if that means dropping the ILOCKs and
1395          * the transaction.
1396          */
1397         do {
1398                 error = xrep_xattr_replay_pptr_updates(rx);
1399                 if (error)
1400                         return error;
1401 
1402                 error = xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx);
1403                 if (error)
1404                         return error;
1405 
1406                 if (xfarray_length(rx->pptr_recs) == 0)
1407                         break;
1408 
1409                 xchk_trans_cancel(sc);
1410                 xrep_tempfile_iunlock_both(sc);
1411         } while (!xchk_should_terminate(sc, &error));
1412         return error;
1413 }
1414 
1415 /*
1416  * Exchange the new extended attribute data (which we created in the tempfile)
1417  * with the file being repaired.
1418  */
1419 STATIC int
1420 xrep_xattr_rebuild_tree(
1421         struct xrep_xattr       *rx)
1422 {
1423         struct xfs_scrub        *sc = rx->sc;
1424         int                     error;
1425 
1426         /*
1427          * If we didn't find any attributes to salvage, repair the file by
1428          * zapping its attr fork.
1429          */
1430         if (rx->attrs_found == 0) {
1431                 xfs_trans_ijoin(sc->tp, sc->ip, 0);
1432                 error = xrep_xattr_reset_fork(sc);
1433                 if (error)
1434                         return error;
1435 
1436                 goto forget_acls;
1437         }
1438 
1439         trace_xrep_xattr_rebuild_tree(sc->ip, sc->tempip);
1440 
1441         /*
1442          * Commit the repair transaction and drop the ILOCKs so that we can use
1443          * the atomic file content exchange helper functions to compute the
1444          * correct resource reservations.
1445          *
1446          * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent xattr
1447          * modifications, but there's nothing to prevent userspace from reading
1448          * the attributes until we're ready for the exchange operation.  Reads
1449          * will return -EIO without shutting down the fs, so we're ok with
1450          * that.
1451          */
1452         error = xrep_trans_commit(sc);
1453         if (error)
1454                 return error;
1455 
1456         xchk_iunlock(sc, XFS_ILOCK_EXCL);
1457 
1458         /*
1459          * Take the IOLOCK on the temporary file so that we can run xattr
1460          * operations with the same locks held as we would for a normal file.
1461          * We still hold sc->ip's IOLOCK.
1462          */
1463         error = xrep_tempfile_iolock_polled(rx->sc);
1464         if (error)
1465                 return error;
1466 
1467         /*
1468          * Allocate transaction, lock inodes, and make sure that we've replayed
1469          * all the stashed parent pointer updates to the temp file.  After this
1470          * point, we're ready to exchange attr fork mappings.
1471          */
1472         error = xrep_xattr_finalize_tempfile(rx);
1473         if (error)
1474                 return error;
1475 
1476         /*
1477          * Exchange the blocks mapped by the tempfile's attr fork with the file
1478          * being repaired.  The old attr blocks will then be attached to the
1479          * tempfile, so reap its attr fork.
1480          */
1481         error = xrep_xattr_swap(sc, &rx->tx);
1482         if (error)
1483                 return error;
1484 
1485         error = xrep_xattr_reset_tempfile_fork(sc);
1486         if (error)
1487                 return error;
1488 
1489         /*
1490          * Roll to get a transaction without any inodes joined to it.  Then we
1491          * can drop the tempfile's ILOCK and IOLOCK before doing more work on
1492          * the scrub target file.
1493          */
1494         error = xfs_trans_roll(&sc->tp);
1495         if (error)
1496                 return error;
1497 
1498         xrep_tempfile_iunlock(sc);
1499         xrep_tempfile_iounlock(sc);
1500 
1501 forget_acls:
1502         /* Invalidate cached ACLs now that we've reloaded all the xattrs. */
1503         xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_FILE);
1504         xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_DEFAULT);
1505         return 0;
1506 }
1507 
1508 /* Tear down all the incore scan stuff we created. */
1509 STATIC void
1510 xrep_xattr_teardown(
1511         struct xrep_xattr       *rx)
1512 {
1513         if (xfs_has_parent(rx->sc->mp))
1514                 xfs_dir_hook_del(rx->sc->mp, &rx->dhook);
1515         if (rx->pptr_names)
1516                 xfblob_destroy(rx->pptr_names);
1517         if (rx->pptr_recs)
1518                 xfarray_destroy(rx->pptr_recs);
1519         xfblob_destroy(rx->xattr_blobs);
1520         xfarray_destroy(rx->xattr_records);
1521         mutex_destroy(&rx->lock);
1522         kfree(rx);
1523 }
1524 
1525 /* Set up the filesystem scan so we can regenerate extended attributes. */
1526 STATIC int
1527 xrep_xattr_setup_scan(
1528         struct xfs_scrub        *sc,
1529         struct xrep_xattr       **rxp)
1530 {
1531         struct xrep_xattr       *rx;
1532         char                    *descr;
1533         int                     max_len;
1534         int                     error;
1535 
1536         rx = kzalloc(sizeof(struct xrep_xattr), XCHK_GFP_FLAGS);
1537         if (!rx)
1538                 return -ENOMEM;
1539         rx->sc = sc;
1540         rx->can_flush = true;
1541         rx->xname.name = rx->namebuf;
1542 
1543         mutex_init(&rx->lock);
1544 
1545         /*
1546          * Allocate enough memory to handle loading local attr values from the
1547          * xfblob data while flushing stashed attrs to the temporary file.
1548          * We only realloc the buffer when salvaging remote attr values.
1549          */
1550         max_len = xfs_attr_leaf_entsize_local_max(sc->mp->m_attr_geo->blksize);
1551         error = xchk_setup_xattr_buf(rx->sc, max_len);
1552         if (error == -ENOMEM)
1553                 error = -EDEADLOCK;
1554         if (error)
1555                 goto out_rx;
1556 
1557         /* Set up some staging for salvaged attribute keys and values */
1558         descr = xchk_xfile_ino_descr(sc, "xattr keys");
1559         error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key),
1560                         &rx->xattr_records);
1561         kfree(descr);
1562         if (error)
1563                 goto out_rx;
1564 
1565         descr = xchk_xfile_ino_descr(sc, "xattr names");
1566         error = xfblob_create(descr, &rx->xattr_blobs);
1567         kfree(descr);
1568         if (error)
1569                 goto out_keys;
1570 
1571         if (xfs_has_parent(sc->mp)) {
1572                 ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
1573 
1574                 descr = xchk_xfile_ino_descr(sc,
1575                                 "xattr retained parent pointer entries");
1576                 error = xfarray_create(descr, 0,
1577                                 sizeof(struct xrep_xattr_pptr),
1578                                 &rx->pptr_recs);
1579                 kfree(descr);
1580                 if (error)
1581                         goto out_values;
1582 
1583                 descr = xchk_xfile_ino_descr(sc,
1584                                 "xattr retained parent pointer names");
1585                 error = xfblob_create(descr, &rx->pptr_names);
1586                 kfree(descr);
1587                 if (error)
1588                         goto out_pprecs;
1589 
1590                 xfs_dir_hook_setup(&rx->dhook, xrep_xattr_live_dirent_update);
1591                 error = xfs_dir_hook_add(sc->mp, &rx->dhook);
1592                 if (error)
1593                         goto out_ppnames;
1594         }
1595 
1596         *rxp = rx;
1597         return 0;
1598 out_ppnames:
1599         xfblob_destroy(rx->pptr_names);
1600 out_pprecs:
1601         xfarray_destroy(rx->pptr_recs);
1602 out_values:
1603         xfblob_destroy(rx->xattr_blobs);
1604 out_keys:
1605         xfarray_destroy(rx->xattr_records);
1606 out_rx:
1607         mutex_destroy(&rx->lock);
1608         kfree(rx);
1609         return error;
1610 }
1611 
1612 /*
1613  * Repair the extended attribute metadata.
1614  *
1615  * XXX: Remote attribute value buffers encompass the entire (up to 64k) buffer.
1616  * The buffer cache in XFS can't handle aliased multiblock buffers, so this
1617  * might misbehave if the attr fork is crosslinked with other filesystem
1618  * metadata.
1619  */
1620 int
1621 xrep_xattr(
1622         struct xfs_scrub        *sc)
1623 {
1624         struct xrep_xattr       *rx = NULL;
1625         int                     error;
1626 
1627         if (!xfs_inode_hasattr(sc->ip))
1628                 return -ENOENT;
1629 
1630         /* The rmapbt is required to reap the old attr fork. */
1631         if (!xfs_has_rmapbt(sc->mp))
1632                 return -EOPNOTSUPP;
1633         /* We require atomic file exchange range to rebuild anything. */
1634         if (!xfs_has_exchange_range(sc->mp))
1635                 return -EOPNOTSUPP;
1636 
1637         error = xrep_xattr_setup_scan(sc, &rx);
1638         if (error)
1639                 return error;
1640 
1641         ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
1642 
1643         error = xrep_xattr_salvage_attributes(rx);
1644         if (error)
1645                 goto out_scan;
1646 
1647         if (rx->live_update_aborted) {
1648                 error = -EIO;
1649                 goto out_scan;
1650         }
1651 
1652         /* Last chance to abort before we start committing fixes. */
1653         if (xchk_should_terminate(sc, &error))
1654                 goto out_scan;
1655 
1656         error = xrep_xattr_rebuild_tree(rx);
1657         if (error)
1658                 goto out_scan;
1659 
1660 out_scan:
1661         xrep_xattr_teardown(rx);
1662         return error;
1663 }
1664 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php