~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/xfs/xfs_fsmap.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0+
  2 /*
  3  * Copyright (C) 2017 Oracle.  All Rights Reserved.
  4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
  5  */
  6 #include "xfs.h"
  7 #include "xfs_fs.h"
  8 #include "xfs_shared.h"
  9 #include "xfs_format.h"
 10 #include "xfs_log_format.h"
 11 #include "xfs_trans_resv.h"
 12 #include "xfs_mount.h"
 13 #include "xfs_inode.h"
 14 #include "xfs_trans.h"
 15 #include "xfs_btree.h"
 16 #include "xfs_rmap_btree.h"
 17 #include "xfs_trace.h"
 18 #include "xfs_rmap.h"
 19 #include "xfs_alloc.h"
 20 #include "xfs_bit.h"
 21 #include <linux/fsmap.h>
 22 #include "xfs_fsmap.h"
 23 #include "xfs_refcount.h"
 24 #include "xfs_refcount_btree.h"
 25 #include "xfs_alloc_btree.h"
 26 #include "xfs_rtbitmap.h"
 27 #include "xfs_ag.h"
 28 
 29 /* Convert an xfs_fsmap to an fsmap. */
 30 static void
 31 xfs_fsmap_from_internal(
 32         struct fsmap            *dest,
 33         struct xfs_fsmap        *src)
 34 {
 35         dest->fmr_device = src->fmr_device;
 36         dest->fmr_flags = src->fmr_flags;
 37         dest->fmr_physical = BBTOB(src->fmr_physical);
 38         dest->fmr_owner = src->fmr_owner;
 39         dest->fmr_offset = BBTOB(src->fmr_offset);
 40         dest->fmr_length = BBTOB(src->fmr_length);
 41         dest->fmr_reserved[0] = 0;
 42         dest->fmr_reserved[1] = 0;
 43         dest->fmr_reserved[2] = 0;
 44 }
 45 
 46 /* Convert an fsmap to an xfs_fsmap. */
 47 void
 48 xfs_fsmap_to_internal(
 49         struct xfs_fsmap        *dest,
 50         struct fsmap            *src)
 51 {
 52         dest->fmr_device = src->fmr_device;
 53         dest->fmr_flags = src->fmr_flags;
 54         dest->fmr_physical = BTOBBT(src->fmr_physical);
 55         dest->fmr_owner = src->fmr_owner;
 56         dest->fmr_offset = BTOBBT(src->fmr_offset);
 57         dest->fmr_length = BTOBBT(src->fmr_length);
 58 }
 59 
 60 /* Convert an fsmap owner into an rmapbt owner. */
 61 static int
 62 xfs_fsmap_owner_to_rmap(
 63         struct xfs_rmap_irec    *dest,
 64         const struct xfs_fsmap  *src)
 65 {
 66         if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) {
 67                 dest->rm_owner = src->fmr_owner;
 68                 return 0;
 69         }
 70 
 71         switch (src->fmr_owner) {
 72         case 0:                 /* "lowest owner id possible" */
 73         case -1ULL:             /* "highest owner id possible" */
 74                 dest->rm_owner = src->fmr_owner;
 75                 break;
 76         case XFS_FMR_OWN_FREE:
 77                 dest->rm_owner = XFS_RMAP_OWN_NULL;
 78                 break;
 79         case XFS_FMR_OWN_UNKNOWN:
 80                 dest->rm_owner = XFS_RMAP_OWN_UNKNOWN;
 81                 break;
 82         case XFS_FMR_OWN_FS:
 83                 dest->rm_owner = XFS_RMAP_OWN_FS;
 84                 break;
 85         case XFS_FMR_OWN_LOG:
 86                 dest->rm_owner = XFS_RMAP_OWN_LOG;
 87                 break;
 88         case XFS_FMR_OWN_AG:
 89                 dest->rm_owner = XFS_RMAP_OWN_AG;
 90                 break;
 91         case XFS_FMR_OWN_INOBT:
 92                 dest->rm_owner = XFS_RMAP_OWN_INOBT;
 93                 break;
 94         case XFS_FMR_OWN_INODES:
 95                 dest->rm_owner = XFS_RMAP_OWN_INODES;
 96                 break;
 97         case XFS_FMR_OWN_REFC:
 98                 dest->rm_owner = XFS_RMAP_OWN_REFC;
 99                 break;
100         case XFS_FMR_OWN_COW:
101                 dest->rm_owner = XFS_RMAP_OWN_COW;
102                 break;
103         case XFS_FMR_OWN_DEFECTIVE:     /* not implemented */
104                 /* fall through */
105         default:
106                 return -EINVAL;
107         }
108         return 0;
109 }
110 
111 /* Convert an rmapbt owner into an fsmap owner. */
112 static int
113 xfs_fsmap_owner_from_rmap(
114         struct xfs_fsmap                *dest,
115         const struct xfs_rmap_irec      *src)
116 {
117         dest->fmr_flags = 0;
118         if (!XFS_RMAP_NON_INODE_OWNER(src->rm_owner)) {
119                 dest->fmr_owner = src->rm_owner;
120                 return 0;
121         }
122         dest->fmr_flags |= FMR_OF_SPECIAL_OWNER;
123 
124         switch (src->rm_owner) {
125         case XFS_RMAP_OWN_FS:
126                 dest->fmr_owner = XFS_FMR_OWN_FS;
127                 break;
128         case XFS_RMAP_OWN_LOG:
129                 dest->fmr_owner = XFS_FMR_OWN_LOG;
130                 break;
131         case XFS_RMAP_OWN_AG:
132                 dest->fmr_owner = XFS_FMR_OWN_AG;
133                 break;
134         case XFS_RMAP_OWN_INOBT:
135                 dest->fmr_owner = XFS_FMR_OWN_INOBT;
136                 break;
137         case XFS_RMAP_OWN_INODES:
138                 dest->fmr_owner = XFS_FMR_OWN_INODES;
139                 break;
140         case XFS_RMAP_OWN_REFC:
141                 dest->fmr_owner = XFS_FMR_OWN_REFC;
142                 break;
143         case XFS_RMAP_OWN_COW:
144                 dest->fmr_owner = XFS_FMR_OWN_COW;
145                 break;
146         case XFS_RMAP_OWN_NULL: /* "free" */
147                 dest->fmr_owner = XFS_FMR_OWN_FREE;
148                 break;
149         default:
150                 ASSERT(0);
151                 return -EFSCORRUPTED;
152         }
153         return 0;
154 }
155 
156 /* getfsmap query state */
157 struct xfs_getfsmap_info {
158         struct xfs_fsmap_head   *head;
159         struct fsmap            *fsmap_recs;    /* mapping records */
160         struct xfs_buf          *agf_bp;        /* AGF, for refcount queries */
161         struct xfs_perag        *pag;           /* AG info, if applicable */
162         xfs_daddr_t             next_daddr;     /* next daddr we expect */
163         /* daddr of low fsmap key when we're using the rtbitmap */
164         xfs_daddr_t             low_daddr;
165         xfs_daddr_t             end_daddr;      /* daddr of high fsmap key */
166         u64                     missing_owner;  /* owner of holes */
167         u32                     dev;            /* device id */
168         /*
169          * Low rmap key for the query.  If low.rm_blockcount is nonzero, this
170          * is the second (or later) call to retrieve the recordset in pieces.
171          * xfs_getfsmap_rec_before_start will compare all records retrieved
172          * by the rmapbt query to filter out any records that start before
173          * the last record.
174          */
175         struct xfs_rmap_irec    low;
176         struct xfs_rmap_irec    high;           /* high rmap key */
177         bool                    last;           /* last extent? */
178 };
179 
180 /* Associate a device with a getfsmap handler. */
181 struct xfs_getfsmap_dev {
182         u32                     dev;
183         int                     (*fn)(struct xfs_trans *tp,
184                                       const struct xfs_fsmap *keys,
185                                       struct xfs_getfsmap_info *info);
186         sector_t                nr_sectors;
187 };
188 
189 /* Compare two getfsmap device handlers. */
190 static int
191 xfs_getfsmap_dev_compare(
192         const void                      *p1,
193         const void                      *p2)
194 {
195         const struct xfs_getfsmap_dev   *d1 = p1;
196         const struct xfs_getfsmap_dev   *d2 = p2;
197 
198         return d1->dev - d2->dev;
199 }
200 
201 /* Decide if this mapping is shared. */
202 STATIC int
203 xfs_getfsmap_is_shared(
204         struct xfs_trans                *tp,
205         struct xfs_getfsmap_info        *info,
206         const struct xfs_rmap_irec      *rec,
207         bool                            *stat)
208 {
209         struct xfs_mount                *mp = tp->t_mountp;
210         struct xfs_btree_cur            *cur;
211         xfs_agblock_t                   fbno;
212         xfs_extlen_t                    flen;
213         int                             error;
214 
215         *stat = false;
216         if (!xfs_has_reflink(mp))
217                 return 0;
218         /* rt files will have no perag structure */
219         if (!info->pag)
220                 return 0;
221 
222         /* Are there any shared blocks here? */
223         flen = 0;
224         cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp, info->pag);
225 
226         error = xfs_refcount_find_shared(cur, rec->rm_startblock,
227                         rec->rm_blockcount, &fbno, &flen, false);
228 
229         xfs_btree_del_cursor(cur, error);
230         if (error)
231                 return error;
232 
233         *stat = flen > 0;
234         return 0;
235 }
236 
237 static inline void
238 xfs_getfsmap_format(
239         struct xfs_mount                *mp,
240         struct xfs_fsmap                *xfm,
241         struct xfs_getfsmap_info        *info)
242 {
243         struct fsmap                    *rec;
244 
245         trace_xfs_getfsmap_mapping(mp, xfm);
246 
247         rec = &info->fsmap_recs[info->head->fmh_entries++];
248         xfs_fsmap_from_internal(rec, xfm);
249 }
250 
251 static inline bool
252 xfs_getfsmap_rec_before_start(
253         struct xfs_getfsmap_info        *info,
254         const struct xfs_rmap_irec      *rec,
255         xfs_daddr_t                     rec_daddr)
256 {
257         if (info->low_daddr != XFS_BUF_DADDR_NULL)
258                 return rec_daddr < info->low_daddr;
259         if (info->low.rm_blockcount)
260                 return xfs_rmap_compare(rec, &info->low) < 0;
261         return false;
262 }
263 
264 /*
265  * Format a reverse mapping for getfsmap, having translated rm_startblock
266  * into the appropriate daddr units.  Pass in a nonzero @len_daddr if the
267  * length could be larger than rm_blockcount in struct xfs_rmap_irec.
268  */
269 STATIC int
270 xfs_getfsmap_helper(
271         struct xfs_trans                *tp,
272         struct xfs_getfsmap_info        *info,
273         const struct xfs_rmap_irec      *rec,
274         xfs_daddr_t                     rec_daddr,
275         xfs_daddr_t                     len_daddr)
276 {
277         struct xfs_fsmap                fmr;
278         struct xfs_mount                *mp = tp->t_mountp;
279         bool                            shared;
280         int                             error;
281 
282         if (fatal_signal_pending(current))
283                 return -EINTR;
284 
285         if (len_daddr == 0)
286                 len_daddr = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
287 
288         /*
289          * Filter out records that start before our startpoint, if the
290          * caller requested that.
291          */
292         if (xfs_getfsmap_rec_before_start(info, rec, rec_daddr)) {
293                 rec_daddr += len_daddr;
294                 if (info->next_daddr < rec_daddr)
295                         info->next_daddr = rec_daddr;
296                 return 0;
297         }
298 
299         /*
300          * For an info->last query, we're looking for a gap between the last
301          * mapping emitted and the high key specified by userspace.  If the
302          * user's query spans less than 1 fsblock, then info->high and
303          * info->low will have the same rm_startblock, which causes rec_daddr
304          * and next_daddr to be the same.  Therefore, use the end_daddr that
305          * we calculated from userspace's high key to synthesize the record.
306          * Note that if the btree query found a mapping, there won't be a gap.
307          */
308         if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL)
309                 rec_daddr = info->end_daddr;
310 
311         /* Are we just counting mappings? */
312         if (info->head->fmh_count == 0) {
313                 if (info->head->fmh_entries == UINT_MAX)
314                         return -ECANCELED;
315 
316                 if (rec_daddr > info->next_daddr)
317                         info->head->fmh_entries++;
318 
319                 if (info->last)
320                         return 0;
321 
322                 info->head->fmh_entries++;
323 
324                 rec_daddr += len_daddr;
325                 if (info->next_daddr < rec_daddr)
326                         info->next_daddr = rec_daddr;
327                 return 0;
328         }
329 
330         /*
331          * If the record starts past the last physical block we saw,
332          * then we've found a gap.  Report the gap as being owned by
333          * whatever the caller specified is the missing owner.
334          */
335         if (rec_daddr > info->next_daddr) {
336                 if (info->head->fmh_entries >= info->head->fmh_count)
337                         return -ECANCELED;
338 
339                 fmr.fmr_device = info->dev;
340                 fmr.fmr_physical = info->next_daddr;
341                 fmr.fmr_owner = info->missing_owner;
342                 fmr.fmr_offset = 0;
343                 fmr.fmr_length = rec_daddr - info->next_daddr;
344                 fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
345                 xfs_getfsmap_format(mp, &fmr, info);
346         }
347 
348         if (info->last)
349                 goto out;
350 
351         /* Fill out the extent we found */
352         if (info->head->fmh_entries >= info->head->fmh_count)
353                 return -ECANCELED;
354 
355         trace_xfs_fsmap_mapping(mp, info->dev,
356                         info->pag ? info->pag->pag_agno : NULLAGNUMBER, rec);
357 
358         fmr.fmr_device = info->dev;
359         fmr.fmr_physical = rec_daddr;
360         error = xfs_fsmap_owner_from_rmap(&fmr, rec);
361         if (error)
362                 return error;
363         fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset);
364         fmr.fmr_length = len_daddr;
365         if (rec->rm_flags & XFS_RMAP_UNWRITTEN)
366                 fmr.fmr_flags |= FMR_OF_PREALLOC;
367         if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
368                 fmr.fmr_flags |= FMR_OF_ATTR_FORK;
369         if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
370                 fmr.fmr_flags |= FMR_OF_EXTENT_MAP;
371         if (fmr.fmr_flags == 0) {
372                 error = xfs_getfsmap_is_shared(tp, info, rec, &shared);
373                 if (error)
374                         return error;
375                 if (shared)
376                         fmr.fmr_flags |= FMR_OF_SHARED;
377         }
378 
379         xfs_getfsmap_format(mp, &fmr, info);
380 out:
381         rec_daddr += len_daddr;
382         if (info->next_daddr < rec_daddr)
383                 info->next_daddr = rec_daddr;
384         return 0;
385 }
386 
387 /* Transform a rmapbt irec into a fsmap */
388 STATIC int
389 xfs_getfsmap_datadev_helper(
390         struct xfs_btree_cur            *cur,
391         const struct xfs_rmap_irec      *rec,
392         void                            *priv)
393 {
394         struct xfs_mount                *mp = cur->bc_mp;
395         struct xfs_getfsmap_info        *info = priv;
396         xfs_fsblock_t                   fsb;
397         xfs_daddr_t                     rec_daddr;
398 
399         fsb = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno, rec->rm_startblock);
400         rec_daddr = XFS_FSB_TO_DADDR(mp, fsb);
401 
402         return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr, 0);
403 }
404 
405 /* Transform a bnobt irec into a fsmap */
406 STATIC int
407 xfs_getfsmap_datadev_bnobt_helper(
408         struct xfs_btree_cur            *cur,
409         const struct xfs_alloc_rec_incore *rec,
410         void                            *priv)
411 {
412         struct xfs_mount                *mp = cur->bc_mp;
413         struct xfs_getfsmap_info        *info = priv;
414         struct xfs_rmap_irec            irec;
415         xfs_daddr_t                     rec_daddr;
416 
417         rec_daddr = XFS_AGB_TO_DADDR(mp, cur->bc_ag.pag->pag_agno,
418                         rec->ar_startblock);
419 
420         irec.rm_startblock = rec->ar_startblock;
421         irec.rm_blockcount = rec->ar_blockcount;
422         irec.rm_owner = XFS_RMAP_OWN_NULL;      /* "free" */
423         irec.rm_offset = 0;
424         irec.rm_flags = 0;
425 
426         return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr, 0);
427 }
428 
429 /* Set rmap flags based on the getfsmap flags */
430 static void
431 xfs_getfsmap_set_irec_flags(
432         struct xfs_rmap_irec    *irec,
433         const struct xfs_fsmap  *fmr)
434 {
435         irec->rm_flags = 0;
436         if (fmr->fmr_flags & FMR_OF_ATTR_FORK)
437                 irec->rm_flags |= XFS_RMAP_ATTR_FORK;
438         if (fmr->fmr_flags & FMR_OF_EXTENT_MAP)
439                 irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
440         if (fmr->fmr_flags & FMR_OF_PREALLOC)
441                 irec->rm_flags |= XFS_RMAP_UNWRITTEN;
442 }
443 
444 /* Execute a getfsmap query against the log device. */
445 STATIC int
446 xfs_getfsmap_logdev(
447         struct xfs_trans                *tp,
448         const struct xfs_fsmap          *keys,
449         struct xfs_getfsmap_info        *info)
450 {
451         struct xfs_mount                *mp = tp->t_mountp;
452         struct xfs_rmap_irec            rmap;
453         xfs_daddr_t                     rec_daddr, len_daddr;
454         xfs_fsblock_t                   start_fsb, end_fsb;
455         uint64_t                        eofs;
456 
457         eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
458         if (keys[0].fmr_physical >= eofs)
459                 return 0;
460         start_fsb = XFS_BB_TO_FSBT(mp,
461                                 keys[0].fmr_physical + keys[0].fmr_length);
462         end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
463 
464         /* Adjust the low key if we are continuing from where we left off. */
465         if (keys[0].fmr_length > 0)
466                 info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb);
467 
468         trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb);
469         trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb);
470 
471         if (start_fsb > 0)
472                 return 0;
473 
474         /* Fabricate an rmap entry for the external log device. */
475         rmap.rm_startblock = 0;
476         rmap.rm_blockcount = mp->m_sb.sb_logblocks;
477         rmap.rm_owner = XFS_RMAP_OWN_LOG;
478         rmap.rm_offset = 0;
479         rmap.rm_flags = 0;
480 
481         rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock);
482         len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount);
483         return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr);
484 }
485 
486 #ifdef CONFIG_XFS_RT
487 /* Transform a rtbitmap "record" into a fsmap */
488 STATIC int
489 xfs_getfsmap_rtdev_rtbitmap_helper(
490         struct xfs_mount                *mp,
491         struct xfs_trans                *tp,
492         const struct xfs_rtalloc_rec    *rec,
493         void                            *priv)
494 {
495         struct xfs_getfsmap_info        *info = priv;
496         struct xfs_rmap_irec            irec;
497         xfs_rtblock_t                   rtbno;
498         xfs_daddr_t                     rec_daddr, len_daddr;
499 
500         rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext);
501         rec_daddr = XFS_FSB_TO_BB(mp, rtbno);
502         irec.rm_startblock = rtbno;
503 
504         rtbno = xfs_rtx_to_rtb(mp, rec->ar_extcount);
505         len_daddr = XFS_FSB_TO_BB(mp, rtbno);
506         irec.rm_blockcount = rtbno;
507 
508         irec.rm_owner = XFS_RMAP_OWN_NULL;      /* "free" */
509         irec.rm_offset = 0;
510         irec.rm_flags = 0;
511 
512         return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr);
513 }
514 
515 /* Execute a getfsmap query against the realtime device rtbitmap. */
516 STATIC int
517 xfs_getfsmap_rtdev_rtbitmap(
518         struct xfs_trans                *tp,
519         const struct xfs_fsmap          *keys,
520         struct xfs_getfsmap_info        *info)
521 {
522 
523         struct xfs_rtalloc_rec          alow = { 0 };
524         struct xfs_rtalloc_rec          ahigh = { 0 };
525         struct xfs_mount                *mp = tp->t_mountp;
526         xfs_rtblock_t                   start_rtb;
527         xfs_rtblock_t                   end_rtb;
528         uint64_t                        eofs;
529         int                             error;
530 
531         eofs = XFS_FSB_TO_BB(mp, xfs_rtx_to_rtb(mp, mp->m_sb.sb_rextents));
532         if (keys[0].fmr_physical >= eofs)
533                 return 0;
534         start_rtb = XFS_BB_TO_FSBT(mp,
535                                 keys[0].fmr_physical + keys[0].fmr_length);
536         end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
537 
538         info->missing_owner = XFS_FMR_OWN_UNKNOWN;
539 
540         /* Adjust the low key if we are continuing from where we left off. */
541         if (keys[0].fmr_length > 0) {
542                 info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb);
543                 if (info->low_daddr >= eofs)
544                         return 0;
545         }
546 
547         trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb);
548         trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb);
549 
550         xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP);
551 
552         /*
553          * Set up query parameters to return free rtextents covering the range
554          * we want.
555          */
556         alow.ar_startext = xfs_rtb_to_rtx(mp, start_rtb);
557         ahigh.ar_startext = xfs_rtb_to_rtxup(mp, end_rtb);
558         error = xfs_rtalloc_query_range(mp, tp, &alow, &ahigh,
559                         xfs_getfsmap_rtdev_rtbitmap_helper, info);
560         if (error)
561                 goto err;
562 
563         /*
564          * Report any gaps at the end of the rtbitmap by simulating a null
565          * rmap starting at the block after the end of the query range.
566          */
567         info->last = true;
568         ahigh.ar_startext = min(mp->m_sb.sb_rextents, ahigh.ar_startext);
569 
570         error = xfs_getfsmap_rtdev_rtbitmap_helper(mp, tp, &ahigh, info);
571         if (error)
572                 goto err;
573 err:
574         xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
575         return error;
576 }
577 #endif /* CONFIG_XFS_RT */
578 
579 static inline bool
580 rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r)
581 {
582         if (!xfs_has_reflink(mp))
583                 return true;
584         if (XFS_RMAP_NON_INODE_OWNER(r->rm_owner))
585                 return true;
586         if (r->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
587                            XFS_RMAP_UNWRITTEN))
588                 return true;
589         return false;
590 }
591 
592 /* Execute a getfsmap query against the regular data device. */
593 STATIC int
594 __xfs_getfsmap_datadev(
595         struct xfs_trans                *tp,
596         const struct xfs_fsmap          *keys,
597         struct xfs_getfsmap_info        *info,
598         int                             (*query_fn)(struct xfs_trans *,
599                                                     struct xfs_getfsmap_info *,
600                                                     struct xfs_btree_cur **,
601                                                     void *),
602         void                            *priv)
603 {
604         struct xfs_mount                *mp = tp->t_mountp;
605         struct xfs_perag                *pag;
606         struct xfs_btree_cur            *bt_cur = NULL;
607         xfs_fsblock_t                   start_fsb;
608         xfs_fsblock_t                   end_fsb;
609         xfs_agnumber_t                  start_ag;
610         xfs_agnumber_t                  end_ag;
611         uint64_t                        eofs;
612         int                             error = 0;
613 
614         eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
615         if (keys[0].fmr_physical >= eofs)
616                 return 0;
617         start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical);
618         end_fsb = XFS_DADDR_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
619 
620         /*
621          * Convert the fsmap low/high keys to AG based keys.  Initialize
622          * low to the fsmap low key and max out the high key to the end
623          * of the AG.
624          */
625         info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
626         error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
627         if (error)
628                 return error;
629         info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
630         xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
631 
632         /* Adjust the low key if we are continuing from where we left off. */
633         if (info->low.rm_blockcount == 0) {
634                 /* No previous record from which to continue */
635         } else if (rmap_not_shareable(mp, &info->low)) {
636                 /* Last record seen was an unshareable extent */
637                 info->low.rm_owner = 0;
638                 info->low.rm_offset = 0;
639 
640                 start_fsb += info->low.rm_blockcount;
641                 if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs)
642                         return 0;
643         } else {
644                 /* Last record seen was a shareable file data extent */
645                 info->low.rm_offset += info->low.rm_blockcount;
646         }
647         info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);
648 
649         info->high.rm_startblock = -1U;
650         info->high.rm_owner = ULLONG_MAX;
651         info->high.rm_offset = ULLONG_MAX;
652         info->high.rm_blockcount = 0;
653         info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
654 
655         start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
656         end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);
657 
658         for_each_perag_range(mp, start_ag, end_ag, pag) {
659                 /*
660                  * Set the AG high key from the fsmap high key if this
661                  * is the last AG that we're querying.
662                  */
663                 info->pag = pag;
664                 if (pag->pag_agno == end_ag) {
665                         info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
666                                         end_fsb);
667                         info->high.rm_offset = XFS_BB_TO_FSBT(mp,
668                                         keys[1].fmr_offset);
669                         error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
670                         if (error)
671                                 break;
672                         xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
673                 }
674 
675                 if (bt_cur) {
676                         xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
677                         bt_cur = NULL;
678                         xfs_trans_brelse(tp, info->agf_bp);
679                         info->agf_bp = NULL;
680                 }
681 
682                 error = xfs_alloc_read_agf(pag, tp, 0, &info->agf_bp);
683                 if (error)
684                         break;
685 
686                 trace_xfs_fsmap_low_key(mp, info->dev, pag->pag_agno,
687                                 &info->low);
688                 trace_xfs_fsmap_high_key(mp, info->dev, pag->pag_agno,
689                                 &info->high);
690 
691                 error = query_fn(tp, info, &bt_cur, priv);
692                 if (error)
693                         break;
694 
695                 /*
696                  * Set the AG low key to the start of the AG prior to
697                  * moving on to the next AG.
698                  */
699                 if (pag->pag_agno == start_ag)
700                         memset(&info->low, 0, sizeof(info->low));
701 
702                 /*
703                  * If this is the last AG, report any gap at the end of it
704                  * before we drop the reference to the perag when the loop
705                  * terminates.
706                  */
707                 if (pag->pag_agno == end_ag) {
708                         info->last = true;
709                         error = query_fn(tp, info, &bt_cur, priv);
710                         if (error)
711                                 break;
712                 }
713                 info->pag = NULL;
714         }
715 
716         if (bt_cur)
717                 xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
718                                                          XFS_BTREE_NOERROR);
719         if (info->agf_bp) {
720                 xfs_trans_brelse(tp, info->agf_bp);
721                 info->agf_bp = NULL;
722         }
723         if (info->pag) {
724                 xfs_perag_rele(info->pag);
725                 info->pag = NULL;
726         } else if (pag) {
727                 /* loop termination case */
728                 xfs_perag_rele(pag);
729         }
730 
731         return error;
732 }
733 
734 /* Actually query the rmap btree. */
735 STATIC int
736 xfs_getfsmap_datadev_rmapbt_query(
737         struct xfs_trans                *tp,
738         struct xfs_getfsmap_info        *info,
739         struct xfs_btree_cur            **curpp,
740         void                            *priv)
741 {
742         /* Report any gap at the end of the last AG. */
743         if (info->last)
744                 return xfs_getfsmap_datadev_helper(*curpp, &info->high, info);
745 
746         /* Allocate cursor for this AG and query_range it. */
747         *curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
748                         info->pag);
749         return xfs_rmap_query_range(*curpp, &info->low, &info->high,
750                         xfs_getfsmap_datadev_helper, info);
751 }
752 
753 /* Execute a getfsmap query against the regular data device rmapbt. */
754 STATIC int
755 xfs_getfsmap_datadev_rmapbt(
756         struct xfs_trans                *tp,
757         const struct xfs_fsmap          *keys,
758         struct xfs_getfsmap_info        *info)
759 {
760         info->missing_owner = XFS_FMR_OWN_FREE;
761         return __xfs_getfsmap_datadev(tp, keys, info,
762                         xfs_getfsmap_datadev_rmapbt_query, NULL);
763 }
764 
765 /* Actually query the bno btree. */
766 STATIC int
767 xfs_getfsmap_datadev_bnobt_query(
768         struct xfs_trans                *tp,
769         struct xfs_getfsmap_info        *info,
770         struct xfs_btree_cur            **curpp,
771         void                            *priv)
772 {
773         struct xfs_alloc_rec_incore     *key = priv;
774 
775         /* Report any gap at the end of the last AG. */
776         if (info->last)
777                 return xfs_getfsmap_datadev_bnobt_helper(*curpp, &key[1], info);
778 
779         /* Allocate cursor for this AG and query_range it. */
780         *curpp = xfs_bnobt_init_cursor(tp->t_mountp, tp, info->agf_bp,
781                         info->pag);
782         key->ar_startblock = info->low.rm_startblock;
783         key[1].ar_startblock = info->high.rm_startblock;
784         return xfs_alloc_query_range(*curpp, key, &key[1],
785                         xfs_getfsmap_datadev_bnobt_helper, info);
786 }
787 
788 /* Execute a getfsmap query against the regular data device's bnobt. */
789 STATIC int
790 xfs_getfsmap_datadev_bnobt(
791         struct xfs_trans                *tp,
792         const struct xfs_fsmap          *keys,
793         struct xfs_getfsmap_info        *info)
794 {
795         struct xfs_alloc_rec_incore     akeys[2];
796 
797         memset(akeys, 0, sizeof(akeys));
798         info->missing_owner = XFS_FMR_OWN_UNKNOWN;
799         return __xfs_getfsmap_datadev(tp, keys, info,
800                         xfs_getfsmap_datadev_bnobt_query, &akeys[0]);
801 }
802 
803 /* Do we recognize the device? */
804 STATIC bool
805 xfs_getfsmap_is_valid_device(
806         struct xfs_mount        *mp,
807         struct xfs_fsmap        *fm)
808 {
809         if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX ||
810             fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev))
811                 return true;
812         if (mp->m_logdev_targp &&
813             fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev))
814                 return true;
815         if (mp->m_rtdev_targp &&
816             fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev))
817                 return true;
818         return false;
819 }
820 
821 /* Ensure that the low key is less than the high key. */
822 STATIC bool
823 xfs_getfsmap_check_keys(
824         struct xfs_fsmap                *low_key,
825         struct xfs_fsmap                *high_key)
826 {
827         if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
828                 if (low_key->fmr_offset)
829                         return false;
830         }
831         if (high_key->fmr_flags != -1U &&
832             (high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER |
833                                     FMR_OF_EXTENT_MAP))) {
834                 if (high_key->fmr_offset && high_key->fmr_offset != -1ULL)
835                         return false;
836         }
837         if (high_key->fmr_length && high_key->fmr_length != -1ULL)
838                 return false;
839 
840         if (low_key->fmr_device > high_key->fmr_device)
841                 return false;
842         if (low_key->fmr_device < high_key->fmr_device)
843                 return true;
844 
845         if (low_key->fmr_physical > high_key->fmr_physical)
846                 return false;
847         if (low_key->fmr_physical < high_key->fmr_physical)
848                 return true;
849 
850         if (low_key->fmr_owner > high_key->fmr_owner)
851                 return false;
852         if (low_key->fmr_owner < high_key->fmr_owner)
853                 return true;
854 
855         if (low_key->fmr_offset > high_key->fmr_offset)
856                 return false;
857         if (low_key->fmr_offset < high_key->fmr_offset)
858                 return true;
859 
860         return false;
861 }
862 
/*
 * Number of slots in the getfsmap device handler table: three when
 * realtime support (CONFIG_XFS_RT) is built in, otherwise only two.
 */
#ifndef CONFIG_XFS_RT
#define XFS_GETFSMAP_DEVS	2
#else
#define XFS_GETFSMAP_DEVS	3
#endif /* !CONFIG_XFS_RT */
871 
872 /*
873  * Get filesystem's extents as described in head, and format for output. Fills
874  * in the supplied records array until there are no more reverse mappings to
875  * return or head.fmh_entries == head.fmh_count.  In the second case, this
876  * function returns -ECANCELED to indicate that more records would have been
877  * returned.
878  *
879  * Key to Confusion
880  * ----------------
881  * There are multiple levels of keys and counters at work here:
882  * xfs_fsmap_head.fmh_keys      -- low and high fsmap keys passed in;
883  *                                 these reflect fs-wide sector addrs.
884  * dkeys                        -- fmh_keys used to query each device;
885  *                                 these are fmh_keys but w/ the low key
886  *                                 bumped up by fmr_length.
887  * xfs_getfsmap_info.next_daddr -- next disk addr we expect to see; this
888  *                                 is how we detect gaps in the fsmap
889                                    records and report them.
890  * xfs_getfsmap_info.low/high   -- per-AG low/high keys computed from
891  *                                 dkeys; used to query the metadata.
892  */
893 int
894 xfs_getfsmap(
895         struct xfs_mount                *mp,
896         struct xfs_fsmap_head           *head,
897         struct fsmap                    *fsmap_recs)
898 {
899         struct xfs_trans                *tp = NULL;
900         struct xfs_fsmap                dkeys[2];       /* per-dev keys */
901         struct xfs_getfsmap_dev         handlers[XFS_GETFSMAP_DEVS];
902         struct xfs_getfsmap_info        info = { NULL };
903         bool                            use_rmap;
904         int                             i;
905         int                             error = 0;
906 
907         if (head->fmh_iflags & ~FMH_IF_VALID)
908                 return -EINVAL;
909         if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||
910             !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
911                 return -EINVAL;
912         if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1]))
913                 return -EINVAL;
914 
915         use_rmap = xfs_has_rmapbt(mp) &&
916                    has_capability_noaudit(current, CAP_SYS_ADMIN);
917         head->fmh_entries = 0;
918 
919         /* Set up our device handlers. */
920         memset(handlers, 0, sizeof(handlers));
921         handlers[0].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
922         handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
923         if (use_rmap)
924                 handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
925         else
926                 handlers[0].fn = xfs_getfsmap_datadev_bnobt;
927         if (mp->m_logdev_targp != mp->m_ddev_targp) {
928                 handlers[1].nr_sectors = XFS_FSB_TO_BB(mp,
929                                                        mp->m_sb.sb_logblocks);
930                 handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
931                 handlers[1].fn = xfs_getfsmap_logdev;
932         }
933 #ifdef CONFIG_XFS_RT
934         if (mp->m_rtdev_targp) {
935                 handlers[2].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
936                 handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
937                 handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
938         }
939 #endif /* CONFIG_XFS_RT */
940 
941         xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
942                         xfs_getfsmap_dev_compare);
943 
944         /*
945          * To continue where we left off, we allow userspace to use the
946          * last mapping from a previous call as the low key of the next.
947          * This is identified by a non-zero length in the low key. We
948          * have to increment the low key in this scenario to ensure we
949          * don't return the same mapping again, and instead return the
950          * very next mapping.
951          *
952          * If the low key mapping refers to file data, the same physical
953          * blocks could be mapped to several other files/offsets.
954          * According to rmapbt record ordering, the minimal next
955          * possible record for the block range is the next starting
956          * offset in the same inode. Therefore, each fsmap backend bumps
957          * the file offset to continue the search appropriately.  For
958          * all other low key mapping types (attr blocks, metadata), each
959          * fsmap backend bumps the physical offset as there can be no
960          * other mapping for the same physical block range.
961          */
962         dkeys[0] = head->fmh_keys[0];
963         memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));
964 
965         info.next_daddr = head->fmh_keys[0].fmr_physical +
966                           head->fmh_keys[0].fmr_length;
967         info.end_daddr = XFS_BUF_DADDR_NULL;
968         info.fsmap_recs = fsmap_recs;
969         info.head = head;
970 
971         /* For each device we support... */
972         for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
973                 /* Is this device within the range the user asked for? */
974                 if (!handlers[i].fn)
975                         continue;
976                 if (head->fmh_keys[0].fmr_device > handlers[i].dev)
977                         continue;
978                 if (head->fmh_keys[1].fmr_device < handlers[i].dev)
979                         break;
980 
981                 /*
982                  * If this device number matches the high key, we have
983                  * to pass the high key to the handler to limit the
984                  * query results.  If the device number exceeds the
985                  * low key, zero out the low key so that we get
986                  * everything from the beginning.
987                  */
988                 if (handlers[i].dev == head->fmh_keys[1].fmr_device) {
989                         dkeys[1] = head->fmh_keys[1];
990                         info.end_daddr = min(handlers[i].nr_sectors - 1,
991                                              dkeys[1].fmr_physical);
992                 }
993                 if (handlers[i].dev > head->fmh_keys[0].fmr_device)
994                         memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));
995 
996                 /*
997                  * Grab an empty transaction so that we can use its recursive
998                  * buffer locking abilities to detect cycles in the rmapbt
999                  * without deadlocking.
1000                  */
1001                 error = xfs_trans_alloc_empty(mp, &tp);
1002                 if (error)
1003                         break;
1004 
1005                 info.dev = handlers[i].dev;
1006                 info.last = false;
1007                 info.pag = NULL;
1008                 info.low_daddr = XFS_BUF_DADDR_NULL;
1009                 info.low.rm_blockcount = 0;
1010                 error = handlers[i].fn(tp, dkeys, &info);
1011                 if (error)
1012                         break;
1013                 xfs_trans_cancel(tp);
1014                 tp = NULL;
1015                 info.next_daddr = 0;
1016         }
1017 
1018         if (tp)
1019                 xfs_trans_cancel(tp);
1020         head->fmh_oflags = FMH_OF_DEV_T;
1021         return error;
1022 }
1023 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php