~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~
xfs_exchrange.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~
  1 // SPDX-License-Identifier: GPL-2.0-or-later
  2 /*
  3  * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
  4  * Author: Darrick J. Wong <djwong@kernel.org>
  5  */
  6 #include "xfs.h"
  7 #include "xfs_shared.h"
  8 #include "xfs_format.h"
  9 #include "xfs_log_format.h"
 10 #include "xfs_trans_resv.h"
 11 #include "xfs_mount.h"
 12 #include "xfs_defer.h"
 13 #include "xfs_inode.h"
 14 #include "xfs_trans.h"
 15 #include "xfs_quota.h"
 16 #include "xfs_bmap_util.h"
 17 #include "xfs_reflink.h"
 18 #include "xfs_trace.h"
 19 #include "xfs_exchrange.h"
 20 #include "xfs_exchmaps.h"
 21 #include "xfs_sb.h"
 22 #include "xfs_icache.h"
 23 #include "xfs_log.h"
 24 #include "xfs_rtbitmap.h"
 25 #include <linux/fsnotify.h>
 26 
 27 /* Lock (and optionally join) two inodes for a file range exchange. */
 28 void
 29 xfs_exchrange_ilock(
 30         struct xfs_trans        *tp,
 31         struct xfs_inode        *ip1,
 32         struct xfs_inode        *ip2)
 33 {
 34         if (ip1 != ip2)
 35                 xfs_lock_two_inodes(ip1, XFS_ILOCK_EXCL,
 36                                     ip2, XFS_ILOCK_EXCL);
 37         else
 38                 xfs_ilock(ip1, XFS_ILOCK_EXCL);
 39         if (tp) {
 40                 xfs_trans_ijoin(tp, ip1, 0);
 41                 if (ip2 != ip1)
 42                         xfs_trans_ijoin(tp, ip2, 0);
 43         }
 44 
 45 }
 46 
 47 /* Unlock two inodes after a file range exchange operation. */
 48 void
 49 xfs_exchrange_iunlock(
 50         struct xfs_inode        *ip1,
 51         struct xfs_inode        *ip2)
 52 {
 53         if (ip2 != ip1)
 54                 xfs_iunlock(ip2, XFS_ILOCK_EXCL);
 55         xfs_iunlock(ip1, XFS_ILOCK_EXCL);
 56 }
 57 
 58 /*
 59  * Estimate the resource requirements to exchange file contents between the two
 60  * files.  The caller is required to hold the IOLOCK and the MMAPLOCK and to
 61  * have flushed both inodes' pagecache and active direct-ios.
 62  */
 63 int
 64 xfs_exchrange_estimate(
 65         struct xfs_exchmaps_req *req)
 66 {
 67         int                     error;
 68 
 69         xfs_exchrange_ilock(NULL, req->ip1, req->ip2);
 70         error = xfs_exchmaps_estimate(req);
 71         xfs_exchrange_iunlock(req->ip1, req->ip2);
 72         return error;
 73 }
 74 
 75 #define QRETRY_IP1      (0x1)
 76 #define QRETRY_IP2      (0x2)
 77 
 78 /*
 79  * Obtain a quota reservation to make sure we don't hit EDQUOT.  We can skip
 80  * this if quota enforcement is disabled or if both inodes' dquots are the
 81  * same.  The qretry structure must be initialized to zeroes before the first
 82  * call to this function.
 83  */
 84 STATIC int
 85 xfs_exchrange_reserve_quota(
 86         struct xfs_trans                *tp,
 87         const struct xfs_exchmaps_req   *req,
 88         unsigned int                    *qretry)
 89 {
 90         int64_t                         ddelta, rdelta;
 91         int                             ip1_error = 0;
 92         int                             error;
 93 
 94         /*
 95          * Don't bother with a quota reservation if we're not enforcing them
 96          * or the two inodes have the same dquots.
 97          */
 98         if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 ||
 99             (req->ip1->i_udquot == req->ip2->i_udquot &&
100              req->ip1->i_gdquot == req->ip2->i_gdquot &&
101              req->ip1->i_pdquot == req->ip2->i_pdquot))
102                 return 0;
103 
104         *qretry = 0;
105 
106         /*
107          * For each file, compute the net gain in the number of regular blocks
108          * that will be mapped into that file and reserve that much quota.  The
109          * quota counts must be able to absorb at least that much space.
110          */
111         ddelta = req->ip2_bcount - req->ip1_bcount;
112         rdelta = req->ip2_rtbcount - req->ip1_rtbcount;
113         if (ddelta > 0 || rdelta > 0) {
114                 error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
115                                 ddelta > 0 ? ddelta : 0,
116                                 rdelta > 0 ? rdelta : 0,
117                                 false);
118                 if (error == -EDQUOT || error == -ENOSPC) {
119                         /*
120                          * Save this error and see what happens if we try to
121                          * reserve quota for ip2.  Then report both.
122                          */
123                         *qretry |= QRETRY_IP1;
124                         ip1_error = error;
125                         error = 0;
126                 }
127                 if (error)
128                         return error;
129         }
130         if (ddelta < 0 || rdelta < 0) {
131                 error = xfs_trans_reserve_quota_nblks(tp, req->ip2,
132                                 ddelta < 0 ? -ddelta : 0,
133                                 rdelta < 0 ? -rdelta : 0,
134                                 false);
135                 if (error == -EDQUOT || error == -ENOSPC)
136                         *qretry |= QRETRY_IP2;
137                 if (error)
138                         return error;
139         }
140         if (ip1_error)
141                 return ip1_error;
142 
143         /*
144          * For each file, forcibly reserve the gross gain in mapped blocks so
145          * that we don't trip over any quota block reservation assertions.
146          * We must reserve the gross gain because the quota code subtracts from
147          * bcount the number of blocks that we unmap; it does not add that
148          * quantity back to the quota block reservation.
149          */
150         error = xfs_trans_reserve_quota_nblks(tp, req->ip1, req->ip1_bcount,
151                         req->ip1_rtbcount, true);
152         if (error)
153                 return error;
154 
155         return xfs_trans_reserve_quota_nblks(tp, req->ip2, req->ip2_bcount,
156                         req->ip2_rtbcount, true);
157 }
158 
159 /* Exchange the mappings (and hence the contents) of two files' forks. */
160 STATIC int
161 xfs_exchrange_mappings(
162         const struct xfs_exchrange      *fxr,
163         struct xfs_inode                *ip1,
164         struct xfs_inode                *ip2)
165 {
166         struct xfs_mount                *mp = ip1->i_mount;
167         struct xfs_exchmaps_req         req = {
168                 .ip1                    = ip1,
169                 .ip2                    = ip2,
170                 .startoff1              = XFS_B_TO_FSBT(mp, fxr->file1_offset),
171                 .startoff2              = XFS_B_TO_FSBT(mp, fxr->file2_offset),
172                 .blockcount             = XFS_B_TO_FSB(mp, fxr->length),
173         };
174         struct xfs_trans                *tp;
175         unsigned int                    qretry;
176         bool                            retried = false;
177         int                             error;
178 
179         trace_xfs_exchrange_mappings(fxr, ip1, ip2);
180 
181         if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
182                 req.flags |= XFS_EXCHMAPS_SET_SIZES;
183         if (fxr->flags & XFS_EXCHANGE_RANGE_FILE1_WRITTEN)
184                 req.flags |= XFS_EXCHMAPS_INO1_WRITTEN;
185 
186         /*
187          * Round the request length up to the nearest file allocation unit.
188          * The prep function already checked that the request offsets and
189          * length in @fxr are safe to round up.
190          */
191         if (xfs_inode_has_bigrtalloc(ip2))
192                 req.blockcount = xfs_rtb_roundup_rtx(mp, req.blockcount);
193 
194         error = xfs_exchrange_estimate(&req);
195         if (error)
196                 return error;
197 
198 retry:
199         /* Allocate the transaction, lock the inodes, and join them. */
200         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, req.resblks, 0,
201                         XFS_TRANS_RES_FDBLKS, &tp);
202         if (error)
203                 return error;
204 
205         xfs_exchrange_ilock(tp, ip1, ip2);
206 
207         trace_xfs_exchrange_before(ip2, 2);
208         trace_xfs_exchrange_before(ip1, 1);
209 
210         error = xfs_exchmaps_check_forks(mp, &req);
211         if (error)
212                 goto out_trans_cancel;
213 
214         /*
215          * Reserve ourselves some quota if any of them are in enforcing mode.
216          * In theory we only need enough to satisfy the change in the number
217          * of blocks between the two ranges being remapped.
218          */
219         error = xfs_exchrange_reserve_quota(tp, &req, &qretry);
220         if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
221                 xfs_trans_cancel(tp);
222                 xfs_exchrange_iunlock(ip1, ip2);
223                 if (qretry & QRETRY_IP1)
224                         xfs_blockgc_free_quota(ip1, 0);
225                 if (qretry & QRETRY_IP2)
226                         xfs_blockgc_free_quota(ip2, 0);
227                 retried = true;
228                 goto retry;
229         }
230         if (error)
231                 goto out_trans_cancel;
232 
233         /* If we got this far on a dry run, all parameters are ok. */
234         if (fxr->flags & XFS_EXCHANGE_RANGE_DRY_RUN)
235                 goto out_trans_cancel;
236 
237         /* Update the mtime and ctime of both files. */
238         if (fxr->flags & __XFS_EXCHANGE_RANGE_UPD_CMTIME1)
239                 xfs_trans_ichgtime(tp, ip1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
240         if (fxr->flags & __XFS_EXCHANGE_RANGE_UPD_CMTIME2)
241                 xfs_trans_ichgtime(tp, ip2, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
242 
243         xfs_exchange_mappings(tp, &req);
244 
245         /*
246          * Force the log to persist metadata updates if the caller or the
247          * administrator requires this.  The generic prep function already
248          * flushed the relevant parts of the page cache.
249          */
250         if (xfs_has_wsync(mp) || (fxr->flags & XFS_EXCHANGE_RANGE_DSYNC))
251                 xfs_trans_set_sync(tp);
252 
253         error = xfs_trans_commit(tp);
254 
255         trace_xfs_exchrange_after(ip2, 2);
256         trace_xfs_exchrange_after(ip1, 1);
257 
258         if (error)
259                 goto out_unlock;
260 
261         /*
262          * If the caller wanted us to exchange the contents of two complete
263          * files of unequal length, exchange the incore sizes now.  This should
264          * be safe because we flushed both files' page caches, exchanged all
265          * the mappings, and updated the ondisk sizes.
266          */
267         if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
268                 loff_t  temp;
269 
270                 temp = i_size_read(VFS_I(ip2));
271                 i_size_write(VFS_I(ip2), i_size_read(VFS_I(ip1)));
272                 i_size_write(VFS_I(ip1), temp);
273         }
274 
275 out_unlock:
276         xfs_exchrange_iunlock(ip1, ip2);
277         return error;
278 
279 out_trans_cancel:
280         xfs_trans_cancel(tp);
281         goto out_unlock;
282 }
283 
284 /*
285  * Generic code for exchanging ranges of two files via XFS_IOC_EXCHANGE_RANGE.
286  * This part deals with struct file objects and byte ranges and does not deal
287  * with XFS-specific data structures such as xfs_inodes and block ranges.  This
288  * separation may some day facilitate porting to another filesystem.
289  *
290  * The goal is to exchange fxr.length bytes starting at fxr.file1_offset in
291  * file1 with the same number of bytes starting at fxr.file2_offset in file2.
292  * Implementations must call xfs_exchange_range_prep to prepare the two
293  * files prior to taking locks; and they must update the inode change and mod
294  * times of both files as part of the metadata update.  The timestamp update
295  * and freshness checks must be done atomically as part of the data exchange
296  * operation to ensure correctness of the freshness check.
297  * xfs_exchange_range_finish must be called after the operation completes
298  * successfully but before locks are dropped.
299  */
300 
301 /* Verify that we have security clearance to perform this operation. */
302 static int
303 xfs_exchange_range_verify_area(
304         struct xfs_exchrange    *fxr)
305 {
306         int                     ret;
307 
308         ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length,
309                         true);
310         if (ret)
311                 return ret;
312 
313         return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length,
314                         true);
315 }
316 
317 /*
318  * Performs necessary checks before doing a range exchange, having stabilized
319  * mutable inode attributes via i_rwsem.
320  */
321 static inline int
322 xfs_exchange_range_checks(
323         struct xfs_exchrange    *fxr,
324         unsigned int            alloc_unit)
325 {
326         struct inode            *inode1 = file_inode(fxr->file1);
327         struct inode            *inode2 = file_inode(fxr->file2);
328         uint64_t                allocmask = alloc_unit - 1;
329         int64_t                 test_len;
330         uint64_t                blen;
331         loff_t                  size1, size2, tmp;
332         int                     error;
333 
334         /* Don't touch certain kinds of inodes */
335         if (IS_IMMUTABLE(inode1) || IS_IMMUTABLE(inode2))
336                 return -EPERM;
337         if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2))
338                 return -ETXTBSY;
339 
340         size1 = i_size_read(inode1);
341         size2 = i_size_read(inode2);
342 
343         /* Ranges cannot start after EOF. */
344         if (fxr->file1_offset > size1 || fxr->file2_offset > size2)
345                 return -EINVAL;
346 
347         /*
348          * If the caller said to exchange to EOF, we set the length of the
349          * request large enough to cover everything to the end of both files.
350          */
351         if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
352                 fxr->length = max_t(int64_t, size1 - fxr->file1_offset,
353                                              size2 - fxr->file2_offset);
354 
355                 error = xfs_exchange_range_verify_area(fxr);
356                 if (error)
357                         return error;
358         }
359 
360         /*
361          * The start of both ranges must be aligned to the file allocation
362          * unit.
363          */
364         if (!IS_ALIGNED(fxr->file1_offset, alloc_unit) ||
365             !IS_ALIGNED(fxr->file2_offset, alloc_unit))
366                 return -EINVAL;
367 
368         /* Ensure offsets don't wrap. */
369         if (check_add_overflow(fxr->file1_offset, fxr->length, &tmp) ||
370             check_add_overflow(fxr->file2_offset, fxr->length, &tmp))
371                 return -EINVAL;
372 
373         /*
374          * We require both ranges to end within EOF, unless we're exchanging
375          * to EOF.
376          */
377         if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) &&
378             (fxr->file1_offset + fxr->length > size1 ||
379              fxr->file2_offset + fxr->length > size2))
380                 return -EINVAL;
381 
382         /*
383          * Make sure we don't hit any file size limits.  If we hit any size
384          * limits such that test_length was adjusted, we abort the whole
385          * operation.
386          */
387         test_len = fxr->length;
388         error = generic_write_check_limits(fxr->file2, fxr->file2_offset,
389                         &test_len);
390         if (error)
391                 return error;
392         error = generic_write_check_limits(fxr->file1, fxr->file1_offset,
393                         &test_len);
394         if (error)
395                 return error;
396         if (test_len != fxr->length)
397                 return -EINVAL;
398 
399         /*
400          * If the user wanted us to exchange up to the infile's EOF, round up
401          * to the next allocation unit boundary for this check.  Do the same
402          * for the outfile.
403          *
404          * Otherwise, reject the range length if it's not aligned to an
405          * allocation unit.
406          */
407         if (fxr->file1_offset + fxr->length == size1)
408                 blen = ALIGN(size1, alloc_unit) - fxr->file1_offset;
409         else if (fxr->file2_offset + fxr->length == size2)
410                 blen = ALIGN(size2, alloc_unit) - fxr->file2_offset;
411         else if (!IS_ALIGNED(fxr->length, alloc_unit))
412                 return -EINVAL;
413         else
414                 blen = fxr->length;
415 
416         /* Don't allow overlapped exchanges within the same file. */
417         if (inode1 == inode2 &&
418             fxr->file2_offset + blen > fxr->file1_offset &&
419             fxr->file1_offset + blen > fxr->file2_offset)
420                 return -EINVAL;
421 
422         /*
423          * Ensure that we don't exchange a partial EOF block into the middle of
424          * another file.
425          */
426         if ((fxr->length & allocmask) == 0)
427                 return 0;
428 
429         blen = fxr->length;
430         if (fxr->file2_offset + blen < size2)
431                 blen &= ~allocmask;
432 
433         if (fxr->file1_offset + blen < size1)
434                 blen &= ~allocmask;
435 
436         return blen == fxr->length ? 0 : -EINVAL;
437 }
438 
439 /*
440  * Check that the two inodes are eligible for range exchanges, the ranges make
441  * sense, and then flush all dirty data.  Caller must ensure that the inodes
442  * have been locked against any other modifications.
443  */
444 static inline int
445 xfs_exchange_range_prep(
446         struct xfs_exchrange    *fxr,
447         unsigned int            alloc_unit)
448 {
449         struct inode            *inode1 = file_inode(fxr->file1);
450         struct inode            *inode2 = file_inode(fxr->file2);
451         bool                    same_inode = (inode1 == inode2);
452         int                     error;
453 
454         /* Check that we don't violate system file offset limits. */
455         error = xfs_exchange_range_checks(fxr, alloc_unit);
456         if (error || fxr->length == 0)
457                 return error;
458 
459         /* Wait for the completion of any pending IOs on both files */
460         inode_dio_wait(inode1);
461         if (!same_inode)
462                 inode_dio_wait(inode2);
463 
464         error = filemap_write_and_wait_range(inode1->i_mapping,
465                         fxr->file1_offset,
466                         fxr->file1_offset + fxr->length - 1);
467         if (error)
468                 return error;
469 
470         error = filemap_write_and_wait_range(inode2->i_mapping,
471                         fxr->file2_offset,
472                         fxr->file2_offset + fxr->length - 1);
473         if (error)
474                 return error;
475 
476         /*
477          * If the files or inodes involved require synchronous writes, amend
478          * the request to force the filesystem to flush all data and metadata
479          * to disk after the operation completes.
480          */
481         if (((fxr->file1->f_flags | fxr->file2->f_flags) & O_SYNC) ||
482             IS_SYNC(inode1) || IS_SYNC(inode2))
483                 fxr->flags |= XFS_EXCHANGE_RANGE_DSYNC;
484 
485         return 0;
486 }
487 
488 /*
489  * Finish a range exchange operation, if it was successful.  Caller must ensure
490  * that the inodes are still locked against any other modifications.
491  */
492 static inline int
493 xfs_exchange_range_finish(
494         struct xfs_exchrange    *fxr)
495 {
496         int                     error;
497 
498         error = file_remove_privs(fxr->file1);
499         if (error)
500                 return error;
501         if (file_inode(fxr->file1) == file_inode(fxr->file2))
502                 return 0;
503 
504         return file_remove_privs(fxr->file2);
505 }
506 
507 /*
508  * Check the alignment of an exchange request when the allocation unit size
509  * isn't a power of two.  The generic file-level helpers use (fast)
510  * bitmask-based alignment checks, but here we have to use slow long division.
511  */
512 static int
513 xfs_exchrange_check_rtalign(
514         const struct xfs_exchrange      *fxr,
515         struct xfs_inode                *ip1,
516         struct xfs_inode                *ip2,
517         unsigned int                    alloc_unit)
518 {
519         uint64_t                        length = fxr->length;
520         uint64_t                        blen;
521         loff_t                          size1, size2;
522 
523         size1 = i_size_read(VFS_I(ip1));
524         size2 = i_size_read(VFS_I(ip2));
525 
526         /* The start of both ranges must be aligned to a rt extent. */
527         if (!isaligned_64(fxr->file1_offset, alloc_unit) ||
528             !isaligned_64(fxr->file2_offset, alloc_unit))
529                 return -EINVAL;
530 
531         if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
532                 length = max_t(int64_t, size1 - fxr->file1_offset,
533                                         size2 - fxr->file2_offset);
534 
535         /*
536          * If the user wanted us to exchange up to the infile's EOF, round up
537          * to the next rt extent boundary for this check.  Do the same for the
538          * outfile.
539          *
540          * Otherwise, reject the range length if it's not rt extent aligned.
541          * We already confirmed the starting offsets' rt extent block
542          * alignment.
543          */
544         if (fxr->file1_offset + length == size1)
545                 blen = roundup_64(size1, alloc_unit) - fxr->file1_offset;
546         else if (fxr->file2_offset + length == size2)
547                 blen = roundup_64(size2, alloc_unit) - fxr->file2_offset;
548         else if (!isaligned_64(length, alloc_unit))
549                 return -EINVAL;
550         else
551                 blen = length;
552 
553         /* Don't allow overlapped exchanges within the same file. */
554         if (ip1 == ip2 &&
555             fxr->file2_offset + blen > fxr->file1_offset &&
556             fxr->file1_offset + blen > fxr->file2_offset)
557                 return -EINVAL;
558 
559         /*
560          * Ensure that we don't exchange a partial EOF rt extent into the
561          * middle of another file.
562          */
563         if (isaligned_64(length, alloc_unit))
564                 return 0;
565 
566         blen = length;
567         if (fxr->file2_offset + length < size2)
568                 blen = rounddown_64(blen, alloc_unit);
569 
570         if (fxr->file1_offset + blen < size1)
571                 blen = rounddown_64(blen, alloc_unit);
572 
573         return blen == length ? 0 : -EINVAL;
574 }
575 
576 /* Prepare two files to have their data exchanged. */
577 STATIC int
578 xfs_exchrange_prep(
579         struct xfs_exchrange    *fxr,
580         struct xfs_inode        *ip1,
581         struct xfs_inode        *ip2)
582 {
583         struct xfs_mount        *mp = ip2->i_mount;
584         unsigned int            alloc_unit = xfs_inode_alloc_unitsize(ip2);
585         int                     error;
586 
587         trace_xfs_exchrange_prep(fxr, ip1, ip2);
588 
589         /* Verify both files are either real-time or non-realtime */
590         if (XFS_IS_REALTIME_INODE(ip1) != XFS_IS_REALTIME_INODE(ip2))
591                 return -EINVAL;
592 
593         /* Check non-power of two alignment issues, if necessary. */
594         if (!is_power_of_2(alloc_unit)) {
595                 error = xfs_exchrange_check_rtalign(fxr, ip1, ip2, alloc_unit);
596                 if (error)
597                         return error;
598 
599                 /*
600                  * Do the generic file-level checks with the regular block
601                  * alignment.
602                  */
603                 alloc_unit = mp->m_sb.sb_blocksize;
604         }
605 
606         error = xfs_exchange_range_prep(fxr, alloc_unit);
607         if (error || fxr->length == 0)
608                 return error;
609 
610         /* Attach dquots to both inodes before changing block maps. */
611         error = xfs_qm_dqattach(ip2);
612         if (error)
613                 return error;
614         error = xfs_qm_dqattach(ip1);
615         if (error)
616                 return error;
617 
618         trace_xfs_exchrange_flush(fxr, ip1, ip2);
619 
620         /* Flush the relevant ranges of both files. */
621         error = xfs_flush_unmap_range(ip2, fxr->file2_offset, fxr->length);
622         if (error)
623                 return error;
624         error = xfs_flush_unmap_range(ip1, fxr->file1_offset, fxr->length);
625         if (error)
626                 return error;
627 
628         /*
629          * Cancel CoW fork preallocations for the ranges of both files.  The
630          * prep function should have flushed all the dirty data, so the only
631          * CoW mappings remaining should be speculative.
632          */
633         if (xfs_inode_has_cow_data(ip1)) {
634                 error = xfs_reflink_cancel_cow_range(ip1, fxr->file1_offset,
635                                 fxr->length, true);
636                 if (error)
637                         return error;
638         }
639 
640         if (xfs_inode_has_cow_data(ip2)) {
641                 error = xfs_reflink_cancel_cow_range(ip2, fxr->file2_offset,
642                                 fxr->length, true);
643                 if (error)
644                         return error;
645         }
646 
647         return 0;
648 }
649 
650 /*
651  * Exchange contents of files.  This is the binding between the generic
652  * file-level concepts and the XFS inode-specific implementation.
653  */
654 STATIC int
655 xfs_exchrange_contents(
656         struct xfs_exchrange    *fxr)
657 {
658         struct inode            *inode1 = file_inode(fxr->file1);
659         struct inode            *inode2 = file_inode(fxr->file2);
660         struct xfs_inode        *ip1 = XFS_I(inode1);
661         struct xfs_inode        *ip2 = XFS_I(inode2);
662         struct xfs_mount        *mp = ip1->i_mount;
663         int                     error;
664 
665         if (!xfs_has_exchange_range(mp))
666                 return -EOPNOTSUPP;
667 
668         if (fxr->flags & ~(XFS_EXCHANGE_RANGE_ALL_FLAGS |
669                            XFS_EXCHANGE_RANGE_PRIV_FLAGS))
670                 return -EINVAL;
671 
672         if (xfs_is_shutdown(mp))
673                 return -EIO;
674 
675         /* Lock both files against IO */
676         error = xfs_ilock2_io_mmap(ip1, ip2);
677         if (error)
678                 goto out_err;
679 
680         /* Prepare and then exchange file contents. */
681         error = xfs_exchrange_prep(fxr, ip1, ip2);
682         if (error)
683                 goto out_unlock;
684 
685         error = xfs_exchrange_mappings(fxr, ip1, ip2);
686         if (error)
687                 goto out_unlock;
688 
689         /*
690          * Finish the exchange by removing special file privileges like any
691          * other file write would do.  This may involve turning on support for
692          * logged xattrs if either file has security capabilities.
693          */
694         error = xfs_exchange_range_finish(fxr);
695         if (error)
696                 goto out_unlock;
697 
698 out_unlock:
699         xfs_iunlock2_io_mmap(ip1, ip2);
700 out_err:
701         if (error)
702                 trace_xfs_exchrange_error(ip2, error, _RET_IP_);
703         return error;
704 }
705 
706 /* Exchange parts of two files. */
707 static int
708 xfs_exchange_range(
709         struct xfs_exchrange    *fxr)
710 {
711         struct inode            *inode1 = file_inode(fxr->file1);
712         struct inode            *inode2 = file_inode(fxr->file2);
713         int                     ret;
714 
715         BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS &
716                      XFS_EXCHANGE_RANGE_PRIV_FLAGS);
717 
718         /* Both files must be on the same mount/filesystem. */
719         if (fxr->file1->f_path.mnt != fxr->file2->f_path.mnt)
720                 return -EXDEV;
721 
722         if (fxr->flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS)
723                 return -EINVAL;
724 
725         /* Userspace requests only honored for regular files. */
726         if (S_ISDIR(inode1->i_mode) || S_ISDIR(inode2->i_mode))
727                 return -EISDIR;
728         if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
729                 return -EINVAL;
730 
731         /* Both files must be opened for read and write. */
732         if (!(fxr->file1->f_mode & FMODE_READ) ||
733             !(fxr->file1->f_mode & FMODE_WRITE) ||
734             !(fxr->file2->f_mode & FMODE_READ) ||
735             !(fxr->file2->f_mode & FMODE_WRITE))
736                 return -EBADF;
737 
738         /* Neither file can be opened append-only. */
739         if ((fxr->file1->f_flags & O_APPEND) ||
740             (fxr->file2->f_flags & O_APPEND))
741                 return -EBADF;
742 
743         /*
744          * If we're not exchanging to EOF, we can check the areas before
745          * stabilizing both files' i_size.
746          */
747         if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) {
748                 ret = xfs_exchange_range_verify_area(fxr);
749                 if (ret)
750                         return ret;
751         }
752 
753         /* Update cmtime if the fd/inode don't forbid it. */
754         if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1))
755                 fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME1;
756         if (!(fxr->file2->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode2))
757                 fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME2;
758 
759         file_start_write(fxr->file2);
760         ret = xfs_exchrange_contents(fxr);
761         file_end_write(fxr->file2);
762         if (ret)
763                 return ret;
764 
765         fsnotify_modify(fxr->file1);
766         if (fxr->file2 != fxr->file1)
767                 fsnotify_modify(fxr->file2);
768         return 0;
769 }
770 
771 /* Collect exchange-range arguments from userspace. */
772 long
773 xfs_ioc_exchange_range(
774         struct file                     *file,
775         struct xfs_exchange_range __user *argp)
776 {
777         struct xfs_exchrange            fxr = {
778                 .file2                  = file,
779         };
780         struct xfs_exchange_range       args;
781         struct fd                       file1;
782         int                             error;
783 
784         if (copy_from_user(&args, argp, sizeof(args)))
785                 return -EFAULT;
786         if (memchr_inv(&args.pad, 0, sizeof(args.pad)))
787                 return -EINVAL;
788         if (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS)
789                 return -EINVAL;
790 
791         fxr.file1_offset        = args.file1_offset;
792         fxr.file2_offset        = args.file2_offset;
793         fxr.length              = args.length;
794         fxr.flags               = args.flags;
795 
796         file1 = fdget(args.file1_fd);
797         if (!file1.file)
798                 return -EBADF;
799         fxr.file1 = file1.file;
800 
801         error = xfs_exchange_range(&fxr);
802         fdput(file1);
803         return error;
804 }
805
~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.
TOMOYO Linux Cross Reference Linux/fs/xfs/xfs_exchrange.c

TOMOYO Linux Cross Reference
Linux/fs/xfs/xfs_exchrange.c