~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/xfs/xfs_notify_failure.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  * Copyright (c) 2022 Fujitsu.  All Rights Reserved.
  4  */
  5 
  6 #include "xfs.h"
  7 #include "xfs_shared.h"
  8 #include "xfs_format.h"
  9 #include "xfs_log_format.h"
 10 #include "xfs_trans_resv.h"
 11 #include "xfs_mount.h"
 12 #include "xfs_alloc.h"
 13 #include "xfs_bit.h"
 14 #include "xfs_btree.h"
 15 #include "xfs_inode.h"
 16 #include "xfs_icache.h"
 17 #include "xfs_rmap.h"
 18 #include "xfs_rmap_btree.h"
 19 #include "xfs_rtalloc.h"
 20 #include "xfs_trans.h"
 21 #include "xfs_ag.h"
 22 
 23 #include <linux/mm.h>
 24 #include <linux/dax.h>
 25 #include <linux/fs.h>
 26 
 27 struct xfs_failure_info {
 28         xfs_agblock_t           startblock;
 29         xfs_extlen_t            blockcount;
 30         int                     mf_flags;
 31         bool                    want_shutdown;
 32 };
 33 
 34 static pgoff_t
 35 xfs_failure_pgoff(
 36         struct xfs_mount                *mp,
 37         const struct xfs_rmap_irec      *rec,
 38         const struct xfs_failure_info   *notify)
 39 {
 40         loff_t                          pos = XFS_FSB_TO_B(mp, rec->rm_offset);
 41 
 42         if (notify->startblock > rec->rm_startblock)
 43                 pos += XFS_FSB_TO_B(mp,
 44                                 notify->startblock - rec->rm_startblock);
 45         return pos >> PAGE_SHIFT;
 46 }
 47 
 48 static unsigned long
 49 xfs_failure_pgcnt(
 50         struct xfs_mount                *mp,
 51         const struct xfs_rmap_irec      *rec,
 52         const struct xfs_failure_info   *notify)
 53 {
 54         xfs_agblock_t                   end_rec;
 55         xfs_agblock_t                   end_notify;
 56         xfs_agblock_t                   start_cross;
 57         xfs_agblock_t                   end_cross;
 58 
 59         start_cross = max(rec->rm_startblock, notify->startblock);
 60 
 61         end_rec = rec->rm_startblock + rec->rm_blockcount;
 62         end_notify = notify->startblock + notify->blockcount;
 63         end_cross = min(end_rec, end_notify);
 64 
 65         return XFS_FSB_TO_B(mp, end_cross - start_cross) >> PAGE_SHIFT;
 66 }
 67 
 68 static int
 69 xfs_dax_failure_fn(
 70         struct xfs_btree_cur            *cur,
 71         const struct xfs_rmap_irec      *rec,
 72         void                            *data)
 73 {
 74         struct xfs_mount                *mp = cur->bc_mp;
 75         struct xfs_inode                *ip;
 76         struct xfs_failure_info         *notify = data;
 77         struct address_space            *mapping;
 78         pgoff_t                         pgoff;
 79         unsigned long                   pgcnt;
 80         int                             error = 0;
 81 
 82         if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) ||
 83             (rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))) {
 84                 /* Continue the query because this isn't a failure. */
 85                 if (notify->mf_flags & MF_MEM_PRE_REMOVE)
 86                         return 0;
 87                 notify->want_shutdown = true;
 88                 return 0;
 89         }
 90 
 91         /* Get files that incore, filter out others that are not in use. */
 92         error = xfs_iget(mp, cur->bc_tp, rec->rm_owner, XFS_IGET_INCORE,
 93                          0, &ip);
 94         /* Continue the rmap query if the inode isn't incore */
 95         if (error == -ENODATA)
 96                 return 0;
 97         if (error) {
 98                 notify->want_shutdown = true;
 99                 return 0;
100         }
101 
102         mapping = VFS_I(ip)->i_mapping;
103         pgoff = xfs_failure_pgoff(mp, rec, notify);
104         pgcnt = xfs_failure_pgcnt(mp, rec, notify);
105 
106         /* Continue the rmap query if the inode isn't a dax file. */
107         if (dax_mapping(mapping))
108                 error = mf_dax_kill_procs(mapping, pgoff, pgcnt,
109                                           notify->mf_flags);
110 
111         /* Invalidate the cache in dax pages. */
112         if (notify->mf_flags & MF_MEM_PRE_REMOVE)
113                 invalidate_inode_pages2_range(mapping, pgoff,
114                                               pgoff + pgcnt - 1);
115 
116         xfs_irele(ip);
117         return error;
118 }
119 
120 static int
121 xfs_dax_notify_failure_freeze(
122         struct xfs_mount        *mp)
123 {
124         struct super_block      *sb = mp->m_super;
125         int                     error;
126 
127         error = freeze_super(sb, FREEZE_HOLDER_KERNEL);
128         if (error)
129                 xfs_emerg(mp, "already frozen by kernel, err=%d", error);
130 
131         return error;
132 }
133 
134 static void
135 xfs_dax_notify_failure_thaw(
136         struct xfs_mount        *mp,
137         bool                    kernel_frozen)
138 {
139         struct super_block      *sb = mp->m_super;
140         int                     error;
141 
142         if (kernel_frozen) {
143                 error = thaw_super(sb, FREEZE_HOLDER_KERNEL);
144                 if (error)
145                         xfs_emerg(mp, "still frozen after notify failure, err=%d",
146                                 error);
147         }
148 
149         /*
150          * Also thaw userspace call anyway because the device is about to be
151          * removed immediately.
152          */
153         thaw_super(sb, FREEZE_HOLDER_USERSPACE);
154 }
155 
156 static int
157 xfs_dax_notify_ddev_failure(
158         struct xfs_mount        *mp,
159         xfs_daddr_t             daddr,
160         xfs_daddr_t             bblen,
161         int                     mf_flags)
162 {
163         struct xfs_failure_info notify = { .mf_flags = mf_flags };
164         struct xfs_trans        *tp = NULL;
165         struct xfs_btree_cur    *cur = NULL;
166         struct xfs_buf          *agf_bp = NULL;
167         int                     error = 0;
168         bool                    kernel_frozen = false;
169         xfs_fsblock_t           fsbno = XFS_DADDR_TO_FSB(mp, daddr);
170         xfs_agnumber_t          agno = XFS_FSB_TO_AGNO(mp, fsbno);
171         xfs_fsblock_t           end_fsbno = XFS_DADDR_TO_FSB(mp,
172                                                              daddr + bblen - 1);
173         xfs_agnumber_t          end_agno = XFS_FSB_TO_AGNO(mp, end_fsbno);
174 
175         if (mf_flags & MF_MEM_PRE_REMOVE) {
176                 xfs_info(mp, "Device is about to be removed!");
177                 /*
178                  * Freeze fs to prevent new mappings from being created.
179                  * - Keep going on if others already hold the kernel forzen.
180                  * - Keep going on if other errors too because this device is
181                  *   starting to fail.
182                  * - If kernel frozen state is hold successfully here, thaw it
183                  *   here as well at the end.
184                  */
185                 kernel_frozen = xfs_dax_notify_failure_freeze(mp) == 0;
186         }
187 
188         error = xfs_trans_alloc_empty(mp, &tp);
189         if (error)
190                 goto out;
191 
192         for (; agno <= end_agno; agno++) {
193                 struct xfs_rmap_irec    ri_low = { };
194                 struct xfs_rmap_irec    ri_high;
195                 struct xfs_agf          *agf;
196                 struct xfs_perag        *pag;
197                 xfs_agblock_t           range_agend;
198 
199                 pag = xfs_perag_get(mp, agno);
200                 error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
201                 if (error) {
202                         xfs_perag_put(pag);
203                         break;
204                 }
205 
206                 cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, pag);
207 
208                 /*
209                  * Set the rmap range from ri_low to ri_high, which represents
210                  * a [start, end] where we looking for the files or metadata.
211                  */
212                 memset(&ri_high, 0xFF, sizeof(ri_high));
213                 ri_low.rm_startblock = XFS_FSB_TO_AGBNO(mp, fsbno);
214                 if (agno == end_agno)
215                         ri_high.rm_startblock = XFS_FSB_TO_AGBNO(mp, end_fsbno);
216 
217                 agf = agf_bp->b_addr;
218                 range_agend = min(be32_to_cpu(agf->agf_length) - 1,
219                                 ri_high.rm_startblock);
220                 notify.startblock = ri_low.rm_startblock;
221                 notify.blockcount = range_agend + 1 - ri_low.rm_startblock;
222 
223                 error = xfs_rmap_query_range(cur, &ri_low, &ri_high,
224                                 xfs_dax_failure_fn, &notify);
225                 xfs_btree_del_cursor(cur, error);
226                 xfs_trans_brelse(tp, agf_bp);
227                 xfs_perag_put(pag);
228                 if (error)
229                         break;
230 
231                 fsbno = XFS_AGB_TO_FSB(mp, agno + 1, 0);
232         }
233 
234         xfs_trans_cancel(tp);
235 
236         /*
237          * Shutdown fs from a force umount in pre-remove case which won't fail,
238          * so errors can be ignored.  Otherwise, shutdown the filesystem with
239          * CORRUPT flag if error occured or notify.want_shutdown was set during
240          * RMAP querying.
241          */
242         if (mf_flags & MF_MEM_PRE_REMOVE)
243                 xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
244         else if (error || notify.want_shutdown) {
245                 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
246                 if (!error)
247                         error = -EFSCORRUPTED;
248         }
249 
250 out:
251         /* Thaw the fs if it has been frozen before. */
252         if (mf_flags & MF_MEM_PRE_REMOVE)
253                 xfs_dax_notify_failure_thaw(mp, kernel_frozen);
254 
255         return error;
256 }
257 
258 static int
259 xfs_dax_notify_failure(
260         struct dax_device       *dax_dev,
261         u64                     offset,
262         u64                     len,
263         int                     mf_flags)
264 {
265         struct xfs_mount        *mp = dax_holder(dax_dev);
266         u64                     ddev_start;
267         u64                     ddev_end;
268 
269         if (!(mp->m_super->s_flags & SB_BORN)) {
270                 xfs_warn(mp, "filesystem is not ready for notify_failure()!");
271                 return -EIO;
272         }
273 
274         if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_daxdev == dax_dev) {
275                 xfs_debug(mp,
276                          "notify_failure() not supported on realtime device!");
277                 return -EOPNOTSUPP;
278         }
279 
280         if (mp->m_logdev_targp && mp->m_logdev_targp->bt_daxdev == dax_dev &&
281             mp->m_logdev_targp != mp->m_ddev_targp) {
282                 /*
283                  * In the pre-remove case the failure notification is attempting
284                  * to trigger a force unmount.  The expectation is that the
285                  * device is still present, but its removal is in progress and
286                  * can not be cancelled, proceed with accessing the log device.
287                  */
288                 if (mf_flags & MF_MEM_PRE_REMOVE)
289                         return 0;
290                 xfs_err(mp, "ondisk log corrupt, shutting down fs!");
291                 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
292                 return -EFSCORRUPTED;
293         }
294 
295         if (!xfs_has_rmapbt(mp)) {
296                 xfs_debug(mp, "notify_failure() needs rmapbt enabled!");
297                 return -EOPNOTSUPP;
298         }
299 
300         ddev_start = mp->m_ddev_targp->bt_dax_part_off;
301         ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1;
302 
303         /* Notify failure on the whole device. */
304         if (offset == 0 && len == U64_MAX) {
305                 offset = ddev_start;
306                 len = bdev_nr_bytes(mp->m_ddev_targp->bt_bdev);
307         }
308 
309         /* Ignore the range out of filesystem area */
310         if (offset + len - 1 < ddev_start)
311                 return -ENXIO;
312         if (offset > ddev_end)
313                 return -ENXIO;
314 
315         /* Calculate the real range when it touches the boundary */
316         if (offset > ddev_start)
317                 offset -= ddev_start;
318         else {
319                 len -= ddev_start - offset;
320                 offset = 0;
321         }
322         if (offset + len - 1 > ddev_end)
323                 len = ddev_end - offset + 1;
324 
325         return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len),
326                         mf_flags);
327 }
328 
329 const struct dax_holder_operations xfs_dax_holder_operations = {
330         .notify_failure         = xfs_dax_notify_failure,
331 };
332 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php