~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/xfs/libxfs/xfs_da_btree.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
  4  * Copyright (c) 2013 Red Hat, Inc.
  5  * All Rights Reserved.
  6  */
  7 #include "xfs.h"
  8 #include "xfs_fs.h"
  9 #include "xfs_shared.h"
 10 #include "xfs_format.h"
 11 #include "xfs_log_format.h"
 12 #include "xfs_trans_resv.h"
 13 #include "xfs_bit.h"
 14 #include "xfs_mount.h"
 15 #include "xfs_inode.h"
 16 #include "xfs_dir2.h"
 17 #include "xfs_dir2_priv.h"
 18 #include "xfs_trans.h"
 19 #include "xfs_bmap.h"
 20 #include "xfs_attr_leaf.h"
 21 #include "xfs_error.h"
 22 #include "xfs_trace.h"
 23 #include "xfs_buf_item.h"
 24 #include "xfs_log.h"
 25 #include "xfs_errortag.h"
 26 #include "xfs_health.h"
 27 
 28 /*
 29  * xfs_da_btree.c
 30  *
 31  * Routines to implement directories as Btrees of hashed names.
 32  */
 33 
 34 /*========================================================================
 35  * Function prototypes for the kernel.
 36  *========================================================================*/
 37 
 38 /*
 39  * Routines used for growing the Btree.
 40  */
 41 STATIC int xfs_da3_root_split(xfs_da_state_t *state,
 42                                             xfs_da_state_blk_t *existing_root,
 43                                             xfs_da_state_blk_t *new_child);
 44 STATIC int xfs_da3_node_split(xfs_da_state_t *state,
 45                                             xfs_da_state_blk_t *existing_blk,
 46                                             xfs_da_state_blk_t *split_blk,
 47                                             xfs_da_state_blk_t *blk_to_add,
 48                                             int treelevel,
 49                                             int *result);
 50 STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state,
 51                                          xfs_da_state_blk_t *node_blk_1,
 52                                          xfs_da_state_blk_t *node_blk_2);
 53 STATIC void xfs_da3_node_add(xfs_da_state_t *state,
 54                                    xfs_da_state_blk_t *old_node_blk,
 55                                    xfs_da_state_blk_t *new_node_blk);
 56 
 57 /*
 58  * Routines used for shrinking the Btree.
 59  */
 60 STATIC int xfs_da3_root_join(xfs_da_state_t *state,
 61                                            xfs_da_state_blk_t *root_blk);
 62 STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval);
 63 STATIC void xfs_da3_node_remove(xfs_da_state_t *state,
 64                                               xfs_da_state_blk_t *drop_blk);
 65 STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state,
 66                                          xfs_da_state_blk_t *src_node_blk,
 67                                          xfs_da_state_blk_t *dst_node_blk);
 68 
 69 /*
 70  * Utility routines.
 71  */
 72 STATIC int      xfs_da3_blk_unlink(xfs_da_state_t *state,
 73                                   xfs_da_state_blk_t *drop_blk,
 74                                   xfs_da_state_blk_t *save_blk);
 75 
 76 
 77 struct kmem_cache       *xfs_da_state_cache;    /* anchor for dir/attr state */
 78 
 79 /*
 80  * Allocate a dir-state structure.
 81  * We don't put them on the stack since they're large.
 82  */
 83 struct xfs_da_state *
 84 xfs_da_state_alloc(
 85         struct xfs_da_args      *args)
 86 {
 87         struct xfs_da_state     *state;
 88 
 89         state = kmem_cache_zalloc(xfs_da_state_cache,
 90                         GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
 91         state->args = args;
 92         state->mp = args->dp->i_mount;
 93         return state;
 94 }
 95 
 96 /*
 97  * Kill the altpath contents of a da-state structure.
 98  */
 99 STATIC void
100 xfs_da_state_kill_altpath(xfs_da_state_t *state)
101 {
102         int     i;
103 
104         for (i = 0; i < state->altpath.active; i++)
105                 state->altpath.blk[i].bp = NULL;
106         state->altpath.active = 0;
107 }
108 
109 /*
110  * Free a da-state structure.
111  */
112 void
113 xfs_da_state_free(xfs_da_state_t *state)
114 {
115         xfs_da_state_kill_altpath(state);
116 #ifdef DEBUG
117         memset((char *)state, 0, sizeof(*state));
118 #endif /* DEBUG */
119         kmem_cache_free(xfs_da_state_cache, state);
120 }
121 
122 void
123 xfs_da_state_reset(
124         struct xfs_da_state     *state,
125         struct xfs_da_args      *args)
126 {
127         xfs_da_state_kill_altpath(state);
128         memset(state, 0, sizeof(struct xfs_da_state));
129         state->args = args;
130         state->mp = state->args->dp->i_mount;
131 }
132 
133 static inline int xfs_dabuf_nfsb(struct xfs_mount *mp, int whichfork)
134 {
135         if (whichfork == XFS_DATA_FORK)
136                 return mp->m_dir_geo->fsbcount;
137         return mp->m_attr_geo->fsbcount;
138 }
139 
140 void
141 xfs_da3_node_hdr_from_disk(
142         struct xfs_mount                *mp,
143         struct xfs_da3_icnode_hdr       *to,
144         struct xfs_da_intnode           *from)
145 {
146         if (xfs_has_crc(mp)) {
147                 struct xfs_da3_intnode  *from3 = (struct xfs_da3_intnode *)from;
148 
149                 to->forw = be32_to_cpu(from3->hdr.info.hdr.forw);
150                 to->back = be32_to_cpu(from3->hdr.info.hdr.back);
151                 to->magic = be16_to_cpu(from3->hdr.info.hdr.magic);
152                 to->count = be16_to_cpu(from3->hdr.__count);
153                 to->level = be16_to_cpu(from3->hdr.__level);
154                 to->btree = from3->__btree;
155                 ASSERT(to->magic == XFS_DA3_NODE_MAGIC);
156         } else {
157                 to->forw = be32_to_cpu(from->hdr.info.forw);
158                 to->back = be32_to_cpu(from->hdr.info.back);
159                 to->magic = be16_to_cpu(from->hdr.info.magic);
160                 to->count = be16_to_cpu(from->hdr.__count);
161                 to->level = be16_to_cpu(from->hdr.__level);
162                 to->btree = from->__btree;
163                 ASSERT(to->magic == XFS_DA_NODE_MAGIC);
164         }
165 }
166 
167 void
168 xfs_da3_node_hdr_to_disk(
169         struct xfs_mount                *mp,
170         struct xfs_da_intnode           *to,
171         struct xfs_da3_icnode_hdr       *from)
172 {
173         if (xfs_has_crc(mp)) {
174                 struct xfs_da3_intnode  *to3 = (struct xfs_da3_intnode *)to;
175 
176                 ASSERT(from->magic == XFS_DA3_NODE_MAGIC);
177                 to3->hdr.info.hdr.forw = cpu_to_be32(from->forw);
178                 to3->hdr.info.hdr.back = cpu_to_be32(from->back);
179                 to3->hdr.info.hdr.magic = cpu_to_be16(from->magic);
180                 to3->hdr.__count = cpu_to_be16(from->count);
181                 to3->hdr.__level = cpu_to_be16(from->level);
182         } else {
183                 ASSERT(from->magic == XFS_DA_NODE_MAGIC);
184                 to->hdr.info.forw = cpu_to_be32(from->forw);
185                 to->hdr.info.back = cpu_to_be32(from->back);
186                 to->hdr.info.magic = cpu_to_be16(from->magic);
187                 to->hdr.__count = cpu_to_be16(from->count);
188                 to->hdr.__level = cpu_to_be16(from->level);
189         }
190 }
191 
192 /*
193  * Verify an xfs_da3_blkinfo structure. Note that the da3 fields are only
194  * accessible on v5 filesystems. This header format is common across da node,
195  * attr leaf and dir leaf blocks.
196  */
197 xfs_failaddr_t
198 xfs_da3_blkinfo_verify(
199         struct xfs_buf          *bp,
200         struct xfs_da3_blkinfo  *hdr3)
201 {
202         struct xfs_mount        *mp = bp->b_mount;
203         struct xfs_da_blkinfo   *hdr = &hdr3->hdr;
204 
205         if (!xfs_verify_magic16(bp, hdr->magic))
206                 return __this_address;
207 
208         if (xfs_has_crc(mp)) {
209                 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
210                         return __this_address;
211                 if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp))
212                         return __this_address;
213                 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
214                         return __this_address;
215         }
216 
217         return NULL;
218 }
219 
220 static xfs_failaddr_t
221 xfs_da3_node_verify(
222         struct xfs_buf          *bp)
223 {
224         struct xfs_mount        *mp = bp->b_mount;
225         struct xfs_da_intnode   *hdr = bp->b_addr;
226         struct xfs_da3_icnode_hdr ichdr;
227         xfs_failaddr_t          fa;
228 
229         xfs_da3_node_hdr_from_disk(mp, &ichdr, hdr);
230 
231         fa = xfs_da3_blkinfo_verify(bp, bp->b_addr);
232         if (fa)
233                 return fa;
234 
235         if (ichdr.level == 0)
236                 return __this_address;
237         if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
238                 return __this_address;
239         if (ichdr.count == 0)
240                 return __this_address;
241 
242         /*
243          * we don't know if the node is for and attribute or directory tree,
244          * so only fail if the count is outside both bounds
245          */
246         if (ichdr.count > mp->m_dir_geo->node_ents &&
247             ichdr.count > mp->m_attr_geo->node_ents)
248                 return __this_address;
249 
250         /* XXX: hash order check? */
251 
252         return NULL;
253 }
254 
255 xfs_failaddr_t
256 xfs_da3_node_header_check(
257         struct xfs_buf          *bp,
258         xfs_ino_t               owner)
259 {
260         struct xfs_mount        *mp = bp->b_mount;
261 
262         if (xfs_has_crc(mp)) {
263                 struct xfs_da3_blkinfo *hdr3 = bp->b_addr;
264 
265                 if (hdr3->hdr.magic != cpu_to_be16(XFS_DA3_NODE_MAGIC))
266                         return __this_address;
267 
268                 if (be64_to_cpu(hdr3->owner) != owner)
269                         return __this_address;
270         }
271 
272         return NULL;
273 }
274 
275 xfs_failaddr_t
276 xfs_da3_header_check(
277         struct xfs_buf          *bp,
278         xfs_ino_t               owner)
279 {
280         struct xfs_mount        *mp = bp->b_mount;
281         struct xfs_da_blkinfo   *hdr = bp->b_addr;
282 
283         if (!xfs_has_crc(mp))
284                 return NULL;
285 
286         switch (hdr->magic) {
287         case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
288                 return xfs_attr3_leaf_header_check(bp, owner);
289         case cpu_to_be16(XFS_DA3_NODE_MAGIC):
290                 return xfs_da3_node_header_check(bp, owner);
291         case cpu_to_be16(XFS_DIR3_LEAF1_MAGIC):
292         case cpu_to_be16(XFS_DIR3_LEAFN_MAGIC):
293                 return xfs_dir3_leaf_header_check(bp, owner);
294         }
295 
296         ASSERT(0);
297         return NULL;
298 }
299 
300 static void
301 xfs_da3_node_write_verify(
302         struct xfs_buf  *bp)
303 {
304         struct xfs_mount        *mp = bp->b_mount;
305         struct xfs_buf_log_item *bip = bp->b_log_item;
306         struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
307         xfs_failaddr_t          fa;
308 
309         fa = xfs_da3_node_verify(bp);
310         if (fa) {
311                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
312                 return;
313         }
314 
315         if (!xfs_has_crc(mp))
316                 return;
317 
318         if (bip)
319                 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
320 
321         xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
322 }
323 
324 /*
325  * leaf/node format detection on trees is sketchy, so a node read can be done on
326  * leaf level blocks when detection identifies the tree as a node format tree
327  * incorrectly. In this case, we need to swap the verifier to match the correct
328  * format of the block being read.
329  */
330 static void
331 xfs_da3_node_read_verify(
332         struct xfs_buf          *bp)
333 {
334         struct xfs_da_blkinfo   *info = bp->b_addr;
335         xfs_failaddr_t          fa;
336 
337         switch (be16_to_cpu(info->magic)) {
338                 case XFS_DA3_NODE_MAGIC:
339                         if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
340                                 xfs_verifier_error(bp, -EFSBADCRC,
341                                                 __this_address);
342                                 break;
343                         }
344                         fallthrough;
345                 case XFS_DA_NODE_MAGIC:
346                         fa = xfs_da3_node_verify(bp);
347                         if (fa)
348                                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
349                         return;
350                 case XFS_ATTR_LEAF_MAGIC:
351                 case XFS_ATTR3_LEAF_MAGIC:
352                         bp->b_ops = &xfs_attr3_leaf_buf_ops;
353                         bp->b_ops->verify_read(bp);
354                         return;
355                 case XFS_DIR2_LEAFN_MAGIC:
356                 case XFS_DIR3_LEAFN_MAGIC:
357                         bp->b_ops = &xfs_dir3_leafn_buf_ops;
358                         bp->b_ops->verify_read(bp);
359                         return;
360                 default:
361                         xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
362                         break;
363         }
364 }
365 
366 /* Verify the structure of a da3 block. */
367 static xfs_failaddr_t
368 xfs_da3_node_verify_struct(
369         struct xfs_buf          *bp)
370 {
371         struct xfs_da_blkinfo   *info = bp->b_addr;
372 
373         switch (be16_to_cpu(info->magic)) {
374         case XFS_DA3_NODE_MAGIC:
375         case XFS_DA_NODE_MAGIC:
376                 return xfs_da3_node_verify(bp);
377         case XFS_ATTR_LEAF_MAGIC:
378         case XFS_ATTR3_LEAF_MAGIC:
379                 bp->b_ops = &xfs_attr3_leaf_buf_ops;
380                 return bp->b_ops->verify_struct(bp);
381         case XFS_DIR2_LEAFN_MAGIC:
382         case XFS_DIR3_LEAFN_MAGIC:
383                 bp->b_ops = &xfs_dir3_leafn_buf_ops;
384                 return bp->b_ops->verify_struct(bp);
385         default:
386                 return __this_address;
387         }
388 }
389 
390 const struct xfs_buf_ops xfs_da3_node_buf_ops = {
391         .name = "xfs_da3_node",
392         .magic16 = { cpu_to_be16(XFS_DA_NODE_MAGIC),
393                      cpu_to_be16(XFS_DA3_NODE_MAGIC) },
394         .verify_read = xfs_da3_node_read_verify,
395         .verify_write = xfs_da3_node_write_verify,
396         .verify_struct = xfs_da3_node_verify_struct,
397 };
398 
399 static int
400 xfs_da3_node_set_type(
401         struct xfs_trans        *tp,
402         struct xfs_inode        *dp,
403         int                     whichfork,
404         struct xfs_buf          *bp)
405 {
406         struct xfs_da_blkinfo   *info = bp->b_addr;
407 
408         switch (be16_to_cpu(info->magic)) {
409         case XFS_DA_NODE_MAGIC:
410         case XFS_DA3_NODE_MAGIC:
411                 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
412                 return 0;
413         case XFS_ATTR_LEAF_MAGIC:
414         case XFS_ATTR3_LEAF_MAGIC:
415                 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_ATTR_LEAF_BUF);
416                 return 0;
417         case XFS_DIR2_LEAFN_MAGIC:
418         case XFS_DIR3_LEAFN_MAGIC:
419                 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
420                 return 0;
421         default:
422                 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, tp->t_mountp,
423                                 info, sizeof(*info));
424                 xfs_trans_brelse(tp, bp);
425                 xfs_dirattr_mark_sick(dp, whichfork);
426                 return -EFSCORRUPTED;
427         }
428 }
429 
430 int
431 xfs_da3_node_read(
432         struct xfs_trans        *tp,
433         struct xfs_inode        *dp,
434         xfs_dablk_t             bno,
435         struct xfs_buf          **bpp,
436         int                     whichfork)
437 {
438         int                     error;
439 
440         error = xfs_da_read_buf(tp, dp, bno, 0, bpp, whichfork,
441                         &xfs_da3_node_buf_ops);
442         if (error || !*bpp || !tp)
443                 return error;
444         return xfs_da3_node_set_type(tp, dp, whichfork, *bpp);
445 }
446 
447 int
448 xfs_da3_node_read_mapped(
449         struct xfs_trans        *tp,
450         struct xfs_inode        *dp,
451         xfs_daddr_t             mappedbno,
452         struct xfs_buf          **bpp,
453         int                     whichfork)
454 {
455         struct xfs_mount        *mp = dp->i_mount;
456         int                     error;
457 
458         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, mappedbno,
459                         XFS_FSB_TO_BB(mp, xfs_dabuf_nfsb(mp, whichfork)), 0,
460                         bpp, &xfs_da3_node_buf_ops);
461         if (xfs_metadata_is_sick(error))
462                 xfs_dirattr_mark_sick(dp, whichfork);
463         if (error || !*bpp)
464                 return error;
465 
466         if (whichfork == XFS_ATTR_FORK)
467                 xfs_buf_set_ref(*bpp, XFS_ATTR_BTREE_REF);
468         else
469                 xfs_buf_set_ref(*bpp, XFS_DIR_BTREE_REF);
470 
471         if (!tp)
472                 return 0;
473         return xfs_da3_node_set_type(tp, dp, whichfork, *bpp);
474 }
475 
476 /*
477  * Copy src directory/attr leaf/node buffer to the dst.
478  * For v5 file systems make sure the right blkno is stamped in.
479  */
480 void
481 xfs_da_buf_copy(
482         struct xfs_buf *dst,
483         struct xfs_buf *src,
484         size_t size)
485 {
486         struct xfs_da3_blkinfo *da3 = dst->b_addr;
487 
488         memcpy(dst->b_addr, src->b_addr, size);
489         dst->b_ops = src->b_ops;
490         xfs_trans_buf_copy_type(dst, src);
491         if (xfs_has_crc(dst->b_mount))
492                 da3->blkno = cpu_to_be64(xfs_buf_daddr(dst));
493 }
494 
495 /*========================================================================
496  * Routines used for growing the Btree.
497  *========================================================================*/
498 
499 /*
500  * Create the initial contents of an intermediate node.
501  */
502 int
503 xfs_da3_node_create(
504         struct xfs_da_args      *args,
505         xfs_dablk_t             blkno,
506         int                     level,
507         struct xfs_buf          **bpp,
508         int                     whichfork)
509 {
510         struct xfs_da_intnode   *node;
511         struct xfs_trans        *tp = args->trans;
512         struct xfs_mount        *mp = tp->t_mountp;
513         struct xfs_da3_icnode_hdr ichdr = {0};
514         struct xfs_buf          *bp;
515         int                     error;
516         struct xfs_inode        *dp = args->dp;
517 
518         trace_xfs_da_node_create(args);
519         ASSERT(level <= XFS_DA_NODE_MAXDEPTH);
520 
521         error = xfs_da_get_buf(tp, dp, blkno, &bp, whichfork);
522         if (error)
523                 return error;
524         bp->b_ops = &xfs_da3_node_buf_ops;
525         xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
526         node = bp->b_addr;
527 
528         if (xfs_has_crc(mp)) {
529                 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
530 
531                 memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
532                 ichdr.magic = XFS_DA3_NODE_MAGIC;
533                 hdr3->info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
534                 hdr3->info.owner = cpu_to_be64(args->owner);
535                 uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid);
536         } else {
537                 ichdr.magic = XFS_DA_NODE_MAGIC;
538         }
539         ichdr.level = level;
540 
541         xfs_da3_node_hdr_to_disk(dp->i_mount, node, &ichdr);
542         xfs_trans_log_buf(tp, bp,
543                 XFS_DA_LOGRANGE(node, &node->hdr, args->geo->node_hdr_size));
544 
545         *bpp = bp;
546         return 0;
547 }
548 
549 /*
550  * Split a leaf node, rebalance, then possibly split
551  * intermediate nodes, rebalance, etc.
552  */
553 int                                                     /* error */
554 xfs_da3_split(
555         struct xfs_da_state     *state)
556 {
557         struct xfs_da_state_blk *oldblk;
558         struct xfs_da_state_blk *newblk;
559         struct xfs_da_state_blk *addblk;
560         struct xfs_da_intnode   *node;
561         int                     max;
562         int                     action = 0;
563         int                     error;
564         int                     i;
565 
566         trace_xfs_da_split(state->args);
567 
568         if (XFS_TEST_ERROR(false, state->mp, XFS_ERRTAG_DA_LEAF_SPLIT))
569                 return -EIO;
570 
571         /*
572          * Walk back up the tree splitting/inserting/adjusting as necessary.
573          * If we need to insert and there isn't room, split the node, then
574          * decide which fragment to insert the new block from below into.
575          * Note that we may split the root this way, but we need more fixup.
576          */
577         max = state->path.active - 1;
578         ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
579         ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
580                state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
581 
582         addblk = &state->path.blk[max];         /* initial dummy value */
583         for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
584                 oldblk = &state->path.blk[i];
585                 newblk = &state->altpath.blk[i];
586 
587                 /*
588                  * If a leaf node then
589                  *     Allocate a new leaf node, then rebalance across them.
590                  * else if an intermediate node then
591                  *     We split on the last layer, must we split the node?
592                  */
593                 switch (oldblk->magic) {
594                 case XFS_ATTR_LEAF_MAGIC:
595                         error = xfs_attr3_leaf_split(state, oldblk, newblk);
596                         if ((error != 0) && (error != -ENOSPC)) {
597                                 return error;   /* GROT: attr is inconsistent */
598                         }
599                         if (!error) {
600                                 addblk = newblk;
601                                 break;
602                         }
603                         /*
604                          * Entry wouldn't fit, split the leaf again. The new
605                          * extrablk will be consumed by xfs_da3_node_split if
606                          * the node is split.
607                          */
608                         state->extravalid = 1;
609                         if (state->inleaf) {
610                                 state->extraafter = 0;  /* before newblk */
611                                 trace_xfs_attr_leaf_split_before(state->args);
612                                 error = xfs_attr3_leaf_split(state, oldblk,
613                                                             &state->extrablk);
614                         } else {
615                                 state->extraafter = 1;  /* after newblk */
616                                 trace_xfs_attr_leaf_split_after(state->args);
617                                 error = xfs_attr3_leaf_split(state, newblk,
618                                                             &state->extrablk);
619                         }
620                         if (error)
621                                 return error;   /* GROT: attr inconsistent */
622                         addblk = newblk;
623                         break;
624                 case XFS_DIR2_LEAFN_MAGIC:
625                         error = xfs_dir2_leafn_split(state, oldblk, newblk);
626                         if (error)
627                                 return error;
628                         addblk = newblk;
629                         break;
630                 case XFS_DA_NODE_MAGIC:
631                         error = xfs_da3_node_split(state, oldblk, newblk, addblk,
632                                                          max - i, &action);
633                         addblk->bp = NULL;
634                         if (error)
635                                 return error;   /* GROT: dir is inconsistent */
636                         /*
637                          * Record the newly split block for the next time thru?
638                          */
639                         if (action)
640                                 addblk = newblk;
641                         else
642                                 addblk = NULL;
643                         break;
644                 }
645 
646                 /*
647                  * Update the btree to show the new hashval for this child.
648                  */
649                 xfs_da3_fixhashpath(state, &state->path);
650         }
651         if (!addblk)
652                 return 0;
653 
654         /*
655          * xfs_da3_node_split() should have consumed any extra blocks we added
656          * during a double leaf split in the attr fork. This is guaranteed as
657          * we can't be here if the attr fork only has a single leaf block.
658          */
659         ASSERT(state->extravalid == 0 ||
660                state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
661 
662         /*
663          * Split the root node.
664          */
665         ASSERT(state->path.active == 0);
666         oldblk = &state->path.blk[0];
667         error = xfs_da3_root_split(state, oldblk, addblk);
668         if (error)
669                 goto out;
670 
671         /*
672          * Update pointers to the node which used to be block 0 and just got
673          * bumped because of the addition of a new root node.  Note that the
674          * original block 0 could be at any position in the list of blocks in
675          * the tree.
676          *
677          * Note: the magic numbers and sibling pointers are in the same physical
678          * place for both v2 and v3 headers (by design). Hence it doesn't matter
679          * which version of the xfs_da_intnode structure we use here as the
680          * result will be the same using either structure.
681          */
682         node = oldblk->bp->b_addr;
683         if (node->hdr.info.forw) {
684                 if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
685                         xfs_buf_mark_corrupt(oldblk->bp);
686                         xfs_da_mark_sick(state->args);
687                         error = -EFSCORRUPTED;
688                         goto out;
689                 }
690                 node = addblk->bp->b_addr;
691                 node->hdr.info.back = cpu_to_be32(oldblk->blkno);
692                 xfs_trans_log_buf(state->args->trans, addblk->bp,
693                                   XFS_DA_LOGRANGE(node, &node->hdr.info,
694                                   sizeof(node->hdr.info)));
695         }
696         node = oldblk->bp->b_addr;
697         if (node->hdr.info.back) {
698                 if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
699                         xfs_buf_mark_corrupt(oldblk->bp);
700                         xfs_da_mark_sick(state->args);
701                         error = -EFSCORRUPTED;
702                         goto out;
703                 }
704                 node = addblk->bp->b_addr;
705                 node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
706                 xfs_trans_log_buf(state->args->trans, addblk->bp,
707                                   XFS_DA_LOGRANGE(node, &node->hdr.info,
708                                   sizeof(node->hdr.info)));
709         }
710 out:
711         addblk->bp = NULL;
712         return error;
713 }
714 
715 /*
716  * Split the root.  We have to create a new root and point to the two
717  * parts (the split old root) that we just created.  Copy block zero to
718  * the EOF, extending the inode in process.
719  */
720 STATIC int                                              /* error */
721 xfs_da3_root_split(
722         struct xfs_da_state     *state,
723         struct xfs_da_state_blk *blk1,
724         struct xfs_da_state_blk *blk2)
725 {
726         struct xfs_da_intnode   *node;
727         struct xfs_da_intnode   *oldroot;
728         struct xfs_da_node_entry *btree;
729         struct xfs_da3_icnode_hdr nodehdr;
730         struct xfs_da_args      *args;
731         struct xfs_buf          *bp;
732         struct xfs_inode        *dp;
733         struct xfs_trans        *tp;
734         struct xfs_dir2_leaf    *leaf;
735         xfs_dablk_t             blkno;
736         int                     level;
737         int                     error;
738         int                     size;
739 
740         trace_xfs_da_root_split(state->args);
741 
742         /*
743          * Copy the existing (incorrect) block from the root node position
744          * to a free space somewhere.
745          */
746         args = state->args;
747         error = xfs_da_grow_inode(args, &blkno);
748         if (error)
749                 return error;
750 
751         dp = args->dp;
752         tp = args->trans;
753         error = xfs_da_get_buf(tp, dp, blkno, &bp, args->whichfork);
754         if (error)
755                 return error;
756         node = bp->b_addr;
757         oldroot = blk1->bp->b_addr;
758         if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
759             oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
760                 struct xfs_da3_icnode_hdr icnodehdr;
761 
762                 xfs_da3_node_hdr_from_disk(dp->i_mount, &icnodehdr, oldroot);
763                 btree = icnodehdr.btree;
764                 size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
765                 level = icnodehdr.level;
766         } else {
767                 struct xfs_dir3_icleaf_hdr leafhdr;
768 
769                 leaf = (xfs_dir2_leaf_t *)oldroot;
770                 xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);
771 
772                 ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
773                        leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
774                 size = (int)((char *)&leafhdr.ents[leafhdr.count] -
775                         (char *)leaf);
776                 level = 0;
777         }
778 
779         /*
780          * Copy old root to new buffer and log it.
781          */
782         xfs_da_buf_copy(bp, blk1->bp, size);
783         xfs_trans_log_buf(tp, bp, 0, size - 1);
784 
785         /*
786          * Update blk1 to point to new buffer.
787          */
788         blk1->bp = bp;
789         blk1->blkno = blkno;
790 
791         /*
792          * Set up the new root node.
793          */
794         error = xfs_da3_node_create(args,
795                 (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0,
796                 level + 1, &bp, args->whichfork);
797         if (error)
798                 return error;
799 
800         node = bp->b_addr;
801         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
802         btree = nodehdr.btree;
803         btree[0].hashval = cpu_to_be32(blk1->hashval);
804         btree[0].before = cpu_to_be32(blk1->blkno);
805         btree[1].hashval = cpu_to_be32(blk2->hashval);
806         btree[1].before = cpu_to_be32(blk2->blkno);
807         nodehdr.count = 2;
808         xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
809 
810 #ifdef DEBUG
811         if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
812             oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
813                 ASSERT(blk1->blkno >= args->geo->leafblk &&
814                        blk1->blkno < args->geo->freeblk);
815                 ASSERT(blk2->blkno >= args->geo->leafblk &&
816                        blk2->blkno < args->geo->freeblk);
817         }
818 #endif
819 
820         /* Header is already logged by xfs_da_node_create */
821         xfs_trans_log_buf(tp, bp,
822                 XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2));
823 
824         return 0;
825 }
826 
827 /*
828  * Split the node, rebalance, then add the new entry.
829  */
830 STATIC int                                              /* error */
831 xfs_da3_node_split(
832         struct xfs_da_state     *state,
833         struct xfs_da_state_blk *oldblk,
834         struct xfs_da_state_blk *newblk,
835         struct xfs_da_state_blk *addblk,
836         int                     treelevel,
837         int                     *result)
838 {
839         struct xfs_da_intnode   *node;
840         struct xfs_da3_icnode_hdr nodehdr;
841         xfs_dablk_t             blkno;
842         int                     newcount;
843         int                     error;
844         int                     useextra;
845         struct xfs_inode        *dp = state->args->dp;
846 
847         trace_xfs_da_node_split(state->args);
848 
849         node = oldblk->bp->b_addr;
850         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
851 
852         /*
853          * With V2 dirs the extra block is data or freespace.
854          */
855         useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
856         newcount = 1 + useextra;
857         /*
858          * Do we have to split the node?
859          */
860         if (nodehdr.count + newcount > state->args->geo->node_ents) {
861                 /*
862                  * Allocate a new node, add to the doubly linked chain of
863                  * nodes, then move some of our excess entries into it.
864                  */
865                 error = xfs_da_grow_inode(state->args, &blkno);
866                 if (error)
867                         return error;   /* GROT: dir is inconsistent */
868 
869                 error = xfs_da3_node_create(state->args, blkno, treelevel,
870                                            &newblk->bp, state->args->whichfork);
871                 if (error)
872                         return error;   /* GROT: dir is inconsistent */
873                 newblk->blkno = blkno;
874                 newblk->magic = XFS_DA_NODE_MAGIC;
875                 xfs_da3_node_rebalance(state, oldblk, newblk);
876                 error = xfs_da3_blk_link(state, oldblk, newblk);
877                 if (error)
878                         return error;
879                 *result = 1;
880         } else {
881                 *result = 0;
882         }
883 
884         /*
885          * Insert the new entry(s) into the correct block
886          * (updating last hashval in the process).
887          *
888          * xfs_da3_node_add() inserts BEFORE the given index,
889          * and as a result of using node_lookup_int() we always
890          * point to a valid entry (not after one), but a split
891          * operation always results in a new block whose hashvals
892          * FOLLOW the current block.
893          *
894          * If we had double-split op below us, then add the extra block too.
895          */
896         node = oldblk->bp->b_addr;
897         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
898         if (oldblk->index <= nodehdr.count) {
899                 oldblk->index++;
900                 xfs_da3_node_add(state, oldblk, addblk);
901                 if (useextra) {
902                         if (state->extraafter)
903                                 oldblk->index++;
904                         xfs_da3_node_add(state, oldblk, &state->extrablk);
905                         state->extravalid = 0;
906                 }
907         } else {
908                 newblk->index++;
909                 xfs_da3_node_add(state, newblk, addblk);
910                 if (useextra) {
911                         if (state->extraafter)
912                                 newblk->index++;
913                         xfs_da3_node_add(state, newblk, &state->extrablk);
914                         state->extravalid = 0;
915                 }
916         }
917 
918         return 0;
919 }
920 
921 /*
922  * Balance the btree elements between two intermediate nodes,
923  * usually one full and one empty.
924  *
925  * NOTE: if blk2 is empty, then it will get the upper half of blk1.
926  */
927 STATIC void
928 xfs_da3_node_rebalance(
929         struct xfs_da_state     *state,
930         struct xfs_da_state_blk *blk1,
931         struct xfs_da_state_blk *blk2)
932 {
933         struct xfs_da_intnode   *node1;
934         struct xfs_da_intnode   *node2;
935         struct xfs_da_node_entry *btree1;
936         struct xfs_da_node_entry *btree2;
937         struct xfs_da_node_entry *btree_s;
938         struct xfs_da_node_entry *btree_d;
939         struct xfs_da3_icnode_hdr nodehdr1;
940         struct xfs_da3_icnode_hdr nodehdr2;
941         struct xfs_trans        *tp;
942         int                     count;
943         int                     tmp;
944         int                     swap = 0;
945         struct xfs_inode        *dp = state->args->dp;
946 
947         trace_xfs_da_node_rebalance(state->args);
948 
949         node1 = blk1->bp->b_addr;
950         node2 = blk2->bp->b_addr;
951         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
952         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
953         btree1 = nodehdr1.btree;
954         btree2 = nodehdr2.btree;
955 
956         /*
957          * Figure out how many entries need to move, and in which direction.
958          * Swap the nodes around if that makes it simpler.
959          */
960         if (nodehdr1.count > 0 && nodehdr2.count > 0 &&
961             ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
962              (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
963                         be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
964                 swap(node1, node2);
965                 xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
966                 xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
967                 btree1 = nodehdr1.btree;
968                 btree2 = nodehdr2.btree;
969                 swap = 1;
970         }
971 
972         count = (nodehdr1.count - nodehdr2.count) / 2;
973         if (count == 0)
974                 return;
975         tp = state->args->trans;
976         /*
977          * Two cases: high-to-low and low-to-high.
978          */
979         if (count > 0) {
980                 /*
981                  * Move elements in node2 up to make a hole.
982                  */
983                 tmp = nodehdr2.count;
984                 if (tmp > 0) {
985                         tmp *= (uint)sizeof(xfs_da_node_entry_t);
986                         btree_s = &btree2[0];
987                         btree_d = &btree2[count];
988                         memmove(btree_d, btree_s, tmp);
989                 }
990 
991                 /*
992                  * Move the req'd B-tree elements from high in node1 to
993                  * low in node2.
994                  */
995                 nodehdr2.count += count;
996                 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
997                 btree_s = &btree1[nodehdr1.count - count];
998                 btree_d = &btree2[0];
999                 memcpy(btree_d, btree_s, tmp);
1000                 nodehdr1.count -= count;
1001         } else {
1002                 /*
1003                  * Move the req'd B-tree elements from low in node2 to
1004                  * high in node1.
1005                  */
1006                 count = -count;
1007                 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
1008                 btree_s = &btree2[0];
1009                 btree_d = &btree1[nodehdr1.count];
1010                 memcpy(btree_d, btree_s, tmp);
1011                 nodehdr1.count += count;
1012 
1013                 xfs_trans_log_buf(tp, blk1->bp,
1014                         XFS_DA_LOGRANGE(node1, btree_d, tmp));
1015 
1016                 /*
1017                  * Move elements in node2 down to fill the hole.
1018                  */
1019                 tmp  = nodehdr2.count - count;
1020                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
1021                 btree_s = &btree2[count];
1022                 btree_d = &btree2[0];
1023                 memmove(btree_d, btree_s, tmp);
1024                 nodehdr2.count -= count;
1025         }
1026 
1027         /*
1028          * Log header of node 1 and all current bits of node 2.
1029          */
1030         xfs_da3_node_hdr_to_disk(dp->i_mount, node1, &nodehdr1);
1031         xfs_trans_log_buf(tp, blk1->bp,
1032                 XFS_DA_LOGRANGE(node1, &node1->hdr,
1033                                 state->args->geo->node_hdr_size));
1034 
1035         xfs_da3_node_hdr_to_disk(dp->i_mount, node2, &nodehdr2);
1036         xfs_trans_log_buf(tp, blk2->bp,
1037                 XFS_DA_LOGRANGE(node2, &node2->hdr,
1038                                 state->args->geo->node_hdr_size +
1039                                 (sizeof(btree2[0]) * nodehdr2.count)));
1040 
1041         /*
1042          * Record the last hashval from each block for upward propagation.
1043          * (note: don't use the swapped node pointers)
1044          */
1045         if (swap) {
1046                 node1 = blk1->bp->b_addr;
1047                 node2 = blk2->bp->b_addr;
1048                 xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
1049                 xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
1050                 btree1 = nodehdr1.btree;
1051                 btree2 = nodehdr2.btree;
1052         }
1053         blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
1054         blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);
1055 
1056         /*
1057          * Adjust the expected index for insertion.
1058          */
1059         if (blk1->index >= nodehdr1.count) {
1060                 blk2->index = blk1->index - nodehdr1.count;
1061                 blk1->index = nodehdr1.count + 1;       /* make it invalid */
1062         }
1063 }
1064 
1065 /*
1066  * Add a new entry to an intermediate node.
1067  */
1068 STATIC void
1069 xfs_da3_node_add(
1070         struct xfs_da_state     *state,
1071         struct xfs_da_state_blk *oldblk,
1072         struct xfs_da_state_blk *newblk)
1073 {
1074         struct xfs_da_intnode   *node;
1075         struct xfs_da3_icnode_hdr nodehdr;
1076         struct xfs_da_node_entry *btree;
1077         int                     tmp;
1078         struct xfs_inode        *dp = state->args->dp;
1079 
1080         trace_xfs_da_node_add(state->args);
1081 
1082         node = oldblk->bp->b_addr;
1083         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1084         btree = nodehdr.btree;
1085 
1086         ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
1087         ASSERT(newblk->blkno != 0);
1088         if (state->args->whichfork == XFS_DATA_FORK)
1089                 ASSERT(newblk->blkno >= state->args->geo->leafblk &&
1090                        newblk->blkno < state->args->geo->freeblk);
1091 
1092         /*
1093          * We may need to make some room before we insert the new node.
1094          */
1095         tmp = 0;
1096         if (oldblk->index < nodehdr.count) {
1097                 tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree);
1098                 memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp);
1099         }
1100         btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval);
1101         btree[oldblk->index].before = cpu_to_be32(newblk->blkno);
1102         xfs_trans_log_buf(state->args->trans, oldblk->bp,
1103                 XFS_DA_LOGRANGE(node, &btree[oldblk->index],
1104                                 tmp + sizeof(*btree)));
1105 
1106         nodehdr.count += 1;
1107         xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
1108         xfs_trans_log_buf(state->args->trans, oldblk->bp,
1109                 XFS_DA_LOGRANGE(node, &node->hdr,
1110                                 state->args->geo->node_hdr_size));
1111 
1112         /*
1113          * Copy the last hash value from the oldblk to propagate upwards.
1114          */
1115         oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
1116 }
1117 
1118 /*========================================================================
1119  * Routines used for shrinking the Btree.
1120  *========================================================================*/
1121 
1122 /*
1123  * Deallocate an empty leaf node, remove it from its parent,
1124  * possibly deallocating that block, etc...
1125  */
1126 int
1127 xfs_da3_join(
1128         struct xfs_da_state     *state)
1129 {
1130         struct xfs_da_state_blk *drop_blk;
1131         struct xfs_da_state_blk *save_blk;
1132         int                     action = 0;
1133         int                     error;
1134 
1135         trace_xfs_da_join(state->args);
1136 
1137         drop_blk = &state->path.blk[ state->path.active-1 ];
1138         save_blk = &state->altpath.blk[ state->path.active-1 ];
1139         ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
1140         ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
1141                drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
1142 
1143         /*
1144          * Walk back up the tree joining/deallocating as necessary.
1145          * When we stop dropping blocks, break out.
1146          */
1147         for (  ; state->path.active >= 2; drop_blk--, save_blk--,
1148                  state->path.active--) {
1149                 /*
1150                  * See if we can combine the block with a neighbor.
1151                  *   (action == 0) => no options, just leave
1152                  *   (action == 1) => coalesce, then unlink
1153                  *   (action == 2) => block empty, unlink it
1154                  */
1155                 switch (drop_blk->magic) {
1156                 case XFS_ATTR_LEAF_MAGIC:
1157                         error = xfs_attr3_leaf_toosmall(state, &action);
1158                         if (error)
1159                                 return error;
1160                         if (action == 0)
1161                                 return 0;
1162                         xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
1163                         break;
1164                 case XFS_DIR2_LEAFN_MAGIC:
1165                         error = xfs_dir2_leafn_toosmall(state, &action);
1166                         if (error)
1167                                 return error;
1168                         if (action == 0)
1169                                 return 0;
1170                         xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
1171                         break;
1172                 case XFS_DA_NODE_MAGIC:
1173                         /*
1174                          * Remove the offending node, fixup hashvals,
1175                          * check for a toosmall neighbor.
1176                          */
1177                         xfs_da3_node_remove(state, drop_blk);
1178                         xfs_da3_fixhashpath(state, &state->path);
1179                         error = xfs_da3_node_toosmall(state, &action);
1180                         if (error)
1181                                 return error;
1182                         if (action == 0)
1183                                 return 0;
1184                         xfs_da3_node_unbalance(state, drop_blk, save_blk);
1185                         break;
1186                 }
1187                 xfs_da3_fixhashpath(state, &state->altpath);
1188                 error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
1189                 xfs_da_state_kill_altpath(state);
1190                 if (error)
1191                         return error;
1192                 error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
1193                                                          drop_blk->bp);
1194                 drop_blk->bp = NULL;
1195                 if (error)
1196                         return error;
1197         }
1198         /*
1199          * We joined all the way to the top.  If it turns out that
1200          * we only have one entry in the root, make the child block
1201          * the new root.
1202          */
1203         xfs_da3_node_remove(state, drop_blk);
1204         xfs_da3_fixhashpath(state, &state->path);
1205         error = xfs_da3_root_join(state, &state->path.blk[0]);
1206         return error;
1207 }
1208 
1209 #ifdef  DEBUG
1210 static void
1211 xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
1212 {
1213         __be16  magic = blkinfo->magic;
1214 
1215         if (level == 1) {
1216                 ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1217                        magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
1218                        magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
1219                        magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
1220         } else {
1221                 ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1222                        magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
1223         }
1224         ASSERT(!blkinfo->forw);
1225         ASSERT(!blkinfo->back);
1226 }
1227 #else   /* !DEBUG */
1228 #define xfs_da_blkinfo_onlychild_validate(blkinfo, level)
1229 #endif  /* !DEBUG */
1230 
1231 /*
1232  * We have only one entry in the root.  Copy the only remaining child of
1233  * the old root to block 0 as the new root node.
1234  */
1235 STATIC int
1236 xfs_da3_root_join(
1237         struct xfs_da_state     *state,
1238         struct xfs_da_state_blk *root_blk)
1239 {
1240         struct xfs_da_intnode   *oldroot;
1241         struct xfs_da_args      *args;
1242         xfs_dablk_t             child;
1243         struct xfs_buf          *bp;
1244         struct xfs_da3_icnode_hdr oldroothdr;
1245         int                     error;
1246         struct xfs_inode        *dp = state->args->dp;
1247         xfs_failaddr_t          fa;
1248 
1249         trace_xfs_da_root_join(state->args);
1250 
1251         ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
1252 
1253         args = state->args;
1254         oldroot = root_blk->bp->b_addr;
1255         xfs_da3_node_hdr_from_disk(dp->i_mount, &oldroothdr, oldroot);
1256         ASSERT(oldroothdr.forw == 0);
1257         ASSERT(oldroothdr.back == 0);
1258 
1259         /*
1260          * If the root has more than one child, then don't do anything.
1261          */
1262         if (oldroothdr.count > 1)
1263                 return 0;
1264 
1265         /*
1266          * Read in the (only) child block, then copy those bytes into
1267          * the root block's buffer and free the original child block.
1268          */
1269         child = be32_to_cpu(oldroothdr.btree[0].before);
1270         ASSERT(child != 0);
1271         error = xfs_da3_node_read(args->trans, dp, child, &bp, args->whichfork);
1272         if (error)
1273                 return error;
1274         fa = xfs_da3_header_check(bp, args->owner);
1275         if (fa) {
1276                 __xfs_buf_mark_corrupt(bp, fa);
1277                 xfs_trans_brelse(args->trans, bp);
1278                 xfs_da_mark_sick(args);
1279                 return -EFSCORRUPTED;
1280         }
1281         xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);
1282 
1283         /*
1284          * Copy child to root buffer and log it.
1285          */
1286         xfs_da_buf_copy(root_blk->bp, bp, args->geo->blksize);
1287         xfs_trans_log_buf(args->trans, root_blk->bp, 0,
1288                           args->geo->blksize - 1);
1289         /*
1290          * Now we can drop the child buffer.
1291          */
1292         error = xfs_da_shrink_inode(args, child, bp);
1293         return error;
1294 }
1295 
1296 /*
1297  * Check a node block and its neighbors to see if the block should be
1298  * collapsed into one or the other neighbor.  Always keep the block
1299  * with the smaller block number.
1300  * If the current block is over 50% full, don't try to join it, return 0.
1301  * If the block is empty, fill in the state structure and return 2.
1302  * If it can be collapsed, fill in the state structure and return 1.
1303  * If nothing can be done, return 0.
1304  */
1305 STATIC int
1306 xfs_da3_node_toosmall(
1307         struct xfs_da_state     *state,
1308         int                     *action)
1309 {
1310         struct xfs_da_intnode   *node;
1311         struct xfs_da_state_blk *blk;
1312         struct xfs_da_blkinfo   *info;
1313         xfs_dablk_t             blkno;
1314         struct xfs_buf          *bp;
1315         xfs_failaddr_t          fa;
1316         struct xfs_da3_icnode_hdr nodehdr;
1317         int                     count;
1318         int                     forward;
1319         int                     error;
1320         int                     retval;
1321         int                     i;
1322         struct xfs_inode        *dp = state->args->dp;
1323 
1324         trace_xfs_da_node_toosmall(state->args);
1325 
1326         /*
1327          * Check for the degenerate case of the block being over 50% full.
1328          * If so, it's not worth even looking to see if we might be able
1329          * to coalesce with a sibling.
1330          */
1331         blk = &state->path.blk[ state->path.active-1 ];
1332         info = blk->bp->b_addr;
1333         node = (xfs_da_intnode_t *)info;
1334         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1335         if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
1336                 *action = 0;    /* blk over 50%, don't try to join */
1337                 return 0;       /* blk over 50%, don't try to join */
1338         }
1339 
1340         /*
1341          * Check for the degenerate case of the block being empty.
1342          * If the block is empty, we'll simply delete it, no need to
1343          * coalesce it with a sibling block.  We choose (arbitrarily)
1344          * to merge with the forward block unless it is NULL.
1345          */
1346         if (nodehdr.count == 0) {
1347                 /*
1348                  * Make altpath point to the block we want to keep and
1349                  * path point to the block we want to drop (this one).
1350                  */
1351                 forward = (info->forw != 0);
1352                 memcpy(&state->altpath, &state->path, sizeof(state->path));
1353                 error = xfs_da3_path_shift(state, &state->altpath, forward,
1354                                                  0, &retval);
1355                 if (error)
1356                         return error;
1357                 if (retval) {
1358                         *action = 0;
1359                 } else {
1360                         *action = 2;
1361                 }
1362                 return 0;
1363         }
1364 
1365         /*
1366          * Examine each sibling block to see if we can coalesce with
1367          * at least 25% free space to spare.  We need to figure out
1368          * whether to merge with the forward or the backward block.
1369          * We prefer coalescing with the lower numbered sibling so as
1370          * to shrink a directory over time.
1371          */
1372         count  = state->args->geo->node_ents;
1373         count -= state->args->geo->node_ents >> 2;
1374         count -= nodehdr.count;
1375 
1376         /* start with smaller blk num */
1377         forward = nodehdr.forw < nodehdr.back;
1378         for (i = 0; i < 2; forward = !forward, i++) {
1379                 struct xfs_da3_icnode_hdr thdr;
1380                 if (forward)
1381                         blkno = nodehdr.forw;
1382                 else
1383                         blkno = nodehdr.back;
1384                 if (blkno == 0)
1385                         continue;
1386                 error = xfs_da3_node_read(state->args->trans, dp, blkno, &bp,
1387                                 state->args->whichfork);
1388                 if (error)
1389                         return error;
1390                 fa = xfs_da3_node_header_check(bp, state->args->owner);
1391                 if (fa) {
1392                         __xfs_buf_mark_corrupt(bp, fa);
1393                         xfs_trans_brelse(state->args->trans, bp);
1394                         xfs_da_mark_sick(state->args);
1395                         return -EFSCORRUPTED;
1396                 }
1397 
1398                 node = bp->b_addr;
1399                 xfs_da3_node_hdr_from_disk(dp->i_mount, &thdr, node);
1400                 xfs_trans_brelse(state->args->trans, bp);
1401 
1402                 if (count - thdr.count >= 0)
1403                         break;  /* fits with at least 25% to spare */
1404         }
1405         if (i >= 2) {
1406                 *action = 0;
1407                 return 0;
1408         }
1409 
1410         /*
1411          * Make altpath point to the block we want to keep (the lower
1412          * numbered block) and path point to the block we want to drop.
1413          */
1414         memcpy(&state->altpath, &state->path, sizeof(state->path));
1415         if (blkno < blk->blkno) {
1416                 error = xfs_da3_path_shift(state, &state->altpath, forward,
1417                                                  0, &retval);
1418         } else {
1419                 error = xfs_da3_path_shift(state, &state->path, forward,
1420                                                  0, &retval);
1421         }
1422         if (error)
1423                 return error;
1424         if (retval) {
1425                 *action = 0;
1426                 return 0;
1427         }
1428         *action = 1;
1429         return 0;
1430 }
1431 
1432 /*
1433  * Pick up the last hashvalue from an intermediate node.
1434  */
1435 STATIC uint
1436 xfs_da3_node_lasthash(
1437         struct xfs_inode        *dp,
1438         struct xfs_buf          *bp,
1439         int                     *count)
1440 {
1441         struct xfs_da3_icnode_hdr nodehdr;
1442 
1443         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, bp->b_addr);
1444         if (count)
1445                 *count = nodehdr.count;
1446         if (!nodehdr.count)
1447                 return 0;
1448         return be32_to_cpu(nodehdr.btree[nodehdr.count - 1].hashval);
1449 }
1450 
1451 /*
1452  * Walk back up the tree adjusting hash values as necessary,
1453  * when we stop making changes, return.
1454  */
1455 void
1456 xfs_da3_fixhashpath(
1457         struct xfs_da_state     *state,
1458         struct xfs_da_state_path *path)
1459 {
1460         struct xfs_da_state_blk *blk;
1461         struct xfs_da_intnode   *node;
1462         struct xfs_da_node_entry *btree;
1463         xfs_dahash_t            lasthash=0;
1464         int                     level;
1465         int                     count;
1466         struct xfs_inode        *dp = state->args->dp;
1467 
1468         trace_xfs_da_fixhashpath(state->args);
1469 
1470         level = path->active-1;
1471         blk = &path->blk[ level ];
1472         switch (blk->magic) {
1473         case XFS_ATTR_LEAF_MAGIC:
1474                 lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
1475                 if (count == 0)
1476                         return;
1477                 break;
1478         case XFS_DIR2_LEAFN_MAGIC:
1479                 lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count);
1480                 if (count == 0)
1481                         return;
1482                 break;
1483         case XFS_DA_NODE_MAGIC:
1484                 lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count);
1485                 if (count == 0)
1486                         return;
1487                 break;
1488         }
1489         for (blk--, level--; level >= 0; blk--, level--) {
1490                 struct xfs_da3_icnode_hdr nodehdr;
1491 
1492                 node = blk->bp->b_addr;
1493                 xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1494                 btree = nodehdr.btree;
1495                 if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
1496                         break;
1497                 blk->hashval = lasthash;
1498                 btree[blk->index].hashval = cpu_to_be32(lasthash);
1499                 xfs_trans_log_buf(state->args->trans, blk->bp,
1500                                   XFS_DA_LOGRANGE(node, &btree[blk->index],
1501                                                   sizeof(*btree)));
1502 
1503                 lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval);
1504         }
1505 }
1506 
1507 /*
1508  * Remove an entry from an intermediate node.
1509  */
1510 STATIC void
1511 xfs_da3_node_remove(
1512         struct xfs_da_state     *state,
1513         struct xfs_da_state_blk *drop_blk)
1514 {
1515         struct xfs_da_intnode   *node;
1516         struct xfs_da3_icnode_hdr nodehdr;
1517         struct xfs_da_node_entry *btree;
1518         int                     index;
1519         int                     tmp;
1520         struct xfs_inode        *dp = state->args->dp;
1521 
1522         trace_xfs_da_node_remove(state->args);
1523 
1524         node = drop_blk->bp->b_addr;
1525         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1526         ASSERT(drop_blk->index < nodehdr.count);
1527         ASSERT(drop_blk->index >= 0);
1528 
1529         /*
1530          * Copy over the offending entry, or just zero it out.
1531          */
1532         index = drop_blk->index;
1533         btree = nodehdr.btree;
1534         if (index < nodehdr.count - 1) {
1535                 tmp  = nodehdr.count - index - 1;
1536                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
1537                 memmove(&btree[index], &btree[index + 1], tmp);
1538                 xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1539                     XFS_DA_LOGRANGE(node, &btree[index], tmp));
1540                 index = nodehdr.count - 1;
1541         }
1542         memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
1543         xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1544             XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
1545         nodehdr.count -= 1;
1546         xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
1547         xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1548             XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size));
1549 
1550         /*
1551          * Copy the last hash value from the block to propagate upwards.
1552          */
1553         drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval);
1554 }
1555 
1556 /*
1557  * Unbalance the elements between two intermediate nodes,
1558  * move all Btree elements from one node into another.
1559  */
1560 STATIC void
1561 xfs_da3_node_unbalance(
1562         struct xfs_da_state     *state,
1563         struct xfs_da_state_blk *drop_blk,
1564         struct xfs_da_state_blk *save_blk)
1565 {
1566         struct xfs_da_intnode   *drop_node;
1567         struct xfs_da_intnode   *save_node;
1568         struct xfs_da_node_entry *drop_btree;
1569         struct xfs_da_node_entry *save_btree;
1570         struct xfs_da3_icnode_hdr drop_hdr;
1571         struct xfs_da3_icnode_hdr save_hdr;
1572         struct xfs_trans        *tp;
1573         int                     sindex;
1574         int                     tmp;
1575         struct xfs_inode        *dp = state->args->dp;
1576 
1577         trace_xfs_da_node_unbalance(state->args);
1578 
1579         drop_node = drop_blk->bp->b_addr;
1580         save_node = save_blk->bp->b_addr;
1581         xfs_da3_node_hdr_from_disk(dp->i_mount, &drop_hdr, drop_node);
1582         xfs_da3_node_hdr_from_disk(dp->i_mount, &save_hdr, save_node);
1583         drop_btree = drop_hdr.btree;
1584         save_btree = save_hdr.btree;
1585         tp = state->args->trans;
1586 
1587         /*
1588          * If the dying block has lower hashvals, then move all the
1589          * elements in the remaining block up to make a hole.
1590          */
1591         if ((be32_to_cpu(drop_btree[0].hashval) <
1592                         be32_to_cpu(save_btree[0].hashval)) ||
1593             (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) <
1594                         be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) {
1595                 /* XXX: check this - is memmove dst correct? */
1596                 tmp = save_hdr.count * sizeof(xfs_da_node_entry_t);
1597                 memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp);
1598 
1599                 sindex = 0;
1600                 xfs_trans_log_buf(tp, save_blk->bp,
1601                         XFS_DA_LOGRANGE(save_node, &save_btree[0],
1602                                 (save_hdr.count + drop_hdr.count) *
1603                                                 sizeof(xfs_da_node_entry_t)));
1604         } else {
1605                 sindex = save_hdr.count;
1606                 xfs_trans_log_buf(tp, save_blk->bp,
1607                         XFS_DA_LOGRANGE(save_node, &save_btree[sindex],
1608                                 drop_hdr.count * sizeof(xfs_da_node_entry_t)));
1609         }
1610 
1611         /*
1612          * Move all the B-tree elements from drop_blk to save_blk.
1613          */
1614         tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t);
1615         memcpy(&save_btree[sindex], &drop_btree[0], tmp);
1616         save_hdr.count += drop_hdr.count;
1617 
1618         xfs_da3_node_hdr_to_disk(dp->i_mount, save_node, &save_hdr);
1619         xfs_trans_log_buf(tp, save_blk->bp,
1620                 XFS_DA_LOGRANGE(save_node, &save_node->hdr,
1621                                 state->args->geo->node_hdr_size));
1622 
1623         /*
1624          * Save the last hashval in the remaining block for upward propagation.
1625          */
1626         save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval);
1627 }
1628 
1629 /*========================================================================
1630  * Routines used for finding things in the Btree.
1631  *========================================================================*/
1632 
1633 /*
1634  * Walk down the Btree looking for a particular filename, filling
1635  * in the state structure as we go.
1636  *
1637  * We will set the state structure to point to each of the elements
1638  * in each of the nodes where either the hashval is or should be.
1639  *
1640  * We support duplicate hashval's so for each entry in the current
1641  * node that could contain the desired hashval, descend.  This is a
1642  * pruned depth-first tree search.
1643  */
1644 int                                                     /* error */
1645 xfs_da3_node_lookup_int(
1646         struct xfs_da_state     *state,
1647         int                     *result)
1648 {
1649         struct xfs_da_state_blk *blk;
1650         struct xfs_da_blkinfo   *curr;
1651         struct xfs_da_intnode   *node;
1652         struct xfs_da_node_entry *btree;
1653         struct xfs_da3_icnode_hdr nodehdr;
1654         struct xfs_da_args      *args;
1655         xfs_failaddr_t          fa;
1656         xfs_dablk_t             blkno;
1657         xfs_dahash_t            hashval;
1658         xfs_dahash_t            btreehashval;
1659         int                     probe;
1660         int                     span;
1661         int                     max;
1662         int                     error;
1663         int                     retval;
1664         unsigned int            expected_level = 0;
1665         uint16_t                magic;
1666         struct xfs_inode        *dp = state->args->dp;
1667 
1668         args = state->args;
1669 
1670         /*
1671          * Descend thru the B-tree searching each level for the right
1672          * node to use, until the right hashval is found.
1673          */
1674         blkno = args->geo->leafblk;
1675         for (blk = &state->path.blk[0], state->path.active = 1;
1676                          state->path.active <= XFS_DA_NODE_MAXDEPTH;
1677                          blk++, state->path.active++) {
1678                 /*
1679                  * Read the next node down in the tree.
1680                  */
1681                 blk->blkno = blkno;
1682                 error = xfs_da3_node_read(args->trans, args->dp, blkno,
1683                                         &blk->bp, args->whichfork);
1684                 if (error) {
1685                         blk->blkno = 0;
1686                         state->path.active--;
1687                         return error;
1688                 }
1689                 curr = blk->bp->b_addr;
1690                 magic = be16_to_cpu(curr->magic);
1691 
1692                 if (magic == XFS_ATTR_LEAF_MAGIC ||
1693                     magic == XFS_ATTR3_LEAF_MAGIC) {
1694                         fa = xfs_attr3_leaf_header_check(blk->bp, args->owner);
1695                         if (fa) {
1696                                 __xfs_buf_mark_corrupt(blk->bp, fa);
1697                                 xfs_da_mark_sick(args);
1698                                 return -EFSCORRUPTED;
1699                         }
1700                         blk->magic = XFS_ATTR_LEAF_MAGIC;
1701                         blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1702                         break;
1703                 }
1704 
1705                 if (magic == XFS_DIR2_LEAFN_MAGIC ||
1706                     magic == XFS_DIR3_LEAFN_MAGIC) {
1707                         fa = xfs_dir3_leaf_header_check(blk->bp, args->owner);
1708                         if (fa) {
1709                                 __xfs_buf_mark_corrupt(blk->bp, fa);
1710                                 xfs_da_mark_sick(args);
1711                                 return -EFSCORRUPTED;
1712                         }
1713                         blk->magic = XFS_DIR2_LEAFN_MAGIC;
1714                         blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
1715                                                               blk->bp, NULL);
1716                         break;
1717                 }
1718 
1719                 if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
1720                         xfs_buf_mark_corrupt(blk->bp);
1721                         xfs_da_mark_sick(args);
1722                         return -EFSCORRUPTED;
1723                 }
1724 
1725                 fa = xfs_da3_node_header_check(blk->bp, args->owner);
1726                 if (fa) {
1727                         __xfs_buf_mark_corrupt(blk->bp, fa);
1728                         xfs_da_mark_sick(args);
1729                         return -EFSCORRUPTED;
1730                 }
1731 
1732                 blk->magic = XFS_DA_NODE_MAGIC;
1733 
1734                 /*
1735                  * Search an intermediate node for a match.
1736                  */
1737                 node = blk->bp->b_addr;
1738                 xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1739                 btree = nodehdr.btree;
1740 
1741                 /* Tree taller than we can handle; bail out! */
1742                 if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
1743                         xfs_buf_mark_corrupt(blk->bp);
1744                         xfs_da_mark_sick(args);
1745                         return -EFSCORRUPTED;
1746                 }
1747 
1748                 /* Check the level from the root. */
1749                 if (blkno == args->geo->leafblk)
1750                         expected_level = nodehdr.level - 1;
1751                 else if (expected_level != nodehdr.level) {
1752                         xfs_buf_mark_corrupt(blk->bp);
1753                         xfs_da_mark_sick(args);
1754                         return -EFSCORRUPTED;
1755                 } else
1756                         expected_level--;
1757 
1758                 max = nodehdr.count;
1759                 blk->hashval = be32_to_cpu(btree[max - 1].hashval);
1760 
1761                 /*
1762                  * Binary search.  (note: small blocks will skip loop)
1763                  */
1764                 probe = span = max / 2;
1765                 hashval = args->hashval;
1766                 while (span > 4) {
1767                         span /= 2;
1768                         btreehashval = be32_to_cpu(btree[probe].hashval);
1769                         if (btreehashval < hashval)
1770                                 probe += span;
1771                         else if (btreehashval > hashval)
1772                                 probe -= span;
1773                         else
1774                                 break;
1775                 }
1776                 ASSERT((probe >= 0) && (probe < max));
1777                 ASSERT((span <= 4) ||
1778                         (be32_to_cpu(btree[probe].hashval) == hashval));
1779 
1780                 /*
1781                  * Since we may have duplicate hashval's, find the first
1782                  * matching hashval in the node.
1783                  */
1784                 while (probe > 0 &&
1785                        be32_to_cpu(btree[probe].hashval) >= hashval) {
1786                         probe--;
1787                 }
1788                 while (probe < max &&
1789                        be32_to_cpu(btree[probe].hashval) < hashval) {
1790                         probe++;
1791                 }
1792 
1793                 /*
1794                  * Pick the right block to descend on.
1795                  */
1796                 if (probe == max) {
1797                         blk->index = max - 1;
1798                         blkno = be32_to_cpu(btree[max - 1].before);
1799                 } else {
1800                         blk->index = probe;
1801                         blkno = be32_to_cpu(btree[probe].before);
1802                 }
1803 
1804                 /* We can't point back to the root. */
1805                 if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) {
1806                         xfs_da_mark_sick(args);
1807                         return -EFSCORRUPTED;
1808                 }
1809         }
1810 
1811         if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) {
1812                 xfs_da_mark_sick(args);
1813                 return -EFSCORRUPTED;
1814         }
1815 
1816         /*
1817          * A leaf block that ends in the hashval that we are interested in
1818          * (final hashval == search hashval) means that the next block may
1819          * contain more entries with the same hashval, shift upward to the
1820          * next leaf and keep searching.
1821          */
1822         for (;;) {
1823                 if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1824                         retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1825                                                         &blk->index, state);
1826                 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1827                         retval = xfs_attr3_leaf_lookup_int(blk->bp, args);
1828                         blk->index = args->index;
1829                         args->blkno = blk->blkno;
1830                 } else {
1831                         ASSERT(0);
1832                         xfs_da_mark_sick(args);
1833                         return -EFSCORRUPTED;
1834                 }
1835                 if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
1836                     (blk->hashval == args->hashval)) {
1837                         error = xfs_da3_path_shift(state, &state->path, 1, 1,
1838                                                          &retval);
1839                         if (error)
1840                                 return error;
1841                         if (retval == 0) {
1842                                 continue;
1843                         } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1844                                 /* path_shift() gives ENOENT */
1845                                 retval = -ENOATTR;
1846                         }
1847                 }
1848                 break;
1849         }
1850         *result = retval;
1851         return 0;
1852 }
1853 
1854 /*========================================================================
1855  * Utility routines.
1856  *========================================================================*/
1857 
1858 /*
1859  * Compare two intermediate nodes for "order".
1860  */
1861 STATIC int
1862 xfs_da3_node_order(
1863         struct xfs_inode *dp,
1864         struct xfs_buf  *node1_bp,
1865         struct xfs_buf  *node2_bp)
1866 {
1867         struct xfs_da_intnode   *node1;
1868         struct xfs_da_intnode   *node2;
1869         struct xfs_da_node_entry *btree1;
1870         struct xfs_da_node_entry *btree2;
1871         struct xfs_da3_icnode_hdr node1hdr;
1872         struct xfs_da3_icnode_hdr node2hdr;
1873 
1874         node1 = node1_bp->b_addr;
1875         node2 = node2_bp->b_addr;
1876         xfs_da3_node_hdr_from_disk(dp->i_mount, &node1hdr, node1);
1877         xfs_da3_node_hdr_from_disk(dp->i_mount, &node2hdr, node2);
1878         btree1 = node1hdr.btree;
1879         btree2 = node2hdr.btree;
1880 
1881         if (node1hdr.count > 0 && node2hdr.count > 0 &&
1882             ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
1883              (be32_to_cpu(btree2[node2hdr.count - 1].hashval) <
1884               be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) {
1885                 return 1;
1886         }
1887         return 0;
1888 }
1889 
1890 /*
1891  * Link a new block into a doubly linked list of blocks (of whatever type).
1892  */
1893 int                                                     /* error */
1894 xfs_da3_blk_link(
1895         struct xfs_da_state     *state,
1896         struct xfs_da_state_blk *old_blk,
1897         struct xfs_da_state_blk *new_blk)
1898 {
1899         struct xfs_da_blkinfo   *old_info;
1900         struct xfs_da_blkinfo   *new_info;
1901         struct xfs_da_blkinfo   *tmp_info;
1902         struct xfs_da_args      *args;
1903         struct xfs_buf          *bp;
1904         xfs_failaddr_t          fa;
1905         int                     before = 0;
1906         int                     error;
1907         struct xfs_inode        *dp = state->args->dp;
1908 
1909         /*
1910          * Set up environment.
1911          */
1912         args = state->args;
1913         ASSERT(args != NULL);
1914         old_info = old_blk->bp->b_addr;
1915         new_info = new_blk->bp->b_addr;
1916         ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1917                old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1918                old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1919 
1920         switch (old_blk->magic) {
1921         case XFS_ATTR_LEAF_MAGIC:
1922                 before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
1923                 break;
1924         case XFS_DIR2_LEAFN_MAGIC:
1925                 before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp);
1926                 break;
1927         case XFS_DA_NODE_MAGIC:
1928                 before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp);
1929                 break;
1930         }
1931 
1932         /*
1933          * Link blocks in appropriate order.
1934          */
1935         if (before) {
1936                 /*
1937                  * Link new block in before existing block.
1938                  */
1939                 trace_xfs_da_link_before(args);
1940                 new_info->forw = cpu_to_be32(old_blk->blkno);
1941                 new_info->back = old_info->back;
1942                 if (old_info->back) {
1943                         error = xfs_da3_node_read(args->trans, dp,
1944                                                 be32_to_cpu(old_info->back),
1945                                                 &bp, args->whichfork);
1946                         if (error)
1947                                 return error;
1948                         fa = xfs_da3_header_check(bp, args->owner);
1949                         if (fa) {
1950                                 __xfs_buf_mark_corrupt(bp, fa);
1951                                 xfs_trans_brelse(args->trans, bp);
1952                                 xfs_da_mark_sick(args);
1953                                 return -EFSCORRUPTED;
1954                         }
1955                         ASSERT(bp != NULL);
1956                         tmp_info = bp->b_addr;
1957                         ASSERT(tmp_info->magic == old_info->magic);
1958                         ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
1959                         tmp_info->forw = cpu_to_be32(new_blk->blkno);
1960                         xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1961                 }
1962                 old_info->back = cpu_to_be32(new_blk->blkno);
1963         } else {
1964                 /*
1965                  * Link new block in after existing block.
1966                  */
1967                 trace_xfs_da_link_after(args);
1968                 new_info->forw = old_info->forw;
1969                 new_info->back = cpu_to_be32(old_blk->blkno);
1970                 if (old_info->forw) {
1971                         error = xfs_da3_node_read(args->trans, dp,
1972                                                 be32_to_cpu(old_info->forw),
1973                                                 &bp, args->whichfork);
1974                         if (error)
1975                                 return error;
1976                         fa = xfs_da3_header_check(bp, args->owner);
1977                         if (fa) {
1978                                 __xfs_buf_mark_corrupt(bp, fa);
1979                                 xfs_trans_brelse(args->trans, bp);
1980                                 xfs_da_mark_sick(args);
1981                                 return -EFSCORRUPTED;
1982                         }
1983                         ASSERT(bp != NULL);
1984                         tmp_info = bp->b_addr;
1985                         ASSERT(tmp_info->magic == old_info->magic);
1986                         ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
1987                         tmp_info->back = cpu_to_be32(new_blk->blkno);
1988                         xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1989                 }
1990                 old_info->forw = cpu_to_be32(new_blk->blkno);
1991         }
1992 
1993         xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
1994         xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
1995         return 0;
1996 }
1997 
1998 /*
1999  * Unlink a block from a doubly linked list of blocks.
2000  */
2001 STATIC int                                              /* error */
2002 xfs_da3_blk_unlink(
2003         struct xfs_da_state     *state,
2004         struct xfs_da_state_blk *drop_blk,
2005         struct xfs_da_state_blk *save_blk)
2006 {
2007         struct xfs_da_blkinfo   *drop_info;
2008         struct xfs_da_blkinfo   *save_info;
2009         struct xfs_da_blkinfo   *tmp_info;
2010         struct xfs_da_args      *args;
2011         struct xfs_buf          *bp;
2012         xfs_failaddr_t          fa;
2013         int                     error;
2014 
2015         /*
2016          * Set up environment.
2017          */
2018         args = state->args;
2019         ASSERT(args != NULL);
2020         save_info = save_blk->bp->b_addr;
2021         drop_info = drop_blk->bp->b_addr;
2022         ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
2023                save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
2024                save_blk->magic == XFS_ATTR_LEAF_MAGIC);
2025         ASSERT(save_blk->magic == drop_blk->magic);
2026         ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
2027                (be32_to_cpu(save_info->back) == drop_blk->blkno));
2028         ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
2029                (be32_to_cpu(drop_info->back) == save_blk->blkno));
2030 
2031         /*
2032          * Unlink the leaf block from the doubly linked chain of leaves.
2033          */
2034         if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
2035                 trace_xfs_da_unlink_back(args);
2036                 save_info->back = drop_info->back;
2037                 if (drop_info->back) {
2038                         error = xfs_da3_node_read(args->trans, args->dp,
2039                                                 be32_to_cpu(drop_info->back),
2040                                                 &bp, args->whichfork);
2041                         if (error)
2042                                 return error;
2043                         fa = xfs_da3_header_check(bp, args->owner);
2044                         if (fa) {
2045                                 __xfs_buf_mark_corrupt(bp, fa);
2046                                 xfs_trans_brelse(args->trans, bp);
2047                                 xfs_da_mark_sick(args);
2048                                 return -EFSCORRUPTED;
2049                         }
2050                         ASSERT(bp != NULL);
2051                         tmp_info = bp->b_addr;
2052                         ASSERT(tmp_info->magic == save_info->magic);
2053                         ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
2054                         tmp_info->forw = cpu_to_be32(save_blk->blkno);
2055                         xfs_trans_log_buf(args->trans, bp, 0,
2056                                                     sizeof(*tmp_info) - 1);
2057                 }
2058         } else {
2059                 trace_xfs_da_unlink_forward(args);
2060                 save_info->forw = drop_info->forw;
2061                 if (drop_info->forw) {
2062                         error = xfs_da3_node_read(args->trans, args->dp,
2063                                                 be32_to_cpu(drop_info->forw),
2064                                                 &bp, args->whichfork);
2065                         if (error)
2066                                 return error;
2067                         fa = xfs_da3_header_check(bp, args->owner);
2068                         if (fa) {
2069                                 __xfs_buf_mark_corrupt(bp, fa);
2070                                 xfs_trans_brelse(args->trans, bp);
2071                                 xfs_da_mark_sick(args);
2072                                 return -EFSCORRUPTED;
2073                         }
2074                         ASSERT(bp != NULL);
2075                         tmp_info = bp->b_addr;
2076                         ASSERT(tmp_info->magic == save_info->magic);
2077                         ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
2078                         tmp_info->back = cpu_to_be32(save_blk->blkno);
2079                         xfs_trans_log_buf(args->trans, bp, 0,
2080                                                     sizeof(*tmp_info) - 1);
2081                 }
2082         }
2083 
2084         xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
2085         return 0;
2086 }
2087 
2088 /*
2089  * Move a path "forward" or "!forward" one block at the current level.
2090  *
2091  * This routine will adjust a "path" to point to the next block
2092  * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
2093  * Btree, including updating pointers to the intermediate nodes between
2094  * the new bottom and the root.
2095  */
2096 int                                                     /* error */
2097 xfs_da3_path_shift(
2098         struct xfs_da_state     *state,
2099         struct xfs_da_state_path *path,
2100         int                     forward,
2101         int                     release,
2102         int                     *result)
2103 {
2104         struct xfs_da_state_blk *blk;
2105         struct xfs_da_blkinfo   *info;
2106         struct xfs_da_args      *args;
2107         struct xfs_da_node_entry *btree;
2108         struct xfs_da3_icnode_hdr nodehdr;
2109         struct xfs_buf          *bp;
2110         xfs_failaddr_t          fa;
2111         xfs_dablk_t             blkno = 0;
2112         int                     level;
2113         int                     error;
2114         struct xfs_inode        *dp = state->args->dp;
2115 
2116         trace_xfs_da_path_shift(state->args);
2117 
2118         /*
2119          * Roll up the Btree looking for the first block where our
2120          * current index is not at the edge of the block.  Note that
2121          * we skip the bottom layer because we want the sibling block.
2122          */
2123         args = state->args;
2124         ASSERT(args != NULL);
2125         ASSERT(path != NULL);
2126         ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
2127         level = (path->active-1) - 1;   /* skip bottom layer in path */
2128         for (; level >= 0; level--) {
2129                 blk = &path->blk[level];
2130                 xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
2131                                            blk->bp->b_addr);
2132 
2133                 if (forward && (blk->index < nodehdr.count - 1)) {
2134                         blk->index++;
2135                         blkno = be32_to_cpu(nodehdr.btree[blk->index].before);
2136                         break;
2137                 } else if (!forward && (blk->index > 0)) {
2138                         blk->index--;
2139                         blkno = be32_to_cpu(nodehdr.btree[blk->index].before);
2140                         break;
2141                 }
2142         }
2143         if (level < 0) {
2144                 *result = -ENOENT;      /* we're out of our tree */
2145                 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
2146                 return 0;
2147         }
2148 
2149         /*
2150          * Roll down the edge of the subtree until we reach the
2151          * same depth we were at originally.
2152          */
2153         for (blk++, level++; level < path->active; blk++, level++) {
2154                 /*
2155                  * Read the next child block into a local buffer.
2156                  */
2157                 error = xfs_da3_node_read(args->trans, dp, blkno, &bp,
2158                                           args->whichfork);
2159                 if (error)
2160                         return error;
2161 
2162                 /*
2163                  * Release the old block (if it's dirty, the trans doesn't
2164                  * actually let go) and swap the local buffer into the path
2165                  * structure. This ensures failure of the above read doesn't set
2166                  * a NULL buffer in an active slot in the path.
2167                  */
2168                 if (release)
2169                         xfs_trans_brelse(args->trans, blk->bp);
2170                 blk->blkno = blkno;
2171                 blk->bp = bp;
2172 
2173                 info = blk->bp->b_addr;
2174                 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
2175                        info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
2176                        info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
2177                        info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
2178                        info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
2179                        info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
2180 
2181 
2182                 /*
2183                  * Note: we flatten the magic number to a single type so we
2184                  * don't have to compare against crc/non-crc types elsewhere.
2185                  */
2186                 switch (be16_to_cpu(info->magic)) {
2187                 case XFS_DA_NODE_MAGIC:
2188                 case XFS_DA3_NODE_MAGIC:
2189                         fa = xfs_da3_node_header_check(blk->bp, args->owner);
2190                         if (fa) {
2191                                 __xfs_buf_mark_corrupt(blk->bp, fa);
2192                                 xfs_da_mark_sick(args);
2193                                 return -EFSCORRUPTED;
2194                         }
2195                         blk->magic = XFS_DA_NODE_MAGIC;
2196                         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
2197                                                    bp->b_addr);
2198                         btree = nodehdr.btree;
2199                         blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
2200                         if (forward)
2201                                 blk->index = 0;
2202                         else
2203                                 blk->index = nodehdr.count - 1;
2204                         blkno = be32_to_cpu(btree[blk->index].before);
2205                         break;
2206                 case XFS_ATTR_LEAF_MAGIC:
2207                 case XFS_ATTR3_LEAF_MAGIC:
2208                         fa = xfs_attr3_leaf_header_check(blk->bp, args->owner);
2209                         if (fa) {
2210                                 __xfs_buf_mark_corrupt(blk->bp, fa);
2211                                 xfs_da_mark_sick(args);
2212                                 return -EFSCORRUPTED;
2213                         }
2214                         blk->magic = XFS_ATTR_LEAF_MAGIC;
2215                         ASSERT(level == path->active-1);
2216                         blk->index = 0;
2217                         blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
2218                         break;
2219                 case XFS_DIR2_LEAFN_MAGIC:
2220                 case XFS_DIR3_LEAFN_MAGIC:
2221                         fa = xfs_dir3_leaf_header_check(blk->bp, args->owner);
2222                         if (fa) {
2223                                 __xfs_buf_mark_corrupt(blk->bp, fa);
2224                                 xfs_da_mark_sick(args);
2225                                 return -EFSCORRUPTED;
2226                         }
2227                         blk->magic = XFS_DIR2_LEAFN_MAGIC;
2228                         ASSERT(level == path->active-1);
2229                         blk->index = 0;
2230                         blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
2231                                                               blk->bp, NULL);
2232                         break;
2233                 default:
2234                         ASSERT(0);
2235                         break;
2236                 }
2237         }
2238         *result = 0;
2239         return 0;
2240 }
2241 
2242 
2243 /*========================================================================
2244  * Utility routines.
2245  *========================================================================*/
2246 
2247 /*
2248  * Implement a simple hash on a character string.
2249  * Rotate the hash value by 7 bits, then XOR each character in.
2250  * This is implemented with some source-level loop unrolling.
2251  */
2252 xfs_dahash_t
2253 xfs_da_hashname(const uint8_t *name, int namelen)
2254 {
2255         xfs_dahash_t hash;
2256 
2257         /*
2258          * Do four characters at a time as long as we can.
2259          */
2260         for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
2261                 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
2262                        (name[3] << 0) ^ rol32(hash, 7 * 4);
2263 
2264         /*
2265          * Now do the rest of the characters.
2266          */
2267         switch (namelen) {
2268         case 3:
2269                 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
2270                        rol32(hash, 7 * 3);
2271         case 2:
2272                 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
2273         case 1:
2274                 return (name[0] << 0) ^ rol32(hash, 7 * 1);
2275         default: /* case 0: */
2276                 return hash;
2277         }
2278 }
2279 
2280 enum xfs_dacmp
2281 xfs_da_compname(
2282         struct xfs_da_args *args,
2283         const unsigned char *name,
2284         int             len)
2285 {
2286         return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
2287                                         XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
2288 }
2289 
2290 int
2291 xfs_da_grow_inode_int(
2292         struct xfs_da_args      *args,
2293         xfs_fileoff_t           *bno,
2294         int                     count)
2295 {
2296         struct xfs_trans        *tp = args->trans;
2297         struct xfs_inode        *dp = args->dp;
2298         int                     w = args->whichfork;
2299         xfs_rfsblock_t          nblks = dp->i_nblocks;
2300         struct xfs_bmbt_irec    map, *mapp = &map;
2301         int                     nmap, error, got, i, mapi = 1;
2302 
2303         /*
2304          * Find a spot in the file space to put the new block.
2305          */
2306         error = xfs_bmap_first_unused(tp, dp, count, bno, w);
2307         if (error)
2308                 return error;
2309 
2310         /*
2311          * Try mapping it in one filesystem block.
2312          */
2313         nmap = 1;
2314         error = xfs_bmapi_write(tp, dp, *bno, count,
2315                         xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
2316                         args->total, &map, &nmap);
2317         if (error == -ENOSPC && count > 1) {
2318                 xfs_fileoff_t           b;
2319                 int                     c;
2320 
2321                 /*
2322                  * If we didn't get it and the block might work if fragmented,
2323                  * try without the CONTIG flag.  Loop until we get it all.
2324                  */
2325                 mapp = kmalloc(sizeof(*mapp) * count,
2326                                 GFP_KERNEL | __GFP_NOFAIL);
2327                 for (b = *bno, mapi = 0; b < *bno + count; ) {
2328                         c = (int)(*bno + count - b);
2329                         nmap = min(XFS_BMAP_MAX_NMAP, c);
2330                         error = xfs_bmapi_write(tp, dp, b, c,
2331                                         xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
2332                                         args->total, &mapp[mapi], &nmap);
2333                         if (error)
2334                                 goto out_free_map;
2335                         mapi += nmap;
2336                         b = mapp[mapi - 1].br_startoff +
2337                             mapp[mapi - 1].br_blockcount;
2338                 }
2339         }
2340         if (error)
2341                 goto out_free_map;
2342 
2343         /*
2344          * Count the blocks we got, make sure it matches the total.
2345          */
2346         for (i = 0, got = 0; i < mapi; i++)
2347                 got += mapp[i].br_blockcount;
2348         if (got != count || mapp[0].br_startoff != *bno ||
2349             mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
2350             *bno + count) {
2351                 error = -ENOSPC;
2352                 goto out_free_map;
2353         }
2354 
2355         /* account for newly allocated blocks in reserved blocks total */
2356         args->total -= dp->i_nblocks - nblks;
2357 
2358 out_free_map:
2359         if (mapp != &map)
2360                 kfree(mapp);
2361         return error;
2362 }
2363 
2364 /*
2365  * Add a block to the btree ahead of the file.
2366  * Return the new block number to the caller.
2367  */
2368 int
2369 xfs_da_grow_inode(
2370         struct xfs_da_args      *args,
2371         xfs_dablk_t             *new_blkno)
2372 {
2373         xfs_fileoff_t           bno;
2374         int                     error;
2375 
2376         trace_xfs_da_grow_inode(args);
2377 
2378         bno = args->geo->leafblk;
2379         error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount);
2380         if (!error)
2381                 *new_blkno = (xfs_dablk_t)bno;
2382         return error;
2383 }
2384 
2385 /*
2386  * Ick.  We need to always be able to remove a btree block, even
2387  * if there's no space reservation because the filesystem is full.
2388  * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
2389  * It swaps the target block with the last block in the file.  The
2390  * last block in the file can always be removed since it can't cause
2391  * a bmap btree split to do that.
2392  */
2393 STATIC int
2394 xfs_da3_swap_lastblock(
2395         struct xfs_da_args      *args,
2396         xfs_dablk_t             *dead_blknop,
2397         struct xfs_buf          **dead_bufp)
2398 {
2399         struct xfs_da_blkinfo   *dead_info;
2400         struct xfs_da_blkinfo   *sib_info;
2401         struct xfs_da_intnode   *par_node;
2402         struct xfs_da_intnode   *dead_node;
2403         struct xfs_dir2_leaf    *dead_leaf2;
2404         struct xfs_da_node_entry *btree;
2405         struct xfs_da3_icnode_hdr par_hdr;
2406         struct xfs_inode        *dp;
2407         struct xfs_trans        *tp;
2408         struct xfs_mount        *mp;
2409         struct xfs_buf          *dead_buf;
2410         struct xfs_buf          *last_buf;
2411         struct xfs_buf          *sib_buf;
2412         struct xfs_buf          *par_buf;
2413         xfs_failaddr_t          fa;
2414         xfs_dahash_t            dead_hash;
2415         xfs_fileoff_t           lastoff;
2416         xfs_dablk_t             dead_blkno;
2417         xfs_dablk_t             last_blkno;
2418         xfs_dablk_t             sib_blkno;
2419         xfs_dablk_t             par_blkno;
2420         int                     error;
2421         int                     w;
2422         int                     entno;
2423         int                     level;
2424         int                     dead_level;
2425 
2426         trace_xfs_da_swap_lastblock(args);
2427 
2428         dead_buf = *dead_bufp;
2429         dead_blkno = *dead_blknop;
2430         tp = args->trans;
2431         dp = args->dp;
2432         w = args->whichfork;
2433         ASSERT(w == XFS_DATA_FORK);
2434         mp = dp->i_mount;
2435         lastoff = args->geo->freeblk;
2436         error = xfs_bmap_last_before(tp, dp, &lastoff, w);
2437         if (error)
2438                 return error;
2439         if (XFS_IS_CORRUPT(mp, lastoff == 0)) {
2440                 xfs_da_mark_sick(args);
2441                 return -EFSCORRUPTED;
2442         }
2443         /*
2444          * Read the last block in the btree space.
2445          */
2446         last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount;
2447         error = xfs_da3_node_read(tp, dp, last_blkno, &last_buf, w);
2448         if (error)
2449                 return error;
2450         fa = xfs_da3_header_check(last_buf, args->owner);
2451         if (fa) {
2452                 __xfs_buf_mark_corrupt(last_buf, fa);
2453                 xfs_trans_brelse(tp, last_buf);
2454                 xfs_da_mark_sick(args);
2455                 return -EFSCORRUPTED;
2456         }
2457 
2458         /*
2459          * Copy the last block into the dead buffer and log it.
2460          */
2461         xfs_da_buf_copy(dead_buf, last_buf, args->geo->blksize);
2462         xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
2463         dead_info = dead_buf->b_addr;
2464 
2465         /*
2466          * Get values from the moved block.
2467          */
2468         if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
2469             dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
2470                 struct xfs_dir3_icleaf_hdr leafhdr;
2471                 struct xfs_dir2_leaf_entry *ents;
2472 
2473                 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
2474                 xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr,
2475                                             dead_leaf2);
2476                 ents = leafhdr.ents;
2477                 dead_level = 0;
2478                 dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
2479         } else {
2480                 struct xfs_da3_icnode_hdr deadhdr;
2481 
2482                 dead_node = (xfs_da_intnode_t *)dead_info;
2483                 xfs_da3_node_hdr_from_disk(dp->i_mount, &deadhdr, dead_node);
2484                 btree = deadhdr.btree;
2485                 dead_level = deadhdr.level;
2486                 dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);
2487         }
2488         sib_buf = par_buf = NULL;
2489         /*
2490          * If the moved block has a left sibling, fix up the pointers.
2491          */
2492         if ((sib_blkno = be32_to_cpu(dead_info->back))) {
2493                 error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
2494                 if (error)
2495                         goto done;
2496                 fa = xfs_da3_header_check(sib_buf, args->owner);
2497                 if (fa) {
2498                         __xfs_buf_mark_corrupt(sib_buf, fa);
2499                         xfs_da_mark_sick(args);
2500                         error = -EFSCORRUPTED;
2501                         goto done;
2502                 }
2503                 sib_info = sib_buf->b_addr;
2504                 if (XFS_IS_CORRUPT(mp,
2505                                    be32_to_cpu(sib_info->forw) != last_blkno ||
2506                                    sib_info->magic != dead_info->magic)) {
2507                         xfs_da_mark_sick(args);
2508                         error = -EFSCORRUPTED;
2509                         goto done;
2510                 }
2511                 sib_info->forw = cpu_to_be32(dead_blkno);
2512                 xfs_trans_log_buf(tp, sib_buf,
2513                         XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
2514                                         sizeof(sib_info->forw)));
2515                 sib_buf = NULL;
2516         }
2517         /*
2518          * If the moved block has a right sibling, fix up the pointers.
2519          */
2520         if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
2521                 error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
2522                 if (error)
2523                         goto done;
2524                 fa = xfs_da3_header_check(sib_buf, args->owner);
2525                 if (fa) {
2526                         __xfs_buf_mark_corrupt(sib_buf, fa);
2527                         xfs_da_mark_sick(args);
2528                         error = -EFSCORRUPTED;
2529                         goto done;
2530                 }
2531                 sib_info = sib_buf->b_addr;
2532                 if (XFS_IS_CORRUPT(mp,
2533                                    be32_to_cpu(sib_info->back) != last_blkno ||
2534                                    sib_info->magic != dead_info->magic)) {
2535                         xfs_da_mark_sick(args);
2536                         error = -EFSCORRUPTED;
2537                         goto done;
2538                 }
2539                 sib_info->back = cpu_to_be32(dead_blkno);
2540                 xfs_trans_log_buf(tp, sib_buf,
2541                         XFS_DA_LOGRANGE(sib_info, &sib_info->back,
2542                                         sizeof(sib_info->back)));
2543                 sib_buf = NULL;
2544         }
2545         par_blkno = args->geo->leafblk;
2546         level = -1;
2547         /*
2548          * Walk down the tree looking for the parent of the moved block.
2549          */
2550         for (;;) {
2551                 error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
2552                 if (error)
2553                         goto done;
2554                 fa = xfs_da3_node_header_check(par_buf, args->owner);
2555                 if (fa) {
2556                         __xfs_buf_mark_corrupt(par_buf, fa);
2557                         xfs_da_mark_sick(args);
2558                         error = -EFSCORRUPTED;
2559                         goto done;
2560                 }
2561                 par_node = par_buf->b_addr;
2562                 xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
2563                 if (XFS_IS_CORRUPT(mp,
2564                                    level >= 0 && level != par_hdr.level + 1)) {
2565                         xfs_da_mark_sick(args);
2566                         error = -EFSCORRUPTED;
2567                         goto done;
2568                 }
2569                 level = par_hdr.level;
2570                 btree = par_hdr.btree;
2571                 for (entno = 0;
2572                      entno < par_hdr.count &&
2573                      be32_to_cpu(btree[entno].hashval) < dead_hash;
2574                      entno++)
2575                         continue;
2576                 if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) {
2577                         xfs_da_mark_sick(args);
2578                         error = -EFSCORRUPTED;
2579                         goto done;
2580                 }
2581                 par_blkno = be32_to_cpu(btree[entno].before);
2582                 if (level == dead_level + 1)
2583                         break;
2584                 xfs_trans_brelse(tp, par_buf);
2585                 par_buf = NULL;
2586         }
2587         /*
2588          * We're in the right parent block.
2589          * Look for the right entry.
2590          */
2591         for (;;) {
2592                 for (;
2593                      entno < par_hdr.count &&
2594                      be32_to_cpu(btree[entno].before) != last_blkno;
2595                      entno++)
2596                         continue;
2597                 if (entno < par_hdr.count)
2598                         break;
2599                 par_blkno = par_hdr.forw;
2600                 xfs_trans_brelse(tp, par_buf);
2601                 par_buf = NULL;
2602                 if (XFS_IS_CORRUPT(mp, par_blkno == 0)) {
2603                         xfs_da_mark_sick(args);
2604                         error = -EFSCORRUPTED;
2605                         goto done;
2606                 }
2607                 error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
2608                 if (error)
2609                         goto done;
2610                 fa = xfs_da3_node_header_check(par_buf, args->owner);
2611                 if (fa) {
2612                         __xfs_buf_mark_corrupt(par_buf, fa);
2613                         xfs_da_mark_sick(args);
2614                         error = -EFSCORRUPTED;
2615                         goto done;
2616                 }
2617                 par_node = par_buf->b_addr;
2618                 xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
2619                 if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
2620                         xfs_da_mark_sick(args);
2621                         error = -EFSCORRUPTED;
2622                         goto done;
2623                 }
2624                 btree = par_hdr.btree;
2625                 entno = 0;
2626         }
2627         /*
2628          * Update the parent entry pointing to the moved block.
2629          */
2630         btree[entno].before = cpu_to_be32(dead_blkno);
2631         xfs_trans_log_buf(tp, par_buf,
2632                 XFS_DA_LOGRANGE(par_node, &btree[entno].before,
2633                                 sizeof(btree[entno].before)));
2634         *dead_blknop = last_blkno;
2635         *dead_bufp = last_buf;
2636         return 0;
2637 done:
2638         if (par_buf)
2639                 xfs_trans_brelse(tp, par_buf);
2640         if (sib_buf)
2641                 xfs_trans_brelse(tp, sib_buf);
2642         xfs_trans_brelse(tp, last_buf);
2643         return error;
2644 }
2645 
2646 /*
2647  * Remove a btree block from a directory or attribute.
2648  */
2649 int
2650 xfs_da_shrink_inode(
2651         struct xfs_da_args      *args,
2652         xfs_dablk_t             dead_blkno,
2653         struct xfs_buf          *dead_buf)
2654 {
2655         struct xfs_inode        *dp;
2656         int                     done, error, w, count;
2657         struct xfs_trans        *tp;
2658 
2659         trace_xfs_da_shrink_inode(args);
2660 
2661         dp = args->dp;
2662         w = args->whichfork;
2663         tp = args->trans;
2664         count = args->geo->fsbcount;
2665         for (;;) {
2666                 /*
2667                  * Remove extents.  If we get ENOSPC for a dir we have to move
2668                  * the last block to the place we want to kill.
2669                  */
2670                 error = xfs_bunmapi(tp, dp, dead_blkno, count,
2671                                     xfs_bmapi_aflag(w), 0, &done);
2672                 if (error == -ENOSPC) {
2673                         if (w != XFS_DATA_FORK)
2674                                 break;
2675                         error = xfs_da3_swap_lastblock(args, &dead_blkno,
2676                                                       &dead_buf);
2677                         if (error)
2678                                 break;
2679                 } else {
2680                         break;
2681                 }
2682         }
2683         xfs_trans_binval(tp, dead_buf);
2684         return error;
2685 }
2686 
2687 static int
2688 xfs_dabuf_map(
2689         struct xfs_inode        *dp,
2690         xfs_dablk_t             bno,
2691         unsigned int            flags,
2692         int                     whichfork,
2693         struct xfs_buf_map      **mapp,
2694         int                     *nmaps)
2695 {
2696         struct xfs_mount        *mp = dp->i_mount;
2697         int                     nfsb = xfs_dabuf_nfsb(mp, whichfork);
2698         struct xfs_bmbt_irec    irec, *irecs = &irec;
2699         struct xfs_buf_map      *map = *mapp;
2700         xfs_fileoff_t           off = bno;
2701         int                     error = 0, nirecs, i;
2702 
2703         if (nfsb > 1)
2704                 irecs = kzalloc(sizeof(irec) * nfsb,
2705                                 GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
2706 
2707         nirecs = nfsb;
2708         error = xfs_bmapi_read(dp, bno, nfsb, irecs, &nirecs,
2709                         xfs_bmapi_aflag(whichfork));
2710         if (error)
2711                 goto out_free_irecs;
2712 
2713         /*
2714          * Use the caller provided map for the single map case, else allocate a
2715          * larger one that needs to be free by the caller.
2716          */
2717         if (nirecs > 1) {
2718                 map = kzalloc(nirecs * sizeof(struct xfs_buf_map),
2719                                 GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
2720                 if (!map) {
2721                         error = -ENOMEM;
2722                         goto out_free_irecs;
2723                 }
2724                 *mapp = map;
2725         }
2726 
2727         for (i = 0; i < nirecs; i++) {
2728                 if (irecs[i].br_startblock == HOLESTARTBLOCK ||
2729                     irecs[i].br_startblock == DELAYSTARTBLOCK)
2730                         goto invalid_mapping;
2731                 if (off != irecs[i].br_startoff)
2732                         goto invalid_mapping;
2733 
2734                 map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
2735                 map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
2736                 off += irecs[i].br_blockcount;
2737         }
2738 
2739         if (off != bno + nfsb)
2740                 goto invalid_mapping;
2741 
2742         *nmaps = nirecs;
2743 out_free_irecs:
2744         if (irecs != &irec)
2745                 kfree(irecs);
2746         return error;
2747 
2748 invalid_mapping:
2749         /* Caller ok with no mapping. */
2750         if (XFS_IS_CORRUPT(mp, !(flags & XFS_DABUF_MAP_HOLE_OK))) {
2751                 xfs_dirattr_mark_sick(dp, whichfork);
2752                 error = -EFSCORRUPTED;
2753                 if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
2754                         xfs_alert(mp, "%s: bno %u inode %llu",
2755                                         __func__, bno, dp->i_ino);
2756 
2757                         for (i = 0; i < nirecs; i++) {
2758                                 xfs_alert(mp,
2759 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
2760                                         i, irecs[i].br_startoff,
2761                                         irecs[i].br_startblock,
2762                                         irecs[i].br_blockcount,
2763                                         irecs[i].br_state);
2764                         }
2765                 }
2766         } else {
2767                 *nmaps = 0;
2768         }
2769         goto out_free_irecs;
2770 }
2771 
2772 /*
2773  * Get a buffer for the dir/attr block.
2774  */
2775 int
2776 xfs_da_get_buf(
2777         struct xfs_trans        *tp,
2778         struct xfs_inode        *dp,
2779         xfs_dablk_t             bno,
2780         struct xfs_buf          **bpp,
2781         int                     whichfork)
2782 {
2783         struct xfs_mount        *mp = dp->i_mount;
2784         struct xfs_buf          *bp;
2785         struct xfs_buf_map      map, *mapp = &map;
2786         int                     nmap = 1;
2787         int                     error;
2788 
2789         *bpp = NULL;
2790         error = xfs_dabuf_map(dp, bno, 0, whichfork, &mapp, &nmap);
2791         if (error || nmap == 0)
2792                 goto out_free;
2793 
2794         error = xfs_trans_get_buf_map(tp, mp->m_ddev_targp, mapp, nmap, 0, &bp);
2795         if (error)
2796                 goto out_free;
2797 
2798         *bpp = bp;
2799 
2800 out_free:
2801         if (mapp != &map)
2802                 kfree(mapp);
2803 
2804         return error;
2805 }
2806 
2807 /*
2808  * Get a buffer for the dir/attr block, fill in the contents.
2809  */
2810 int
2811 xfs_da_read_buf(
2812         struct xfs_trans        *tp,
2813         struct xfs_inode        *dp,
2814         xfs_dablk_t             bno,
2815         unsigned int            flags,
2816         struct xfs_buf          **bpp,
2817         int                     whichfork,
2818         const struct xfs_buf_ops *ops)
2819 {
2820         struct xfs_mount        *mp = dp->i_mount;
2821         struct xfs_buf          *bp;
2822         struct xfs_buf_map      map, *mapp = &map;
2823         int                     nmap = 1;
2824         int                     error;
2825 
2826         *bpp = NULL;
2827         error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap);
2828         if (error || !nmap)
2829                 goto out_free;
2830 
2831         error = xfs_trans_read_buf_map(mp, tp, mp->m_ddev_targp, mapp, nmap, 0,
2832                         &bp, ops);
2833         if (xfs_metadata_is_sick(error))
2834                 xfs_dirattr_mark_sick(dp, whichfork);
2835         if (error)
2836                 goto out_free;
2837 
2838         if (whichfork == XFS_ATTR_FORK)
2839                 xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
2840         else
2841                 xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
2842         *bpp = bp;
2843 out_free:
2844         if (mapp != &map)
2845                 kfree(mapp);
2846 
2847         return error;
2848 }
2849 
2850 /*
2851  * Readahead the dir/attr block.
2852  */
2853 int
2854 xfs_da_reada_buf(
2855         struct xfs_inode        *dp,
2856         xfs_dablk_t             bno,
2857         unsigned int            flags,
2858         int                     whichfork,
2859         const struct xfs_buf_ops *ops)
2860 {
2861         struct xfs_buf_map      map;
2862         struct xfs_buf_map      *mapp;
2863         int                     nmap;
2864         int                     error;
2865 
2866         mapp = &map;
2867         nmap = 1;
2868         error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap);
2869         if (error || !nmap)
2870                 goto out_free;
2871 
2872         xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops);
2873 
2874 out_free:
2875         if (mapp != &map)
2876                 kfree(mapp);
2877 
2878         return error;
2879 }
2880 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php