~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/bcachefs/inode.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /fs/bcachefs/inode.c (Architecture i386) and /fs/bcachefs/inode.c (Architecture sparc)


  1 // SPDX-License-Identifier: GPL-2.0                 1 // SPDX-License-Identifier: GPL-2.0
  2                                                     2 
  3 #include "bcachefs.h"                               3 #include "bcachefs.h"
  4 #include "btree_key_cache.h"                        4 #include "btree_key_cache.h"
  5 #include "btree_write_buffer.h"                     5 #include "btree_write_buffer.h"
  6 #include "bkey_methods.h"                           6 #include "bkey_methods.h"
  7 #include "btree_update.h"                           7 #include "btree_update.h"
  8 #include "buckets.h"                                8 #include "buckets.h"
  9 #include "compress.h"                               9 #include "compress.h"
 10 #include "dirent.h"                                10 #include "dirent.h"
 11 #include "disk_accounting.h"                       11 #include "disk_accounting.h"
 12 #include "error.h"                                 12 #include "error.h"
 13 #include "extents.h"                               13 #include "extents.h"
 14 #include "extent_update.h"                         14 #include "extent_update.h"
 15 #include "inode.h"                                 15 #include "inode.h"
 16 #include "str_hash.h"                              16 #include "str_hash.h"
 17 #include "snapshot.h"                              17 #include "snapshot.h"
 18 #include "subvolume.h"                             18 #include "subvolume.h"
 19 #include "varint.h"                                19 #include "varint.h"
 20                                                    20 
 21 #include <linux/random.h>                          21 #include <linux/random.h>
 22                                                    22 
 23 #include <asm/unaligned.h>                         23 #include <asm/unaligned.h>
 24                                                    24 
 25 #define x(name, ...)    #name,                     25 #define x(name, ...)    #name,
 26 const char * const bch2_inode_opts[] = {           26 const char * const bch2_inode_opts[] = {
 27         BCH_INODE_OPTS()                           27         BCH_INODE_OPTS()
 28         NULL,                                      28         NULL,
 29 };                                                 29 };
 30                                                    30 
 31 static const char * const bch2_inode_flag_strs     31 static const char * const bch2_inode_flag_strs[] = {
 32         BCH_INODE_FLAGS()                          32         BCH_INODE_FLAGS()
 33         NULL                                       33         NULL
 34 };                                                 34 };
 35 #undef  x                                          35 #undef  x
 36                                                    36 
 37 static const u8 byte_table[8] = { 1, 2, 3, 4,      37 static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 };
 38                                                    38 
 39 static int inode_decode_field(const u8 *in, co     39 static int inode_decode_field(const u8 *in, const u8 *end,
 40                               u64 out[2], unsi     40                               u64 out[2], unsigned *out_bits)
 41 {                                                  41 {
 42         __be64 be[2] = { 0, 0 };                   42         __be64 be[2] = { 0, 0 };
 43         unsigned bytes, shift;                     43         unsigned bytes, shift;
 44         u8 *p;                                     44         u8 *p;
 45                                                    45 
 46         if (in >= end)                             46         if (in >= end)
 47                 return -1;                         47                 return -1;
 48                                                    48 
 49         if (!*in)                                  49         if (!*in)
 50                 return -1;                         50                 return -1;
 51                                                    51 
 52         /*                                         52         /*
 53          * position of highest set bit indicat     53          * position of highest set bit indicates number of bytes:
 54          * shift = number of bits to remove in     54          * shift = number of bits to remove in high byte:
 55          */                                        55          */
 56         shift   = 8 - __fls(*in); /* 1 <= shif     56         shift   = 8 - __fls(*in); /* 1 <= shift <= 8 */
 57         bytes   = byte_table[shift - 1];           57         bytes   = byte_table[shift - 1];
 58                                                    58 
 59         if (in + bytes > end)                      59         if (in + bytes > end)
 60                 return -1;                         60                 return -1;
 61                                                    61 
 62         p = (u8 *) be + 16 - bytes;                62         p = (u8 *) be + 16 - bytes;
 63         memcpy(p, in, bytes);                      63         memcpy(p, in, bytes);
 64         *p ^= (1 << 8) >> shift;                   64         *p ^= (1 << 8) >> shift;
 65                                                    65 
 66         out[0] = be64_to_cpu(be[0]);               66         out[0] = be64_to_cpu(be[0]);
 67         out[1] = be64_to_cpu(be[1]);               67         out[1] = be64_to_cpu(be[1]);
 68         *out_bits = out[0] ? 64 + fls64(out[0]     68         *out_bits = out[0] ? 64 + fls64(out[0]) : fls64(out[1]);
 69                                                    69 
 70         return bytes;                              70         return bytes;
 71 }                                                  71 }
 72                                                    72 
 73 static inline void bch2_inode_pack_inlined(str     73 static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed,
 74                                            con     74                                            const struct bch_inode_unpacked *inode)
 75 {                                                  75 {
 76         struct bkey_i_inode_v3 *k = &packed->i     76         struct bkey_i_inode_v3 *k = &packed->inode;
 77         u8 *out = k->v.fields;                     77         u8 *out = k->v.fields;
 78         u8 *end = (void *) &packed[1];             78         u8 *end = (void *) &packed[1];
 79         u8 *last_nonzero_field = out;              79         u8 *last_nonzero_field = out;
 80         unsigned nr_fields = 0, last_nonzero_f     80         unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
 81         unsigned bytes;                            81         unsigned bytes;
 82         int ret;                                   82         int ret;
 83                                                    83 
 84         bkey_inode_v3_init(&packed->inode.k_i)     84         bkey_inode_v3_init(&packed->inode.k_i);
 85         packed->inode.k.p.offset        = inod     85         packed->inode.k.p.offset        = inode->bi_inum;
 86         packed->inode.v.bi_journal_seq  = cpu_     86         packed->inode.v.bi_journal_seq  = cpu_to_le64(inode->bi_journal_seq);
 87         packed->inode.v.bi_hash_seed    = inod     87         packed->inode.v.bi_hash_seed    = inode->bi_hash_seed;
 88         packed->inode.v.bi_flags        = cpu_     88         packed->inode.v.bi_flags        = cpu_to_le64(inode->bi_flags);
 89         packed->inode.v.bi_sectors      = cpu_     89         packed->inode.v.bi_sectors      = cpu_to_le64(inode->bi_sectors);
 90         packed->inode.v.bi_size         = cpu_     90         packed->inode.v.bi_size         = cpu_to_le64(inode->bi_size);
 91         packed->inode.v.bi_version      = cpu_     91         packed->inode.v.bi_version      = cpu_to_le64(inode->bi_version);
 92         SET_INODEv3_MODE(&packed->inode.v, ino     92         SET_INODEv3_MODE(&packed->inode.v, inode->bi_mode);
 93         SET_INODEv3_FIELDS_START(&packed->inod     93         SET_INODEv3_FIELDS_START(&packed->inode.v, INODEv3_FIELDS_START_CUR);
 94                                                    94 
 95                                                    95 
 96 #define x(_name, _bits)                            96 #define x(_name, _bits)                                                 \
 97         nr_fields++;                               97         nr_fields++;                                                    \
 98                                                    98                                                                         \
 99         if (inode->_name) {                        99         if (inode->_name) {                                             \
100                 ret = bch2_varint_encode_fast(    100                 ret = bch2_varint_encode_fast(out, inode->_name);       \
101                 out += ret;                       101                 out += ret;                                             \
102                                                   102                                                                         \
103                 if (_bits > 64)                   103                 if (_bits > 64)                                         \
104                         *out++ = 0;               104                         *out++ = 0;                                     \
105                                                   105                                                                         \
106                 last_nonzero_field = out;         106                 last_nonzero_field = out;                               \
107                 last_nonzero_fieldnr = nr_fiel    107                 last_nonzero_fieldnr = nr_fields;                       \
108         } else {                                  108         } else {                                                        \
109                 *out++ = 0;                       109                 *out++ = 0;                                             \
110                                                   110                                                                         \
111                 if (_bits > 64)                   111                 if (_bits > 64)                                         \
112                         *out++ = 0;               112                         *out++ = 0;                                     \
113         }                                         113         }
114                                                   114 
115         BCH_INODE_FIELDS_v3()                     115         BCH_INODE_FIELDS_v3()
116 #undef  x                                         116 #undef  x
117         BUG_ON(out > end);                        117         BUG_ON(out > end);
118                                                   118 
119         out = last_nonzero_field;                 119         out = last_nonzero_field;
120         nr_fields = last_nonzero_fieldnr;         120         nr_fields = last_nonzero_fieldnr;
121                                                   121 
122         bytes = out - (u8 *) &packed->inode.v;    122         bytes = out - (u8 *) &packed->inode.v;
123         set_bkey_val_bytes(&packed->inode.k, b    123         set_bkey_val_bytes(&packed->inode.k, bytes);
124         memset_u64s_tail(&packed->inode.v, 0,     124         memset_u64s_tail(&packed->inode.v, 0, bytes);
125                                                   125 
126         SET_INODEv3_NR_FIELDS(&k->v, nr_fields    126         SET_INODEv3_NR_FIELDS(&k->v, nr_fields);
127                                                   127 
128         if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG))    128         if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
129                 struct bch_inode_unpacked unpa    129                 struct bch_inode_unpacked unpacked;
130                                                   130 
131                 ret = bch2_inode_unpack(bkey_i    131                 ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i), &unpacked);
132                 BUG_ON(ret);                      132                 BUG_ON(ret);
133                 BUG_ON(unpacked.bi_inum           133                 BUG_ON(unpacked.bi_inum         != inode->bi_inum);
134                 BUG_ON(unpacked.bi_hash_seed      134                 BUG_ON(unpacked.bi_hash_seed    != inode->bi_hash_seed);
135                 BUG_ON(unpacked.bi_sectors        135                 BUG_ON(unpacked.bi_sectors      != inode->bi_sectors);
136                 BUG_ON(unpacked.bi_size           136                 BUG_ON(unpacked.bi_size         != inode->bi_size);
137                 BUG_ON(unpacked.bi_version        137                 BUG_ON(unpacked.bi_version      != inode->bi_version);
138                 BUG_ON(unpacked.bi_mode           138                 BUG_ON(unpacked.bi_mode         != inode->bi_mode);
139                                                   139 
140 #define x(_name, _bits) if (unpacked._name !=     140 #define x(_name, _bits) if (unpacked._name != inode->_name)             \
141                         panic("unpacked %llu s    141                         panic("unpacked %llu should be %llu",           \
142                               (u64) unpacked._    142                               (u64) unpacked._name, (u64) inode->_name);
143                 BCH_INODE_FIELDS_v3()             143                 BCH_INODE_FIELDS_v3()
144 #undef  x                                         144 #undef  x
145         }                                         145         }
146 }                                                 146 }
147                                                   147 
/*
 * bch2_inode_pack() - out-of-line entry point for packing an inode
 * @packed:     destination key buffer
 * @inode:      unpacked inode to encode
 *
 * Forwards to bch2_inode_pack_inlined() without doing anything else.
 */
void bch2_inode_pack(struct bkey_inode_buf *packed,
                     const struct bch_inode_unpacked *inode)
{
        bch2_inode_pack_inlined(packed, inode);
}
153                                                   153 
154 static noinline int bch2_inode_unpack_v1(struc    154 static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
155                                 struct bch_ino    155                                 struct bch_inode_unpacked *unpacked)
156 {                                                 156 {
157         const u8 *in = inode.v->fields;           157         const u8 *in = inode.v->fields;
158         const u8 *end = bkey_val_end(inode);      158         const u8 *end = bkey_val_end(inode);
159         u64 field[2];                             159         u64 field[2];
160         unsigned fieldnr = 0, field_bits;         160         unsigned fieldnr = 0, field_bits;
161         int ret;                                  161         int ret;
162                                                   162 
163 #define x(_name, _bits)                           163 #define x(_name, _bits)                                 \
164         if (fieldnr++ == INODE_NR_FIELDS(inode    164         if (fieldnr++ == INODE_NR_FIELDS(inode.v)) {                    \
165                 unsigned offset = offsetof(str    165                 unsigned offset = offsetof(struct bch_inode_unpacked, _name);\
166                 memset((void *) unpacked + off    166                 memset((void *) unpacked + offset, 0,                   \
167                        sizeof(*unpacked) - off    167                        sizeof(*unpacked) - offset);                     \
168                 return 0;                         168                 return 0;                                               \
169         }                                         169         }                                                               \
170                                                   170                                                                         \
171         ret = inode_decode_field(in, end, fiel    171         ret = inode_decode_field(in, end, field, &field_bits);          \
172         if (ret < 0)                              172         if (ret < 0)                                                    \
173                 return ret;                       173                 return ret;                                             \
174                                                   174                                                                         \
175         if (field_bits > sizeof(unpacked->_nam    175         if (field_bits > sizeof(unpacked->_name) * 8)                   \
176                 return -1;                        176                 return -1;                                              \
177                                                   177                                                                         \
178         unpacked->_name = field[1];               178         unpacked->_name = field[1];                                     \
179         in += ret;                                179         in += ret;
180                                                   180 
181         BCH_INODE_FIELDS_v2()                     181         BCH_INODE_FIELDS_v2()
182 #undef  x                                         182 #undef  x
183                                                   183 
184         /* XXX: signal if there were more fiel    184         /* XXX: signal if there were more fields than expected? */
185         return 0;                                 185         return 0;
186 }                                                 186 }
187                                                   187 
188 static int bch2_inode_unpack_v2(struct bch_ino    188 static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked,
189                                 const u8 *in,     189                                 const u8 *in, const u8 *end,
190                                 unsigned nr_fi    190                                 unsigned nr_fields)
191 {                                                 191 {
192         unsigned fieldnr = 0;                     192         unsigned fieldnr = 0;
193         int ret;                                  193         int ret;
194         u64 v[2];                                 194         u64 v[2];
195                                                   195 
196 #define x(_name, _bits)                           196 #define x(_name, _bits)                                                 \
197         if (fieldnr < nr_fields) {                197         if (fieldnr < nr_fields) {                                      \
198                 ret = bch2_varint_decode_fast(    198                 ret = bch2_varint_decode_fast(in, end, &v[0]);          \
199                 if (ret < 0)                      199                 if (ret < 0)                                            \
200                         return ret;               200                         return ret;                                     \
201                 in += ret;                        201                 in += ret;                                              \
202                                                   202                                                                         \
203                 if (_bits > 64) {                 203                 if (_bits > 64) {                                       \
204                         ret = bch2_varint_deco    204                         ret = bch2_varint_decode_fast(in, end, &v[1]);  \
205                         if (ret < 0)              205                         if (ret < 0)                                    \
206                                 return ret;       206                                 return ret;                             \
207                         in += ret;                207                         in += ret;                                      \
208                 } else {                          208                 } else {                                                \
209                         v[1] = 0;                 209                         v[1] = 0;                                       \
210                 }                                 210                 }                                                       \
211         } else {                                  211         } else {                                                        \
212                 v[0] = v[1] = 0;                  212                 v[0] = v[1] = 0;                                        \
213         }                                         213         }                                                               \
214                                                   214                                                                         \
215         unpacked->_name = v[0];                   215         unpacked->_name = v[0];                                         \
216         if (v[1] || v[0] != unpacked->_name)      216         if (v[1] || v[0] != unpacked->_name)                            \
217                 return -1;                        217                 return -1;                                              \
218         fieldnr++;                                218         fieldnr++;
219                                                   219 
220         BCH_INODE_FIELDS_v2()                     220         BCH_INODE_FIELDS_v2()
221 #undef  x                                         221 #undef  x
222                                                   222 
223         /* XXX: signal if there were more fiel    223         /* XXX: signal if there were more fields than expected? */
224         return 0;                                 224         return 0;
225 }                                                 225 }
226                                                   226 
227 static int bch2_inode_unpack_v3(struct bkey_s_    227 static int bch2_inode_unpack_v3(struct bkey_s_c k,
228                                 struct bch_ino    228                                 struct bch_inode_unpacked *unpacked)
229 {                                                 229 {
230         struct bkey_s_c_inode_v3 inode = bkey_    230         struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
231         const u8 *in = inode.v->fields;           231         const u8 *in = inode.v->fields;
232         const u8 *end = bkey_val_end(inode);      232         const u8 *end = bkey_val_end(inode);
233         unsigned nr_fields = INODEv3_NR_FIELDS    233         unsigned nr_fields = INODEv3_NR_FIELDS(inode.v);
234         unsigned fieldnr = 0;                     234         unsigned fieldnr = 0;
235         int ret;                                  235         int ret;
236         u64 v[2];                                 236         u64 v[2];
237                                                   237 
238         unpacked->bi_inum       = inode.k->p.o    238         unpacked->bi_inum       = inode.k->p.offset;
239         unpacked->bi_journal_seq= le64_to_cpu(    239         unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
240         unpacked->bi_hash_seed  = inode.v->bi_    240         unpacked->bi_hash_seed  = inode.v->bi_hash_seed;
241         unpacked->bi_flags      = le64_to_cpu(    241         unpacked->bi_flags      = le64_to_cpu(inode.v->bi_flags);
242         unpacked->bi_sectors    = le64_to_cpu(    242         unpacked->bi_sectors    = le64_to_cpu(inode.v->bi_sectors);
243         unpacked->bi_size       = le64_to_cpu(    243         unpacked->bi_size       = le64_to_cpu(inode.v->bi_size);
244         unpacked->bi_version    = le64_to_cpu(    244         unpacked->bi_version    = le64_to_cpu(inode.v->bi_version);
245         unpacked->bi_mode       = INODEv3_MODE    245         unpacked->bi_mode       = INODEv3_MODE(inode.v);
246                                                   246 
247 #define x(_name, _bits)                           247 #define x(_name, _bits)                                                 \
248         if (fieldnr < nr_fields) {                248         if (fieldnr < nr_fields) {                                      \
249                 ret = bch2_varint_decode_fast(    249                 ret = bch2_varint_decode_fast(in, end, &v[0]);          \
250                 if (ret < 0)                      250                 if (ret < 0)                                            \
251                         return ret;               251                         return ret;                                     \
252                 in += ret;                        252                 in += ret;                                              \
253                                                   253                                                                         \
254                 if (_bits > 64) {                 254                 if (_bits > 64) {                                       \
255                         ret = bch2_varint_deco    255                         ret = bch2_varint_decode_fast(in, end, &v[1]);  \
256                         if (ret < 0)              256                         if (ret < 0)                                    \
257                                 return ret;       257                                 return ret;                             \
258                         in += ret;                258                         in += ret;                                      \
259                 } else {                          259                 } else {                                                \
260                         v[1] = 0;                 260                         v[1] = 0;                                       \
261                 }                                 261                 }                                                       \
262         } else {                                  262         } else {                                                        \
263                 v[0] = v[1] = 0;                  263                 v[0] = v[1] = 0;                                        \
264         }                                         264         }                                                               \
265                                                   265                                                                         \
266         unpacked->_name = v[0];                   266         unpacked->_name = v[0];                                         \
267         if (v[1] || v[0] != unpacked->_name)      267         if (v[1] || v[0] != unpacked->_name)                            \
268                 return -1;                        268                 return -1;                                              \
269         fieldnr++;                                269         fieldnr++;
270                                                   270 
271         BCH_INODE_FIELDS_v3()                     271         BCH_INODE_FIELDS_v3()
272 #undef  x                                         272 #undef  x
273                                                   273 
274         /* XXX: signal if there were more fiel    274         /* XXX: signal if there were more fields than expected? */
275         return 0;                                 275         return 0;
276 }                                                 276 }
277                                                   277 
278 static noinline int bch2_inode_unpack_slowpath    278 static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
279                                                   279                                                struct bch_inode_unpacked *unpacked)
280 {                                                 280 {
281         memset(unpacked, 0, sizeof(*unpacked))    281         memset(unpacked, 0, sizeof(*unpacked));
282                                                   282 
283         switch (k.k->type) {                      283         switch (k.k->type) {
284         case KEY_TYPE_inode: {                    284         case KEY_TYPE_inode: {
285                 struct bkey_s_c_inode inode =     285                 struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
286                                                   286 
287                 unpacked->bi_inum       = inod    287                 unpacked->bi_inum       = inode.k->p.offset;
288                 unpacked->bi_journal_seq= 0;      288                 unpacked->bi_journal_seq= 0;
289                 unpacked->bi_hash_seed  = inod    289                 unpacked->bi_hash_seed  = inode.v->bi_hash_seed;
290                 unpacked->bi_flags      = le32    290                 unpacked->bi_flags      = le32_to_cpu(inode.v->bi_flags);
291                 unpacked->bi_mode       = le16    291                 unpacked->bi_mode       = le16_to_cpu(inode.v->bi_mode);
292                                                   292 
293                 if (INODE_NEW_VARINT(inode.v))    293                 if (INODE_NEW_VARINT(inode.v)) {
294                         return bch2_inode_unpa    294                         return bch2_inode_unpack_v2(unpacked, inode.v->fields,
295                                                   295                                                     bkey_val_end(inode),
296                                                   296                                                     INODE_NR_FIELDS(inode.v));
297                 } else {                          297                 } else {
298                         return bch2_inode_unpa    298                         return bch2_inode_unpack_v1(inode, unpacked);
299                 }                                 299                 }
300                 break;                            300                 break;
301         }                                         301         }
302         case KEY_TYPE_inode_v2: {                 302         case KEY_TYPE_inode_v2: {
303                 struct bkey_s_c_inode_v2 inode    303                 struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);
304                                                   304 
305                 unpacked->bi_inum       = inod    305                 unpacked->bi_inum       = inode.k->p.offset;
306                 unpacked->bi_journal_seq= le64    306                 unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
307                 unpacked->bi_hash_seed  = inod    307                 unpacked->bi_hash_seed  = inode.v->bi_hash_seed;
308                 unpacked->bi_flags      = le64    308                 unpacked->bi_flags      = le64_to_cpu(inode.v->bi_flags);
309                 unpacked->bi_mode       = le16    309                 unpacked->bi_mode       = le16_to_cpu(inode.v->bi_mode);
310                                                   310 
311                 return bch2_inode_unpack_v2(un    311                 return bch2_inode_unpack_v2(unpacked, inode.v->fields,
312                                             bk    312                                             bkey_val_end(inode),
313                                             IN    313                                             INODEv2_NR_FIELDS(inode.v));
314         }                                         314         }
315         default:                                  315         default:
316                 BUG();                            316                 BUG();
317         }                                         317         }
318 }                                                 318 }
319                                                   319 
320 int bch2_inode_unpack(struct bkey_s_c k,          320 int bch2_inode_unpack(struct bkey_s_c k,
321                       struct bch_inode_unpacke    321                       struct bch_inode_unpacked *unpacked)
322 {                                                 322 {
323         if (likely(k.k->type == KEY_TYPE_inode    323         if (likely(k.k->type == KEY_TYPE_inode_v3))
324                 return bch2_inode_unpack_v3(k,    324                 return bch2_inode_unpack_v3(k, unpacked);
325         return bch2_inode_unpack_slowpath(k, u    325         return bch2_inode_unpack_slowpath(k, unpacked);
326 }                                                 326 }
327                                                   327 
328 int bch2_inode_peek_nowarn(struct btree_trans     328 int bch2_inode_peek_nowarn(struct btree_trans *trans,
329                     struct btree_iter *iter,      329                     struct btree_iter *iter,
330                     struct bch_inode_unpacked     330                     struct bch_inode_unpacked *inode,
331                     subvol_inum inum, unsigned    331                     subvol_inum inum, unsigned flags)
332 {                                                 332 {
333         struct bkey_s_c k;                        333         struct bkey_s_c k;
334         u32 snapshot;                             334         u32 snapshot;
335         int ret;                                  335         int ret;
336                                                   336 
337         ret = bch2_subvolume_get_snapshot(tran    337         ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
338         if (ret)                                  338         if (ret)
339                 return ret;                       339                 return ret;
340                                                   340 
341         k = bch2_bkey_get_iter(trans, iter, BT    341         k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes,
342                                SPOS(0, inum.in    342                                SPOS(0, inum.inum, snapshot),
343                                flags|BTREE_ITE    343                                flags|BTREE_ITER_cached);
344         ret = bkey_err(k);                        344         ret = bkey_err(k);
345         if (ret)                                  345         if (ret)
346                 return ret;                       346                 return ret;
347                                                   347 
348         ret = bkey_is_inode(k.k) ? 0 : -BCH_ER    348         ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode;
349         if (ret)                                  349         if (ret)
350                 goto err;                         350                 goto err;
351                                                   351 
352         ret = bch2_inode_unpack(k, inode);        352         ret = bch2_inode_unpack(k, inode);
353         if (ret)                                  353         if (ret)
354                 goto err;                         354                 goto err;
355                                                   355 
356         return 0;                                 356         return 0;
357 err:                                              357 err:
358         bch2_trans_iter_exit(trans, iter);        358         bch2_trans_iter_exit(trans, iter);
359         return ret;                               359         return ret;
360 }                                                 360 }
361                                                   361 
362 int bch2_inode_peek(struct btree_trans *trans,    362 int bch2_inode_peek(struct btree_trans *trans,
363                     struct btree_iter *iter,      363                     struct btree_iter *iter,
364                     struct bch_inode_unpacked     364                     struct bch_inode_unpacked *inode,
365                     subvol_inum inum, unsigned    365                     subvol_inum inum, unsigned flags)
366 {                                                 366 {
367         int ret = bch2_inode_peek_nowarn(trans    367         int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags);
368         bch_err_msg(trans->c, ret, "looking up    368         bch_err_msg(trans->c, ret, "looking up inum %u:%llu:", inum.subvol, inum.inum);
369         return ret;                               369         return ret;
370 }                                                 370 }
371                                                   371 
372 int bch2_inode_write_flags(struct btree_trans     372 int bch2_inode_write_flags(struct btree_trans *trans,
373                      struct btree_iter *iter,     373                      struct btree_iter *iter,
374                      struct bch_inode_unpacked    374                      struct bch_inode_unpacked *inode,
375                      enum btree_iter_update_tr    375                      enum btree_iter_update_trigger_flags flags)
376 {                                                 376 {
377         struct bkey_inode_buf *inode_p;           377         struct bkey_inode_buf *inode_p;
378                                                   378 
379         inode_p = bch2_trans_kmalloc(trans, si    379         inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
380         if (IS_ERR(inode_p))                      380         if (IS_ERR(inode_p))
381                 return PTR_ERR(inode_p);          381                 return PTR_ERR(inode_p);
382                                                   382 
383         bch2_inode_pack_inlined(inode_p, inode    383         bch2_inode_pack_inlined(inode_p, inode);
384         inode_p->inode.k.p.snapshot = iter->sn    384         inode_p->inode.k.p.snapshot = iter->snapshot;
385         return bch2_trans_update(trans, iter,     385         return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags);
386 }                                                 386 }
387                                                   387 
388 int __bch2_fsck_write_inode(struct btree_trans    388 int __bch2_fsck_write_inode(struct btree_trans *trans,
389                          struct bch_inode_unpa    389                          struct bch_inode_unpacked *inode,
390                          u32 snapshot)            390                          u32 snapshot)
391 {                                                 391 {
392         struct bkey_inode_buf *inode_p =          392         struct bkey_inode_buf *inode_p =
393                 bch2_trans_kmalloc(trans, size    393                 bch2_trans_kmalloc(trans, sizeof(*inode_p));
394                                                   394 
395         if (IS_ERR(inode_p))                      395         if (IS_ERR(inode_p))
396                 return PTR_ERR(inode_p);          396                 return PTR_ERR(inode_p);
397                                                   397 
398         bch2_inode_pack(inode_p, inode);          398         bch2_inode_pack(inode_p, inode);
399         inode_p->inode.k.p.snapshot = snapshot    399         inode_p->inode.k.p.snapshot = snapshot;
400                                                   400 
401         return bch2_btree_insert_nonextent(tra    401         return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
402                                 &inode_p->inod    402                                 &inode_p->inode.k_i,
403                                 BTREE_UPDATE_i    403                                 BTREE_UPDATE_internal_snapshot_node);
404 }                                                 404 }
405                                                   405 
406 int bch2_fsck_write_inode(struct btree_trans *    406 int bch2_fsck_write_inode(struct btree_trans *trans,
407                             struct bch_inode_u    407                             struct bch_inode_unpacked *inode,
408                             u32 snapshot)         408                             u32 snapshot)
409 {                                                 409 {
410         int ret = commit_do(trans, NULL, NULL,    410         int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
411                             __bch2_fsck_write_    411                             __bch2_fsck_write_inode(trans, inode, snapshot));
412         bch_err_fn(trans->c, ret);                412         bch_err_fn(trans->c, ret);
413         return ret;                               413         return ret;
414 }                                                 414 }
415                                                   415 
416 struct bkey_i *bch2_inode_to_v3(struct btree_t    416 struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k)
417 {                                                 417 {
418         struct bch_inode_unpacked u;              418         struct bch_inode_unpacked u;
419         struct bkey_inode_buf *inode_p;           419         struct bkey_inode_buf *inode_p;
420         int ret;                                  420         int ret;
421                                                   421 
422         if (!bkey_is_inode(&k->k))                422         if (!bkey_is_inode(&k->k))
423                 return ERR_PTR(-ENOENT);          423                 return ERR_PTR(-ENOENT);
424                                                   424 
425         inode_p = bch2_trans_kmalloc(trans, si    425         inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
426         if (IS_ERR(inode_p))                      426         if (IS_ERR(inode_p))
427                 return ERR_CAST(inode_p);         427                 return ERR_CAST(inode_p);
428                                                   428 
429         ret = bch2_inode_unpack(bkey_i_to_s_c(    429         ret = bch2_inode_unpack(bkey_i_to_s_c(k), &u);
430         if (ret)                                  430         if (ret)
431                 return ERR_PTR(ret);              431                 return ERR_PTR(ret);
432                                                   432 
433         bch2_inode_pack(inode_p, &u);             433         bch2_inode_pack(inode_p, &u);
434         return &inode_p->inode.k_i;               434         return &inode_p->inode.k_i;
435 }                                                 435 }
436                                                   436 
437 static int __bch2_inode_invalid(struct bch_fs     437 static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err)
438 {                                                 438 {
439         struct bch_inode_unpacked unpacked;       439         struct bch_inode_unpacked unpacked;
440         int ret = 0;                              440         int ret = 0;
441                                                   441 
442         bkey_fsck_err_on(k.k->p.inode, c, err,    442         bkey_fsck_err_on(k.k->p.inode, c, err,
443                          inode_pos_inode_nonze    443                          inode_pos_inode_nonzero,
444                          "nonzero k.p.inode");    444                          "nonzero k.p.inode");
445                                                   445 
446         bkey_fsck_err_on(k.k->p.offset < BLOCK    446         bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, c, err,
447                          inode_pos_blockdev_ra    447                          inode_pos_blockdev_range,
448                          "fs inode in blockdev    448                          "fs inode in blockdev range");
449                                                   449 
450         bkey_fsck_err_on(bch2_inode_unpack(k,     450         bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), c, err,
451                          inode_unpack_error,      451                          inode_unpack_error,
452                          "invalid variable len    452                          "invalid variable length fields");
453                                                   453 
454         bkey_fsck_err_on(unpacked.bi_data_chec    454         bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, c, err,
455                          inode_checksum_type_i    455                          inode_checksum_type_invalid,
456                          "invalid data checksu    456                          "invalid data checksum type (%u >= %u",
457                          unpacked.bi_data_chec    457                          unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1);
458                                                   458 
459         bkey_fsck_err_on(unpacked.bi_compressi    459         bkey_fsck_err_on(unpacked.bi_compression &&
460                          !bch2_compression_opt    460                          !bch2_compression_opt_valid(unpacked.bi_compression - 1), c, err,
461                          inode_compression_typ    461                          inode_compression_type_invalid,
462                          "invalid compression     462                          "invalid compression opt %u", unpacked.bi_compression - 1);
463                                                   463 
464         bkey_fsck_err_on((unpacked.bi_flags &     464         bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) &&
465                          unpacked.bi_nlink !=     465                          unpacked.bi_nlink != 0, c, err,
466                          inode_unlinked_but_nl    466                          inode_unlinked_but_nlink_nonzero,
467                          "flagged as unlinked     467                          "flagged as unlinked but bi_nlink != 0");
468                                                   468 
469         bkey_fsck_err_on(unpacked.bi_subvol &&    469         bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), c, err,
470                          inode_subvol_root_but    470                          inode_subvol_root_but_not_dir,
471                          "subvolume root but n    471                          "subvolume root but not a directory");
472 fsck_err:                                         472 fsck_err:
473         return ret;                               473         return ret;
474 }                                                 474 }
475                                                   475 
476 int bch2_inode_invalid(struct bch_fs *c, struc    476 int bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k,
477                        enum bch_validate_flags    477                        enum bch_validate_flags flags,
478                        struct printbuf *err)      478                        struct printbuf *err)
479 {                                                 479 {
480         struct bkey_s_c_inode inode = bkey_s_c    480         struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
481         int ret = 0;                              481         int ret = 0;
482                                                   482 
483         bkey_fsck_err_on(INODE_STR_HASH(inode.    483         bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err,
484                          inode_str_hash_invali    484                          inode_str_hash_invalid,
485                          "invalid str hash typ    485                          "invalid str hash type (%llu >= %u)",
486                          INODE_STR_HASH(inode.    486                          INODE_STR_HASH(inode.v), BCH_STR_HASH_NR);
487                                                   487 
488         ret = __bch2_inode_invalid(c, k, err);    488         ret = __bch2_inode_invalid(c, k, err);
489 fsck_err:                                         489 fsck_err:
490         return ret;                               490         return ret;
491 }                                                 491 }
492                                                   492 
493 int bch2_inode_v2_invalid(struct bch_fs *c, st    493 int bch2_inode_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
494                           enum bch_validate_fl    494                           enum bch_validate_flags flags,
495                           struct printbuf *err    495                           struct printbuf *err)
496 {                                                 496 {
497         struct bkey_s_c_inode_v2 inode = bkey_    497         struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);
498         int ret = 0;                              498         int ret = 0;
499                                                   499 
500         bkey_fsck_err_on(INODEv2_STR_HASH(inod    500         bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err,
501                          inode_str_hash_invali    501                          inode_str_hash_invalid,
502                          "invalid str hash typ    502                          "invalid str hash type (%llu >= %u)",
503                          INODEv2_STR_HASH(inod    503                          INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR);
504                                                   504 
505         ret = __bch2_inode_invalid(c, k, err);    505         ret = __bch2_inode_invalid(c, k, err);
506 fsck_err:                                         506 fsck_err:
507         return ret;                               507         return ret;
508 }                                                 508 }
509                                                   509 
510 int bch2_inode_v3_invalid(struct bch_fs *c, st    510 int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k,
511                           enum bch_validate_fl    511                           enum bch_validate_flags flags,
512                           struct printbuf *err    512                           struct printbuf *err)
513 {                                                 513 {
514         struct bkey_s_c_inode_v3 inode = bkey_    514         struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
515         int ret = 0;                              515         int ret = 0;
516                                                   516 
517         bkey_fsck_err_on(INODEv3_FIELDS_START(    517         bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL ||
518                          INODEv3_FIELDS_START(    518                          INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), c, err,
519                          inode_v3_fields_start    519                          inode_v3_fields_start_bad,
520                          "invalid fields_start    520                          "invalid fields_start (got %llu, min %u max %zu)",
521                          INODEv3_FIELDS_START(    521                          INODEv3_FIELDS_START(inode.v),
522                          INODEv3_FIELDS_START_    522                          INODEv3_FIELDS_START_INITIAL,
523                          bkey_val_u64s(inode.k    523                          bkey_val_u64s(inode.k));
524                                                   524 
525         bkey_fsck_err_on(INODEv3_STR_HASH(inod    525         bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err,
526                          inode_str_hash_invali    526                          inode_str_hash_invalid,
527                          "invalid str hash typ    527                          "invalid str hash type (%llu >= %u)",
528                          INODEv3_STR_HASH(inod    528                          INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR);
529                                                   529 
530         ret = __bch2_inode_invalid(c, k, err);    530         ret = __bch2_inode_invalid(c, k, err);
531 fsck_err:                                         531 fsck_err:
532         return ret;                               532         return ret;
533 }                                                 533 }
534                                                   534 
535 static void __bch2_inode_unpacked_to_text(stru    535 static void __bch2_inode_unpacked_to_text(struct printbuf *out,
536                                           stru    536                                           struct bch_inode_unpacked *inode)
537 {                                                 537 {
538         prt_printf(out, "\n");                    538         prt_printf(out, "\n");
539         printbuf_indent_add(out, 2);              539         printbuf_indent_add(out, 2);
540         prt_printf(out, "mode=%o\n", inode->bi    540         prt_printf(out, "mode=%o\n", inode->bi_mode);
541                                                   541 
542         prt_str(out, "flags=");                   542         prt_str(out, "flags=");
543         prt_bitflags(out, bch2_inode_flag_strs    543         prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1));
544         prt_printf(out, "(%x)\n", inode->bi_fl    544         prt_printf(out, "(%x)\n", inode->bi_flags);
545                                                   545 
546         prt_printf(out, "journal_seq=%llu\n",     546         prt_printf(out, "journal_seq=%llu\n",   inode->bi_journal_seq);
547         prt_printf(out, "bi_size=%llu\n",         547         prt_printf(out, "bi_size=%llu\n",       inode->bi_size);
548         prt_printf(out, "bi_sectors=%llu\n",      548         prt_printf(out, "bi_sectors=%llu\n",    inode->bi_sectors);
549         prt_printf(out, "bi_version=%llu\n",      549         prt_printf(out, "bi_version=%llu\n",    inode->bi_version);
550                                                   550 
551 #define x(_name, _bits)                           551 #define x(_name, _bits)                                         \
552         prt_printf(out, #_name "=%llu\n", (u64    552         prt_printf(out, #_name "=%llu\n", (u64) inode->_name);
553         BCH_INODE_FIELDS_v3()                     553         BCH_INODE_FIELDS_v3()
554 #undef  x                                         554 #undef  x
555                                                   555 
556         bch2_printbuf_strip_trailing_newline(o    556         bch2_printbuf_strip_trailing_newline(out);
557         printbuf_indent_sub(out, 2);              557         printbuf_indent_sub(out, 2);
558 }                                                 558 }
559                                                   559 
560 void bch2_inode_unpacked_to_text(struct printb    560 void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
561 {                                                 561 {
562         prt_printf(out, "inum: %llu ", inode->    562         prt_printf(out, "inum: %llu ", inode->bi_inum);
563         __bch2_inode_unpacked_to_text(out, ino    563         __bch2_inode_unpacked_to_text(out, inode);
564 }                                                 564 }
565                                                   565 
566 void bch2_inode_to_text(struct printbuf *out,     566 void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
567 {                                                 567 {
568         struct bch_inode_unpacked inode;          568         struct bch_inode_unpacked inode;
569                                                   569 
570         if (bch2_inode_unpack(k, &inode)) {       570         if (bch2_inode_unpack(k, &inode)) {
571                 prt_printf(out, "(unpack error    571                 prt_printf(out, "(unpack error)");
572                 return;                           572                 return;
573         }                                         573         }
574                                                   574 
575         __bch2_inode_unpacked_to_text(out, &in    575         __bch2_inode_unpacked_to_text(out, &inode);
576 }                                                 576 }
577                                                   577 
578 static inline u64 bkey_inode_flags(struct bkey    578 static inline u64 bkey_inode_flags(struct bkey_s_c k)
579 {                                                 579 {
580         switch (k.k->type) {                      580         switch (k.k->type) {
581         case KEY_TYPE_inode:                      581         case KEY_TYPE_inode:
582                 return le32_to_cpu(bkey_s_c_to    582                 return le32_to_cpu(bkey_s_c_to_inode(k).v->bi_flags);
583         case KEY_TYPE_inode_v2:                   583         case KEY_TYPE_inode_v2:
584                 return le64_to_cpu(bkey_s_c_to    584                 return le64_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_flags);
585         case KEY_TYPE_inode_v3:                   585         case KEY_TYPE_inode_v3:
586                 return le64_to_cpu(bkey_s_c_to    586                 return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_flags);
587         default:                                  587         default:
588                 return 0;                         588                 return 0;
589         }                                         589         }
590 }                                                 590 }
591                                                   591 
592 static inline bool bkey_is_deleted_inode(struc    592 static inline bool bkey_is_deleted_inode(struct bkey_s_c k)
593 {                                                 593 {
594         return bkey_inode_flags(k) & BCH_INODE    594         return bkey_inode_flags(k) & BCH_INODE_unlinked;
595 }                                                 595 }
596                                                   596 
597 int bch2_trigger_inode(struct btree_trans *tra    597 int bch2_trigger_inode(struct btree_trans *trans,
598                        enum btree_id btree_id,    598                        enum btree_id btree_id, unsigned level,
599                        struct bkey_s_c old,       599                        struct bkey_s_c old,
600                        struct bkey_s new,         600                        struct bkey_s new,
601                        enum btree_iter_update_    601                        enum btree_iter_update_trigger_flags flags)
602 {                                                 602 {
603         if ((flags & BTREE_TRIGGER_atomic) &&     603         if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) {
604                 BUG_ON(!trans->journal_res.seq    604                 BUG_ON(!trans->journal_res.seq);
605                 bkey_s_to_inode_v3(new).v->bi_    605                 bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq);
606         }                                         606         }
607                                                   607 
608         s64 nr = bkey_is_inode(new.k) - bkey_i    608         s64 nr = bkey_is_inode(new.k) - bkey_is_inode(old.k);
609         if ((flags & (BTREE_TRIGGER_transactio    609         if ((flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) && nr) {
610                 struct disk_accounting_pos acc    610                 struct disk_accounting_pos acc = { .type = BCH_DISK_ACCOUNTING_nr_inodes };
611                 int ret = bch2_disk_accounting    611                 int ret = bch2_disk_accounting_mod(trans, &acc, &nr, 1, flags & BTREE_TRIGGER_gc);
612                 if (ret)                          612                 if (ret)
613                         return ret;               613                         return ret;
614         }                                         614         }
615                                                   615 
616         int deleted_delta =     (int) bkey_is_    616         int deleted_delta =     (int) bkey_is_deleted_inode(new.s_c) -
617                                 (int) bkey_is_    617                                 (int) bkey_is_deleted_inode(old);
618         if ((flags & BTREE_TRIGGER_transaction    618         if ((flags & BTREE_TRIGGER_transactional) && deleted_delta) {
619                 int ret = bch2_btree_bit_mod_b    619                 int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes,
620                                                   620                                                       new.k->p, deleted_delta > 0);
621                 if (ret)                          621                 if (ret)
622                         return ret;               622                         return ret;
623         }                                         623         }
624                                                   624 
625         return 0;                                 625         return 0;
626 }                                                 626 }
627                                                   627 
628 int bch2_inode_generation_invalid(struct bch_f    628 int bch2_inode_generation_invalid(struct bch_fs *c, struct bkey_s_c k,
629                                   enum bch_val    629                                   enum bch_validate_flags flags,
630                                   struct print    630                                   struct printbuf *err)
631 {                                                 631 {
632         int ret = 0;                              632         int ret = 0;
633                                                   633 
634         bkey_fsck_err_on(k.k->p.inode, c, err,    634         bkey_fsck_err_on(k.k->p.inode, c, err,
635                          inode_pos_inode_nonze    635                          inode_pos_inode_nonzero,
636                          "nonzero k.p.inode");    636                          "nonzero k.p.inode");
637 fsck_err:                                         637 fsck_err:
638         return ret;                               638         return ret;
639 }                                                 639 }
640                                                   640 
641 void bch2_inode_generation_to_text(struct prin    641 void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c,
642                                    struct bkey    642                                    struct bkey_s_c k)
643 {                                                 643 {
644         struct bkey_s_c_inode_generation gen =    644         struct bkey_s_c_inode_generation gen = bkey_s_c_to_inode_generation(k);
645                                                   645 
646         prt_printf(out, "generation: %u", le32    646         prt_printf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation));
647 }                                                 647 }
648                                                   648 
649 void bch2_inode_init_early(struct bch_fs *c,      649 void bch2_inode_init_early(struct bch_fs *c,
650                            struct bch_inode_un    650                            struct bch_inode_unpacked *inode_u)
651 {                                                 651 {
652         enum bch_str_hash_type str_hash =         652         enum bch_str_hash_type str_hash =
653                 bch2_str_hash_opt_to_type(c, c    653                 bch2_str_hash_opt_to_type(c, c->opts.str_hash);
654                                                   654 
655         memset(inode_u, 0, sizeof(*inode_u));     655         memset(inode_u, 0, sizeof(*inode_u));
656                                                   656 
657         /* ick */                                 657         /* ick */
658         inode_u->bi_flags |= str_hash << INODE    658         inode_u->bi_flags |= str_hash << INODE_STR_HASH_OFFSET;
659         get_random_bytes(&inode_u->bi_hash_see    659         get_random_bytes(&inode_u->bi_hash_seed,
660                          sizeof(inode_u->bi_ha    660                          sizeof(inode_u->bi_hash_seed));
661 }                                                 661 }
662                                                   662 
663 void bch2_inode_init_late(struct bch_inode_unp    663 void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now,
664                           uid_t uid, gid_t gid    664                           uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
665                           struct bch_inode_unp    665                           struct bch_inode_unpacked *parent)
666 {                                                 666 {
667         inode_u->bi_mode        = mode;           667         inode_u->bi_mode        = mode;
668         inode_u->bi_uid         = uid;            668         inode_u->bi_uid         = uid;
669         inode_u->bi_gid         = gid;            669         inode_u->bi_gid         = gid;
670         inode_u->bi_dev         = rdev;           670         inode_u->bi_dev         = rdev;
671         inode_u->bi_atime       = now;            671         inode_u->bi_atime       = now;
672         inode_u->bi_mtime       = now;            672         inode_u->bi_mtime       = now;
673         inode_u->bi_ctime       = now;            673         inode_u->bi_ctime       = now;
674         inode_u->bi_otime       = now;            674         inode_u->bi_otime       = now;
675                                                   675 
676         if (parent && parent->bi_mode & S_ISGI    676         if (parent && parent->bi_mode & S_ISGID) {
677                 inode_u->bi_gid = parent->bi_g    677                 inode_u->bi_gid = parent->bi_gid;
678                 if (S_ISDIR(mode))                678                 if (S_ISDIR(mode))
679                         inode_u->bi_mode |= S_    679                         inode_u->bi_mode |= S_ISGID;
680         }                                         680         }
681                                                   681 
682         if (parent) {                             682         if (parent) {
683 #define x(_name, ...)   inode_u->bi_##_name =     683 #define x(_name, ...)   inode_u->bi_##_name = parent->bi_##_name;
684                 BCH_INODE_OPTS()                  684                 BCH_INODE_OPTS()
685 #undef x                                          685 #undef x
686         }                                         686         }
687 }                                                 687 }
688                                                   688 
689 void bch2_inode_init(struct bch_fs *c, struct     689 void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
690                      uid_t uid, gid_t gid, umo    690                      uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
691                      struct bch_inode_unpacked    691                      struct bch_inode_unpacked *parent)
692 {                                                 692 {
693         bch2_inode_init_early(c, inode_u);        693         bch2_inode_init_early(c, inode_u);
694         bch2_inode_init_late(inode_u, bch2_cur    694         bch2_inode_init_late(inode_u, bch2_current_time(c),
695                              uid, gid, mode, r    695                              uid, gid, mode, rdev, parent);
696 }                                                 696 }
697                                                   697 
698 static inline u32 bkey_generation(struct bkey_    698 static inline u32 bkey_generation(struct bkey_s_c k)
699 {                                                 699 {
700         switch (k.k->type) {                      700         switch (k.k->type) {
701         case KEY_TYPE_inode:                      701         case KEY_TYPE_inode:
702         case KEY_TYPE_inode_v2:                   702         case KEY_TYPE_inode_v2:
703                 BUG();                            703                 BUG();
704         case KEY_TYPE_inode_generation:           704         case KEY_TYPE_inode_generation:
705                 return le32_to_cpu(bkey_s_c_to    705                 return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation);
706         default:                                  706         default:
707                 return 0;                         707                 return 0;
708         }                                         708         }
709 }                                                 709 }
710                                                   710 
711 /*                                                711 /*
712  * This just finds an empty slot:                 712  * This just finds an empty slot:
713  */                                               713  */
714 int bch2_inode_create(struct btree_trans *tran    714 int bch2_inode_create(struct btree_trans *trans,
715                       struct btree_iter *iter,    715                       struct btree_iter *iter,
716                       struct bch_inode_unpacke    716                       struct bch_inode_unpacked *inode_u,
717                       u32 snapshot, u64 cpu)      717                       u32 snapshot, u64 cpu)
718 {                                                 718 {
719         struct bch_fs *c = trans->c;              719         struct bch_fs *c = trans->c;
720         struct bkey_s_c k;                        720         struct bkey_s_c k;
721         u64 min, max, start, pos, *hint;          721         u64 min, max, start, pos, *hint;
722         int ret = 0;                              722         int ret = 0;
723         unsigned bits = (c->opts.inodes_32bit     723         unsigned bits = (c->opts.inodes_32bit ? 31 : 63);
724                                                   724 
725         if (c->opts.shard_inode_numbers) {        725         if (c->opts.shard_inode_numbers) {
726                 bits -= c->inode_shard_bits;      726                 bits -= c->inode_shard_bits;
727                                                   727 
728                 min = (cpu << bits);              728                 min = (cpu << bits);
729                 max = (cpu << bits) | ~(ULLONG    729                 max = (cpu << bits) | ~(ULLONG_MAX << bits);
730                                                   730 
731                 min = max_t(u64, min, BLOCKDEV    731                 min = max_t(u64, min, BLOCKDEV_INODE_MAX);
732                 hint = c->unused_inode_hints +    732                 hint = c->unused_inode_hints + cpu;
733         } else {                                  733         } else {
734                 min = BLOCKDEV_INODE_MAX;         734                 min = BLOCKDEV_INODE_MAX;
735                 max = ~(ULLONG_MAX << bits);      735                 max = ~(ULLONG_MAX << bits);
736                 hint = c->unused_inode_hints;     736                 hint = c->unused_inode_hints;
737         }                                         737         }
738                                                   738 
739         start = READ_ONCE(*hint);                 739         start = READ_ONCE(*hint);
740                                                   740 
741         if (start >= max || start < min)          741         if (start >= max || start < min)
742                 start = min;                      742                 start = min;
743                                                   743 
744         pos = start;                              744         pos = start;
745         bch2_trans_iter_init(trans, iter, BTRE    745         bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos),
746                              BTREE_ITER_all_sn    746                              BTREE_ITER_all_snapshots|
747                              BTREE_ITER_intent    747                              BTREE_ITER_intent);
748 again:                                            748 again:
749         while ((k = bch2_btree_iter_peek(iter)    749         while ((k = bch2_btree_iter_peek(iter)).k &&
750                !(ret = bkey_err(k)) &&            750                !(ret = bkey_err(k)) &&
751                bkey_lt(k.k->p, POS(0, max))) {    751                bkey_lt(k.k->p, POS(0, max))) {
752                 if (pos < iter->pos.offset)       752                 if (pos < iter->pos.offset)
753                         goto found_slot;          753                         goto found_slot;
754                                                   754 
755                 /*                                755                 /*
756                  * We don't need to iterate ov    756                  * We don't need to iterate over keys in every snapshot once
757                  * we've found just one:          757                  * we've found just one:
758                  */                               758                  */
759                 pos = iter->pos.offset + 1;       759                 pos = iter->pos.offset + 1;
760                 bch2_btree_iter_set_pos(iter,     760                 bch2_btree_iter_set_pos(iter, POS(0, pos));
761         }                                         761         }
762                                                   762 
763         if (!ret && pos < max)                    763         if (!ret && pos < max)
764                 goto found_slot;                  764                 goto found_slot;
765                                                   765 
766         if (!ret && start == min)                 766         if (!ret && start == min)
767                 ret = -BCH_ERR_ENOSPC_inode_cr    767                 ret = -BCH_ERR_ENOSPC_inode_create;
768                                                   768 
769         if (ret) {                                769         if (ret) {
770                 bch2_trans_iter_exit(trans, it    770                 bch2_trans_iter_exit(trans, iter);
771                 return ret;                       771                 return ret;
772         }                                         772         }
773                                                   773 
774         /* Retry from start */                    774         /* Retry from start */
775         pos = start = min;                        775         pos = start = min;
776         bch2_btree_iter_set_pos(iter, POS(0, p    776         bch2_btree_iter_set_pos(iter, POS(0, pos));
777         goto again;                               777         goto again;
778 found_slot:                                       778 found_slot:
779         bch2_btree_iter_set_pos(iter, SPOS(0,     779         bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot));
780         k = bch2_btree_iter_peek_slot(iter);      780         k = bch2_btree_iter_peek_slot(iter);
781         ret = bkey_err(k);                        781         ret = bkey_err(k);
782         if (ret) {                                782         if (ret) {
783                 bch2_trans_iter_exit(trans, it    783                 bch2_trans_iter_exit(trans, iter);
784                 return ret;                       784                 return ret;
785         }                                         785         }
786                                                   786 
787         *hint                   = k.k->p.offse    787         *hint                   = k.k->p.offset;
788         inode_u->bi_inum        = k.k->p.offse    788         inode_u->bi_inum        = k.k->p.offset;
789         inode_u->bi_generation  = bkey_generat    789         inode_u->bi_generation  = bkey_generation(k);
790         return 0;                                 790         return 0;
791 }                                                 791 }
792                                                   792 
793 static int bch2_inode_delete_keys(struct btree    793 static int bch2_inode_delete_keys(struct btree_trans *trans,
794                                   subvol_inum     794                                   subvol_inum inum, enum btree_id id)
795 {                                                 795 {
796         struct btree_iter iter;                   796         struct btree_iter iter;
797         struct bkey_s_c k;                        797         struct bkey_s_c k;
798         struct bkey_i delete;                     798         struct bkey_i delete;
799         struct bpos end = POS(inum.inum, U64_M    799         struct bpos end = POS(inum.inum, U64_MAX);
800         u32 snapshot;                             800         u32 snapshot;
801         int ret = 0;                              801         int ret = 0;
802                                                   802 
803         /*                                        803         /*
804          * We're never going to be deleting pa    804          * We're never going to be deleting partial extents, no need to use an
805          * extent iterator:                       805          * extent iterator:
806          */                                       806          */
807         bch2_trans_iter_init(trans, &iter, id,    807         bch2_trans_iter_init(trans, &iter, id, POS(inum.inum, 0),
808                              BTREE_ITER_intent    808                              BTREE_ITER_intent);
809                                                   809 
810         while (1) {                               810         while (1) {
811                 bch2_trans_begin(trans);          811                 bch2_trans_begin(trans);
812                                                   812 
813                 ret = bch2_subvolume_get_snaps    813                 ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
814                 if (ret)                          814                 if (ret)
815                         goto err;                 815                         goto err;
816                                                   816 
817                 bch2_btree_iter_set_snapshot(&    817                 bch2_btree_iter_set_snapshot(&iter, snapshot);
818                                                   818 
819                 k = bch2_btree_iter_peek_upto(    819                 k = bch2_btree_iter_peek_upto(&iter, end);
820                 ret = bkey_err(k);                820                 ret = bkey_err(k);
821                 if (ret)                          821                 if (ret)
822                         goto err;                 822                         goto err;
823                                                   823 
824                 if (!k.k)                         824                 if (!k.k)
825                         break;                    825                         break;
826                                                   826 
827                 bkey_init(&delete.k);             827                 bkey_init(&delete.k);
828                 delete.k.p = iter.pos;            828                 delete.k.p = iter.pos;
829                                                   829 
830                 if (iter.flags & BTREE_ITER_is    830                 if (iter.flags & BTREE_ITER_is_extents)
831                         bch2_key_resize(&delet    831                         bch2_key_resize(&delete.k,
832                                         bpos_m    832                                         bpos_min(end, k.k->p).offset -
833                                         iter.p    833                                         iter.pos.offset);
834                                                   834 
835                 ret = bch2_trans_update(trans,    835                 ret = bch2_trans_update(trans, &iter, &delete, 0) ?:
836                       bch2_trans_commit(trans,    836                       bch2_trans_commit(trans, NULL, NULL,
837                                         BCH_TR    837                                         BCH_TRANS_COMMIT_no_enospc);
838 err:                                              838 err:
839                 if (ret && !bch2_err_matches(r    839                 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
840                         break;                    840                         break;
841         }                                         841         }
842                                                   842 
843         bch2_trans_iter_exit(trans, &iter);       843         bch2_trans_iter_exit(trans, &iter);
844         return ret;                               844         return ret;
845 }                                                 845 }
846                                                   846 
847 int bch2_inode_rm(struct bch_fs *c, subvol_inu    847 int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
848 {                                                 848 {
849         struct btree_trans *trans = bch2_trans    849         struct btree_trans *trans = bch2_trans_get(c);
850         struct btree_iter iter = { NULL };        850         struct btree_iter iter = { NULL };
851         struct bkey_i_inode_generation delete;    851         struct bkey_i_inode_generation delete;
852         struct bch_inode_unpacked inode_u;        852         struct bch_inode_unpacked inode_u;
853         struct bkey_s_c k;                        853         struct bkey_s_c k;
854         u32 snapshot;                             854         u32 snapshot;
855         int ret;                                  855         int ret;
856                                                   856 
857         /*                                        857         /*
858          * If this was a directory, there shou    858          * If this was a directory, there shouldn't be any real dirents left -
859          * but there could be whiteouts (from     859          * but there could be whiteouts (from hash collisions) that we should
860          * delete:                                860          * delete:
861          *                                        861          *
862          * XXX: the dirent could ideally would    862          * XXX: the dirent could ideally would delete whiteouts when they're no
863          * longer needed                          863          * longer needed
864          */                                       864          */
865         ret   = bch2_inode_delete_keys(trans,     865         ret   = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?:
866                 bch2_inode_delete_keys(trans,     866                 bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?:
867                 bch2_inode_delete_keys(trans,     867                 bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents);
868         if (ret)                                  868         if (ret)
869                 goto err;                         869                 goto err;
870 retry:                                            870 retry:
871         bch2_trans_begin(trans);                  871         bch2_trans_begin(trans);
872                                                   872 
873         ret = bch2_subvolume_get_snapshot(tran    873         ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
874         if (ret)                                  874         if (ret)
875                 goto err;                         875                 goto err;
876                                                   876 
877         k = bch2_bkey_get_iter(trans, &iter, B    877         k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
878                                SPOS(0, inum.in    878                                SPOS(0, inum.inum, snapshot),
879                                BTREE_ITER_inte    879                                BTREE_ITER_intent|BTREE_ITER_cached);
880         ret = bkey_err(k);                        880         ret = bkey_err(k);
881         if (ret)                                  881         if (ret)
882                 goto err;                         882                 goto err;
883                                                   883 
884         if (!bkey_is_inode(k.k)) {                884         if (!bkey_is_inode(k.k)) {
885                 bch2_fs_inconsistent(c,           885                 bch2_fs_inconsistent(c,
886                                      "inode %l    886                                      "inode %llu:%u not found when deleting",
887                                      inum.inum    887                                      inum.inum, snapshot);
888                 ret = -EIO;                       888                 ret = -EIO;
889                 goto err;                         889                 goto err;
890         }                                         890         }
891                                                   891 
892         bch2_inode_unpack(k, &inode_u);           892         bch2_inode_unpack(k, &inode_u);
893                                                   893 
894         bkey_inode_generation_init(&delete.k_i    894         bkey_inode_generation_init(&delete.k_i);
895         delete.k.p = iter.pos;                    895         delete.k.p = iter.pos;
896         delete.v.bi_generation = cpu_to_le32(i    896         delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
897                                                   897 
898         ret   = bch2_trans_update(trans, &iter    898         ret   = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
899                 bch2_trans_commit(trans, NULL,    899                 bch2_trans_commit(trans, NULL, NULL,
900                                 BCH_TRANS_COMM    900                                 BCH_TRANS_COMMIT_no_enospc);
901 err:                                              901 err:
902         bch2_trans_iter_exit(trans, &iter);       902         bch2_trans_iter_exit(trans, &iter);
903         if (bch2_err_matches(ret, BCH_ERR_tran    903         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
904                 goto retry;                       904                 goto retry;
905                                                   905 
906         bch2_trans_put(trans);                    906         bch2_trans_put(trans);
907         return ret;                               907         return ret;
908 }                                                 908 }
909                                                   909 
910 int bch2_inode_find_by_inum_nowarn_trans(struc    910 int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *trans,
911                                   subvol_inum     911                                   subvol_inum inum,
912                                   struct bch_i    912                                   struct bch_inode_unpacked *inode)
913 {                                                 913 {
914         struct btree_iter iter;                   914         struct btree_iter iter;
915         int ret;                                  915         int ret;
916                                                   916 
917         ret = bch2_inode_peek_nowarn(trans, &i    917         ret = bch2_inode_peek_nowarn(trans, &iter, inode, inum, 0);
918         if (!ret)                                 918         if (!ret)
919                 bch2_trans_iter_exit(trans, &i    919                 bch2_trans_iter_exit(trans, &iter);
920         return ret;                               920         return ret;
921 }                                                 921 }
922                                                   922 
923 int bch2_inode_find_by_inum_trans(struct btree    923 int bch2_inode_find_by_inum_trans(struct btree_trans *trans,
924                                   subvol_inum     924                                   subvol_inum inum,
925                                   struct bch_i    925                                   struct bch_inode_unpacked *inode)
926 {                                                 926 {
927         struct btree_iter iter;                   927         struct btree_iter iter;
928         int ret;                                  928         int ret;
929                                                   929 
930         ret = bch2_inode_peek(trans, &iter, in    930         ret = bch2_inode_peek(trans, &iter, inode, inum, 0);
931         if (!ret)                                 931         if (!ret)
932                 bch2_trans_iter_exit(trans, &i    932                 bch2_trans_iter_exit(trans, &iter);
933         return ret;                               933         return ret;
934 }                                                 934 }
935                                                   935 
936 int bch2_inode_find_by_inum(struct bch_fs *c,     936 int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum,
937                             struct bch_inode_u    937                             struct bch_inode_unpacked *inode)
938 {                                                 938 {
939         return bch2_trans_do(c, NULL, NULL, 0,    939         return bch2_trans_do(c, NULL, NULL, 0,
940                 bch2_inode_find_by_inum_trans(    940                 bch2_inode_find_by_inum_trans(trans, inum, inode));
941 }                                                 941 }
942                                                   942 
943 int bch2_inode_nlink_inc(struct bch_inode_unpa    943 int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
944 {                                                 944 {
945         if (bi->bi_flags & BCH_INODE_unlinked)    945         if (bi->bi_flags & BCH_INODE_unlinked)
946                 bi->bi_flags &= ~BCH_INODE_unl    946                 bi->bi_flags &= ~BCH_INODE_unlinked;
947         else {                                    947         else {
948                 if (bi->bi_nlink == U32_MAX)      948                 if (bi->bi_nlink == U32_MAX)
949                         return -EINVAL;           949                         return -EINVAL;
950                                                   950 
951                 bi->bi_nlink++;                   951                 bi->bi_nlink++;
952         }                                         952         }
953                                                   953 
954         return 0;                                 954         return 0;
955 }                                                 955 }
956                                                   956 
957 void bch2_inode_nlink_dec(struct btree_trans *    957 void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked *bi)
958 {                                                 958 {
959         if (bi->bi_nlink && (bi->bi_flags & BC    959         if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_unlinked)) {
960                 bch2_trans_inconsistent(trans,    960                 bch2_trans_inconsistent(trans, "inode %llu unlinked but link count nonzero",
961                                         bi->bi    961                                         bi->bi_inum);
962                 return;                           962                 return;
963         }                                         963         }
964                                                   964 
965         if (bi->bi_flags & BCH_INODE_unlinked)    965         if (bi->bi_flags & BCH_INODE_unlinked) {
966                 bch2_trans_inconsistent(trans,    966                 bch2_trans_inconsistent(trans, "inode %llu link count underflow", bi->bi_inum);
967                 return;                           967                 return;
968         }                                         968         }
969                                                   969 
970         if (bi->bi_nlink)                         970         if (bi->bi_nlink)
971                 bi->bi_nlink--;                   971                 bi->bi_nlink--;
972         else                                      972         else
973                 bi->bi_flags |= BCH_INODE_unli    973                 bi->bi_flags |= BCH_INODE_unlinked;
974 }                                                 974 }
975                                                   975 
976 struct bch_opts bch2_inode_opts_to_opts(struct    976 struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode)
977 {                                                 977 {
978         struct bch_opts ret = { 0 };              978         struct bch_opts ret = { 0 };
979 #define x(_name, _bits)                           979 #define x(_name, _bits)                                                 \
980         if (inode->bi_##_name)                    980         if (inode->bi_##_name)                                          \
981                 opt_set(ret, _name, inode->bi_    981                 opt_set(ret, _name, inode->bi_##_name - 1);
982         BCH_INODE_OPTS()                          982         BCH_INODE_OPTS()
983 #undef x                                          983 #undef x
984         return ret;                               984         return ret;
985 }                                                 985 }
986                                                   986 
987 void bch2_inode_opts_get(struct bch_io_opts *o    987 void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c,
988                          struct bch_inode_unpa    988                          struct bch_inode_unpacked *inode)
989 {                                                 989 {
990 #define x(_name, _bits)         opts->_name =     990 #define x(_name, _bits)         opts->_name = inode_opt_get(c, inode, _name);
991         BCH_INODE_OPTS()                          991         BCH_INODE_OPTS()
992 #undef x                                          992 #undef x
993                                                   993 
994         if (opts->nocow)                          994         if (opts->nocow)
995                 opts->compression = opts->back    995                 opts->compression = opts->background_compression = opts->data_checksum = opts->erasure_code = 0;
996 }                                                 996 }
997                                                   997 
998 int bch2_inum_opts_get(struct btree_trans *tra    998 int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts)
999 {                                                 999 {
1000         struct bch_inode_unpacked inode;         1000         struct bch_inode_unpacked inode;
1001         int ret = lockrestart_do(trans, bch2_    1001         int ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode));
1002                                                  1002 
1003         if (ret)                                 1003         if (ret)
1004                 return ret;                      1004                 return ret;
1005                                                  1005 
1006         bch2_inode_opts_get(opts, trans->c, &    1006         bch2_inode_opts_get(opts, trans->c, &inode);
1007         return 0;                                1007         return 0;
1008 }                                                1008 }
1009                                                  1009 
1010 int bch2_inode_rm_snapshot(struct btree_trans    1010 int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
1011 {                                                1011 {
1012         struct bch_fs *c = trans->c;             1012         struct bch_fs *c = trans->c;
1013         struct btree_iter iter = { NULL };       1013         struct btree_iter iter = { NULL };
1014         struct bkey_i_inode_generation delete    1014         struct bkey_i_inode_generation delete;
1015         struct bch_inode_unpacked inode_u;       1015         struct bch_inode_unpacked inode_u;
1016         struct bkey_s_c k;                       1016         struct bkey_s_c k;
1017         int ret;                                 1017         int ret;
1018                                                  1018 
1019         do {                                     1019         do {
1020                 ret   = bch2_btree_delete_ran    1020                 ret   = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
1021                                                  1021                                                       SPOS(inum, 0, snapshot),
1022                                                  1022                                                       SPOS(inum, U64_MAX, snapshot),
1023                                                  1023                                                       0, NULL) ?:
1024                         bch2_btree_delete_ran    1024                         bch2_btree_delete_range_trans(trans, BTREE_ID_dirents,
1025                                                  1025                                                       SPOS(inum, 0, snapshot),
1026                                                  1026                                                       SPOS(inum, U64_MAX, snapshot),
1027                                                  1027                                                       0, NULL) ?:
1028                         bch2_btree_delete_ran    1028                         bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs,
1029                                                  1029                                                       SPOS(inum, 0, snapshot),
1030                                                  1030                                                       SPOS(inum, U64_MAX, snapshot),
1031                                                  1031                                                       0, NULL);
1032         } while (ret == -BCH_ERR_transaction_    1032         } while (ret == -BCH_ERR_transaction_restart_nested);
1033         if (ret)                                 1033         if (ret)
1034                 goto err;                        1034                 goto err;
1035 retry:                                           1035 retry:
1036         bch2_trans_begin(trans);                 1036         bch2_trans_begin(trans);
1037                                                  1037 
1038         k = bch2_bkey_get_iter(trans, &iter,     1038         k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
1039                                SPOS(0, inum,     1039                                SPOS(0, inum, snapshot), BTREE_ITER_intent);
1040         ret = bkey_err(k);                       1040         ret = bkey_err(k);
1041         if (ret)                                 1041         if (ret)
1042                 goto err;                        1042                 goto err;
1043                                                  1043 
1044         if (!bkey_is_inode(k.k)) {               1044         if (!bkey_is_inode(k.k)) {
1045                 bch2_fs_inconsistent(c,          1045                 bch2_fs_inconsistent(c,
1046                                      "inode %    1046                                      "inode %llu:%u not found when deleting",
1047                                      inum, sn    1047                                      inum, snapshot);
1048                 ret = -EIO;                      1048                 ret = -EIO;
1049                 goto err;                        1049                 goto err;
1050         }                                        1050         }
1051                                                  1051 
1052         bch2_inode_unpack(k, &inode_u);          1052         bch2_inode_unpack(k, &inode_u);
1053                                                  1053 
1054         /* Subvolume root? */                    1054         /* Subvolume root? */
1055         if (inode_u.bi_subvol)                   1055         if (inode_u.bi_subvol)
1056                 bch_warn(c, "deleting inode %    1056                 bch_warn(c, "deleting inode %llu marked as unlinked, but also a subvolume root!?", inode_u.bi_inum);
1057                                                  1057 
1058         bkey_inode_generation_init(&delete.k_    1058         bkey_inode_generation_init(&delete.k_i);
1059         delete.k.p = iter.pos;                   1059         delete.k.p = iter.pos;
1060         delete.v.bi_generation = cpu_to_le32(    1060         delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
1061                                                  1061 
1062         ret   = bch2_trans_update(trans, &ite    1062         ret   = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
1063                 bch2_trans_commit(trans, NULL    1063                 bch2_trans_commit(trans, NULL, NULL,
1064                                 BCH_TRANS_COM    1064                                 BCH_TRANS_COMMIT_no_enospc);
1065 err:                                             1065 err:
1066         bch2_trans_iter_exit(trans, &iter);      1066         bch2_trans_iter_exit(trans, &iter);
1067         if (bch2_err_matches(ret, BCH_ERR_tra    1067         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
1068                 goto retry;                      1068                 goto retry;
1069                                                  1069 
1070         return ret ?: -BCH_ERR_transaction_re    1070         return ret ?: -BCH_ERR_transaction_restart_nested;
1071 }                                                1071 }
1072                                                  1072 
1073 static int may_delete_deleted_inode(struct bt    1073 static int may_delete_deleted_inode(struct btree_trans *trans,
1074                                     struct bt    1074                                     struct btree_iter *iter,
1075                                     struct bp    1075                                     struct bpos pos,
1076                                     bool *nee    1076                                     bool *need_another_pass)
1077 {                                                1077 {
1078         struct bch_fs *c = trans->c;             1078         struct bch_fs *c = trans->c;
1079         struct btree_iter inode_iter;            1079         struct btree_iter inode_iter;
1080         struct bkey_s_c k;                       1080         struct bkey_s_c k;
1081         struct bch_inode_unpacked inode;         1081         struct bch_inode_unpacked inode;
1082         int ret;                                 1082         int ret;
1083                                                  1083 
1084         k = bch2_bkey_get_iter(trans, &inode_    1084         k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, pos, BTREE_ITER_cached);
1085         ret = bkey_err(k);                       1085         ret = bkey_err(k);
1086         if (ret)                                 1086         if (ret)
1087                 return ret;                      1087                 return ret;
1088                                                  1088 
1089         ret = bkey_is_inode(k.k) ? 0 : -BCH_E    1089         ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode;
1090         if (fsck_err_on(!bkey_is_inode(k.k),     1090         if (fsck_err_on(!bkey_is_inode(k.k),
1091                         trans, deleted_inode_    1091                         trans, deleted_inode_missing,
1092                         "nonexistent inode %l    1092                         "nonexistent inode %llu:%u in deleted_inodes btree",
1093                         pos.offset, pos.snaps    1093                         pos.offset, pos.snapshot))
1094                 goto delete;                     1094                 goto delete;
1095                                                  1095 
1096         ret = bch2_inode_unpack(k, &inode);      1096         ret = bch2_inode_unpack(k, &inode);
1097         if (ret)                                 1097         if (ret)
1098                 goto out;                        1098                 goto out;
1099                                                  1099 
1100         if (S_ISDIR(inode.bi_mode)) {            1100         if (S_ISDIR(inode.bi_mode)) {
1101                 ret = bch2_empty_dir_snapshot    1101                 ret = bch2_empty_dir_snapshot(trans, pos.offset, 0, pos.snapshot);
1102                 if (fsck_err_on(bch2_err_matc    1102                 if (fsck_err_on(bch2_err_matches(ret, ENOTEMPTY),
1103                                 trans, delete    1103                                 trans, deleted_inode_is_dir,
1104                                 "non empty di    1104                                 "non empty directory %llu:%u in deleted_inodes btree",
1105                                 pos.offset, p    1105                                 pos.offset, pos.snapshot))
1106                         goto delete;             1106                         goto delete;
1107                 if (ret)                         1107                 if (ret)
1108                         goto out;                1108                         goto out;
1109         }                                        1109         }
1110                                                  1110 
1111         if (fsck_err_on(!(inode.bi_flags & BC    1111         if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked),
1112                         trans, deleted_inode_    1112                         trans, deleted_inode_not_unlinked,
1113                         "non-deleted inode %l    1113                         "non-deleted inode %llu:%u in deleted_inodes btree",
1114                         pos.offset, pos.snaps    1114                         pos.offset, pos.snapshot))
1115                 goto delete;                     1115                 goto delete;
1116                                                  1116 
1117         if (c->sb.clean &&                       1117         if (c->sb.clean &&
1118             !fsck_err(trans, deleted_inode_bu    1118             !fsck_err(trans, deleted_inode_but_clean,
1119                       "filesystem marked as c    1119                       "filesystem marked as clean but have deleted inode %llu:%u",
1120                       pos.offset, pos.snapsho    1120                       pos.offset, pos.snapshot)) {
1121                 ret = 0;                         1121                 ret = 0;
1122                 goto out;                        1122                 goto out;
1123         }                                        1123         }
1124                                                  1124 
1125         if (bch2_snapshot_is_internal_node(c,    1125         if (bch2_snapshot_is_internal_node(c, pos.snapshot)) {
1126                 struct bpos new_min_pos;         1126                 struct bpos new_min_pos;
1127                                                  1127 
1128                 ret = bch2_propagate_key_to_s    1128                 ret = bch2_propagate_key_to_snapshot_leaves(trans, inode_iter.btree_id, k, &new_min_pos);
1129                 if (ret)                         1129                 if (ret)
1130                         goto out;                1130                         goto out;
1131                                                  1131 
1132                 inode.bi_flags &= ~BCH_INODE_    1132                 inode.bi_flags &= ~BCH_INODE_unlinked;
1133                                                  1133 
1134                 ret = bch2_inode_write_flags(    1134                 ret = bch2_inode_write_flags(trans, &inode_iter, &inode,
1135                                                  1135                                              BTREE_UPDATE_internal_snapshot_node);
1136                 bch_err_msg(c, ret, "clearing    1136                 bch_err_msg(c, ret, "clearing inode unlinked flag");
1137                 if (ret)                         1137                 if (ret)
1138                         goto out;                1138                         goto out;
1139                                                  1139 
1140                 /*                               1140                 /*
1141                  * We'll need another write b    1141                  * We'll need another write buffer flush to pick up the new
1142                  * unlinked inodes in the sna    1142                  * unlinked inodes in the snapshot leaves:
1143                  */                              1143                  */
1144                 *need_another_pass = true;       1144                 *need_another_pass = true;
1145                 goto out;                        1145                 goto out;
1146         }                                        1146         }
1147                                                  1147 
1148         ret = 1;                                 1148         ret = 1;
1149 out:                                             1149 out:
1150 fsck_err:                                        1150 fsck_err:
1151         bch2_trans_iter_exit(trans, &inode_it    1151         bch2_trans_iter_exit(trans, &inode_iter);
1152         return ret;                              1152         return ret;
1153 delete:                                          1153 delete:
1154         ret = bch2_btree_bit_mod_buffered(tra    1154         ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, pos, false);
1155         goto out;                                1155         goto out;
1156 }                                                1156 }
1157                                                  1157 
1158 int bch2_delete_dead_inodes(struct bch_fs *c)    1158 int bch2_delete_dead_inodes(struct bch_fs *c)
1159 {                                                1159 {
1160         struct btree_trans *trans = bch2_tran    1160         struct btree_trans *trans = bch2_trans_get(c);
1161         bool need_another_pass;                  1161         bool need_another_pass;
1162         int ret;                                 1162         int ret;
1163 again:                                           1163 again:
1164         /*                                       1164         /*
1165          * if we ran check_inodes() unlinked     1165          * if we ran check_inodes() unlinked inodes will have already been
1166          * cleaned up but the write buffer wi    1166          * cleaned up but the write buffer will be out of sync; therefore we
1167          * always need a write buffer flush      1167          * always need a write buffer flush
1168          */                                      1168          */
1169         ret = bch2_btree_write_buffer_flush_s    1169         ret = bch2_btree_write_buffer_flush_sync(trans);
1170         if (ret)                                 1170         if (ret)
1171                 goto err;                        1171                 goto err;
1172                                                  1172 
1173         need_another_pass = false;               1173         need_another_pass = false;
1174                                                  1174 
1175         /*                                       1175         /*
1176          * Weird transaction restart handling    1176          * Weird transaction restart handling here because on successful delete,
1177          * bch2_inode_rm_snapshot() will retu    1177          * bch2_inode_rm_snapshot() will return a nested transaction restart,
1178          * but we can't retry because the btr    1178          * but we can't retry because the btree write buffer won't have been
1179          * flushed and we'd spin:                1179          * flushed and we'd spin:
1180          */                                      1180          */
1181         ret = for_each_btree_key_commit(trans    1181         ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
1182                                         BTREE    1182                                         BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
1183                                         NULL,    1183                                         NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
1184                 ret = may_delete_deleted_inod    1184                 ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass);
1185                 if (ret > 0) {                   1185                 if (ret > 0) {
1186                         bch_verbose(c, "delet    1186                         bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot);
1187                                                  1187 
1188                         ret = bch2_inode_rm_s    1188                         ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot);
1189                         /*                       1189                         /*
1190                          * We don't want to l    1190                          * We don't want to loop here: a transaction restart
1191                          * error here means w    1191                          * error here means we handled a transaction restart and
1192                          * we're actually don    1192                          * we're actually done, but if we loop we'll retry the
1193                          * same key because t    1193                          * same key because the write buffer hasn't been flushed
1194                          * yet                   1194                          * yet
1195                          */                      1195                          */
1196                         if (bch2_err_matches(    1196                         if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
1197                                 ret = 0;         1197                                 ret = 0;
1198                                 continue;        1198                                 continue;
1199                         }                        1199                         }
1200                 }                                1200                 }
1201                                                  1201 
1202                 ret;                             1202                 ret;
1203         }));                                     1203         }));
1204                                                  1204 
1205         if (!ret && need_another_pass)           1205         if (!ret && need_another_pass)
1206                 goto again;                      1206                 goto again;
1207 err:                                             1207 err:
1208         bch2_trans_put(trans);                   1208         bch2_trans_put(trans);
1209         return ret;                              1209         return ret;
1210 }                                                1210 }
1211                                                  1211 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php