~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/jffs2/gc.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * JFFS2 -- Journalling Flash File System, Version 2.
  3  *
  4  * Copyright © 2001-2007 Red Hat, Inc.
  5  * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  6  *
  7  * Created by David Woodhouse <dwmw2@infradead.org>
  8  *
  9  * For licensing information, see the file 'LICENCE' in this directory.
 10  *
 11  */
 12 
 13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 14 
 15 #include <linux/kernel.h>
 16 #include <linux/mtd/mtd.h>
 17 #include <linux/slab.h>
 18 #include <linux/pagemap.h>
 19 #include <linux/crc32.h>
 20 #include <linux/compiler.h>
 21 #include <linux/stat.h>
 22 #include "nodelist.h"
 23 #include "compr.h"
 24 
 25 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
 26                                           struct jffs2_inode_cache *ic,
 27                                           struct jffs2_raw_node_ref *raw);
 28 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 29                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
 30 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 31                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
 32 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 33                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
 34 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 35                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
 36                                       uint32_t start, uint32_t end);
 37 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 38                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
 39                                        uint32_t start, uint32_t end);
 40 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
 41                                struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
 42 
 43 /* Called with erase_completion_lock held */
 44 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
 45 {
 46         struct jffs2_eraseblock *ret;
 47         struct list_head *nextlist = NULL;
 48         int n = jiffies % 128;
 49 
 50         /* Pick an eraseblock to garbage collect next. This is where we'll
 51            put the clever wear-levelling algorithms. Eventually.  */
 52         /* We possibly want to favour the dirtier blocks more when the
 53            number of free blocks is low. */
 54 again:
 55         if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
 56                 jffs2_dbg(1, "Picking block from bad_used_list to GC next\n");
 57                 nextlist = &c->bad_used_list;
 58         } else if (n < 50 && !list_empty(&c->erasable_list)) {
 59                 /* Note that most of them will have gone directly to be erased.
 60                    So don't favour the erasable_list _too_ much. */
 61                 jffs2_dbg(1, "Picking block from erasable_list to GC next\n");
 62                 nextlist = &c->erasable_list;
 63         } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
 64                 /* Most of the time, pick one off the very_dirty list */
 65                 jffs2_dbg(1, "Picking block from very_dirty_list to GC next\n");
 66                 nextlist = &c->very_dirty_list;
 67         } else if (n < 126 && !list_empty(&c->dirty_list)) {
 68                 jffs2_dbg(1, "Picking block from dirty_list to GC next\n");
 69                 nextlist = &c->dirty_list;
 70         } else if (!list_empty(&c->clean_list)) {
 71                 jffs2_dbg(1, "Picking block from clean_list to GC next\n");
 72                 nextlist = &c->clean_list;
 73         } else if (!list_empty(&c->dirty_list)) {
 74                 jffs2_dbg(1, "Picking block from dirty_list to GC next (clean_list was empty)\n");
 75 
 76                 nextlist = &c->dirty_list;
 77         } else if (!list_empty(&c->very_dirty_list)) {
 78                 jffs2_dbg(1, "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n");
 79                 nextlist = &c->very_dirty_list;
 80         } else if (!list_empty(&c->erasable_list)) {
 81                 jffs2_dbg(1, "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n");
 82 
 83                 nextlist = &c->erasable_list;
 84         } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
 85                 /* There are blocks are wating for the wbuf sync */
 86                 jffs2_dbg(1, "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n");
 87                 spin_unlock(&c->erase_completion_lock);
 88                 jffs2_flush_wbuf_pad(c);
 89                 spin_lock(&c->erase_completion_lock);
 90                 goto again;
 91         } else {
 92                 /* Eep. All were empty */
 93                 jffs2_dbg(1, "No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n");
 94                 return NULL;
 95         }
 96 
 97         ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
 98         list_del(&ret->list);
 99         c->gcblock = ret;
100         ret->gc_node = ret->first_node;
101         if (!ret->gc_node) {
102                 pr_warn("Eep. ret->gc_node for block at 0x%08x is NULL\n",
103                         ret->offset);
104                 BUG();
105         }
106 
107         /* Have we accidentally picked a clean block with wasted space ? */
108         if (ret->wasted_size) {
109                 jffs2_dbg(1, "Converting wasted_size %08x to dirty_size\n",
110                           ret->wasted_size);
111                 ret->dirty_size += ret->wasted_size;
112                 c->wasted_size -= ret->wasted_size;
113                 c->dirty_size += ret->wasted_size;
114                 ret->wasted_size = 0;
115         }
116 
117         return ret;
118 }
119 
120 /* jffs2_garbage_collect_pass
121  * Make a single attempt to progress GC. Move one node, and possibly
122  * start erasing one eraseblock.
123  */
124 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
125 {
126         struct jffs2_inode_info *f;
127         struct jffs2_inode_cache *ic;
128         struct jffs2_eraseblock *jeb;
129         struct jffs2_raw_node_ref *raw;
130         uint32_t gcblock_dirty;
131         int ret = 0, inum, nlink;
132         int xattr = 0;
133 
134         if (mutex_lock_interruptible(&c->alloc_sem))
135                 return -EINTR;
136 
137 
138         for (;;) {
139                 /* We can't start doing GC until we've finished checking
140                    the node CRCs etc. */
141                 int bucket, want_ino;
142 
143                 spin_lock(&c->erase_completion_lock);
144                 if (!c->unchecked_size)
145                         break;
146                 spin_unlock(&c->erase_completion_lock);
147 
148                 if (!xattr)
149                         xattr = jffs2_verify_xattr(c);
150 
151                 spin_lock(&c->inocache_lock);
152                 /* Instead of doing the inodes in numeric order, doing a lookup
153                  * in the hash for each possible number, just walk the hash
154                  * buckets of *existing* inodes. This means that we process
155                  * them out-of-order, but it can be a lot faster if there's
156                  * a sparse inode# space. Which there often is. */
157                 want_ino = c->check_ino;
158                 for (bucket = c->check_ino % c->inocache_hashsize ; bucket < c->inocache_hashsize; bucket++) {
159                         for (ic = c->inocache_list[bucket]; ic; ic = ic->next) {
160                                 if (ic->ino < want_ino)
161                                         continue;
162 
163                                 if (ic->state != INO_STATE_CHECKEDABSENT &&
164                                     ic->state != INO_STATE_PRESENT)
165                                         goto got_next; /* with inocache_lock held */
166 
167                                 jffs2_dbg(1, "Skipping ino #%u already checked\n",
168                                           ic->ino);
169                         }
170                         want_ino = 0;
171                 }
172 
173                 /* Point c->check_ino past the end of the last bucket. */
174                 c->check_ino = ((c->highest_ino + c->inocache_hashsize + 1) &
175                                 ~c->inocache_hashsize) - 1;
176 
177                 spin_unlock(&c->inocache_lock);
178 
179                 pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n",
180                         c->unchecked_size);
181                 jffs2_dbg_dump_block_lists_nolock(c);
182                 mutex_unlock(&c->alloc_sem);
183                 return -ENOSPC;
184 
185         got_next:
186                 /* For next time round the loop, we want c->checked_ino to indicate
187                  * the *next* one we want to check. And since we're walking the
188                  * buckets rather than doing it sequentially, it's: */
189                 c->check_ino = ic->ino + c->inocache_hashsize;
190 
191                 if (!ic->pino_nlink) {
192                         jffs2_dbg(1, "Skipping check of ino #%d with nlink/pino zero\n",
193                                   ic->ino);
194                         spin_unlock(&c->inocache_lock);
195                         jffs2_xattr_delete_inode(c, ic);
196                         continue;
197                 }
198                 switch(ic->state) {
199                 case INO_STATE_CHECKEDABSENT:
200                 case INO_STATE_PRESENT:
201                         spin_unlock(&c->inocache_lock);
202                         continue;
203 
204                 case INO_STATE_GC:
205                 case INO_STATE_CHECKING:
206                         pr_warn("Inode #%u is in state %d during CRC check phase!\n",
207                                 ic->ino, ic->state);
208                         spin_unlock(&c->inocache_lock);
209                         BUG();
210 
211                 case INO_STATE_READING:
212                         /* We need to wait for it to finish, lest we move on
213                            and trigger the BUG() above while we haven't yet
214                            finished checking all its nodes */
215                         jffs2_dbg(1, "Waiting for ino #%u to finish reading\n",
216                                   ic->ino);
217                         /* We need to come back again for the _same_ inode. We've
218                          made no progress in this case, but that should be OK */
219                         c->check_ino = ic->ino;
220 
221                         mutex_unlock(&c->alloc_sem);
222                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
223                         return 0;
224 
225                 default:
226                         BUG();
227 
228                 case INO_STATE_UNCHECKED:
229                         ;
230                 }
231                 ic->state = INO_STATE_CHECKING;
232                 spin_unlock(&c->inocache_lock);
233 
234                 jffs2_dbg(1, "%s(): triggering inode scan of ino#%u\n",
235                           __func__, ic->ino);
236 
237                 ret = jffs2_do_crccheck_inode(c, ic);
238                 if (ret)
239                         pr_warn("Returned error for crccheck of ino #%u. Expect badness...\n",
240                                 ic->ino);
241 
242                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
243                 mutex_unlock(&c->alloc_sem);
244                 return ret;
245         }
246 
247         /* If there are any blocks which need erasing, erase them now */
248         if (!list_empty(&c->erase_complete_list) ||
249             !list_empty(&c->erase_pending_list)) {
250                 spin_unlock(&c->erase_completion_lock);
251                 mutex_unlock(&c->alloc_sem);
252                 jffs2_dbg(1, "%s(): erasing pending blocks\n", __func__);
253                 if (jffs2_erase_pending_blocks(c, 1))
254                         return 0;
255 
256                 jffs2_dbg(1, "No progress from erasing block; doing GC anyway\n");
257                 mutex_lock(&c->alloc_sem);
258                 spin_lock(&c->erase_completion_lock);
259         }
260 
261         /* First, work out which block we're garbage-collecting */
262         jeb = c->gcblock;
263 
264         if (!jeb)
265                 jeb = jffs2_find_gc_block(c);
266 
267         if (!jeb) {
268                 /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
269                 if (c->nr_erasing_blocks) {
270                         spin_unlock(&c->erase_completion_lock);
271                         mutex_unlock(&c->alloc_sem);
272                         return -EAGAIN;
273                 }
274                 jffs2_dbg(1, "Couldn't find erase block to garbage collect!\n");
275                 spin_unlock(&c->erase_completion_lock);
276                 mutex_unlock(&c->alloc_sem);
277                 return -EIO;
278         }
279 
280         jffs2_dbg(1, "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n",
281                   jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size);
282         D1(if (c->nextblock)
283            printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
284 
285         if (!jeb->used_size) {
286                 mutex_unlock(&c->alloc_sem);
287                 goto eraseit;
288         }
289 
290         raw = jeb->gc_node;
291         gcblock_dirty = jeb->dirty_size;
292 
293         while(ref_obsolete(raw)) {
294                 jffs2_dbg(1, "Node at 0x%08x is obsolete... skipping\n",
295                           ref_offset(raw));
296                 raw = ref_next(raw);
297                 if (unlikely(!raw)) {
298                         pr_warn("eep. End of raw list while still supposedly nodes to GC\n");
299                         pr_warn("erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
300                                 jeb->offset, jeb->free_size,
301                                 jeb->dirty_size, jeb->used_size);
302                         jeb->gc_node = raw;
303                         spin_unlock(&c->erase_completion_lock);
304                         mutex_unlock(&c->alloc_sem);
305                         BUG();
306                 }
307         }
308         jeb->gc_node = raw;
309 
310         jffs2_dbg(1, "Going to garbage collect node at 0x%08x\n",
311                   ref_offset(raw));
312 
313         if (!raw->next_in_ino) {
314                 /* Inode-less node. Clean marker, snapshot or something like that */
315                 spin_unlock(&c->erase_completion_lock);
316                 if (ref_flags(raw) == REF_PRISTINE) {
317                         /* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
318                         jffs2_garbage_collect_pristine(c, NULL, raw);
319                 } else {
320                         /* Just mark it obsolete */
321                         jffs2_mark_node_obsolete(c, raw);
322                 }
323                 mutex_unlock(&c->alloc_sem);
324                 goto eraseit_lock;
325         }
326 
327         ic = jffs2_raw_ref_to_ic(raw);
328 
329 #ifdef CONFIG_JFFS2_FS_XATTR
330         /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
331          * We can decide whether this node is inode or xattr by ic->class.     */
332         if (ic->class == RAWNODE_CLASS_XATTR_DATUM
333             || ic->class == RAWNODE_CLASS_XATTR_REF) {
334                 spin_unlock(&c->erase_completion_lock);
335 
336                 if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
337                         ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
338                 } else {
339                         ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
340                 }
341                 goto test_gcnode;
342         }
343 #endif
344 
345         /* We need to hold the inocache. Either the erase_completion_lock or
346            the inocache_lock are sufficient; we trade down since the inocache_lock
347            causes less contention. */
348         spin_lock(&c->inocache_lock);
349 
350         spin_unlock(&c->erase_completion_lock);
351 
352         jffs2_dbg(1, "%s(): collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n",
353                   __func__, jeb->offset, ref_offset(raw), ref_flags(raw),
354                   ic->ino);
355 
356         /* Three possibilities:
357            1. Inode is already in-core. We must iget it and do proper
358               updating to its fragtree, etc.
359            2. Inode is not in-core, node is REF_PRISTINE. We lock the
360               inocache to prevent a read_inode(), copy the node intact.
361            3. Inode is not in-core, node is not pristine. We must iget()
362               and take the slow path.
363         */
364 
365         switch(ic->state) {
366         case INO_STATE_CHECKEDABSENT:
367                 /* It's been checked, but it's not currently in-core.
368                    We can just copy any pristine nodes, but have
369                    to prevent anyone else from doing read_inode() while
370                    we're at it, so we set the state accordingly */
371                 if (ref_flags(raw) == REF_PRISTINE)
372                         ic->state = INO_STATE_GC;
373                 else {
374                         jffs2_dbg(1, "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
375                                   ic->ino);
376                 }
377                 break;
378 
379         case INO_STATE_PRESENT:
380                 /* It's in-core. GC must iget() it. */
381                 break;
382 
383         case INO_STATE_UNCHECKED:
384         case INO_STATE_CHECKING:
385         case INO_STATE_GC:
386                 /* Should never happen. We should have finished checking
387                    by the time we actually start doing any GC, and since
388                    we're holding the alloc_sem, no other garbage collection
389                    can happen.
390                 */
391                 pr_crit("Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
392                         ic->ino, ic->state);
393                 mutex_unlock(&c->alloc_sem);
394                 spin_unlock(&c->inocache_lock);
395                 BUG();
396 
397         case INO_STATE_READING:
398                 /* Someone's currently trying to read it. We must wait for
399                    them to finish and then go through the full iget() route
400                    to do the GC. However, sometimes read_inode() needs to get
401                    the alloc_sem() (for marking nodes invalid) so we must
402                    drop the alloc_sem before sleeping. */
403 
404                 mutex_unlock(&c->alloc_sem);
405                 jffs2_dbg(1, "%s(): waiting for ino #%u in state %d\n",
406                           __func__, ic->ino, ic->state);
407                 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
408                 /* And because we dropped the alloc_sem we must start again from the
409                    beginning. Ponder chance of livelock here -- we're returning success
410                    without actually making any progress.
411 
412                    Q: What are the chances that the inode is back in INO_STATE_READING
413                    again by the time we next enter this function? And that this happens
414                    enough times to cause a real delay?
415 
416                    A: Small enough that I don't care :)
417                 */
418                 return 0;
419         }
420 
421         /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
422            node intact, and we don't have to muck about with the fragtree etc.
423            because we know it's not in-core. If it _was_ in-core, we go through
424            all the iget() crap anyway */
425 
426         if (ic->state == INO_STATE_GC) {
427                 spin_unlock(&c->inocache_lock);
428 
429                 ret = jffs2_garbage_collect_pristine(c, ic, raw);
430 
431                 spin_lock(&c->inocache_lock);
432                 ic->state = INO_STATE_CHECKEDABSENT;
433                 wake_up(&c->inocache_wq);
434 
435                 if (ret != -EBADFD) {
436                         spin_unlock(&c->inocache_lock);
437                         goto test_gcnode;
438                 }
439 
440                 /* Fall through if it wanted us to, with inocache_lock held */
441         }
442 
443         /* Prevent the fairly unlikely race where the gcblock is
444            entirely obsoleted by the final close of a file which had
445            the only valid nodes in the block, followed by erasure,
446            followed by freeing of the ic because the erased block(s)
447            held _all_ the nodes of that inode.... never been seen but
448            it's vaguely possible. */
449 
450         inum = ic->ino;
451         nlink = ic->pino_nlink;
452         spin_unlock(&c->inocache_lock);
453 
454         f = jffs2_gc_fetch_inode(c, inum, !nlink);
455         if (IS_ERR(f)) {
456                 ret = PTR_ERR(f);
457                 goto release_sem;
458         }
459         if (!f) {
460                 ret = 0;
461                 goto release_sem;
462         }
463 
464         ret = jffs2_garbage_collect_live(c, jeb, raw, f);
465 
466         jffs2_gc_release_inode(c, f);
467 
468  test_gcnode:
469         if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
470                 /* Eep. This really should never happen. GC is broken */
471                 pr_err("Error garbage collecting node at %08x!\n",
472                        ref_offset(jeb->gc_node));
473                 ret = -ENOSPC;
474         }
475  release_sem:
476         mutex_unlock(&c->alloc_sem);
477 
478  eraseit_lock:
479         /* If we've finished this block, start it erasing */
480         spin_lock(&c->erase_completion_lock);
481 
482  eraseit:
483         if (c->gcblock && !c->gcblock->used_size) {
484                 jffs2_dbg(1, "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n",
485                           c->gcblock->offset);
486                 /* We're GC'ing an empty block? */
487                 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
488                 c->gcblock = NULL;
489                 c->nr_erasing_blocks++;
490                 jffs2_garbage_collect_trigger(c);
491         }
492         spin_unlock(&c->erase_completion_lock);
493 
494         return ret;
495 }
496 
497 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
498                                       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
499 {
500         struct jffs2_node_frag *frag;
501         struct jffs2_full_dnode *fn = NULL;
502         struct jffs2_full_dirent *fd;
503         uint32_t start = 0, end = 0, nrfrags = 0;
504         int ret = 0;
505 
506         mutex_lock(&f->sem);
507 
508         /* Now we have the lock for this inode. Check that it's still the one at the head
509            of the list. */
510 
511         spin_lock(&c->erase_completion_lock);
512 
513         if (c->gcblock != jeb) {
514                 spin_unlock(&c->erase_completion_lock);
515                 jffs2_dbg(1, "GC block is no longer gcblock. Restart\n");
516                 goto upnout;
517         }
518         if (ref_obsolete(raw)) {
519                 spin_unlock(&c->erase_completion_lock);
520                 jffs2_dbg(1, "node to be GC'd was obsoleted in the meantime.\n");
521                 /* They'll call again */
522                 goto upnout;
523         }
524         spin_unlock(&c->erase_completion_lock);
525 
526         /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
527         if (f->metadata && f->metadata->raw == raw) {
528                 fn = f->metadata;
529                 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
530                 goto upnout;
531         }
532 
533         /* FIXME. Read node and do lookup? */
534         for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
535                 if (frag->node && frag->node->raw == raw) {
536                         fn = frag->node;
537                         end = frag->ofs + frag->size;
538                         if (!nrfrags++)
539                                 start = frag->ofs;
540                         if (nrfrags == frag->node->frags)
541                                 break; /* We've found them all */
542                 }
543         }
544         if (fn) {
545                 if (ref_flags(raw) == REF_PRISTINE) {
546                         ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
547                         if (!ret) {
548                                 /* Urgh. Return it sensibly. */
549                                 frag->node->raw = f->inocache->nodes;
550                         }
551                         if (ret != -EBADFD)
552                                 goto upnout;
553                 }
554                 /* We found a datanode. Do the GC */
555                 if((start >> PAGE_SHIFT) < ((end-1) >> PAGE_SHIFT)) {
556                         /* It crosses a page boundary. Therefore, it must be a hole. */
557                         ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
558                 } else {
559                         /* It could still be a hole. But we GC the page this way anyway */
560                         ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
561                 }
562                 goto upnout;
563         }
564 
565         /* Wasn't a dnode. Try dirent */
566         for (fd = f->dents; fd; fd=fd->next) {
567                 if (fd->raw == raw)
568                         break;
569         }
570 
571         if (fd && fd->ino) {
572                 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
573         } else if (fd) {
574                 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
575         } else {
576                 pr_warn("Raw node at 0x%08x wasn't in node lists for ino #%u\n",
577                         ref_offset(raw), f->inocache->ino);
578                 if (ref_obsolete(raw)) {
579                         pr_warn("But it's obsolete so we don't mind too much\n");
580                 } else {
581                         jffs2_dbg_dump_node(c, ref_offset(raw));
582                         BUG();
583                 }
584         }
585  upnout:
586         mutex_unlock(&f->sem);
587 
588         return ret;
589 }
590 
591 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
592                                           struct jffs2_inode_cache *ic,
593                                           struct jffs2_raw_node_ref *raw)
594 {
595         union jffs2_node_union *node;
596         size_t retlen;
597         int ret;
598         uint32_t phys_ofs, alloclen;
599         uint32_t crc, rawlen;
600         int retried = 0;
601 
602         jffs2_dbg(1, "Going to GC REF_PRISTINE node at 0x%08x\n",
603                   ref_offset(raw));
604 
605         alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
606 
607         /* Ask for a small amount of space (or the totlen if smaller) because we
608            don't want to force wastage of the end of a block if splitting would
609            work. */
610         if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
611                 alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
612 
613         ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
614         /* 'rawlen' is not the exact summary size; it is only an upper estimation */
615 
616         if (ret)
617                 return ret;
618 
619         if (alloclen < rawlen) {
620                 /* Doesn't fit untouched. We'll go the old route and split it */
621                 return -EBADFD;
622         }
623 
624         node = kmalloc(rawlen, GFP_KERNEL);
625         if (!node)
626                 return -ENOMEM;
627 
628         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
629         if (!ret && retlen != rawlen)
630                 ret = -EIO;
631         if (ret)
632                 goto out_node;
633 
634         crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
635         if (je32_to_cpu(node->u.hdr_crc) != crc) {
636                 pr_warn("Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
637                         ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
638                 goto bail;
639         }
640 
641         switch(je16_to_cpu(node->u.nodetype)) {
642         case JFFS2_NODETYPE_INODE:
643                 crc = crc32(0, node, sizeof(node->i)-8);
644                 if (je32_to_cpu(node->i.node_crc) != crc) {
645                         pr_warn("Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
646                                 ref_offset(raw), je32_to_cpu(node->i.node_crc),
647                                 crc);
648                         goto bail;
649                 }
650 
651                 if (je32_to_cpu(node->i.dsize)) {
652                         crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
653                         if (je32_to_cpu(node->i.data_crc) != crc) {
654                                 pr_warn("Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
655                                         ref_offset(raw),
656                                         je32_to_cpu(node->i.data_crc), crc);
657                                 goto bail;
658                         }
659                 }
660                 break;
661 
662         case JFFS2_NODETYPE_DIRENT:
663                 crc = crc32(0, node, sizeof(node->d)-8);
664                 if (je32_to_cpu(node->d.node_crc) != crc) {
665                         pr_warn("Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
666                                 ref_offset(raw),
667                                 je32_to_cpu(node->d.node_crc), crc);
668                         goto bail;
669                 }
670 
671                 if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
672                         pr_warn("Name in dirent node at 0x%08x contains zeroes\n",
673                                 ref_offset(raw));
674                         goto bail;
675                 }
676 
677                 if (node->d.nsize) {
678                         crc = crc32(0, node->d.name, node->d.nsize);
679                         if (je32_to_cpu(node->d.name_crc) != crc) {
680                                 pr_warn("Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
681                                         ref_offset(raw),
682                                         je32_to_cpu(node->d.name_crc), crc);
683                                 goto bail;
684                         }
685                 }
686                 break;
687         default:
688                 /* If it's inode-less, we don't _know_ what it is. Just copy it intact */
689                 if (ic) {
690                         pr_warn("Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
691                                 ref_offset(raw), je16_to_cpu(node->u.nodetype));
692                         goto bail;
693                 }
694         }
695 
696         /* OK, all the CRCs are good; this node can just be copied as-is. */
697  retry:
698         phys_ofs = write_ofs(c);
699 
700         ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
701 
702         if (ret || (retlen != rawlen)) {
703                 pr_notice("Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
704                           rawlen, phys_ofs, ret, retlen);
705                 if (retlen) {
706                         jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
707                 } else {
708                         pr_notice("Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n",
709                                   phys_ofs);
710                 }
711                 if (!retried) {
712                         /* Try to reallocate space and retry */
713                         uint32_t dummy;
714                         struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
715 
716                         retried = 1;
717 
718                         jffs2_dbg(1, "Retrying failed write of REF_PRISTINE node.\n");
719 
720                         jffs2_dbg_acct_sanity_check(c,jeb);
721                         jffs2_dbg_acct_paranoia_check(c, jeb);
722 
723                         ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
724                                                 /* this is not the exact summary size of it,
725                                                         it is only an upper estimation */
726 
727                         if (!ret) {
728                                 jffs2_dbg(1, "Allocated space at 0x%08x to retry failed write.\n",
729                                           phys_ofs);
730 
731                                 jffs2_dbg_acct_sanity_check(c,jeb);
732                                 jffs2_dbg_acct_paranoia_check(c, jeb);
733 
734                                 goto retry;
735                         }
736                         jffs2_dbg(1, "Failed to allocate space to retry failed write: %d!\n",
737                                   ret);
738                 }
739 
740                 if (!ret)
741                         ret = -EIO;
742                 goto out_node;
743         }
744         jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
745 
746         jffs2_mark_node_obsolete(c, raw);
747         jffs2_dbg(1, "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n",
748                   ref_offset(raw));
749 
750  out_node:
751         kfree(node);
752         return ret;
753  bail:
754         ret = -EBADFD;
755         goto out_node;
756 }
757 
758 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
759                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
760 {
761         struct jffs2_full_dnode *new_fn;
762         struct jffs2_raw_inode ri;
763         struct jffs2_node_frag *last_frag;
764         union jffs2_device_node dev;
765         char *mdata = NULL;
766         int mdatalen = 0;
767         uint32_t alloclen, ilen;
768         int ret;
769 
770         if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
771             S_ISCHR(JFFS2_F_I_MODE(f)) ) {
772                 /* For these, we don't actually need to read the old node */
773                 mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
774                 mdata = (char *)&dev;
775                 jffs2_dbg(1, "%s(): Writing %d bytes of kdev_t\n",
776                           __func__, mdatalen);
777         } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
778                 mdatalen = fn->size;
779                 mdata = kmalloc(fn->size, GFP_KERNEL);
780                 if (!mdata) {
781                         pr_warn("kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
782                         return -ENOMEM;
783                 }
784                 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
785                 if (ret) {
786                         pr_warn("read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n",
787                                 ret);
788                         kfree(mdata);
789                         return ret;
790                 }
791                 jffs2_dbg(1, "%s(): Writing %d bites of symlink target\n",
792                           __func__, mdatalen);
793 
794         }
795 
796         ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
797                                 JFFS2_SUMMARY_INODE_SIZE);
798         if (ret) {
799                 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
800                         sizeof(ri) + mdatalen, ret);
801                 goto out;
802         }
803 
804         last_frag = frag_last(&f->fragtree);
805         if (last_frag)
806                 /* Fetch the inode length from the fragtree rather then
807                  * from i_size since i_size may have not been updated yet */
808                 ilen = last_frag->ofs + last_frag->size;
809         else
810                 ilen = JFFS2_F_I_SIZE(f);
811 
812         memset(&ri, 0, sizeof(ri));
813         ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
814         ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
815         ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
816         ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
817 
818         ri.ino = cpu_to_je32(f->inocache->ino);
819         ri.version = cpu_to_je32(++f->highest_version);
820         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
821         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
822         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
823         ri.isize = cpu_to_je32(ilen);
824         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
825         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
826         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
827         ri.offset = cpu_to_je32(0);
828         ri.csize = cpu_to_je32(mdatalen);
829         ri.dsize = cpu_to_je32(mdatalen);
830         ri.compr = JFFS2_COMPR_NONE;
831         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
832         ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
833 
834         new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
835 
836         if (IS_ERR(new_fn)) {
837                 pr_warn("Error writing new dnode: %ld\n", PTR_ERR(new_fn));
838                 ret = PTR_ERR(new_fn);
839                 goto out;
840         }
841         jffs2_mark_node_obsolete(c, fn->raw);
842         jffs2_free_full_dnode(fn);
843         f->metadata = new_fn;
844  out:
845         if (S_ISLNK(JFFS2_F_I_MODE(f)))
846                 kfree(mdata);
847         return ret;
848 }
849 
850 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
851                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
852 {
853         struct jffs2_full_dirent *new_fd;
854         struct jffs2_raw_dirent rd;
855         uint32_t alloclen;
856         int ret;
857 
858         rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
859         rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
860         rd.nsize = strlen(fd->name);
861         rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
862         rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
863 
864         rd.pino = cpu_to_je32(f->inocache->ino);
865         rd.version = cpu_to_je32(++f->highest_version);
866         rd.ino = cpu_to_je32(fd->ino);
867         /* If the times on this inode were set by explicit utime() they can be different,
868            so refrain from splatting them. */
869         if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
870                 rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
871         else
872                 rd.mctime = cpu_to_je32(0);
873         rd.type = fd->type;
874         rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
875         rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
876 
877         ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
878                                 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
879         if (ret) {
880                 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
881                         sizeof(rd)+rd.nsize, ret);
882                 return ret;
883         }
884         new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
885 
886         if (IS_ERR(new_fd)) {
887                 pr_warn("jffs2_write_dirent in garbage_collect_dirent failed: %ld\n",
888                         PTR_ERR(new_fd));
889                 return PTR_ERR(new_fd);
890         }
891         jffs2_add_fd_to_list(c, new_fd, &f->dents);
892         return 0;
893 }
894 
895 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
896                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
897 {
898         struct jffs2_full_dirent **fdp = &f->dents;
899         int found = 0;
900 
901         /* On a medium where we can't actually mark nodes obsolete
902            pernamently, such as NAND flash, we need to work out
903            whether this deletion dirent is still needed to actively
904            delete a 'real' dirent with the same name that's still
905            somewhere else on the flash. */
906         if (!jffs2_can_mark_obsolete(c)) {
907                 struct jffs2_raw_dirent *rd;
908                 struct jffs2_raw_node_ref *raw;
909                 int ret;
910                 size_t retlen;
911                 int name_len = strlen(fd->name);
912                 uint32_t name_crc = crc32(0, fd->name, name_len);
913                 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
914 
915                 rd = kmalloc(rawlen, GFP_KERNEL);
916                 if (!rd)
917                         return -ENOMEM;
918 
919                 /* Prevent the erase code from nicking the obsolete node refs while
920                    we're looking at them. I really don't like this extra lock but
921                    can't see any alternative. Suggestions on a postcard to... */
922                 mutex_lock(&c->erase_free_sem);
923 
924                 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
925 
926                         cond_resched();
927 
928                         /* We only care about obsolete ones */
929                         if (!(ref_obsolete(raw)))
930                                 continue;
931 
932                         /* Any dirent with the same name is going to have the same length... */
933                         if (ref_totlen(c, NULL, raw) != rawlen)
934                                 continue;
935 
936                         /* Doesn't matter if there's one in the same erase block. We're going to
937                            delete it too at the same time. */
938                         if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
939                                 continue;
940 
941                         jffs2_dbg(1, "Check potential deletion dirent at %08x\n",
942                                   ref_offset(raw));
943 
944                         /* This is an obsolete node belonging to the same directory, and it's of the right
945                            length. We need to take a closer look...*/
946                         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
947                         if (ret) {
948                                 pr_warn("%s(): Read error (%d) reading obsolete node at %08x\n",
949                                         __func__, ret, ref_offset(raw));
950                                 /* If we can't read it, we don't need to continue to obsolete it. Continue */
951                                 continue;
952                         }
953                         if (retlen != rawlen) {
954                                 pr_warn("%s(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
955                                         __func__, retlen, rawlen,
956                                         ref_offset(raw));
957                                 continue;
958                         }
959 
960                         if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
961                                 continue;
962 
963                         /* If the name CRC doesn't match, skip */
964                         if (je32_to_cpu(rd->name_crc) != name_crc)
965                                 continue;
966 
967                         /* If the name length doesn't match, or it's another deletion dirent, skip */
968                         if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
969                                 continue;
970 
971                         /* OK, check the actual name now */
972                         if (memcmp(rd->name, fd->name, name_len))
973                                 continue;
974 
975                         /* OK. The name really does match. There really is still an older node on
976                            the flash which our deletion dirent obsoletes. So we have to write out
977                            a new deletion dirent to replace it */
978                         mutex_unlock(&c->erase_free_sem);
979 
980                         jffs2_dbg(1, "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
981                                   ref_offset(fd->raw), fd->name,
982                                   ref_offset(raw), je32_to_cpu(rd->ino));
983                         kfree(rd);
984 
985                         return jffs2_garbage_collect_dirent(c, jeb, f, fd);
986                 }
987 
988                 mutex_unlock(&c->erase_free_sem);
989                 kfree(rd);
990         }
991 
992         /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
993            we should update the metadata node with those times accordingly */
994 
995         /* No need for it any more. Just mark it obsolete and remove it from the list */
996         while (*fdp) {
997                 if ((*fdp) == fd) {
998                         found = 1;
999                         *fdp = fd->next;
1000                         break;
1001                 }
1002                 fdp = &(*fdp)->next;
1003         }
1004         if (!found) {
1005                 pr_warn("Deletion dirent \"%s\" not found in list for ino #%u\n",
1006                         fd->name, f->inocache->ino);
1007         }
1008         jffs2_mark_node_obsolete(c, fd->raw);
1009         jffs2_free_full_dirent(fd);
1010         return 0;
1011 }
1012 
1013 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1014                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1015                                       uint32_t start, uint32_t end)
1016 {
1017         struct jffs2_raw_inode ri;
1018         struct jffs2_node_frag *frag;
1019         struct jffs2_full_dnode *new_fn;
1020         uint32_t alloclen, ilen;
1021         int ret;
1022 
1023         jffs2_dbg(1, "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
1024                   f->inocache->ino, start, end);
1025 
1026         memset(&ri, 0, sizeof(ri));
1027 
1028         if(fn->frags > 1) {
1029                 size_t readlen;
1030                 uint32_t crc;
1031                 /* It's partially obsoleted by a later write. So we have to
1032                    write it out again with the _same_ version as before */
1033                 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
1034                 if (readlen != sizeof(ri) || ret) {
1035                         pr_warn("Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n",
1036                                 ret, readlen);
1037                         goto fill;
1038                 }
1039                 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
1040                         pr_warn("%s(): Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
1041                                 __func__, ref_offset(fn->raw),
1042                                 je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
1043                         return -EIO;
1044                 }
1045                 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
1046                         pr_warn("%s(): Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
1047                                 __func__, ref_offset(fn->raw),
1048                                 je32_to_cpu(ri.totlen), sizeof(ri));
1049                         return -EIO;
1050                 }
1051                 crc = crc32(0, &ri, sizeof(ri)-8);
1052                 if (crc != je32_to_cpu(ri.node_crc)) {
1053                         pr_warn("%s: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
1054                                 __func__, ref_offset(fn->raw),
1055                                 je32_to_cpu(ri.node_crc), crc);
1056                         /* FIXME: We could possibly deal with this by writing new holes for each frag */
1057                         pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1058                                 start, end, f->inocache->ino);
1059                         goto fill;
1060                 }
1061                 if (ri.compr != JFFS2_COMPR_ZERO) {
1062                         pr_warn("%s(): Node 0x%08x wasn't a hole node!\n",
1063                                 __func__, ref_offset(fn->raw));
1064                         pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1065                                 start, end, f->inocache->ino);
1066                         goto fill;
1067                 }
1068         } else {
1069         fill:
1070                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1071                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1072                 ri.totlen = cpu_to_je32(sizeof(ri));
1073                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1074 
1075                 ri.ino = cpu_to_je32(f->inocache->ino);
1076                 ri.version = cpu_to_je32(++f->highest_version);
1077                 ri.offset = cpu_to_je32(start);
1078                 ri.dsize = cpu_to_je32(end - start);
1079                 ri.csize = cpu_to_je32(0);
1080                 ri.compr = JFFS2_COMPR_ZERO;
1081         }
1082 
1083         frag = frag_last(&f->fragtree);
1084         if (frag)
1085                 /* Fetch the inode length from the fragtree rather then
1086                  * from i_size since i_size may have not been updated yet */
1087                 ilen = frag->ofs + frag->size;
1088         else
1089                 ilen = JFFS2_F_I_SIZE(f);
1090 
1091         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1092         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1093         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1094         ri.isize = cpu_to_je32(ilen);
1095         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1096         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1097         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1098         ri.data_crc = cpu_to_je32(0);
1099         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1100 
1101         ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1102                                      JFFS2_SUMMARY_INODE_SIZE);
1103         if (ret) {
1104                 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1105                         sizeof(ri), ret);
1106                 return ret;
1107         }
1108         new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1109 
1110         if (IS_ERR(new_fn)) {
1111                 pr_warn("Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1112                 return PTR_ERR(new_fn);
1113         }
1114         if (je32_to_cpu(ri.version) == f->highest_version) {
1115                 jffs2_add_full_dnode_to_inode(c, f, new_fn);
1116                 if (f->metadata) {
1117                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1118                         jffs2_free_full_dnode(f->metadata);
1119                         f->metadata = NULL;
1120                 }
1121                 return 0;
1122         }
1123 
1124         /*
1125          * We should only get here in the case where the node we are
1126          * replacing had more than one frag, so we kept the same version
1127          * number as before. (Except in case of error -- see 'goto fill;'
1128          * above.)
1129          */
1130         D1(if(unlikely(fn->frags <= 1)) {
1131                         pr_warn("%s(): Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1132                                 __func__, fn->frags, je32_to_cpu(ri.version),
1133                                 f->highest_version, je32_to_cpu(ri.ino));
1134         });
1135 
1136         /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1137         mark_ref_normal(new_fn->raw);
1138 
1139         for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1140              frag; frag = frag_next(frag)) {
1141                 if (frag->ofs > fn->size + fn->ofs)
1142                         break;
1143                 if (frag->node == fn) {
1144                         frag->node = new_fn;
1145                         new_fn->frags++;
1146                         fn->frags--;
1147                 }
1148         }
1149         if (fn->frags) {
1150                 pr_warn("%s(): Old node still has frags!\n", __func__);
1151                 BUG();
1152         }
1153         if (!new_fn->frags) {
1154                 pr_warn("%s(): New node has no frags!\n", __func__);
1155                 BUG();
1156         }
1157 
1158         jffs2_mark_node_obsolete(c, fn->raw);
1159         jffs2_free_full_dnode(fn);
1160 
1161         return 0;
1162 }
1163 
1164 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1165                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1166                                        uint32_t start, uint32_t end)
1167 {
1168         struct inode *inode = OFNI_EDONI_2SFFJ(f);
1169         struct jffs2_full_dnode *new_fn;
1170         struct jffs2_raw_inode ri;
1171         uint32_t alloclen, offset, orig_end, orig_start;
1172         int ret = 0;
1173         unsigned char *comprbuf = NULL, *writebuf;
1174         struct page *page;
1175         unsigned char *pg_ptr;
1176 
1177         memset(&ri, 0, sizeof(ri));
1178 
1179         jffs2_dbg(1, "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1180                   f->inocache->ino, start, end);
1181 
1182         orig_end = end;
1183         orig_start = start;
1184 
1185         if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1186                 /* Attempt to do some merging. But only expand to cover logically
1187                    adjacent frags if the block containing them is already considered
1188                    to be dirty. Otherwise we end up with GC just going round in
1189                    circles dirtying the nodes it already wrote out, especially
1190                    on NAND where we have small eraseblocks and hence a much higher
1191                    chance of nodes having to be split to cross boundaries. */
1192 
1193                 struct jffs2_node_frag *frag;
1194                 uint32_t min, max;
1195 
1196                 min = start & ~(PAGE_SIZE-1);
1197                 max = min + PAGE_SIZE;
1198 
1199                 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1200 
1201                 /* BUG_ON(!frag) but that'll happen anyway... */
1202 
1203                 BUG_ON(frag->ofs != start);
1204 
1205                 /* First grow down... */
1206                 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1207 
1208                         /* If the previous frag doesn't even reach the beginning, there's
1209                            excessive fragmentation. Just merge. */
1210                         if (frag->ofs > min) {
1211                                 jffs2_dbg(1, "Expanding down to cover partial frag (0x%x-0x%x)\n",
1212                                           frag->ofs, frag->ofs+frag->size);
1213                                 start = frag->ofs;
1214                                 continue;
1215                         }
1216                         /* OK. This frag holds the first byte of the page. */
1217                         if (!frag->node || !frag->node->raw) {
1218                                 jffs2_dbg(1, "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1219                                           frag->ofs, frag->ofs+frag->size);
1220                                 break;
1221                         } else {
1222 
1223                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1224                                    in a block which is still considered clean? If so, don't obsolete it.
1225                                    If not, cover it anyway. */
1226 
1227                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1228                                 struct jffs2_eraseblock *jeb;
1229 
1230                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1231 
1232                                 if (jeb == c->gcblock) {
1233                                         jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1234                                                   frag->ofs,
1235                                                   frag->ofs + frag->size,
1236                                                   ref_offset(raw));
1237                                         start = frag->ofs;
1238                                         break;
1239                                 }
1240                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1241                                         jffs2_dbg(1, "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1242                                                   frag->ofs,
1243                                                   frag->ofs + frag->size,
1244                                                   jeb->offset);
1245                                         break;
1246                                 }
1247 
1248                                 jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1249                                           frag->ofs,
1250                                           frag->ofs + frag->size,
1251                                           jeb->offset);
1252                                 start = frag->ofs;
1253                                 break;
1254                         }
1255                 }
1256 
1257                 /* ... then up */
1258 
1259                 /* Find last frag which is actually part of the node we're to GC. */
1260                 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1261 
1262                 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1263 
1264                         /* If the previous frag doesn't even reach the beginning, there's lots
1265                            of fragmentation. Just merge. */
1266                         if (frag->ofs+frag->size < max) {
1267                                 jffs2_dbg(1, "Expanding up to cover partial frag (0x%x-0x%x)\n",
1268                                           frag->ofs, frag->ofs+frag->size);
1269                                 end = frag->ofs + frag->size;
1270                                 continue;
1271                         }
1272 
1273                         if (!frag->node || !frag->node->raw) {
1274                                 jffs2_dbg(1, "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1275                                           frag->ofs, frag->ofs+frag->size);
1276                                 break;
1277                         } else {
1278 
1279                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1280                                    in a block which is still considered clean? If so, don't obsolete it.
1281                                    If not, cover it anyway. */
1282 
1283                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1284                                 struct jffs2_eraseblock *jeb;
1285 
1286                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1287 
1288                                 if (jeb == c->gcblock) {
1289                                         jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1290                                                   frag->ofs,
1291                                                   frag->ofs + frag->size,
1292                                                   ref_offset(raw));
1293                                         end = frag->ofs + frag->size;
1294                                         break;
1295                                 }
1296                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1297                                         jffs2_dbg(1, "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1298                                                   frag->ofs,
1299                                                   frag->ofs + frag->size,
1300                                                   jeb->offset);
1301                                         break;
1302                                 }
1303 
1304                                 jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1305                                           frag->ofs,
1306                                           frag->ofs + frag->size,
1307                                           jeb->offset);
1308                                 end = frag->ofs + frag->size;
1309                                 break;
1310                         }
1311                 }
1312                 jffs2_dbg(1, "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1313                           orig_start, orig_end, start, end);
1314 
1315                 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1316                 BUG_ON(end < orig_end);
1317                 BUG_ON(start > orig_start);
1318         }
1319 
1320         /* The rules state that we must obtain the page lock *before* f->sem, so
1321          * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's
1322          * actually going to *change* so we're safe; we only allow reading.
1323          *
1324          * It is important to note that jffs2_write_begin() will ensure that its
1325          * page is marked Uptodate before allocating space. That means that if we
1326          * end up here trying to GC the *same* page that jffs2_write_begin() is
1327          * trying to write out, read_cache_page() will not deadlock. */
1328         mutex_unlock(&f->sem);
1329         page = read_cache_page(inode->i_mapping, start >> PAGE_SHIFT,
1330                                __jffs2_read_folio, NULL);
1331         if (IS_ERR(page)) {
1332                 pr_warn("read_cache_page() returned error: %ld\n",
1333                         PTR_ERR(page));
1334                 mutex_lock(&f->sem);
1335                 return PTR_ERR(page);
1336         }
1337 
1338         pg_ptr = kmap(page);
1339         mutex_lock(&f->sem);
1340 
1341         offset = start;
1342         while(offset < orig_end) {
1343                 uint32_t datalen;
1344                 uint32_t cdatalen;
1345                 uint16_t comprtype = JFFS2_COMPR_NONE;
1346 
1347                 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1348                                         &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1349 
1350                 if (ret) {
1351                         pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1352                                 sizeof(ri) + JFFS2_MIN_DATA_LEN, ret);
1353                         break;
1354                 }
1355                 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1356                 datalen = end - offset;
1357 
1358                 writebuf = pg_ptr + (offset & (PAGE_SIZE -1));
1359 
1360                 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1361 
1362                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1363                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1364                 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1365                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1366 
1367                 ri.ino = cpu_to_je32(f->inocache->ino);
1368                 ri.version = cpu_to_je32(++f->highest_version);
1369                 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1370                 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1371                 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1372                 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1373                 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1374                 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1375                 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1376                 ri.offset = cpu_to_je32(offset);
1377                 ri.csize = cpu_to_je32(cdatalen);
1378                 ri.dsize = cpu_to_je32(datalen);
1379                 ri.compr = comprtype & 0xff;
1380                 ri.usercompr = (comprtype >> 8) & 0xff;
1381                 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1382                 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1383 
1384                 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1385 
1386                 jffs2_free_comprbuf(comprbuf, writebuf);
1387 
1388                 if (IS_ERR(new_fn)) {
1389                         pr_warn("Error writing new dnode: %ld\n",
1390                                 PTR_ERR(new_fn));
1391                         ret = PTR_ERR(new_fn);
1392                         break;
1393                 }
1394                 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1395                 offset += datalen;
1396                 if (f->metadata) {
1397                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1398                         jffs2_free_full_dnode(f->metadata);
1399                         f->metadata = NULL;
1400                 }
1401         }
1402 
1403         kunmap(page);
1404         put_page(page);
1405         return ret;
1406 }
1407 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php