~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~
aops.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~
Diff markup

Differences between /fs/ocfs2/aops.c (Version linux-6.11-rc3) and /fs/ocfs2/aops.c (Version linux-2.4.37.11)

  1 // SPDX-License-Identifier: GPL-2.0-or-later        1 
  2 /*                                                
  3  * Copyright (C) 2002, 2004 Oracle.  All right    
  4  */                                               
  5                                                   
  6 #include <linux/fs.h>                             
  7 #include <linux/slab.h>                           
  8 #include <linux/highmem.h>                        
  9 #include <linux/pagemap.h>                        
 10 #include <asm/byteorder.h>                        
 11 #include <linux/swap.h>                           
 12 #include <linux/mpage.h>                          
 13 #include <linux/quotaops.h>                       
 14 #include <linux/blkdev.h>                         
 15 #include <linux/uio.h>                            
 16 #include <linux/mm.h>                             
 17                                                   
 18 #include <cluster/masklog.h>                      
 19                                                   
 20 #include "ocfs2.h"                                
 21                                                   
 22 #include "alloc.h"                                
 23 #include "aops.h"                                 
 24 #include "dlmglue.h"                              
 25 #include "extent_map.h"                           
 26 #include "file.h"                                 
 27 #include "inode.h"                                
 28 #include "journal.h"                              
 29 #include "suballoc.h"                             
 30 #include "super.h"                                
 31 #include "symlink.h"                              
 32 #include "refcounttree.h"                         
 33 #include "ocfs2_trace.h"                          
 34                                                   
 35 #include "buffer_head_io.h"                       
 36 #include "dir.h"                                  
 37 #include "namei.h"                                
 38 #include "sysfile.h"                              
 39                                                   
 40 static int ocfs2_symlink_get_block(struct inod    
 41                                    struct buff    
 42 {                                                 
 43         int err = -EIO;                           
 44         int status;                               
 45         struct ocfs2_dinode *fe = NULL;           
 46         struct buffer_head *bh = NULL;            
 47         struct buffer_head *buffer_cache_bh =     
 48         struct ocfs2_super *osb = OCFS2_SB(ino    
 49         void *kaddr;                              
 50                                                   
 51         trace_ocfs2_symlink_get_block(            
 52                         (unsigned long long)OC    
 53                         (unsigned long long)ib    
 54                                                   
 55         BUG_ON(ocfs2_inode_is_fast_symlink(ino    
 56                                                   
 57         if ((iblock << inode->i_sb->s_blocksiz    
 58                 mlog(ML_ERROR, "block offset >    
 59                      (unsigned long long)ibloc    
 60                 goto bail;                        
 61         }                                         
 62                                                   
 63         status = ocfs2_read_inode_block(inode,    
 64         if (status < 0) {                         
 65                 mlog_errno(status);               
 66                 goto bail;                        
 67         }                                         
 68         fe = (struct ocfs2_dinode *) bh->b_dat    
 69                                                   
 70         if ((u64)iblock >= ocfs2_clusters_to_b    
 71                                                   
 72                 err = -ENOMEM;                    
 73                 mlog(ML_ERROR, "block offset i    
 74                      "%llu\n", (unsigned long     
 75                 goto bail;                        
 76         }                                         
 77                                                   
 78         /* We don't use the page cache to crea    
 79          * need be, copy it over from the buff    
 80         if (!buffer_uptodate(bh_result) && ocf    
 81                 u64 blkno = le64_to_cpu(fe->id    
 82                             iblock;               
 83                 buffer_cache_bh = sb_getblk(os    
 84                 if (!buffer_cache_bh) {           
 85                         err = -ENOMEM;            
 86                         mlog(ML_ERROR, "couldn    
 87                         goto bail;                
 88                 }                                 
 89                                                   
 90                 /* we haven't locked out trans    
 91                  * could've happened. Since we    
 92                  * the bh, even if it commits     
 93                  * copy, the data is still goo    
 94                 if (buffer_jbd(buffer_cache_bh    
 95                     && ocfs2_inode_is_new(inod    
 96                         kaddr = kmap_atomic(bh    
 97                         if (!kaddr) {             
 98                                 mlog(ML_ERROR,    
 99                                 goto bail;        
100                         }                         
101                         memcpy(kaddr + (bh_res    
102                                buffer_cache_bh    
103                                bh_result->b_si    
104                         kunmap_atomic(kaddr);     
105                         set_buffer_uptodate(bh    
106                 }                                 
107                 brelse(buffer_cache_bh);          
108         }                                         
109                                                   
110         map_bh(bh_result, inode->i_sb,            
111                le64_to_cpu(fe->id2.i_list.l_re    
112                                                   
113         err = 0;                                  
114                                                   
115 bail:                                             
116         brelse(bh);                               
117                                                   
118         return err;                               
119 }                                                 
120                                                   
121 static int ocfs2_lock_get_block(struct inode *    
122                     struct buffer_head *bh_res    
123 {                                                 
124         int ret = 0;                              
125         struct ocfs2_inode_info *oi = OCFS2_I(    
126                                                   
127         down_read(&oi->ip_alloc_sem);             
128         ret = ocfs2_get_block(inode, iblock, b    
129         up_read(&oi->ip_alloc_sem);               
130                                                   
131         return ret;                               
132 }                                                 
133                                                   
134 int ocfs2_get_block(struct inode *inode, secto    
135                     struct buffer_head *bh_res    
136 {                                                 
137         int err = 0;                              
138         unsigned int ext_flags;                   
139         u64 max_blocks = bh_result->b_size >>     
140         u64 p_blkno, count, past_eof;             
141         struct ocfs2_super *osb = OCFS2_SB(ino    
142                                                   
143         trace_ocfs2_get_block((unsigned long l    
144                               (unsigned long l    
145                                                   
146         if (OCFS2_I(inode)->ip_flags & OCFS2_I    
147                 mlog(ML_NOTICE, "get_block on     
148                      inode, inode->i_ino);        
149                                                   
150         if (S_ISLNK(inode->i_mode)) {             
151                 /* this always does I/O for so    
152                 err = ocfs2_symlink_get_block(    
153                 goto bail;                        
154         }                                         
155                                                   
156         err = ocfs2_extent_map_get_blocks(inod    
157                                           &ext    
158         if (err) {                                
159                 mlog(ML_ERROR, "Error %d from     
160                      "%llu, NULL)\n", err, ino    
161                      (unsigned long long)p_blk    
162                 goto bail;                        
163         }                                         
164                                                   
165         if (max_blocks < count)                   
166                 count = max_blocks;               
167                                                   
168         /*                                        
169          * ocfs2 never allocates in this funct    
170          * need to use BH_New is when we're ex    
171          * system which doesn't support holes,    
172          * allows __block_write_begin() to zer    
173          *                                        
174          * If we see this on a sparse file sys    
175          * raced us and removed the cluster. I    
176          * the buffers dirty and uptodate bits    
177          * ignore it as a hole.                   
178          */                                       
179         if (create && p_blkno == 0 && ocfs2_sp    
180                 clear_buffer_dirty(bh_result);    
181                 clear_buffer_uptodate(bh_resul    
182                 goto bail;                        
183         }                                         
184                                                   
185         /* Treat the unwritten extent as a hol    
186         if (p_blkno && !(ext_flags & OCFS2_EXT    
187                 map_bh(bh_result, inode->i_sb,    
188                                                   
189         bh_result->b_size = count << inode->i_    
190                                                   
191         if (!ocfs2_sparse_alloc(osb)) {           
192                 if (p_blkno == 0) {               
193                         err = -EIO;               
194                         mlog(ML_ERROR,            
195                              "iblock = %llu p_    
196                              (unsigned long lo    
197                              (unsigned long lo    
198                              (unsigned long lo    
199                         mlog(ML_ERROR, "Size %    
200                         dump_stack();             
201                         goto bail;                
202                 }                                 
203         }                                         
204                                                   
205         past_eof = ocfs2_blocks_for_bytes(inod    
206                                                   
207         trace_ocfs2_get_block_end((unsigned lo    
208                                   (unsigned lo    
209         if (create && (iblock >= past_eof))       
210                 set_buffer_new(bh_result);        
211                                                   
212 bail:                                             
213         if (err < 0)                              
214                 err = -EIO;                       
215                                                   
216         return err;                               
217 }                                                 
218                                                   
219 int ocfs2_read_inline_data(struct inode *inode    
220                            struct buffer_head     
221 {                                                 
222         void *kaddr;                              
223         loff_t size;                              
224         struct ocfs2_dinode *di = (struct ocfs    
225                                                   
226         if (!(le16_to_cpu(di->i_dyn_features)     
227                 ocfs2_error(inode->i_sb, "Inod    
228                             (unsigned long lon    
229                 return -EROFS;                    
230         }                                         
231                                                   
232         size = i_size_read(inode);                
233                                                   
234         if (size > PAGE_SIZE ||                   
235             size > ocfs2_max_inline_data_with_    
236                 ocfs2_error(inode->i_sb,          
237                             "Inode %llu has wi    
238                             (unsigned long lon    
239                             (unsigned long lon    
240                 return -EROFS;                    
241         }                                         
242                                                   
243         kaddr = kmap_atomic(page);                
244         if (size)                                 
245                 memcpy(kaddr, di->id2.i_data.i    
246         /* Clear the remaining part of the pag    
247         memset(kaddr + size, 0, PAGE_SIZE - si    
248         flush_dcache_page(page);                  
249         kunmap_atomic(kaddr);                     
250                                                   
251         SetPageUptodate(page);                    
252                                                   
253         return 0;                                 
254 }                                                 
255                                                   
256 static int ocfs2_readpage_inline(struct inode     
257 {                                                 
258         int ret;                                  
259         struct buffer_head *di_bh = NULL;         
260                                                   
261         BUG_ON(!PageLocked(page));                
262         BUG_ON(!(OCFS2_I(inode)->ip_dyn_featur    
263                                                   
264         ret = ocfs2_read_inode_block(inode, &d    
265         if (ret) {                                
266                 mlog_errno(ret);                  
267                 goto out;                         
268         }                                         
269                                                   
270         ret = ocfs2_read_inline_data(inode, pa    
271 out:                                              
272         unlock_page(page);                        
273                                                   
274         brelse(di_bh);                            
275         return ret;                               
276 }                                                 
277                                                   
278 static int ocfs2_read_folio(struct file *file,    
279 {                                                 
280         struct inode *inode = folio->mapping->    
281         struct ocfs2_inode_info *oi = OCFS2_I(    
282         loff_t start = folio_pos(folio);          
283         int ret, unlock = 1;                      
284                                                   
285         trace_ocfs2_readpage((unsigned long lo    
286                                                   
287         ret = ocfs2_inode_lock_with_page(inode    
288         if (ret != 0) {                           
289                 if (ret == AOP_TRUNCATED_PAGE)    
290                         unlock = 0;               
291                 mlog_errno(ret);                  
292                 goto out;                         
293         }                                         
294                                                   
295         if (down_read_trylock(&oi->ip_alloc_se    
296                 /*                                
297                  * Unlock the folio and cycle     
298                  * busyloop waiting for ip_all    
299                  */                               
300                 ret = AOP_TRUNCATED_PAGE;         
301                 folio_unlock(folio);              
302                 unlock = 0;                       
303                 down_read(&oi->ip_alloc_sem);     
304                 up_read(&oi->ip_alloc_sem);       
305                 goto out_inode_unlock;            
306         }                                         
307                                                   
308         /*                                        
309          * i_size might have just been updated    
310          * might now be discovering a truncate    
311          * block_read_full_folio->get_block fr    
312          * beyond the end of a file, so we che    
313          * (generic_file_read, vm_ops->fault)     
314          * and notice that the folio they just    
315          *                                        
316          * XXX sys_readahead() seems to get th    
317          */                                       
318         if (start >= i_size_read(inode)) {        
319                 folio_zero_segment(folio, 0, f    
320                 folio_mark_uptodate(folio);       
321                 ret = 0;                          
322                 goto out_alloc;                   
323         }                                         
324                                                   
325         if (oi->ip_dyn_features & OCFS2_INLINE    
326                 ret = ocfs2_readpage_inline(in    
327         else                                      
328                 ret = block_read_full_folio(fo    
329         unlock = 0;                               
330                                                   
331 out_alloc:                                        
332         up_read(&oi->ip_alloc_sem);               
333 out_inode_unlock:                                 
334         ocfs2_inode_unlock(inode, 0);             
335 out:                                              
336         if (unlock)                               
337                 folio_unlock(folio);              
338         return ret;                               
339 }                                                 
340                                                   
341 /*                                                
342  * This is used only for read-ahead. Failures     
343  * situations are safe to ignore.                 
344  *                                                
345  * Right now, we don't bother with BH_Boundary    
346  * are quite large (243 extents on 4k blocks),    
347  * grow out to a tree. If need be, detecting b    
348  * trivially be added in a future version of o    
349  */                                               
350 static void ocfs2_readahead(struct readahead_c    
351 {                                                 
352         int ret;                                  
353         struct inode *inode = rac->mapping->ho    
354         struct ocfs2_inode_info *oi = OCFS2_I(    
355                                                   
356         /*                                        
357          * Use the nonblocking flag for the dl    
358          * lock inversion, but don't bother wi    
359          */                                       
360         ret = ocfs2_inode_lock_full(inode, NUL    
361         if (ret)                                  
362                 return;                           
363                                                   
364         if (down_read_trylock(&oi->ip_alloc_se    
365                 goto out_unlock;                  
366                                                   
367         /*                                        
368          * Don't bother with inline-data. Ther    
369          * to read-ahead in that case anyway..    
370          */                                       
371         if (oi->ip_dyn_features & OCFS2_INLINE    
372                 goto out_up;                      
373                                                   
374         /*                                        
375          * Check whether a remote node truncat    
376          * drop out in that case as it's not w    
377          */                                       
378         if (readahead_pos(rac) >= i_size_read(    
379                 goto out_up;                      
380                                                   
381         mpage_readahead(rac, ocfs2_get_block);    
382                                                   
383 out_up:                                           
384         up_read(&oi->ip_alloc_sem);               
385 out_unlock:                                       
386         ocfs2_inode_unlock(inode, 0);             
387 }                                                 
388                                                   
389 /* Note: Because we don't support holes, our a    
390  * already happened (allocation writes zeros t    
391  * so we don't have to worry about ordered wri    
392  * ocfs2_writepages.                              
393  *                                                
394  * ->writepages is called during the process o    
395  * during blocked lock processing.  It can't b    
396  * to during block mapping.  It's relying on t    
397  * mapping can't have disappeared under the di    
398  * being asked to write back.                     
399  */                                               
400 static int ocfs2_writepages(struct address_spa    
401                 struct writeback_control *wbc)    
402 {                                                 
403         return mpage_writepages(mapping, wbc,     
404 }                                                 
405                                                   
406 /* Taken from ext3. We don't necessarily need     
407  * functionality yet, but IMHO it's better to     
408  * thing so we can avoid introducing our own b    
409  * their fixes when they happen) --Mark */        
410 int walk_page_buffers(  handle_t *handle,         
411                         struct buffer_head *he    
412                         unsigned from,            
413                         unsigned to,              
414                         int *partial,             
415                         int (*fn)(      handle    
416                                         struct    
417 {                                                 
418         struct buffer_head *bh;                   
419         unsigned block_start, block_end;          
420         unsigned blocksize = head->b_size;        
421         int err, ret = 0;                         
422         struct buffer_head *next;                 
423                                                   
424         for (   bh = head, block_start = 0;       
425                 ret == 0 && (bh != head || !bl    
426                 block_start = block_end, bh =     
427         {                                         
428                 next = bh->b_this_page;           
429                 block_end = block_start + bloc    
430                 if (block_end <= from || block    
431                         if (partial && !buffer    
432                                 *partial = 1;     
433                         continue;                 
434                 }                                 
435                 err = (*fn)(handle, bh);          
436                 if (!ret)                         
437                         ret = err;                
438         }                                         
439         return ret;                               
440 }                                                 
441                                                   
442 static sector_t ocfs2_bmap(struct address_spac    
443 {                                                 
444         sector_t status;                          
445         u64 p_blkno = 0;                          
446         int err = 0;                              
447         struct inode *inode = mapping->host;      
448                                                   
449         trace_ocfs2_bmap((unsigned long long)O    
450                          (unsigned long long)b    
451                                                   
452         /*                                        
453          * The swap code (ab-)uses ->bmap to g    
454          * bypasses the file system for actua    
455          * that on refcounted inodes, so we ha    
456          * 0 is the magic code for a bmap erro    
457          */                                       
458         if (ocfs2_is_refcount_inode(inode))       
459                 return 0;                         
460                                                   
461         /* We don't need to lock journal syste    
462          * accessed concurrently from multiple    
463          */                                       
464         if (!INODE_JOURNAL(inode)) {              
465                 err = ocfs2_inode_lock(inode,     
466                 if (err) {                        
467                         if (err != -ENOENT)       
468                                 mlog_errno(err    
469                         goto bail;                
470                 }                                 
471                 down_read(&OCFS2_I(inode)->ip_    
472         }                                         
473                                                   
474         if (!(OCFS2_I(inode)->ip_dyn_features     
475                 err = ocfs2_extent_map_get_blo    
476                                                   
477                                                   
478         if (!INODE_JOURNAL(inode)) {              
479                 up_read(&OCFS2_I(inode)->ip_al    
480                 ocfs2_inode_unlock(inode, 0);     
481         }                                         
482                                                   
483         if (err) {                                
484                 mlog(ML_ERROR, "get_blocks() f    
485                      (unsigned long long)block    
486                 mlog_errno(err);                  
487                 goto bail;                        
488         }                                         
489                                                   
490 bail:                                             
491         status = err ? 0 : p_blkno;               
492                                                   
493         return status;                            
494 }                                                 
495                                                   
496 static bool ocfs2_release_folio(struct folio *    
497 {                                                 
498         if (!folio_buffers(folio))                
499                 return false;                     
500         return try_to_free_buffers(folio);        
501 }                                                 
502                                                   
503 static void ocfs2_figure_cluster_boundaries(st    
504                                             u3    
505                                             un    
506                                             un    
507 {                                                 
508         unsigned int cluster_start = 0, cluste    
509                                                   
510         if (unlikely(PAGE_SHIFT > osb->s_clust    
511                 unsigned int cpp;                 
512                                                   
513                 cpp = 1 << (PAGE_SHIFT - osb->    
514                                                   
515                 cluster_start = cpos % cpp;       
516                 cluster_start = cluster_start     
517                                                   
518                 cluster_end = cluster_start +     
519         }                                         
520                                                   
521         BUG_ON(cluster_start > PAGE_SIZE);        
522         BUG_ON(cluster_end > PAGE_SIZE);          
523                                                   
524         if (start)                                
525                 *start = cluster_start;           
526         if (end)                                  
527                 *end = cluster_end;               
528 }                                                 
529                                                   
530 /*                                                
531  * 'from' and 'to' are the region in the page     
532  *                                                
533  * If pagesize > clustersize, this function wi    
534  * of the cluster boundary.                       
535  *                                                
536  * from == to == 0 is code for "zero the entir    
537  */                                               
538 static void ocfs2_clear_page_regions(struct pa    
539                                      struct oc    
540                                      unsigned     
541 {                                                 
542         void *kaddr;                              
543         unsigned int cluster_start, cluster_en    
544                                                   
545         ocfs2_figure_cluster_boundaries(osb, c    
546                                                   
547         kaddr = kmap_atomic(page);                
548                                                   
549         if (from || to) {                         
550                 if (from > cluster_start)         
551                         memset(kaddr + cluster    
552                 if (to < cluster_end)             
553                         memset(kaddr + to, 0,     
554         } else {                                  
555                 memset(kaddr + cluster_start,     
556         }                                         
557                                                   
558         kunmap_atomic(kaddr);                     
559 }                                                 
560                                                   
561 /*                                                
562  * Nonsparse file systems fully allocate befor    
563  * code. This prevents ocfs2_write() from tagg    
564  * allocating one, which means ocfs2_map_page_    
565  * read-in the blocks at the tail of our file.    
566  * testing i_size against each block offset.      
567  */                                               
568 static int ocfs2_should_read_blk(struct inode     
569                                  unsigned int     
570 {                                                 
571         u64 offset = folio_pos(folio) + block_    
572                                                   
573         if (ocfs2_sparse_alloc(OCFS2_SB(inode-    
574                 return 1;                         
575                                                   
576         if (i_size_read(inode) > offset)          
577                 return 1;                         
578                                                   
579         return 0;                                 
580 }                                                 
581                                                   
582 /*                                                
583  * Some of this taken from __block_write_begin    
584  * mapping by now though, and the entire write    
585  * it won't, so not much need to use BH_New.      
586  *                                                
587  * This will also skip zeroing, which is handl    
588  */                                               
589 int ocfs2_map_page_blocks(struct page *page, u    
590                           struct inode *inode,    
591                           unsigned int to, int    
592 {                                                 
593         struct folio *folio = page_folio(page)    
594         int ret = 0;                              
595         struct buffer_head *head, *bh, *wait[2    
596         unsigned int block_end, block_start;      
597         unsigned int bsize = i_blocksize(inode    
598                                                   
599         head = folio_buffers(folio);              
600         if (!head)                                
601                 head = create_empty_buffers(fo    
602                                                   
603         for (bh = head, block_start = 0; bh !=    
604              bh = bh->b_this_page, block_start    
605                 block_end = block_start + bsiz    
606                                                   
607                 clear_buffer_new(bh);             
608                                                   
609                 /*                                
610                  * Ignore blocks outside of ou    
611                  * they may belong to unalloca    
612                  */                               
613                 if (block_start >= to || block    
614                         if (folio_test_uptodat    
615                                 set_buffer_upt    
616                         continue;                 
617                 }                                 
618                                                   
619                 /*                                
620                  * For an allocating write wit    
621                  * size, we always write the e    
622                  */                               
623                 if (new)                          
624                         set_buffer_new(bh);       
625                                                   
626                 if (!buffer_mapped(bh)) {         
627                         map_bh(bh, inode->i_sb    
628                         clean_bdev_bh_alias(bh    
629                 }                                 
630                                                   
631                 if (folio_test_uptodate(folio)    
632                         set_buffer_uptodate(bh    
633                 } else if (!buffer_uptodate(bh    
634                            !buffer_new(bh) &&     
635                            ocfs2_should_read_b    
636                            (block_start < from    
637                         bh_read_nowait(bh, 0);    
638                         *wait_bh++=bh;            
639                 }                                 
640                                                   
641                 *p_blkno = *p_blkno + 1;          
642         }                                         
643                                                   
644         /*                                        
645          * If we issued read requests - let th    
646          */                                       
647         while(wait_bh > wait) {                   
648                 wait_on_buffer(*--wait_bh);       
649                 if (!buffer_uptodate(*wait_bh)    
650                         ret = -EIO;               
651         }                                         
652                                                   
653         if (ret == 0 || !new)                     
654                 return ret;                       
655                                                   
656         /*                                        
657          * If we get -EIO above, zero out any     
658          * to avoid exposing stale data.          
659          */                                       
660         bh = head;                                
661         block_start = 0;                          
662         do {                                      
663                 block_end = block_start + bsiz    
664                 if (block_end <= from)            
665                         goto next_bh;             
666                 if (block_start >= to)            
667                         break;                    
668                                                   
669                 folio_zero_range(folio, block_    
670                 set_buffer_uptodate(bh);          
671                 mark_buffer_dirty(bh);            
672                                                   
673 next_bh:                                          
674                 block_start = block_end;          
675                 bh = bh->b_this_page;             
676         } while (bh != head);                     
677                                                   
678         return ret;                               
679 }                                                 
680                                                   
681 #if (PAGE_SIZE >= OCFS2_MAX_CLUSTERSIZE)          
682 #define OCFS2_MAX_CTXT_PAGES    1                 
683 #else                                             
684 #define OCFS2_MAX_CTXT_PAGES    (OCFS2_MAX_CLU    
685 #endif                                            
686                                                   
687 #define OCFS2_MAX_CLUSTERS_PER_PAGE     (PAGE_    
688                                                   
689 struct ocfs2_unwritten_extent {                   
690         struct list_head        ue_node;          
691         struct list_head        ue_ip_node;       
692         u32                     ue_cpos;          
693         u32                     ue_phys;          
694 };                                                
695                                                   
696 /*                                                
697  * Describe the state of a single cluster to b    
698  */                                               
699 struct ocfs2_write_cluster_desc {                 
700         u32             c_cpos;                   
701         u32             c_phys;                   
702         /*                                        
703          * Give this a unique field because c_    
704          * filled.                                
705          */                                       
706         unsigned        c_new;                    
707         unsigned        c_clear_unwritten;        
708         unsigned        c_needs_zero;             
709 };                                                
710                                                   
711 struct ocfs2_write_ctxt {                         
712         /* Logical cluster position / len of w    
713         u32                             w_cpos    
714         u32                             w_clen    
715                                                   
716         /* First cluster allocated in a nonspa    
717         u32                             w_firs    
718                                                   
719         /* Type of caller. Must be one of buff    
720         ocfs2_write_type_t              w_type    
721                                                   
722         struct ocfs2_write_cluster_desc w_desc    
723                                                   
724         /*                                        
725          * This is true if page_size > cluster    
726          *                                        
727          * It triggers a set of special cases     
728          * have to deal with allocating writes    
729          */                                       
730         unsigned int                    w_larg    
731                                                   
732         /*                                        
733          * Pages involved in this write.          
734          *                                        
735          * w_target_page is the page being wri    
736          *                                        
737          * w_pages is an array of pages which     
738          * w_target_page, and in the case of a    
739          * page_size < cluster size, it will c    
740          * pages adjacent to w_target_page whi    
741          * out in so that future reads from th    
742          * zero's.                                
743          */                                       
744         unsigned int                    w_num_    
745         struct page                     *w_pag    
746         struct page                     *w_tar    
747                                                   
748         /*                                        
749          * w_target_locked is used for page_mk    
750          * against w_target_page in ocfs2_writ    
751          */                                       
752         unsigned int                    w_targ    
753                                                   
754         /*                                        
755          * ocfs2_write_end() uses this to know    
756          * write in the target should be.         
757          */                                       
758         unsigned int                    w_targ    
759         unsigned int                    w_targ    
760                                                   
761         /*                                        
762          * We could use journal_current_handle    
763          * IMHO -Mark                             
764          */                                       
765         handle_t                        *w_han    
766                                                   
767         struct buffer_head              *w_di_    
768                                                   
769         struct ocfs2_cached_dealloc_ctxt w_dea    
770                                                   
771         struct list_head                w_unwr    
772         unsigned int                    w_unwr    
773 };                                                
774                                                   
775 void ocfs2_unlock_and_free_pages(struct page *    
776 {                                                 
777         int i;                                    
778                                                   
779         for(i = 0; i < num_pages; i++) {          
780                 if (pages[i]) {                   
781                         unlock_page(pages[i]);    
782                         mark_page_accessed(pag    
783                         put_page(pages[i]);       
784                 }                                 
785         }                                         
786 }                                                 
787                                                   
788 static void ocfs2_unlock_pages(struct ocfs2_wr    
789 {                                                 
790         int i;                                    
791                                                   
792         /*                                        
793          * w_target_locked is only set to true    
794          * The intent is to allow us to lock t    
795          * to write_end(). The caller must hol    
796          */                                       
797         if (wc->w_target_locked) {                
798                 BUG_ON(!wc->w_target_page);       
799                 for (i = 0; i < wc->w_num_page    
800                         if (wc->w_target_page     
801                                 wc->w_pages[i]    
802                                 break;            
803                         }                         
804                 }                                 
805                 mark_page_accessed(wc->w_targe    
806                 put_page(wc->w_target_page);      
807         }                                         
808         ocfs2_unlock_and_free_pages(wc->w_page    
809 }                                                 
810                                                   
811 static void ocfs2_free_unwritten_list(struct i    
812                                  struct list_h    
813 {                                                 
814         struct ocfs2_inode_info *oi = OCFS2_I(    
815         struct ocfs2_unwritten_extent *ue = NU    
816                                                   
817         list_for_each_entry_safe(ue, tmp, head    
818                 list_del(&ue->ue_node);           
819                 spin_lock(&oi->ip_lock);          
820                 list_del(&ue->ue_ip_node);        
821                 spin_unlock(&oi->ip_lock);        
822                 kfree(ue);                        
823         }                                         
824 }                                                 
825                                                   
826 static void ocfs2_free_write_ctxt(struct inode    
827                                   struct ocfs2    
828 {                                                 
829         ocfs2_free_unwritten_list(inode, &wc->    
830         ocfs2_unlock_pages(wc);                   
831         brelse(wc->w_di_bh);                      
832         kfree(wc);                                
833 }                                                 
834                                                   
835 static int ocfs2_alloc_write_ctxt(struct ocfs2    
836                                   struct ocfs2    
837                                   unsigned len    
838                                   struct buffe    
839 {                                                 
840         u32 cend;                                 
841         struct ocfs2_write_ctxt *wc;              
842                                                   
843         wc = kzalloc(sizeof(struct ocfs2_write    
844         if (!wc)                                  
845                 return -ENOMEM;                   
846                                                   
847         wc->w_cpos = pos >> osb->s_clustersize    
848         wc->w_first_new_cpos = UINT_MAX;          
849         cend = (pos + len - 1) >> osb->s_clust    
850         wc->w_clen = cend - wc->w_cpos + 1;       
851         get_bh(di_bh);                            
852         wc->w_di_bh = di_bh;                      
853         wc->w_type = type;                        
854                                                   
855         if (unlikely(PAGE_SHIFT > osb->s_clust    
856                 wc->w_large_pages = 1;            
857         else                                      
858                 wc->w_large_pages = 0;            
859                                                   
860         ocfs2_init_dealloc_ctxt(&wc->w_dealloc    
861         INIT_LIST_HEAD(&wc->w_unwritten_list);    
862                                                   
863         *wcp = wc;                                
864                                                   
865         return 0;                                 
866 }                                                 
867                                                   
868 /*                                                
869  * If a page has any new buffers, zero them ou    
870  * and dirty so they'll be written out (in ord    
871  * block data from leaking). And clear the new    
872  */                                               
873 static void ocfs2_zero_new_buffers(struct page    
874 {                                                 
875         unsigned int block_start, block_end;      
876         struct buffer_head *head, *bh;            
877                                                   
878         BUG_ON(!PageLocked(page));                
879         if (!page_has_buffers(page))              
880                 return;                           
881                                                   
882         bh = head = page_buffers(page);           
883         block_start = 0;                          
884         do {                                      
885                 block_end = block_start + bh->    
886                                                   
887                 if (buffer_new(bh)) {             
888                         if (block_end > from &    
889                                 if (!PageUptod    
890                                         unsign    
891                                                   
892                                         start     
893                                         end =     
894                                                   
895                                         zero_u    
896                                         set_bu    
897                                 }                 
898                                                   
899                                 clear_buffer_n    
900                                 mark_buffer_di    
901                         }                         
902                 }                                 
903                                                   
904                 block_start = block_end;          
905                 bh = bh->b_this_page;             
906         } while (bh != head);                     
907 }                                                 
908                                                   
909 /*                                                
910  * Only called when we have a failure during a    
911  * zero's to the newly allocated region.          
912  */                                               
913 static void ocfs2_write_failure(struct inode *    
914                                 struct ocfs2_w    
915                                 loff_t user_po    
916 {                                                 
917         int i;                                    
918         unsigned from = user_pos & (PAGE_SIZE     
919                 to = user_pos + user_len;         
920         struct page *tmppage;                     
921                                                   
922         if (wc->w_target_page)                    
923                 ocfs2_zero_new_buffers(wc->w_t    
924                                                   
925         for(i = 0; i < wc->w_num_pages; i++) {    
926                 tmppage = wc->w_pages[i];         
927                                                   
928                 if (tmppage && page_has_buffer    
929                         if (ocfs2_should_order    
930                                 ocfs2_jbd2_ino    
931                                                   
932                                                   
933                         block_commit_write(tmp    
934                 }                                 
935         }                                         
936 }                                                 
937                                                   
938 static int ocfs2_prepare_page_for_write(struct    
939                                         struct    
940                                         struct    
941                                         loff_t    
942                                         int ne    
943 {                                                 
944         int ret;                                  
945         unsigned int map_from = 0, map_to = 0;    
946         unsigned int cluster_start, cluster_en    
947         unsigned int user_data_from = 0, user_    
948                                                   
949         ocfs2_figure_cluster_boundaries(OCFS2_    
950                                         &clust    
951                                                   
952         /* treat the write as new if the a hol    
953          * the page boundary.                     
954          */                                       
955         new = new | ((i_size_read(inode) <= pa    
956                         (page_offset(page) <=     
957                                                   
958         if (page == wc->w_target_page) {          
959                 map_from = user_pos & (PAGE_SI    
960                 map_to = map_from + user_len;     
961                                                   
962                 if (new)                          
963                         ret = ocfs2_map_page_b    
964                                                   
965                                                   
966                 else                              
967                         ret = ocfs2_map_page_b    
968                                                   
969                 if (ret) {                        
970                         mlog_errno(ret);          
971                         goto out;                 
972                 }                                 
973                                                   
974                 user_data_from = map_from;        
975                 user_data_to = map_to;            
976                 if (new) {                        
977                         map_from = cluster_sta    
978                         map_to = cluster_end;     
979                 }                                 
980         } else {                                  
981                 /*                                
982                  * If we haven't allocated the    
983                  * shouldn't be writing it out    
984                  * data. This is likely a math    
985                  */                               
986                 BUG_ON(!new);                     
987                                                   
988                 map_from = cluster_start;         
989                 map_to = cluster_end;             
990                                                   
991                 ret = ocfs2_map_page_blocks(pa    
992                                             cl    
993                 if (ret) {                        
994                         mlog_errno(ret);          
995                         goto out;                 
996                 }                                 
997         }                                         
998                                                   
999         /*                                        
1000          * Parts of newly allocated pages nee    
1001          *                                       
1002          * Above, we have also rewritten 'to'    
1003          * the rest of the function is concer    
1004          * range inside of a page needs to be    
1005          *                                       
1006          * We can skip this if the page is up    
1007          * been zero'd from being read in as     
1008          */                                      
1009         if (new && !PageUptodate(page))          
1010                 ocfs2_clear_page_regions(page    
1011                                          cpos    
1012                                                  
1013         flush_dcache_page(page);                 
1014                                                  
1015 out:                                             
1016         return ret;                              
1017 }                                                
1018                                                  
1019 /*                                               
1020  * This function will only grab one clusters     
1021  */                                              
1022 static int ocfs2_grab_pages_for_write(struct     
1023                                       struct     
1024                                       u32 cpo    
1025                                       unsigne    
1026                                       struct     
1027 {                                                
1028         int ret = 0, i;                          
1029         unsigned long start, target_index, en    
1030         struct inode *inode = mapping->host;     
1031         loff_t last_byte;                        
1032                                                  
1033         target_index = user_pos >> PAGE_SHIFT    
1034                                                  
1035         /*                                       
1036          * Figure out how many pages we'll be    
1037          * non allocating write, we just chan    
1038          * page. Otherwise, we'll need a whol    
1039          * writing past i_size, we only need     
1040          * last page of the write.               
1041          */                                      
1042         if (new) {                               
1043                 wc->w_num_pages = ocfs2_pages    
1044                 start = ocfs2_align_clusters_    
1045                 /*                               
1046                  * We need the index *past* t    
1047                  * touch.  This is the page p    
1048                  * i_size, whichever is great    
1049                  */                              
1050                 last_byte = max(user_pos + us    
1051                 BUG_ON(last_byte < 1);           
1052                 end_index = ((last_byte - 1)     
1053                 if ((start + wc->w_num_pages)    
1054                         wc->w_num_pages = end    
1055         } else {                                 
1056                 wc->w_num_pages = 1;             
1057                 start = target_index;            
1058         }                                        
1059         end_index = (user_pos + user_len - 1)    
1060                                                  
1061         for(i = 0; i < wc->w_num_pages; i++)     
1062                 index = start + i;               
1063                                                  
1064                 if (index >= target_index &&     
1065                     wc->w_type == OCFS2_WRITE    
1066                         /*                       
1067                          * ocfs2_pagemkwrite(    
1068                          * and wants us to di    
1069                          * passed in.            
1070                          */                      
1071                         lock_page(mmap_page);    
1072                                                  
1073                         /* Exit and let the c    
1074                         if (mmap_page->mappin    
1075                                 WARN_ON(mmap_    
1076                                 unlock_page(m    
1077                                 ret = -EAGAIN    
1078                                 goto out;        
1079                         }                        
1080                                                  
1081                         get_page(mmap_page);     
1082                         wc->w_pages[i] = mmap    
1083                         wc->w_target_locked =    
1084                 } else if (index >= target_in    
1085                            wc->w_type == OCFS    
1086                         /* Direct write has n    
1087                         wc->w_pages[i] = NULL    
1088                         continue;                
1089                 } else {                         
1090                         wc->w_pages[i] = find    
1091                                                  
1092                         if (!wc->w_pages[i])     
1093                                 ret = -ENOMEM    
1094                                 mlog_errno(re    
1095                                 goto out;        
1096                         }                        
1097                 }                                
1098                 wait_for_stable_page(wc->w_pa    
1099                                                  
1100                 if (index == target_index)       
1101                         wc->w_target_page = w    
1102         }                                        
1103 out:                                             
1104         if (ret)                                 
1105                 wc->w_target_locked = false;     
1106         return ret;                              
1107 }                                                
1108                                                  
1109 /*                                               
1110  * Prepare a single cluster for write one clu    
1111  */                                              
1112 static int ocfs2_write_cluster(struct address    
1113                                u32 *phys, uns    
1114                                unsigned int c    
1115                                unsigned int s    
1116                                struct ocfs2_a    
1117                                struct ocfs2_a    
1118                                struct ocfs2_w    
1119                                loff_t user_po    
1120 {                                                
1121         int ret, i;                              
1122         u64 p_blkno;                             
1123         struct inode *inode = mapping->host;     
1124         struct ocfs2_extent_tree et;             
1125         int bpc = ocfs2_clusters_to_blocks(in    
1126                                                  
1127         if (new) {                               
1128                 u32 tmp_pos;                     
1129                                                  
1130                 /*                               
1131                  * This is safe to call with     
1132                  * any additional semaphores     
1133                  */                              
1134                 tmp_pos = cpos;                  
1135                 ret = ocfs2_add_inode_data(OC    
1136                                            &t    
1137                                            wc    
1138                                            da    
1139                 /*                               
1140                  * This shouldn't happen beca    
1141                  * calculated the correct met    
1142                  * internal tree allocation c    
1143                  * transaction credits itself    
1144                  *                               
1145                  * If need be, we could handl    
1146                  * RESTART_TRANS here.           
1147                  */                              
1148                 mlog_bug_on_msg(ret == -EAGAI    
1149                                 "Inode %llu:     
1150                                 (unsigned lon    
1151                 if (ret < 0) {                   
1152                         mlog_errno(ret);         
1153                         goto out;                
1154                 }                                
1155         } else if (clear_unwritten) {            
1156                 ocfs2_init_dinode_extent_tree    
1157                                                  
1158                 ret = ocfs2_mark_extent_writt    
1159                                                  
1160                                                  
1161                 if (ret < 0) {                   
1162                         mlog_errno(ret);         
1163                         goto out;                
1164                 }                                
1165         }                                        
1166                                                  
1167         /*                                       
1168          * The only reason this should fail i    
1169          * find the extent added.                
1170          */                                      
1171         ret = ocfs2_get_clusters(inode, cpos,    
1172         if (ret < 0) {                           
1173                 mlog(ML_ERROR, "Get physical     
1174                             "at logical clust    
1175                             (unsigned long lo    
1176                 goto out;                        
1177         }                                        
1178                                                  
1179         BUG_ON(*phys == 0);                      
1180                                                  
1181         p_blkno = ocfs2_clusters_to_blocks(in    
1182         if (!should_zero)                        
1183                 p_blkno += (user_pos >> inode    
1184                                                  
1185         for(i = 0; i < wc->w_num_pages; i++)     
1186                 int tmpret;                      
1187                                                  
1188                 /* This is the direct io targ    
1189                 if (wc->w_pages[i] == NULL) {    
1190                         p_blkno++;               
1191                         continue;                
1192                 }                                
1193                                                  
1194                 tmpret = ocfs2_prepare_page_f    
1195                                                  
1196                                                  
1197                                                  
1198                 if (tmpret) {                    
1199                         mlog_errno(tmpret);      
1200                         if (ret == 0)            
1201                                 ret = tmpret;    
1202                 }                                
1203         }                                        
1204                                                  
1205         /*                                       
1206          * We only have cleanup to do in case    
1207          */                                      
1208         if (ret && new)                          
1209                 ocfs2_write_failure(inode, wc    
1210                                                  
1211 out:                                             
1212                                                  
1213         return ret;                              
1214 }                                                
1215                                                  
1216 static int ocfs2_write_cluster_by_desc(struct    
1217                                        struct    
1218                                        struct    
1219                                        struct    
1220                                        loff_t    
1221 {                                                
1222         int ret, i;                              
1223         loff_t cluster_off;                      
1224         unsigned int local_len = len;            
1225         struct ocfs2_write_cluster_desc *desc    
1226         struct ocfs2_super *osb = OCFS2_SB(ma    
1227                                                  
1228         for (i = 0; i < wc->w_clen; i++) {       
1229                 desc = &wc->w_desc[i];           
1230                                                  
1231                 /*                               
1232                  * We have to make sure that     
1233                  * doesn't extend past a sing    
1234                  */                              
1235                 local_len = len;                 
1236                 cluster_off = pos & (osb->s_c    
1237                 if ((cluster_off + local_len)    
1238                         local_len = osb->s_cl    
1239                                                  
1240                 ret = ocfs2_write_cluster(map    
1241                                           des    
1242                                           des    
1243                                           des    
1244                                           dat    
1245                                           wc,    
1246                 if (ret) {                       
1247                         mlog_errno(ret);         
1248                         goto out;                
1249                 }                                
1250                                                  
1251                 len -= local_len;                
1252                 pos += local_len;                
1253         }                                        
1254                                                  
1255         ret = 0;                                 
1256 out:                                             
1257         return ret;                              
1258 }                                                
1259                                                  
1260 /*                                               
1261  * ocfs2_write_end() wants to know which part    
1262  * should complete the write on. It's easiest    
1263  * time when a more complete view of the writ    
1264  */                                              
1265 static void ocfs2_set_target_boundaries(struc    
1266                                         struc    
1267                                         loff_    
1268 {                                                
1269         struct ocfs2_write_cluster_desc *desc    
1270                                                  
1271         wc->w_target_from = pos & (PAGE_SIZE     
1272         wc->w_target_to = wc->w_target_from +    
1273                                                  
1274         if (alloc == 0)                          
1275                 return;                          
1276                                                  
1277         /*                                       
1278          * Allocating write - we may have dif    
1279          * on page size and cluster size.        
1280          *                                       
1281          * NOTE: We can no longer compute one    
1282          * the actual write length and user p    
1283          * different.                            
1284          */                                      
1285                                                  
1286         if (wc->w_large_pages) {                 
1287                 /*                               
1288                  * We only care about the 1st    
1289                  * our range and whether they    
1290                  * value may be extended out     
1291                  * newly allocated cluster.      
1292                  */                              
1293                 desc = &wc->w_desc[0];           
1294                 if (desc->c_needs_zero)          
1295                         ocfs2_figure_cluster_    
1296                                                  
1297                                                  
1298                                                  
1299                                                  
1300                 desc = &wc->w_desc[wc->w_clen    
1301                 if (desc->c_needs_zero)          
1302                         ocfs2_figure_cluster_    
1303                                                  
1304                                                  
1305                                                  
1306         } else {                                 
1307                 wc->w_target_from = 0;           
1308                 wc->w_target_to = PAGE_SIZE;     
1309         }                                        
1310 }                                                
1311                                                  
1312 /*                                               
1313  * Check if this extent is marked UNWRITTEN b    
1314  * do the zero work. And should not to clear     
1315  * by the direct io procedure.                   
1316  * If this is a new extent that allocated by     
1317  * the ip_unwritten_list.                        
1318  */                                              
1319 static int ocfs2_unwritten_check(struct inode    
1320                                  struct ocfs2    
1321                                  struct ocfs2    
1322 {                                                
1323         struct ocfs2_inode_info *oi = OCFS2_I    
1324         struct ocfs2_unwritten_extent *ue = N    
1325         int ret = 0;                             
1326                                                  
1327         if (!desc->c_needs_zero)                 
1328                 return 0;                        
1329                                                  
1330 retry:                                           
1331         spin_lock(&oi->ip_lock);                 
1332         /* Needs not to zero no metter buffer    
1333          * the cluster is doing zero. And he     
1334          * cluster io finished. */               
1335         list_for_each_entry(ue, &oi->ip_unwri    
1336                 if (desc->c_cpos == ue->ue_cp    
1337                         BUG_ON(desc->c_new);     
1338                         desc->c_needs_zero =     
1339                         desc->c_clear_unwritt    
1340                         goto unlock;             
1341                 }                                
1342         }                                        
1343                                                  
1344         if (wc->w_type != OCFS2_WRITE_DIRECT)    
1345                 goto unlock;                     
1346                                                  
1347         if (new == NULL) {                       
1348                 spin_unlock(&oi->ip_lock);       
1349                 new = kmalloc(sizeof(struct o    
1350                              GFP_NOFS);          
1351                 if (new == NULL) {               
1352                         ret = -ENOMEM;           
1353                         goto out;                
1354                 }                                
1355                 goto retry;                      
1356         }                                        
1357         /* This direct write will doing zero.    
1358         new->ue_cpos = desc->c_cpos;             
1359         new->ue_phys = desc->c_phys;             
1360         desc->c_clear_unwritten = 0;             
1361         list_add_tail(&new->ue_ip_node, &oi->    
1362         list_add_tail(&new->ue_node, &wc->w_u    
1363         wc->w_unwritten_count++;                 
1364         new = NULL;                              
1365 unlock:                                          
1366         spin_unlock(&oi->ip_lock);               
1367 out:                                             
1368         kfree(new);                              
1369         return ret;                              
1370 }                                                
1371                                                  
1372 /*                                               
1373  * Populate each single-cluster write descrip    
1374  * with information about the i/o to be done.    
1375  *                                               
1376  * Returns the number of clusters that will h    
1377  * well as a worst case estimate of the numbe    
1378  * would have to be created during a write to    
1379  */                                              
1380 static int ocfs2_populate_write_desc(struct i    
1381                                      struct o    
1382                                      unsigned    
1383                                      unsigned    
1384 {                                                
1385         int ret;                                 
1386         struct ocfs2_write_cluster_desc *desc    
1387         unsigned int num_clusters = 0;           
1388         unsigned int ext_flags = 0;              
1389         u32 phys = 0;                            
1390         int i;                                   
1391                                                  
1392         *clusters_to_alloc = 0;                  
1393         *extents_to_split = 0;                   
1394                                                  
1395         for (i = 0; i < wc->w_clen; i++) {       
1396                 desc = &wc->w_desc[i];           
1397                 desc->c_cpos = wc->w_cpos + i    
1398                                                  
1399                 if (num_clusters == 0) {         
1400                         /*                       
1401                          * Need to look up th    
1402                          */                      
1403                         ret = ocfs2_get_clust    
1404                                                  
1405                         if (ret) {               
1406                                 mlog_errno(re    
1407                                 goto out;        
1408                         }                        
1409                                                  
1410                         /* We should already     
1411                         BUG_ON(ext_flags & OC    
1412                                                  
1413                         /*                       
1414                          * Assume worst case     
1415                          * the middle of the     
1416                          *                       
1417                          * We can assume that    
1418                          * left to right, in     
1419                          * insert code is sma    
1420                          * next splits into t    
1421                          */                      
1422                         if (ext_flags & OCFS2    
1423                                 *extents_to_s    
1424                 } else if (phys) {               
1425                         /*                       
1426                          * Only increment phy    
1427                          * a hole.               
1428                          */                      
1429                         phys++;                  
1430                 }                                
1431                                                  
1432                 /*                               
1433                  * If w_first_new_cpos is < U    
1434                  * file that got extended.  w    
1435                  * where the newly allocated     
1436                  * zero them.                    
1437                  */                              
1438                 if (desc->c_cpos >= wc->w_fir    
1439                         BUG_ON(phys == 0);       
1440                         desc->c_needs_zero =     
1441                 }                                
1442                                                  
1443                 desc->c_phys = phys;             
1444                 if (phys == 0) {                 
1445                         desc->c_new = 1;         
1446                         desc->c_needs_zero =     
1447                         desc->c_clear_unwritt    
1448                         *clusters_to_alloc =     
1449                 }                                
1450                                                  
1451                 if (ext_flags & OCFS2_EXT_UNW    
1452                         desc->c_clear_unwritt    
1453                         desc->c_needs_zero =     
1454                 }                                
1455                                                  
1456                 ret = ocfs2_unwritten_check(i    
1457                 if (ret) {                       
1458                         mlog_errno(ret);         
1459                         goto out;                
1460                 }                                
1461                                                  
1462                 num_clusters--;                  
1463         }                                        
1464                                                  
1465         ret = 0;                                 
1466 out:                                             
1467         return ret;                              
1468 }                                                
1469                                                  
1470 static int ocfs2_write_begin_inline(struct ad    
1471                                     struct in    
1472                                     struct oc    
1473 {                                                
1474         int ret;                                 
1475         struct ocfs2_super *osb = OCFS2_SB(in    
1476         struct page *page;                       
1477         handle_t *handle;                        
1478         struct ocfs2_dinode *di = (struct ocf    
1479                                                  
1480         handle = ocfs2_start_trans(osb, OCFS2    
1481         if (IS_ERR(handle)) {                    
1482                 ret = PTR_ERR(handle);           
1483                 mlog_errno(ret);                 
1484                 goto out;                        
1485         }                                        
1486                                                  
1487         page = find_or_create_page(mapping, 0    
1488         if (!page) {                             
1489                 ocfs2_commit_trans(osb, handl    
1490                 ret = -ENOMEM;                   
1491                 mlog_errno(ret);                 
1492                 goto out;                        
1493         }                                        
1494         /*                                       
1495          * If we don't set w_num_pages then t    
1496          * and freed on cleanup of the write     
1497          */                                      
1498         wc->w_pages[0] = wc->w_target_page =     
1499         wc->w_num_pages = 1;                     
1500                                                  
1501         ret = ocfs2_journal_access_di(handle,    
1502                                       OCFS2_J    
1503         if (ret) {                               
1504                 ocfs2_commit_trans(osb, handl    
1505                                                  
1506                 mlog_errno(ret);                 
1507                 goto out;                        
1508         }                                        
1509                                                  
1510         if (!(OCFS2_I(inode)->ip_dyn_features    
1511                 ocfs2_set_inode_data_inline(i    
1512                                                  
1513         if (!PageUptodate(page)) {               
1514                 ret = ocfs2_read_inline_data(    
1515                 if (ret) {                       
1516                         ocfs2_commit_trans(os    
1517                                                  
1518                         goto out;                
1519                 }                                
1520         }                                        
1521                                                  
1522         wc->w_handle = handle;                   
1523 out:                                             
1524         return ret;                              
1525 }                                                
1526                                                  
1527 int ocfs2_size_fits_inline_data(struct buffer    
1528 {                                                
1529         struct ocfs2_dinode *di = (struct ocf    
1530                                                  
1531         if (new_size <= le16_to_cpu(di->id2.i    
1532                 return 1;                        
1533         return 0;                                
1534 }                                                
1535                                                  
1536 static int ocfs2_try_to_write_inline_data(str    
1537                                           str    
1538                                           uns    
1539                                           str    
1540 {                                                
1541         int ret, written = 0;                    
1542         loff_t end = pos + len;                  
1543         struct ocfs2_inode_info *oi = OCFS2_I    
1544         struct ocfs2_dinode *di = NULL;          
1545                                                  
1546         trace_ocfs2_try_to_write_inline_data(    
1547                                                  
1548                                                  
1549                                                  
1550         /*                                       
1551          * Handle inodes which already have i    
1552          */                                      
1553         if (oi->ip_dyn_features & OCFS2_INLIN    
1554                 if (mmap_page == NULL &&         
1555                     ocfs2_size_fits_inline_da    
1556                         goto do_inline_write;    
1557                                                  
1558                 /*                               
1559                  * The write won't fit - we h    
1560                  * inline extent list now.       
1561                  */                              
1562                 ret = ocfs2_convert_inline_da    
1563                 if (ret)                         
1564                         mlog_errno(ret);         
1565                 goto out;                        
1566         }                                        
1567                                                  
1568         /*                                       
1569          * Check whether the inode can accept    
1570          */                                      
1571         if (oi->ip_clusters != 0 || i_size_re    
1572                 return 0;                        
1573                                                  
1574         /*                                       
1575          * Check whether the write can fit.      
1576          */                                      
1577         di = (struct ocfs2_dinode *)wc->w_di_    
1578         if (mmap_page ||                         
1579             end > ocfs2_max_inline_data_with_    
1580                 return 0;                        
1581                                                  
1582 do_inline_write:                                 
1583         ret = ocfs2_write_begin_inline(mappin    
1584         if (ret) {                               
1585                 mlog_errno(ret);                 
1586                 goto out;                        
1587         }                                        
1588                                                  
1589         /*                                       
1590          * This signals to the caller that th    
1591          * inline.                               
1592          */                                      
1593         written = 1;                             
1594 out:                                             
1595         return written ? written : ret;          
1596 }                                                
1597                                                  
1598 /*                                               
1599  * This function only does anything for file     
1600  * handle sparse files.                          
1601  *                                               
1602  * What we want to do here is fill in any hol    
1603  * of allocation and the end of our write. Th    
1604  * write path can treat it as an non-allocati    
1605  * special case code for sparse/nonsparse fil    
1606  */                                              
1607 static int ocfs2_expand_nonsparse_inode(struc    
1608                                         struc    
1609                                         loff_    
1610                                         struc    
1611 {                                                
1612         int ret;                                 
1613         loff_t newsize = pos + len;              
1614                                                  
1615         BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(in    
1616                                                  
1617         if (newsize <= i_size_read(inode))       
1618                 return 0;                        
1619                                                  
1620         ret = ocfs2_extend_no_holes(inode, di    
1621         if (ret)                                 
1622                 mlog_errno(ret);                 
1623                                                  
1624         /* There is no wc if this is call fro    
1625         if (wc)                                  
1626                 wc->w_first_new_cpos =           
1627                         ocfs2_clusters_for_by    
1628                                                  
1629         return ret;                              
1630 }                                                
1631                                                  
1632 static int ocfs2_zero_tail(struct inode *inod    
1633                            loff_t pos)           
1634 {                                                
1635         int ret = 0;                             
1636                                                  
1637         BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(i    
1638         if (pos > i_size_read(inode))            
1639                 ret = ocfs2_zero_extend(inode    
1640                                                  
1641         return ret;                              
1642 }                                                
1643                                                  
1644 int ocfs2_write_begin_nolock(struct address_s    
1645                              loff_t pos, unsi    
1646                              struct page **pa    
1647                              struct buffer_he    
1648 {                                                
1649         int ret, cluster_of_pages, credits =     
1650         unsigned int clusters_to_alloc, exten    
1651         struct ocfs2_write_ctxt *wc;             
1652         struct inode *inode = mapping->host;     
1653         struct ocfs2_super *osb = OCFS2_SB(in    
1654         struct ocfs2_dinode *di;                 
1655         struct ocfs2_alloc_context *data_ac =    
1656         struct ocfs2_alloc_context *meta_ac =    
1657         handle_t *handle;                        
1658         struct ocfs2_extent_tree et;             
1659         int try_free = 1, ret1;                  
1660                                                  
1661 try_again:                                       
1662         ret = ocfs2_alloc_write_ctxt(&wc, osb    
1663         if (ret) {                               
1664                 mlog_errno(ret);                 
1665                 return ret;                      
1666         }                                        
1667                                                  
1668         if (ocfs2_supports_inline_data(osb))     
1669                 ret = ocfs2_try_to_write_inli    
1670                                                  
1671                 if (ret == 1) {                  
1672                         ret = 0;                 
1673                         goto success;            
1674                 }                                
1675                 if (ret < 0) {                   
1676                         mlog_errno(ret);         
1677                         goto out;                
1678                 }                                
1679         }                                        
1680                                                  
1681         /* Direct io change i_size late, shou    
1682         if (type != OCFS2_WRITE_DIRECT) {        
1683                 if (ocfs2_sparse_alloc(osb))     
1684                         ret = ocfs2_zero_tail    
1685                 else                             
1686                         ret = ocfs2_expand_no    
1687                                                  
1688                 if (ret) {                       
1689                         mlog_errno(ret);         
1690                         goto out;                
1691                 }                                
1692         }                                        
1693                                                  
1694         ret = ocfs2_check_range_for_refcount(    
1695         if (ret < 0) {                           
1696                 mlog_errno(ret);                 
1697                 goto out;                        
1698         } else if (ret == 1) {                   
1699                 clusters_need = wc->w_clen;      
1700                 ret = ocfs2_refcount_cow(inod    
1701                                          wc->    
1702                 if (ret) {                       
1703                         mlog_errno(ret);         
1704                         goto out;                
1705                 }                                
1706         }                                        
1707                                                  
1708         ret = ocfs2_populate_write_desc(inode    
1709                                         &exte    
1710         if (ret) {                               
1711                 mlog_errno(ret);                 
1712                 goto out;                        
1713         }                                        
1714         clusters_need += clusters_to_alloc;      
1715                                                  
1716         di = (struct ocfs2_dinode *)wc->w_di_    
1717                                                  
1718         trace_ocfs2_write_begin_nolock(          
1719                         (unsigned long long)O    
1720                         (long long)i_size_rea    
1721                         le32_to_cpu(di->i_clu    
1722                         pos, len, type, mmap_    
1723                         clusters_to_alloc, ex    
1724                                                  
1725         /*                                       
1726          * We set w_target_from, w_target_to     
1727          * ocfs2_write_end() knows which rang    
1728          * write out. An allocation requires     
1729          * cluster range.                        
1730          */                                      
1731         if (clusters_to_alloc || extents_to_s    
1732                 /*                               
1733                  * XXX: We are stretching the    
1734                  * ocfs2_lock_allocators(). I    
1735                  * the work to be done.          
1736                  */                              
1737                 ocfs2_init_dinode_extent_tree    
1738                                                  
1739                 ret = ocfs2_lock_allocators(i    
1740                                             c    
1741                                             &    
1742                 if (ret) {                       
1743                         mlog_errno(ret);         
1744                         goto out;                
1745                 }                                
1746                                                  
1747                 if (data_ac)                     
1748                         data_ac->ac_resv = &O    
1749                                                  
1750                 credits = ocfs2_calc_extend_c    
1751                                                  
1752         } else if (type == OCFS2_WRITE_DIRECT    
1753                 /* direct write needs not to     
1754                 goto success;                    
1755                                                  
1756         /*                                       
1757          * We have to zero sparse allocated c    
1758          * and non-sparse clusters we just ex    
1759          * we know zeros will only be needed     
1760          */                                      
1761         if (wc->w_clen && (wc->w_desc[0].c_ne    
1762                            wc->w_desc[wc->w_c    
1763                 cluster_of_pages = 1;            
1764         else                                     
1765                 cluster_of_pages = 0;            
1766                                                  
1767         ocfs2_set_target_boundaries(osb, wc,     
1768                                                  
1769         handle = ocfs2_start_trans(osb, credi    
1770         if (IS_ERR(handle)) {                    
1771                 ret = PTR_ERR(handle);           
1772                 mlog_errno(ret);                 
1773                 goto out;                        
1774         }                                        
1775                                                  
1776         wc->w_handle = handle;                   
1777                                                  
1778         if (clusters_to_alloc) {                 
1779                 ret = dquot_alloc_space_nodir    
1780                         ocfs2_clusters_to_byt    
1781                 if (ret)                         
1782                         goto out_commit;         
1783         }                                        
1784                                                  
1785         ret = ocfs2_journal_access_di(handle,    
1786                                       OCFS2_J    
1787         if (ret) {                               
1788                 mlog_errno(ret);                 
1789                 goto out_quota;                  
1790         }                                        
1791                                                  
1792         /*                                       
1793          * Fill our page array first. That wa    
1794          * that we can zero and flush if we e    
1795          * extent.                               
1796          */                                      
1797         ret = ocfs2_grab_pages_for_write(mapp    
1798                                          clus    
1799         if (ret) {                               
1800                 /*                               
1801                  * ocfs2_grab_pages_for_write    
1802                  * the target page. In this c    
1803                  * page. This will trigger th    
1804                  * the operation.                
1805                  */                              
1806                 if (type == OCFS2_WRITE_MMAP     
1807                         BUG_ON(wc->w_target_p    
1808                         ret = 0;                 
1809                         goto out_quota;          
1810                 }                                
1811                                                  
1812                 mlog_errno(ret);                 
1813                 goto out_quota;                  
1814         }                                        
1815                                                  
1816         ret = ocfs2_write_cluster_by_desc(map    
1817                                           len    
1818         if (ret) {                               
1819                 mlog_errno(ret);                 
1820                 goto out_quota;                  
1821         }                                        
1822                                                  
1823         if (data_ac)                             
1824                 ocfs2_free_alloc_context(data    
1825         if (meta_ac)                             
1826                 ocfs2_free_alloc_context(meta    
1827                                                  
1828 success:                                         
1829         if (pagep)                               
1830                 *pagep = wc->w_target_page;      
1831         *fsdata = wc;                            
1832         return 0;                                
1833 out_quota:                                       
1834         if (clusters_to_alloc)                   
1835                 dquot_free_space(inode,          
1836                           ocfs2_clusters_to_b    
1837 out_commit:                                      
1838         ocfs2_commit_trans(osb, handle);         
1839                                                  
1840 out:                                             
1841         /*                                       
1842          * The mmapped page won't be unlocked    
1843          * even in case of error here like EN    
1844          * to unlock the target page manually    
1845          * retrying again on ENOSPC, or when     
1846          * to VM code.                           
1847          */                                      
1848         if (wc->w_target_locked)                 
1849                 unlock_page(mmap_page);          
1850                                                  
1851         ocfs2_free_write_ctxt(inode, wc);        
1852                                                  
1853         if (data_ac) {                           
1854                 ocfs2_free_alloc_context(data    
1855                 data_ac = NULL;                  
1856         }                                        
1857         if (meta_ac) {                           
1858                 ocfs2_free_alloc_context(meta    
1859                 meta_ac = NULL;                  
1860         }                                        
1861                                                  
1862         if (ret == -ENOSPC && try_free) {        
1863                 /*                               
1864                  * Try to free some truncate     
1865                  * clusters to allocate.         
1866                  */                              
1867                 try_free = 0;                    
1868                                                  
1869                 ret1 = ocfs2_try_to_free_trun    
1870                 if (ret1 == 1)                   
1871                         goto try_again;          
1872                                                  
1873                 if (ret1 < 0)                    
1874                         mlog_errno(ret1);        
1875         }                                        
1876                                                  
1877         return ret;                              
1878 }                                                
1879                                                  
1880 static int ocfs2_write_begin(struct file *fil    
1881                              loff_t pos, unsi    
1882                              struct page **pa    
1883 {                                                
1884         int ret;                                 
1885         struct buffer_head *di_bh = NULL;        
1886         struct inode *inode = mapping->host;     
1887                                                  
1888         ret = ocfs2_inode_lock(inode, &di_bh,    
1889         if (ret) {                               
1890                 mlog_errno(ret);                 
1891                 return ret;                      
1892         }                                        
1893                                                  
1894         /*                                       
1895          * Take alloc sem here to prevent con    
1896          * the mapping, zeroing and tree mani    
1897          * ocfs2_write() will be safe against    
1898          * should also serve to lock out allo    
1899          * writeable region.                     
1900          */                                      
1901         down_write(&OCFS2_I(inode)->ip_alloc_    
1902                                                  
1903         ret = ocfs2_write_begin_nolock(mappin    
1904                                        pagep,    
1905         if (ret) {                               
1906                 mlog_errno(ret);                 
1907                 goto out_fail;                   
1908         }                                        
1909                                                  
1910         brelse(di_bh);                           
1911                                                  
1912         return 0;                                
1913                                                  
1914 out_fail:                                        
1915         up_write(&OCFS2_I(inode)->ip_alloc_se    
1916                                                  
1917         brelse(di_bh);                           
1918         ocfs2_inode_unlock(inode, 1);            
1919                                                  
1920         return ret;                              
1921 }                                                
1922                                                  
1923 static void ocfs2_write_end_inline(struct ino    
1924                                    unsigned l    
1925                                    struct ocf    
1926                                    struct ocf    
1927 {                                                
1928         void *kaddr;                             
1929                                                  
1930         if (unlikely(*copied < len)) {           
1931                 if (!PageUptodate(wc->w_targe    
1932                         *copied = 0;             
1933                         return;                  
1934                 }                                
1935         }                                        
1936                                                  
1937         kaddr = kmap_atomic(wc->w_target_page    
1938         memcpy(di->id2.i_data.id_data + pos,     
1939         kunmap_atomic(kaddr);                    
1940                                                  
1941         trace_ocfs2_write_end_inline(            
1942              (unsigned long long)OCFS2_I(inod    
1943              (unsigned long long)pos, *copied    
1944              le16_to_cpu(di->id2.i_data.id_co    
1945              le16_to_cpu(di->i_dyn_features))    
1946 }                                                
1947                                                  
1948 int ocfs2_write_end_nolock(struct address_spa    
1949                            loff_t pos, unsign    
1950 {                                                
1951         int i, ret;                              
1952         unsigned from, to, start = pos & (PAG    
1953         struct inode *inode = mapping->host;     
1954         struct ocfs2_super *osb = OCFS2_SB(in    
1955         struct ocfs2_write_ctxt *wc = fsdata;    
1956         struct ocfs2_dinode *di = (struct ocf    
1957         handle_t *handle = wc->w_handle;         
1958         struct page *tmppage;                    
1959                                                  
1960         BUG_ON(!list_empty(&wc->w_unwritten_l    
1961                                                  
1962         if (handle) {                            
1963                 ret = ocfs2_journal_access_di    
1964                                 wc->w_di_bh,     
1965                 if (ret) {                       
1966                         copied = ret;            
1967                         mlog_errno(ret);         
1968                         goto out;                
1969                 }                                
1970         }                                        
1971                                                  
1972         if (OCFS2_I(inode)->ip_dyn_features &    
1973                 ocfs2_write_end_inline(inode,    
1974                 goto out_write_size;             
1975         }                                        
1976                                                  
1977         if (unlikely(copied < len) && wc->w_t    
1978                 loff_t new_isize;                
1979                                                  
1980                 if (!PageUptodate(wc->w_targe    
1981                         copied = 0;              
1982                                                  
1983                 new_isize = max_t(loff_t, i_s    
1984                 if (new_isize > page_offset(w    
1985                         ocfs2_zero_new_buffer    
1986                                                  
1987                 else {                           
1988                         /*                       
1989                          * When page is fully    
1990                          * failed), do not bo    
1991                          * it instead so that    
1992                          * put page & buffer     
1993                          * state.                
1994                          */                      
1995                         block_invalidate_foli    
1996                                                  
1997                 }                                
1998         }                                        
1999         if (wc->w_target_page)                   
2000                 flush_dcache_page(wc->w_targe    
2001                                                  
2002         for(i = 0; i < wc->w_num_pages; i++)     
2003                 tmppage = wc->w_pages[i];        
2004                                                  
2005                 /* This is the direct io targ    
2006                 if (tmppage == NULL)             
2007                         continue;                
2008                                                  
2009                 if (tmppage == wc->w_target_p    
2010                         from = wc->w_target_f    
2011                         to = wc->w_target_to;    
2012                                                  
2013                         BUG_ON(from > PAGE_SI    
2014                                to > PAGE_SIZE    
2015                                to < from);       
2016                 } else {                         
2017                         /*                       
2018                          * Pages adjacent to     
2019                          * a hole-filling wri    
2020                          * to flush their ent    
2021                          */                      
2022                         from = 0;                
2023                         to = PAGE_SIZE;          
2024                 }                                
2025                                                  
2026                 if (page_has_buffers(tmppage)    
2027                         if (handle && ocfs2_s    
2028                                 loff_t start_    
2029                                         ((lof    
2030                                         from;    
2031                                 loff_t length    
2032                                 ocfs2_jbd2_in    
2033                                                  
2034                         }                        
2035                         block_commit_write(tm    
2036                 }                                
2037         }                                        
2038                                                  
2039 out_write_size:                                  
2040         /* Direct io do not update i_size her    
2041         if (wc->w_type != OCFS2_WRITE_DIRECT)    
2042                 pos += copied;                   
2043                 if (pos > i_size_read(inode))    
2044                         i_size_write(inode, p    
2045                         mark_inode_dirty(inod    
2046                 }                                
2047                 inode->i_blocks = ocfs2_inode    
2048                 di->i_size = cpu_to_le64((u64    
2049                 inode_set_mtime_to_ts(inode,     
2050                 di->i_mtime = di->i_ctime = c    
2051                 di->i_mtime_nsec = di->i_ctim    
2052                 if (handle)                      
2053                         ocfs2_update_inode_fs    
2054         }                                        
2055         if (handle)                              
2056                 ocfs2_journal_dirty(handle, w    
2057                                                  
2058 out:                                             
2059         /* unlock pages before dealloc since     
2060          * lock, or it will cause a deadlock     
2061          * this lock and will ask for the pag    
2062          * put it here to preserve the unlock    
2063          */                                      
2064         ocfs2_unlock_pages(wc);                  
2065                                                  
2066         if (handle)                              
2067                 ocfs2_commit_trans(osb, handl    
2068                                                  
2069         ocfs2_run_deallocs(osb, &wc->w_deallo    
2070                                                  
2071         brelse(wc->w_di_bh);                     
2072         kfree(wc);                               
2073                                                  
2074         return copied;                           
2075 }                                                
2076                                                  
2077 static int ocfs2_write_end(struct file *file,    
2078                            loff_t pos, unsign    
2079                            struct page *page,    
2080 {                                                
2081         int ret;                                 
2082         struct inode *inode = mapping->host;     
2083                                                  
2084         ret = ocfs2_write_end_nolock(mapping,    
2085                                                  
2086         up_write(&OCFS2_I(inode)->ip_alloc_se    
2087         ocfs2_inode_unlock(inode, 1);            
2088                                                  
2089         return ret;                              
2090 }                                                
2091                                                  
2092 struct ocfs2_dio_write_ctxt {                    
2093         struct list_head        dw_zero_list;    
2094         unsigned                dw_zero_count    
2095         int                     dw_orphaned;     
2096         pid_t                   dw_writer_pid    
2097 };                                               
2098                                                  
2099 static struct ocfs2_dio_write_ctxt *             
2100 ocfs2_dio_alloc_write_ctx(struct buffer_head     
2101 {                                                
2102         struct ocfs2_dio_write_ctxt *dwc = NU    
2103                                                  
2104         if (bh->b_private)                       
2105                 return bh->b_private;            
2106                                                  
2107         dwc = kmalloc(sizeof(struct ocfs2_dio    
2108         if (dwc == NULL)                         
2109                 return NULL;                     
2110         INIT_LIST_HEAD(&dwc->dw_zero_list);      
2111         dwc->dw_zero_count = 0;                  
2112         dwc->dw_orphaned = 0;                    
2113         dwc->dw_writer_pid = task_pid_nr(curr    
2114         bh->b_private = dwc;                     
2115         *alloc = 1;                              
2116                                                  
2117         return dwc;                              
2118 }                                                
2119                                                  
2120 static void ocfs2_dio_free_write_ctx(struct i    
2121                                      struct o    
2122 {                                                
2123         ocfs2_free_unwritten_list(inode, &dwc    
2124         kfree(dwc);                              
2125 }                                                
2126                                                  
2127 /*                                               
2128  * TODO: Make this into a generic get_blocks     
2129  *                                               
2130  * From do_direct_io in direct-io.c:             
2131  *  "So what we do is to permit the ->get_blo    
2132  *   bh.b_size with the size of IO which is p    
2133  *   this i_blkbits."                            
2134  *                                               
2135  * This function is called directly from get_    
2136  *                                               
2137  * called like this: dio->get_blocks(dio->ino    
2138  *                                      fs_co    
2139  */                                              
2140 static int ocfs2_dio_wr_get_block(struct inod    
2141                                struct buffer_    
2142 {                                                
2143         struct ocfs2_super *osb = OCFS2_SB(in    
2144         struct ocfs2_inode_info *oi = OCFS2_I    
2145         struct ocfs2_write_ctxt *wc;             
2146         struct ocfs2_write_cluster_desc *desc    
2147         struct ocfs2_dio_write_ctxt *dwc = NU    
2148         struct buffer_head *di_bh = NULL;        
2149         u64 p_blkno;                             
2150         unsigned int i_blkbits = inode->i_sb-    
2151         loff_t pos = iblock << i_blkbits;        
2152         sector_t endblk = (i_size_read(inode)    
2153         unsigned len, total_len = bh_result->    
2154         int ret = 0, first_get_block = 0;        
2155                                                  
2156         len = osb->s_clustersize - (pos & (os    
2157         len = min(total_len, len);               
2158                                                  
2159         /*                                       
2160          * bh_result->b_size is count in get_    
2161          * "pos" and "end", we need map twice    
2162          * 1. area in file size, not set NEW;    
2163          * 2. area out file size, set  NEW.      
2164          *                                       
2165          *                 iblock    endblk      
2166          * |--------|---------|---------|----    
2167          * |<-------area in file------->|        
2168          */                                      
2169                                                  
2170         if ((iblock <= endblk) &&                
2171             ((iblock + ((len - 1) >> i_blkbit    
2172                 len = (endblk - iblock + 1) <    
2173                                                  
2174         mlog(0, "get block of %lu at %llu:%u     
2175                         inode->i_ino, pos, le    
2176                                                  
2177         /*                                       
2178          * Because we need to change file siz    
2179          * we may need to add it to orphan di    
2180          * while file size will be changed.      
2181          */                                      
2182         if (pos + total_len <= i_size_read(in    
2183                                                  
2184                 /* This is the fast path for     
2185                 ret = ocfs2_lock_get_block(in    
2186                 if (buffer_mapped(bh_result)     
2187                     !buffer_new(bh_result) &&    
2188                     ret == 0)                    
2189                         goto out;                
2190                                                  
2191                 /* Clear state set by ocfs2_g    
2192                 bh_result->b_state = 0;          
2193         }                                        
2194                                                  
2195         dwc = ocfs2_dio_alloc_write_ctx(bh_re    
2196         if (unlikely(dwc == NULL)) {             
2197                 ret = -ENOMEM;                   
2198                 mlog_errno(ret);                 
2199                 goto out;                        
2200         }                                        
2201                                                  
2202         if (ocfs2_clusters_for_bytes(inode->i    
2203             ocfs2_clusters_for_bytes(inode->i    
2204             !dwc->dw_orphaned) {                 
2205                 /*                               
2206                  * when we are going to alloc    
2207                  * inode to orphan dir, so we    
2208                  * system crashed during writ    
2209                  */                              
2210                 ret = ocfs2_add_inode_to_orph    
2211                 if (ret < 0) {                   
2212                         mlog_errno(ret);         
2213                         goto out;                
2214                 }                                
2215                 dwc->dw_orphaned = 1;            
2216         }                                        
2217                                                  
2218         ret = ocfs2_inode_lock(inode, &di_bh,    
2219         if (ret) {                               
2220                 mlog_errno(ret);                 
2221                 goto out;                        
2222         }                                        
2223                                                  
2224         down_write(&oi->ip_alloc_sem);           
2225                                                  
2226         if (first_get_block) {                   
2227                 if (ocfs2_sparse_alloc(osb))     
2228                         ret = ocfs2_zero_tail    
2229                 else                             
2230                         ret = ocfs2_expand_no    
2231                                                  
2232                 if (ret < 0) {                   
2233                         mlog_errno(ret);         
2234                         goto unlock;             
2235                 }                                
2236         }                                        
2237                                                  
2238         ret = ocfs2_write_begin_nolock(inode-    
2239                                        OCFS2_    
2240                                        (void     
2241         if (ret) {                               
2242                 mlog_errno(ret);                 
2243                 goto unlock;                     
2244         }                                        
2245                                                  
2246         desc = &wc->w_desc[0];                   
2247                                                  
2248         p_blkno = ocfs2_clusters_to_blocks(in    
2249         BUG_ON(p_blkno == 0);                    
2250         p_blkno += iblock & (u64)(ocfs2_clust    
2251                                                  
2252         map_bh(bh_result, inode->i_sb, p_blkn    
2253         bh_result->b_size = len;                 
2254         if (desc->c_needs_zero)                  
2255                 set_buffer_new(bh_result);       
2256                                                  
2257         if (iblock > endblk)                     
2258                 set_buffer_new(bh_result);       
2259                                                  
2260         /* May sleep in end_io. It should not    
2261          * it to dio work queue. */              
2262         set_buffer_defer_completion(bh_result    
2263                                                  
2264         if (!list_empty(&wc->w_unwritten_list    
2265                 struct ocfs2_unwritten_extent    
2266                                                  
2267                 ue = list_first_entry(&wc->w_    
2268                                       struct     
2269                                       ue_node    
2270                 BUG_ON(ue->ue_cpos != desc->c    
2271                 /* The physical address may b    
2272                 ue->ue_phys = desc->c_phys;      
2273                                                  
2274                 list_splice_tail_init(&wc->w_    
2275                 dwc->dw_zero_count += wc->w_u    
2276         }                                        
2277                                                  
2278         ret = ocfs2_write_end_nolock(inode->i    
2279         BUG_ON(ret != len);                      
2280         ret = 0;                                 
2281 unlock:                                          
2282         up_write(&oi->ip_alloc_sem);             
2283         ocfs2_inode_unlock(inode, 1);            
2284         brelse(di_bh);                           
2285 out:                                             
2286         return ret;                              
2287 }                                                
2288                                                  
2289 static int ocfs2_dio_end_io_write(struct inod    
2290                                   struct ocfs    
2291                                   loff_t offs    
2292                                   ssize_t byt    
2293 {                                                
2294         struct ocfs2_cached_dealloc_ctxt deal    
2295         struct ocfs2_extent_tree et;             
2296         struct ocfs2_super *osb = OCFS2_SB(in    
2297         struct ocfs2_inode_info *oi = OCFS2_I    
2298         struct ocfs2_unwritten_extent *ue = N    
2299         struct buffer_head *di_bh = NULL;        
2300         struct ocfs2_dinode *di;                 
2301         struct ocfs2_alloc_context *data_ac =    
2302         struct ocfs2_alloc_context *meta_ac =    
2303         handle_t *handle = NULL;                 
2304         loff_t end = offset + bytes;             
2305         int ret = 0, credits = 0;                
2306                                                  
2307         ocfs2_init_dealloc_ctxt(&dealloc);       
2308                                                  
2309         /* We do clear unwritten, delete orph    
2310          * of these happen, we can skip all t    
2311         if (list_empty(&dwc->dw_zero_list) &&    
2312             end <= i_size_read(inode) &&         
2313             !dwc->dw_orphaned)                   
2314                 goto out;                        
2315                                                  
2316         ret = ocfs2_inode_lock(inode, &di_bh,    
2317         if (ret < 0) {                           
2318                 mlog_errno(ret);                 
2319                 goto out;                        
2320         }                                        
2321                                                  
2322         down_write(&oi->ip_alloc_sem);           
2323                                                  
2324         /* Delete orphan before acquire i_rws    
2325         if (dwc->dw_orphaned) {                  
2326                 BUG_ON(dwc->dw_writer_pid !=     
2327                                                  
2328                 end = end > i_size_read(inode    
2329                                                  
2330                 ret = ocfs2_del_inode_from_or    
2331                                 !!end, end);     
2332                 if (ret < 0)                     
2333                         mlog_errno(ret);         
2334         }                                        
2335                                                  
2336         di = (struct ocfs2_dinode *)di_bh->b_    
2337                                                  
2338         ocfs2_init_dinode_extent_tree(&et, IN    
2339                                                  
2340         /* Attach dealloc with extent tree in    
2341          * which are already unlinked from cu    
2342          * rotation and merging.                 
2343          */                                      
2344         et.et_dealloc = &dealloc;                
2345                                                  
2346         ret = ocfs2_lock_allocators(inode, &e    
2347                                     &data_ac,    
2348         if (ret) {                               
2349                 mlog_errno(ret);                 
2350                 goto unlock;                     
2351         }                                        
2352                                                  
2353         credits = ocfs2_calc_extend_credits(i    
2354                                                  
2355         handle = ocfs2_start_trans(osb, credi    
2356         if (IS_ERR(handle)) {                    
2357                 ret = PTR_ERR(handle);           
2358                 mlog_errno(ret);                 
2359                 goto unlock;                     
2360         }                                        
2361         ret = ocfs2_journal_access_di(handle,    
2362                                       OCFS2_J    
2363         if (ret) {                               
2364                 mlog_errno(ret);                 
2365                 goto commit;                     
2366         }                                        
2367                                                  
2368         list_for_each_entry(ue, &dwc->dw_zero    
2369                 ret = ocfs2_assure_trans_cred    
2370                 if (ret < 0) {                   
2371                         mlog_errno(ret);         
2372                         break;                   
2373                 }                                
2374                 ret = ocfs2_mark_extent_writt    
2375                                                  
2376                                                  
2377                                                  
2378                 if (ret < 0) {                   
2379                         mlog_errno(ret);         
2380                         break;                   
2381                 }                                
2382         }                                        
2383                                                  
2384         if (end > i_size_read(inode)) {          
2385                 ret = ocfs2_set_inode_size(ha    
2386                 if (ret < 0)                     
2387                         mlog_errno(ret);         
2388         }                                        
2389 commit:                                          
2390         ocfs2_commit_trans(osb, handle);         
2391 unlock:                                          
2392         up_write(&oi->ip_alloc_sem);             
2393         ocfs2_inode_unlock(inode, 1);            
2394         brelse(di_bh);                           
2395 out:                                             
2396         if (data_ac)                             
2397                 ocfs2_free_alloc_context(data    
2398         if (meta_ac)                             
2399                 ocfs2_free_alloc_context(meta    
2400         ocfs2_run_deallocs(osb, &dealloc);       
2401         ocfs2_dio_free_write_ctx(inode, dwc);    
2402                                                  
2403         return ret;                              
2404 }                                                
2405                                                  
2406 /*                                               
2407  * ocfs2_dio_end_io is called by the dio core    
2408  * particularly interested in the aio/dio cas    
2409  * to protect io on one node from truncation     
2410  */                                              
2411 static int ocfs2_dio_end_io(struct kiocb *ioc    
2412                             loff_t offset,       
2413                             ssize_t bytes,       
2414                             void *private)       
2415 {                                                
2416         struct inode *inode = file_inode(iocb    
2417         int level;                               
2418         int ret = 0;                             
2419                                                  
2420         /* this io's submitter should not hav    
2421         BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)    
2422                                                  
2423         if (bytes <= 0)                          
2424                 mlog_ratelimited(ML_ERROR, "D    
2425                                  (long long)b    
2426         if (private) {                           
2427                 if (bytes > 0)                   
2428                         ret = ocfs2_dio_end_i    
2429                                                  
2430                 else                             
2431                         ocfs2_dio_free_write_    
2432         }                                        
2433                                                  
2434         ocfs2_iocb_clear_rw_locked(iocb);        
2435                                                  
2436         level = ocfs2_iocb_rw_locked_level(io    
2437         ocfs2_rw_unlock(inode, level);           
2438         return ret;                              
2439 }                                                
2440                                                  
2441 static ssize_t ocfs2_direct_IO(struct kiocb *    
2442 {                                                
2443         struct file *file = iocb->ki_filp;       
2444         struct inode *inode = file->f_mapping    
2445         struct ocfs2_super *osb = OCFS2_SB(in    
2446         get_block_t *get_block;                  
2447                                                  
2448         /*                                       
2449          * Fallback to buffered I/O if we see    
2450          * extents.                              
2451          */                                      
2452         if (OCFS2_I(inode)->ip_dyn_features &    
2453                 return 0;                        
2454                                                  
2455         /* Fallback to buffered I/O if we do     
2456         if (iocb->ki_pos + iter->count > i_si    
2457             !ocfs2_supports_append_dio(osb))     
2458                 return 0;                        
2459                                                  
2460         if (iov_iter_rw(iter) == READ)           
2461                 get_block = ocfs2_lock_get_bl    
2462         else                                     
2463                 get_block = ocfs2_dio_wr_get_    
2464                                                  
2465         return __blockdev_direct_IO(iocb, ino    
2466                                     iter, get    
2467                                     ocfs2_dio    
2468 }                                                
2469                                                  
2470 const struct address_space_operations ocfs2_a    
2471         .dirty_folio            = block_dirty    
2472         .read_folio             = ocfs2_read_    
2473         .readahead              = ocfs2_reada    
2474         .writepages             = ocfs2_write    
2475         .write_begin            = ocfs2_write    
2476         .write_end              = ocfs2_write    
2477         .bmap                   = ocfs2_bmap,    
2478         .direct_IO              = ocfs2_direc    
2479         .invalidate_folio       = block_inval    
2480         .release_folio          = ocfs2_relea    
2481         .migrate_folio          = buffer_migr    
2482         .is_partially_uptodate  = block_is_pa    
2483         .error_remove_folio     = generic_err    
2484 };                                               
2485
~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.
TOMOYO Linux Cross Reference Linux/fs/ocfs2/aops.c

Diff markup

Differences between /fs/ocfs2/aops.c (Version linux-6.11-rc3) and /fs/ocfs2/aops.c (Version linux-2.4.37.11)

TOMOYO Linux Cross Reference
Linux/fs/ocfs2/aops.c