~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/smb/client/compress.c

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /fs/smb/client/compress.c (Version linux-6.12-rc7) and /fs/smb/client/compress.c (Version linux-2.6.0)


  1 // SPDX-License-Identifier: GPL-2.0-only            1 
  2 /*                                                
  3  * Copyright (C) 2024, SUSE LLC                   
  4  *                                                
  5  * Authors: Enzo Matsumiya <ematsumiya@suse.de    
  6  *                                                
  7  * This file implements I/O compression suppor    
  8  * See compress/ for implementation details of    
  9  *                                                
 10  * References:                                    
 11  * MS-SMB2 "3.1.4.4 Compressing the Message"      
 12  * MS-SMB2 "3.1.5.3 Decompressing the Chained     
 13  * MS-XCA - for details of the supported algor    
 14  */                                               
 15 #include <linux/slab.h>                           
 16 #include <linux/kernel.h>                         
 17 #include <linux/uio.h>                            
 18 #include <linux/sort.h>                           
 19                                                   
 20 #include "cifsglob.h"                             
 21 #include "../common/smb2pdu.h"                    
 22 #include "cifsproto.h"                            
 23 #include "smb2proto.h"                            
 24                                                   
 25 #include "compress/lz77.h"                        
 26 #include "compress.h"                             
 27                                                   
 28 /*                                                
 29  * The heuristic_*() functions below try to de    
 30  *                                                
 31  * Derived from fs/btrfs/compression.c, changi    
 32  * unused parts.                                  
 33  *                                                
 34  * Read that file for better and more detailed    
 35  *                                                
 36  * The algorithms are ran in a collected sampl    
 37  * The sample is formed of 2K reads in PAGE_SI    
 38  *                                                
 39  * Parsing the sample goes from "low-hanging f    
 40  * to "need more analysis" (likely uncompressi    
 41  */                                               
 42                                                   
 43 struct bucket {                                   
 44         unsigned int count;                       
 45 };                                                
 46                                                   
 47 /**                                               
 48  * has_low_entropy() - Compute Shannon entropy    
 49  * @bkt:        Bytes counts of the sample.       
 50  * @slen:       Size of the sample.               
 51  *                                                
 52  * Return: true if the level (percentage of nu    
 53  *         compress the data) is below the min    
 54  *                                                
 55  * Note:                                          
 56  * There _is_ an entropy level here that's > 6    
 57  * possibility of compression, but compressing    
 58  * resources that it's simply not worth it.       
 59  *                                                
 60  * Also Shannon entropy is the last computed h    
 61  * with uncertainty, just stay on the safe sid    
 62  */                                               
 63 static bool has_low_entropy(struct bucket *bkt    
 64 {                                                 
 65         const size_t threshold = 65, max_entro    
 66         size_t i, p, p2, len, sum = 0;            
 67                                                   
 68 #define pow4(n) (n * n * n * n)                   
 69         len = ilog2(pow4(slen));                  
 70                                                   
 71         for (i = 0; i < 256 && bkt[i].count >     
 72                 p = bkt[i].count;                 
 73                 p2 = ilog2(pow4(p));              
 74                 sum += p * (len - p2);            
 75         }                                         
 76                                                   
 77         sum /= slen;                              
 78                                                   
 79         return ((sum * 100 / max_entropy) <= t    
 80 }                                                 
 81                                                   
 82 #define BYTE_DIST_BAD           0                 
 83 #define BYTE_DIST_GOOD          1                 
 84 #define BYTE_DIST_MAYBE         2                 
 85 /**                                               
 86  * calc_byte_distribution() - Compute byte dis    
 87  * @bkt:        Byte counts of the sample.        
 88  * @slen:       Size of the sample.               
 89  *                                                
 90  * Return:                                        
 91  * BYTE_DIST_BAD:       A "hard no" for compre    
 92  *                      the bytes (e.g. random    
 93  * BYTE_DIST_GOOD:      High probability (norm    
 94  *                      compressible.             
 95  * BYTE_DIST_MAYBE:     When computed byte dis    
 96  *                      grounds.  has_low_entr    
 97  */                                               
 98 static int calc_byte_distribution(struct bucke    
 99 {                                                 
100         const size_t low = 64, high = 200, thr    
101         size_t sum = 0;                           
102         int i;                                    
103                                                   
104         for (i = 0; i < low; i++)                 
105                 sum += bkt[i].count;              
106                                                   
107         if (sum > threshold)                      
108                 return BYTE_DIST_BAD;             
109                                                   
110         for (; i < high && bkt[i].count > 0; i    
111                 sum += bkt[i].count;              
112                 if (sum > threshold)              
113                         break;                    
114         }                                         
115                                                   
116         if (i <= low)                             
117                 return BYTE_DIST_GOOD;            
118                                                   
119         if (i >= high)                            
120                 return BYTE_DIST_BAD;             
121                                                   
122         return BYTE_DIST_MAYBE;                   
123 }                                                 
124                                                   
125 static bool is_mostly_ascii(const struct bucke    
126 {                                                 
127         size_t count = 0;                         
128         int i;                                    
129                                                   
130         for (i = 0; i < 256; i++)                 
131                 if (bkt[i].count > 0)             
132                         /* Too many non-ASCII     
133                         if (++count > 64)         
134                                 return false;     
135                                                   
136         return true;                              
137 }                                                 
138                                                   
139 static bool has_repeated_data(const u8 *sample    
140 {                                                 
141         size_t s = len / 2;                       
142                                                   
143         return (!memcmp(&sample[0], &sample[s]    
144 }                                                 
145                                                   
146 static int cmp_bkt(const void *_a, const void     
147 {                                                 
148         const struct bucket *a = _a, *b = _b;     
149                                                   
150         /* Reverse sort. */                       
151         if (a->count > b->count)                  
152                 return -1;                        
153                                                   
154         return 1;                                 
155 }                                                 
156                                                   
157 /*                                                
158  * TODO:                                          
159  * Support other iter types, if required.         
160  * Only ITER_XARRAY is supported for now.         
161  */                                               
162 static int collect_sample(const struct iov_ite    
163 {                                                 
164         struct folio *folios[16], *folio;         
165         unsigned int nr, i, j, npages;            
166         loff_t start = iter->xarray_start + it    
167         pgoff_t last, index = start / PAGE_SIZ    
168         size_t len, off, foff;                    
169         void *p;                                  
170         int s = 0;                                
171                                                   
172         last = (start + max - 1) / PAGE_SIZE;     
173         do {                                      
174                 nr = xa_extract(iter->xarray,     
175                                 XA_PRESENT);      
176                 if (nr == 0)                      
177                         return -EIO;              
178                                                   
179                 for (i = 0; i < nr; i++) {        
180                         folio = folios[i];        
181                         npages = folio_nr_page    
182                         foff = start - folio_p    
183                         off = foff % PAGE_SIZE    
184                                                   
185                         for (j = foff / PAGE_S    
186                                 size_t len2;      
187                                                   
188                                 len = min_t(si    
189                                 len2 = min_t(s    
190                                                   
191                                 p = kmap_local    
192                                 memcpy(&sample    
193                                 kunmap_local(p    
194                                                   
195                                 s += len2;        
196                                                   
197                                 if (len2 < SZ_    
198                                         return    
199                                                   
200                                 max -= len;       
201                                 if (max <= 0)     
202                                         return    
203                                                   
204                                 start += len;     
205                                 off = 0;          
206                                 index++;          
207                         }                         
208                 }                                 
209         } while (nr == ARRAY_SIZE(folios));       
210                                                   
211         return s;                                 
212 }                                                 
213                                                   
214 /**                                               
215  * is_compressible() - Determines if a chunk o    
216  * @data: Iterator containing uncompressed dat    
217  *                                                
218  * Return: true if @data is compressible, fals    
219  *                                                
220  * Tests shows that this function is quite rel    
221  * matching close to 1:1 with the behaviour of    
222  */                                               
223 static bool is_compressible(const struct iov_i    
224 {                                                 
225         const size_t read_size = SZ_2K, bkt_si    
226         struct bucket *bkt = NULL;                
227         size_t len;                               
228         u8 *sample;                               
229         bool ret = false;                         
230         int i;                                    
231                                                   
232         /* Preventive double check -- already     
233         len = iov_iter_count(data);               
234         if (unlikely(len < read_size))            
235                 return ret;                       
236                                                   
237         if (len - read_size > max)                
238                 len = max;                        
239                                                   
240         sample = kvzalloc(len, GFP_KERNEL);       
241         if (!sample) {                            
242                 WARN_ON_ONCE(1);                  
243                                                   
244                 return ret;                       
245         }                                         
246                                                   
247         /* Sample 2K bytes per page of the unc    
248         i = collect_sample(data, len, sample);    
249         if (i <= 0) {                             
250                 WARN_ON_ONCE(1);                  
251                                                   
252                 goto out;                         
253         }                                         
254                                                   
255         len = i;                                  
256         ret = true;                               
257                                                   
258         if (has_repeated_data(sample, len))       
259                 goto out;                         
260                                                   
261         bkt = kcalloc(bkt_size, sizeof(*bkt),     
262         if (!bkt) {                               
263                 WARN_ON_ONCE(1);                  
264                 ret = false;                      
265                                                   
266                 goto out;                         
267         }                                         
268                                                   
269         for (i = 0; i < len; i++)                 
270                 bkt[sample[i]].count++;           
271                                                   
272         if (is_mostly_ascii(bkt))                 
273                 goto out;                         
274                                                   
275         /* Sort in descending order */            
276         sort(bkt, bkt_size, sizeof(*bkt), cmp_    
277                                                   
278         i = calc_byte_distribution(bkt, len);     
279         if (i != BYTE_DIST_MAYBE) {               
280                 ret = !!i;                        
281                                                   
282                 goto out;                         
283         }                                         
284                                                   
285         ret = has_low_entropy(bkt, len);          
286 out:                                              
287         kvfree(sample);                           
288         kfree(bkt);                               
289                                                   
290         return ret;                               
291 }                                                 
292                                                   
293 bool should_compress(const struct cifs_tcon *t    
294 {                                                 
295         const struct smb2_hdr *shdr = rq->rq_i    
296                                                   
297         if (unlikely(!tcon || !tcon->ses || !t    
298                 return false;                     
299                                                   
300         if (!tcon->ses->server->compression.en    
301                 return false;                     
302                                                   
303         if (!(tcon->share_flags & SMB2_SHAREFL    
304                 return false;                     
305                                                   
306         if (shdr->Command == SMB2_WRITE) {        
307                 const struct smb2_write_req *w    
308                                                   
309                 if (le32_to_cpu(wreq->Length)     
310                         return false;             
311                                                   
312                 return is_compressible(&rq->rq    
313         }                                         
314                                                   
315         return (shdr->Command == SMB2_READ);      
316 }                                                 
317                                                   
318 int smb_compress(struct TCP_Server_Info *serve    
319 {                                                 
320         struct iov_iter iter;                     
321         u32 slen, dlen;                           
322         void *src, *dst = NULL;                   
323         int ret;                                  
324                                                   
325         if (!server || !rq || !rq->rq_iov || !    
326                 return -EINVAL;                   
327                                                   
328         if (rq->rq_iov->iov_len != sizeof(stru    
329                 return -EINVAL;                   
330                                                   
331         slen = iov_iter_count(&rq->rq_iter);      
332         src = kvzalloc(slen, GFP_KERNEL);         
333         if (!src) {                               
334                 ret = -ENOMEM;                    
335                 goto err_free;                    
336         }                                         
337                                                   
338         /* Keep the original iter intact. */      
339         iter = rq->rq_iter;                       
340                                                   
341         if (!copy_from_iter_full(src, slen, &i    
342                 ret = -EIO;                       
343                 goto err_free;                    
344         }                                         
345                                                   
346         /*                                        
347          * This is just overprovisioning, as t    
348          * of @slen.                              
349          */                                       
350         dlen = slen;                              
351         dst = kvzalloc(dlen, GFP_KERNEL);         
352         if (!dst) {                               
353                 ret = -ENOMEM;                    
354                 goto err_free;                    
355         }                                         
356                                                   
357         ret = lz77_compress(src, slen, dst, &d    
358         if (!ret) {                               
359                 struct smb2_compression_hdr hd    
360                 struct smb_rqst comp_rq = { .r    
361                 struct kvec iov[3];               
362                                                   
363                 hdr.ProtocolId = SMB2_COMPRESS    
364                 hdr.OriginalCompressedSegmentS    
365                 hdr.CompressionAlgorithm = SMB    
366                 hdr.Flags = SMB2_COMPRESSION_F    
367                 hdr.Offset = cpu_to_le32(rq->r    
368                                                   
369                 iov[0].iov_base = &hdr;           
370                 iov[0].iov_len = sizeof(hdr);     
371                 iov[1] = rq->rq_iov[0];           
372                 iov[2].iov_base = dst;            
373                 iov[2].iov_len = dlen;            
374                                                   
375                 comp_rq.rq_iov = iov;             
376                                                   
377                 ret = send_fn(server, 1, &comp    
378         } else if (ret == -EMSGSIZE || dlen >=    
379                 ret = send_fn(server, 1, rq);     
380         }                                         
381 err_free:                                         
382         kvfree(dst);                              
383         kvfree(src);                              
384                                                   
385         return ret;                               
386 }                                                 
387                                                   

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php