1 // SPDX-License-Identifier: GPL-2.0-only 1 2 /* 3 * Copyright (C) 2024, SUSE LLC 4 * 5 * Authors: Enzo Matsumiya <ematsumiya@suse.de 6 * 7 * This file implements I/O compression suppor 8 * See compress/ for implementation details of 9 * 10 * References: 11 * MS-SMB2 "3.1.4.4 Compressing the Message" 12 * MS-SMB2 "3.1.5.3 Decompressing the Chained 13 * MS-XCA - for details of the supported algor 14 */ 15 #include <linux/slab.h> 16 #include <linux/kernel.h> 17 #include <linux/uio.h> 18 #include <linux/sort.h> 19 20 #include "cifsglob.h" 21 #include "../common/smb2pdu.h" 22 #include "cifsproto.h" 23 #include "smb2proto.h" 24 25 #include "compress/lz77.h" 26 #include "compress.h" 27 28 /* 29 * The heuristic_*() functions below try to de 30 * 31 * Derived from fs/btrfs/compression.c, changi 32 * unused parts. 33 * 34 * Read that file for better and more detailed 35 * 36 * The algorithms are ran in a collected sampl 37 * The sample is formed of 2K reads in PAGE_SI 38 * 39 * Parsing the sample goes from "low-hanging f 40 * to "need more analysis" (likely uncompressi 41 */ 42 43 struct bucket { 44 unsigned int count; 45 }; 46 47 /** 48 * has_low_entropy() - Compute Shannon entropy 49 * @bkt: Bytes counts of the sample. 50 * @slen: Size of the sample. 51 * 52 * Return: true if the level (percentage of nu 53 * compress the data) is below the min 54 * 55 * Note: 56 * There _is_ an entropy level here that's > 6 57 * possibility of compression, but compressing 58 * resources that it's simply not worth it. 59 * 60 * Also Shannon entropy is the last computed h 61 * with uncertainty, just stay on the safe sid 62 */ 63 static bool has_low_entropy(struct bucket *bkt 64 { 65 const size_t threshold = 65, max_entro 66 size_t i, p, p2, len, sum = 0; 67 68 #define pow4(n) (n * n * n * n) 69 len = ilog2(pow4(slen)); 70 71 for (i = 0; i < 256 && bkt[i].count > 72 p = bkt[i].count; 73 p2 = ilog2(pow4(p)); 74 sum += p * (len - p2); 75 } 76 77 sum /= slen; 78 79 return ((sum * 100 / max_entropy) <= t 80 } 81 82 #define BYTE_DIST_BAD 0 83 #define BYTE_DIST_GOOD 1 84 #define BYTE_DIST_MAYBE 2 85 /** 86 * calc_byte_distribution() - Compute byte dis 87 * @bkt: Byte counts of the sample. 88 * @slen: Size of the sample. 89 * 90 * Return: 91 * BYTE_DIST_BAD: A "hard no" for compre 92 * the bytes (e.g. random 93 * BYTE_DIST_GOOD: High probability (norm 94 * compressible. 95 * BYTE_DIST_MAYBE: When computed byte dis 96 * grounds. has_low_entr 97 */ 98 static int calc_byte_distribution(struct bucke 99 { 100 const size_t low = 64, high = 200, thr 101 size_t sum = 0; 102 int i; 103 104 for (i = 0; i < low; i++) 105 sum += bkt[i].count; 106 107 if (sum > threshold) 108 return BYTE_DIST_BAD; 109 110 for (; i < high && bkt[i].count > 0; i 111 sum += bkt[i].count; 112 if (sum > threshold) 113 break; 114 } 115 116 if (i <= low) 117 return BYTE_DIST_GOOD; 118 119 if (i >= high) 120 return BYTE_DIST_BAD; 121 122 return BYTE_DIST_MAYBE; 123 } 124 125 static bool is_mostly_ascii(const struct bucke 126 { 127 size_t count = 0; 128 int i; 129 130 for (i = 0; i < 256; i++) 131 if (bkt[i].count > 0) 132 /* Too many non-ASCII 133 if (++count > 64) 134 return false; 135 136 return true; 137 } 138 139 static bool has_repeated_data(const u8 *sample 140 { 141 size_t s = len / 2; 142 143 return (!memcmp(&sample[0], &sample[s] 144 } 145 146 static int cmp_bkt(const void *_a, const void 147 { 148 const struct bucket *a = _a, *b = _b; 149 150 /* Reverse sort. */ 151 if (a->count > b->count) 152 return -1; 153 154 return 1; 155 } 156 157 /* 158 * TODO: 159 * Support other iter types, if required. 160 * Only ITER_XARRAY is supported for now. 161 */ 162 static int collect_sample(const struct iov_ite 163 { 164 struct folio *folios[16], *folio; 165 unsigned int nr, i, j, npages; 166 loff_t start = iter->xarray_start + it 167 pgoff_t last, index = start / PAGE_SIZ 168 size_t len, off, foff; 169 void *p; 170 int s = 0; 171 172 last = (start + max - 1) / PAGE_SIZE; 173 do { 174 nr = xa_extract(iter->xarray, 175 XA_PRESENT); 176 if (nr == 0) 177 return -EIO; 178 179 for (i = 0; i < nr; i++) { 180 folio = folios[i]; 181 npages = folio_nr_page 182 foff = start - folio_p 183 off = foff % PAGE_SIZE 184 185 for (j = foff / PAGE_S 186 size_t len2; 187 188 len = min_t(si 189 len2 = min_t(s 190 191 p = kmap_local 192 memcpy(&sample 193 kunmap_local(p 194 195 s += len2; 196 197 if (len2 < SZ_ 198 return 199 200 max -= len; 201 if (max <= 0) 202 return 203 204 start += len; 205 off = 0; 206 index++; 207 } 208 } 209 } while (nr == ARRAY_SIZE(folios)); 210 211 return s; 212 } 213 214 /** 215 * is_compressible() - Determines if a chunk o 216 * @data: Iterator containing uncompressed dat 217 * 218 * Return: true if @data is compressible, fals 219 * 220 * Tests shows that this function is quite rel 221 * matching close to 1:1 with the behaviour of 222 */ 223 static bool is_compressible(const struct iov_i 224 { 225 const size_t read_size = SZ_2K, bkt_si 226 struct bucket *bkt = NULL; 227 size_t len; 228 u8 *sample; 229 bool ret = false; 230 int i; 231 232 /* Preventive double check -- already 233 len = iov_iter_count(data); 234 if (unlikely(len < read_size)) 235 return ret; 236 237 if (len - read_size > max) 238 len = max; 239 240 sample = kvzalloc(len, GFP_KERNEL); 241 if (!sample) { 242 WARN_ON_ONCE(1); 243 244 return ret; 245 } 246 247 /* Sample 2K bytes per page of the unc 248 i = collect_sample(data, len, sample); 249 if (i <= 0) { 250 WARN_ON_ONCE(1); 251 252 goto out; 253 } 254 255 len = i; 256 ret = true; 257 258 if (has_repeated_data(sample, len)) 259 goto out; 260 261 bkt = kcalloc(bkt_size, sizeof(*bkt), 262 if (!bkt) { 263 WARN_ON_ONCE(1); 264 ret = false; 265 266 goto out; 267 } 268 269 for (i = 0; i < len; i++) 270 bkt[sample[i]].count++; 271 272 if (is_mostly_ascii(bkt)) 273 goto out; 274 275 /* Sort in descending order */ 276 sort(bkt, bkt_size, sizeof(*bkt), cmp_ 277 278 i = calc_byte_distribution(bkt, len); 279 if (i != BYTE_DIST_MAYBE) { 280 ret = !!i; 281 282 goto out; 283 } 284 285 ret = has_low_entropy(bkt, len); 286 out: 287 kvfree(sample); 288 kfree(bkt); 289 290 return ret; 291 } 292 293 bool should_compress(const struct cifs_tcon *t 294 { 295 const struct smb2_hdr *shdr = rq->rq_i 296 297 if (unlikely(!tcon || !tcon->ses || !t 298 return false; 299 300 if (!tcon->ses->server->compression.en 301 return false; 302 303 if (!(tcon->share_flags & SMB2_SHAREFL 304 return false; 305 306 if (shdr->Command == SMB2_WRITE) { 307 const struct smb2_write_req *w 308 309 if (le32_to_cpu(wreq->Length) 310 return false; 311 312 return is_compressible(&rq->rq 313 } 314 315 return (shdr->Command == SMB2_READ); 316 } 317 318 int smb_compress(struct TCP_Server_Info *serve 319 { 320 struct iov_iter iter; 321 u32 slen, dlen; 322 void *src, *dst = NULL; 323 int ret; 324 325 if (!server || !rq || !rq->rq_iov || ! 326 return -EINVAL; 327 328 if (rq->rq_iov->iov_len != sizeof(stru 329 return -EINVAL; 330 331 slen = iov_iter_count(&rq->rq_iter); 332 src = kvzalloc(slen, GFP_KERNEL); 333 if (!src) { 334 ret = -ENOMEM; 335 goto err_free; 336 } 337 338 /* Keep the original iter intact. */ 339 iter = rq->rq_iter; 340 341 if (!copy_from_iter_full(src, slen, &i 342 ret = -EIO; 343 goto err_free; 344 } 345 346 /* 347 * This is just overprovisioning, as t 348 * of @slen. 349 */ 350 dlen = slen; 351 dst = kvzalloc(dlen, GFP_KERNEL); 352 if (!dst) { 353 ret = -ENOMEM; 354 goto err_free; 355 } 356 357 ret = lz77_compress(src, slen, dst, &d 358 if (!ret) { 359 struct smb2_compression_hdr hd 360 struct smb_rqst comp_rq = { .r 361 struct kvec iov[3]; 362 363 hdr.ProtocolId = SMB2_COMPRESS 364 hdr.OriginalCompressedSegmentS 365 hdr.CompressionAlgorithm = SMB 366 hdr.Flags = SMB2_COMPRESSION_F 367 hdr.Offset = cpu_to_le32(rq->r 368 369 iov[0].iov_base = &hdr; 370 iov[0].iov_len = sizeof(hdr); 371 iov[1] = rq->rq_iov[0]; 372 iov[2].iov_base = dst; 373 iov[2].iov_len = dlen; 374 375 comp_rq.rq_iov = iov; 376 377 ret = send_fn(server, 1, &comp 378 } else if (ret == -EMSGSIZE || dlen >= 379 ret = send_fn(server, 1, rq); 380 } 381 err_free: 382 kvfree(dst); 383 kvfree(src); 384 385 return ret; 386 } 387
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.