// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com>
 *
 * Scatterlist handling helpers.
 */
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/scatterlist.h>
#include <linux/highmem.h>
#include <linux/kmemleak.h>
#include <linux/bvec.h>
#include <linux/uio.h>

/**
 * sg_next - return the next scatterlist entry in a list
 * @sg: The current sg entry
 *
 * Description:
 *   Usually the next entry will be @sg@ + 1, but if this sg element is part
 *   of a chained scatterlist, it could jump to the start of a new
 *   scatterlist array.
 *
 **/
struct scatterlist *sg_next(struct scatterlist *sg)
{
	if (sg_is_last(sg))
		return NULL;

	sg++;
	if (unlikely(sg_is_chain(sg)))
		sg = sg_chain_ptr(sg);

	return sg;
}
EXPORT_SYMBOL(sg_next);

/**
 * sg_nents - return total count of entries in scatterlist
 * @sg: The scatterlist
 *
 * Description:
 *   Returns the number of entries in @sg, taking chaining into account.
 *
 **/
int sg_nents(struct scatterlist *sg)
{
	int nents;
	for (nents = 0; sg; sg = sg_next(sg))
		nents++;
	return nents;
}
EXPORT_SYMBOL(sg_nents);

/**
 * sg_nents_for_len - return total count of entries in scatterlist
 *                    needed to satisfy the supplied length
 * @sg: The scatterlist
 * @len: The total required length
 *
 * Description:
 *   Determines the number of entries in sg that are required to meet
 *   the supplied length, taking chaining into account as well.
 *
 * Returns:
 *   the number of sg entries needed, negative error on failure
 *
 **/
int sg_nents_for_len(struct scatterlist *sg, u64 len)
{
	int nents;
	u64 total;

	if (!len)
		return 0;

	for (nents = 0, total = 0; sg; sg = sg_next(sg)) {
		nents++;
		total += sg->length;
		if (total >= len)
			return nents;
	}

	return -EINVAL;
}
EXPORT_SYMBOL(sg_nents_for_len);

/**
 * sg_last - return the last scatterlist entry in a list
 * @sgl: First entry in the scatterlist
 * @nents: Number of entries in the scatterlist
 *
 * Description:
 *   Should only be used casually, it (currently) scans the entire list
 *   to get the last entry.
 *
 *   Note that the @sgl@ pointer passed in need not be the first one,
 *   the important bit is that @nents@ denotes the number of entries that
 *   exist from @sgl@.
 *
 **/
struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
{
	struct scatterlist *sg, *ret = NULL;
	unsigned int i;

	for_each_sg(sgl, sg, nents, i)
		ret = sg;

	BUG_ON(!sg_is_last(ret));
	return ret;
}
EXPORT_SYMBOL(sg_last);
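/*
 * Illustrative sketch (not part of the original file): how a consumer
 * typically walks a possibly-chained scatterlist with for_each_sg(), which
 * uses sg_next() underneath so chain entries are skipped transparently.
 * The helper name example_sum_sg_bytes() is hypothetical.
 */
#if 0	/* example only */
static size_t example_sum_sg_bytes(struct scatterlist *sgl, unsigned int nents)
{
	struct scatterlist *sg;
	unsigned int i;
	size_t bytes = 0;

	for_each_sg(sgl, sg, nents, i)
		bytes += sg->length;

	return bytes;
}
#endif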
/**
 * sg_init_table - Initialize SG table
 * @sgl: The SG table
 * @nents: Number of entries in table
 *
 * Notes:
 *   If this is part of a chained sg table, sg_mark_end() should be
 *   used only on the last table part.
 *
 **/
void sg_init_table(struct scatterlist *sgl, unsigned int nents)
{
	memset(sgl, 0, sizeof(*sgl) * nents);
	sg_init_marker(sgl, nents);
}
EXPORT_SYMBOL(sg_init_table);

/**
 * sg_init_one - Initialize a single entry sg list
 * @sg: SG entry
 * @buf: Virtual address for IO
 * @buflen: IO length
 *
 **/
void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
{
	sg_init_table(sg, 1);
	sg_set_buf(sg, buf, buflen);
}
EXPORT_SYMBOL(sg_init_one);

/*
 * The default behaviour of sg_alloc_table() is to use these kmalloc/kfree
 * helpers.
 */
static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
{
	if (nents == SG_MAX_SINGLE_ALLOC) {
		/*
		 * Kmemleak doesn't track page allocations as they are not
		 * commonly used (in a raw form) for kernel data structures.
		 * As we chain together a list of pages and then a normal
		 * kmalloc (tracked by kmemleak), in order for that last
		 * allocation not to become decoupled (and thus a
		 * false-positive) we need to inform kmemleak of all the
		 * intermediate allocations.
		 */
		void *ptr = (void *) __get_free_page(gfp_mask);
		kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
		return ptr;
	} else
		return kmalloc_array(nents, sizeof(struct scatterlist),
				     gfp_mask);
}

static void sg_kfree(struct scatterlist *sg, unsigned int nents)
{
	if (nents == SG_MAX_SINGLE_ALLOC) {
		kmemleak_free(sg);
		free_page((unsigned long) sg);
	} else
		kfree(sg);
}

/**
 * __sg_free_table - Free a previously mapped sg table
 * @table: The sg table header to use
 * @max_ents: The maximum number of entries per single scatterlist
 * @nents_first_chunk: Number of entries in the (preallocated) first
 *	scatterlist chunk, 0 means no such preallocated first chunk
 * @free_fn: Free function
 * @num_ents: Number of entries in the table
 *
 * Description:
 *   Free an sg table previously allocated and set up with
 *   __sg_alloc_table().  The @max_ents value must be identical to
 *   that previously used with __sg_alloc_table().
 *
 **/
void __sg_free_table(struct sg_table *table, unsigned int max_ents,
		     unsigned int nents_first_chunk, sg_free_fn *free_fn,
		     unsigned int num_ents)
{
	struct scatterlist *sgl, *next;
	unsigned curr_max_ents = nents_first_chunk ?: max_ents;

	if (unlikely(!table->sgl))
		return;

	sgl = table->sgl;
	while (num_ents) {
		unsigned int alloc_size = num_ents;
		unsigned int sg_size;

		/*
		 * If we have more than max_ents segments left,
		 * then assign 'next' to the sg table after the current one.
		 * sg_size is then one less than alloc size, since the last
		 * element is the chain pointer.
		 */
		if (alloc_size > curr_max_ents) {
			next = sg_chain_ptr(&sgl[curr_max_ents - 1]);
			alloc_size = curr_max_ents;
			sg_size = alloc_size - 1;
		} else {
			sg_size = alloc_size;
			next = NULL;
		}

		num_ents -= sg_size;
		if (nents_first_chunk)
			nents_first_chunk = 0;
		else
			free_fn(sgl, alloc_size);
		sgl = next;
		curr_max_ents = max_ents;
	}

	table->sgl = NULL;
}
EXPORT_SYMBOL(__sg_free_table);
/**
 * sg_free_append_table - Free a previously allocated append sg table.
 * @table: The mapped sg append table header
 *
 **/
void sg_free_append_table(struct sg_append_table *table)
{
	__sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
			table->total_nents);
}
EXPORT_SYMBOL(sg_free_append_table);

/**
 * sg_free_table - Free a previously allocated sg table
 * @table: The mapped sg table header
 *
 **/
void sg_free_table(struct sg_table *table)
{
	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
			table->orig_nents);
}
EXPORT_SYMBOL(sg_free_table);

/**
 * __sg_alloc_table - Allocate and initialize an sg table with given allocator
 * @table: The sg table header to use
 * @nents: Number of entries in sg list
 * @max_ents: The maximum number of entries the allocator returns per call
 * @first_chunk: first SGL if preallocated (may be %NULL)
 * @nents_first_chunk: Number of entries in the (preallocated) first
 *	scatterlist chunk, 0 means no such preallocated chunk provided by user
 * @gfp_mask: GFP allocation mask
 * @alloc_fn: Allocator to use
 *
 * Description:
 *   This function allocates a @table that is @nents entries long.  The
 *   allocator is defined to return scatterlist chunks of maximum size
 *   @max_ents.  Thus if @nents is bigger than @max_ents, the scatterlists
 *   will be chained in units of @max_ents.
 *
 * Notes:
 *   If this function returns non-0 (e.g. failure), the caller must call
 *   __sg_free_table() to cleanup any leftover allocations.
 *
 **/
int __sg_alloc_table(struct sg_table *table, unsigned int nents,
		     unsigned int max_ents, struct scatterlist *first_chunk,
		     unsigned int nents_first_chunk, gfp_t gfp_mask,
		     sg_alloc_fn *alloc_fn)
{
	struct scatterlist *sg, *prv;
	unsigned int left;
	unsigned curr_max_ents = nents_first_chunk ?: max_ents;
	unsigned prv_max_ents;

	memset(table, 0, sizeof(*table));

	if (nents == 0)
		return -EINVAL;
#ifdef CONFIG_ARCH_NO_SG_CHAIN
	if (WARN_ON_ONCE(nents > max_ents))
		return -EINVAL;
#endif

	left = nents;
	prv = NULL;
	do {
		unsigned int sg_size, alloc_size = left;

		if (alloc_size > curr_max_ents) {
			alloc_size = curr_max_ents;
			sg_size = alloc_size - 1;
		} else
			sg_size = alloc_size;

		left -= sg_size;

		if (first_chunk) {
			sg = first_chunk;
			first_chunk = NULL;
		} else {
			sg = alloc_fn(alloc_size, gfp_mask);
		}
		if (unlikely(!sg)) {
			/*
			 * Adjust entry count to reflect that the last
			 * entry of the previous table won't be used for
			 * linkage.  Without this, sg_kfree() may get
			 * confused.
			 */
			if (prv)
				table->nents = ++table->orig_nents;

			return -ENOMEM;
		}

		sg_init_table(sg, alloc_size);
		table->nents = table->orig_nents += sg_size;

		/*
		 * If this is the first mapping, assign the sg table header.
		 * If this is not the first mapping, chain previous part.
		 */
		if (prv)
			sg_chain(prv, prv_max_ents, sg);
		else
			table->sgl = sg;

		/*
		 * If no more entries after this one, mark the end
		 */
		if (!left)
			sg_mark_end(&sg[sg_size - 1]);

		prv = sg;
		prv_max_ents = curr_max_ents;
		curr_max_ents = max_ents;
	} while (left);

	return 0;
}
EXPORT_SYMBOL(__sg_alloc_table);
/**
 * sg_alloc_table - Allocate and initialize an sg table
 * @table: The sg table header to use
 * @nents: Number of entries in sg list
 * @gfp_mask: GFP allocation mask
 *
 * Description:
 *   Allocate and initialize an sg table.  If @nents@ is larger than
 *   SG_MAX_SINGLE_ALLOC a chained sg table will be set up.
 *
 **/
int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
{
	int ret;

	ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
			       NULL, 0, gfp_mask, sg_kmalloc);
	if (unlikely(ret))
		sg_free_table(table);
	return ret;
}
EXPORT_SYMBOL(sg_alloc_table);
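/*
 * Illustrative sketch (not part of the original file): the usual
 * allocate/populate/free life cycle of an sg table, assuming the caller
 * already holds an array of page pointers.  The function name
 * example_fill_sgt() is hypothetical.
 */
#if 0	/* example only */
static int example_fill_sgt(struct sg_table *sgt, struct page **pages,
			    unsigned int n_pages, gfp_t gfp_mask)
{
	struct scatterlist *sg;
	unsigned int i;
	int ret;

	ret = sg_alloc_table(sgt, n_pages, gfp_mask);
	if (ret)
		return ret;

	/* One page per entry; sg_alloc_table() already marked the end. */
	for_each_sg(sgt->sgl, sg, sgt->orig_nents, i)
		sg_set_page(sg, pages[i], PAGE_SIZE, 0);

	/* ... map with dma_map_sgtable() or similar, then ... */
	sg_free_table(sgt);
	return 0;
}
#endif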
static struct scatterlist *get_next_sg(struct sg_append_table *table,
				       struct scatterlist *cur,
				       unsigned long needed_sges,
				       gfp_t gfp_mask)
{
	struct scatterlist *new_sg, *next_sg;
	unsigned int alloc_size;

	if (cur) {
		next_sg = sg_next(cur);
		/* Check if the last entry should be kept for chaining */
		if (!sg_is_last(next_sg) || needed_sges == 1)
			return next_sg;
	}

	alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
	new_sg = sg_kmalloc(alloc_size, gfp_mask);
	if (!new_sg)
		return ERR_PTR(-ENOMEM);
	sg_init_table(new_sg, alloc_size);
	if (cur) {
		table->total_nents += alloc_size - 1;
		__sg_chain(next_sg, new_sg);
	} else {
		table->sgt.sgl = new_sg;
		table->total_nents = alloc_size;
	}
	return new_sg;
}

static bool pages_are_mergeable(struct page *a, struct page *b)
{
	if (page_to_pfn(a) != page_to_pfn(b) + 1)
		return false;
	if (!zone_device_pages_have_same_pgmap(a, b))
		return false;
	return true;
}

/**
 * sg_alloc_append_table_from_pages - Allocate and initialize an append sg
 *				      table from an array of pages
 * @sgt_append: The sg append table to use
 * @pages: Pointer to an array of page pointers
 * @n_pages: Number of pages in the pages array
 * @offset: Offset from start of the first page to the start of a buffer
 * @size: Number of valid bytes in the buffer (after offset)
 * @max_segment: Maximum size of a scatterlist element in bytes
 * @left_pages: Number of pages left to be added after this call
 * @gfp_mask: GFP allocation mask
 *
 * Description:
 *   On the first call this allocates and initializes an sg table from a
 *   list of pages; on later calls it reuses the scatterlist from
 *   @sgt_append.  Contiguous ranges of the pages are squashed into a single
 *   scatterlist entry up to the maximum size specified in @max_segment.
 *   A user may provide an offset at a start and a size of valid data in a
 *   buffer specified by the page array.  The returned sg table is released
 *   by sg_free_append_table().
 *
 * Returns:
 *   0 on success, negative error on failure
 *
 * Notes:
 *   If this function returns non-0 (e.g. failure), the caller must call
 *   sg_free_append_table() to cleanup any leftover allocations.
 *
 *   In the first call, @sgt_append must be initialized.
 */
int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
		struct page **pages, unsigned int n_pages, unsigned int offset,
		unsigned long size, unsigned int max_segment,
		unsigned int left_pages, gfp_t gfp_mask)
{
	unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
	unsigned int added_nents = 0;
	struct scatterlist *s = sgt_append->prv;
	struct page *last_pg;

	/*
	 * The algorithm below requires max_segment to be aligned to PAGE_SIZE
	 * otherwise it can overshoot.
	 */
	max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
	if (WARN_ON(max_segment < PAGE_SIZE))
		return -EINVAL;

	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && sgt_append->prv)
		return -EOPNOTSUPP;

	if (sgt_append->prv) {
		unsigned long next_pfn = (page_to_phys(sg_page(sgt_append->prv)) +
			sgt_append->prv->offset + sgt_append->prv->length) / PAGE_SIZE;

		if (WARN_ON(offset))
			return -EINVAL;

		/* Merge contiguous pages into the last SG */
		prv_len = sgt_append->prv->length;
		if (page_to_pfn(pages[0]) == next_pfn) {
			last_pg = pfn_to_page(next_pfn - 1);
			while (n_pages && pages_are_mergeable(pages[0], last_pg)) {
				if (sgt_append->prv->length + PAGE_SIZE > max_segment)
					break;
				sgt_append->prv->length += PAGE_SIZE;
				last_pg = pages[0];
				pages++;
				n_pages--;
			}
			if (!n_pages)
				goto out;
		}
	}

	/* compute number of contiguous chunks */
	chunks = 1;
	seg_len = 0;
	for (i = 1; i < n_pages; i++) {
		seg_len += PAGE_SIZE;
		if (seg_len >= max_segment ||
		    !pages_are_mergeable(pages[i], pages[i - 1])) {
			chunks++;
			seg_len = 0;
		}
	}

	/* merging chunks and putting them into the scatterlist */
	cur_page = 0;
	for (i = 0; i < chunks; i++) {
		unsigned int j, chunk_size;

		/* look for the end of the current chunk */
		seg_len = 0;
		for (j = cur_page + 1; j < n_pages; j++) {
			seg_len += PAGE_SIZE;
			if (seg_len >= max_segment ||
			    !pages_are_mergeable(pages[j], pages[j - 1]))
				break;
		}

		/* Pass how many chunks might be left */
		s = get_next_sg(sgt_append, s, chunks - i + left_pages,
				gfp_mask);
		if (IS_ERR(s)) {
			/*
			 * Adjust entry length to be as before function was
			 * called.
			 */
			if (sgt_append->prv)
				sgt_append->prv->length = prv_len;
			return PTR_ERR(s);
		}
		chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
		sg_set_page(s, pages[cur_page],
			    min_t(unsigned long, size, chunk_size), offset);
		added_nents++;
		size -= chunk_size;
		offset = 0;
		cur_page = j;
	}
	sgt_append->sgt.nents += added_nents;
	sgt_append->sgt.orig_nents = sgt_append->sgt.nents;
	sgt_append->prv = s;
out:
	if (!left_pages)
		sg_mark_end(s);
	return 0;
}
EXPORT_SYMBOL(sg_alloc_append_table_from_pages);
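/*
 * Illustrative sketch (not part of the original file): appending two page
 * arrays to one table in successive calls, as described above.  On the
 * first call the append table is zero-initialized; @left_pages tells the
 * helper that more pages will follow so the end mark is only placed on the
 * final call.  The names first[], second[] and the max_segment of UINT_MAX
 * are hypothetical.
 */
#if 0	/* example only */
static int example_append_two_ranges(struct page **first, unsigned int n_first,
				     struct page **second, unsigned int n_second,
				     struct sg_append_table *append,
				     gfp_t gfp_mask)
{
	int ret;

	memset(append, 0, sizeof(*append));

	ret = sg_alloc_append_table_from_pages(append, first, n_first, 0,
					       (unsigned long)n_first * PAGE_SIZE,
					       UINT_MAX, n_second, gfp_mask);
	if (ret)
		goto err;

	ret = sg_alloc_append_table_from_pages(append, second, n_second, 0,
					       (unsigned long)n_second * PAGE_SIZE,
					       UINT_MAX, 0, gfp_mask);
	if (ret)
		goto err;

	return 0;
err:
	sg_free_append_table(append);
	return ret;
}
#endif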
/**
 * sg_alloc_table_from_pages_segment - Allocate and initialize an sg table from
 *				       an array of pages and given maximum
 *				       segment.
 * @sgt: The sg table header to use
 * @pages: Pointer to an array of page pointers
 * @n_pages: Number of pages in the pages array
 * @offset: Offset from start of the first page to the start of a buffer
 * @size: Number of valid bytes in the buffer (after offset)
 * @max_segment: Maximum size of a scatterlist element in bytes
 * @gfp_mask: GFP allocation mask
 *
 * Description:
 *   Allocate and initialize an sg table from a list of pages.  Contiguous
 *   ranges of the pages are squashed into a single scatterlist node up to the
 *   maximum size specified in @max_segment.  A user may provide an offset at a
 *   start and a size of valid data in a buffer specified by the page array.
 *
 *   The returned sg table is released by sg_free_table.
 *
 * Returns:
 *   0 on success, negative error on failure
 */
int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages,
				unsigned int n_pages, unsigned int offset,
				unsigned long size, unsigned int max_segment,
				gfp_t gfp_mask)
{
	struct sg_append_table append = {};
	int err;

	err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset,
					       size, max_segment, 0, gfp_mask);
	if (err) {
		sg_free_append_table(&append);
		return err;
	}
	memcpy(sgt, &append.sgt, sizeof(*sgt));
	WARN_ON(append.total_nents != sgt->orig_nents);
	return 0;
}
EXPORT_SYMBOL(sg_alloc_table_from_pages_segment);

#ifdef CONFIG_SGL_ALLOC

/**
 * sgl_alloc_order - allocate a scatterlist and its pages
 * @length: Length in bytes of the scatterlist. Must be at least one
 * @order: Second argument for alloc_pages()
 * @chainable: Whether or not to allocate an extra element in the scatterlist
 *	for scatterlist chaining purposes
 * @gfp: Memory allocation flags
 * @nent_p: [out] Number of entries in the scatterlist that have pages
 *
 * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
 */
struct scatterlist *sgl_alloc_order(unsigned long long length,
				    unsigned int order, bool chainable,
				    gfp_t gfp, unsigned int *nent_p)
{
	struct scatterlist *sgl, *sg;
	struct page *page;
	unsigned int nent, nalloc;
	u32 elem_len;

	nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order);
	/* Check for integer overflow */
	if (length > (nent << (PAGE_SHIFT + order)))
		return NULL;
	nalloc = nent;
	if (chainable) {
		/* Check for integer overflow */
		if (nalloc + 1 < nalloc)
			return NULL;
		nalloc++;
	}
	sgl = kmalloc_array(nalloc, sizeof(struct scatterlist),
			    gfp & ~GFP_DMA);
	if (!sgl)
		return NULL;

	sg_init_table(sgl, nalloc);
	sg = sgl;
	while (length) {
		elem_len = min_t(u64, length, PAGE_SIZE << order);
		page = alloc_pages(gfp, order);
		if (!page) {
			sgl_free_order(sgl, order);
			return NULL;
		}

		sg_set_page(sg, page, elem_len, 0);
		length -= elem_len;
		sg = sg_next(sg);
	}
	WARN_ONCE(length, "length = %lld\n", length);
	if (nent_p)
		*nent_p = nent;
	return sgl;
}
EXPORT_SYMBOL(sgl_alloc_order);
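/*
 * Illustrative sketch (not part of the original file): allocating a
 * scatterlist that owns its pages with sgl_alloc_order() and releasing it
 * again with sgl_free_order().  The buffer size and order value below are
 * hypothetical, as is the helper name.
 */
#if 0	/* example only */
static int example_sgl_alloc_order(void)
{
	unsigned int nents;
	struct scatterlist *sgl;

	/* 1 MiB backed by order-2 (4-page) allocations. */
	sgl = sgl_alloc_order(1024 * 1024, 2, false, GFP_KERNEL, &nents);
	if (!sgl)
		return -ENOMEM;

	/* ... use the nents entries starting at sgl ... */

	sgl_free_order(sgl, 2);
	return 0;
}
#endif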
/**
 * sgl_alloc - allocate a scatterlist and its pages
 * @length: Length in bytes of the scatterlist
 * @gfp: Memory allocation flags
 * @nent_p: [out] Number of entries in the scatterlist
 *
 * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
 */
struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp,
			      unsigned int *nent_p)
{
	return sgl_alloc_order(length, 0, false, gfp, nent_p);
}
EXPORT_SYMBOL(sgl_alloc);

/**
 * sgl_free_n_order - free a scatterlist and its pages
 * @sgl: Scatterlist with one or more elements
 * @nents: Maximum number of elements to free
 * @order: Second argument for __free_pages()
 *
 * Notes:
 * - If several scatterlists have been chained and each chain element is
 *   freed separately then it's essential to set nents correctly to avoid
 *   freeing a page twice.
 * - All pages in a chained scatterlist can be freed at once by setting @nents
 *   to a high number.
 */
void sgl_free_n_order(struct scatterlist *sgl, int nents, int order)
{
	struct scatterlist *sg;
	struct page *page;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		if (!sg)
			break;
		page = sg_page(sg);
		if (page)
			__free_pages(page, order);
	}
	kfree(sgl);
}
EXPORT_SYMBOL(sgl_free_n_order);

/**
 * sgl_free_order - free a scatterlist and its pages
 * @sgl: Scatterlist with one or more elements
 * @order: Second argument for __free_pages()
 */
void sgl_free_order(struct scatterlist *sgl, int order)
{
	sgl_free_n_order(sgl, INT_MAX, order);
}
EXPORT_SYMBOL(sgl_free_order);

/**
 * sgl_free - free a scatterlist and its pages
 * @sgl: Scatterlist with one or more elements
 */
void sgl_free(struct scatterlist *sgl)
{
	sgl_free_order(sgl, 0);
}
EXPORT_SYMBOL(sgl_free);

#endif /* CONFIG_SGL_ALLOC */

void __sg_page_iter_start(struct sg_page_iter *piter,
			  struct scatterlist *sglist, unsigned int nents,
			  unsigned long pgoffset)
{
	piter->__pg_advance = 0;
	piter->__nents = nents;

	piter->sg = sglist;
	piter->sg_pgoffset = pgoffset;
}
EXPORT_SYMBOL(__sg_page_iter_start);

static int sg_page_count(struct scatterlist *sg)
{
	return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT;
}

bool __sg_page_iter_next(struct sg_page_iter *piter)
{
	if (!piter->__nents || !piter->sg)
		return false;

	piter->sg_pgoffset += piter->__pg_advance;
	piter->__pg_advance = 1;

	while (piter->sg_pgoffset >= sg_page_count(piter->sg)) {
		piter->sg_pgoffset -= sg_page_count(piter->sg);
		piter->sg = sg_next(piter->sg);
		if (!--piter->__nents || !piter->sg)
			return false;
	}

	return true;
}
EXPORT_SYMBOL(__sg_page_iter_next);

static int sg_dma_page_count(struct scatterlist *sg)
{
	return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT;
}

bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter)
{
	struct sg_page_iter *piter = &dma_iter->base;

	if (!piter->__nents || !piter->sg)
		return false;

	piter->sg_pgoffset += piter->__pg_advance;
	piter->__pg_advance = 1;

	while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) {
		piter->sg_pgoffset -= sg_dma_page_count(piter->sg);
		piter->sg = sg_next(piter->sg);
		if (!--piter->__nents || !piter->sg)
			return false;
	}

	return true;
}
EXPORT_SYMBOL(__sg_page_iter_dma_next);
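/*
 * Illustrative sketch (not part of the original file): the page iterator
 * above is normally driven through the for_each_sg_page() macro from
 * <linux/scatterlist.h>, which calls __sg_page_iter_start() and
 * __sg_page_iter_next() internally.  The helper name is hypothetical.
 */
#if 0	/* example only */
static unsigned int example_count_pages(struct scatterlist *sgl,
					unsigned int nents)
{
	struct sg_page_iter piter;
	unsigned int n = 0;

	for_each_sg_page(sgl, &piter, nents, 0) {
		struct page *page = sg_page_iter_page(&piter);

		(void)page;	/* e.g. look up, map or prefetch the page */
		n++;
	}
	return n;
}
#endif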
/**
 * sg_miter_start - start mapping iteration over a sg list
 * @miter: sg mapping iter to be started
 * @sgl: sg list to iterate over
 * @nents: number of sg entries
 * @flags: sg iterator flags
 *
 * Description:
 *   Starts mapping iterator @miter.
 *
 * Context:
 *   Don't care.
 */
void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
		    unsigned int nents, unsigned int flags)
{
	memset(miter, 0, sizeof(struct sg_mapping_iter));

	__sg_page_iter_start(&miter->piter, sgl, nents, 0);
	WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
	miter->__flags = flags;
}
EXPORT_SYMBOL(sg_miter_start);

static bool sg_miter_get_next_page(struct sg_mapping_iter *miter)
{
	if (!miter->__remaining) {
		struct scatterlist *sg;

		if (!__sg_page_iter_next(&miter->piter))
			return false;

		sg = miter->piter.sg;

		miter->__offset = miter->piter.sg_pgoffset ? 0 : sg->offset;
		miter->piter.sg_pgoffset += miter->__offset >> PAGE_SHIFT;
		miter->__offset &= PAGE_SIZE - 1;
		miter->__remaining = sg->offset + sg->length -
				     (miter->piter.sg_pgoffset << PAGE_SHIFT) -
				     miter->__offset;
		miter->__remaining = min_t(unsigned long, miter->__remaining,
					   PAGE_SIZE - miter->__offset);
	}

	return true;
}

/**
 * sg_miter_skip - reposition mapping iterator
 * @miter: sg mapping iter to be skipped
 * @offset: number of bytes to add to the current location
 *
 * Description:
 *   Sets the offset of @miter to its current location plus @offset bytes.
 *   If the mapping iterator @miter has been advanced by sg_miter_next(),
 *   this stops @miter.
 *
 * Context:
 *   Don't care.
 *
 * Returns:
 *   true if @miter contains the valid mapping.  false if end of sg
 *   list is reached.
 */
bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset)
{
	sg_miter_stop(miter);

	while (offset) {
		off_t consumed;

		if (!sg_miter_get_next_page(miter))
			return false;

		consumed = min_t(off_t, offset, miter->__remaining);
		miter->__offset += consumed;
		miter->__remaining -= consumed;
		offset -= consumed;
	}

	return true;
}
EXPORT_SYMBOL(sg_miter_skip);

/**
 * sg_miter_next - proceed mapping iterator to the next mapping
 * @miter: sg mapping iter to proceed
 *
 * Description:
 *   Proceeds @miter to the next mapping.  @miter should have been started
 *   using sg_miter_start().  On successful return, @miter->page,
 *   @miter->addr and @miter->length point to the current mapping.
 *
 * Context:
 *   May sleep if !SG_MITER_ATOMIC.
 *
 * Returns:
 *   true if @miter contains the next mapping.  false if end of sg
 *   list is reached.
 */
bool sg_miter_next(struct sg_mapping_iter *miter)
{
	sg_miter_stop(miter);

	/*
	 * Get to the next page if necessary.
	 * __remaining, __offset is adjusted by sg_miter_stop
	 */
	if (!sg_miter_get_next_page(miter))
		return false;

	miter->page = sg_page_iter_page(&miter->piter);
	miter->consumed = miter->length = miter->__remaining;

	if (miter->__flags & SG_MITER_ATOMIC)
		miter->addr = kmap_atomic(miter->page) + miter->__offset;
	else
		miter->addr = kmap(miter->page) + miter->__offset;

	return true;
}
EXPORT_SYMBOL(sg_miter_next);
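/*
 * Illustrative sketch (not part of the original file): a typical mapping
 * iteration that fills every byte covered by a scatterlist.  SG_MITER_ATOMIC
 * is assumed because the loop never sleeps; the helper name and the fill
 * value are hypothetical.  This mirrors the pattern used by sg_copy_buffer()
 * below.
 */
#if 0	/* example only */
static void example_fill_sg(struct scatterlist *sgl, unsigned int nents, u8 val)
{
	struct sg_mapping_iter miter;

	sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC | SG_MITER_TO_SG);
	while (sg_miter_next(&miter))
		memset(miter.addr, val, miter.length);
	sg_miter_stop(&miter);
}
#endif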
/**
 * sg_miter_stop - stop mapping iteration
 * @miter: sg mapping iter to be stopped
 *
 * Description:
 *   Stops mapping iterator @miter.  @miter should have been started
 *   using sg_miter_start().  A stopped iteration can be resumed by
 *   calling sg_miter_next() on it.  This is useful when resources (kmap)
 *   need to be released during iteration.
 *
 * Context:
 *   Don't care otherwise.
 */
void sg_miter_stop(struct sg_mapping_iter *miter)
{
	WARN_ON(miter->consumed > miter->length);

	/* drop resources from the last iteration */
	if (miter->addr) {
		miter->__offset += miter->consumed;
		miter->__remaining -= miter->consumed;

		if (miter->__flags & SG_MITER_TO_SG)
			flush_dcache_page(miter->page);

		if (miter->__flags & SG_MITER_ATOMIC) {
			WARN_ON_ONCE(!pagefault_disabled());
			kunmap_atomic(miter->addr);
		} else
			kunmap(miter->page);

		miter->page = NULL;
		miter->addr = NULL;
		miter->length = 0;
		miter->consumed = 0;
	}
}
EXPORT_SYMBOL(sg_miter_stop);

/**
 * sg_copy_buffer - Copy data between a linear buffer and an SG list
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buf: Where to copy from
 * @buflen: The number of bytes to copy
 * @skip: Number of bytes to skip before copying
 * @to_buffer: transfer direction (true == from an sg list to a
 *	buffer, false == from a buffer to an sg list)
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
		      size_t buflen, off_t skip, bool to_buffer)
{
	unsigned int offset = 0;
	struct sg_mapping_iter miter;
	unsigned int sg_flags = SG_MITER_ATOMIC;

	if (to_buffer)
		sg_flags |= SG_MITER_FROM_SG;
	else
		sg_flags |= SG_MITER_TO_SG;

	sg_miter_start(&miter, sgl, nents, sg_flags);

	if (!sg_miter_skip(&miter, skip))
		return 0;

	while ((offset < buflen) && sg_miter_next(&miter)) {
		unsigned int len;

		len = min(miter.length, buflen - offset);

		if (to_buffer)
			memcpy(buf + offset, miter.addr, len);
		else
			memcpy(miter.addr, buf + offset, len);

		offset += len;
	}

	sg_miter_stop(&miter);

	return offset;
}
EXPORT_SYMBOL(sg_copy_buffer);

/**
 * sg_copy_from_buffer - Copy from a linear buffer to an SG list
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buf: Where to copy from
 * @buflen: The number of bytes to copy
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			   const void *buf, size_t buflen)
{
	return sg_copy_buffer(sgl, nents, (void *)buf, buflen, 0, false);
}
EXPORT_SYMBOL(sg_copy_from_buffer);

/**
 * sg_copy_to_buffer - Copy from an SG list to a linear buffer
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buf: Where to copy to
 * @buflen: The number of bytes to copy
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			 void *buf, size_t buflen)
{
	return sg_copy_buffer(sgl, nents, buf, buflen, 0, true);
}
EXPORT_SYMBOL(sg_copy_to_buffer);
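/*
 * Illustrative sketch (not part of the original file): copying a linear
 * buffer into a scatterlist and reading part of it back with the pcopy
 * variant defined below.  The buffer sizes and the 16-byte skip are
 * hypothetical, as is the helper name.
 */
#if 0	/* example only */
static bool example_copy_roundtrip(struct scatterlist *sgl, unsigned int nents)
{
	u8 src[64] = { 0xaa }, dst[16];
	size_t copied;

	copied = sg_copy_from_buffer(sgl, nents, src, sizeof(src));
	if (copied != sizeof(src))
		return false;

	/* Read 16 bytes back, starting 16 bytes into the sg data. */
	copied = sg_pcopy_to_buffer(sgl, nents, dst, sizeof(dst), 16);
	return copied == sizeof(dst);
}
#endif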
/**
 * sg_pcopy_from_buffer - Copy from a linear buffer to an SG list
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buf: Where to copy from
 * @buflen: The number of bytes to copy
 * @skip: Number of bytes to skip before copying
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			    const void *buf, size_t buflen, off_t skip)
{
	return sg_copy_buffer(sgl, nents, (void *)buf, buflen, skip, false);
}
EXPORT_SYMBOL(sg_pcopy_from_buffer);

/**
 * sg_pcopy_to_buffer - Copy from an SG list to a linear buffer
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buf: Where to copy to
 * @buflen: The number of bytes to copy
 * @skip: Number of bytes to skip before copying
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			  void *buf, size_t buflen, off_t skip)
{
	return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
}
EXPORT_SYMBOL(sg_pcopy_to_buffer);

/**
 * sg_zero_buffer - Zero-out a part of a SG list
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buflen: The number of bytes to zero out
 * @skip: Number of bytes to skip before zeroing
 *
 * Returns the number of bytes zeroed.
 **/
size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
		      size_t buflen, off_t skip)
{
	unsigned int offset = 0;
	struct sg_mapping_iter miter;
	unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG;

	sg_miter_start(&miter, sgl, nents, sg_flags);

	if (!sg_miter_skip(&miter, skip))
		return false;

	while (offset < buflen && sg_miter_next(&miter)) {
		unsigned int len;

		len = min(miter.length, buflen - offset);
		memset(miter.addr, 0, len);

		offset += len;
	}

	sg_miter_stop(&miter);
	return offset;
}
EXPORT_SYMBOL(sg_zero_buffer);

/*
 * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
 * iterators, and add them to the scatterlist.
 */
static ssize_t extract_user_to_sg(struct iov_iter *iter,
				  ssize_t maxsize,
				  struct sg_table *sgtable,
				  unsigned int sg_max,
				  iov_iter_extraction_t extraction_flags)
{
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	struct page **pages;
	unsigned int npages;
	ssize_t ret = 0, res;
	size_t len, off;

	/* We decant the page list into the tail of the scatterlist */
	pages = (void *)sgtable->sgl +
		array_size(sg_max, sizeof(struct scatterlist));
	pages -= sg_max;

	do {
		res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
					     extraction_flags, &off);
		if (res <= 0)
			goto failed;

		len = res;
		maxsize -= len;
		ret += len;
		npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
		sg_max -= npages;

		for (; npages > 0; npages--) {
			struct page *page = *pages;
			size_t seg = min_t(size_t, PAGE_SIZE - off, len);

			*pages++ = NULL;
			sg_set_page(sg, page, seg, off);
			sgtable->nents++;
			sg++;
			len -= seg;
			off = 0;
		}
	} while (maxsize > 0 && sg_max > 0);

	return ret;

failed:
	while (sgtable->nents > sgtable->orig_nents)
		unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
	return res;
}
/*
 * Extract up to sg_max pages from a BVEC-type iterator and add them to the
 * scatterlist.  The pages are not pinned.
 */
static ssize_t extract_bvec_to_sg(struct iov_iter *iter,
				  ssize_t maxsize,
				  struct sg_table *sgtable,
				  unsigned int sg_max,
				  iov_iter_extraction_t extraction_flags)
{
	const struct bio_vec *bv = iter->bvec;
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	unsigned long start = iter->iov_offset;
	unsigned int i;
	ssize_t ret = 0;

	for (i = 0; i < iter->nr_segs; i++) {
		size_t off, len;

		len = bv[i].bv_len;
		if (start >= len) {
			start -= len;
			continue;
		}

		len = min_t(size_t, maxsize, len - start);
		off = bv[i].bv_offset + start;

		sg_set_page(sg, bv[i].bv_page, len, off);
		sgtable->nents++;
		sg++;
		sg_max--;

		ret += len;
		maxsize -= len;
		if (maxsize <= 0 || sg_max == 0)
			break;
		start = 0;
	}

	if (ret > 0)
		iov_iter_advance(iter, ret);
	return ret;
}

/*
 * Extract up to sg_max pages from a KVEC-type iterator and add them to the
 * scatterlist.  This can deal with vmalloc'd buffers as well as kmalloc'd or
 * static buffers.  The pages are not pinned.
 */
static ssize_t extract_kvec_to_sg(struct iov_iter *iter,
				  ssize_t maxsize,
				  struct sg_table *sgtable,
				  unsigned int sg_max,
				  iov_iter_extraction_t extraction_flags)
{
	const struct kvec *kv = iter->kvec;
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	unsigned long start = iter->iov_offset;
	unsigned int i;
	ssize_t ret = 0;

	for (i = 0; i < iter->nr_segs; i++) {
		struct page *page;
		unsigned long kaddr;
		size_t off, len, seg;

		len = kv[i].iov_len;
		if (start >= len) {
			start -= len;
			continue;
		}

		kaddr = (unsigned long)kv[i].iov_base + start;
		off = kaddr & ~PAGE_MASK;
		len = min_t(size_t, maxsize, len - start);
		kaddr &= PAGE_MASK;

		maxsize -= len;
		ret += len;
		do {
			seg = min_t(size_t, len, PAGE_SIZE - off);
			if (is_vmalloc_or_module_addr((void *)kaddr))
				page = vmalloc_to_page((void *)kaddr);
			else
				page = virt_to_page((void *)kaddr);

			sg_set_page(sg, page, len, off);
			sgtable->nents++;
			sg++;
			sg_max--;

			len -= seg;
			kaddr += PAGE_SIZE;
			off = 0;
		} while (len > 0 && sg_max > 0);

		if (maxsize <= 0 || sg_max == 0)
			break;
		start = 0;
	}

	if (ret > 0)
		iov_iter_advance(iter, ret);
	return ret;
}
/*
 * Extract up to sg_max folios from an XARRAY-type iterator and add them to
 * the scatterlist.  The pages are not pinned.
 */
static ssize_t extract_xarray_to_sg(struct iov_iter *iter,
				    ssize_t maxsize,
				    struct sg_table *sgtable,
				    unsigned int sg_max,
				    iov_iter_extraction_t extraction_flags)
{
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	struct xarray *xa = iter->xarray;
	struct folio *folio;
	loff_t start = iter->xarray_start + iter->iov_offset;
	pgoff_t index = start / PAGE_SIZE;
	ssize_t ret = 0;
	size_t offset, len;
	XA_STATE(xas, xa, index);

	rcu_read_lock();

	xas_for_each(&xas, folio, ULONG_MAX) {
		if (xas_retry(&xas, folio))
			continue;
		if (WARN_ON(xa_is_value(folio)))
			break;
		if (WARN_ON(folio_test_hugetlb(folio)))
			break;

		offset = offset_in_folio(folio, start);
		len = min_t(size_t, maxsize, folio_size(folio) - offset);

		sg_set_page(sg, folio_page(folio, 0), len, offset);
		sgtable->nents++;
		sg++;
		sg_max--;

		maxsize -= len;
		ret += len;
		if (maxsize <= 0 || sg_max == 0)
			break;
	}

	rcu_read_unlock();
	if (ret > 0)
		iov_iter_advance(iter, ret);
	return ret;
}

/**
 * extract_iter_to_sg - Extract pages from an iterator and add to an sglist
 * @iter: The iterator to extract from
 * @maxsize: The amount of iterator to copy
 * @sgtable: The scatterlist table to fill in
 * @sg_max: Maximum number of elements in @sgtable that may be filled
 * @extraction_flags: Flags to qualify the request
 *
 * Extract the page fragments from the given amount of the source iterator and
 * add them to a scatterlist that refers to all of those bits, to a maximum
 * addition of @sg_max elements.
 *
 * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
 * pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE-
 * and DISCARD-type are not supported.
 *
 * No end mark is placed on the scatterlist; that's left to the caller.
 *
 * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
 * be allowed on the pages extracted.
 *
 * If successful, @sgtable->nents is updated to include the number of elements
 * added and the number of bytes added is returned.  @sgtable->orig_nents is
 * left unaltered.
 *
 * The iov_iter_extract_mode() function should be used to query how cleanup
 * should be performed.
 */
ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
			   struct sg_table *sgtable, unsigned int sg_max,
			   iov_iter_extraction_t extraction_flags)
{
	if (maxsize == 0)
		return 0;

	switch (iov_iter_type(iter)) {
	case ITER_UBUF:
	case ITER_IOVEC:
		return extract_user_to_sg(iter, maxsize, sgtable, sg_max,
					  extraction_flags);
	case ITER_BVEC:
		return extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
					  extraction_flags);
	case ITER_KVEC:
		return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
					  extraction_flags);
	case ITER_XARRAY:
		return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
					    extraction_flags);
	default:
		pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
		WARN_ON_ONCE(1);
		return -EIO;
	}
}
EXPORT_SYMBOL_GPL(extract_iter_to_sg);
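/*
 * Illustrative sketch (not part of the original file): feeding a kernel
 * buffer to extract_iter_to_sg() through a KVEC iterator, which takes the
 * unpinned path above.  The stack-sized scatterlist, the EXAMPLE_SG_MAX
 * limit and the helper name are hypothetical, and ITER_SOURCE is assumed to
 * be the direction flag used by current iov_iter_kvec() callers.  Note that
 * the caller places the end mark itself, as documented above.
 */
#if 0	/* example only */
#define EXAMPLE_SG_MAX	16

static ssize_t example_kvec_to_sg(void *buf, size_t len)
{
	struct scatterlist sgl[EXAMPLE_SG_MAX];
	struct sg_table sgt = { .sgl = sgl };	/* nents/orig_nents start at 0 */
	struct kvec kv = { .iov_base = buf, .iov_len = len };
	struct iov_iter iter;
	ssize_t ret;

	sg_init_table(sgl, EXAMPLE_SG_MAX);
	iov_iter_kvec(&iter, ITER_SOURCE, &kv, 1, len);

	ret = extract_iter_to_sg(&iter, len, &sgt, EXAMPLE_SG_MAX, 0);
	if (ret > 0)
		sg_mark_end(&sgl[sgt.nents - 1]);
	return ret;
}
#endif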