// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>
#include <asm/shmparam.h>

#include "memmap.h"
#include "kbuf.h"

static void *io_mem_alloc_compound(struct page **pages, int nr_pages,
				   size_t size, gfp_t gfp)
{
	struct page *page;
	int i, order;

	order = get_order(size);
	if (order > MAX_PAGE_ORDER)
		return ERR_PTR(-ENOMEM);
	else if (order)
		gfp |= __GFP_COMP;

	page = alloc_pages(gfp, order);
	if (!page)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < nr_pages; i++)
		pages[i] = page + i;

	return page_address(page);
}

static void *io_mem_alloc_single(struct page **pages, int nr_pages, size_t size,
				 gfp_t gfp)
{
	void *ret;
	int i;

	for (i = 0; i < nr_pages; i++) {
		pages[i] = alloc_page(gfp);
		if (!pages[i])
			goto err;
	}

	ret = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (ret)
		return ret;
err:
	while (i--)
		put_page(pages[i]);
	return ERR_PTR(-ENOMEM);
}

void *io_pages_map(struct page ***out_pages, unsigned short *npages,
		   size_t size)
{
	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN;
	struct page **pages;
	int nr_pages;
	void *ret;

	nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	pages = kvmalloc_array(nr_pages, sizeof(struct page *), gfp);
	if (!pages)
		return ERR_PTR(-ENOMEM);

	ret = io_mem_alloc_compound(pages, nr_pages, size, gfp);
	if (!IS_ERR(ret))
		goto done;

	ret = io_mem_alloc_single(pages, nr_pages, size, gfp);
	if (!IS_ERR(ret)) {
done:
		*out_pages = pages;
		*npages = nr_pages;
		return ret;
	}

	kvfree(pages);
	*out_pages = NULL;
	*npages = 0;
	return ret;
}

void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
		    bool put_pages)
{
	bool do_vunmap = false;

	if (!ptr)
		return;

	if (put_pages && *npages) {
		struct page **to_free = *pages;
		int i;

		/*
		 * Only did vmap for the non-compound multiple page case.
		 * For the compound page, we just need to put the head.
		 */
		if (PageCompound(to_free[0]))
			*npages = 1;
		else if (*npages > 1)
			do_vunmap = true;
		for (i = 0; i < *npages; i++)
			put_page(to_free[i]);
	}
	if (do_vunmap)
		vunmap(ptr);
	kvfree(*pages);
	*pages = NULL;
	*npages = 0;
}

void io_pages_free(struct page ***pages, int npages)
{
	struct page **page_array = *pages;

	if (!page_array)
		return;

	unpin_user_pages(page_array, npages);
	kvfree(page_array);
	*pages = NULL;
}

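/*
 * Pin a range of user memory and return the backing page array. The pages
 * are pinned with FOLL_LONGTERM, so the caller owns a long-term pin and
 * must drop it (e.g. via io_pages_free() or unpin_user_pages()) when the
 * mapping goes away. A partial pin is undone and treated as a failure;
 * errors are returned as ERR_PTR values.
 */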
struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
{
	unsigned long start, end, nr_pages;
	struct page **pages;
	int ret;

	end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	start = uaddr >> PAGE_SHIFT;
	nr_pages = end - start;
	if (WARN_ON_ONCE(!nr_pages))
		return ERR_PTR(-EINVAL);

	pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		return ERR_PTR(-ENOMEM);

	ret = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
				  pages);
	/* success, mapped all pages */
	if (ret == nr_pages) {
		*npages = nr_pages;
		return pages;
	}

	/* partial map, or didn't map anything */
	if (ret >= 0) {
		/* if we did partial map, release any pages we did get */
		if (ret)
			unpin_user_pages(pages, ret);
		ret = -EFAULT;
	}
	kvfree(pages);
	return ERR_PTR(ret);
}

void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
		     unsigned long uaddr, size_t size)
{
	struct page **page_array;
	unsigned int nr_pages;
	void *page_addr;

	*npages = 0;

	if (uaddr & (PAGE_SIZE - 1) || !size)
		return ERR_PTR(-EINVAL);

	nr_pages = 0;
	page_array = io_pin_pages(uaddr, size, &nr_pages);
	if (IS_ERR(page_array))
		return page_array;

	page_addr = vmap(page_array, nr_pages, VM_MAP, PAGE_KERNEL);
	if (page_addr) {
		*pages = page_array;
		*npages = nr_pages;
		return page_addr;
	}

	io_pages_free(&page_array, nr_pages);
	return ERR_PTR(-ENOMEM);
}

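/*
 * Translate an mmap offset into the kernel memory backing that region:
 * the SQ/CQ rings, the SQE array, or a provided buffer ring. Returns an
 * ERR_PTR if the offset is not recognised or the region may not be
 * mmap'ed (e.g. rings created with IORING_SETUP_NO_MMAP).
 */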
static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff,
					    size_t sz)
{
	struct io_ring_ctx *ctx = file->private_data;
	loff_t offset = pgoff << PAGE_SHIFT;

	switch ((pgoff << PAGE_SHIFT) & IORING_OFF_MMAP_MASK) {
	case IORING_OFF_SQ_RING:
	case IORING_OFF_CQ_RING:
		/* Don't allow mmap if the ring was set up without it */
		if (ctx->flags & IORING_SETUP_NO_MMAP)
			return ERR_PTR(-EINVAL);
		return ctx->rings;
	case IORING_OFF_SQES:
		/* Don't allow mmap if the ring was set up without it */
		if (ctx->flags & IORING_SETUP_NO_MMAP)
			return ERR_PTR(-EINVAL);
		return ctx->sq_sqes;
	case IORING_OFF_PBUF_RING: {
		struct io_buffer_list *bl;
		unsigned int bgid;
		void *ptr;

		bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
		bl = io_pbuf_get_bl(ctx, bgid);
		if (IS_ERR(bl))
			return bl;
		ptr = bl->buf_ring;
		io_put_bl(ctx, bl);
		return ptr;
		}
	}

	return ERR_PTR(-EINVAL);
}

int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
			struct page **pages, int npages)
{
	unsigned long nr_pages = npages;

	vm_flags_set(vma, VM_DONTEXPAND);
	return vm_insert_pages(vma, vma->vm_start, pages, &nr_pages);
}

#ifdef CONFIG_MMU

__cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct io_ring_ctx *ctx = file->private_data;
	size_t sz = vma->vm_end - vma->vm_start;
	long offset = vma->vm_pgoff << PAGE_SHIFT;
	unsigned int npages;
	void *ptr;

	ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	switch (offset & IORING_OFF_MMAP_MASK) {
	case IORING_OFF_SQ_RING:
	case IORING_OFF_CQ_RING:
		npages = min(ctx->n_ring_pages, (sz + PAGE_SIZE - 1) >> PAGE_SHIFT);
		return io_uring_mmap_pages(ctx, vma, ctx->ring_pages, npages);
	case IORING_OFF_SQES:
		return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages,
					   ctx->n_sqe_pages);
	case IORING_OFF_PBUF_RING:
		return io_pbuf_mmap(file, vma);
	}

	return -EINVAL;
}

unsigned long io_uring_get_unmapped_area(struct file *filp, unsigned long addr,
					 unsigned long len, unsigned long pgoff,
					 unsigned long flags)
{
	void *ptr;

	/*
	 * Do not allow mapping to a user-provided address, to avoid breaking
	 * the aliasing rules. Userspace is not able to guess the offset
	 * address of a kernel kmalloc()ed memory area.
	 */
	if (addr)
		return -EINVAL;

	ptr = io_uring_validate_mmap_request(filp, pgoff, len);
	if (IS_ERR(ptr))
		return -ENOMEM;

	/*
	 * Some architectures have strong cache aliasing requirements.
	 * For such architectures we need a coherent mapping which aliases
	 * kernel memory *and* userspace memory. To achieve that:
	 * - use a NULL file pointer to reference physical memory, and
	 * - use the kernel virtual address of the shared io_uring context
	 *   (instead of the userspace-provided address, which has to be 0UL
	 *   anyway).
	 * - use the same pgoff which get_unmapped_area() uses to
	 *   calculate the page colouring.
	 * For architectures without such aliasing requirements, the
	 * architecture will return any suitable mapping because addr is 0.
	 */
	filp = NULL;
	flags |= MAP_SHARED;
	pgoff = 0;	/* has been translated to ptr above */
#ifdef SHM_COLOUR
	addr = (uintptr_t) ptr;
	pgoff = addr >> PAGE_SHIFT;
#else
	addr = 0UL;
#endif
	return mm_get_unmapped_area(current->mm, filp, addr, len, pgoff, flags);
}

#else /* !CONFIG_MMU */

int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
{
	return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -EINVAL;
}

unsigned int io_uring_nommu_mmap_capabilities(struct file *file)
{
	return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE;
}

unsigned long io_uring_get_unmapped_area(struct file *file, unsigned long addr,
					 unsigned long len, unsigned long pgoff,
					 unsigned long flags)
{
	void *ptr;

	ptr = io_uring_validate_mmap_request(file, pgoff, len);
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	return (unsigned long) ptr;
}

#endif /* !CONFIG_MMU */