// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/mm/nommu.c
 *
 *  Replacement code for mm functions to support CPUs that don't
 *  have any form of memory management unit (thus no virtual memory).
 *
 *  See Documentation/admin-guide/mm/nommu-mmap.rst
 *
 *  Copyright (c) 2004-2008 David Howells <dhowells@redhat.com>
 *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
 *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
 *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
 *  Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/backing-dev.h>
#include <linux/compiler.h>
#include <linux/mount.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/printk.h>

#include <linux/uaccess.h>
#include <linux/uio.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include "internal.h"

void *high_memory;
EXPORT_SYMBOL(high_memory);
struct page *mem_map;
unsigned long max_mapnr;
EXPORT_SYMBOL(max_mapnr);
unsigned long highest_memmap_pfn;
int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
int heap_stack_gap = 0;

atomic_long_t mmap_pages_allocated;

EXPORT_SYMBOL(mem_map);

/* list of mapped, potentially shareable regions */
static struct kmem_cache *vm_region_jar;
struct rb_root nommu_region_tree = RB_ROOT;
DECLARE_RWSEM(nommu_region_sem);

const struct vm_operations_struct generic_file_vm_ops = {
};

/*
 * Return the total memory allocated for this pointer, not
 * just what the caller asked for.
 *
 * Doesn't have to be accurate, i.e. may have races.
 */
unsigned int kobjsize(const void *objp)
{
	struct page *page;

	/*
	 * If the object we have should not have ksize performed on it,
	 * return size of 0
	 */
	if (!objp || !virt_addr_valid(objp))
		return 0;

	page = virt_to_head_page(objp);

	/*
	 * If the allocator sets PageSlab, we know the pointer came from
	 * kmalloc().
	 */
	if (PageSlab(page))
		return ksize(objp);

	/*
	 * If it's not a compound page, see if we have a matching VMA
	 * region. This test is intentionally done in reverse order,
	 * so if there's no VMA, we still fall through and hand back
	 * PAGE_SIZE for 0-order pages.
	 */
	if (!PageCompound(page)) {
		struct vm_area_struct *vma;

		vma = find_vma(current->mm, (unsigned long)objp);
		if (vma)
			return vma->vm_end - vma->vm_start;
	}

	/*
	 * The ksize() function is only guaranteed to work for pointers
	 * returned by kmalloc(). So handle arbitrary pointers here.
	 */
	return page_size(page);
}
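
/*
 * Illustrative sketch (not part of the original source): kobjsize() reports
 * the full footprint backing a pointer, which may exceed what the caller
 * asked for once slab rounding is accounted for. The variable names below
 * are made up for the example.
 *
 *	char *buf = kmalloc(100, GFP_KERNEL);
 *	unsigned int footprint = kobjsize(buf);	// slab-rounded, e.g. 128
 *	// footprint >= 100; use it when accounting real memory consumption
 *	kfree(buf);
 */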

void vfree(const void *addr)
{
	kfree(addr);
}
EXPORT_SYMBOL(vfree);

void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask)
{
	/*
	 *  You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc()
	 * returns only a logical address.
	 */
	return kmalloc_noprof(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
}
EXPORT_SYMBOL(__vmalloc_noprof);

void *vrealloc_noprof(const void *p, size_t size, gfp_t flags)
{
	return krealloc_noprof(p, size, (flags | __GFP_COMP) & ~__GFP_HIGHMEM);
}

void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align,
		unsigned long start, unsigned long end, gfp_t gfp_mask,
		pgprot_t prot, unsigned long vm_flags, int node,
		const void *caller)
{
	return __vmalloc_noprof(size, gfp_mask);
}

void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask,
		int node, const void *caller)
{
	return __vmalloc_noprof(size, gfp_mask);
}

static void *__vmalloc_user_flags(unsigned long size, gfp_t flags)
{
	void *ret;

	ret = __vmalloc(size, flags);
	if (ret) {
		struct vm_area_struct *vma;

		mmap_write_lock(current->mm);
		vma = find_vma(current->mm, (unsigned long)ret);
		if (vma)
			vm_flags_set(vma, VM_USERMAP);
		mmap_write_unlock(current->mm);
	}

	return ret;
}

void *vmalloc_user_noprof(unsigned long size)
{
	return __vmalloc_user_flags(size, GFP_KERNEL | __GFP_ZERO);
}
EXPORT_SYMBOL(vmalloc_user_noprof);

struct page *vmalloc_to_page(const void *addr)
{
	return virt_to_page(addr);
}
EXPORT_SYMBOL(vmalloc_to_page);

unsigned long vmalloc_to_pfn(const void *addr)
{
	return page_to_pfn(virt_to_page(addr));
}
EXPORT_SYMBOL(vmalloc_to_pfn);
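
/*
 * Illustrative sketch (not part of the original source): because the !MMU
 * vmalloc() above is just kmalloc() with __GFP_HIGHMEM masked off, the
 * returned memory is physically contiguous, and vmalloc_to_page()
 * degenerates to virt_to_page(). The snippet only demonstrates that
 * relationship; the variable names are made up.
 *
 *	void *p = vmalloc(2 * PAGE_SIZE);
 *	if (p) {
 *		struct page *first  = vmalloc_to_page(p);
 *		struct page *second = vmalloc_to_page(p + PAGE_SIZE);
 *		// first and second are physically adjacent pages
 *		vfree(p);
 *	}
 */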
allocation size 197 * 197 * 198 * Allocate enough pages to cover @size f 198 * Allocate enough pages to cover @size from the page level 199 * allocator and map them into contiguous 199 * allocator and map them into contiguous kernel virtual space. 200 * 200 * 201 * For tight control over page level allo 201 * For tight control over page level allocator and protection flags 202 * use __vmalloc() instead. 202 * use __vmalloc() instead. 203 */ 203 */ 204 void *vmalloc_noprof(unsigned long size) 204 void *vmalloc_noprof(unsigned long size) 205 { 205 { 206 return __vmalloc_noprof(size, GFP_KERN 206 return __vmalloc_noprof(size, GFP_KERNEL); 207 } 207 } 208 EXPORT_SYMBOL(vmalloc_noprof); 208 EXPORT_SYMBOL(vmalloc_noprof); 209 209 210 void *vmalloc_huge_noprof(unsigned long size, 210 void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc_noprof); 211 211 212 /* 212 /* 213 * vzalloc - allocate virtually contiguou 213 * vzalloc - allocate virtually contiguous memory with zero fill 214 * 214 * 215 * @size: allocation size 215 * @size: allocation size 216 * 216 * 217 * Allocate enough pages to cover @size f 217 * Allocate enough pages to cover @size from the page level 218 * allocator and map them into contiguous 218 * allocator and map them into contiguous kernel virtual space. 219 * The memory allocated is set to zero. 219 * The memory allocated is set to zero. 220 * 220 * 221 * For tight control over page level allo 221 * For tight control over page level allocator and protection flags 222 * use __vmalloc() instead. 222 * use __vmalloc() instead. 223 */ 223 */ 224 void *vzalloc_noprof(unsigned long size) 224 void *vzalloc_noprof(unsigned long size) 225 { 225 { 226 return __vmalloc_noprof(size, GFP_KERN 226 return __vmalloc_noprof(size, GFP_KERNEL | __GFP_ZERO); 227 } 227 } 228 EXPORT_SYMBOL(vzalloc_noprof); 228 EXPORT_SYMBOL(vzalloc_noprof); 229 229 230 /** 230 /** 231 * vmalloc_node - allocate memory on a specifi 231 * vmalloc_node - allocate memory on a specific node 232 * @size: allocation size 232 * @size: allocation size 233 * @node: numa node 233 * @node: numa node 234 * 234 * 235 * Allocate enough pages to cover @size from t 235 * Allocate enough pages to cover @size from the page level 236 * allocator and map them into contiguous kern 236 * allocator and map them into contiguous kernel virtual space. 237 * 237 * 238 * For tight control over page level allocator 238 * For tight control over page level allocator and protection flags 239 * use __vmalloc() instead. 239 * use __vmalloc() instead. 240 */ 240 */ 241 void *vmalloc_node_noprof(unsigned long size, 241 void *vmalloc_node_noprof(unsigned long size, int node) 242 { 242 { 243 return vmalloc_noprof(size); 243 return vmalloc_noprof(size); 244 } 244 } 245 EXPORT_SYMBOL(vmalloc_node_noprof); 245 EXPORT_SYMBOL(vmalloc_node_noprof); 246 246 247 /** 247 /** 248 * vzalloc_node - allocate memory on a specifi 248 * vzalloc_node - allocate memory on a specific node with zero fill 249 * @size: allocation size 249 * @size: allocation size 250 * @node: numa node 250 * @node: numa node 251 * 251 * 252 * Allocate enough pages to cover @size from t 252 * Allocate enough pages to cover @size from the page level 253 * allocator and map them into contiguous kern 253 * allocator and map them into contiguous kernel virtual space. 254 * The memory allocated is set to zero. 254 * The memory allocated is set to zero. 

/**
 * vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
 *	@size:		allocation size
 *
 *	Allocate enough 32bit PA addressable pages to cover @size from the
 *	page level allocator and map them into contiguous kernel virtual space.
 */
void *vmalloc_32_noprof(unsigned long size)
{
	return __vmalloc_noprof(size, GFP_KERNEL);
}
EXPORT_SYMBOL(vmalloc_32_noprof);

/**
 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
 *	@size:		allocation size
 *
 * The resulting memory area is 32bit addressable and zeroed so it can be
 * mapped to userspace without leaking data.
 *
 * VM_USERMAP is set on the corresponding VMA so that subsequent calls to
 * remap_vmalloc_range() are permissible.
 */
void *vmalloc_32_user_noprof(unsigned long size)
{
	/*
	 * We'll have to sort out the ZONE_DMA bits for 64-bit,
	 * but for now this can simply use vmalloc_user() directly.
	 */
	return vmalloc_user_noprof(size);
}
EXPORT_SYMBOL(vmalloc_32_user_noprof);
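
/*
 * Illustrative sketch (not part of the original source): a typical consumer
 * of vmalloc_user() on !MMU is a driver that wants a zeroed buffer it can
 * later hand to userspace via remap_vmalloc_range(), which relies on the
 * VM_USERMAP flag set above. The mmap handler and names below are
 * hypothetical, not an API defined in this file.
 *
 *	static void *example_buf;
 *
 *	static int example_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		if (!example_buf)
 *			example_buf = vmalloc_user(PAGE_SIZE * 4);
 *		if (!example_buf)
 *			return -ENOMEM;
 *		return remap_vmalloc_range(vma, example_buf, 0);
 *	}
 */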

void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
{
	BUG();
	return NULL;
}
EXPORT_SYMBOL(vmap);

void vunmap(const void *addr)
{
	BUG();
}
EXPORT_SYMBOL(vunmap);

void *vm_map_ram(struct page **pages, unsigned int count, int node)
{
	BUG();
	return NULL;
}
EXPORT_SYMBOL(vm_map_ram);

void vm_unmap_ram(const void *mem, unsigned int count)
{
	BUG();
}
EXPORT_SYMBOL(vm_unmap_ram);

void vm_unmap_aliases(void)
{
}
EXPORT_SYMBOL_GPL(vm_unmap_aliases);

void free_vm_area(struct vm_struct *area)
{
	BUG();
}
EXPORT_SYMBOL_GPL(free_vm_area);

int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
		   struct page *page)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_insert_page);

int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
			struct page **pages, unsigned long *num)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_insert_pages);

int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
			unsigned long num)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_map_pages);

int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages,
				unsigned long num)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_map_pages_zero);

/*
 *  sys_brk() for the most part doesn't need the global kernel
 *  lock, except when an application is doing something nasty
 *  like trying to un-brk an area that has already been mapped
 *  to a regular file.  in this case, the unmapping will need
 *  to invoke file system routines that need the global lock.
 */
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	struct mm_struct *mm = current->mm;

	if (brk < mm->start_brk || brk > mm->context.end_brk)
		return mm->brk;

	if (mm->brk == brk)
		return mm->brk;

	/*
	 * Always allow shrinking brk
	 */
	if (brk <= mm->brk) {
		mm->brk = brk;
		return brk;
	}

	/*
	 * Ok, looks good - let it rip.
	 */
	flush_icache_user_range(mm->brk, brk);
	return mm->brk = brk;
}
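
/*
 * Illustrative sketch (not part of the original source): on !MMU the brk
 * range is reserved up front, so the syscall above only moves the break
 * pointer within [mm->start_brk, mm->context.end_brk] and never maps or
 * unmaps anything. From userspace that looks roughly like:
 *
 *	void *cur = sbrk(0);		// current break
 *	sbrk(4096);			// succeeds while below end_brk
 *	brk(cur);			// shrinking always succeeds
 *
 * A request outside the reserved range simply returns the unchanged break.
 */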

/*
 * initialise the percpu counter for VM and region record slabs
 */
void __init mmap_init(void)
{
	int ret;

	ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
	VM_BUG_ON(ret);
	vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT);
}

/*
 * validate the region tree
 * - the caller must hold the region lock
 */
#ifdef CONFIG_DEBUG_NOMMU_REGIONS
static noinline void validate_nommu_regions(void)
{
	struct vm_region *region, *last;
	struct rb_node *p, *lastp;

	lastp = rb_first(&nommu_region_tree);
	if (!lastp)
		return;

	last = rb_entry(lastp, struct vm_region, vm_rb);
	BUG_ON(last->vm_end <= last->vm_start);
	BUG_ON(last->vm_top < last->vm_end);

	while ((p = rb_next(lastp))) {
		region = rb_entry(p, struct vm_region, vm_rb);
		last = rb_entry(lastp, struct vm_region, vm_rb);

		BUG_ON(region->vm_end <= region->vm_start);
		BUG_ON(region->vm_top < region->vm_end);
		BUG_ON(region->vm_start < last->vm_top);

		lastp = p;
	}
}
#else
static void validate_nommu_regions(void)
{
}
#endif

/*
 * add a region into the global tree
 */
static void add_nommu_region(struct vm_region *region)
{
	struct vm_region *pregion;
	struct rb_node **p, *parent;

	validate_nommu_regions();

	parent = NULL;
	p = &nommu_region_tree.rb_node;
	while (*p) {
		parent = *p;
		pregion = rb_entry(parent, struct vm_region, vm_rb);
		if (region->vm_start < pregion->vm_start)
			p = &(*p)->rb_left;
		else if (region->vm_start > pregion->vm_start)
			p = &(*p)->rb_right;
		else if (pregion == region)
			return;
		else
			BUG();
	}

	rb_link_node(&region->vm_rb, parent, p);
	rb_insert_color(&region->vm_rb, &nommu_region_tree);

	validate_nommu_regions();
}

/*
 * delete a region from the global tree
 */
static void delete_nommu_region(struct vm_region *region)
{
	BUG_ON(!nommu_region_tree.rb_node);

	validate_nommu_regions();
	rb_erase(&region->vm_rb, &nommu_region_tree);
	validate_nommu_regions();
}

/*
 * free a contiguous series of pages
 */
static void free_page_series(unsigned long from, unsigned long to)
{
	for (; from < to; from += PAGE_SIZE) {
		struct page *page = virt_to_page((void *)from);

		atomic_long_dec(&mmap_pages_allocated);
		put_page(page);
	}
}

/*
 * release a reference to a region
 * - the caller must hold the region semaphore for writing, which this releases
 * - the region may not have been added to the tree yet, in which case vm_top
 *   will equal vm_start
 */
static void __put_nommu_region(struct vm_region *region)
	__releases(nommu_region_sem)
{
	BUG_ON(!nommu_region_tree.rb_node);

	if (--region->vm_usage == 0) {
		if (region->vm_top > region->vm_start)
			delete_nommu_region(region);
		up_write(&nommu_region_sem);

		if (region->vm_file)
			fput(region->vm_file);

		/* IO memory and memory shared directly out of the pagecache
		 * from ramfs/tmpfs mustn't be released here */
		if (region->vm_flags & VM_MAPPED_COPY)
			free_page_series(region->vm_start, region->vm_top);
		kmem_cache_free(vm_region_jar, region);
	} else {
		up_write(&nommu_region_sem);
	}
}

/*
 * release a reference to a region
 */
static void put_nommu_region(struct vm_region *region)
{
	down_write(&nommu_region_sem);
	__put_nommu_region(region);
}
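
/*
 * Illustrative sketch (not part of the original source): vm_region objects
 * are refcounted so that several VMAs (for example two MAP_SHARED mappings
 * of the same character device) can point at one physical window.
 * Conceptually:
 *
 *	region->vm_usage = 1;		// first mmap() creates the region
 *	pregion->vm_usage++;		// a later mmap() reuses it (see do_mmap())
 *	put_nommu_region(region);	// each munmap() drops one reference;
 *					// the backing storage goes away at zero
 */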

static void setup_vma_to_mm(struct vm_area_struct *vma, struct mm_struct *mm)
{
	vma->vm_mm = mm;

	/* add the VMA to the mapping */
	if (vma->vm_file) {
		struct address_space *mapping = vma->vm_file->f_mapping;

		i_mmap_lock_write(mapping);
		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_insert(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
		i_mmap_unlock_write(mapping);
	}
}

static void cleanup_vma_from_mm(struct vm_area_struct *vma)
{
	vma->vm_mm->map_count--;
	/* remove the VMA from the mapping */
	if (vma->vm_file) {
		struct address_space *mapping;
		mapping = vma->vm_file->f_mapping;

		i_mmap_lock_write(mapping);
		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_remove(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
		i_mmap_unlock_write(mapping);
	}
}

/*
 * delete a VMA from its owning mm_struct and address space
 */
static int delete_vma_from_mm(struct vm_area_struct *vma)
{
	VMA_ITERATOR(vmi, vma->vm_mm, vma->vm_start);

	vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
	if (vma_iter_prealloc(&vmi, vma)) {
		pr_warn("Allocation of vma tree for process %d failed\n",
			current->pid);
		return -ENOMEM;
	}
	cleanup_vma_from_mm(vma);

	/* remove from the MM's tree and list */
	vma_iter_clear(&vmi);
	return 0;
}
/*
 * destroy a VMA record
 */
static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
	vma_close(vma);
	if (vma->vm_file)
		fput(vma->vm_file);
	put_nommu_region(vma->vm_region);
	vm_area_free(vma);
}

struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
					     unsigned long start_addr,
					     unsigned long end_addr)
{
	unsigned long index = start_addr;

	mmap_assert_locked(mm);
	return mt_find(&mm->mm_mt, &index, end_addr - 1);
}
EXPORT_SYMBOL(find_vma_intersection);

/*
 * look up the first VMA in which addr resides, NULL if none
 * - should be called with mm->mmap_lock at least held readlocked
 */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	VMA_ITERATOR(vmi, mm, addr);

	return vma_iter_load(&vmi);
}
EXPORT_SYMBOL(find_vma);

/*
 * At least xtensa ends up having protection faults even with no
 * MMU.. No stack expansion, at least.
 */
struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
			unsigned long addr, struct pt_regs *regs)
{
	struct vm_area_struct *vma;

	mmap_read_lock(mm);
	vma = vma_lookup(mm, addr);
	if (!vma)
		mmap_read_unlock(mm);
	return vma;
}

/*
 * expand a stack to a given address
 * - not supported under NOMMU conditions
 */
int expand_stack_locked(struct vm_area_struct *vma, unsigned long addr)
{
	return -ENOMEM;
}

struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr)
{
	mmap_read_unlock(mm);
	return NULL;
}

/*
 * look up the first VMA that exactly matches addr
 * - should be called with mm->mmap_lock at least held readlocked
 */
static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
					     unsigned long addr,
					     unsigned long len)
{
	struct vm_area_struct *vma;
	unsigned long end = addr + len;
	VMA_ITERATOR(vmi, mm, addr);

	vma = vma_iter_load(&vmi);
	if (!vma)
		return NULL;
	if (vma->vm_start != addr)
		return NULL;
	if (vma->vm_end != end)
		return NULL;

	return vma;
}

/*
 * determine whether a mapping should be permitted and, if so, what sort of
 * mapping we're capable of supporting
 */
static int validate_mmap_request(struct file *file,
				 unsigned long addr,
				 unsigned long len,
				 unsigned long prot,
				 unsigned long flags,
				 unsigned long pgoff,
				 unsigned long *_capabilities)
{
	unsigned long capabilities, rlen;
	int ret;

	/* do the simple checks first */
	if (flags & MAP_FIXED)
		return -EINVAL;

	if ((flags & MAP_TYPE) != MAP_PRIVATE &&
	    (flags & MAP_TYPE) != MAP_SHARED)
		return -EINVAL;

	if (!len)
		return -EINVAL;

	/* Careful about overflows.. */
	rlen = PAGE_ALIGN(len);
	if (!rlen || rlen > TASK_SIZE)
		return -ENOMEM;

	/* offset overflow? */
	if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff)
		return -EOVERFLOW;

	if (file) {
		/* files must support mmap */
		if (!file->f_op->mmap)
			return -ENODEV;

		/* work out if what we've got could possibly be shared
		 * - we support chardevs that provide their own "memory"
		 * - we support files/blockdevs that are memory backed
		 */
		if (file->f_op->mmap_capabilities) {
			capabilities = file->f_op->mmap_capabilities(file);
		} else {
			/* no explicit capabilities set, so assume some
			 * defaults */
			switch (file_inode(file)->i_mode & S_IFMT) {
			case S_IFREG:
			case S_IFBLK:
				capabilities = NOMMU_MAP_COPY;
				break;

			case S_IFCHR:
				capabilities =
					NOMMU_MAP_DIRECT |
					NOMMU_MAP_READ |
					NOMMU_MAP_WRITE;
				break;

			default:
				return -EINVAL;
			}
		}

		/* eliminate any capabilities that we can't support on this
		 * device */
		if (!file->f_op->get_unmapped_area)
			capabilities &= ~NOMMU_MAP_DIRECT;
		if (!(file->f_mode & FMODE_CAN_READ))
			capabilities &= ~NOMMU_MAP_COPY;

		/* The file shall have been opened with read permission. */
		if (!(file->f_mode & FMODE_READ))
			return -EACCES;

		if (flags & MAP_SHARED) {
			/* do checks for writing, appending and locking */
			if ((prot & PROT_WRITE) &&
			    !(file->f_mode & FMODE_WRITE))
				return -EACCES;

			if (IS_APPEND(file_inode(file)) &&
			    (file->f_mode & FMODE_WRITE))
				return -EACCES;

			if (!(capabilities & NOMMU_MAP_DIRECT))
				return -ENODEV;

			/* we mustn't privatise shared mappings */
			capabilities &= ~NOMMU_MAP_COPY;
		} else {
			/* we're going to read the file into private memory we
			 * allocate */
			if (!(capabilities & NOMMU_MAP_COPY))
				return -ENODEV;

			/* we don't permit a private writable mapping to be
			 * shared with the backing device */
			if (prot & PROT_WRITE)
				capabilities &= ~NOMMU_MAP_DIRECT;
		}

		if (capabilities & NOMMU_MAP_DIRECT) {
			if (((prot & PROT_READ)  && !(capabilities & NOMMU_MAP_READ))  ||
			    ((prot & PROT_WRITE) && !(capabilities & NOMMU_MAP_WRITE)) ||
			    ((prot & PROT_EXEC)  && !(capabilities & NOMMU_MAP_EXEC))
			    ) {
				capabilities &= ~NOMMU_MAP_DIRECT;
				if (flags & MAP_SHARED) {
					pr_warn("MAP_SHARED not completely supported on !MMU\n");
					return -EINVAL;
				}
			}
		}

		/* handle executable mappings and implied executable
		 * mappings */
		if (path_noexec(&file->f_path)) {
			if (prot & PROT_EXEC)
				return -EPERM;
		} else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
			/* handle implication of PROT_EXEC by PROT_READ */
			if (current->personality & READ_IMPLIES_EXEC) {
				if (capabilities & NOMMU_MAP_EXEC)
					prot |= PROT_EXEC;
			}
		} else if ((prot & PROT_READ) &&
			 (prot & PROT_EXEC) &&
			 !(capabilities & NOMMU_MAP_EXEC)
			 ) {
			/* backing file is not executable, try to copy */
			capabilities &= ~NOMMU_MAP_DIRECT;
		}
	} else {
		/* anonymous mappings are always memory backed and can be
		 * privately mapped
		 */
		capabilities = NOMMU_MAP_COPY;

		/* handle PROT_EXEC implication by PROT_READ */
		if ((prot & PROT_READ) &&
		    (current->personality & READ_IMPLIES_EXEC))
			prot |= PROT_EXEC;
	}

	/* allow the security API to have its say */
	ret = security_mmap_addr(addr);
	if (ret < 0)
		return ret;

	/* looks okay */
	*_capabilities = capabilities;
	return 0;
}
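
/*
 * Illustrative sketch (not part of the original source): two typical
 * outcomes of the capability filtering above, for a regular file whose
 * filesystem does not supply ->mmap_capabilities (so it only gets
 * NOMMU_MAP_COPY). The call sites are hypothetical userspace examples.
 *
 *	// MAP_SHARED needs NOMMU_MAP_DIRECT, which a plain regular file
 *	// does not offer, so this fails with -ENODEV:
 *	mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 *	// MAP_PRIVATE succeeds via NOMMU_MAP_COPY: the file contents are
 *	// copied into an anonymous buffer by do_mmap_private():
 *	mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);
 */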

/*
 * we've determined that we can make the mapping, now translate what we
 * now know into VMA flags
 */
static unsigned long determine_vm_flags(struct file *file,
					unsigned long prot,
					unsigned long flags,
					unsigned long capabilities)
{
	unsigned long vm_flags;

	vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(file, flags);

	if (!file) {
		/*
		 * MAP_ANONYMOUS. MAP_SHARED is mapped to MAP_PRIVATE, because
		 * there is no fork().
		 */
		vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
	} else if (flags & MAP_PRIVATE) {
		/* MAP_PRIVATE file mapping */
		if (capabilities & NOMMU_MAP_DIRECT)
			vm_flags |= (capabilities & NOMMU_VMFLAGS);
		else
			vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

		if (!(prot & PROT_WRITE) && !current->ptrace)
			/*
			 * R/O private file mapping which cannot be used to
			 * modify memory, especially also not via active ptrace
			 * (e.g., set breakpoints) or later by upgrading
			 * permissions (no mprotect()). We can try overlaying
			 * the file mapping, which will work e.g., on chardevs,
			 * ramfs/tmpfs/shmfs and romfs/cramfs.
			 */
			vm_flags |= VM_MAYOVERLAY;
	} else {
		/* MAP_SHARED file mapping: NOMMU_MAP_DIRECT is set. */
		vm_flags |= VM_SHARED | VM_MAYSHARE |
			    (capabilities & NOMMU_VMFLAGS);
	}

	return vm_flags;
}
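
/*
 * Illustrative sketch (not part of the original source): for an anonymous
 * mapping requested with PROT_READ | PROT_WRITE, the translation above
 * yields roughly
 *
 *	vm_flags = VM_READ | VM_WRITE		// from calc_vm_prot_bits()
 *		 | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 *
 * with no VM_SHARED even if the caller passed MAP_SHARED, since there is no
 * fork() on !MMU (see the comment in the !file branch).
 */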

/*
 * set up a shared mapping on a file (the driver or filesystem provides and
 * pins the storage)
 */
static int do_mmap_shared_file(struct vm_area_struct *vma)
{
	int ret;

	ret = mmap_file(vma->vm_file, vma);
	if (ret == 0) {
		vma->vm_region->vm_top = vma->vm_region->vm_end;
		return 0;
	}
	if (ret != -ENOSYS)
		return ret;

	/* getting -ENOSYS indicates that direct mmap isn't possible (as
	 * opposed to tried but failed) so we can only give a suitable error as
	 * it's not possible to make a private copy if MAP_SHARED was given */
	return -ENODEV;
}

/*
 * set up a private mapping or an anonymous shared mapping
 */
static int do_mmap_private(struct vm_area_struct *vma,
			   struct vm_region *region,
			   unsigned long len,
			   unsigned long capabilities)
{
	unsigned long total, point;
	void *base;
	int ret, order;

	/*
	 * Invoke the file's mapping function so that it can keep track of
	 * shared mappings on devices or memory. VM_MAYOVERLAY will be set if
	 * it may attempt to share, which will make is_nommu_shared_mapping()
	 * happy.
	 */
	if (capabilities & NOMMU_MAP_DIRECT) {
		ret = mmap_file(vma->vm_file, vma);
		/* shouldn't return success if we're not sharing */
		if (WARN_ON_ONCE(!is_nommu_shared_mapping(vma->vm_flags)))
			ret = -ENOSYS;
		if (ret == 0) {
			vma->vm_region->vm_top = vma->vm_region->vm_end;
			return 0;
		}
		if (ret != -ENOSYS)
			return ret;

		/* getting an ENOSYS error indicates that direct mmap isn't
		 * possible (as opposed to tried but failed) so we'll try to
		 * make a private copy of the data and map that instead */
	}


	/* allocate some memory to hold the mapping
	 * - note that this may not return a page-aligned address if the object
	 *   we're allocating is smaller than a page
	 */
	order = get_order(len);
	total = 1 << order;
	point = len >> PAGE_SHIFT;

	/* we don't want to allocate a power-of-2 sized page set */
	if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages)
		total = point;

	base = alloc_pages_exact(total << PAGE_SHIFT, GFP_KERNEL);
	if (!base)
		goto enomem;

	atomic_long_add(total, &mmap_pages_allocated);

	vm_flags_set(vma, VM_MAPPED_COPY);
	region->vm_flags = vma->vm_flags;
	region->vm_start = (unsigned long) base;
	region->vm_end   = region->vm_start + len;
	region->vm_top   = region->vm_start + (total << PAGE_SHIFT);

	vma->vm_start = region->vm_start;
	vma->vm_end   = region->vm_start + len;

	if (vma->vm_file) {
		/* read the contents of a file into the copy */
		loff_t fpos;

		fpos = vma->vm_pgoff;
		fpos <<= PAGE_SHIFT;

		ret = kernel_read(vma->vm_file, base, len, &fpos);
		if (ret < 0)
			goto error_free;

		/* clear the last little bit */
		if (ret < len)
			memset(base + ret, 0, len - ret);

	} else {
		vma_set_anonymous(vma);
	}

	return 0;

error_free:
	free_page_series(region->vm_start, region->vm_top);
	region->vm_start = vma->vm_start = 0;
	region->vm_end   = vma->vm_end = 0;
	region->vm_top   = 0;
	return ret;

enomem:
	pr_err("Allocation of length %lu from process %d (%s) failed\n",
	       len, current->pid, current->comm);
	show_mem();
	return -ENOMEM;
}
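
/*
 * Illustrative worked example (not part of the original source) of the
 * trimming arithmetic in do_mmap_private(): for a 5-page private mapping
 * with 4KiB pages,
 *
 *	order = get_order(len);		// 3: next power of two is 8 pages
 *	total = 1 << order;		// 8
 *	point = len >> PAGE_SHIFT;	// 5
 *
 * and with the Kconfig default sysctl_nr_trim_pages of 1, total - point = 3
 * is >= 1, so alloc_pages_exact() is asked for exactly 5 pages instead of
 * rounding the request up to 8.
 */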

/*
 * handle mapping creation for uClinux
 */
unsigned long do_mmap(struct file *file,
			unsigned long addr,
			unsigned long len,
			unsigned long prot,
			unsigned long flags,
			vm_flags_t vm_flags,
			unsigned long pgoff,
			unsigned long *populate,
			struct list_head *uf)
{
	struct vm_area_struct *vma;
	struct vm_region *region;
	struct rb_node *rb;
	unsigned long capabilities, result;
	int ret;
	VMA_ITERATOR(vmi, current->mm, 0);

	*populate = 0;

	/* decide whether we should attempt the mapping, and if so what sort of
	 * mapping */
	ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
				    &capabilities);
	if (ret < 0)
		return ret;

	/* we ignore the address hint */
	addr = 0;
	len = PAGE_ALIGN(len);

	/* we've determined that we can make the mapping, now translate what we
	 * now know into VMA flags */
	vm_flags |= determine_vm_flags(file, prot, flags, capabilities);


	/* we're going to need to record the mapping */
	region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL);
	if (!region)
		goto error_getting_region;

	vma = vm_area_alloc(current->mm);
	if (!vma)
		goto error_getting_vma;

	region->vm_usage = 1;
	region->vm_flags = vm_flags;
	region->vm_pgoff = pgoff;

	vm_flags_init(vma, vm_flags);
	vma->vm_pgoff = pgoff;

	if (file) {
		region->vm_file = get_file(file);
		vma->vm_file = get_file(file);
	}

	down_write(&nommu_region_sem);

	/* if we want to share, we need to check for regions created by other
	 * mmap() calls that overlap with our proposed mapping
	 * - we can only share with a superset match on most regular files
	 * - shared mappings on character devices and memory backed files are
	 *   permitted to overlap inexactly as far as we are concerned, for in
	 *   these cases sharing is handled in the driver or filesystem rather
	 *   than here
	 */
	if (is_nommu_shared_mapping(vm_flags)) {
		struct vm_region *pregion;
		unsigned long pglen, rpglen, pgend, rpgend, start;

		pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		pgend = pgoff + pglen;

		for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) {
			pregion = rb_entry(rb, struct vm_region, vm_rb);

			if (!is_nommu_shared_mapping(pregion->vm_flags))
				continue;

			/* search for overlapping mappings on the same file */
			if (file_inode(pregion->vm_file) !=
			    file_inode(file))
				continue;

			if (pregion->vm_pgoff >= pgend)
				continue;

			rpglen = pregion->vm_end - pregion->vm_start;
			rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT;
			rpgend = pregion->vm_pgoff + rpglen;
			if (pgoff >= rpgend)
				continue;

			/* handle inexactly overlapping matches between
			 * mappings */
			if ((pregion->vm_pgoff != pgoff || rpglen != pglen) &&
			    !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) {
				/* new mapping is not a subset of the region */
				if (!(capabilities & NOMMU_MAP_DIRECT))
					goto sharing_violation;
				continue;
			}

			/* we've found a region we can share */
			pregion->vm_usage++;
			vma->vm_region = pregion;
			start = pregion->vm_start;
			start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT;
			vma->vm_start = start;
			vma->vm_end = start + len;

			if (pregion->vm_flags & VM_MAPPED_COPY)
				vm_flags_set(vma, VM_MAPPED_COPY);
			else {
				ret = do_mmap_shared_file(vma);
				if (ret < 0) {
					vma->vm_region = NULL;
					vma->vm_start = 0;
					vma->vm_end = 0;
					pregion->vm_usage--;
					pregion = NULL;
					goto error_just_free;
				}
			}
			fput(region->vm_file);
			kmem_cache_free(vm_region_jar, region);
			region = pregion;
			result = start;
			goto share;
		}

		/* obtain the address at which to make a shared mapping
		 * - this is the hook for quasi-memory character devices to
		 *   tell us the location of a shared mapping
		 */
		if (capabilities & NOMMU_MAP_DIRECT) {
			addr = file->f_op->get_unmapped_area(file, addr, len,
							     pgoff, flags);
			if (IS_ERR_VALUE(addr)) {
				ret = addr;
				if (ret != -ENOSYS)
					goto error_just_free;

				/* the driver refused to tell us where to site
				 * the mapping so we'll have to attempt to copy
				 * it */
				ret = -ENODEV;
				if (!(capabilities & NOMMU_MAP_COPY))
					goto error_just_free;

				capabilities &= ~NOMMU_MAP_DIRECT;
			} else {
				vma->vm_start = region->vm_start = addr;
				vma->vm_end = region->vm_end = addr + len;
			}
		}
	}

	vma->vm_region = region;

	/* set up the mapping
	 * - the region is filled in if NOMMU_MAP_DIRECT is still set
	 */
	if (file && vma->vm_flags & VM_SHARED)
		ret = do_mmap_shared_file(vma);
	else
		ret = do_mmap_private(vma, region, len, capabilities);
	if (ret < 0)
		goto error_just_free;
	add_nommu_region(region);

	/* clear anonymous mappings that don't ask for uninitialized data */
	if (!vma->vm_file &&
	    (!IS_ENABLED(CONFIG_MMAP_ALLOW_UNINITIALIZED) ||
	     !(flags & MAP_UNINITIALIZED)))
		memset((void *)region->vm_start, 0,
		       region->vm_end - region->vm_start);

	/* okay... we have a mapping; now we have to register it */
	result = vma->vm_start;

	current->mm->total_vm += len >> PAGE_SHIFT;

share:
	BUG_ON(!vma->vm_region);
	vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
	if (vma_iter_prealloc(&vmi, vma))
		goto error_just_free;

	setup_vma_to_mm(vma, current->mm);
	current->mm->map_count++;
	/* add the VMA to the tree */
	vma_iter_store(&vmi, vma);

	/* we flush the region from the icache only when the first executable
	 * mapping of it is made */
	if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) {
		flush_icache_user_range(region->vm_start, region->vm_end);
		region->vm_icache_flushed = true;
	}

	up_write(&nommu_region_sem);

	return result;
	result = vma->vm_start;

	current->mm->total_vm += len >> PAGE_SHIFT;

share:
	BUG_ON(!vma->vm_region);
	vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
	if (vma_iter_prealloc(&vmi, vma))
		goto error_just_free;

	setup_vma_to_mm(vma, current->mm);
	current->mm->map_count++;
	/* add the VMA to the tree */
	vma_iter_store(&vmi, vma);

	/* we flush the region from the icache only when the first executable
	 * mapping of it is made */
	if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) {
		flush_icache_user_range(region->vm_start, region->vm_end);
		region->vm_icache_flushed = true;
	}

	up_write(&nommu_region_sem);

	return result;

error_just_free:
	up_write(&nommu_region_sem);
error:
	vma_iter_free(&vmi);
	if (region->vm_file)
		fput(region->vm_file);
	kmem_cache_free(vm_region_jar, region);
	if (vma->vm_file)
		fput(vma->vm_file);
	vm_area_free(vma);
	return ret;

sharing_violation:
	up_write(&nommu_region_sem);
	pr_warn("Attempt to share mismatched mappings\n");
	ret = -EINVAL;
	goto error;

error_getting_vma:
	kmem_cache_free(vm_region_jar, region);
	pr_warn("Allocation of vma for %lu byte allocation from process %d failed\n",
		len, current->pid);
	show_mem();
	return -ENOMEM;

error_getting_region:
	pr_warn("Allocation of vm region for %lu byte allocation from process %d failed\n",
		len, current->pid);
	show_mem();
	return -ENOMEM;
}

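/*
 * Illustrative sketch (not part of this file): the NOMMU_MAP_DIRECT path
 * above asks the backing driver where to site a shared mapping through
 * file->f_op->get_unmapped_area().  A character device with a physically
 * contiguous buffer might implement the hook roughly as below; the names
 * mydev_buf_phys, mydev_buf_size, mydev_mmap and mydev_fops are hypothetical.
 *
 *	static unsigned long mydev_get_unmapped_area(struct file *file,
 *			unsigned long addr, unsigned long len,
 *			unsigned long pgoff, unsigned long flags)
 *	{
 *		unsigned long offset = pgoff << PAGE_SHIFT;
 *
 *		if (offset > mydev_buf_size || len > mydev_buf_size - offset)
 *			return -EINVAL;
 *
 *		// tell do_mmap() where the mapping lives
 *		return mydev_buf_phys + offset;
 *	}
 *
 *	static const struct file_operations mydev_fops = {
 *		.owner			= THIS_MODULE,
 *		.mmap			= mydev_mmap,	// sketched below, after remap_pfn_range()
 *		.get_unmapped_area	= mydev_get_unmapped_area,
 *	};
 *
 * Returning -ENOSYS instead would make do_mmap() fall back to making a
 * private copy (NOMMU_MAP_COPY), as the error handling above shows.
 */
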
unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
			      unsigned long prot, unsigned long flags,
			      unsigned long fd, unsigned long pgoff)
{
	struct file *file = NULL;
	unsigned long retval = -EBADF;

	audit_mmap_fd(fd, flags);
	if (!(flags & MAP_ANONYMOUS)) {
		file = fget(fd);
		if (!file)
			goto out;
	}

	retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);

	if (file)
		fput(file);
out:
	return retval;
}

SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
		unsigned long, prot, unsigned long, flags,
		unsigned long, fd, unsigned long, pgoff)
{
	return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
}

#ifdef __ARCH_WANT_SYS_OLD_MMAP
struct mmap_arg_struct {
	unsigned long addr;
	unsigned long len;
	unsigned long prot;
	unsigned long flags;
	unsigned long fd;
	unsigned long offset;
};

SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
{
	struct mmap_arg_struct a;

	if (copy_from_user(&a, arg, sizeof(a)))
		return -EFAULT;
	if (offset_in_page(a.offset))
		return -EINVAL;

	return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
			       a.offset >> PAGE_SHIFT);
}
#endif /* __ARCH_WANT_SYS_OLD_MMAP */

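/*
 * Illustrative userspace sketch (not part of this file): the region-sharing
 * rules documented above do_mmap() are user-visible.  On a filesystem whose
 * mappings are shareable, a second MAP_SHARED mapping that is a subset of an
 * existing one reuses that region and lands inside it, while a partial,
 * non-subset overlap is rejected through the sharing_violation path above
 * (EINVAL).  "data.bin" and the sizes are arbitrary.
 *
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		long pg = sysconf(_SC_PAGESIZE);
 *		int fd = open("data.bin", O_RDWR);
 *		char *a, *b;
 *
 *		if (fd < 0)
 *			return 1;
 *
 *		// map four pages shared
 *		a = mmap(NULL, 4 * pg, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, fd, 0);
 *
 *		// a two-page subset at page 1 may share a's region,
 *		// so b == a + pg is a plausible result here
 *		b = mmap(NULL, 2 * pg, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, fd, pg);
 *
 *		// four pages starting at page 2 would overlap a without
 *		// being a subset and can fail with EINVAL on NOMMU
 *
 *		munmap(b, 2 * pg);
 *		munmap(a, 4 * pg);
 *		close(fd);
 *		return 0;
 *	}
 */
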
/*
 * split a vma into two pieces at address 'addr', a new vma is allocated either
 * for the first part or the tail.
 */
static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
		     unsigned long addr, int new_below)
{
	struct vm_area_struct *new;
	struct vm_region *region;
	unsigned long npages;
	struct mm_struct *mm;

	/* we're only permitted to split anonymous regions (these should have
	 * only a single usage on the region) */
	if (vma->vm_file)
		return -ENOMEM;

	mm = vma->vm_mm;
	if (mm->map_count >= sysctl_max_map_count)
		return -ENOMEM;

	region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL);
	if (!region)
		return -ENOMEM;

	new = vm_area_dup(vma);
	if (!new)
		goto err_vma_dup;

	/* most fields are the same, copy all, and then fixup */
	*region = *vma->vm_region;
	new->vm_region = region;

	npages = (addr - vma->vm_start) >> PAGE_SHIFT;

	if (new_below) {
		region->vm_top = region->vm_end = new->vm_end = addr;
	} else {
		region->vm_start = new->vm_start = addr;
		region->vm_pgoff = new->vm_pgoff += npages;
	}

	vma_iter_config(vmi, new->vm_start, new->vm_end);
	if (vma_iter_prealloc(vmi, vma)) {
		pr_warn("Allocation of vma tree for process %d failed\n",
			current->pid);
		goto err_vmi_preallocate;
	}

	if (new->vm_ops && new->vm_ops->open)
		new->vm_ops->open(new);

	down_write(&nommu_region_sem);
	delete_nommu_region(vma->vm_region);
	if (new_below) {
		vma->vm_region->vm_start = vma->vm_start = addr;
		vma->vm_region->vm_pgoff = vma->vm_pgoff += npages;
	} else {
		vma->vm_region->vm_end = vma->vm_end = addr;
		vma->vm_region->vm_top = addr;
	}
	add_nommu_region(vma->vm_region);
	add_nommu_region(new->vm_region);
	up_write(&nommu_region_sem);

	setup_vma_to_mm(vma, mm);
	setup_vma_to_mm(new, mm);
	vma_iter_store(vmi, new);
	mm->map_count++;
	return 0;

err_vmi_preallocate:
	vm_area_free(new);
err_vma_dup:
	kmem_cache_free(vm_region_jar, region);
	return -ENOMEM;
}

/*
 * shrink a VMA by removing the specified chunk from either the beginning or
 * the end
 */
static int vmi_shrink_vma(struct vma_iterator *vmi,
			  struct vm_area_struct *vma,
			  unsigned long from, unsigned long to)
{
	struct vm_region *region;

	/* adjust the VMA's pointers, which may reposition it in the MM's tree
	 * and list */
	if (from > vma->vm_start) {
		if (vma_iter_clear_gfp(vmi, from, vma->vm_end, GFP_KERNEL))
			return -ENOMEM;
		vma->vm_end = from;
	} else {
		if (vma_iter_clear_gfp(vmi, vma->vm_start, to, GFP_KERNEL))
			return -ENOMEM;
		vma->vm_start = to;
	}

	/* cut the backing region down to size */
	region = vma->vm_region;
	BUG_ON(region->vm_usage != 1);

	down_write(&nommu_region_sem);
	delete_nommu_region(region);
	if (from > region->vm_start) {
		to = region->vm_top;
		region->vm_top = region->vm_end = from;
	} else {
		region->vm_start = to;
	}
	add_nommu_region(region);
	up_write(&nommu_region_sem);

	free_page_series(from, to);
	return 0;
}

/*
 * release a mapping
 * - under NOMMU conditions the chunk to be unmapped must be backed by a single
 *   VMA, though it need not cover the whole VMA
 */
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf)
{
	VMA_ITERATOR(vmi, mm, start);
	struct vm_area_struct *vma;
	unsigned long end;
	int ret = 0;

	len = PAGE_ALIGN(len);
	if (len == 0)
		return -EINVAL;

	end = start + len;

	/* find the first potentially overlapping VMA */
	vma = vma_find(&vmi, end);
	if (!vma) {
		static int limit;
		if (limit < 5) {
			pr_warn("munmap of memory not mmapped by process %d (%s): 0x%lx-0x%lx\n",
					current->pid, current->comm,
					start, start + len - 1);
			limit++;
		}
		return -EINVAL;
	}

	/* we're allowed to split an anonymous VMA but not a file-backed one */
	if (vma->vm_file) {
		do {
			if (start > vma->vm_start)
				return -EINVAL;
			if (end == vma->vm_end)
				goto erase_whole_vma;
			vma = vma_find(&vmi, end);
		} while (vma);
		return -EINVAL;
	} else {
		/* the chunk must be a subset of the VMA found */
		if (start == vma->vm_start && end == vma->vm_end)
			goto erase_whole_vma;
		if (start < vma->vm_start || end > vma->vm_end)
			return -EINVAL;
		if (offset_in_page(start))
			return -EINVAL;
		if (end != vma->vm_end && offset_in_page(end))
			return -EINVAL;
		if (start != vma->vm_start && end != vma->vm_end) {
			ret = split_vma(&vmi, vma, start, 1);
			if (ret < 0)
				return ret;
		}
		return vmi_shrink_vma(&vmi, vma, start, end);
	}

erase_whole_vma:
	if (delete_vma_from_mm(vma))
		ret = -ENOMEM;
	else
		delete_vma(mm, vma);
	return ret;
}

int vm_munmap(unsigned long addr, size_t len)
{
	struct mm_struct *mm = current->mm;
	int ret;

	mmap_write_lock(mm);
	ret = do_munmap(mm, addr, len, NULL);
	mmap_write_unlock(mm);
	return ret;
}
EXPORT_SYMBOL(vm_munmap);

SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
{
	return vm_munmap(addr, len);
}

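/*
 * Illustrative userspace sketch (not part of this file): as the comment on
 * do_munmap() notes, the range handed to munmap() must lie within a single
 * VMA.  An anonymous mapping may be trimmed or split, but a file-backed
 * mapping can only be unmapped whole.  Sizes are arbitrary.
 *
 *	#include <sys/mman.h>
 *
 *	void example(long pg)
 *	{
 *		char *p = mmap(NULL, 4 * pg, PROT_READ | PROT_WRITE,
 *			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *		munmap(p + 2 * pg, 2 * pg);	// trim the tail: succeeds
 *		munmap(p, 2 * pg);		// release the rest
 *
 *		// for a file-backed mapping f of 4 pages,
 *		// munmap(f + pg, pg) fails with EINVAL here;
 *		// only munmap(f, 4 * pg) erases the whole VMA
 *	}
 */
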
/*
 * release all the mappings made in a process's VM space
 */
void exit_mmap(struct mm_struct *mm)
{
	VMA_ITERATOR(vmi, mm, 0);
	struct vm_area_struct *vma;

	if (!mm)
		return;

	mm->total_vm = 0;

	/*
	 * Lock the mm to avoid assert complaining even though this is the only
	 * user of the mm
	 */
	mmap_write_lock(mm);
	for_each_vma(vmi, vma) {
		cleanup_vma_from_mm(vma);
		delete_vma(mm, vma);
		cond_resched();
	}
	__mt_destroy(&mm->mm_mt);
	mmap_write_unlock(mm);
}

/*
 * expand (or shrink) an existing mapping, potentially moving it at the same
 * time (controlled by the MREMAP_MAYMOVE flag and available VM space)
 *
 * under NOMMU conditions, we only permit changing a mapping's size, and only
 * as long as it stays within the region allocated by do_mmap_private() and the
 * block is not shareable
 *
 * MREMAP_FIXED is not supported under NOMMU conditions
 */
static unsigned long do_mremap(unsigned long addr,
			unsigned long old_len, unsigned long new_len,
			unsigned long flags, unsigned long new_addr)
{
	struct vm_area_struct *vma;

	/* insanity checks first */
	old_len = PAGE_ALIGN(old_len);
	new_len = PAGE_ALIGN(new_len);
	if (old_len == 0 || new_len == 0)
		return (unsigned long) -EINVAL;

	if (offset_in_page(addr))
		return -EINVAL;

	if (flags & MREMAP_FIXED && new_addr != addr)
		return (unsigned long) -EINVAL;

	vma = find_vma_exact(current->mm, addr, old_len);
	if (!vma)
		return (unsigned long) -EINVAL;

	if (vma->vm_end != vma->vm_start + old_len)
		return (unsigned long) -EFAULT;

	if (is_nommu_shared_mapping(vma->vm_flags))
		return (unsigned long) -EPERM;

	if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start)
		return (unsigned long) -ENOMEM;

	/* all checks complete - do it */
	vma->vm_end = vma->vm_start + new_len;
	return vma->vm_start;
}

SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
		unsigned long, new_len, unsigned long, flags,
		unsigned long, new_addr)
{
	unsigned long ret;

	mmap_write_lock(current->mm);
	ret = do_mremap(addr, old_len, new_len, flags, new_addr);
	mmap_write_unlock(current->mm);
	return ret;
}

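/*
 * Illustrative userspace sketch (not part of this file): as documented above
 * do_mremap(), NOMMU mremap() can only resize a non-shared mapping in place,
 * and only within the slack of the region that do_mmap_private() originally
 * allocated; it never moves a mapping.  The amount of slack is an
 * implementation detail, so the grow below is only "may succeed".
 *
 *	#define _GNU_SOURCE
 *	#include <sys/mman.h>
 *
 *	void example(long pg)
 *	{
 *		char *p = mmap(NULL, 3 * pg, PROT_READ | PROT_WRITE,
 *			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *		// shrinking in place always fits within the region
 *		p = mremap(p, 3 * pg, 2 * pg, 0);
 *
 *		// growing may succeed if the backing region has slack,
 *		// otherwise it fails with ENOMEM; it will not move
 *		p = mremap(p, 2 * pg, 4 * pg, MREMAP_MAYMOVE);
 *
 *		// MREMAP_FIXED to a different address fails with EINVAL
 *	}
 */
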
int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
		unsigned long pfn, unsigned long size, pgprot_t prot)
{
	if (addr != (pfn << PAGE_SHIFT))
		return -EINVAL;

	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
	return 0;
}
EXPORT_SYMBOL(remap_pfn_range);

int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len)
{
	unsigned long pfn = start >> PAGE_SHIFT;
	unsigned long vm_len = vma->vm_end - vma->vm_start;

	pfn += vma->vm_pgoff;
	return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot);
}
EXPORT_SYMBOL(vm_iomap_memory);

int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
			unsigned long pgoff)
{
	unsigned int size = vma->vm_end - vma->vm_start;

	if (!(vma->vm_flags & VM_USERMAP))
		return -EINVAL;

	vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT));
	vma->vm_end = vma->vm_start + size;

	return 0;
}
EXPORT_SYMBOL(remap_vmalloc_range);

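/*
 * Illustrative sketch (not part of this file): with no page tables to
 * populate, remap_pfn_range() above merely checks that the requested virtual
 * address already equals pfn << PAGE_SHIFT and marks the VMA.  A NOMMU driver
 * therefore pairs its ->mmap() with the get_unmapped_area() hook sketched
 * after do_mmap() above, so that vma->vm_start arrives already pointing at
 * the buffer.  mydev_buf_phys is hypothetical and assumed page aligned.
 *
 *	static int mydev_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		unsigned long size = vma->vm_end - vma->vm_start;
 *		unsigned long pfn = (mydev_buf_phys >> PAGE_SHIFT) + vma->vm_pgoff;
 *
 *		// vma->vm_start was chosen by mydev_get_unmapped_area(),
 *		// so the identity check in remap_pfn_range() succeeds
 *		return remap_pfn_range(vma, vma->vm_start, pfn,
 *				       size, vma->vm_page_prot);
 *	}
 */
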
vm_fault_t filemap_fault(struct vm_fault *vmf)
{
	BUG();
	return 0;
}
EXPORT_SYMBOL(filemap_fault);

vm_fault_t filemap_map_pages(struct vm_fault *vmf,
		pgoff_t start_pgoff, pgoff_t end_pgoff)
{
	BUG();
	return 0;
}
EXPORT_SYMBOL(filemap_map_pages);

static int __access_remote_vm(struct mm_struct *mm, unsigned long addr,
			      void *buf, int len, unsigned int gup_flags)
{
	struct vm_area_struct *vma;
	int write = gup_flags & FOLL_WRITE;

	if (mmap_read_lock_killable(mm))
		return 0;

	/* the access must start within one of the target process's mappings */
	vma = find_vma(mm, addr);
	if (vma) {
		/* don't overrun this mapping */
		if (addr + len >= vma->vm_end)
			len = vma->vm_end - addr;

		/* only read or write mappings where it is permitted */
		if (write && vma->vm_flags & VM_MAYWRITE)
			copy_to_user_page(vma, NULL, addr,
					 (void *) addr, buf, len);
		else if (!write && vma->vm_flags & VM_MAYREAD)
			copy_from_user_page(vma, NULL, addr,
					    buf, (void *) addr, len);
		else
			len = 0;
	} else {
		len = 0;
	}

	mmap_read_unlock(mm);

	return len;
}

/**
 * access_remote_vm - access another process' address space
 * @mm:		the mm_struct of the target address space
 * @addr:	start address to access
 * @buf:	source or destination buffer
 * @len:	number of bytes to transfer
 * @gup_flags:	flags modifying lookup behaviour
 *
 * The caller must hold a reference on @mm.
 */
int access_remote_vm(struct mm_struct *mm, unsigned long addr,
		void *buf, int len, unsigned int gup_flags)
{
	return __access_remote_vm(mm, addr, buf, len, gup_flags);
}

/*
 * Access another process' address space.
 * - source/target buffer must be kernel space
 */
int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len,
		unsigned int gup_flags)
{
	struct mm_struct *mm;

	if (addr + len < addr)
		return 0;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	len = __access_remote_vm(mm, addr, buf, len, gup_flags);

	mmput(mm);
	return len;
}
EXPORT_SYMBOL_GPL(access_process_vm);

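/*
 * Illustrative sketch (not part of this file): a ptrace-style caller reads
 * another task's memory through access_process_vm(); on NOMMU this ends up
 * in the direct copy above, bounded by the target VMA and gated on
 * VM_MAYREAD/VM_MAYWRITE.  peek_bytes() is hypothetical; tsk and addr come
 * from the caller.
 *
 *	static int peek_bytes(struct task_struct *tsk, unsigned long addr,
 *			      void *buf, int len)
 *	{
 *		int copied;
 *
 *		copied = access_process_vm(tsk, addr, buf, len, 0);
 *		if (copied != len)
 *			return -EFAULT;		// partial or failed access
 *		return 0;
 *	}
 *
 * Passing FOLL_WRITE in gup_flags requests a write to the target instead.
 */
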
/**
 * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode
 * @inode: The inode to check
 * @size: The current filesize of the inode
 * @newsize: The proposed filesize of the inode
 *
 * Check the shared mappings on an inode on behalf of a shrinking truncate to
 * make sure that any outstanding VMAs aren't broken and then shrink the
 * vm_regions that extend beyond so that do_mmap() doesn't
 * automatically grant mappings that are too large.
 */
int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
				size_t newsize)
{
	struct vm_area_struct *vma;
	struct vm_region *region;
	pgoff_t low, high;
	size_t r_size, r_top;

	low = newsize >> PAGE_SHIFT;
	high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	down_write(&nommu_region_sem);
	i_mmap_lock_read(inode->i_mapping);

	/* search for VMAs that fall within the dead zone */
	vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) {
		/* found one - only interested if it's shared out of the page
		 * cache */
		if (vma->vm_flags & VM_SHARED) {
			i_mmap_unlock_read(inode->i_mapping);
			up_write(&nommu_region_sem);
			return -ETXTBSY; /* not quite true, but near enough */
		}
	}

	/* reduce any regions that overlap the dead zone - if in existence,
	 * these will be pointed to by VMAs that don't overlap the dead zone
	 *
	 * we don't check for any regions that start beyond the EOF as there
	 * shouldn't be any
	 */
	vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, 0, ULONG_MAX) {
		if (!(vma->vm_flags & VM_SHARED))
			continue;

		region = vma->vm_region;
		r_size = region->vm_top - region->vm_start;
		r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size;

		if (r_top > newsize) {
			region->vm_top -= r_top - newsize;
			if (region->vm_end > region->vm_top)
				region->vm_end = region->vm_top;
		}
	}

	i_mmap_unlock_read(inode->i_mapping);
	up_write(&nommu_region_sem);
	return 0;
}

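/*
 * Illustrative sketch (not part of this file): a NOMMU-aware filesystem
 * calls nommu_shrink_inode_mappings() from its shrinking-truncate path
 * before it drops the pages, so that no shared VMA is left pointing past
 * the new EOF (ramfs' NOMMU code follows this pattern).  myfs_shrink() is
 * hypothetical.
 *
 *	static int myfs_shrink(struct inode *inode, loff_t newsize)
 *	{
 *		loff_t size = i_size_read(inode);
 *		int ret;
 *
 *		ret = nommu_shrink_inode_mappings(inode, size, newsize);
 *		if (ret < 0)
 *			return ret;	// a shared mapping still covers the tail
 *
 *		truncate_setsize(inode, newsize);
 *		return 0;
 *	}
 */
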
/*
 * Initialise sysctl_user_reserve_kbytes.
 *
 * This is intended to prevent a user from starting a single memory hogging
 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER
 * mode.
 *
 * The default value is min(3% of free memory, 128MB)
 * 128MB is enough to recover with sshd/login, bash, and top/kill.
 */
static int __meminit init_user_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = K(global_zone_page_state(NR_FREE_PAGES));

	sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
	return 0;
}
subsys_initcall(init_user_reserve);

/*
 * Initialise sysctl_admin_reserve_kbytes.
 *
 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
 * to log in and kill a memory hogging process.
 *
 * Systems with more than 256MB will reserve 8MB, enough to recover
 * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will
 * only reserve 3% of free pages by default.
 */
static int __meminit init_admin_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = K(global_zone_page_state(NR_FREE_PAGES));

	sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
	return 0;
}
subsys_initcall(init_admin_reserve);

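/*
 * Worked example of the two defaults above (the free-memory figure is
 * arbitrary): free_kbytes / 32 is roughly 3% (3.125%), 1UL << 17 kB is
 * 128 MiB and 1UL << 13 kB is 8 MiB.  With 1 GiB free:
 *
 *	free_kbytes                 = 1048576
 *	sysctl_user_reserve_kbytes  = min(1048576 / 32, 131072) = 32768 (32 MiB)
 *	sysctl_admin_reserve_kbytes = min(1048576 / 32, 8192)   = 8192  (8 MiB)
 */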