// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/mm/nommu.c
 *
 *  Replacement code for mm functions to support CPU's that don't
 *  have any form of memory management unit (thus no virtual memory).
 *
 *  See Documentation/admin-guide/mm/nommu-mmap.rst
 *
 *  Copyright (c) 2004-2008 David Howells <dhowells@redhat.com>
 *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
 *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
 *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
 *  Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/backing-dev.h>
#include <linux/compiler.h>
#include <linux/mount.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/printk.h>

#include <linux/uaccess.h>
#include <linux/uio.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include "internal.h"

void *high_memory;
EXPORT_SYMBOL(high_memory);
struct page *mem_map;
unsigned long max_mapnr;
EXPORT_SYMBOL(max_mapnr);
unsigned long highest_memmap_pfn;
int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
int heap_stack_gap = 0;

atomic_long_t mmap_pages_allocated;

EXPORT_SYMBOL(mem_map);

/* list of mapped, potentially shareable regions */
static struct kmem_cache *vm_region_jar;
struct rb_root nommu_region_tree = RB_ROOT;
DECLARE_RWSEM(nommu_region_sem);

const struct vm_operations_struct generic_file_vm_ops = {
};

/*
 * Return the total memory allocated for this pointer, not
 * just what the caller asked for.
 *
 * Doesn't have to be accurate, i.e. may have races.
 */
unsigned int kobjsize(const void *objp)
{
	struct page *page;

	/*
	 * If the object we have should not have ksize performed on it,
	 * return size of 0
	 */
	if (!objp || !virt_addr_valid(objp))
		return 0;

	page = virt_to_head_page(objp);

	/*
	 * If the allocator sets PageSlab, we know the pointer came from
	 * kmalloc().
	 */
	if (PageSlab(page))
		return ksize(objp);

	/*
	 * If it's not a compound page, see if we have a matching VMA
	 * region. This test is intentionally done in reverse order,
	 * so if there's no VMA, we still fall through and hand back
	 * PAGE_SIZE for 0-order pages.
	 */
	if (!PageCompound(page)) {
		struct vm_area_struct *vma;

		vma = find_vma(current->mm, (unsigned long)objp);
		if (vma)
			return vma->vm_end - vma->vm_start;
	}

	/*
	 * The ksize() function is only guaranteed to work for pointers
	 * returned by kmalloc(). So handle arbitrary pointers here.
	 */
	return page_size(page);
}

void vfree(const void *addr)
{
	kfree(addr);
}
EXPORT_SYMBOL(vfree);

void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask)
{
	/*
	 * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc()
	 * returns only a logical address.
	 */
	return kmalloc_noprof(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
}
EXPORT_SYMBOL(__vmalloc_noprof);

void *vrealloc_noprof(const void *p, size_t size, gfp_t flags)
{
	return krealloc_noprof(p, size, (flags | __GFP_COMP) & ~__GFP_HIGHMEM);
}

void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align,
		unsigned long start, unsigned long end, gfp_t gfp_mask,
		pgprot_t prot, unsigned long vm_flags, int node,
		const void *caller)
{
	return __vmalloc_noprof(size, gfp_mask);
}

void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask,
		int node, const void *caller)
{
	return __vmalloc_noprof(size, gfp_mask);
}

static void *__vmalloc_user_flags(unsigned long size, gfp_t flags)
{
	void *ret;

	ret = __vmalloc(size, flags);
	if (ret) {
		struct vm_area_struct *vma;

		mmap_write_lock(current->mm);
		vma = find_vma(current->mm, (unsigned long)ret);
		if (vma)
			vm_flags_set(vma, VM_USERMAP);
		mmap_write_unlock(current->mm);
	}

	return ret;
}

void *vmalloc_user_noprof(unsigned long size)
{
	return __vmalloc_user_flags(size, GFP_KERNEL | __GFP_ZERO);
}
EXPORT_SYMBOL(vmalloc_user_noprof);

struct page *vmalloc_to_page(const void *addr)
{
	return virt_to_page(addr);
}
EXPORT_SYMBOL(vmalloc_to_page);

unsigned long vmalloc_to_pfn(const void *addr)
{
	return page_to_pfn(virt_to_page(addr));
}
EXPORT_SYMBOL(vmalloc_to_pfn);

long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
{
	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	return copy_to_iter(addr, count, iter);
}

/*
 *	vmalloc  -  allocate virtually contiguous memory
 *
 *	@size:		allocation size
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator and map them into contiguous kernel virtual space.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */
void *vmalloc_noprof(unsigned long size)
{
	return __vmalloc_noprof(size, GFP_KERNEL);
}
EXPORT_SYMBOL(vmalloc_noprof);

void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc_noprof);

/*
 *	vzalloc - allocate virtually contiguous memory with zero fill
 *
 *	@size:		allocation size
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator and map them into contiguous kernel virtual space.
 *	The memory allocated is set to zero.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */
void *vzalloc_noprof(unsigned long size)
{
	return __vmalloc_noprof(size, GFP_KERNEL | __GFP_ZERO);
}
EXPORT_SYMBOL(vzalloc_noprof);

/**
 * vmalloc_node - allocate memory on a specific node
 * @size:	allocation size
 * @node:	numa node
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vmalloc_node_noprof(unsigned long size, int node)
{
	return vmalloc_noprof(size);
}
EXPORT_SYMBOL(vmalloc_node_noprof);

/**
 * vzalloc_node - allocate memory on a specific node with zero fill
 * @size:	allocation size
 * @node:	numa node
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 * The memory allocated is set to zero.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vzalloc_node_noprof(unsigned long size, int node)
{
	return vzalloc_noprof(size);
}
EXPORT_SYMBOL(vzalloc_node_noprof);

/**
 * vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
 *	@size:		allocation size
 *
 *	Allocate enough 32bit PA addressable pages to cover @size from the
 *	page level allocator and map them into contiguous kernel virtual space.
 */
void *vmalloc_32_noprof(unsigned long size)
{
	return __vmalloc_noprof(size, GFP_KERNEL);
}
EXPORT_SYMBOL(vmalloc_32_noprof);

/**
 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
 *	@size:		allocation size
 *
 * The resulting memory area is 32bit addressable and zeroed so it can be
 * mapped to userspace without leaking data.
 *
 * VM_USERMAP is set on the corresponding VMA so that subsequent calls to
 * remap_vmalloc_range() are permissible.
 */
void *vmalloc_32_user_noprof(unsigned long size)
{
	/*
	 * We'll have to sort out the ZONE_DMA bits for 64-bit,
	 * but for now this can simply use vmalloc_user() directly.
	 */
	return vmalloc_user_noprof(size);
}
EXPORT_SYMBOL(vmalloc_32_user_noprof);

void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
{
	BUG();
	return NULL;
}
EXPORT_SYMBOL(vmap);

void vunmap(const void *addr)
{
	BUG();
}
EXPORT_SYMBOL(vunmap);

void *vm_map_ram(struct page **pages, unsigned int count, int node)
{
	BUG();
	return NULL;
}
EXPORT_SYMBOL(vm_map_ram);

void vm_unmap_ram(const void *mem, unsigned int count)
{
	BUG();
}
EXPORT_SYMBOL(vm_unmap_ram);

void vm_unmap_aliases(void)
{
}
EXPORT_SYMBOL_GPL(vm_unmap_aliases);

void free_vm_area(struct vm_struct *area)
{
	BUG();
}
EXPORT_SYMBOL_GPL(free_vm_area);

int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
		   struct page *page)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_insert_page);

int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
			struct page **pages, unsigned long *num)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_insert_pages);

int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
			unsigned long num)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_map_pages);

int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages,
			unsigned long num)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_map_pages_zero);

/*
 *  sys_brk() for the most part doesn't need the global kernel
 *  lock, except when an application is doing something nasty
 *  like trying to un-brk an area that has already been mapped
 *  to a regular file.  in this case, the unmapping will need
 *  to invoke file system routines that need the global lock.
 */
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	struct mm_struct *mm = current->mm;

	if (brk < mm->start_brk || brk > mm->context.end_brk)
		return mm->brk;

	if (mm->brk == brk)
		return mm->brk;

	/*
	 * Always allow shrinking brk
	 */
	if (brk <= mm->brk) {
		mm->brk = brk;
		return brk;
	}

	/*
	 * Ok, looks good - let it rip.
	 */
	flush_icache_user_range(mm->brk, brk);
	return mm->brk = brk;
}

/*
 * initialise the percpu counter for VM and region record slabs
 */
void __init mmap_init(void)
{
	int ret;

	ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
	VM_BUG_ON(ret);
	vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT);
}

/*
 * validate the region tree
 * - the caller must hold the region lock
 */
#ifdef CONFIG_DEBUG_NOMMU_REGIONS
static noinline void validate_nommu_regions(void)
{
	struct vm_region *region, *last;
	struct rb_node *p, *lastp;

	lastp = rb_first(&nommu_region_tree);
	if (!lastp)
		return;

	last = rb_entry(lastp, struct vm_region, vm_rb);
	BUG_ON(last->vm_end <= last->vm_start);
	BUG_ON(last->vm_top < last->vm_end);

	while ((p = rb_next(lastp))) {
		region = rb_entry(p, struct vm_region, vm_rb);
		last = rb_entry(lastp, struct vm_region, vm_rb);

		BUG_ON(region->vm_end <= region->vm_start);
		BUG_ON(region->vm_top < region->vm_end);
		BUG_ON(region->vm_start < last->vm_top);

		lastp = p;
	}
}
#else
static void validate_nommu_regions(void)
{
}
#endif

/*
 * add a region into the global tree
 */
static void add_nommu_region(struct vm_region *region)
{
	struct vm_region *pregion;
	struct rb_node **p, *parent;

	validate_nommu_regions();

	parent = NULL;
	p = &nommu_region_tree.rb_node;
	while (*p) {
		parent = *p;
		pregion = rb_entry(parent, struct vm_region, vm_rb);
		if (region->vm_start < pregion->vm_start)
			p = &(*p)->rb_left;
		else if (region->vm_start > pregion->vm_start)
			p = &(*p)->rb_right;
		else if (pregion == region)
			return;
		else
			BUG();
	}

	rb_link_node(&region->vm_rb, parent, p);
	rb_insert_color(&region->vm_rb, &nommu_region_tree);

	validate_nommu_regions();
}

/*
 * delete a region from the global tree
 */
static void delete_nommu_region(struct vm_region *region)
{
	BUG_ON(!nommu_region_tree.rb_node);

	validate_nommu_regions();
	rb_erase(&region->vm_rb, &nommu_region_tree);
	validate_nommu_regions();
}

/*
 * free a contiguous series of pages
 */
static void free_page_series(unsigned long from, unsigned long to)
{
	for (; from < to; from += PAGE_SIZE) {
		struct page *page = virt_to_page((void *)from);

		atomic_long_dec(&mmap_pages_allocated);
		put_page(page);
	}
}

/*
 * release a reference to a region
 * - the caller must hold the region semaphore for writing, which this releases
 * - the region may not have been added to the tree yet, in which case vm_top
 *   will equal vm_start
 */
static void __put_nommu_region(struct vm_region *region)
	__releases(nommu_region_sem)
{
	BUG_ON(!nommu_region_tree.rb_node);

	if (--region->vm_usage == 0) {
		if (region->vm_top > region->vm_start)
			delete_nommu_region(region);
		up_write(&nommu_region_sem);

		if (region->vm_file)
			fput(region->vm_file);

		/* IO memory and memory shared directly out of the pagecache
		 * from ramfs/tmpfs mustn't be released here */
		if (region->vm_flags & VM_MAPPED_COPY)
			free_page_series(region->vm_start, region->vm_top);
		kmem_cache_free(vm_region_jar, region);
	} else {
		up_write(&nommu_region_sem);
	}
}

/*
 * release a reference to a region
 */
static void put_nommu_region(struct vm_region *region)
{
	down_write(&nommu_region_sem);
	__put_nommu_region(region);
}

static void setup_vma_to_mm(struct vm_area_struct *vma, struct mm_struct *mm)
{
	vma->vm_mm = mm;

	/* add the VMA to the mapping */
	if (vma->vm_file) {
		struct address_space *mapping = vma->vm_file->f_mapping;

		i_mmap_lock_write(mapping);
		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_insert(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
		i_mmap_unlock_write(mapping);
	}
}

static void cleanup_vma_from_mm(struct vm_area_struct *vma)
{
	vma->vm_mm->map_count--;
	/* remove the VMA from the mapping */
	if (vma->vm_file) {
		struct address_space *mapping;
		mapping = vma->vm_file->f_mapping;

		i_mmap_lock_write(mapping);
		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_remove(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
		i_mmap_unlock_write(mapping);
	}
}

/*
 * delete a VMA from its owning mm_struct and address space
 */
static int delete_vma_from_mm(struct vm_area_struct *vma)
{
	VMA_ITERATOR(vmi, vma->vm_mm, vma->vm_start);

	vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
	if (vma_iter_prealloc(&vmi, vma)) {
		pr_warn("Allocation of vma tree for process %d failed\n",
			current->pid);
		return -ENOMEM;
	}
	cleanup_vma_from_mm(vma);

	/* remove from the MM's tree and list */
	vma_iter_clear(&vmi);
	return 0;
}
/*
 * destroy a VMA record
 */
static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
	vma_close(vma);
	if (vma->vm_file)
		fput(vma->vm_file);
	put_nommu_region(vma->vm_region);
	vm_area_free(vma);
}

struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
					     unsigned long start_addr,
					     unsigned long end_addr)
{
	unsigned long index = start_addr;

	mmap_assert_locked(mm);
	return mt_find(&mm->mm_mt, &index, end_addr - 1);
}
EXPORT_SYMBOL(find_vma_intersection);

/*
 * look up the first VMA in which addr resides, NULL if none
 * - should be called with mm->mmap_lock at least held readlocked
 */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	VMA_ITERATOR(vmi, mm, addr);

	return vma_iter_load(&vmi);
}
EXPORT_SYMBOL(find_vma);

/*
 * At least xtensa ends up having protection faults even with no
 * MMU.. No stack expansion, at least.
 */
struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
			unsigned long addr, struct pt_regs *regs)
{
	struct vm_area_struct *vma;

	mmap_read_lock(mm);
	vma = vma_lookup(mm, addr);
	if (!vma)
		mmap_read_unlock(mm);
	return vma;
}

/*
 * expand a stack to a given address
 * - not supported under NOMMU conditions
 */
int expand_stack_locked(struct vm_area_struct *vma, unsigned long address)
{
	return -ENOMEM;
}

struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr)
{
	mmap_read_unlock(mm);
	return NULL;
}

/*
 * look up the first VMA exactly that exactly matches addr
 * - should be called with mm->mmap_lock at least held readlocked
 */
static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
					     unsigned long addr,
					     unsigned long len)
{
	struct vm_area_struct *vma;
	unsigned long end = addr + len;
	VMA_ITERATOR(vmi, mm, addr);

	vma = vma_iter_load(&vmi);
	if (!vma)
		return NULL;
	if (vma->vm_start != addr)
		return NULL;
	if (vma->vm_end != end)
		return NULL;

	return vma;
}

/*
 * determine whether a mapping should be permitted and, if so, what sort of
 * mapping we're capable of supporting
 */
static int validate_mmap_request(struct file *file,
				 unsigned long addr,
				 unsigned long len,
				 unsigned long prot,
				 unsigned long flags,
				 unsigned long pgoff,
				 unsigned long *_capabilities)
{
	unsigned long capabilities, rlen;
	int ret;

	/* do the simple checks first */
	if (flags & MAP_FIXED)
		return -EINVAL;

	if ((flags & MAP_TYPE) != MAP_PRIVATE &&
	    (flags & MAP_TYPE) != MAP_SHARED)
		return -EINVAL;

	if (!len)
		return -EINVAL;

	/* Careful about overflows.. */
	rlen = PAGE_ALIGN(len);
	if (!rlen || rlen > TASK_SIZE)
		return -ENOMEM;

	/* offset overflow? */
	if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff)
		return -EOVERFLOW;

	if (file) {
		/* files must support mmap */
		if (!file->f_op->mmap)
			return -ENODEV;

		/* work out if what we've got could possibly be shared
		 * - we support chardevs that provide their own "memory"
		 * - we support files/blockdevs that are memory backed
		 */
		if (file->f_op->mmap_capabilities) {
			capabilities = file->f_op->mmap_capabilities(file);
		} else {
			/* no explicit capabilities set, so assume some
			 * defaults */
			switch (file_inode(file)->i_mode & S_IFMT) {
			case S_IFREG:
			case S_IFBLK:
				capabilities = NOMMU_MAP_COPY;
				break;

			case S_IFCHR:
				capabilities =
					NOMMU_MAP_DIRECT |
					NOMMU_MAP_READ |
					NOMMU_MAP_WRITE;
				break;

			default:
				return -EINVAL;
			}
		}

		/* eliminate any capabilities that we can't support on this
		 * device */
		if (!file->f_op->get_unmapped_area)
			capabilities &= ~NOMMU_MAP_DIRECT;
		if (!(file->f_mode & FMODE_CAN_READ))
			capabilities &= ~NOMMU_MAP_COPY;

		/* The file shall have been opened with read permission. */
		if (!(file->f_mode & FMODE_READ))
			return -EACCES;

		if (flags & MAP_SHARED) {
			/* do checks for writing, appending and locking */
			if ((prot & PROT_WRITE) &&
			    !(file->f_mode & FMODE_WRITE))
				return -EACCES;

			if (IS_APPEND(file_inode(file)) &&
			    (file->f_mode & FMODE_WRITE))
				return -EACCES;

			if (!(capabilities & NOMMU_MAP_DIRECT))
				return -ENODEV;

			/* we mustn't privatise shared mappings */
			capabilities &= ~NOMMU_MAP_COPY;
		} else {
			/* we're going to read the file into private memory we
			 * allocate */
			if (!(capabilities & NOMMU_MAP_COPY))
				return -ENODEV;

			/* we don't permit a private writable mapping to be
			 * shared with the backing device */
			if (prot & PROT_WRITE)
				capabilities &= ~NOMMU_MAP_DIRECT;
		}

		if (capabilities & NOMMU_MAP_DIRECT) {
			if (((prot & PROT_READ)  && !(capabilities & NOMMU_MAP_READ))  ||
			    ((prot & PROT_WRITE) && !(capabilities & NOMMU_MAP_WRITE)) ||
			    ((prot & PROT_EXEC)  && !(capabilities & NOMMU_MAP_EXEC))
			    ) {
				capabilities &= ~NOMMU_MAP_DIRECT;
				if (flags & MAP_SHARED) {
					pr_warn("MAP_SHARED not completely supported on !MMU\n");
					return -EINVAL;
				}
			}
		}

		/* handle executable mappings and implied executable
		 * mappings */
		if (path_noexec(&file->f_path)) {
			if (prot & PROT_EXEC)
				return -EPERM;
		} else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
			/* handle implication of PROT_EXEC by PROT_READ */
			if (current->personality & READ_IMPLIES_EXEC) {
				if (capabilities & NOMMU_MAP_EXEC)
					prot |= PROT_EXEC;
			}
		} else if ((prot & PROT_READ) &&
			 (prot & PROT_EXEC) &&
			 !(capabilities & NOMMU_MAP_EXEC)
			 ) {
			/* backing file is not executable, try to copy */
			capabilities &= ~NOMMU_MAP_DIRECT;
		}
	} else {
		/* anonymous mappings are always memory backed and can be
		 * privately mapped
		 */
		capabilities = NOMMU_MAP_COPY;

		/* handle PROT_EXEC implication by PROT_READ */
		if ((prot & PROT_READ) &&
		    (current->personality & READ_IMPLIES_EXEC))
			prot |= PROT_EXEC;
	}

	/* allow the security API to have its say */
	ret = security_mmap_addr(addr);
	if (ret < 0)
		return ret;

	/* looks okay */
	*_capabilities = capabilities;
	return 0;
}

/*
 * we've determined that we can make the mapping, now translate what we
 * now know into VMA flags
 */
static unsigned long determine_vm_flags(struct file *file,
					unsigned long prot,
					unsigned long flags,
					unsigned long capabilities)
{
	unsigned long vm_flags;

	vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(file, flags);

	if (!file) {
		/*
		 * MAP_ANONYMOUS. MAP_SHARED is mapped to MAP_PRIVATE, because
		 * there is no fork().
		 */
		vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
	} else if (flags & MAP_PRIVATE) {
		/* MAP_PRIVATE file mapping */
		if (capabilities & NOMMU_MAP_DIRECT)
			vm_flags |= (capabilities & NOMMU_VMFLAGS);
		else
			vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

		if (!(prot & PROT_WRITE) && !current->ptrace)
			/*
			 * R/O private file mapping which cannot be used to
			 * modify memory, especially also not via active ptrace
			 * (e.g., set breakpoints) or later by upgrading the
			 * permissions (no mprotect()). We can try overlaying
			 * the file mapping, which will work e.g., on chardevs,
			 * ramfs/tmpfs/shmfs and romfs/cramfs.
			 */
			vm_flags |= VM_MAYOVERLAY;
	} else {
		/* MAP_SHARED file mapping: NOMMU_MAP_DIRECT is set. */
		vm_flags |= VM_SHARED | VM_MAYSHARE |
			    (capabilities & NOMMU_VMFLAGS);
	}

	return vm_flags;
}

/*
 * set up a shared mapping on a file (the driver or filesystem provides and
 * pins the storage)
 */
static int do_mmap_shared_file(struct vm_area_struct *vma)
{
	int ret;

	ret = mmap_file(vma->vm_file, vma);
	if (ret == 0) {
		vma->vm_region->vm_top = vma->vm_region->vm_end;
		return 0;
	}
	if (ret != -ENOSYS)
		return ret;

	/* getting -ENOSYS indicates that direct mmap isn't possible (as
	 * opposed to tried but failed) so we can only give a suitable error as
	 * it's not possible to make a private copy if MAP_SHARED was given */
	return -ENODEV;
}

/*
 * set up a private mapping or an anonymous shared mapping
 */
static int do_mmap_private(struct vm_area_struct *vma,
			   struct vm_region *region,
			   unsigned long len,
			   unsigned long capabilities)
{
	unsigned long total, point;
	void *base;
	int ret, order;

	/*
	 * Invoke the file's mapping function so that it can keep track of
	 * shared mappings on devices or memory. VM_MAYOVERLAY will be set if
	 * it may attempt to share, which will make is_nommu_shared_mapping()
	 * happy.
	 */
	if (capabilities & NOMMU_MAP_DIRECT) {
		ret = mmap_file(vma->vm_file, vma);
		/* shouldn't return success if we're not sharing */
		if (WARN_ON_ONCE(!is_nommu_shared_mapping(vma->vm_flags)))
			ret = -ENOSYS;
		if (ret == 0) {
			vma->vm_region->vm_top = vma->vm_region->vm_end;
			return 0;
		}
		if (ret != -ENOSYS)
			return ret;

		/* getting an ENOSYS error indicates that direct mmap isn't
		 * possible (as opposed to tried but failed) so we'll try to
		 * make a private copy of the data and map that instead */
	}


	/* allocate some memory to hold the mapping
	 * - note that this may not return a page-aligned address if the object
	 *   we're allocating is smaller than a page
	 */
	order = get_order(len);
	total = 1 << order;
	point = len >> PAGE_SHIFT;

	/* we don't want to allocate a power-of-2 sized page set */
	if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages)
		total = point;

	base = alloc_pages_exact(total << PAGE_SHIFT, GFP_KERNEL);
	if (!base)
		goto enomem;

	atomic_long_add(total, &mmap_pages_allocated);

	vm_flags_set(vma, VM_MAPPED_COPY);
	region->vm_flags = vma->vm_flags;
	region->vm_start = (unsigned long) base;
	region->vm_end   = region->vm_start + len;
	region->vm_top   = region->vm_start + (total << PAGE_SHIFT);

	vma->vm_start = region->vm_start;
	vma->vm_end   = region->vm_start + len;

	if (vma->vm_file) {
		/* read the contents of a file into the copy */
		loff_t fpos;

		fpos = vma->vm_pgoff;
		fpos <<= PAGE_SHIFT;

		ret = kernel_read(vma->vm_file, base, len, &fpos);
		if (ret < 0)
			goto error_free;

		/* clear the last little bit */
		if (ret < len)
			memset(base + ret, 0, len - ret);

	} else {
		vma_set_anonymous(vma);
	}

	return 0;

error_free:
	free_page_series(region->vm_start, region->vm_top);
	region->vm_start = vma->vm_start = 0;
	region->vm_end   = vma->vm_end = 0;
	region->vm_top   = 0;
	return ret;

enomem:
	pr_err("Allocation of length %lu from process %d (%s) failed\n",
	       len, current->pid, current->comm);
	show_mem();
	return -ENOMEM;
}

/*
 * handle mapping creation for uClinux
 */
unsigned long do_mmap(struct file *file,
			unsigned long addr,
			unsigned long len,
			unsigned long prot,
			unsigned long flags,
			vm_flags_t vm_flags,
			unsigned long pgoff,
			unsigned long *populate,
			struct list_head *uf)
{
	struct vm_area_struct *vma;
	struct vm_region *region;
	struct rb_node *rb;
	unsigned long capabilities, result;
	int ret;
	VMA_ITERATOR(vmi, current->mm, 0);

	*populate = 0;

	/* decide whether we should attempt the mapping, and if so what sort of
	 * mapping */
	ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
				    &capabilities);
	if (ret < 0)
		return ret;

	/* we ignore the address hint */
	addr = 0;
	len = PAGE_ALIGN(len);

	/* we've determined that we can make the mapping, now translate what we
	 * now know into VMA flags */
	vm_flags |= determine_vm_flags(file, prot, flags, capabilities);


	/* we're going to need to record the mapping */
	region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL);
	if (!region)
		goto error_getting_region;

	vma = vm_area_alloc(current->mm);
	if (!vma)
		goto error_getting_vma;

	region->vm_usage = 1;
	region->vm_flags = vm_flags;
	region->vm_pgoff = pgoff;

	vm_flags_init(vma, vm_flags);
	vma->vm_pgoff = pgoff;

	if (file) {
		region->vm_file = get_file(file);
		vma->vm_file = get_file(file);
	}

	down_write(&nommu_region_sem);

	/* if we want to share, we need to check for regions created by other
	 * mmap() calls that overlap with our proposed mapping
	 * - we can only share with a superset match on most regular files
	 * - shared mappings on character devices and memory backed files are
	 *   permitted to overlap inexactly as far as we are concerned for in
	 *   these cases, sharing is handled in the driver or filesystem rather
	 *   than here
	 */
	if (is_nommu_shared_mapping(vm_flags)) {
		struct vm_region *pregion;
		unsigned long pglen, rpglen, pgend, rpgend, start;

		pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		pgend = pgoff + pglen;

		for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) {
			pregion = rb_entry(rb, struct vm_region, vm_rb);

			if (!is_nommu_shared_mapping(pregion->vm_flags))
				continue;

			/* search for overlapping mappings on the same file */
			if (file_inode(pregion->vm_file) !=
			    file_inode(file))
				continue;

			if (pregion->vm_pgoff >= pgend)
				continue;

			rpglen = pregion->vm_end - pregion->vm_start;
			rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT;
			rpgend = pregion->vm_pgoff + rpglen;
			if (pgoff >= rpgend)
				continue;

			/* handle inexactly overlapping matches between
			 * mappings */
			if ((pregion->vm_pgoff != pgoff || rpglen != pglen) &&
			    !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) {
				/* new mapping is not a subset of the region */
				if (!(capabilities & NOMMU_MAP_DIRECT))
					goto sharing_violation;
				continue;
			}

			/* we've found a region we can share */
			pregion->vm_usage++;
			vma->vm_region = pregion;
			start = pregion->vm_start;
			start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT;
			vma->vm_start = start;
			vma->vm_end = start + len;

			if (pregion->vm_flags & VM_MAPPED_COPY)
				vm_flags_set(vma, VM_MAPPED_COPY);
1165 vma->vm_flags |= VM_MAPPED_COPY; 1115 else { 1166 else { 1116 ret = do_mmap 1167 ret = do_mmap_shared_file(vma); 1117 if (ret < 0) 1168 if (ret < 0) { 1118 vma-> 1169 vma->vm_region = NULL; 1119 vma-> 1170 vma->vm_start = 0; 1120 vma-> 1171 vma->vm_end = 0; 1121 pregi 1172 pregion->vm_usage--; 1122 pregi 1173 pregion = NULL; 1123 goto 1174 goto error_just_free; 1124 } 1175 } 1125 } 1176 } 1126 fput(region->vm_file) 1177 fput(region->vm_file); 1127 kmem_cache_free(vm_re 1178 kmem_cache_free(vm_region_jar, region); 1128 region = pregion; 1179 region = pregion; 1129 result = start; 1180 result = start; 1130 goto share; 1181 goto share; 1131 } 1182 } 1132 1183 1133 /* obtain the address at whic 1184 /* obtain the address at which to make a shared mapping 1134 * - this is the hook for qua 1185 * - this is the hook for quasi-memory character devices to 1135 * tell us the location of 1186 * tell us the location of a shared mapping 1136 */ 1187 */ 1137 if (capabilities & NOMMU_MAP_ 1188 if (capabilities & NOMMU_MAP_DIRECT) { 1138 addr = file->f_op->ge 1189 addr = file->f_op->get_unmapped_area(file, addr, len, 1139 1190 pgoff, flags); 1140 if (IS_ERR_VALUE(addr 1191 if (IS_ERR_VALUE(addr)) { 1141 ret = addr; 1192 ret = addr; 1142 if (ret != -E 1193 if (ret != -ENOSYS) 1143 goto 1194 goto error_just_free; 1144 1195 1145 /* the driver 1196 /* the driver refused to tell us where to site 1146 * the mappin 1197 * the mapping so we'll have to attempt to copy 1147 * it */ 1198 * it */ 1148 ret = -ENODEV 1199 ret = -ENODEV; 1149 if (!(capabil 1200 if (!(capabilities & NOMMU_MAP_COPY)) 1150 goto 1201 goto error_just_free; 1151 1202 1152 capabilities 1203 capabilities &= ~NOMMU_MAP_DIRECT; 1153 } else { 1204 } else { 1154 vma->vm_start 1205 vma->vm_start = region->vm_start = addr; 1155 vma->vm_end = 1206 vma->vm_end = region->vm_end = addr + len; 1156 } 1207 } 1157 } 1208 } 1158 } 1209 } 1159 1210 1160 vma->vm_region = region; 1211 vma->vm_region = region; 1161 1212 1162 /* set up the mapping 1213 /* set up the mapping 1163 * - the region is filled in if NOMMU 1214 * - the region is filled in if NOMMU_MAP_DIRECT is still set 1164 */ 1215 */ 1165 if (file && vma->vm_flags & VM_SHARED 1216 if (file && vma->vm_flags & VM_SHARED) 1166 ret = do_mmap_shared_file(vma 1217 ret = do_mmap_shared_file(vma); 1167 else 1218 else 1168 ret = do_mmap_private(vma, re 1219 ret = do_mmap_private(vma, region, len, capabilities); 1169 if (ret < 0) 1220 if (ret < 0) 1170 goto error_just_free; 1221 goto error_just_free; 1171 add_nommu_region(region); 1222 add_nommu_region(region); 1172 1223 1173 /* clear anonymous mappings that don' 1224 /* clear anonymous mappings that don't ask for uninitialized data */ 1174 if (!vma->vm_file && 1225 if (!vma->vm_file && 1175 (!IS_ENABLED(CONFIG_MMAP_ALLOW_UN 1226 (!IS_ENABLED(CONFIG_MMAP_ALLOW_UNINITIALIZED) || 1176 !(flags & MAP_UNINITIALIZED))) 1227 !(flags & MAP_UNINITIALIZED))) 1177 memset((void *)region->vm_sta 1228 memset((void *)region->vm_start, 0, 1178 region->vm_end - regio 1229 region->vm_end - region->vm_start); 1179 1230 1180 /* okay... we have a mapping; now we 1231 /* okay... we have a mapping; now we have to register it */ 1181 result = vma->vm_start; 1232 result = vma->vm_start; 1182 1233 1183 current->mm->total_vm += len >> PAGE_ 1234 current->mm->total_vm += len >> PAGE_SHIFT; 1184 1235 1185 share: 1236 share: 1186 BUG_ON(!vma->vm_region); !! 
1237 add_vma_to_mm(current->mm, vma); 1187 vma_iter_config(&vmi, vma->vm_start, << 1188 if (vma_iter_prealloc(&vmi, vma)) << 1189 goto error_just_free; << 1190 << 1191 setup_vma_to_mm(vma, current->mm); << 1192 current->mm->map_count++; << 1193 /* add the VMA to the tree */ << 1194 vma_iter_store(&vmi, vma); << 1195 1238 1196 /* we flush the region from the icach 1239 /* we flush the region from the icache only when the first executable 1197 * mapping of it is made */ 1240 * mapping of it is made */ 1198 if (vma->vm_flags & VM_EXEC && !regio 1241 if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) { 1199 flush_icache_user_range(regio 1242 flush_icache_user_range(region->vm_start, region->vm_end); 1200 region->vm_icache_flushed = t 1243 region->vm_icache_flushed = true; 1201 } 1244 } 1202 1245 1203 up_write(&nommu_region_sem); 1246 up_write(&nommu_region_sem); 1204 1247 1205 return result; 1248 return result; 1206 1249 1207 error_just_free: 1250 error_just_free: 1208 up_write(&nommu_region_sem); 1251 up_write(&nommu_region_sem); 1209 error: 1252 error: 1210 vma_iter_free(&vmi); << 1211 if (region->vm_file) 1253 if (region->vm_file) 1212 fput(region->vm_file); 1254 fput(region->vm_file); 1213 kmem_cache_free(vm_region_jar, region 1255 kmem_cache_free(vm_region_jar, region); 1214 if (vma->vm_file) 1256 if (vma->vm_file) 1215 fput(vma->vm_file); 1257 fput(vma->vm_file); 1216 vm_area_free(vma); 1258 vm_area_free(vma); 1217 return ret; 1259 return ret; 1218 1260 1219 sharing_violation: 1261 sharing_violation: 1220 up_write(&nommu_region_sem); 1262 up_write(&nommu_region_sem); 1221 pr_warn("Attempt to share mismatched 1263 pr_warn("Attempt to share mismatched mappings\n"); 1222 ret = -EINVAL; 1264 ret = -EINVAL; 1223 goto error; 1265 goto error; 1224 1266 1225 error_getting_vma: 1267 error_getting_vma: 1226 kmem_cache_free(vm_region_jar, region 1268 kmem_cache_free(vm_region_jar, region); 1227 pr_warn("Allocation of vma for %lu by 1269 pr_warn("Allocation of vma for %lu byte allocation from process %d failed\n", 1228 len, current->pid); 1270 len, current->pid); 1229 show_mem(); !! 1271 show_free_areas(0, NULL); 1230 return -ENOMEM; 1272 return -ENOMEM; 1231 1273 1232 error_getting_region: 1274 error_getting_region: 1233 pr_warn("Allocation of vm region for 1275 pr_warn("Allocation of vm region for %lu byte allocation from process %d failed\n", 1234 len, current->pid); 1276 len, current->pid); 1235 show_mem(); !! 
1277 show_free_areas(0, NULL); 1236 return -ENOMEM; 1278 return -ENOMEM; 1237 } 1279 } 1238 1280 1239 unsigned long ksys_mmap_pgoff(unsigned long a 1281 unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, 1240 unsigned long p 1282 unsigned long prot, unsigned long flags, 1241 unsigned long f 1283 unsigned long fd, unsigned long pgoff) 1242 { 1284 { 1243 struct file *file = NULL; 1285 struct file *file = NULL; 1244 unsigned long retval = -EBADF; 1286 unsigned long retval = -EBADF; 1245 1287 1246 audit_mmap_fd(fd, flags); 1288 audit_mmap_fd(fd, flags); 1247 if (!(flags & MAP_ANONYMOUS)) { 1289 if (!(flags & MAP_ANONYMOUS)) { 1248 file = fget(fd); 1290 file = fget(fd); 1249 if (!file) 1291 if (!file) 1250 goto out; 1292 goto out; 1251 } 1293 } 1252 1294 1253 retval = vm_mmap_pgoff(file, addr, le 1295 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); 1254 1296 1255 if (file) 1297 if (file) 1256 fput(file); 1298 fput(file); 1257 out: 1299 out: 1258 return retval; 1300 return retval; 1259 } 1301 } 1260 1302 1261 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, ad 1303 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, 1262 unsigned long, prot, unsigned 1304 unsigned long, prot, unsigned long, flags, 1263 unsigned long, fd, unsigned l 1305 unsigned long, fd, unsigned long, pgoff) 1264 { 1306 { 1265 return ksys_mmap_pgoff(addr, len, pro 1307 return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); 1266 } 1308 } 1267 1309 1268 #ifdef __ARCH_WANT_SYS_OLD_MMAP 1310 #ifdef __ARCH_WANT_SYS_OLD_MMAP 1269 struct mmap_arg_struct { 1311 struct mmap_arg_struct { 1270 unsigned long addr; 1312 unsigned long addr; 1271 unsigned long len; 1313 unsigned long len; 1272 unsigned long prot; 1314 unsigned long prot; 1273 unsigned long flags; 1315 unsigned long flags; 1274 unsigned long fd; 1316 unsigned long fd; 1275 unsigned long offset; 1317 unsigned long offset; 1276 }; 1318 }; 1277 1319 1278 SYSCALL_DEFINE1(old_mmap, struct mmap_arg_str 1320 SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) 1279 { 1321 { 1280 struct mmap_arg_struct a; 1322 struct mmap_arg_struct a; 1281 1323 1282 if (copy_from_user(&a, arg, sizeof(a) 1324 if (copy_from_user(&a, arg, sizeof(a))) 1283 return -EFAULT; 1325 return -EFAULT; 1284 if (offset_in_page(a.offset)) 1326 if (offset_in_page(a.offset)) 1285 return -EINVAL; 1327 return -EINVAL; 1286 1328 1287 return ksys_mmap_pgoff(a.addr, a.len, 1329 return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, 1288 a.offset >> PA 1330 a.offset >> PAGE_SHIFT); 1289 } 1331 } 1290 #endif /* __ARCH_WANT_SYS_OLD_MMAP */ 1332 #endif /* __ARCH_WANT_SYS_OLD_MMAP */ 1291 1333 1292 /* 1334 /* 1293 * split a vma into two pieces at address 'ad 1335 * split a vma into two pieces at address 'addr', a new vma is allocated either 1294 * for the first part or the tail. 1336 * for the first part or the tail. 1295 */ 1337 */ 1296 static int split_vma(struct vma_iterator *vmi !! 1338 int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, 1297 unsigned long addr, int !! 
1339 unsigned long addr, int new_below) 1298 { 1340 { 1299 struct vm_area_struct *new; 1341 struct vm_area_struct *new; 1300 struct vm_region *region; 1342 struct vm_region *region; 1301 unsigned long npages; 1343 unsigned long npages; 1302 struct mm_struct *mm; << 1303 1344 1304 /* we're only permitted to split anon 1345 /* we're only permitted to split anonymous regions (these should have 1305 * only a single usage on the region) 1346 * only a single usage on the region) */ 1306 if (vma->vm_file) 1347 if (vma->vm_file) 1307 return -ENOMEM; 1348 return -ENOMEM; 1308 1349 1309 mm = vma->vm_mm; << 1310 if (mm->map_count >= sysctl_max_map_c 1350 if (mm->map_count >= sysctl_max_map_count) 1311 return -ENOMEM; 1351 return -ENOMEM; 1312 1352 1313 region = kmem_cache_alloc(vm_region_j 1353 region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL); 1314 if (!region) 1354 if (!region) 1315 return -ENOMEM; 1355 return -ENOMEM; 1316 1356 1317 new = vm_area_dup(vma); 1357 new = vm_area_dup(vma); 1318 if (!new) !! 1358 if (!new) { 1319 goto err_vma_dup; !! 1359 kmem_cache_free(vm_region_jar, region); >> 1360 return -ENOMEM; >> 1361 } 1320 1362 1321 /* most fields are the same, copy all 1363 /* most fields are the same, copy all, and then fixup */ 1322 *region = *vma->vm_region; 1364 *region = *vma->vm_region; 1323 new->vm_region = region; 1365 new->vm_region = region; 1324 1366 1325 npages = (addr - vma->vm_start) >> PA 1367 npages = (addr - vma->vm_start) >> PAGE_SHIFT; 1326 1368 1327 if (new_below) { 1369 if (new_below) { 1328 region->vm_top = region->vm_e 1370 region->vm_top = region->vm_end = new->vm_end = addr; 1329 } else { 1371 } else { 1330 region->vm_start = new->vm_st 1372 region->vm_start = new->vm_start = addr; 1331 region->vm_pgoff = new->vm_pg 1373 region->vm_pgoff = new->vm_pgoff += npages; 1332 } 1374 } 1333 1375 1334 vma_iter_config(vmi, new->vm_start, n << 1335 if (vma_iter_prealloc(vmi, vma)) { << 1336 pr_warn("Allocation of vma tr << 1337 current->pid); << 1338 goto err_vmi_preallocate; << 1339 } << 1340 << 1341 if (new->vm_ops && new->vm_ops->open) 1376 if (new->vm_ops && new->vm_ops->open) 1342 new->vm_ops->open(new); 1377 new->vm_ops->open(new); 1343 1378 >> 1379 delete_vma_from_mm(vma); 1344 down_write(&nommu_region_sem); 1380 down_write(&nommu_region_sem); 1345 delete_nommu_region(vma->vm_region); 1381 delete_nommu_region(vma->vm_region); 1346 if (new_below) { 1382 if (new_below) { 1347 vma->vm_region->vm_start = vm 1383 vma->vm_region->vm_start = vma->vm_start = addr; 1348 vma->vm_region->vm_pgoff = vm 1384 vma->vm_region->vm_pgoff = vma->vm_pgoff += npages; 1349 } else { 1385 } else { 1350 vma->vm_region->vm_end = vma- 1386 vma->vm_region->vm_end = vma->vm_end = addr; 1351 vma->vm_region->vm_top = addr 1387 vma->vm_region->vm_top = addr; 1352 } 1388 } 1353 add_nommu_region(vma->vm_region); 1389 add_nommu_region(vma->vm_region); 1354 add_nommu_region(new->vm_region); 1390 add_nommu_region(new->vm_region); 1355 up_write(&nommu_region_sem); 1391 up_write(&nommu_region_sem); 1356 !! 1392 add_vma_to_mm(mm, vma); 1357 setup_vma_to_mm(vma, mm); !! 
1393 add_vma_to_mm(mm, new); 1358 setup_vma_to_mm(new, mm); << 1359 vma_iter_store(vmi, new); << 1360 mm->map_count++; << 1361 return 0; 1394 return 0; 1362 << 1363 err_vmi_preallocate: << 1364 vm_area_free(new); << 1365 err_vma_dup: << 1366 kmem_cache_free(vm_region_jar, region << 1367 return -ENOMEM; << 1368 } 1395 } 1369 1396 1370 /* 1397 /* 1371 * shrink a VMA by removing the specified chu 1398 * shrink a VMA by removing the specified chunk from either the beginning or 1372 * the end 1399 * the end 1373 */ 1400 */ 1374 static int vmi_shrink_vma(struct vma_iterator !! 1401 static int shrink_vma(struct mm_struct *mm, 1375 struct vm_area_struct * 1402 struct vm_area_struct *vma, 1376 unsigned long from, uns 1403 unsigned long from, unsigned long to) 1377 { 1404 { 1378 struct vm_region *region; 1405 struct vm_region *region; 1379 1406 1380 /* adjust the VMA's pointers, which m 1407 /* adjust the VMA's pointers, which may reposition it in the MM's tree 1381 * and list */ 1408 * and list */ 1382 if (from > vma->vm_start) { !! 1409 delete_vma_from_mm(vma); 1383 if (vma_iter_clear_gfp(vmi, f !! 1410 if (from > vma->vm_start) 1384 return -ENOMEM; << 1385 vma->vm_end = from; 1411 vma->vm_end = from; 1386 } else { !! 1412 else 1387 if (vma_iter_clear_gfp(vmi, v << 1388 return -ENOMEM; << 1389 vma->vm_start = to; 1413 vma->vm_start = to; 1390 } !! 1414 add_vma_to_mm(mm, vma); 1391 1415 1392 /* cut the backing region down to siz 1416 /* cut the backing region down to size */ 1393 region = vma->vm_region; 1417 region = vma->vm_region; 1394 BUG_ON(region->vm_usage != 1); 1418 BUG_ON(region->vm_usage != 1); 1395 1419 1396 down_write(&nommu_region_sem); 1420 down_write(&nommu_region_sem); 1397 delete_nommu_region(region); 1421 delete_nommu_region(region); 1398 if (from > region->vm_start) { 1422 if (from > region->vm_start) { 1399 to = region->vm_top; 1423 to = region->vm_top; 1400 region->vm_top = region->vm_e 1424 region->vm_top = region->vm_end = from; 1401 } else { 1425 } else { 1402 region->vm_start = to; 1426 region->vm_start = to; 1403 } 1427 } 1404 add_nommu_region(region); 1428 add_nommu_region(region); 1405 up_write(&nommu_region_sem); 1429 up_write(&nommu_region_sem); 1406 1430 1407 free_page_series(from, to); 1431 free_page_series(from, to); 1408 return 0; 1432 return 0; 1409 } 1433 } 1410 1434 1411 /* 1435 /* 1412 * release a mapping 1436 * release a mapping 1413 * - under NOMMU conditions the chunk to be u 1437 * - under NOMMU conditions the chunk to be unmapped must be backed by a single 1414 * VMA, though it need not cover the whole 1438 * VMA, though it need not cover the whole VMA 1415 */ 1439 */ 1416 int do_munmap(struct mm_struct *mm, unsigned 1440 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf) 1417 { 1441 { 1418 VMA_ITERATOR(vmi, mm, start); << 1419 struct vm_area_struct *vma; 1442 struct vm_area_struct *vma; 1420 unsigned long end; 1443 unsigned long end; 1421 int ret = 0; !! 1444 int ret; 1422 1445 1423 len = PAGE_ALIGN(len); 1446 len = PAGE_ALIGN(len); 1424 if (len == 0) 1447 if (len == 0) 1425 return -EINVAL; 1448 return -EINVAL; 1426 1449 1427 end = start + len; 1450 end = start + len; 1428 1451 1429 /* find the first potentially overlap 1452 /* find the first potentially overlapping VMA */ 1430 vma = vma_find(&vmi, end); !! 
1453 vma = find_vma(mm, start); 1431 if (!vma) { 1454 if (!vma) { 1432 static int limit; 1455 static int limit; 1433 if (limit < 5) { 1456 if (limit < 5) { 1434 pr_warn("munmap of me 1457 pr_warn("munmap of memory not mmapped by process %d (%s): 0x%lx-0x%lx\n", 1435 curre 1458 current->pid, current->comm, 1436 start 1459 start, start + len - 1); 1437 limit++; 1460 limit++; 1438 } 1461 } 1439 return -EINVAL; 1462 return -EINVAL; 1440 } 1463 } 1441 1464 1442 /* we're allowed to split an anonymou 1465 /* we're allowed to split an anonymous VMA but not a file-backed one */ 1443 if (vma->vm_file) { 1466 if (vma->vm_file) { 1444 do { 1467 do { 1445 if (start > vma->vm_s 1468 if (start > vma->vm_start) 1446 return -EINVA 1469 return -EINVAL; 1447 if (end == vma->vm_en 1470 if (end == vma->vm_end) 1448 goto erase_wh 1471 goto erase_whole_vma; 1449 vma = vma_find(&vmi, !! 1472 vma = vma->vm_next; 1450 } while (vma); 1473 } while (vma); 1451 return -EINVAL; 1474 return -EINVAL; 1452 } else { 1475 } else { 1453 /* the chunk must be a subset 1476 /* the chunk must be a subset of the VMA found */ 1454 if (start == vma->vm_start && 1477 if (start == vma->vm_start && end == vma->vm_end) 1455 goto erase_whole_vma; 1478 goto erase_whole_vma; 1456 if (start < vma->vm_start || 1479 if (start < vma->vm_start || end > vma->vm_end) 1457 return -EINVAL; 1480 return -EINVAL; 1458 if (offset_in_page(start)) 1481 if (offset_in_page(start)) 1459 return -EINVAL; 1482 return -EINVAL; 1460 if (end != vma->vm_end && off 1483 if (end != vma->vm_end && offset_in_page(end)) 1461 return -EINVAL; 1484 return -EINVAL; 1462 if (start != vma->vm_start && 1485 if (start != vma->vm_start && end != vma->vm_end) { 1463 ret = split_vma(&vmi, !! 1486 ret = split_vma(mm, vma, start, 1); 1464 if (ret < 0) 1487 if (ret < 0) 1465 return ret; 1488 return ret; 1466 } 1489 } 1467 return vmi_shrink_vma(&vmi, v !! 1490 return shrink_vma(mm, vma, start, end); 1468 } 1491 } 1469 1492 1470 erase_whole_vma: 1493 erase_whole_vma: 1471 if (delete_vma_from_mm(vma)) !! 1494 delete_vma_from_mm(vma); 1472 ret = -ENOMEM; !! 1495 delete_vma(mm, vma); 1473 else !! 1496 return 0; 1474 delete_vma(mm, vma); << 1475 return ret; << 1476 } 1497 } 1477 1498 1478 int vm_munmap(unsigned long addr, size_t len) 1499 int vm_munmap(unsigned long addr, size_t len) 1479 { 1500 { 1480 struct mm_struct *mm = current->mm; 1501 struct mm_struct *mm = current->mm; 1481 int ret; 1502 int ret; 1482 1503 1483 mmap_write_lock(mm); 1504 mmap_write_lock(mm); 1484 ret = do_munmap(mm, addr, len, NULL); 1505 ret = do_munmap(mm, addr, len, NULL); 1485 mmap_write_unlock(mm); 1506 mmap_write_unlock(mm); 1486 return ret; 1507 return ret; 1487 } 1508 } 1488 EXPORT_SYMBOL(vm_munmap); 1509 EXPORT_SYMBOL(vm_munmap); 1489 1510 1490 SYSCALL_DEFINE2(munmap, unsigned long, addr, 1511 SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) 1491 { 1512 { 1492 return vm_munmap(addr, len); 1513 return vm_munmap(addr, len); 1493 } 1514 } 1494 1515 1495 /* 1516 /* 1496 * release all the mappings made in a process 1517 * release all the mappings made in a process's VM space 1497 */ 1518 */ 1498 void exit_mmap(struct mm_struct *mm) 1519 void exit_mmap(struct mm_struct *mm) 1499 { 1520 { 1500 VMA_ITERATOR(vmi, mm, 0); << 1501 struct vm_area_struct *vma; 1521 struct vm_area_struct *vma; 1502 1522 1503 if (!mm) 1523 if (!mm) 1504 return; 1524 return; 1505 1525 1506 mm->total_vm = 0; 1526 mm->total_vm = 0; 1507 1527 1508 /* !! 1528 while ((vma = mm->mmap)) { 1509 * Lock the mm to avoid assert compla !! 
1529 mm->mmap = vma->vm_next; 1510 * user of the mm !! 1530 delete_vma_from_mm(vma); 1511 */ << 1512 mmap_write_lock(mm); << 1513 for_each_vma(vmi, vma) { << 1514 cleanup_vma_from_mm(vma); << 1515 delete_vma(mm, vma); 1531 delete_vma(mm, vma); 1516 cond_resched(); 1532 cond_resched(); 1517 } 1533 } 1518 __mt_destroy(&mm->mm_mt); !! 1534 } 1519 mmap_write_unlock(mm); !! 1535 >> 1536 int vm_brk(unsigned long addr, unsigned long len) >> 1537 { >> 1538 return -ENOMEM; 1520 } 1539 } 1521 1540 1522 /* 1541 /* 1523 * expand (or shrink) an existing mapping, po 1542 * expand (or shrink) an existing mapping, potentially moving it at the same 1524 * time (controlled by the MREMAP_MAYMOVE fla 1543 * time (controlled by the MREMAP_MAYMOVE flag and available VM space) 1525 * 1544 * 1526 * under NOMMU conditions, we only permit cha 1545 * under NOMMU conditions, we only permit changing a mapping's size, and only 1527 * as long as it stays within the region allo 1546 * as long as it stays within the region allocated by do_mmap_private() and the 1528 * block is not shareable 1547 * block is not shareable 1529 * 1548 * 1530 * MREMAP_FIXED is not supported under NOMMU 1549 * MREMAP_FIXED is not supported under NOMMU conditions 1531 */ 1550 */ 1532 static unsigned long do_mremap(unsigned long 1551 static unsigned long do_mremap(unsigned long addr, 1533 unsigned long old_len 1552 unsigned long old_len, unsigned long new_len, 1534 unsigned long flags, 1553 unsigned long flags, unsigned long new_addr) 1535 { 1554 { 1536 struct vm_area_struct *vma; 1555 struct vm_area_struct *vma; 1537 1556 1538 /* insanity checks first */ 1557 /* insanity checks first */ 1539 old_len = PAGE_ALIGN(old_len); 1558 old_len = PAGE_ALIGN(old_len); 1540 new_len = PAGE_ALIGN(new_len); 1559 new_len = PAGE_ALIGN(new_len); 1541 if (old_len == 0 || new_len == 0) 1560 if (old_len == 0 || new_len == 0) 1542 return (unsigned long) -EINVA 1561 return (unsigned long) -EINVAL; 1543 1562 1544 if (offset_in_page(addr)) 1563 if (offset_in_page(addr)) 1545 return -EINVAL; 1564 return -EINVAL; 1546 1565 1547 if (flags & MREMAP_FIXED && new_addr 1566 if (flags & MREMAP_FIXED && new_addr != addr) 1548 return (unsigned long) -EINVA 1567 return (unsigned long) -EINVAL; 1549 1568 1550 vma = find_vma_exact(current->mm, add 1569 vma = find_vma_exact(current->mm, addr, old_len); 1551 if (!vma) 1570 if (!vma) 1552 return (unsigned long) -EINVA 1571 return (unsigned long) -EINVAL; 1553 1572 1554 if (vma->vm_end != vma->vm_start + ol 1573 if (vma->vm_end != vma->vm_start + old_len) 1555 return (unsigned long) -EFAUL 1574 return (unsigned long) -EFAULT; 1556 1575 1557 if (is_nommu_shared_mapping(vma->vm_f !! 
1576 if (vma->vm_flags & VM_MAYSHARE) 1558 return (unsigned long) -EPERM 1577 return (unsigned long) -EPERM; 1559 1578 1560 if (new_len > vma->vm_region->vm_end 1579 if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start) 1561 return (unsigned long) -ENOME 1580 return (unsigned long) -ENOMEM; 1562 1581 1563 /* all checks complete - do it */ 1582 /* all checks complete - do it */ 1564 vma->vm_end = vma->vm_start + new_len 1583 vma->vm_end = vma->vm_start + new_len; 1565 return vma->vm_start; 1584 return vma->vm_start; 1566 } 1585 } 1567 1586 1568 SYSCALL_DEFINE5(mremap, unsigned long, addr, 1587 SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, 1569 unsigned long, new_len, unsig 1588 unsigned long, new_len, unsigned long, flags, 1570 unsigned long, new_addr) 1589 unsigned long, new_addr) 1571 { 1590 { 1572 unsigned long ret; 1591 unsigned long ret; 1573 1592 1574 mmap_write_lock(current->mm); 1593 mmap_write_lock(current->mm); 1575 ret = do_mremap(addr, old_len, new_le 1594 ret = do_mremap(addr, old_len, new_len, flags, new_addr); 1576 mmap_write_unlock(current->mm); 1595 mmap_write_unlock(current->mm); 1577 return ret; 1596 return ret; 1578 } 1597 } 1579 1598 >> 1599 struct page *follow_page(struct vm_area_struct *vma, unsigned long address, >> 1600 unsigned int foll_flags) >> 1601 { >> 1602 return NULL; >> 1603 } >> 1604 1580 int remap_pfn_range(struct vm_area_struct *vm 1605 int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, 1581 unsigned long pfn, unsigned l 1606 unsigned long pfn, unsigned long size, pgprot_t prot) 1582 { 1607 { 1583 if (addr != (pfn << PAGE_SHIFT)) 1608 if (addr != (pfn << PAGE_SHIFT)) 1584 return -EINVAL; 1609 return -EINVAL; 1585 1610 1586 vm_flags_set(vma, VM_IO | VM_PFNMAP | !! 1611 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; 1587 return 0; 1612 return 0; 1588 } 1613 } 1589 EXPORT_SYMBOL(remap_pfn_range); 1614 EXPORT_SYMBOL(remap_pfn_range); 1590 1615 1591 int vm_iomap_memory(struct vm_area_struct *vm 1616 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len) 1592 { 1617 { 1593 unsigned long pfn = start >> PAGE_SHI 1618 unsigned long pfn = start >> PAGE_SHIFT; 1594 unsigned long vm_len = vma->vm_end - 1619 unsigned long vm_len = vma->vm_end - vma->vm_start; 1595 1620 1596 pfn += vma->vm_pgoff; 1621 pfn += vma->vm_pgoff; 1597 return io_remap_pfn_range(vma, vma->v 1622 return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); 1598 } 1623 } 1599 EXPORT_SYMBOL(vm_iomap_memory); 1624 EXPORT_SYMBOL(vm_iomap_memory); 1600 1625 1601 int remap_vmalloc_range(struct vm_area_struct 1626 int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, 1602 unsigned long pgoff) 1627 unsigned long pgoff) 1603 { 1628 { 1604 unsigned int size = vma->vm_end - vma 1629 unsigned int size = vma->vm_end - vma->vm_start; 1605 1630 1606 if (!(vma->vm_flags & VM_USERMAP)) 1631 if (!(vma->vm_flags & VM_USERMAP)) 1607 return -EINVAL; 1632 return -EINVAL; 1608 1633 1609 vma->vm_start = (unsigned long)(addr 1634 vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT)); 1610 vma->vm_end = vma->vm_start + size; 1635 vma->vm_end = vma->vm_start + size; 1611 1636 1612 return 0; 1637 return 0; 1613 } 1638 } 1614 EXPORT_SYMBOL(remap_vmalloc_range); 1639 EXPORT_SYMBOL(remap_vmalloc_range); 1615 1640 1616 vm_fault_t filemap_fault(struct vm_fault *vmf 1641 vm_fault_t filemap_fault(struct vm_fault *vmf) 1617 { 1642 { 1618 BUG(); 1643 BUG(); 1619 return 0; 1644 return 0; 1620 } 
1645 } 1621 EXPORT_SYMBOL(filemap_fault); 1646 EXPORT_SYMBOL(filemap_fault); 1622 1647 1623 vm_fault_t filemap_map_pages(struct vm_fault 1648 vm_fault_t filemap_map_pages(struct vm_fault *vmf, 1624 pgoff_t start_pgoff, pgoff_t 1649 pgoff_t start_pgoff, pgoff_t end_pgoff) 1625 { 1650 { 1626 BUG(); 1651 BUG(); 1627 return 0; 1652 return 0; 1628 } 1653 } 1629 EXPORT_SYMBOL(filemap_map_pages); 1654 EXPORT_SYMBOL(filemap_map_pages); 1630 1655 1631 static int __access_remote_vm(struct mm_struc !! 1656 int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, 1632 void *buf, int !! 1657 int len, unsigned int gup_flags) 1633 { 1658 { 1634 struct vm_area_struct *vma; 1659 struct vm_area_struct *vma; 1635 int write = gup_flags & FOLL_WRITE; 1660 int write = gup_flags & FOLL_WRITE; 1636 1661 1637 if (mmap_read_lock_killable(mm)) 1662 if (mmap_read_lock_killable(mm)) 1638 return 0; 1663 return 0; 1639 1664 1640 /* the access must start within one o 1665 /* the access must start within one of the target process's mappings */ 1641 vma = find_vma(mm, addr); 1666 vma = find_vma(mm, addr); 1642 if (vma) { 1667 if (vma) { 1643 /* don't overrun this mapping 1668 /* don't overrun this mapping */ 1644 if (addr + len >= vma->vm_end 1669 if (addr + len >= vma->vm_end) 1645 len = vma->vm_end - a 1670 len = vma->vm_end - addr; 1646 1671 1647 /* only read or write mapping 1672 /* only read or write mappings where it is permitted */ 1648 if (write && vma->vm_flags & 1673 if (write && vma->vm_flags & VM_MAYWRITE) 1649 copy_to_user_page(vma 1674 copy_to_user_page(vma, NULL, addr, 1650 (voi 1675 (void *) addr, buf, len); 1651 else if (!write && vma->vm_fl 1676 else if (!write && vma->vm_flags & VM_MAYREAD) 1652 copy_from_user_page(v 1677 copy_from_user_page(vma, NULL, addr, 1653 b 1678 buf, (void *) addr, len); 1654 else 1679 else 1655 len = 0; 1680 len = 0; 1656 } else { 1681 } else { 1657 len = 0; 1682 len = 0; 1658 } 1683 } 1659 1684 1660 mmap_read_unlock(mm); 1685 mmap_read_unlock(mm); 1661 1686 1662 return len; 1687 return len; 1663 } 1688 } 1664 1689 1665 /** 1690 /** 1666 * access_remote_vm - access another process' 1691 * access_remote_vm - access another process' address space 1667 * @mm: the mm_struct of the target a 1692 * @mm: the mm_struct of the target address space 1668 * @addr: start address to access 1693 * @addr: start address to access 1669 * @buf: source or destination buffer 1694 * @buf: source or destination buffer 1670 * @len: number of bytes to transfer 1695 * @len: number of bytes to transfer 1671 * @gup_flags: flags modifying lookup behavi 1696 * @gup_flags: flags modifying lookup behaviour 1672 * 1697 * 1673 * The caller must hold a reference on @mm. 1698 * The caller must hold a reference on @mm. 1674 */ 1699 */ 1675 int access_remote_vm(struct mm_struct *mm, un 1700 int access_remote_vm(struct mm_struct *mm, unsigned long addr, 1676 void *buf, int len, unsigned 1701 void *buf, int len, unsigned int gup_flags) 1677 { 1702 { 1678 return __access_remote_vm(mm, addr, b 1703 return __access_remote_vm(mm, addr, buf, len, gup_flags); 1679 } 1704 } 1680 1705 1681 /* 1706 /* 1682 * Access another process' address space. 1707 * Access another process' address space. 
1683 * - source/target buffer must be kernel spac 1708 * - source/target buffer must be kernel space 1684 */ 1709 */ 1685 int access_process_vm(struct task_struct *tsk 1710 int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, 1686 unsigned int gup_flags) 1711 unsigned int gup_flags) 1687 { 1712 { 1688 struct mm_struct *mm; 1713 struct mm_struct *mm; 1689 1714 1690 if (addr + len < addr) 1715 if (addr + len < addr) 1691 return 0; 1716 return 0; 1692 1717 1693 mm = get_task_mm(tsk); 1718 mm = get_task_mm(tsk); 1694 if (!mm) 1719 if (!mm) 1695 return 0; 1720 return 0; 1696 1721 1697 len = __access_remote_vm(mm, addr, bu 1722 len = __access_remote_vm(mm, addr, buf, len, gup_flags); 1698 1723 1699 mmput(mm); 1724 mmput(mm); 1700 return len; 1725 return len; 1701 } 1726 } 1702 EXPORT_SYMBOL_GPL(access_process_vm); 1727 EXPORT_SYMBOL_GPL(access_process_vm); 1703 1728 1704 /** 1729 /** 1705 * nommu_shrink_inode_mappings - Shrink the s 1730 * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode 1706 * @inode: The inode to check 1731 * @inode: The inode to check 1707 * @size: The current filesize of the inode 1732 * @size: The current filesize of the inode 1708 * @newsize: The proposed filesize of the ino 1733 * @newsize: The proposed filesize of the inode 1709 * 1734 * 1710 * Check the shared mappings on an inode on b 1735 * Check the shared mappings on an inode on behalf of a shrinking truncate to 1711 * make sure that any outstanding VMAs aren't 1736 * make sure that any outstanding VMAs aren't broken and then shrink the 1712 * vm_regions that extend beyond so that do_m 1737 * vm_regions that extend beyond so that do_mmap() doesn't 1713 * automatically grant mappings that are too 1738 * automatically grant mappings that are too large. 
1714 */ 1739 */ 1715 int nommu_shrink_inode_mappings(struct inode 1740 int nommu_shrink_inode_mappings(struct inode *inode, size_t size, 1716 size_t newsiz 1741 size_t newsize) 1717 { 1742 { 1718 struct vm_area_struct *vma; 1743 struct vm_area_struct *vma; 1719 struct vm_region *region; 1744 struct vm_region *region; 1720 pgoff_t low, high; 1745 pgoff_t low, high; 1721 size_t r_size, r_top; 1746 size_t r_size, r_top; 1722 1747 1723 low = newsize >> PAGE_SHIFT; 1748 low = newsize >> PAGE_SHIFT; 1724 high = (size + PAGE_SIZE - 1) >> PAGE 1749 high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 1725 1750 1726 down_write(&nommu_region_sem); 1751 down_write(&nommu_region_sem); 1727 i_mmap_lock_read(inode->i_mapping); 1752 i_mmap_lock_read(inode->i_mapping); 1728 1753 1729 /* search for VMAs that fall within t 1754 /* search for VMAs that fall within the dead zone */ 1730 vma_interval_tree_foreach(vma, &inode 1755 vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) { 1731 /* found one - only intereste 1756 /* found one - only interested if it's shared out of the page 1732 * cache */ 1757 * cache */ 1733 if (vma->vm_flags & VM_SHARED 1758 if (vma->vm_flags & VM_SHARED) { 1734 i_mmap_unlock_read(in 1759 i_mmap_unlock_read(inode->i_mapping); 1735 up_write(&nommu_regio 1760 up_write(&nommu_region_sem); 1736 return -ETXTBSY; /* n 1761 return -ETXTBSY; /* not quite true, but near enough */ 1737 } 1762 } 1738 } 1763 } 1739 1764 1740 /* reduce any regions that overlap th 1765 /* reduce any regions that overlap the dead zone - if in existence, 1741 * these will be pointed to by VMAs t 1766 * these will be pointed to by VMAs that don't overlap the dead zone 1742 * 1767 * 1743 * we don't check for any regions tha 1768 * we don't check for any regions that start beyond the EOF as there 1744 * shouldn't be any 1769 * shouldn't be any 1745 */ 1770 */ 1746 vma_interval_tree_foreach(vma, &inode 1771 vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, 0, ULONG_MAX) { 1747 if (!(vma->vm_flags & VM_SHAR 1772 if (!(vma->vm_flags & VM_SHARED)) 1748 continue; 1773 continue; 1749 1774 1750 region = vma->vm_region; 1775 region = vma->vm_region; 1751 r_size = region->vm_top - reg 1776 r_size = region->vm_top - region->vm_start; 1752 r_top = (region->vm_pgoff << 1777 r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size; 1753 1778 1754 if (r_top > newsize) { 1779 if (r_top > newsize) { 1755 region->vm_top -= r_t 1780 region->vm_top -= r_top - newsize; 1756 if (region->vm_end > 1781 if (region->vm_end > region->vm_top) 1757 region->vm_en 1782 region->vm_end = region->vm_top; 1758 } 1783 } 1759 } 1784 } 1760 1785 1761 i_mmap_unlock_read(inode->i_mapping); 1786 i_mmap_unlock_read(inode->i_mapping); 1762 up_write(&nommu_region_sem); 1787 up_write(&nommu_region_sem); 1763 return 0; 1788 return 0; 1764 } 1789 } 1765 1790 1766 /* 1791 /* 1767 * Initialise sysctl_user_reserve_kbytes. 1792 * Initialise sysctl_user_reserve_kbytes. 1768 * 1793 * 1769 * This is intended to prevent a user from st 1794 * This is intended to prevent a user from starting a single memory hogging 1770 * process, such that they cannot recover (ki 1795 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER 1771 * mode. 1796 * mode. 1772 * 1797 * 1773 * The default value is min(3% of free memory 1798 * The default value is min(3% of free memory, 128MB) 1774 * 128MB is enough to recover with sshd/login 1799 * 128MB is enough to recover with sshd/login, bash, and top/kill. 
1775 */ 1800 */ 1776 static int __meminit init_user_reserve(void) 1801 static int __meminit init_user_reserve(void) 1777 { 1802 { 1778 unsigned long free_kbytes; 1803 unsigned long free_kbytes; 1779 1804 1780 free_kbytes = K(global_zone_page_stat !! 1805 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); 1781 1806 1782 sysctl_user_reserve_kbytes = min(free 1807 sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); 1783 return 0; 1808 return 0; 1784 } 1809 } 1785 subsys_initcall(init_user_reserve); 1810 subsys_initcall(init_user_reserve); 1786 1811 1787 /* 1812 /* 1788 * Initialise sysctl_admin_reserve_kbytes. 1813 * Initialise sysctl_admin_reserve_kbytes. 1789 * 1814 * 1790 * The purpose of sysctl_admin_reserve_kbytes 1815 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin 1791 * to log in and kill a memory hogging proces 1816 * to log in and kill a memory hogging process. 1792 * 1817 * 1793 * Systems with more than 256MB will reserve 1818 * Systems with more than 256MB will reserve 8MB, enough to recover 1794 * with sshd, bash, and top in OVERCOMMIT_GUE 1819 * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will 1795 * only reserve 3% of free pages by default. 1820 * only reserve 3% of free pages by default. 1796 */ 1821 */ 1797 static int __meminit init_admin_reserve(void) 1822 static int __meminit init_admin_reserve(void) 1798 { 1823 { 1799 unsigned long free_kbytes; 1824 unsigned long free_kbytes; 1800 1825 1801 free_kbytes = K(global_zone_page_stat !! 1826 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); 1802 1827 1803 sysctl_admin_reserve_kbytes = min(fre 1828 sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); 1804 return 0; 1829 return 0; 1805 } 1830 } 1806 subsys_initcall(init_admin_reserve); 1831 subsys_initcall(init_admin_reserve); 1807 1832
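
The VM_MAYSHARE overlap search in do_mmap() above lets several MAP_SHARED mappings of the same file reuse a single vm_region. Whether that is visible from userspace depends on the backing filesystem advertising NOMMU_MAP_DIRECT; the sketch below assumes a ramfs/tmpfs-style backing, where the two returned addresses are expected to coincide on a nommu kernel, while an MMU kernel will normally hand back two distinct addresses. The file name and shm object name are illustrative only.

/*
 * shared_map.c - two MAP_SHARED mappings of one file (sketch)
 * build: cc shared_map.c   (older glibc may need -lrt for shm_open)
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	int fd = shm_open("/nommu-demo", O_CREAT | O_RDWR, 0600);

	if (fd < 0 || ftruncate(fd, page) < 0) {
		perror("shm_open/ftruncate");
		return 1;
	}

	void *a = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	void *b = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (a == MAP_FAILED || b == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/*
	 * On a nommu kernel the second mapping is expected to find the
	 * first one's vm_region in nommu_region_tree and bump its usage
	 * count, so the two addresses coincide; on an MMU kernel they
	 * normally differ.
	 */
	printf("a=%p b=%p%s\n", a, b, a == b ? " (shared region)" : "");

	shm_unlink("/nommu-demo");
	return 0;
}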
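
The do_munmap() path in the listing above only permits a partial unmap when the range is backed by a single anonymous VMA; a file-backed mapping has to be released whole. A minimal userspace sketch of that rule, assuming a nommu kernel (on an MMU kernel the same calls also succeed for file mappings):

/* munmap_rules.c - partial unmap of an anonymous mapping (sketch) */
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	void *anon = mmap(NULL, 4 * page, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (anon == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/*
	 * Trimming the tail stays inside one VMA, so do_munmap() shrinks
	 * it; a range in the middle would first be split, and a partial
	 * unmap of a file-backed VMA would be rejected with -EINVAL.
	 */
	if (munmap((char *)anon + 3 * page, page) < 0)
		perror("partial munmap");

	/* the remaining three pages are released as a whole VMA */
	munmap(anon, 3 * page);
	return 0;
}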
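
do_mremap() above never relocates a mapping: a non-shared mapping may only grow or shrink in place, and only while it still fits inside the region that do_mmap_private() originally allocated. A hedged sketch of what that looks like from userspace, assuming a nommu kernel:

/* mremap_resize.c - resize-in-place only (sketch) */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	void *p = mmap(NULL, 4 * page, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* shrinking stays inside the backing region, so this should work */
	if (mremap(p, 4 * page, 2 * page, 0) == MAP_FAILED)
		perror("mremap shrink");

	/*
	 * Growing past the region (or asking to move) fails on nommu:
	 * MREMAP_MAYMOVE is effectively ignored and -ENOMEM comes back
	 * once new_len exceeds the original region.
	 */
	if (mremap(p, 2 * page, 8 * page, MREMAP_MAYMOVE) == MAP_FAILED)
		perror("mremap grow (expected to fail on nommu)");

	munmap(p, 2 * page);
	return 0;
}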
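
__access_remote_vm() above is the nommu back end for peeking at another mm, for example from ptrace() or /proc/<pid>/mem; it checks VM_MAYREAD/VM_MAYWRITE on the target VMA and then copies directly, since there is only one flat address space. A small sketch that exercises this path through /proc/self/mem, assuming /proc is mounted and the program's data segment is covered by a VMA:

/* self_mem.c - a read of /proc/self/mem is serviced by access_remote_vm() */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static const char secret[] = "read back through /proc/self/mem";

int main(void)
{
	char buf[sizeof(secret)] = "";
	int fd = open("/proc/self/mem", O_RDONLY);

	if (fd < 0) {
		perror("open /proc/self/mem");
		return 1;
	}

	/*
	 * The kernel looks up the VMA covering 'secret', checks VM_MAYREAD
	 * and copies the bytes out - on nommu via the plain copy path in
	 * __access_remote_vm().
	 */
	if (pread(fd, buf, sizeof(buf), (off_t)(uintptr_t)secret) < 0)
		perror("pread");
	else
		printf("%s\n", buf);

	close(fd);
	return 0;
}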