// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/mm/nommu.c
 *
 * Replacement code for mm functions to support CPU's that don't
 * have any form of memory management unit (thus no virtual memory).
 *
 * See Documentation/admin-guide/mm/nommu-mmap.rst	!!	 * See Documentation/nommu-mmap.txt
 *
 * Copyright (c) 2004-2008 David Howells <dhowells@redhat.com>
 * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
 * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
 * Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
 * Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
>> #include <linux/vmacache.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
>> #include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/compiler.h>
#include <linux/mount.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/printk.h>

#include <linux/uaccess.h>
#include <linux/uio.h>	<<
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include "internal.h"

void *high_memory;
EXPORT_SYMBOL(high_memory);
struct page *mem_map;
unsigned long max_mapnr;
EXPORT_SYMBOL(max_mapnr);
unsigned long highest_memmap_pfn;
int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
int heap_stack_gap = 0;

atomic_long_t mmap_pages_allocated;

EXPORT_SYMBOL(mem_map);

/* list of mapped, potentially shareable regions */
static struct kmem_cache *vm_region_jar;
struct rb_root nommu_region_tree = RB_ROOT;
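/*
 * The globals above back a replacement for the normal mmap machinery that
 * supports only a subset of mmap() semantics (see the nommu-mmap document
 * referenced in the header): MAP_FIXED is rejected and address hints are
 * ignored.  A rough userspace sketch (not part of this file; illustrative
 * only) of a mapping type that is always available on a !MMU target, a
 * private anonymous mapping, which do_mmap() below satisfies with a plain
 * contiguous allocation (the NOMMU_MAP_COPY case):
 *
 *	#include <string.h>
 *	#include <sys/mman.h>
 *
 *	int main(void)
 *	{
 *		void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *		if (p == MAP_FAILED)
 *			return 1;
 *		memset(p, 0, 4096);
 *		return munmap(p, 4096);
 *	}
 */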
DECLARE_RWSEM(nommu_region_sem); 62 DECLARE_RWSEM(nommu_region_sem); 62 63 63 const struct vm_operations_struct generic_file 64 const struct vm_operations_struct generic_file_vm_ops = { 64 }; 65 }; 65 66 66 /* 67 /* 67 * Return the total memory allocated for this 68 * Return the total memory allocated for this pointer, not 68 * just what the caller asked for. 69 * just what the caller asked for. 69 * 70 * 70 * Doesn't have to be accurate, i.e. may have 71 * Doesn't have to be accurate, i.e. may have races. 71 */ 72 */ 72 unsigned int kobjsize(const void *objp) 73 unsigned int kobjsize(const void *objp) 73 { 74 { 74 struct page *page; 75 struct page *page; 75 76 76 /* 77 /* 77 * If the object we have should not ha 78 * If the object we have should not have ksize performed on it, 78 * return size of 0 79 * return size of 0 79 */ 80 */ 80 if (!objp || !virt_addr_valid(objp)) 81 if (!objp || !virt_addr_valid(objp)) 81 return 0; 82 return 0; 82 83 83 page = virt_to_head_page(objp); 84 page = virt_to_head_page(objp); 84 85 85 /* 86 /* 86 * If the allocator sets PageSlab, we 87 * If the allocator sets PageSlab, we know the pointer came from 87 * kmalloc(). 88 * kmalloc(). 88 */ 89 */ 89 if (PageSlab(page)) 90 if (PageSlab(page)) 90 return ksize(objp); 91 return ksize(objp); 91 92 92 /* 93 /* 93 * If it's not a compound page, see if 94 * If it's not a compound page, see if we have a matching VMA 94 * region. This test is intentionally 95 * region. This test is intentionally done in reverse order, 95 * so if there's no VMA, we still fall 96 * so if there's no VMA, we still fall through and hand back 96 * PAGE_SIZE for 0-order pages. 97 * PAGE_SIZE for 0-order pages. 97 */ 98 */ 98 if (!PageCompound(page)) { 99 if (!PageCompound(page)) { 99 struct vm_area_struct *vma; 100 struct vm_area_struct *vma; 100 101 101 vma = find_vma(current->mm, (u 102 vma = find_vma(current->mm, (unsigned long)objp); 102 if (vma) 103 if (vma) 103 return vma->vm_end - v 104 return vma->vm_end - vma->vm_start; 104 } 105 } 105 106 106 /* 107 /* 107 * The ksize() function is only guaran 108 * The ksize() function is only guaranteed to work for pointers 108 * returned by kmalloc(). So handle ar 109 * returned by kmalloc(). So handle arbitrary pointers here. 109 */ 110 */ 110 return page_size(page); !! 111 return PAGE_SIZE << compound_order(page); 111 } 112 } 112 113 >> 114 static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, >> 115 unsigned long start, unsigned long nr_pages, >> 116 unsigned int foll_flags, struct page **pages, >> 117 struct vm_area_struct **vmas, int *nonblocking) >> 118 { >> 119 struct vm_area_struct *vma; >> 120 unsigned long vm_flags; >> 121 int i; >> 122 >> 123 /* calculate required read or write permissions. >> 124 * If FOLL_FORCE is set, we only require the "MAY" flags. >> 125 */ >> 126 vm_flags = (foll_flags & FOLL_WRITE) ? >> 127 (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); >> 128 vm_flags &= (foll_flags & FOLL_FORCE) ? 
>> 129 (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); >> 130 >> 131 for (i = 0; i < nr_pages; i++) { >> 132 vma = find_vma(mm, start); >> 133 if (!vma) >> 134 goto finish_or_fault; >> 135 >> 136 /* protect what we can, including chardevs */ >> 137 if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) || >> 138 !(vm_flags & vma->vm_flags)) >> 139 goto finish_or_fault; >> 140 >> 141 if (pages) { >> 142 pages[i] = virt_to_page(start); >> 143 if (pages[i]) >> 144 get_page(pages[i]); >> 145 } >> 146 if (vmas) >> 147 vmas[i] = vma; >> 148 start = (start + PAGE_SIZE) & PAGE_MASK; >> 149 } >> 150 >> 151 return i; >> 152 >> 153 finish_or_fault: >> 154 return i ? : -EFAULT; >> 155 } >> 156 >> 157 /* >> 158 * get a list of pages in an address range belonging to the specified process >> 159 * and indicate the VMA that covers each page >> 160 * - this is potentially dodgy as we may end incrementing the page count of a >> 161 * slab page or a secondary page from a compound page >> 162 * - don't permit access to VMAs that don't support it, such as I/O mappings >> 163 */ >> 164 long get_user_pages(unsigned long start, unsigned long nr_pages, >> 165 unsigned int gup_flags, struct page **pages, >> 166 struct vm_area_struct **vmas) >> 167 { >> 168 return __get_user_pages(current, current->mm, start, nr_pages, >> 169 gup_flags, pages, vmas, NULL); >> 170 } >> 171 EXPORT_SYMBOL(get_user_pages); >> 172 >> 173 long get_user_pages_locked(unsigned long start, unsigned long nr_pages, >> 174 unsigned int gup_flags, struct page **pages, >> 175 int *locked) >> 176 { >> 177 return get_user_pages(start, nr_pages, gup_flags, pages, NULL); >> 178 } >> 179 EXPORT_SYMBOL(get_user_pages_locked); >> 180 >> 181 static long __get_user_pages_unlocked(struct task_struct *tsk, >> 182 struct mm_struct *mm, unsigned long start, >> 183 unsigned long nr_pages, struct page **pages, >> 184 unsigned int gup_flags) >> 185 { >> 186 long ret; >> 187 down_read(&mm->mmap_sem); >> 188 ret = __get_user_pages(tsk, mm, start, nr_pages, gup_flags, pages, >> 189 NULL, NULL); >> 190 up_read(&mm->mmap_sem); >> 191 return ret; >> 192 } >> 193 >> 194 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, >> 195 struct page **pages, unsigned int gup_flags) >> 196 { >> 197 return __get_user_pages_unlocked(current, current->mm, start, nr_pages, >> 198 pages, gup_flags); >> 199 } >> 200 EXPORT_SYMBOL(get_user_pages_unlocked); >> 201 >> 202 /** >> 203 * follow_pfn - look up PFN at a user virtual address >> 204 * @vma: memory mapping >> 205 * @address: user virtual address >> 206 * @pfn: location to store found PFN >> 207 * >> 208 * Only IO mappings and raw PFN mappings are allowed. >> 209 * >> 210 * Returns zero and the pfn at @pfn on success, -ve otherwise. >> 211 */ >> 212 int follow_pfn(struct vm_area_struct *vma, unsigned long address, >> 213 unsigned long *pfn) >> 214 { >> 215 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) >> 216 return -EINVAL; >> 217 >> 218 *pfn = address >> PAGE_SHIFT; >> 219 return 0; >> 220 } >> 221 EXPORT_SYMBOL(follow_pfn); >> 222 >> 223 LIST_HEAD(vmap_area_list); >> 224 113 void vfree(const void *addr) 225 void vfree(const void *addr) 114 { 226 { 115 kfree(addr); 227 kfree(addr); 116 } 228 } 117 EXPORT_SYMBOL(vfree); 229 EXPORT_SYMBOL(vfree); 118 230 119 void *__vmalloc_noprof(unsigned long size, gfp !! 
231 void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) 120 { 232 { 121 /* 233 /* 122 * You can't specify __GFP_HIGHMEM wi 234 * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc() 123 * returns only a logical address. 235 * returns only a logical address. 124 */ 236 */ 125 return kmalloc_noprof(size, (gfp_mask !! 237 return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); 126 } << 127 EXPORT_SYMBOL(__vmalloc_noprof); << 128 << 129 void *vrealloc_noprof(const void *p, size_t si << 130 { << 131 return krealloc_noprof(p, size, (flags << 132 } 238 } >> 239 EXPORT_SYMBOL(__vmalloc); 133 240 134 void *__vmalloc_node_range_noprof(unsigned lon !! 241 void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags) 135 unsigned long start, unsigned << 136 pgprot_t prot, unsigned long v << 137 const void *caller) << 138 { 242 { 139 return __vmalloc_noprof(size, gfp_mask !! 243 return __vmalloc(size, flags, PAGE_KERNEL); 140 } 244 } 141 245 142 void *__vmalloc_node_noprof(unsigned long size !! 246 void *vmalloc_user(unsigned long size) 143 int node, const void *caller) << 144 { << 145 return __vmalloc_noprof(size, gfp_mask << 146 } << 147 << 148 static void *__vmalloc_user_flags(unsigned lon << 149 { 247 { 150 void *ret; 248 void *ret; 151 249 152 ret = __vmalloc(size, flags); !! 250 ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); 153 if (ret) { 251 if (ret) { 154 struct vm_area_struct *vma; 252 struct vm_area_struct *vma; 155 253 156 mmap_write_lock(current->mm); !! 254 down_write(¤t->mm->mmap_sem); 157 vma = find_vma(current->mm, (u 255 vma = find_vma(current->mm, (unsigned long)ret); 158 if (vma) 256 if (vma) 159 vm_flags_set(vma, VM_U !! 257 vma->vm_flags |= VM_USERMAP; 160 mmap_write_unlock(current->mm) !! 258 up_write(¤t->mm->mmap_sem); 161 } 259 } 162 260 163 return ret; 261 return ret; 164 } 262 } 165 !! 263 EXPORT_SYMBOL(vmalloc_user); 166 void *vmalloc_user_noprof(unsigned long size) << 167 { << 168 return __vmalloc_user_flags(size, GFP_ << 169 } << 170 EXPORT_SYMBOL(vmalloc_user_noprof); << 171 264 172 struct page *vmalloc_to_page(const void *addr) 265 struct page *vmalloc_to_page(const void *addr) 173 { 266 { 174 return virt_to_page(addr); 267 return virt_to_page(addr); 175 } 268 } 176 EXPORT_SYMBOL(vmalloc_to_page); 269 EXPORT_SYMBOL(vmalloc_to_page); 177 270 178 unsigned long vmalloc_to_pfn(const void *addr) 271 unsigned long vmalloc_to_pfn(const void *addr) 179 { 272 { 180 return page_to_pfn(virt_to_page(addr)) 273 return page_to_pfn(virt_to_page(addr)); 181 } 274 } 182 EXPORT_SYMBOL(vmalloc_to_pfn); 275 EXPORT_SYMBOL(vmalloc_to_pfn); 183 276 184 long vread_iter(struct iov_iter *iter, const c !! 277 long vread(char *buf, char *addr, unsigned long count) >> 278 { >> 279 /* Don't allow overflow */ >> 280 if ((unsigned long) buf + count < count) >> 281 count = -(unsigned long) buf; >> 282 >> 283 memcpy(buf, addr, count); >> 284 return count; >> 285 } >> 286 >> 287 long vwrite(char *buf, char *addr, unsigned long count) 185 { 288 { 186 /* Don't allow overflow */ 289 /* Don't allow overflow */ 187 if ((unsigned long) addr + count < cou 290 if ((unsigned long) addr + count < count) 188 count = -(unsigned long) addr; 291 count = -(unsigned long) addr; 189 292 190 return copy_to_iter(addr, count, iter) !! 
293 memcpy(addr, buf, count); >> 294 return count; 191 } 295 } 192 296 193 /* 297 /* 194 * vmalloc - allocate virtually contigu 298 * vmalloc - allocate virtually contiguous memory 195 * 299 * 196 * @size: allocation size 300 * @size: allocation size 197 * 301 * 198 * Allocate enough pages to cover @size f 302 * Allocate enough pages to cover @size from the page level 199 * allocator and map them into contiguous 303 * allocator and map them into contiguous kernel virtual space. 200 * 304 * 201 * For tight control over page level allo 305 * For tight control over page level allocator and protection flags 202 * use __vmalloc() instead. 306 * use __vmalloc() instead. 203 */ 307 */ 204 void *vmalloc_noprof(unsigned long size) !! 308 void *vmalloc(unsigned long size) 205 { 309 { 206 return __vmalloc_noprof(size, GFP_KERN !! 310 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); 207 } 311 } 208 EXPORT_SYMBOL(vmalloc_noprof); !! 312 EXPORT_SYMBOL(vmalloc); 209 << 210 void *vmalloc_huge_noprof(unsigned long size, << 211 313 212 /* 314 /* 213 * vzalloc - allocate virtually contiguou 315 * vzalloc - allocate virtually contiguous memory with zero fill 214 * 316 * 215 * @size: allocation size 317 * @size: allocation size 216 * 318 * 217 * Allocate enough pages to cover @size f 319 * Allocate enough pages to cover @size from the page level 218 * allocator and map them into contiguous 320 * allocator and map them into contiguous kernel virtual space. 219 * The memory allocated is set to zero. 321 * The memory allocated is set to zero. 220 * 322 * 221 * For tight control over page level allo 323 * For tight control over page level allocator and protection flags 222 * use __vmalloc() instead. 324 * use __vmalloc() instead. 223 */ 325 */ 224 void *vzalloc_noprof(unsigned long size) !! 326 void *vzalloc(unsigned long size) 225 { 327 { 226 return __vmalloc_noprof(size, GFP_KERN !! 328 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, >> 329 PAGE_KERNEL); 227 } 330 } 228 EXPORT_SYMBOL(vzalloc_noprof); !! 331 EXPORT_SYMBOL(vzalloc); 229 332 230 /** 333 /** 231 * vmalloc_node - allocate memory on a specifi 334 * vmalloc_node - allocate memory on a specific node 232 * @size: allocation size 335 * @size: allocation size 233 * @node: numa node 336 * @node: numa node 234 * 337 * 235 * Allocate enough pages to cover @size from t 338 * Allocate enough pages to cover @size from the page level 236 * allocator and map them into contiguous kern 339 * allocator and map them into contiguous kernel virtual space. 237 * 340 * 238 * For tight control over page level allocator 341 * For tight control over page level allocator and protection flags 239 * use __vmalloc() instead. 342 * use __vmalloc() instead. 240 */ 343 */ 241 void *vmalloc_node_noprof(unsigned long size, !! 344 void *vmalloc_node(unsigned long size, int node) 242 { 345 { 243 return vmalloc_noprof(size); !! 346 return vmalloc(size); 244 } 347 } 245 EXPORT_SYMBOL(vmalloc_node_noprof); !! 348 EXPORT_SYMBOL(vmalloc_node); 246 349 247 /** 350 /** 248 * vzalloc_node - allocate memory on a specifi 351 * vzalloc_node - allocate memory on a specific node with zero fill 249 * @size: allocation size 352 * @size: allocation size 250 * @node: numa node 353 * @node: numa node 251 * 354 * 252 * Allocate enough pages to cover @size from t 355 * Allocate enough pages to cover @size from the page level 253 * allocator and map them into contiguous kern 356 * allocator and map them into contiguous kernel virtual space. 
254 * The memory allocated is set to zero. 357 * The memory allocated is set to zero. 255 * 358 * 256 * For tight control over page level allocator 359 * For tight control over page level allocator and protection flags 257 * use __vmalloc() instead. 360 * use __vmalloc() instead. 258 */ 361 */ 259 void *vzalloc_node_noprof(unsigned long size, !! 362 void *vzalloc_node(unsigned long size, int node) 260 { 363 { 261 return vzalloc_noprof(size); !! 364 return vzalloc(size); >> 365 } >> 366 EXPORT_SYMBOL(vzalloc_node); >> 367 >> 368 /** >> 369 * vmalloc_exec - allocate virtually contiguous, executable memory >> 370 * @size: allocation size >> 371 * >> 372 * Kernel-internal function to allocate enough pages to cover @size >> 373 * the page level allocator and map them into contiguous and >> 374 * executable kernel virtual space. >> 375 * >> 376 * For tight control over page level allocator and protection flags >> 377 * use __vmalloc() instead. >> 378 */ >> 379 >> 380 void *vmalloc_exec(unsigned long size) >> 381 { >> 382 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC); 262 } 383 } 263 EXPORT_SYMBOL(vzalloc_node_noprof); << 264 384 265 /** 385 /** 266 * vmalloc_32 - allocate virtually contiguou 386 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) 267 * @size: allocation size 387 * @size: allocation size 268 * 388 * 269 * Allocate enough 32bit PA addressable p 389 * Allocate enough 32bit PA addressable pages to cover @size from the 270 * page level allocator and map them into 390 * page level allocator and map them into contiguous kernel virtual space. 271 */ 391 */ 272 void *vmalloc_32_noprof(unsigned long size) !! 392 void *vmalloc_32(unsigned long size) 273 { 393 { 274 return __vmalloc_noprof(size, GFP_KERN !! 394 return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL); 275 } 395 } 276 EXPORT_SYMBOL(vmalloc_32_noprof); !! 396 EXPORT_SYMBOL(vmalloc_32); 277 397 278 /** 398 /** 279 * vmalloc_32_user - allocate zeroed virtually 399 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory 280 * @size: allocation size 400 * @size: allocation size 281 * 401 * 282 * The resulting memory area is 32bit addressa 402 * The resulting memory area is 32bit addressable and zeroed so it can be 283 * mapped to userspace without leaking data. 403 * mapped to userspace without leaking data. 284 * 404 * 285 * VM_USERMAP is set on the corresponding VMA 405 * VM_USERMAP is set on the corresponding VMA so that subsequent calls to 286 * remap_vmalloc_range() are permissible. 406 * remap_vmalloc_range() are permissible. 287 */ 407 */ 288 void *vmalloc_32_user_noprof(unsigned long siz !! 408 void *vmalloc_32_user(unsigned long size) 289 { 409 { 290 /* 410 /* 291 * We'll have to sort out the ZONE_DMA 411 * We'll have to sort out the ZONE_DMA bits for 64-bit, 292 * but for now this can simply use vma 412 * but for now this can simply use vmalloc_user() directly. 293 */ 413 */ 294 return vmalloc_user_noprof(size); !! 414 return vmalloc_user(size); 295 } 415 } 296 EXPORT_SYMBOL(vmalloc_32_user_noprof); !! 
416 EXPORT_SYMBOL(vmalloc_32_user); 297 417 298 void *vmap(struct page **pages, unsigned int c 418 void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot) 299 { 419 { 300 BUG(); 420 BUG(); 301 return NULL; 421 return NULL; 302 } 422 } 303 EXPORT_SYMBOL(vmap); 423 EXPORT_SYMBOL(vmap); 304 424 305 void vunmap(const void *addr) 425 void vunmap(const void *addr) 306 { 426 { 307 BUG(); 427 BUG(); 308 } 428 } 309 EXPORT_SYMBOL(vunmap); 429 EXPORT_SYMBOL(vunmap); 310 430 311 void *vm_map_ram(struct page **pages, unsigned !! 431 void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) 312 { 432 { 313 BUG(); 433 BUG(); 314 return NULL; 434 return NULL; 315 } 435 } 316 EXPORT_SYMBOL(vm_map_ram); 436 EXPORT_SYMBOL(vm_map_ram); 317 437 318 void vm_unmap_ram(const void *mem, unsigned in 438 void vm_unmap_ram(const void *mem, unsigned int count) 319 { 439 { 320 BUG(); 440 BUG(); 321 } 441 } 322 EXPORT_SYMBOL(vm_unmap_ram); 442 EXPORT_SYMBOL(vm_unmap_ram); 323 443 324 void vm_unmap_aliases(void) 444 void vm_unmap_aliases(void) 325 { 445 { 326 } 446 } 327 EXPORT_SYMBOL_GPL(vm_unmap_aliases); 447 EXPORT_SYMBOL_GPL(vm_unmap_aliases); 328 448 >> 449 /* >> 450 * Implement a stub for vmalloc_sync_all() if the architecture chose not to >> 451 * have one. >> 452 */ >> 453 void __weak vmalloc_sync_all(void) >> 454 { >> 455 } >> 456 >> 457 struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes) >> 458 { >> 459 BUG(); >> 460 return NULL; >> 461 } >> 462 EXPORT_SYMBOL_GPL(alloc_vm_area); >> 463 329 void free_vm_area(struct vm_struct *area) 464 void free_vm_area(struct vm_struct *area) 330 { 465 { 331 BUG(); 466 BUG(); 332 } 467 } 333 EXPORT_SYMBOL_GPL(free_vm_area); 468 EXPORT_SYMBOL_GPL(free_vm_area); 334 469 335 int vm_insert_page(struct vm_area_struct *vma, 470 int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, 336 struct page *page) 471 struct page *page) 337 { 472 { 338 return -EINVAL; 473 return -EINVAL; 339 } 474 } 340 EXPORT_SYMBOL(vm_insert_page); 475 EXPORT_SYMBOL(vm_insert_page); 341 476 342 int vm_insert_pages(struct vm_area_struct *vma << 343 struct page **pages, u << 344 { << 345 return -EINVAL; << 346 } << 347 EXPORT_SYMBOL(vm_insert_pages); << 348 << 349 int vm_map_pages(struct vm_area_struct *vma, s 477 int vm_map_pages(struct vm_area_struct *vma, struct page **pages, 350 unsigned long num) 478 unsigned long num) 351 { 479 { 352 return -EINVAL; 480 return -EINVAL; 353 } 481 } 354 EXPORT_SYMBOL(vm_map_pages); 482 EXPORT_SYMBOL(vm_map_pages); 355 483 356 int vm_map_pages_zero(struct vm_area_struct *v 484 int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages, 357 unsigned long 485 unsigned long num) 358 { 486 { 359 return -EINVAL; 487 return -EINVAL; 360 } 488 } 361 EXPORT_SYMBOL(vm_map_pages_zero); 489 EXPORT_SYMBOL(vm_map_pages_zero); 362 490 363 /* 491 /* 364 * sys_brk() for the most part doesn't need t 492 * sys_brk() for the most part doesn't need the global kernel 365 * lock, except when an application is doing 493 * lock, except when an application is doing something nasty 366 * like trying to un-brk an area that has alr 494 * like trying to un-brk an area that has already been mapped 367 * to a regular file. in this case, the unma 495 * to a regular file. in this case, the unmapping will need 368 * to invoke file system routines that need t 496 * to invoke file system routines that need the global lock. 
369 */ 497 */ 370 SYSCALL_DEFINE1(brk, unsigned long, brk) 498 SYSCALL_DEFINE1(brk, unsigned long, brk) 371 { 499 { 372 struct mm_struct *mm = current->mm; 500 struct mm_struct *mm = current->mm; 373 501 374 if (brk < mm->start_brk || brk > mm->c 502 if (brk < mm->start_brk || brk > mm->context.end_brk) 375 return mm->brk; 503 return mm->brk; 376 504 377 if (mm->brk == brk) 505 if (mm->brk == brk) 378 return mm->brk; 506 return mm->brk; 379 507 380 /* 508 /* 381 * Always allow shrinking brk 509 * Always allow shrinking brk 382 */ 510 */ 383 if (brk <= mm->brk) { 511 if (brk <= mm->brk) { 384 mm->brk = brk; 512 mm->brk = brk; 385 return brk; 513 return brk; 386 } 514 } 387 515 388 /* 516 /* 389 * Ok, looks good - let it rip. 517 * Ok, looks good - let it rip. 390 */ 518 */ 391 flush_icache_user_range(mm->brk, brk); !! 519 flush_icache_range(mm->brk, brk); 392 return mm->brk = brk; 520 return mm->brk = brk; 393 } 521 } 394 522 395 /* 523 /* 396 * initialise the percpu counter for VM and re 524 * initialise the percpu counter for VM and region record slabs 397 */ 525 */ 398 void __init mmap_init(void) 526 void __init mmap_init(void) 399 { 527 { 400 int ret; 528 int ret; 401 529 402 ret = percpu_counter_init(&vm_committe 530 ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); 403 VM_BUG_ON(ret); 531 VM_BUG_ON(ret); 404 vm_region_jar = KMEM_CACHE(vm_region, 532 vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT); 405 } 533 } 406 534 407 /* 535 /* 408 * validate the region tree 536 * validate the region tree 409 * - the caller must hold the region lock 537 * - the caller must hold the region lock 410 */ 538 */ 411 #ifdef CONFIG_DEBUG_NOMMU_REGIONS 539 #ifdef CONFIG_DEBUG_NOMMU_REGIONS 412 static noinline void validate_nommu_regions(vo 540 static noinline void validate_nommu_regions(void) 413 { 541 { 414 struct vm_region *region, *last; 542 struct vm_region *region, *last; 415 struct rb_node *p, *lastp; 543 struct rb_node *p, *lastp; 416 544 417 lastp = rb_first(&nommu_region_tree); 545 lastp = rb_first(&nommu_region_tree); 418 if (!lastp) 546 if (!lastp) 419 return; 547 return; 420 548 421 last = rb_entry(lastp, struct vm_regio 549 last = rb_entry(lastp, struct vm_region, vm_rb); 422 BUG_ON(last->vm_end <= last->vm_start) 550 BUG_ON(last->vm_end <= last->vm_start); 423 BUG_ON(last->vm_top < last->vm_end); 551 BUG_ON(last->vm_top < last->vm_end); 424 552 425 while ((p = rb_next(lastp))) { 553 while ((p = rb_next(lastp))) { 426 region = rb_entry(p, struct vm 554 region = rb_entry(p, struct vm_region, vm_rb); 427 last = rb_entry(lastp, struct 555 last = rb_entry(lastp, struct vm_region, vm_rb); 428 556 429 BUG_ON(region->vm_end <= regio 557 BUG_ON(region->vm_end <= region->vm_start); 430 BUG_ON(region->vm_top < region 558 BUG_ON(region->vm_top < region->vm_end); 431 BUG_ON(region->vm_start < last 559 BUG_ON(region->vm_start < last->vm_top); 432 560 433 lastp = p; 561 lastp = p; 434 } 562 } 435 } 563 } 436 #else 564 #else 437 static void validate_nommu_regions(void) 565 static void validate_nommu_regions(void) 438 { 566 { 439 } 567 } 440 #endif 568 #endif 441 569 442 /* 570 /* 443 * add a region into the global tree 571 * add a region into the global tree 444 */ 572 */ 445 static void add_nommu_region(struct vm_region 573 static void add_nommu_region(struct vm_region *region) 446 { 574 { 447 struct vm_region *pregion; 575 struct vm_region *pregion; 448 struct rb_node **p, *parent; 576 struct rb_node **p, *parent; 449 577 450 validate_nommu_regions(); 578 validate_nommu_regions(); 
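	/*
	 * Descend the region tree to find the insertion point.  Regions are
	 * keyed on ->vm_start alone and never overlap, so finding a node
	 * with the same start address that is not this region is a bug.
	 */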
	parent = NULL;
	p = &nommu_region_tree.rb_node;
	while (*p) {
		parent = *p;
		pregion = rb_entry(parent, struct vm_region, vm_rb);
		if (region->vm_start < pregion->vm_start)
			p = &(*p)->rb_left;
		else if (region->vm_start > pregion->vm_start)
			p = &(*p)->rb_right;
		else if (pregion == region)
			return;
		else
			BUG();
	}

	rb_link_node(&region->vm_rb, parent, p);
	rb_insert_color(&region->vm_rb, &nommu_region_tree);

	validate_nommu_regions();
}

/*
 * delete a region from the global tree
 */
static void delete_nommu_region(struct vm_region *region)
{
	BUG_ON(!nommu_region_tree.rb_node);

	validate_nommu_regions();
	rb_erase(&region->vm_rb, &nommu_region_tree);
	validate_nommu_regions();
}

/*
 * free a contiguous series of pages
 */
static void free_page_series(unsigned long from, unsigned long to)
{
	for (; from < to; from += PAGE_SIZE) {
		struct page *page = virt_to_page((void *)from);	!!
619 struct page *page = virt_to_page(from); 492 620 493 atomic_long_dec(&mmap_pages_al 621 atomic_long_dec(&mmap_pages_allocated); 494 put_page(page); 622 put_page(page); 495 } 623 } 496 } 624 } 497 625 498 /* 626 /* 499 * release a reference to a region 627 * release a reference to a region 500 * - the caller must hold the region semaphore 628 * - the caller must hold the region semaphore for writing, which this releases 501 * - the region may not have been added to the 629 * - the region may not have been added to the tree yet, in which case vm_top 502 * will equal vm_start 630 * will equal vm_start 503 */ 631 */ 504 static void __put_nommu_region(struct vm_regio 632 static void __put_nommu_region(struct vm_region *region) 505 __releases(nommu_region_sem) 633 __releases(nommu_region_sem) 506 { 634 { 507 BUG_ON(!nommu_region_tree.rb_node); 635 BUG_ON(!nommu_region_tree.rb_node); 508 636 509 if (--region->vm_usage == 0) { 637 if (--region->vm_usage == 0) { 510 if (region->vm_top > region->v 638 if (region->vm_top > region->vm_start) 511 delete_nommu_region(re 639 delete_nommu_region(region); 512 up_write(&nommu_region_sem); 640 up_write(&nommu_region_sem); 513 641 514 if (region->vm_file) 642 if (region->vm_file) 515 fput(region->vm_file); 643 fput(region->vm_file); 516 644 517 /* IO memory and memory shared 645 /* IO memory and memory shared directly out of the pagecache 518 * from ramfs/tmpfs mustn't be 646 * from ramfs/tmpfs mustn't be released here */ 519 if (region->vm_flags & VM_MAPP 647 if (region->vm_flags & VM_MAPPED_COPY) 520 free_page_series(regio 648 free_page_series(region->vm_start, region->vm_top); 521 kmem_cache_free(vm_region_jar, 649 kmem_cache_free(vm_region_jar, region); 522 } else { 650 } else { 523 up_write(&nommu_region_sem); 651 up_write(&nommu_region_sem); 524 } 652 } 525 } 653 } 526 654 527 /* 655 /* 528 * release a reference to a region 656 * release a reference to a region 529 */ 657 */ 530 static void put_nommu_region(struct vm_region 658 static void put_nommu_region(struct vm_region *region) 531 { 659 { 532 down_write(&nommu_region_sem); 660 down_write(&nommu_region_sem); 533 __put_nommu_region(region); 661 __put_nommu_region(region); 534 } 662 } 535 663 536 static void setup_vma_to_mm(struct vm_area_str !! 664 /* 537 { !! 665 * add a VMA into a process's mm_struct in the appropriate place in the list >> 666 * and tree and add to the address space's page tree also if not an anonymous >> 667 * page >> 668 * - should be called with mm->mmap_sem held writelocked >> 669 */ >> 670 static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) >> 671 { >> 672 struct vm_area_struct *pvma, *prev; >> 673 struct address_space *mapping; >> 674 struct rb_node **p, *parent, *rb_prev; >> 675 >> 676 BUG_ON(!vma->vm_region); >> 677 >> 678 mm->map_count++; 538 vma->vm_mm = mm; 679 vma->vm_mm = mm; 539 680 540 /* add the VMA to the mapping */ 681 /* add the VMA to the mapping */ 541 if (vma->vm_file) { 682 if (vma->vm_file) { 542 struct address_space *mapping !! 
683 mapping = vma->vm_file->f_mapping; 543 684 544 i_mmap_lock_write(mapping); 685 i_mmap_lock_write(mapping); 545 flush_dcache_mmap_lock(mapping 686 flush_dcache_mmap_lock(mapping); 546 vma_interval_tree_insert(vma, 687 vma_interval_tree_insert(vma, &mapping->i_mmap); 547 flush_dcache_mmap_unlock(mappi 688 flush_dcache_mmap_unlock(mapping); 548 i_mmap_unlock_write(mapping); 689 i_mmap_unlock_write(mapping); 549 } 690 } >> 691 >> 692 /* add the VMA to the tree */ >> 693 parent = rb_prev = NULL; >> 694 p = &mm->mm_rb.rb_node; >> 695 while (*p) { >> 696 parent = *p; >> 697 pvma = rb_entry(parent, struct vm_area_struct, vm_rb); >> 698 >> 699 /* sort by: start addr, end addr, VMA struct addr in that order >> 700 * (the latter is necessary as we may get identical VMAs) */ >> 701 if (vma->vm_start < pvma->vm_start) >> 702 p = &(*p)->rb_left; >> 703 else if (vma->vm_start > pvma->vm_start) { >> 704 rb_prev = parent; >> 705 p = &(*p)->rb_right; >> 706 } else if (vma->vm_end < pvma->vm_end) >> 707 p = &(*p)->rb_left; >> 708 else if (vma->vm_end > pvma->vm_end) { >> 709 rb_prev = parent; >> 710 p = &(*p)->rb_right; >> 711 } else if (vma < pvma) >> 712 p = &(*p)->rb_left; >> 713 else if (vma > pvma) { >> 714 rb_prev = parent; >> 715 p = &(*p)->rb_right; >> 716 } else >> 717 BUG(); >> 718 } >> 719 >> 720 rb_link_node(&vma->vm_rb, parent, p); >> 721 rb_insert_color(&vma->vm_rb, &mm->mm_rb); >> 722 >> 723 /* add VMA to the VMA list also */ >> 724 prev = NULL; >> 725 if (rb_prev) >> 726 prev = rb_entry(rb_prev, struct vm_area_struct, vm_rb); >> 727 >> 728 __vma_link_list(mm, vma, prev, parent); 550 } 729 } 551 730 552 static void cleanup_vma_from_mm(struct vm_area !! 731 /* >> 732 * delete a VMA from its owning mm_struct and address space >> 733 */ >> 734 static void delete_vma_from_mm(struct vm_area_struct *vma) 553 { 735 { 554 vma->vm_mm->map_count--; !! 736 int i; >> 737 struct address_space *mapping; >> 738 struct mm_struct *mm = vma->vm_mm; >> 739 struct task_struct *curr = current; >> 740 >> 741 mm->map_count--; >> 742 for (i = 0; i < VMACACHE_SIZE; i++) { >> 743 /* if the vma is cached, invalidate the entire cache */ >> 744 if (curr->vmacache.vmas[i] == vma) { >> 745 vmacache_invalidate(mm); >> 746 break; >> 747 } >> 748 } >> 749 555 /* remove the VMA from the mapping */ 750 /* remove the VMA from the mapping */ 556 if (vma->vm_file) { 751 if (vma->vm_file) { 557 struct address_space *mapping; << 558 mapping = vma->vm_file->f_mapp 752 mapping = vma->vm_file->f_mapping; 559 753 560 i_mmap_lock_write(mapping); 754 i_mmap_lock_write(mapping); 561 flush_dcache_mmap_lock(mapping 755 flush_dcache_mmap_lock(mapping); 562 vma_interval_tree_remove(vma, 756 vma_interval_tree_remove(vma, &mapping->i_mmap); 563 flush_dcache_mmap_unlock(mappi 757 flush_dcache_mmap_unlock(mapping); 564 i_mmap_unlock_write(mapping); 758 i_mmap_unlock_write(mapping); 565 } 759 } 566 } << 567 760 568 /* !! 761 /* remove from the MM's tree and list */ 569 * delete a VMA from its owning mm_struct and !! 762 rb_erase(&vma->vm_rb, &mm->mm_rb); 570 */ << 571 static int delete_vma_from_mm(struct vm_area_s << 572 { << 573 VMA_ITERATOR(vmi, vma->vm_mm, vma->vm_ << 574 763 575 vma_iter_config(&vmi, vma->vm_start, v !! 764 if (vma->vm_prev) 576 if (vma_iter_prealloc(&vmi, vma)) { !! 765 vma->vm_prev->vm_next = vma->vm_next; 577 pr_warn("Allocation of vma tre !! 766 else 578 current->pid); !! 767 mm->mmap = vma->vm_next; 579 return -ENOMEM; << 580 } << 581 cleanup_vma_from_mm(vma); << 582 768 583 /* remove from the MM's tree and list !! 
769 if (vma->vm_next) 584 vma_iter_clear(&vmi); !! 770 vma->vm_next->vm_prev = vma->vm_prev; 585 return 0; << 586 } 771 } >> 772 587 /* 773 /* 588 * destroy a VMA record 774 * destroy a VMA record 589 */ 775 */ 590 static void delete_vma(struct mm_struct *mm, s 776 static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) 591 { 777 { 592 vma_close(vma); !! 778 if (vma->vm_ops && vma->vm_ops->close) >> 779 vma->vm_ops->close(vma); 593 if (vma->vm_file) 780 if (vma->vm_file) 594 fput(vma->vm_file); 781 fput(vma->vm_file); 595 put_nommu_region(vma->vm_region); 782 put_nommu_region(vma->vm_region); 596 vm_area_free(vma); 783 vm_area_free(vma); 597 } 784 } 598 785 599 struct vm_area_struct *find_vma_intersection(s << 600 u << 601 u << 602 { << 603 unsigned long index = start_addr; << 604 << 605 mmap_assert_locked(mm); << 606 return mt_find(&mm->mm_mt, &index, end << 607 } << 608 EXPORT_SYMBOL(find_vma_intersection); << 609 << 610 /* 786 /* 611 * look up the first VMA in which addr resides 787 * look up the first VMA in which addr resides, NULL if none 612 * - should be called with mm->mmap_lock at le !! 788 * - should be called with mm->mmap_sem at least held readlocked 613 */ 789 */ 614 struct vm_area_struct *find_vma(struct mm_stru 790 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) 615 { 791 { 616 VMA_ITERATOR(vmi, mm, addr); !! 792 struct vm_area_struct *vma; 617 793 618 return vma_iter_load(&vmi); !! 794 /* check the cache first */ >> 795 vma = vmacache_find(mm, addr); >> 796 if (likely(vma)) >> 797 return vma; >> 798 >> 799 /* trawl the list (there may be multiple mappings in which addr >> 800 * resides) */ >> 801 for (vma = mm->mmap; vma; vma = vma->vm_next) { >> 802 if (vma->vm_start > addr) >> 803 return NULL; >> 804 if (vma->vm_end > addr) { >> 805 vmacache_update(addr, vma); >> 806 return vma; >> 807 } >> 808 } >> 809 >> 810 return NULL; 619 } 811 } 620 EXPORT_SYMBOL(find_vma); 812 EXPORT_SYMBOL(find_vma); 621 813 622 /* 814 /* 623 * At least xtensa ends up having protection f !! 815 * find a VMA 624 * MMU.. No stack expansion, at least. !! 816 * - we don't extend stack VMAs under NOMMU conditions 625 */ 817 */ 626 struct vm_area_struct *lock_mm_and_find_vma(st !! 818 struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) 627 unsigned long addr, st << 628 { 819 { 629 struct vm_area_struct *vma; !! 820 return find_vma(mm, addr); 630 << 631 mmap_read_lock(mm); << 632 vma = vma_lookup(mm, addr); << 633 if (!vma) << 634 mmap_read_unlock(mm); << 635 return vma; << 636 } 821 } 637 822 638 /* 823 /* 639 * expand a stack to a given address 824 * expand a stack to a given address 640 * - not supported under NOMMU conditions 825 * - not supported under NOMMU conditions 641 */ 826 */ 642 int expand_stack_locked(struct vm_area_struct !! 827 int expand_stack(struct vm_area_struct *vma, unsigned long address) 643 { 828 { 644 return -ENOMEM; 829 return -ENOMEM; 645 } 830 } 646 831 647 struct vm_area_struct *expand_stack(struct mm_ << 648 { << 649 mmap_read_unlock(mm); << 650 return NULL; << 651 } << 652 << 653 /* 832 /* 654 * look up the first VMA exactly that exactly 833 * look up the first VMA exactly that exactly matches addr 655 * - should be called with mm->mmap_lock at le !! 
834 * - should be called with mm->mmap_sem at least held readlocked 656 */ 835 */ 657 static struct vm_area_struct *find_vma_exact(s 836 static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, 658 u 837 unsigned long addr, 659 u 838 unsigned long len) 660 { 839 { 661 struct vm_area_struct *vma; 840 struct vm_area_struct *vma; 662 unsigned long end = addr + len; 841 unsigned long end = addr + len; 663 VMA_ITERATOR(vmi, mm, addr); << 664 842 665 vma = vma_iter_load(&vmi); !! 843 /* check the cache first */ 666 if (!vma) !! 844 vma = vmacache_find_exact(mm, addr, end); 667 return NULL; !! 845 if (vma) 668 if (vma->vm_start != addr) !! 846 return vma; 669 return NULL; !! 847 670 if (vma->vm_end != end) !! 848 /* trawl the list (there may be multiple mappings in which addr 671 return NULL; !! 849 * resides) */ >> 850 for (vma = mm->mmap; vma; vma = vma->vm_next) { >> 851 if (vma->vm_start < addr) >> 852 continue; >> 853 if (vma->vm_start > addr) >> 854 return NULL; >> 855 if (vma->vm_end == end) { >> 856 vmacache_update(addr, vma); >> 857 return vma; >> 858 } >> 859 } 672 860 673 return vma; !! 861 return NULL; 674 } 862 } 675 863 676 /* 864 /* 677 * determine whether a mapping should be permi 865 * determine whether a mapping should be permitted and, if so, what sort of 678 * mapping we're capable of supporting 866 * mapping we're capable of supporting 679 */ 867 */ 680 static int validate_mmap_request(struct file * 868 static int validate_mmap_request(struct file *file, 681 unsigned long 869 unsigned long addr, 682 unsigned long 870 unsigned long len, 683 unsigned long 871 unsigned long prot, 684 unsigned long 872 unsigned long flags, 685 unsigned long 873 unsigned long pgoff, 686 unsigned long 874 unsigned long *_capabilities) 687 { 875 { 688 unsigned long capabilities, rlen; 876 unsigned long capabilities, rlen; 689 int ret; 877 int ret; 690 878 691 /* do the simple checks first */ 879 /* do the simple checks first */ 692 if (flags & MAP_FIXED) 880 if (flags & MAP_FIXED) 693 return -EINVAL; 881 return -EINVAL; 694 882 695 if ((flags & MAP_TYPE) != MAP_PRIVATE 883 if ((flags & MAP_TYPE) != MAP_PRIVATE && 696 (flags & MAP_TYPE) != MAP_SHARED) 884 (flags & MAP_TYPE) != MAP_SHARED) 697 return -EINVAL; 885 return -EINVAL; 698 886 699 if (!len) 887 if (!len) 700 return -EINVAL; 888 return -EINVAL; 701 889 702 /* Careful about overflows.. */ 890 /* Careful about overflows.. */ 703 rlen = PAGE_ALIGN(len); 891 rlen = PAGE_ALIGN(len); 704 if (!rlen || rlen > TASK_SIZE) 892 if (!rlen || rlen > TASK_SIZE) 705 return -ENOMEM; 893 return -ENOMEM; 706 894 707 /* offset overflow? */ 895 /* offset overflow? 
*/ 708 if ((pgoff + (rlen >> PAGE_SHIFT)) < p 896 if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff) 709 return -EOVERFLOW; 897 return -EOVERFLOW; 710 898 711 if (file) { 899 if (file) { 712 /* files must support mmap */ 900 /* files must support mmap */ 713 if (!file->f_op->mmap) 901 if (!file->f_op->mmap) 714 return -ENODEV; 902 return -ENODEV; 715 903 716 /* work out if what we've got 904 /* work out if what we've got could possibly be shared 717 * - we support chardevs that 905 * - we support chardevs that provide their own "memory" 718 * - we support files/blockdev 906 * - we support files/blockdevs that are memory backed 719 */ 907 */ 720 if (file->f_op->mmap_capabilit 908 if (file->f_op->mmap_capabilities) { 721 capabilities = file->f 909 capabilities = file->f_op->mmap_capabilities(file); 722 } else { 910 } else { 723 /* no explicit capabil 911 /* no explicit capabilities set, so assume some 724 * defaults */ 912 * defaults */ 725 switch (file_inode(fil 913 switch (file_inode(file)->i_mode & S_IFMT) { 726 case S_IFREG: 914 case S_IFREG: 727 case S_IFBLK: 915 case S_IFBLK: 728 capabilities = 916 capabilities = NOMMU_MAP_COPY; 729 break; 917 break; 730 918 731 case S_IFCHR: 919 case S_IFCHR: 732 capabilities = 920 capabilities = 733 NOMMU_ 921 NOMMU_MAP_DIRECT | 734 NOMMU_ 922 NOMMU_MAP_READ | 735 NOMMU_ 923 NOMMU_MAP_WRITE; 736 break; 924 break; 737 925 738 default: 926 default: 739 return -EINVAL 927 return -EINVAL; 740 } 928 } 741 } 929 } 742 930 743 /* eliminate any capabilities 931 /* eliminate any capabilities that we can't support on this 744 * device */ 932 * device */ 745 if (!file->f_op->get_unmapped_ 933 if (!file->f_op->get_unmapped_area) 746 capabilities &= ~NOMMU 934 capabilities &= ~NOMMU_MAP_DIRECT; 747 if (!(file->f_mode & FMODE_CAN 935 if (!(file->f_mode & FMODE_CAN_READ)) 748 capabilities &= ~NOMMU 936 capabilities &= ~NOMMU_MAP_COPY; 749 937 750 /* The file shall have been op 938 /* The file shall have been opened with read permission. 
*/ 751 if (!(file->f_mode & FMODE_REA 939 if (!(file->f_mode & FMODE_READ)) 752 return -EACCES; 940 return -EACCES; 753 941 754 if (flags & MAP_SHARED) { 942 if (flags & MAP_SHARED) { 755 /* do checks for writi 943 /* do checks for writing, appending and locking */ 756 if ((prot & PROT_WRITE 944 if ((prot & PROT_WRITE) && 757 !(file->f_mode & F 945 !(file->f_mode & FMODE_WRITE)) 758 return -EACCES 946 return -EACCES; 759 947 760 if (IS_APPEND(file_ino 948 if (IS_APPEND(file_inode(file)) && 761 (file->f_mode & FM 949 (file->f_mode & FMODE_WRITE)) 762 return -EACCES 950 return -EACCES; 763 951 >> 952 if (locks_verify_locked(file)) >> 953 return -EAGAIN; >> 954 764 if (!(capabilities & N 955 if (!(capabilities & NOMMU_MAP_DIRECT)) 765 return -ENODEV 956 return -ENODEV; 766 957 767 /* we mustn't privatis 958 /* we mustn't privatise shared mappings */ 768 capabilities &= ~NOMMU 959 capabilities &= ~NOMMU_MAP_COPY; 769 } else { 960 } else { 770 /* we're going to read 961 /* we're going to read the file into private memory we 771 * allocate */ 962 * allocate */ 772 if (!(capabilities & N 963 if (!(capabilities & NOMMU_MAP_COPY)) 773 return -ENODEV 964 return -ENODEV; 774 965 775 /* we don't permit a p 966 /* we don't permit a private writable mapping to be 776 * shared with the bac 967 * shared with the backing device */ 777 if (prot & PROT_WRITE) 968 if (prot & PROT_WRITE) 778 capabilities & 969 capabilities &= ~NOMMU_MAP_DIRECT; 779 } 970 } 780 971 781 if (capabilities & NOMMU_MAP_D 972 if (capabilities & NOMMU_MAP_DIRECT) { 782 if (((prot & PROT_READ 973 if (((prot & PROT_READ) && !(capabilities & NOMMU_MAP_READ)) || 783 ((prot & PROT_WRIT 974 ((prot & PROT_WRITE) && !(capabilities & NOMMU_MAP_WRITE)) || 784 ((prot & PROT_EXEC 975 ((prot & PROT_EXEC) && !(capabilities & NOMMU_MAP_EXEC)) 785 ) { 976 ) { 786 capabilities & 977 capabilities &= ~NOMMU_MAP_DIRECT; 787 if (flags & MA 978 if (flags & MAP_SHARED) { 788 pr_war 979 pr_warn("MAP_SHARED not completely supported on !MMU\n"); 789 return 980 return -EINVAL; 790 } 981 } 791 } 982 } 792 } 983 } 793 984 794 /* handle executable mappings 985 /* handle executable mappings and implied executable 795 * mappings */ 986 * mappings */ 796 if (path_noexec(&file->f_path) 987 if (path_noexec(&file->f_path)) { 797 if (prot & PROT_EXEC) 988 if (prot & PROT_EXEC) 798 return -EPERM; 989 return -EPERM; 799 } else if ((prot & PROT_READ) 990 } else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) { 800 /* handle implication 991 /* handle implication of PROT_EXEC by PROT_READ */ 801 if (current->personali 992 if (current->personality & READ_IMPLIES_EXEC) { 802 if (capabiliti 993 if (capabilities & NOMMU_MAP_EXEC) 803 prot | 994 prot |= PROT_EXEC; 804 } 995 } 805 } else if ((prot & PROT_READ) 996 } else if ((prot & PROT_READ) && 806 (prot & PROT_EXEC) && 997 (prot & PROT_EXEC) && 807 !(capabilities & NOMM 998 !(capabilities & NOMMU_MAP_EXEC) 808 ) { 999 ) { 809 /* backing file is not 1000 /* backing file is not executable, try to copy */ 810 capabilities &= ~NOMMU 1001 capabilities &= ~NOMMU_MAP_DIRECT; 811 } 1002 } 812 } else { 1003 } else { 813 /* anonymous mappings are alwa 1004 /* anonymous mappings are always memory backed and can be 814 * privately mapped 1005 * privately mapped 815 */ 1006 */ 816 capabilities = NOMMU_MAP_COPY; 1007 capabilities = NOMMU_MAP_COPY; 817 1008 818 /* handle PROT_EXEC implicatio 1009 /* handle PROT_EXEC implication by PROT_READ */ 819 if ((prot & PROT_READ) && 1010 if ((prot & PROT_READ) && 820 (current->personality & RE 1011 
(current->personality & READ_IMPLIES_EXEC)) 821 prot |= PROT_EXEC; 1012 prot |= PROT_EXEC; 822 } 1013 } 823 1014 824 /* allow the security API to have its 1015 /* allow the security API to have its say */ 825 ret = security_mmap_addr(addr); 1016 ret = security_mmap_addr(addr); 826 if (ret < 0) 1017 if (ret < 0) 827 return ret; 1018 return ret; 828 1019 829 /* looks okay */ 1020 /* looks okay */ 830 *_capabilities = capabilities; 1021 *_capabilities = capabilities; 831 return 0; 1022 return 0; 832 } 1023 } 833 1024 834 /* 1025 /* 835 * we've determined that we can make the mappi 1026 * we've determined that we can make the mapping, now translate what we 836 * now know into VMA flags 1027 * now know into VMA flags 837 */ 1028 */ 838 static unsigned long determine_vm_flags(struct 1029 static unsigned long determine_vm_flags(struct file *file, 839 unsign 1030 unsigned long prot, 840 unsign 1031 unsigned long flags, 841 unsign 1032 unsigned long capabilities) 842 { 1033 { 843 unsigned long vm_flags; 1034 unsigned long vm_flags; 844 1035 845 vm_flags = calc_vm_prot_bits(prot, 0) !! 1036 vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags); >> 1037 /* vm_flags |= mm->def_flags; */ 846 1038 847 if (!file) { !! 1039 if (!(capabilities & NOMMU_MAP_DIRECT)) { 848 /* !! 1040 /* attempt to share read-only copies of mapped file chunks */ 849 * MAP_ANONYMOUS. MAP_SHARED i << 850 * there is no fork(). << 851 */ << 852 vm_flags |= VM_MAYREAD | VM_MA 1041 vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; 853 } else if (flags & MAP_PRIVATE) { !! 1042 if (file && !(prot & PROT_WRITE)) 854 /* MAP_PRIVATE file mapping */ !! 1043 vm_flags |= VM_MAYSHARE; 855 if (capabilities & NOMMU_MAP_D << 856 vm_flags |= (capabilit << 857 else << 858 vm_flags |= VM_MAYREAD << 859 << 860 if (!(prot & PROT_WRITE) && !c << 861 /* << 862 * R/O private file ma << 863 * modify memory, espe << 864 * (e.g., set breakpoi << 865 * permissions (no mpr << 866 * the file mapping, w << 867 * ramfs/tmpfs/shmfs a << 868 */ << 869 vm_flags |= VM_MAYOVER << 870 } else { 1044 } else { 871 /* MAP_SHARED file mapping: NO !! 1045 /* overlay a shareable mapping on the backing device or inode 872 vm_flags |= VM_SHARED | VM_MAY !! 1046 * if possible - used for chardevs, ramfs/tmpfs/shmfs and 873 (capabilities & NO !! 1047 * romfs/cramfs */ >> 1048 vm_flags |= VM_MAYSHARE | (capabilities & NOMMU_VMFLAGS); >> 1049 if (flags & MAP_SHARED) >> 1050 vm_flags |= VM_SHARED; 874 } 1051 } 875 1052 >> 1053 /* refuse to let anyone share private mappings with this process if >> 1054 * it's being traced - otherwise breakpoints set in it may interfere >> 1055 * with another untraced process >> 1056 */ >> 1057 if ((flags & MAP_PRIVATE) && current->ptrace) >> 1058 vm_flags &= ~VM_MAYSHARE; >> 1059 876 return vm_flags; 1060 return vm_flags; 877 } 1061 } 878 1062 879 /* 1063 /* 880 * set up a shared mapping on a file (the driv 1064 * set up a shared mapping on a file (the driver or filesystem provides and 881 * pins the storage) 1065 * pins the storage) 882 */ 1066 */ 883 static int do_mmap_shared_file(struct vm_area_ 1067 static int do_mmap_shared_file(struct vm_area_struct *vma) 884 { 1068 { 885 int ret; 1069 int ret; 886 1070 887 ret = mmap_file(vma->vm_file, vma); !! 
1071 ret = call_mmap(vma->vm_file, vma); 888 if (ret == 0) { 1072 if (ret == 0) { 889 vma->vm_region->vm_top = vma-> 1073 vma->vm_region->vm_top = vma->vm_region->vm_end; 890 return 0; 1074 return 0; 891 } 1075 } 892 if (ret != -ENOSYS) 1076 if (ret != -ENOSYS) 893 return ret; 1077 return ret; 894 1078 895 /* getting -ENOSYS indicates that dire 1079 /* getting -ENOSYS indicates that direct mmap isn't possible (as 896 * opposed to tried but failed) so we 1080 * opposed to tried but failed) so we can only give a suitable error as 897 * it's not possible to make a private 1081 * it's not possible to make a private copy if MAP_SHARED was given */ 898 return -ENODEV; 1082 return -ENODEV; 899 } 1083 } 900 1084 901 /* 1085 /* 902 * set up a private mapping or an anonymous sh 1086 * set up a private mapping or an anonymous shared mapping 903 */ 1087 */ 904 static int do_mmap_private(struct vm_area_stru 1088 static int do_mmap_private(struct vm_area_struct *vma, 905 struct vm_region *r 1089 struct vm_region *region, 906 unsigned long len, 1090 unsigned long len, 907 unsigned long capab 1091 unsigned long capabilities) 908 { 1092 { 909 unsigned long total, point; 1093 unsigned long total, point; 910 void *base; 1094 void *base; 911 int ret, order; 1095 int ret, order; 912 1096 913 /* !! 1097 /* invoke the file's mapping function so that it can keep track of 914 * Invoke the file's mapping function !! 1098 * shared mappings on devices or memory 915 * shared mappings on devices or memor !! 1099 * - VM_MAYSHARE will be set if it may attempt to share 916 * it may attempt to share, which will << 917 * happy. << 918 */ 1100 */ 919 if (capabilities & NOMMU_MAP_DIRECT) { 1101 if (capabilities & NOMMU_MAP_DIRECT) { 920 ret = mmap_file(vma->vm_file, !! 1102 ret = call_mmap(vma->vm_file, vma); 921 /* shouldn't return success if << 922 if (WARN_ON_ONCE(!is_nommu_sha << 923 ret = -ENOSYS; << 924 if (ret == 0) { 1103 if (ret == 0) { >> 1104 /* shouldn't return success if we're not sharing */ >> 1105 BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); 925 vma->vm_region->vm_top 1106 vma->vm_region->vm_top = vma->vm_region->vm_end; 926 return 0; 1107 return 0; 927 } 1108 } 928 if (ret != -ENOSYS) 1109 if (ret != -ENOSYS) 929 return ret; 1110 return ret; 930 1111 931 /* getting an ENOSYS error ind 1112 /* getting an ENOSYS error indicates that direct mmap isn't 932 * possible (as opposed to tri 1113 * possible (as opposed to tried but failed) so we'll try to 933 * make a private copy of the 1114 * make a private copy of the data and map that instead */ 934 } 1115 } 935 1116 936 1117 937 /* allocate some memory to hold the ma 1118 /* allocate some memory to hold the mapping 938 * - note that this may not return a p 1119 * - note that this may not return a page-aligned address if the object 939 * we're allocating is smaller than 1120 * we're allocating is smaller than a page 940 */ 1121 */ 941 order = get_order(len); 1122 order = get_order(len); 942 total = 1 << order; 1123 total = 1 << order; 943 point = len >> PAGE_SHIFT; 1124 point = len >> PAGE_SHIFT; 944 1125 945 /* we don't want to allocate a power-o 1126 /* we don't want to allocate a power-of-2 sized page set */ 946 if (sysctl_nr_trim_pages && total - po 1127 if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) 947 total = point; 1128 total = point; 948 1129 949 base = alloc_pages_exact(total << PAGE 1130 base = alloc_pages_exact(total << PAGE_SHIFT, GFP_KERNEL); 950 if (!base) 1131 if (!base) 951 goto enomem; 1132 goto enomem; 952 1133 953 
atomic_long_add(total, &mmap_pages_all 1134 atomic_long_add(total, &mmap_pages_allocated); 954 1135 955 vm_flags_set(vma, VM_MAPPED_COPY); !! 1136 region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY; 956 region->vm_flags = vma->vm_flags; << 957 region->vm_start = (unsigned long) bas 1137 region->vm_start = (unsigned long) base; 958 region->vm_end = region->vm_start + 1138 region->vm_end = region->vm_start + len; 959 region->vm_top = region->vm_start + 1139 region->vm_top = region->vm_start + (total << PAGE_SHIFT); 960 1140 961 vma->vm_start = region->vm_start; 1141 vma->vm_start = region->vm_start; 962 vma->vm_end = region->vm_start + len 1142 vma->vm_end = region->vm_start + len; 963 1143 964 if (vma->vm_file) { 1144 if (vma->vm_file) { 965 /* read the contents of a file 1145 /* read the contents of a file into the copy */ 966 loff_t fpos; 1146 loff_t fpos; 967 1147 968 fpos = vma->vm_pgoff; 1148 fpos = vma->vm_pgoff; 969 fpos <<= PAGE_SHIFT; 1149 fpos <<= PAGE_SHIFT; 970 1150 971 ret = kernel_read(vma->vm_file 1151 ret = kernel_read(vma->vm_file, base, len, &fpos); 972 if (ret < 0) 1152 if (ret < 0) 973 goto error_free; 1153 goto error_free; 974 1154 975 /* clear the last little bit * 1155 /* clear the last little bit */ 976 if (ret < len) 1156 if (ret < len) 977 memset(base + ret, 0, 1157 memset(base + ret, 0, len - ret); 978 1158 979 } else { 1159 } else { 980 vma_set_anonymous(vma); 1160 vma_set_anonymous(vma); 981 } 1161 } 982 1162 983 return 0; 1163 return 0; 984 1164 985 error_free: 1165 error_free: 986 free_page_series(region->vm_start, reg 1166 free_page_series(region->vm_start, region->vm_top); 987 region->vm_start = vma->vm_start = 0; 1167 region->vm_start = vma->vm_start = 0; 988 region->vm_end = vma->vm_end = 0; 1168 region->vm_end = vma->vm_end = 0; 989 region->vm_top = 0; 1169 region->vm_top = 0; 990 return ret; 1170 return ret; 991 1171 992 enomem: 1172 enomem: 993 pr_err("Allocation of length %lu from 1173 pr_err("Allocation of length %lu from process %d (%s) failed\n", 994 len, current->pid, current->com 1174 len, current->pid, current->comm); 995 show_mem(); !! 
1175 show_free_areas(0, NULL); 996 return -ENOMEM; 1176 return -ENOMEM; 997 } 1177 } 998 1178 999 /* 1179 /* 1000 * handle mapping creation for uClinux 1180 * handle mapping creation for uClinux 1001 */ 1181 */ 1002 unsigned long do_mmap(struct file *file, 1182 unsigned long do_mmap(struct file *file, 1003 unsigned long addr, 1183 unsigned long addr, 1004 unsigned long len, 1184 unsigned long len, 1005 unsigned long prot, 1185 unsigned long prot, 1006 unsigned long flags, 1186 unsigned long flags, 1007 vm_flags_t vm_flags, 1187 vm_flags_t vm_flags, 1008 unsigned long pgoff, 1188 unsigned long pgoff, 1009 unsigned long *popula 1189 unsigned long *populate, 1010 struct list_head *uf) 1190 struct list_head *uf) 1011 { 1191 { 1012 struct vm_area_struct *vma; 1192 struct vm_area_struct *vma; 1013 struct vm_region *region; 1193 struct vm_region *region; 1014 struct rb_node *rb; 1194 struct rb_node *rb; 1015 unsigned long capabilities, result; 1195 unsigned long capabilities, result; 1016 int ret; 1196 int ret; 1017 VMA_ITERATOR(vmi, current->mm, 0); << 1018 1197 1019 *populate = 0; 1198 *populate = 0; 1020 1199 1021 /* decide whether we should attempt t 1200 /* decide whether we should attempt the mapping, and if so what sort of 1022 * mapping */ 1201 * mapping */ 1023 ret = validate_mmap_request(file, add 1202 ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, 1024 &capabili 1203 &capabilities); 1025 if (ret < 0) 1204 if (ret < 0) 1026 return ret; 1205 return ret; 1027 1206 1028 /* we ignore the address hint */ 1207 /* we ignore the address hint */ 1029 addr = 0; 1208 addr = 0; 1030 len = PAGE_ALIGN(len); 1209 len = PAGE_ALIGN(len); 1031 1210 1032 /* we've determined that we can make 1211 /* we've determined that we can make the mapping, now translate what we 1033 * now know into VMA flags */ 1212 * now know into VMA flags */ 1034 vm_flags |= determine_vm_flags(file, 1213 vm_flags |= determine_vm_flags(file, prot, flags, capabilities); 1035 1214 1036 << 1037 /* we're going to need to record the 1215 /* we're going to need to record the mapping */ 1038 region = kmem_cache_zalloc(vm_region_ 1216 region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL); 1039 if (!region) 1217 if (!region) 1040 goto error_getting_region; 1218 goto error_getting_region; 1041 1219 1042 vma = vm_area_alloc(current->mm); 1220 vma = vm_area_alloc(current->mm); 1043 if (!vma) 1221 if (!vma) 1044 goto error_getting_vma; 1222 goto error_getting_vma; 1045 1223 1046 region->vm_usage = 1; 1224 region->vm_usage = 1; 1047 region->vm_flags = vm_flags; 1225 region->vm_flags = vm_flags; 1048 region->vm_pgoff = pgoff; 1226 region->vm_pgoff = pgoff; 1049 1227 1050 vm_flags_init(vma, vm_flags); !! 
1228 vma->vm_flags = vm_flags; 1051 vma->vm_pgoff = pgoff; 1229 vma->vm_pgoff = pgoff; 1052 1230 1053 if (file) { 1231 if (file) { 1054 region->vm_file = get_file(fi 1232 region->vm_file = get_file(file); 1055 vma->vm_file = get_file(file) 1233 vma->vm_file = get_file(file); 1056 } 1234 } 1057 1235 1058 down_write(&nommu_region_sem); 1236 down_write(&nommu_region_sem); 1059 1237 1060 /* if we want to share, we need to ch 1238 /* if we want to share, we need to check for regions created by other 1061 * mmap() calls that overlap with our 1239 * mmap() calls that overlap with our proposed mapping 1062 * - we can only share with a superse 1240 * - we can only share with a superset match on most regular files 1063 * - shared mappings on character dev 1241 * - shared mappings on character devices and memory backed files are 1064 * permitted to overlap inexactly a 1242 * permitted to overlap inexactly as far as we are concerned for in 1065 * these cases, sharing is handled 1243 * these cases, sharing is handled in the driver or filesystem rather 1066 * than here 1244 * than here 1067 */ 1245 */ 1068 if (is_nommu_shared_mapping(vm_flags) !! 1246 if (vm_flags & VM_MAYSHARE) { 1069 struct vm_region *pregion; 1247 struct vm_region *pregion; 1070 unsigned long pglen, rpglen, 1248 unsigned long pglen, rpglen, pgend, rpgend, start; 1071 1249 1072 pglen = (len + PAGE_SIZE - 1) 1250 pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1073 pgend = pgoff + pglen; 1251 pgend = pgoff + pglen; 1074 1252 1075 for (rb = rb_first(&nommu_reg 1253 for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) { 1076 pregion = rb_entry(rb 1254 pregion = rb_entry(rb, struct vm_region, vm_rb); 1077 1255 1078 if (!is_nommu_shared_ !! 1256 if (!(pregion->vm_flags & VM_MAYSHARE)) 1079 continue; 1257 continue; 1080 1258 1081 /* search for overlap 1259 /* search for overlapping mappings on the same file */ 1082 if (file_inode(pregio 1260 if (file_inode(pregion->vm_file) != 1083 file_inode(file)) 1261 file_inode(file)) 1084 continue; 1262 continue; 1085 1263 1086 if (pregion->vm_pgoff 1264 if (pregion->vm_pgoff >= pgend) 1087 continue; 1265 continue; 1088 1266 1089 rpglen = pregion->vm_ 1267 rpglen = pregion->vm_end - pregion->vm_start; 1090 rpglen = (rpglen + PA 1268 rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT; 1091 rpgend = pregion->vm_ 1269 rpgend = pregion->vm_pgoff + rpglen; 1092 if (pgoff >= rpgend) 1270 if (pgoff >= rpgend) 1093 continue; 1271 continue; 1094 1272 1095 /* handle inexactly o 1273 /* handle inexactly overlapping matches between 1096 * mappings */ 1274 * mappings */ 1097 if ((pregion->vm_pgof 1275 if ((pregion->vm_pgoff != pgoff || rpglen != pglen) && 1098 !(pgoff >= pregio 1276 !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) { 1099 /* new mappin 1277 /* new mapping is not a subset of the region */ 1100 if (!(capabil 1278 if (!(capabilities & NOMMU_MAP_DIRECT)) 1101 goto 1279 goto sharing_violation; 1102 continue; 1280 continue; 1103 } 1281 } 1104 1282 1105 /* we've found a regi 1283 /* we've found a region we can share */ 1106 pregion->vm_usage++; 1284 pregion->vm_usage++; 1107 vma->vm_region = preg 1285 vma->vm_region = pregion; 1108 start = pregion->vm_s 1286 start = pregion->vm_start; 1109 start += (pgoff - pre 1287 start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; 1110 vma->vm_start = start 1288 vma->vm_start = start; 1111 vma->vm_end = start + 1289 vma->vm_end = start + len; 1112 1290 1113 if (pregion->vm_flags 1291 if (pregion->vm_flags & VM_MAPPED_COPY) 1114 vm_flags_set( !! 
1292 vma->vm_flags |= VM_MAPPED_COPY; 1115 else { 1293 else { 1116 ret = do_mmap 1294 ret = do_mmap_shared_file(vma); 1117 if (ret < 0) 1295 if (ret < 0) { 1118 vma-> 1296 vma->vm_region = NULL; 1119 vma-> 1297 vma->vm_start = 0; 1120 vma-> 1298 vma->vm_end = 0; 1121 pregi 1299 pregion->vm_usage--; 1122 pregi 1300 pregion = NULL; 1123 goto 1301 goto error_just_free; 1124 } 1302 } 1125 } 1303 } 1126 fput(region->vm_file) 1304 fput(region->vm_file); 1127 kmem_cache_free(vm_re 1305 kmem_cache_free(vm_region_jar, region); 1128 region = pregion; 1306 region = pregion; 1129 result = start; 1307 result = start; 1130 goto share; 1308 goto share; 1131 } 1309 } 1132 1310 1133 /* obtain the address at whic 1311 /* obtain the address at which to make a shared mapping 1134 * - this is the hook for qua 1312 * - this is the hook for quasi-memory character devices to 1135 * tell us the location of 1313 * tell us the location of a shared mapping 1136 */ 1314 */ 1137 if (capabilities & NOMMU_MAP_ 1315 if (capabilities & NOMMU_MAP_DIRECT) { 1138 addr = file->f_op->ge 1316 addr = file->f_op->get_unmapped_area(file, addr, len, 1139 1317 pgoff, flags); 1140 if (IS_ERR_VALUE(addr 1318 if (IS_ERR_VALUE(addr)) { 1141 ret = addr; 1319 ret = addr; 1142 if (ret != -E 1320 if (ret != -ENOSYS) 1143 goto 1321 goto error_just_free; 1144 1322 1145 /* the driver 1323 /* the driver refused to tell us where to site 1146 * the mappin 1324 * the mapping so we'll have to attempt to copy 1147 * it */ 1325 * it */ 1148 ret = -ENODEV 1326 ret = -ENODEV; 1149 if (!(capabil 1327 if (!(capabilities & NOMMU_MAP_COPY)) 1150 goto 1328 goto error_just_free; 1151 1329 1152 capabilities 1330 capabilities &= ~NOMMU_MAP_DIRECT; 1153 } else { 1331 } else { 1154 vma->vm_start 1332 vma->vm_start = region->vm_start = addr; 1155 vma->vm_end = 1333 vma->vm_end = region->vm_end = addr + len; 1156 } 1334 } 1157 } 1335 } 1158 } 1336 } 1159 1337 1160 vma->vm_region = region; 1338 vma->vm_region = region; 1161 1339 1162 /* set up the mapping 1340 /* set up the mapping 1163 * - the region is filled in if NOMMU 1341 * - the region is filled in if NOMMU_MAP_DIRECT is still set 1164 */ 1342 */ 1165 if (file && vma->vm_flags & VM_SHARED 1343 if (file && vma->vm_flags & VM_SHARED) 1166 ret = do_mmap_shared_file(vma 1344 ret = do_mmap_shared_file(vma); 1167 else 1345 else 1168 ret = do_mmap_private(vma, re 1346 ret = do_mmap_private(vma, region, len, capabilities); 1169 if (ret < 0) 1347 if (ret < 0) 1170 goto error_just_free; 1348 goto error_just_free; 1171 add_nommu_region(region); 1349 add_nommu_region(region); 1172 1350 1173 /* clear anonymous mappings that don' 1351 /* clear anonymous mappings that don't ask for uninitialized data */ 1174 if (!vma->vm_file && !! 1352 if (!vma->vm_file && !(flags & MAP_UNINITIALIZED)) 1175 (!IS_ENABLED(CONFIG_MMAP_ALLOW_UN << 1176 !(flags & MAP_UNINITIALIZED))) << 1177 memset((void *)region->vm_sta 1353 memset((void *)region->vm_start, 0, 1178 region->vm_end - regio 1354 region->vm_end - region->vm_start); 1179 1355 1180 /* okay... we have a mapping; now we 1356 /* okay... we have a mapping; now we have to register it */ 1181 result = vma->vm_start; 1357 result = vma->vm_start; 1182 1358 1183 current->mm->total_vm += len >> PAGE_ 1359 current->mm->total_vm += len >> PAGE_SHIFT; 1184 1360 1185 share: 1361 share: 1186 BUG_ON(!vma->vm_region); !! 
1362 add_vma_to_mm(current->mm, vma); 1187 vma_iter_config(&vmi, vma->vm_start, << 1188 if (vma_iter_prealloc(&vmi, vma)) << 1189 goto error_just_free; << 1190 << 1191 setup_vma_to_mm(vma, current->mm); << 1192 current->mm->map_count++; << 1193 /* add the VMA to the tree */ << 1194 vma_iter_store(&vmi, vma); << 1195 1363 1196 /* we flush the region from the icach 1364 /* we flush the region from the icache only when the first executable 1197 * mapping of it is made */ 1365 * mapping of it is made */ 1198 if (vma->vm_flags & VM_EXEC && !regio 1366 if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) { 1199 flush_icache_user_range(regio !! 1367 flush_icache_range(region->vm_start, region->vm_end); 1200 region->vm_icache_flushed = t 1368 region->vm_icache_flushed = true; 1201 } 1369 } 1202 1370 1203 up_write(&nommu_region_sem); 1371 up_write(&nommu_region_sem); 1204 1372 1205 return result; 1373 return result; 1206 1374 1207 error_just_free: 1375 error_just_free: 1208 up_write(&nommu_region_sem); 1376 up_write(&nommu_region_sem); 1209 error: 1377 error: 1210 vma_iter_free(&vmi); << 1211 if (region->vm_file) 1378 if (region->vm_file) 1212 fput(region->vm_file); 1379 fput(region->vm_file); 1213 kmem_cache_free(vm_region_jar, region 1380 kmem_cache_free(vm_region_jar, region); 1214 if (vma->vm_file) 1381 if (vma->vm_file) 1215 fput(vma->vm_file); 1382 fput(vma->vm_file); 1216 vm_area_free(vma); 1383 vm_area_free(vma); 1217 return ret; 1384 return ret; 1218 1385 1219 sharing_violation: 1386 sharing_violation: 1220 up_write(&nommu_region_sem); 1387 up_write(&nommu_region_sem); 1221 pr_warn("Attempt to share mismatched 1388 pr_warn("Attempt to share mismatched mappings\n"); 1222 ret = -EINVAL; 1389 ret = -EINVAL; 1223 goto error; 1390 goto error; 1224 1391 1225 error_getting_vma: 1392 error_getting_vma: 1226 kmem_cache_free(vm_region_jar, region 1393 kmem_cache_free(vm_region_jar, region); 1227 pr_warn("Allocation of vma for %lu by 1394 pr_warn("Allocation of vma for %lu byte allocation from process %d failed\n", 1228 len, current->pid); 1395 len, current->pid); 1229 show_mem(); !! 1396 show_free_areas(0, NULL); 1230 return -ENOMEM; 1397 return -ENOMEM; 1231 1398 1232 error_getting_region: 1399 error_getting_region: 1233 pr_warn("Allocation of vm region for 1400 pr_warn("Allocation of vm region for %lu byte allocation from process %d failed\n", 1234 len, current->pid); 1401 len, current->pid); 1235 show_mem(); !! 
1402 show_free_areas(0, NULL); 1236 return -ENOMEM; 1403 return -ENOMEM; 1237 } 1404 } 1238 1405 1239 unsigned long ksys_mmap_pgoff(unsigned long a 1406 unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, 1240 unsigned long p 1407 unsigned long prot, unsigned long flags, 1241 unsigned long f 1408 unsigned long fd, unsigned long pgoff) 1242 { 1409 { 1243 struct file *file = NULL; 1410 struct file *file = NULL; 1244 unsigned long retval = -EBADF; 1411 unsigned long retval = -EBADF; 1245 1412 1246 audit_mmap_fd(fd, flags); 1413 audit_mmap_fd(fd, flags); 1247 if (!(flags & MAP_ANONYMOUS)) { 1414 if (!(flags & MAP_ANONYMOUS)) { 1248 file = fget(fd); 1415 file = fget(fd); 1249 if (!file) 1416 if (!file) 1250 goto out; 1417 goto out; 1251 } 1418 } 1252 1419 >> 1420 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); >> 1421 1253 retval = vm_mmap_pgoff(file, addr, le 1422 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); 1254 1423 1255 if (file) 1424 if (file) 1256 fput(file); 1425 fput(file); 1257 out: 1426 out: 1258 return retval; 1427 return retval; 1259 } 1428 } 1260 1429 1261 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, ad 1430 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, 1262 unsigned long, prot, unsigned 1431 unsigned long, prot, unsigned long, flags, 1263 unsigned long, fd, unsigned l 1432 unsigned long, fd, unsigned long, pgoff) 1264 { 1433 { 1265 return ksys_mmap_pgoff(addr, len, pro 1434 return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); 1266 } 1435 } 1267 1436 1268 #ifdef __ARCH_WANT_SYS_OLD_MMAP 1437 #ifdef __ARCH_WANT_SYS_OLD_MMAP 1269 struct mmap_arg_struct { 1438 struct mmap_arg_struct { 1270 unsigned long addr; 1439 unsigned long addr; 1271 unsigned long len; 1440 unsigned long len; 1272 unsigned long prot; 1441 unsigned long prot; 1273 unsigned long flags; 1442 unsigned long flags; 1274 unsigned long fd; 1443 unsigned long fd; 1275 unsigned long offset; 1444 unsigned long offset; 1276 }; 1445 }; 1277 1446 1278 SYSCALL_DEFINE1(old_mmap, struct mmap_arg_str 1447 SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) 1279 { 1448 { 1280 struct mmap_arg_struct a; 1449 struct mmap_arg_struct a; 1281 1450 1282 if (copy_from_user(&a, arg, sizeof(a) 1451 if (copy_from_user(&a, arg, sizeof(a))) 1283 return -EFAULT; 1452 return -EFAULT; 1284 if (offset_in_page(a.offset)) 1453 if (offset_in_page(a.offset)) 1285 return -EINVAL; 1454 return -EINVAL; 1286 1455 1287 return ksys_mmap_pgoff(a.addr, a.len, 1456 return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, 1288 a.offset >> PA 1457 a.offset >> PAGE_SHIFT); 1289 } 1458 } 1290 #endif /* __ARCH_WANT_SYS_OLD_MMAP */ 1459 #endif /* __ARCH_WANT_SYS_OLD_MMAP */ 1291 1460 1292 /* 1461 /* 1293 * split a vma into two pieces at address 'ad 1462 * split a vma into two pieces at address 'addr', a new vma is allocated either 1294 * for the first part or the tail. 1463 * for the first part or the tail. 1295 */ 1464 */ 1296 static int split_vma(struct vma_iterator *vmi !! 1465 int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, 1297 unsigned long addr, int !! 
1466 unsigned long addr, int new_below) 1298 { 1467 { 1299 struct vm_area_struct *new; 1468 struct vm_area_struct *new; 1300 struct vm_region *region; 1469 struct vm_region *region; 1301 unsigned long npages; 1470 unsigned long npages; 1302 struct mm_struct *mm; << 1303 1471 1304 /* we're only permitted to split anon 1472 /* we're only permitted to split anonymous regions (these should have 1305 * only a single usage on the region) 1473 * only a single usage on the region) */ 1306 if (vma->vm_file) 1474 if (vma->vm_file) 1307 return -ENOMEM; 1475 return -ENOMEM; 1308 1476 1309 mm = vma->vm_mm; << 1310 if (mm->map_count >= sysctl_max_map_c 1477 if (mm->map_count >= sysctl_max_map_count) 1311 return -ENOMEM; 1478 return -ENOMEM; 1312 1479 1313 region = kmem_cache_alloc(vm_region_j 1480 region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL); 1314 if (!region) 1481 if (!region) 1315 return -ENOMEM; 1482 return -ENOMEM; 1316 1483 1317 new = vm_area_dup(vma); 1484 new = vm_area_dup(vma); 1318 if (!new) !! 1485 if (!new) { 1319 goto err_vma_dup; !! 1486 kmem_cache_free(vm_region_jar, region); >> 1487 return -ENOMEM; >> 1488 } 1320 1489 1321 /* most fields are the same, copy all 1490 /* most fields are the same, copy all, and then fixup */ 1322 *region = *vma->vm_region; 1491 *region = *vma->vm_region; 1323 new->vm_region = region; 1492 new->vm_region = region; 1324 1493 1325 npages = (addr - vma->vm_start) >> PA 1494 npages = (addr - vma->vm_start) >> PAGE_SHIFT; 1326 1495 1327 if (new_below) { 1496 if (new_below) { 1328 region->vm_top = region->vm_e 1497 region->vm_top = region->vm_end = new->vm_end = addr; 1329 } else { 1498 } else { 1330 region->vm_start = new->vm_st 1499 region->vm_start = new->vm_start = addr; 1331 region->vm_pgoff = new->vm_pg 1500 region->vm_pgoff = new->vm_pgoff += npages; 1332 } 1501 } 1333 1502 1334 vma_iter_config(vmi, new->vm_start, n << 1335 if (vma_iter_prealloc(vmi, vma)) { << 1336 pr_warn("Allocation of vma tr << 1337 current->pid); << 1338 goto err_vmi_preallocate; << 1339 } << 1340 << 1341 if (new->vm_ops && new->vm_ops->open) 1503 if (new->vm_ops && new->vm_ops->open) 1342 new->vm_ops->open(new); 1504 new->vm_ops->open(new); 1343 1505 >> 1506 delete_vma_from_mm(vma); 1344 down_write(&nommu_region_sem); 1507 down_write(&nommu_region_sem); 1345 delete_nommu_region(vma->vm_region); 1508 delete_nommu_region(vma->vm_region); 1346 if (new_below) { 1509 if (new_below) { 1347 vma->vm_region->vm_start = vm 1510 vma->vm_region->vm_start = vma->vm_start = addr; 1348 vma->vm_region->vm_pgoff = vm 1511 vma->vm_region->vm_pgoff = vma->vm_pgoff += npages; 1349 } else { 1512 } else { 1350 vma->vm_region->vm_end = vma- 1513 vma->vm_region->vm_end = vma->vm_end = addr; 1351 vma->vm_region->vm_top = addr 1514 vma->vm_region->vm_top = addr; 1352 } 1515 } 1353 add_nommu_region(vma->vm_region); 1516 add_nommu_region(vma->vm_region); 1354 add_nommu_region(new->vm_region); 1517 add_nommu_region(new->vm_region); 1355 up_write(&nommu_region_sem); 1518 up_write(&nommu_region_sem); 1356 !! 1519 add_vma_to_mm(mm, vma); 1357 setup_vma_to_mm(vma, mm); !! 
1520 add_vma_to_mm(mm, new); 1358 setup_vma_to_mm(new, mm); << 1359 vma_iter_store(vmi, new); << 1360 mm->map_count++; << 1361 return 0; 1521 return 0; 1362 << 1363 err_vmi_preallocate: << 1364 vm_area_free(new); << 1365 err_vma_dup: << 1366 kmem_cache_free(vm_region_jar, region << 1367 return -ENOMEM; << 1368 } 1522 } 1369 1523 1370 /* 1524 /* 1371 * shrink a VMA by removing the specified chu 1525 * shrink a VMA by removing the specified chunk from either the beginning or 1372 * the end 1526 * the end 1373 */ 1527 */ 1374 static int vmi_shrink_vma(struct vma_iterator !! 1528 static int shrink_vma(struct mm_struct *mm, 1375 struct vm_area_struct * 1529 struct vm_area_struct *vma, 1376 unsigned long from, uns 1530 unsigned long from, unsigned long to) 1377 { 1531 { 1378 struct vm_region *region; 1532 struct vm_region *region; 1379 1533 1380 /* adjust the VMA's pointers, which m 1534 /* adjust the VMA's pointers, which may reposition it in the MM's tree 1381 * and list */ 1535 * and list */ 1382 if (from > vma->vm_start) { !! 1536 delete_vma_from_mm(vma); 1383 if (vma_iter_clear_gfp(vmi, f !! 1537 if (from > vma->vm_start) 1384 return -ENOMEM; << 1385 vma->vm_end = from; 1538 vma->vm_end = from; 1386 } else { !! 1539 else 1387 if (vma_iter_clear_gfp(vmi, v << 1388 return -ENOMEM; << 1389 vma->vm_start = to; 1540 vma->vm_start = to; 1390 } !! 1541 add_vma_to_mm(mm, vma); 1391 1542 1392 /* cut the backing region down to siz 1543 /* cut the backing region down to size */ 1393 region = vma->vm_region; 1544 region = vma->vm_region; 1394 BUG_ON(region->vm_usage != 1); 1545 BUG_ON(region->vm_usage != 1); 1395 1546 1396 down_write(&nommu_region_sem); 1547 down_write(&nommu_region_sem); 1397 delete_nommu_region(region); 1548 delete_nommu_region(region); 1398 if (from > region->vm_start) { 1549 if (from > region->vm_start) { 1399 to = region->vm_top; 1550 to = region->vm_top; 1400 region->vm_top = region->vm_e 1551 region->vm_top = region->vm_end = from; 1401 } else { 1552 } else { 1402 region->vm_start = to; 1553 region->vm_start = to; 1403 } 1554 } 1404 add_nommu_region(region); 1555 add_nommu_region(region); 1405 up_write(&nommu_region_sem); 1556 up_write(&nommu_region_sem); 1406 1557 1407 free_page_series(from, to); 1558 free_page_series(from, to); 1408 return 0; 1559 return 0; 1409 } 1560 } 1410 1561 1411 /* 1562 /* 1412 * release a mapping 1563 * release a mapping 1413 * - under NOMMU conditions the chunk to be u 1564 * - under NOMMU conditions the chunk to be unmapped must be backed by a single 1414 * VMA, though it need not cover the whole 1565 * VMA, though it need not cover the whole VMA 1415 */ 1566 */ 1416 int do_munmap(struct mm_struct *mm, unsigned 1567 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf) 1417 { 1568 { 1418 VMA_ITERATOR(vmi, mm, start); << 1419 struct vm_area_struct *vma; 1569 struct vm_area_struct *vma; 1420 unsigned long end; 1570 unsigned long end; 1421 int ret = 0; !! 1571 int ret; 1422 1572 1423 len = PAGE_ALIGN(len); 1573 len = PAGE_ALIGN(len); 1424 if (len == 0) 1574 if (len == 0) 1425 return -EINVAL; 1575 return -EINVAL; 1426 1576 1427 end = start + len; 1577 end = start + len; 1428 1578 1429 /* find the first potentially overlap 1579 /* find the first potentially overlapping VMA */ 1430 vma = vma_find(&vmi, end); !! 
1580 vma = find_vma(mm, start); 1431 if (!vma) { 1581 if (!vma) { 1432 static int limit; 1582 static int limit; 1433 if (limit < 5) { 1583 if (limit < 5) { 1434 pr_warn("munmap of me 1584 pr_warn("munmap of memory not mmapped by process %d (%s): 0x%lx-0x%lx\n", 1435 curre 1585 current->pid, current->comm, 1436 start 1586 start, start + len - 1); 1437 limit++; 1587 limit++; 1438 } 1588 } 1439 return -EINVAL; 1589 return -EINVAL; 1440 } 1590 } 1441 1591 1442 /* we're allowed to split an anonymou 1592 /* we're allowed to split an anonymous VMA but not a file-backed one */ 1443 if (vma->vm_file) { 1593 if (vma->vm_file) { 1444 do { 1594 do { 1445 if (start > vma->vm_s 1595 if (start > vma->vm_start) 1446 return -EINVA 1596 return -EINVAL; 1447 if (end == vma->vm_en 1597 if (end == vma->vm_end) 1448 goto erase_wh 1598 goto erase_whole_vma; 1449 vma = vma_find(&vmi, !! 1599 vma = vma->vm_next; 1450 } while (vma); 1600 } while (vma); 1451 return -EINVAL; 1601 return -EINVAL; 1452 } else { 1602 } else { 1453 /* the chunk must be a subset 1603 /* the chunk must be a subset of the VMA found */ 1454 if (start == vma->vm_start && 1604 if (start == vma->vm_start && end == vma->vm_end) 1455 goto erase_whole_vma; 1605 goto erase_whole_vma; 1456 if (start < vma->vm_start || 1606 if (start < vma->vm_start || end > vma->vm_end) 1457 return -EINVAL; 1607 return -EINVAL; 1458 if (offset_in_page(start)) 1608 if (offset_in_page(start)) 1459 return -EINVAL; 1609 return -EINVAL; 1460 if (end != vma->vm_end && off 1610 if (end != vma->vm_end && offset_in_page(end)) 1461 return -EINVAL; 1611 return -EINVAL; 1462 if (start != vma->vm_start && 1612 if (start != vma->vm_start && end != vma->vm_end) { 1463 ret = split_vma(&vmi, !! 1613 ret = split_vma(mm, vma, start, 1); 1464 if (ret < 0) 1614 if (ret < 0) 1465 return ret; 1615 return ret; 1466 } 1616 } 1467 return vmi_shrink_vma(&vmi, v !! 1617 return shrink_vma(mm, vma, start, end); 1468 } 1618 } 1469 1619 1470 erase_whole_vma: 1620 erase_whole_vma: 1471 if (delete_vma_from_mm(vma)) !! 1621 delete_vma_from_mm(vma); 1472 ret = -ENOMEM; !! 1622 delete_vma(mm, vma); 1473 else !! 1623 return 0; 1474 delete_vma(mm, vma); << 1475 return ret; << 1476 } 1624 } >> 1625 EXPORT_SYMBOL(do_munmap); 1477 1626 1478 int vm_munmap(unsigned long addr, size_t len) 1627 int vm_munmap(unsigned long addr, size_t len) 1479 { 1628 { 1480 struct mm_struct *mm = current->mm; 1629 struct mm_struct *mm = current->mm; 1481 int ret; 1630 int ret; 1482 1631 1483 mmap_write_lock(mm); !! 1632 down_write(&mm->mmap_sem); 1484 ret = do_munmap(mm, addr, len, NULL); 1633 ret = do_munmap(mm, addr, len, NULL); 1485 mmap_write_unlock(mm); !! 1634 up_write(&mm->mmap_sem); 1486 return ret; 1635 return ret; 1487 } 1636 } 1488 EXPORT_SYMBOL(vm_munmap); 1637 EXPORT_SYMBOL(vm_munmap); 1489 1638 1490 SYSCALL_DEFINE2(munmap, unsigned long, addr, 1639 SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) 1491 { 1640 { 1492 return vm_munmap(addr, len); 1641 return vm_munmap(addr, len); 1493 } 1642 } 1494 1643 1495 /* 1644 /* 1496 * release all the mappings made in a process 1645 * release all the mappings made in a process's VM space 1497 */ 1646 */ 1498 void exit_mmap(struct mm_struct *mm) 1647 void exit_mmap(struct mm_struct *mm) 1499 { 1648 { 1500 VMA_ITERATOR(vmi, mm, 0); << 1501 struct vm_area_struct *vma; 1649 struct vm_area_struct *vma; 1502 1650 1503 if (!mm) 1651 if (!mm) 1504 return; 1652 return; 1505 1653 1506 mm->total_vm = 0; 1654 mm->total_vm = 0; 1507 1655 1508 /* !! 
1656 while ((vma = mm->mmap)) { 1509 * Lock the mm to avoid assert compla !! 1657 mm->mmap = vma->vm_next; 1510 * user of the mm !! 1658 delete_vma_from_mm(vma); 1511 */ << 1512 mmap_write_lock(mm); << 1513 for_each_vma(vmi, vma) { << 1514 cleanup_vma_from_mm(vma); << 1515 delete_vma(mm, vma); 1659 delete_vma(mm, vma); 1516 cond_resched(); 1660 cond_resched(); 1517 } 1661 } 1518 __mt_destroy(&mm->mm_mt); !! 1662 } 1519 mmap_write_unlock(mm); !! 1663 >> 1664 int vm_brk(unsigned long addr, unsigned long len) >> 1665 { >> 1666 return -ENOMEM; 1520 } 1667 } 1521 1668 1522 /* 1669 /* 1523 * expand (or shrink) an existing mapping, po 1670 * expand (or shrink) an existing mapping, potentially moving it at the same 1524 * time (controlled by the MREMAP_MAYMOVE fla 1671 * time (controlled by the MREMAP_MAYMOVE flag and available VM space) 1525 * 1672 * 1526 * under NOMMU conditions, we only permit cha 1673 * under NOMMU conditions, we only permit changing a mapping's size, and only 1527 * as long as it stays within the region allo 1674 * as long as it stays within the region allocated by do_mmap_private() and the 1528 * block is not shareable 1675 * block is not shareable 1529 * 1676 * 1530 * MREMAP_FIXED is not supported under NOMMU 1677 * MREMAP_FIXED is not supported under NOMMU conditions 1531 */ 1678 */ 1532 static unsigned long do_mremap(unsigned long 1679 static unsigned long do_mremap(unsigned long addr, 1533 unsigned long old_len 1680 unsigned long old_len, unsigned long new_len, 1534 unsigned long flags, 1681 unsigned long flags, unsigned long new_addr) 1535 { 1682 { 1536 struct vm_area_struct *vma; 1683 struct vm_area_struct *vma; 1537 1684 1538 /* insanity checks first */ 1685 /* insanity checks first */ 1539 old_len = PAGE_ALIGN(old_len); 1686 old_len = PAGE_ALIGN(old_len); 1540 new_len = PAGE_ALIGN(new_len); 1687 new_len = PAGE_ALIGN(new_len); 1541 if (old_len == 0 || new_len == 0) 1688 if (old_len == 0 || new_len == 0) 1542 return (unsigned long) -EINVA 1689 return (unsigned long) -EINVAL; 1543 1690 1544 if (offset_in_page(addr)) 1691 if (offset_in_page(addr)) 1545 return -EINVAL; 1692 return -EINVAL; 1546 1693 1547 if (flags & MREMAP_FIXED && new_addr 1694 if (flags & MREMAP_FIXED && new_addr != addr) 1548 return (unsigned long) -EINVA 1695 return (unsigned long) -EINVAL; 1549 1696 1550 vma = find_vma_exact(current->mm, add 1697 vma = find_vma_exact(current->mm, addr, old_len); 1551 if (!vma) 1698 if (!vma) 1552 return (unsigned long) -EINVA 1699 return (unsigned long) -EINVAL; 1553 1700 1554 if (vma->vm_end != vma->vm_start + ol 1701 if (vma->vm_end != vma->vm_start + old_len) 1555 return (unsigned long) -EFAUL 1702 return (unsigned long) -EFAULT; 1556 1703 1557 if (is_nommu_shared_mapping(vma->vm_f !! 
1704 if (vma->vm_flags & VM_MAYSHARE) 1558 return (unsigned long) -EPERM 1705 return (unsigned long) -EPERM; 1559 1706 1560 if (new_len > vma->vm_region->vm_end 1707 if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start) 1561 return (unsigned long) -ENOME 1708 return (unsigned long) -ENOMEM; 1562 1709 1563 /* all checks complete - do it */ 1710 /* all checks complete - do it */ 1564 vma->vm_end = vma->vm_start + new_len 1711 vma->vm_end = vma->vm_start + new_len; 1565 return vma->vm_start; 1712 return vma->vm_start; 1566 } 1713 } 1567 1714 1568 SYSCALL_DEFINE5(mremap, unsigned long, addr, 1715 SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, 1569 unsigned long, new_len, unsig 1716 unsigned long, new_len, unsigned long, flags, 1570 unsigned long, new_addr) 1717 unsigned long, new_addr) 1571 { 1718 { 1572 unsigned long ret; 1719 unsigned long ret; 1573 1720 1574 mmap_write_lock(current->mm); !! 1721 down_write(¤t->mm->mmap_sem); 1575 ret = do_mremap(addr, old_len, new_le 1722 ret = do_mremap(addr, old_len, new_len, flags, new_addr); 1576 mmap_write_unlock(current->mm); !! 1723 up_write(¤t->mm->mmap_sem); 1577 return ret; 1724 return ret; 1578 } 1725 } 1579 1726 >> 1727 struct page *follow_page(struct vm_area_struct *vma, unsigned long address, >> 1728 unsigned int foll_flags) >> 1729 { >> 1730 return NULL; >> 1731 } >> 1732 1580 int remap_pfn_range(struct vm_area_struct *vm 1733 int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, 1581 unsigned long pfn, unsigned l 1734 unsigned long pfn, unsigned long size, pgprot_t prot) 1582 { 1735 { 1583 if (addr != (pfn << PAGE_SHIFT)) 1736 if (addr != (pfn << PAGE_SHIFT)) 1584 return -EINVAL; 1737 return -EINVAL; 1585 1738 1586 vm_flags_set(vma, VM_IO | VM_PFNMAP | !! 
1739 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; 1587 return 0; 1740 return 0; 1588 } 1741 } 1589 EXPORT_SYMBOL(remap_pfn_range); 1742 EXPORT_SYMBOL(remap_pfn_range); 1590 1743 1591 int vm_iomap_memory(struct vm_area_struct *vm 1744 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len) 1592 { 1745 { 1593 unsigned long pfn = start >> PAGE_SHI 1746 unsigned long pfn = start >> PAGE_SHIFT; 1594 unsigned long vm_len = vma->vm_end - 1747 unsigned long vm_len = vma->vm_end - vma->vm_start; 1595 1748 1596 pfn += vma->vm_pgoff; 1749 pfn += vma->vm_pgoff; 1597 return io_remap_pfn_range(vma, vma->v 1750 return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); 1598 } 1751 } 1599 EXPORT_SYMBOL(vm_iomap_memory); 1752 EXPORT_SYMBOL(vm_iomap_memory); 1600 1753 1601 int remap_vmalloc_range(struct vm_area_struct 1754 int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, 1602 unsigned long pgoff) 1755 unsigned long pgoff) 1603 { 1756 { 1604 unsigned int size = vma->vm_end - vma 1757 unsigned int size = vma->vm_end - vma->vm_start; 1605 1758 1606 if (!(vma->vm_flags & VM_USERMAP)) 1759 if (!(vma->vm_flags & VM_USERMAP)) 1607 return -EINVAL; 1760 return -EINVAL; 1608 1761 1609 vma->vm_start = (unsigned long)(addr 1762 vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT)); 1610 vma->vm_end = vma->vm_start + size; 1763 vma->vm_end = vma->vm_start + size; 1611 1764 1612 return 0; 1765 return 0; 1613 } 1766 } 1614 EXPORT_SYMBOL(remap_vmalloc_range); 1767 EXPORT_SYMBOL(remap_vmalloc_range); 1615 1768 >> 1769 unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, >> 1770 unsigned long len, unsigned long pgoff, unsigned long flags) >> 1771 { >> 1772 return -ENOMEM; >> 1773 } >> 1774 1616 vm_fault_t filemap_fault(struct vm_fault *vmf 1775 vm_fault_t filemap_fault(struct vm_fault *vmf) 1617 { 1776 { 1618 BUG(); 1777 BUG(); 1619 return 0; 1778 return 0; 1620 } 1779 } 1621 EXPORT_SYMBOL(filemap_fault); 1780 EXPORT_SYMBOL(filemap_fault); 1622 1781 1623 vm_fault_t filemap_map_pages(struct vm_fault !! 1782 void filemap_map_pages(struct vm_fault *vmf, 1624 pgoff_t start_pgoff, pgoff_t 1783 pgoff_t start_pgoff, pgoff_t end_pgoff) 1625 { 1784 { 1626 BUG(); 1785 BUG(); 1627 return 0; << 1628 } 1786 } 1629 EXPORT_SYMBOL(filemap_map_pages); 1787 EXPORT_SYMBOL(filemap_map_pages); 1630 1788 1631 static int __access_remote_vm(struct mm_struc !! 1789 int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, 1632 void *buf, int !! 1790 unsigned long addr, void *buf, int len, unsigned int gup_flags) 1633 { 1791 { 1634 struct vm_area_struct *vma; 1792 struct vm_area_struct *vma; 1635 int write = gup_flags & FOLL_WRITE; 1793 int write = gup_flags & FOLL_WRITE; 1636 1794 1637 if (mmap_read_lock_killable(mm)) !! 
1795 if (down_read_killable(&mm->mmap_sem)) 1638 return 0; 1796 return 0; 1639 1797 1640 /* the access must start within one o 1798 /* the access must start within one of the target process's mappings */ 1641 vma = find_vma(mm, addr); 1799 vma = find_vma(mm, addr); 1642 if (vma) { 1800 if (vma) { 1643 /* don't overrun this mapping 1801 /* don't overrun this mapping */ 1644 if (addr + len >= vma->vm_end 1802 if (addr + len >= vma->vm_end) 1645 len = vma->vm_end - a 1803 len = vma->vm_end - addr; 1646 1804 1647 /* only read or write mapping 1805 /* only read or write mappings where it is permitted */ 1648 if (write && vma->vm_flags & 1806 if (write && vma->vm_flags & VM_MAYWRITE) 1649 copy_to_user_page(vma 1807 copy_to_user_page(vma, NULL, addr, 1650 (voi 1808 (void *) addr, buf, len); 1651 else if (!write && vma->vm_fl 1809 else if (!write && vma->vm_flags & VM_MAYREAD) 1652 copy_from_user_page(v 1810 copy_from_user_page(vma, NULL, addr, 1653 b 1811 buf, (void *) addr, len); 1654 else 1812 else 1655 len = 0; 1813 len = 0; 1656 } else { 1814 } else { 1657 len = 0; 1815 len = 0; 1658 } 1816 } 1659 1817 1660 mmap_read_unlock(mm); !! 1818 up_read(&mm->mmap_sem); 1661 1819 1662 return len; 1820 return len; 1663 } 1821 } 1664 1822 1665 /** 1823 /** 1666 * access_remote_vm - access another process' 1824 * access_remote_vm - access another process' address space 1667 * @mm: the mm_struct of the target a 1825 * @mm: the mm_struct of the target address space 1668 * @addr: start address to access 1826 * @addr: start address to access 1669 * @buf: source or destination buffer 1827 * @buf: source or destination buffer 1670 * @len: number of bytes to transfer 1828 * @len: number of bytes to transfer 1671 * @gup_flags: flags modifying lookup behavi 1829 * @gup_flags: flags modifying lookup behaviour 1672 * 1830 * 1673 * The caller must hold a reference on @mm. 1831 * The caller must hold a reference on @mm. 1674 */ 1832 */ 1675 int access_remote_vm(struct mm_struct *mm, un 1833 int access_remote_vm(struct mm_struct *mm, unsigned long addr, 1676 void *buf, int len, unsigned 1834 void *buf, int len, unsigned int gup_flags) 1677 { 1835 { 1678 return __access_remote_vm(mm, addr, b !! 1836 return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); 1679 } 1837 } 1680 1838 1681 /* 1839 /* 1682 * Access another process' address space. 1840 * Access another process' address space. 1683 * - source/target buffer must be kernel spac 1841 * - source/target buffer must be kernel space 1684 */ 1842 */ 1685 int access_process_vm(struct task_struct *tsk 1843 int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, 1686 unsigned int gup_flags) 1844 unsigned int gup_flags) 1687 { 1845 { 1688 struct mm_struct *mm; 1846 struct mm_struct *mm; 1689 1847 1690 if (addr + len < addr) 1848 if (addr + len < addr) 1691 return 0; 1849 return 0; 1692 1850 1693 mm = get_task_mm(tsk); 1851 mm = get_task_mm(tsk); 1694 if (!mm) 1852 if (!mm) 1695 return 0; 1853 return 0; 1696 1854 1697 len = __access_remote_vm(mm, addr, bu !! 
1855 len = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); 1698 1856 1699 mmput(mm); 1857 mmput(mm); 1700 return len; 1858 return len; 1701 } 1859 } 1702 EXPORT_SYMBOL_GPL(access_process_vm); 1860 EXPORT_SYMBOL_GPL(access_process_vm); 1703 1861 1704 /** 1862 /** 1705 * nommu_shrink_inode_mappings - Shrink the s 1863 * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode 1706 * @inode: The inode to check 1864 * @inode: The inode to check 1707 * @size: The current filesize of the inode 1865 * @size: The current filesize of the inode 1708 * @newsize: The proposed filesize of the ino 1866 * @newsize: The proposed filesize of the inode 1709 * 1867 * 1710 * Check the shared mappings on an inode on b 1868 * Check the shared mappings on an inode on behalf of a shrinking truncate to 1711 * make sure that any outstanding VMAs aren't !! 1869 * make sure that that any outstanding VMAs aren't broken and then shrink the 1712 * vm_regions that extend beyond so that do_m !! 1870 * vm_regions that extend that beyond so that do_mmap_pgoff() doesn't 1713 * automatically grant mappings that are too 1871 * automatically grant mappings that are too large. 1714 */ 1872 */ 1715 int nommu_shrink_inode_mappings(struct inode 1873 int nommu_shrink_inode_mappings(struct inode *inode, size_t size, 1716 size_t newsiz 1874 size_t newsize) 1717 { 1875 { 1718 struct vm_area_struct *vma; 1876 struct vm_area_struct *vma; 1719 struct vm_region *region; 1877 struct vm_region *region; 1720 pgoff_t low, high; 1878 pgoff_t low, high; 1721 size_t r_size, r_top; 1879 size_t r_size, r_top; 1722 1880 1723 low = newsize >> PAGE_SHIFT; 1881 low = newsize >> PAGE_SHIFT; 1724 high = (size + PAGE_SIZE - 1) >> PAGE 1882 high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 1725 1883 1726 down_write(&nommu_region_sem); 1884 down_write(&nommu_region_sem); 1727 i_mmap_lock_read(inode->i_mapping); 1885 i_mmap_lock_read(inode->i_mapping); 1728 1886 1729 /* search for VMAs that fall within t 1887 /* search for VMAs that fall within the dead zone */ 1730 vma_interval_tree_foreach(vma, &inode 1888 vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) { 1731 /* found one - only intereste 1889 /* found one - only interested if it's shared out of the page 1732 * cache */ 1890 * cache */ 1733 if (vma->vm_flags & VM_SHARED 1891 if (vma->vm_flags & VM_SHARED) { 1734 i_mmap_unlock_read(in 1892 i_mmap_unlock_read(inode->i_mapping); 1735 up_write(&nommu_regio 1893 up_write(&nommu_region_sem); 1736 return -ETXTBSY; /* n 1894 return -ETXTBSY; /* not quite true, but near enough */ 1737 } 1895 } 1738 } 1896 } 1739 1897 1740 /* reduce any regions that overlap th 1898 /* reduce any regions that overlap the dead zone - if in existence, 1741 * these will be pointed to by VMAs t 1899 * these will be pointed to by VMAs that don't overlap the dead zone 1742 * 1900 * 1743 * we don't check for any regions tha 1901 * we don't check for any regions that start beyond the EOF as there 1744 * shouldn't be any 1902 * shouldn't be any 1745 */ 1903 */ 1746 vma_interval_tree_foreach(vma, &inode 1904 vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, 0, ULONG_MAX) { 1747 if (!(vma->vm_flags & VM_SHAR 1905 if (!(vma->vm_flags & VM_SHARED)) 1748 continue; 1906 continue; 1749 1907 1750 region = vma->vm_region; 1908 region = vma->vm_region; 1751 r_size = region->vm_top - reg 1909 r_size = region->vm_top - region->vm_start; 1752 r_top = (region->vm_pgoff << 1910 r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size; 1753 1911 1754 if (r_top > newsize) 
{ 1912 if (r_top > newsize) { 1755 region->vm_top -= r_t 1913 region->vm_top -= r_top - newsize; 1756 if (region->vm_end > 1914 if (region->vm_end > region->vm_top) 1757 region->vm_en 1915 region->vm_end = region->vm_top; 1758 } 1916 } 1759 } 1917 } 1760 1918 1761 i_mmap_unlock_read(inode->i_mapping); 1919 i_mmap_unlock_read(inode->i_mapping); 1762 up_write(&nommu_region_sem); 1920 up_write(&nommu_region_sem); 1763 return 0; 1921 return 0; 1764 } 1922 } 1765 1923 1766 /* 1924 /* 1767 * Initialise sysctl_user_reserve_kbytes. 1925 * Initialise sysctl_user_reserve_kbytes. 1768 * 1926 * 1769 * This is intended to prevent a user from st 1927 * This is intended to prevent a user from starting a single memory hogging 1770 * process, such that they cannot recover (ki 1928 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER 1771 * mode. 1929 * mode. 1772 * 1930 * 1773 * The default value is min(3% of free memory 1931 * The default value is min(3% of free memory, 128MB) 1774 * 128MB is enough to recover with sshd/login 1932 * 128MB is enough to recover with sshd/login, bash, and top/kill. 1775 */ 1933 */ 1776 static int __meminit init_user_reserve(void) 1934 static int __meminit init_user_reserve(void) 1777 { 1935 { 1778 unsigned long free_kbytes; 1936 unsigned long free_kbytes; 1779 1937 1780 free_kbytes = K(global_zone_page_stat !! 1938 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); 1781 1939 1782 sysctl_user_reserve_kbytes = min(free 1940 sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); 1783 return 0; 1941 return 0; 1784 } 1942 } 1785 subsys_initcall(init_user_reserve); 1943 subsys_initcall(init_user_reserve); 1786 1944 1787 /* 1945 /* 1788 * Initialise sysctl_admin_reserve_kbytes. 1946 * Initialise sysctl_admin_reserve_kbytes. 1789 * 1947 * 1790 * The purpose of sysctl_admin_reserve_kbytes 1948 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin 1791 * to log in and kill a memory hogging proces 1949 * to log in and kill a memory hogging process. 1792 * 1950 * 1793 * Systems with more than 256MB will reserve 1951 * Systems with more than 256MB will reserve 8MB, enough to recover 1794 * with sshd, bash, and top in OVERCOMMIT_GUE 1952 * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will 1795 * only reserve 3% of free pages by default. 1953 * only reserve 3% of free pages by default. 1796 */ 1954 */ 1797 static int __meminit init_admin_reserve(void) 1955 static int __meminit init_admin_reserve(void) 1798 { 1956 { 1799 unsigned long free_kbytes; 1957 unsigned long free_kbytes; 1800 1958 1801 free_kbytes = K(global_zone_page_stat !! 1959 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); 1802 1960 1803 sysctl_admin_reserve_kbytes = min(fre 1961 sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); 1804 return 0; 1962 return 0; 1805 } 1963 } 1806 subsys_initcall(init_admin_reserve); 1964 subsys_initcall(init_admin_reserve); 1807 1965
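A few hedged, illustrative sketches of the behaviour implemented above follow; none of them are part of nommu.c itself.

do_munmap() here only tolerates partial unmaps of anonymous mappings: a file-backed chunk must be released exactly as it was mapped, while an anonymous VMA may be split and/or shrunk. A minimal user-space illustration of that policy on a !MMU target (4 KiB pages and the buffer sizes are assumptions; on an MMU kernel both calls simply succeed):

#include <sys/mman.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* anonymous mapping: under nommu this ends up in do_mmap_private() */
	void *anon = mmap(NULL, 4 * 4096, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (anon == MAP_FAILED)
		return 1;

	/* unmapping a page-aligned subset of an anonymous VMA is accepted;
	 * do_munmap() splits and/or shrinks the VMA and its backing region */
	if (munmap((char *)anon + 4096, 4096) != 0)
		printf("partial anonymous munmap refused: %s\n", strerror(errno));

	/* a partial munmap of a file-backed mapping, by contrast, is
	 * rejected with -EINVAL: only whole-VMA unmaps are allowed there */
	return 0;
}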
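do_mremap() above never relocates a mapping: a resize is only honoured while the new length still fits inside the vm_region originally allocated by do_mmap_private(), shared (VM_MAYSHARE) mappings are refused with -EPERM, and MREMAP_FIXED to a different address is refused with -EINVAL. A user-space sketch of what that means for callers on a !MMU kernel (again assuming 4 KiB pages and invented sizes; an MMU kernel would happily move the mapping in the second call):

#define _GNU_SOURCE
#include <sys/mman.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	void *p = mmap(NULL, 2 * 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	/* shrinking in place stays inside the original region: allowed */
	void *q = mremap(p, 2 * 4096, 4096, 0);
	if (q == MAP_FAILED)
		printf("in-place shrink refused: %s\n", strerror(errno));

	/* growing past the original region fails with -ENOMEM even with
	 * MREMAP_MAYMOVE, because the nommu implementation cannot move
	 * the backing pages */
	void *r = mremap(p, 4096, 8 * 4096, MREMAP_MAYMOVE);
	if (r == MAP_FAILED)
		printf("enlarge/move refused: %s\n", strerror(errno));

	return 0;
}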
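access_process_vm() and access_remote_vm() above are the nommu counterparts of the gup-based MMU versions: because user addresses are directly visible to the kernel here, the copy is clamped to a single VMA and performed with copy_to_user_page()/copy_from_user_page(). A hedged kernel-side sketch of a caller, roughly what ptrace(PTRACE_PEEKDATA) ends up doing; peek_task_word() itself is invented for illustration and is not part of this file:

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/errno.h>

/* Illustrative helper, not part of nommu.c: read one word from another
 * task's address space. */
static int peek_task_word(struct task_struct *tsk, unsigned long addr,
			  unsigned long *out)
{
	unsigned long val;
	int copied;

	/* gup_flags == 0: plain read, only VM_MAYREAD mappings qualify */
	copied = access_process_vm(tsk, addr, &val, sizeof(val), 0);
	if (copied != sizeof(val))
		return -EIO;

	*out = val;
	return 0;
}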
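The shifts in init_user_reserve() and init_admin_reserve() encode the limits described in their comments: free_kbytes / 32 is roughly 3% (3.125%) of free memory, 1UL << 17 KiB is 128 MiB, and 1UL << 13 KiB is 8 MiB. A stand-alone check of that arithmetic (ordinary user-space C with an assumed 4 GiB of free memory, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned long free_kbytes = 4UL * 1024 * 1024;	/* assume 4 GiB free */

	/* user reserve: min(~3% of free memory, 128 MiB), as in init_user_reserve() */
	unsigned long user_kb = free_kbytes / 32 < (1UL << 17) ?
				free_kbytes / 32 : (1UL << 17);

	/* admin reserve: min(~3% of free memory, 8 MiB), as in init_admin_reserve() */
	unsigned long admin_kb = free_kbytes / 32 < (1UL << 13) ?
				 free_kbytes / 32 : (1UL << 13);

	printf("user reserve:  %lu KiB (%lu MiB)\n", user_kb, user_kb >> 10);
	printf("admin reserve: %lu KiB (%lu MiB)\n", admin_kb, admin_kb >> 10);
	return 0;
}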