1 // SPDX-License-Identifier: GPL-2.0-only << 2 /* 1 /* 3 * linux/mm/nommu.c 2 * linux/mm/nommu.c 4 * 3 * 5 * Replacement code for mm functions to suppo 4 * Replacement code for mm functions to support CPU's that don't 6 * have any form of memory management unit (t 5 * have any form of memory management unit (thus no virtual memory). 7 * 6 * 8 * See Documentation/admin-guide/mm/nommu-mma !! 7 * See Documentation/nommu-mmap.txt 9 * 8 * 10 * Copyright (c) 2004-2008 David Howells <dho 9 * Copyright (c) 2004-2008 David Howells <dhowells@redhat.com> 11 * Copyright (c) 2000-2003 David McCullough < 10 * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> 12 * Copyright (c) 2000-2001 D Jeff Dionne <jef 11 * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> 13 * Copyright (c) 2002 Greg Ungerer <gerg 12 * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> 14 * Copyright (c) 2007-2010 Paul Mundt <lethal 13 * Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org> 15 */ 14 */ 16 15 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 16 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 17 19 #include <linux/export.h> 18 #include <linux/export.h> 20 #include <linux/mm.h> 19 #include <linux/mm.h> 21 #include <linux/sched/mm.h> 20 #include <linux/sched/mm.h> >> 21 #include <linux/vmacache.h> 22 #include <linux/mman.h> 22 #include <linux/mman.h> 23 #include <linux/swap.h> 23 #include <linux/swap.h> 24 #include <linux/file.h> 24 #include <linux/file.h> 25 #include <linux/highmem.h> 25 #include <linux/highmem.h> 26 #include <linux/pagemap.h> 26 #include <linux/pagemap.h> 27 #include <linux/slab.h> 27 #include <linux/slab.h> 28 #include <linux/vmalloc.h> 28 #include <linux/vmalloc.h> >> 29 #include <linux/blkdev.h> 29 #include <linux/backing-dev.h> 30 #include <linux/backing-dev.h> 30 #include <linux/compiler.h> 31 #include <linux/compiler.h> 31 #include <linux/mount.h> 32 #include <linux/mount.h> 32 #include <linux/personality.h> 33 #include <linux/personality.h> 33 #include <linux/security.h> 34 #include <linux/security.h> 34 #include <linux/syscalls.h> 35 #include <linux/syscalls.h> 35 #include <linux/audit.h> 36 #include <linux/audit.h> 36 #include <linux/printk.h> 37 #include <linux/printk.h> 37 38 38 #include <linux/uaccess.h> 39 #include <linux/uaccess.h> 39 #include <linux/uio.h> << 40 #include <asm/tlb.h> 40 #include <asm/tlb.h> 41 #include <asm/tlbflush.h> 41 #include <asm/tlbflush.h> 42 #include <asm/mmu_context.h> 42 #include <asm/mmu_context.h> 43 #include "internal.h" 43 #include "internal.h" 44 44 45 void *high_memory; 45 void *high_memory; 46 EXPORT_SYMBOL(high_memory); 46 EXPORT_SYMBOL(high_memory); 47 struct page *mem_map; 47 struct page *mem_map; 48 unsigned long max_mapnr; 48 unsigned long max_mapnr; 49 EXPORT_SYMBOL(max_mapnr); 49 EXPORT_SYMBOL(max_mapnr); 50 unsigned long highest_memmap_pfn; 50 unsigned long highest_memmap_pfn; 51 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIA 51 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; 52 int heap_stack_gap = 0; 52 int heap_stack_gap = 0; 53 53 54 atomic_long_t mmap_pages_allocated; 54 atomic_long_t mmap_pages_allocated; 55 55 56 EXPORT_SYMBOL(mem_map); 56 EXPORT_SYMBOL(mem_map); 57 57 58 /* list of mapped, potentially shareable regio 58 /* list of mapped, potentially shareable regions */ 59 static struct kmem_cache *vm_region_jar; 59 static struct kmem_cache *vm_region_jar; 60 struct rb_root nommu_region_tree = RB_ROOT; 60 struct rb_root nommu_region_tree = RB_ROOT; 61 DECLARE_RWSEM(nommu_region_sem); 61 
DECLARE_RWSEM(nommu_region_sem); 62 62 63 const struct vm_operations_struct generic_file 63 const struct vm_operations_struct generic_file_vm_ops = { 64 }; 64 }; 65 65 66 /* 66 /* 67 * Return the total memory allocated for this 67 * Return the total memory allocated for this pointer, not 68 * just what the caller asked for. 68 * just what the caller asked for. 69 * 69 * 70 * Doesn't have to be accurate, i.e. may have 70 * Doesn't have to be accurate, i.e. may have races. 71 */ 71 */ 72 unsigned int kobjsize(const void *objp) 72 unsigned int kobjsize(const void *objp) 73 { 73 { 74 struct page *page; 74 struct page *page; 75 75 76 /* 76 /* 77 * If the object we have should not ha 77 * If the object we have should not have ksize performed on it, 78 * return size of 0 78 * return size of 0 79 */ 79 */ 80 if (!objp || !virt_addr_valid(objp)) 80 if (!objp || !virt_addr_valid(objp)) 81 return 0; 81 return 0; 82 82 83 page = virt_to_head_page(objp); 83 page = virt_to_head_page(objp); 84 84 85 /* 85 /* 86 * If the allocator sets PageSlab, we 86 * If the allocator sets PageSlab, we know the pointer came from 87 * kmalloc(). 87 * kmalloc(). 88 */ 88 */ 89 if (PageSlab(page)) 89 if (PageSlab(page)) 90 return ksize(objp); 90 return ksize(objp); 91 91 92 /* 92 /* 93 * If it's not a compound page, see if 93 * If it's not a compound page, see if we have a matching VMA 94 * region. This test is intentionally 94 * region. This test is intentionally done in reverse order, 95 * so if there's no VMA, we still fall 95 * so if there's no VMA, we still fall through and hand back 96 * PAGE_SIZE for 0-order pages. 96 * PAGE_SIZE for 0-order pages. 97 */ 97 */ 98 if (!PageCompound(page)) { 98 if (!PageCompound(page)) { 99 struct vm_area_struct *vma; 99 struct vm_area_struct *vma; 100 100 101 vma = find_vma(current->mm, (u 101 vma = find_vma(current->mm, (unsigned long)objp); 102 if (vma) 102 if (vma) 103 return vma->vm_end - v 103 return vma->vm_end - vma->vm_start; 104 } 104 } 105 105 106 /* 106 /* 107 * The ksize() function is only guaran 107 * The ksize() function is only guaranteed to work for pointers 108 * returned by kmalloc(). So handle ar 108 * returned by kmalloc(). So handle arbitrary pointers here. 109 */ 109 */ 110 return page_size(page); !! 110 return PAGE_SIZE << compound_order(page); 111 } 111 } 112 112 >> 113 static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, >> 114 unsigned long start, unsigned long nr_pages, >> 115 unsigned int foll_flags, struct page **pages, >> 116 struct vm_area_struct **vmas, int *nonblocking) >> 117 { >> 118 struct vm_area_struct *vma; >> 119 unsigned long vm_flags; >> 120 int i; >> 121 >> 122 /* calculate required read or write permissions. >> 123 * If FOLL_FORCE is set, we only require the "MAY" flags. >> 124 */ >> 125 vm_flags = (foll_flags & FOLL_WRITE) ? >> 126 (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); >> 127 vm_flags &= (foll_flags & FOLL_FORCE) ? 
>> 128 (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); >> 129 >> 130 for (i = 0; i < nr_pages; i++) { >> 131 vma = find_vma(mm, start); >> 132 if (!vma) >> 133 goto finish_or_fault; >> 134 >> 135 /* protect what we can, including chardevs */ >> 136 if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) || >> 137 !(vm_flags & vma->vm_flags)) >> 138 goto finish_or_fault; >> 139 >> 140 if (pages) { >> 141 pages[i] = virt_to_page(start); >> 142 if (pages[i]) >> 143 get_page(pages[i]); >> 144 } >> 145 if (vmas) >> 146 vmas[i] = vma; >> 147 start = (start + PAGE_SIZE) & PAGE_MASK; >> 148 } >> 149 >> 150 return i; >> 151 >> 152 finish_or_fault: >> 153 return i ? : -EFAULT; >> 154 } >> 155 >> 156 /* >> 157 * get a list of pages in an address range belonging to the specified process >> 158 * and indicate the VMA that covers each page >> 159 * - this is potentially dodgy as we may end incrementing the page count of a >> 160 * slab page or a secondary page from a compound page >> 161 * - don't permit access to VMAs that don't support it, such as I/O mappings >> 162 */ >> 163 long get_user_pages(unsigned long start, unsigned long nr_pages, >> 164 unsigned int gup_flags, struct page **pages, >> 165 struct vm_area_struct **vmas) >> 166 { >> 167 return __get_user_pages(current, current->mm, start, nr_pages, >> 168 gup_flags, pages, vmas, NULL); >> 169 } >> 170 EXPORT_SYMBOL(get_user_pages); >> 171 >> 172 long get_user_pages_locked(unsigned long start, unsigned long nr_pages, >> 173 unsigned int gup_flags, struct page **pages, >> 174 int *locked) >> 175 { >> 176 return get_user_pages(start, nr_pages, gup_flags, pages, NULL); >> 177 } >> 178 EXPORT_SYMBOL(get_user_pages_locked); >> 179 >> 180 static long __get_user_pages_unlocked(struct task_struct *tsk, >> 181 struct mm_struct *mm, unsigned long start, >> 182 unsigned long nr_pages, struct page **pages, >> 183 unsigned int gup_flags) >> 184 { >> 185 long ret; >> 186 down_read(&mm->mmap_sem); >> 187 ret = __get_user_pages(tsk, mm, start, nr_pages, gup_flags, pages, >> 188 NULL, NULL); >> 189 up_read(&mm->mmap_sem); >> 190 return ret; >> 191 } >> 192 >> 193 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, >> 194 struct page **pages, unsigned int gup_flags) >> 195 { >> 196 return __get_user_pages_unlocked(current, current->mm, start, nr_pages, >> 197 pages, gup_flags); >> 198 } >> 199 EXPORT_SYMBOL(get_user_pages_unlocked); >> 200 >> 201 /** >> 202 * follow_pfn - look up PFN at a user virtual address >> 203 * @vma: memory mapping >> 204 * @address: user virtual address >> 205 * @pfn: location to store found PFN >> 206 * >> 207 * Only IO mappings and raw PFN mappings are allowed. >> 208 * >> 209 * Returns zero and the pfn at @pfn on success, -ve otherwise. >> 210 */ >> 211 int follow_pfn(struct vm_area_struct *vma, unsigned long address, >> 212 unsigned long *pfn) >> 213 { >> 214 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) >> 215 return -EINVAL; >> 216 >> 217 *pfn = address >> PAGE_SHIFT; >> 218 return 0; >> 219 } >> 220 EXPORT_SYMBOL(follow_pfn); >> 221 >> 222 LIST_HEAD(vmap_area_list); >> 223 113 void vfree(const void *addr) 224 void vfree(const void *addr) 114 { 225 { 115 kfree(addr); 226 kfree(addr); 116 } 227 } 117 EXPORT_SYMBOL(vfree); 228 EXPORT_SYMBOL(vfree); 118 229 119 void *__vmalloc_noprof(unsigned long size, gfp !! 
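/*
 * Illustrative sketch, not part of this file: how a hypothetical driver
 * might use the nommu get_user_pages_unlocked() defined above.  Because
 * there is no MMU, every page handed back is simply virt_to_page() of the
 * user address, so the only bookkeeping is the reference taken on each
 * page.  The function name and the surrounding driver are assumptions.
 */
#include <linux/mm.h>

static long pin_user_buffer_demo(unsigned long uaddr, struct page **pages,
				 unsigned long nr_pages)
{
	long pinned, i;

	pinned = get_user_pages_unlocked(uaddr, nr_pages, pages, FOLL_WRITE);
	if (pinned <= 0)
		return pinned ? : -EFAULT;

	/* ... hand the pages to the device for DMA ... */

	for (i = 0; i < pinned; i++)
		put_page(pages[i]);
	return pinned;
}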
230 void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) 120 { 231 { 121 /* 232 /* 122 * You can't specify __GFP_HIGHMEM wi 233 * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc() 123 * returns only a logical address. 234 * returns only a logical address. 124 */ 235 */ 125 return kmalloc_noprof(size, (gfp_mask !! 236 return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); 126 } 237 } 127 EXPORT_SYMBOL(__vmalloc_noprof); !! 238 EXPORT_SYMBOL(__vmalloc); 128 239 129 void *vrealloc_noprof(const void *p, size_t si !! 240 void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags) 130 { 241 { 131 return krealloc_noprof(p, size, (flags !! 242 return __vmalloc(size, flags, PAGE_KERNEL); 132 } 243 } 133 244 134 void *__vmalloc_node_range_noprof(unsigned lon !! 245 void *vmalloc_user(unsigned long size) 135 unsigned long start, unsigned << 136 pgprot_t prot, unsigned long v << 137 const void *caller) << 138 { << 139 return __vmalloc_noprof(size, gfp_mask << 140 } << 141 << 142 void *__vmalloc_node_noprof(unsigned long size << 143 int node, const void *caller) << 144 { << 145 return __vmalloc_noprof(size, gfp_mask << 146 } << 147 << 148 static void *__vmalloc_user_flags(unsigned lon << 149 { 246 { 150 void *ret; 247 void *ret; 151 248 152 ret = __vmalloc(size, flags); !! 249 ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); 153 if (ret) { 250 if (ret) { 154 struct vm_area_struct *vma; 251 struct vm_area_struct *vma; 155 252 156 mmap_write_lock(current->mm); !! 253 down_write(¤t->mm->mmap_sem); 157 vma = find_vma(current->mm, (u 254 vma = find_vma(current->mm, (unsigned long)ret); 158 if (vma) 255 if (vma) 159 vm_flags_set(vma, VM_U !! 256 vma->vm_flags |= VM_USERMAP; 160 mmap_write_unlock(current->mm) !! 257 up_write(¤t->mm->mmap_sem); 161 } 258 } 162 259 163 return ret; 260 return ret; 164 } 261 } 165 !! 262 EXPORT_SYMBOL(vmalloc_user); 166 void *vmalloc_user_noprof(unsigned long size) << 167 { << 168 return __vmalloc_user_flags(size, GFP_ << 169 } << 170 EXPORT_SYMBOL(vmalloc_user_noprof); << 171 263 172 struct page *vmalloc_to_page(const void *addr) 264 struct page *vmalloc_to_page(const void *addr) 173 { 265 { 174 return virt_to_page(addr); 266 return virt_to_page(addr); 175 } 267 } 176 EXPORT_SYMBOL(vmalloc_to_page); 268 EXPORT_SYMBOL(vmalloc_to_page); 177 269 178 unsigned long vmalloc_to_pfn(const void *addr) 270 unsigned long vmalloc_to_pfn(const void *addr) 179 { 271 { 180 return page_to_pfn(virt_to_page(addr)) 272 return page_to_pfn(virt_to_page(addr)); 181 } 273 } 182 EXPORT_SYMBOL(vmalloc_to_pfn); 274 EXPORT_SYMBOL(vmalloc_to_pfn); 183 275 184 long vread_iter(struct iov_iter *iter, const c !! 276 long vread(char *buf, char *addr, unsigned long count) >> 277 { >> 278 /* Don't allow overflow */ >> 279 if ((unsigned long) buf + count < count) >> 280 count = -(unsigned long) buf; >> 281 >> 282 memcpy(buf, addr, count); >> 283 return count; >> 284 } >> 285 >> 286 long vwrite(char *buf, char *addr, unsigned long count) 185 { 287 { 186 /* Don't allow overflow */ 288 /* Don't allow overflow */ 187 if ((unsigned long) addr + count < cou 289 if ((unsigned long) addr + count < count) 188 count = -(unsigned long) addr; 290 count = -(unsigned long) addr; 189 291 190 return copy_to_iter(addr, count, iter) !! 
292 memcpy(addr, buf, count); >> 293 return count; 191 } 294 } 192 295 193 /* 296 /* 194 * vmalloc - allocate virtually contigu 297 * vmalloc - allocate virtually contiguous memory 195 * 298 * 196 * @size: allocation size 299 * @size: allocation size 197 * 300 * 198 * Allocate enough pages to cover @size f 301 * Allocate enough pages to cover @size from the page level 199 * allocator and map them into contiguous 302 * allocator and map them into contiguous kernel virtual space. 200 * 303 * 201 * For tight control over page level allo 304 * For tight control over page level allocator and protection flags 202 * use __vmalloc() instead. 305 * use __vmalloc() instead. 203 */ 306 */ 204 void *vmalloc_noprof(unsigned long size) !! 307 void *vmalloc(unsigned long size) 205 { 308 { 206 return __vmalloc_noprof(size, GFP_KERN !! 309 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); 207 } 310 } 208 EXPORT_SYMBOL(vmalloc_noprof); !! 311 EXPORT_SYMBOL(vmalloc); 209 << 210 void *vmalloc_huge_noprof(unsigned long size, << 211 312 212 /* 313 /* 213 * vzalloc - allocate virtually contiguou 314 * vzalloc - allocate virtually contiguous memory with zero fill 214 * 315 * 215 * @size: allocation size 316 * @size: allocation size 216 * 317 * 217 * Allocate enough pages to cover @size f 318 * Allocate enough pages to cover @size from the page level 218 * allocator and map them into contiguous 319 * allocator and map them into contiguous kernel virtual space. 219 * The memory allocated is set to zero. 320 * The memory allocated is set to zero. 220 * 321 * 221 * For tight control over page level allo 322 * For tight control over page level allocator and protection flags 222 * use __vmalloc() instead. 323 * use __vmalloc() instead. 223 */ 324 */ 224 void *vzalloc_noprof(unsigned long size) !! 325 void *vzalloc(unsigned long size) 225 { 326 { 226 return __vmalloc_noprof(size, GFP_KERN !! 327 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, >> 328 PAGE_KERNEL); 227 } 329 } 228 EXPORT_SYMBOL(vzalloc_noprof); !! 330 EXPORT_SYMBOL(vzalloc); 229 331 230 /** 332 /** 231 * vmalloc_node - allocate memory on a specifi 333 * vmalloc_node - allocate memory on a specific node 232 * @size: allocation size 334 * @size: allocation size 233 * @node: numa node 335 * @node: numa node 234 * 336 * 235 * Allocate enough pages to cover @size from t 337 * Allocate enough pages to cover @size from the page level 236 * allocator and map them into contiguous kern 338 * allocator and map them into contiguous kernel virtual space. 237 * 339 * 238 * For tight control over page level allocator 340 * For tight control over page level allocator and protection flags 239 * use __vmalloc() instead. 341 * use __vmalloc() instead. 240 */ 342 */ 241 void *vmalloc_node_noprof(unsigned long size, !! 343 void *vmalloc_node(unsigned long size, int node) 242 { 344 { 243 return vmalloc_noprof(size); !! 345 return vmalloc(size); 244 } 346 } 245 EXPORT_SYMBOL(vmalloc_node_noprof); !! 347 EXPORT_SYMBOL(vmalloc_node); 246 348 247 /** 349 /** 248 * vzalloc_node - allocate memory on a specifi 350 * vzalloc_node - allocate memory on a specific node with zero fill 249 * @size: allocation size 351 * @size: allocation size 250 * @node: numa node 352 * @node: numa node 251 * 353 * 252 * Allocate enough pages to cover @size from t 354 * Allocate enough pages to cover @size from the page level 253 * allocator and map them into contiguous kern 355 * allocator and map them into contiguous kernel virtual space. 
254 * The memory allocated is set to zero. 356 * The memory allocated is set to zero. 255 * 357 * 256 * For tight control over page level allocator 358 * For tight control over page level allocator and protection flags 257 * use __vmalloc() instead. 359 * use __vmalloc() instead. 258 */ 360 */ 259 void *vzalloc_node_noprof(unsigned long size, !! 361 void *vzalloc_node(unsigned long size, int node) 260 { 362 { 261 return vzalloc_noprof(size); !! 363 return vzalloc(size); >> 364 } >> 365 EXPORT_SYMBOL(vzalloc_node); >> 366 >> 367 /** >> 368 * vmalloc_exec - allocate virtually contiguous, executable memory >> 369 * @size: allocation size >> 370 * >> 371 * Kernel-internal function to allocate enough pages to cover @size >> 372 * the page level allocator and map them into contiguous and >> 373 * executable kernel virtual space. >> 374 * >> 375 * For tight control over page level allocator and protection flags >> 376 * use __vmalloc() instead. >> 377 */ >> 378 >> 379 void *vmalloc_exec(unsigned long size) >> 380 { >> 381 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC); 262 } 382 } 263 EXPORT_SYMBOL(vzalloc_node_noprof); << 264 383 265 /** 384 /** 266 * vmalloc_32 - allocate virtually contiguou 385 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) 267 * @size: allocation size 386 * @size: allocation size 268 * 387 * 269 * Allocate enough 32bit PA addressable p 388 * Allocate enough 32bit PA addressable pages to cover @size from the 270 * page level allocator and map them into 389 * page level allocator and map them into contiguous kernel virtual space. 271 */ 390 */ 272 void *vmalloc_32_noprof(unsigned long size) !! 391 void *vmalloc_32(unsigned long size) 273 { 392 { 274 return __vmalloc_noprof(size, GFP_KERN !! 393 return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL); 275 } 394 } 276 EXPORT_SYMBOL(vmalloc_32_noprof); !! 395 EXPORT_SYMBOL(vmalloc_32); 277 396 278 /** 397 /** 279 * vmalloc_32_user - allocate zeroed virtually 398 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory 280 * @size: allocation size 399 * @size: allocation size 281 * 400 * 282 * The resulting memory area is 32bit addressa 401 * The resulting memory area is 32bit addressable and zeroed so it can be 283 * mapped to userspace without leaking data. 402 * mapped to userspace without leaking data. 284 * 403 * 285 * VM_USERMAP is set on the corresponding VMA 404 * VM_USERMAP is set on the corresponding VMA so that subsequent calls to 286 * remap_vmalloc_range() are permissible. 405 * remap_vmalloc_range() are permissible. 287 */ 406 */ 288 void *vmalloc_32_user_noprof(unsigned long siz !! 407 void *vmalloc_32_user(unsigned long size) 289 { 408 { 290 /* 409 /* 291 * We'll have to sort out the ZONE_DMA 410 * We'll have to sort out the ZONE_DMA bits for 64-bit, 292 * but for now this can simply use vma 411 * but for now this can simply use vmalloc_user() directly. 293 */ 412 */ 294 return vmalloc_user_noprof(size); !! 413 return vmalloc_user(size); 295 } 414 } 296 EXPORT_SYMBOL(vmalloc_32_user_noprof); !! 
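/*
 * Illustrative sketch, not part of this file: on !MMU kernels the
 * vmalloc() family above falls back to kmalloc(), so the returned buffer
 * is physically contiguous and vmalloc_to_page()/vmalloc_to_pfn() reduce
 * to plain virt_to_page()/page_to_pfn().  The demo function below is an
 * assumption, not kernel code.
 */
#include <linux/mm.h>
#include <linux/vmalloc.h>

static int nommu_vmalloc_demo(void)
{
	void *buf = vmalloc(4 * PAGE_SIZE);

	if (!buf)
		return -ENOMEM;

	/* On !MMU both expressions name the same struct page. */
	WARN_ON(vmalloc_to_page(buf) != virt_to_page(buf));

	vfree(buf);
	return 0;
}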
415 EXPORT_SYMBOL(vmalloc_32_user); 297 416 298 void *vmap(struct page **pages, unsigned int c 417 void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot) 299 { 418 { 300 BUG(); 419 BUG(); 301 return NULL; 420 return NULL; 302 } 421 } 303 EXPORT_SYMBOL(vmap); 422 EXPORT_SYMBOL(vmap); 304 423 305 void vunmap(const void *addr) 424 void vunmap(const void *addr) 306 { 425 { 307 BUG(); 426 BUG(); 308 } 427 } 309 EXPORT_SYMBOL(vunmap); 428 EXPORT_SYMBOL(vunmap); 310 429 311 void *vm_map_ram(struct page **pages, unsigned !! 430 void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) 312 { 431 { 313 BUG(); 432 BUG(); 314 return NULL; 433 return NULL; 315 } 434 } 316 EXPORT_SYMBOL(vm_map_ram); 435 EXPORT_SYMBOL(vm_map_ram); 317 436 318 void vm_unmap_ram(const void *mem, unsigned in 437 void vm_unmap_ram(const void *mem, unsigned int count) 319 { 438 { 320 BUG(); 439 BUG(); 321 } 440 } 322 EXPORT_SYMBOL(vm_unmap_ram); 441 EXPORT_SYMBOL(vm_unmap_ram); 323 442 324 void vm_unmap_aliases(void) 443 void vm_unmap_aliases(void) 325 { 444 { 326 } 445 } 327 EXPORT_SYMBOL_GPL(vm_unmap_aliases); 446 EXPORT_SYMBOL_GPL(vm_unmap_aliases); 328 447 329 void free_vm_area(struct vm_struct *area) !! 448 /* >> 449 * Implement a stub for vmalloc_sync_all() if the architecture chose not to >> 450 * have one. >> 451 */ >> 452 void __weak vmalloc_sync_all(void) 330 { 453 { 331 BUG(); << 332 } 454 } 333 EXPORT_SYMBOL_GPL(free_vm_area); << 334 455 335 int vm_insert_page(struct vm_area_struct *vma, !! 456 struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes) 336 struct page *page) << 337 { 457 { 338 return -EINVAL; !! 458 BUG(); 339 } !! 459 return NULL; 340 EXPORT_SYMBOL(vm_insert_page); << 341 << 342 int vm_insert_pages(struct vm_area_struct *vma << 343 struct page **pages, u << 344 { << 345 return -EINVAL; << 346 } 460 } 347 EXPORT_SYMBOL(vm_insert_pages); !! 461 EXPORT_SYMBOL_GPL(alloc_vm_area); 348 462 349 int vm_map_pages(struct vm_area_struct *vma, s !! 463 void free_vm_area(struct vm_struct *area) 350 unsigned long num) << 351 { 464 { 352 return -EINVAL; !! 465 BUG(); 353 } 466 } 354 EXPORT_SYMBOL(vm_map_pages); !! 467 EXPORT_SYMBOL_GPL(free_vm_area); 355 468 356 int vm_map_pages_zero(struct vm_area_struct *v !! 469 int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, 357 unsigned long !! 470 struct page *page) 358 { 471 { 359 return -EINVAL; 472 return -EINVAL; 360 } 473 } 361 EXPORT_SYMBOL(vm_map_pages_zero); !! 474 EXPORT_SYMBOL(vm_insert_page); 362 475 363 /* 476 /* 364 * sys_brk() for the most part doesn't need t 477 * sys_brk() for the most part doesn't need the global kernel 365 * lock, except when an application is doing 478 * lock, except when an application is doing something nasty 366 * like trying to un-brk an area that has alr 479 * like trying to un-brk an area that has already been mapped 367 * to a regular file. in this case, the unma 480 * to a regular file. in this case, the unmapping will need 368 * to invoke file system routines that need t 481 * to invoke file system routines that need the global lock. 
369 */ 482 */ 370 SYSCALL_DEFINE1(brk, unsigned long, brk) 483 SYSCALL_DEFINE1(brk, unsigned long, brk) 371 { 484 { 372 struct mm_struct *mm = current->mm; 485 struct mm_struct *mm = current->mm; 373 486 374 if (brk < mm->start_brk || brk > mm->c 487 if (brk < mm->start_brk || brk > mm->context.end_brk) 375 return mm->brk; 488 return mm->brk; 376 489 377 if (mm->brk == brk) 490 if (mm->brk == brk) 378 return mm->brk; 491 return mm->brk; 379 492 380 /* 493 /* 381 * Always allow shrinking brk 494 * Always allow shrinking brk 382 */ 495 */ 383 if (brk <= mm->brk) { 496 if (brk <= mm->brk) { 384 mm->brk = brk; 497 mm->brk = brk; 385 return brk; 498 return brk; 386 } 499 } 387 500 388 /* 501 /* 389 * Ok, looks good - let it rip. 502 * Ok, looks good - let it rip. 390 */ 503 */ 391 flush_icache_user_range(mm->brk, brk); !! 504 flush_icache_range(mm->brk, brk); 392 return mm->brk = brk; 505 return mm->brk = brk; 393 } 506 } 394 507 395 /* 508 /* 396 * initialise the percpu counter for VM and re 509 * initialise the percpu counter for VM and region record slabs 397 */ 510 */ 398 void __init mmap_init(void) 511 void __init mmap_init(void) 399 { 512 { 400 int ret; 513 int ret; 401 514 402 ret = percpu_counter_init(&vm_committe 515 ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); 403 VM_BUG_ON(ret); 516 VM_BUG_ON(ret); 404 vm_region_jar = KMEM_CACHE(vm_region, 517 vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT); 405 } 518 } 406 519 407 /* 520 /* 408 * validate the region tree 521 * validate the region tree 409 * - the caller must hold the region lock 522 * - the caller must hold the region lock 410 */ 523 */ 411 #ifdef CONFIG_DEBUG_NOMMU_REGIONS 524 #ifdef CONFIG_DEBUG_NOMMU_REGIONS 412 static noinline void validate_nommu_regions(vo 525 static noinline void validate_nommu_regions(void) 413 { 526 { 414 struct vm_region *region, *last; 527 struct vm_region *region, *last; 415 struct rb_node *p, *lastp; 528 struct rb_node *p, *lastp; 416 529 417 lastp = rb_first(&nommu_region_tree); 530 lastp = rb_first(&nommu_region_tree); 418 if (!lastp) 531 if (!lastp) 419 return; 532 return; 420 533 421 last = rb_entry(lastp, struct vm_regio 534 last = rb_entry(lastp, struct vm_region, vm_rb); 422 BUG_ON(last->vm_end <= last->vm_start) 535 BUG_ON(last->vm_end <= last->vm_start); 423 BUG_ON(last->vm_top < last->vm_end); 536 BUG_ON(last->vm_top < last->vm_end); 424 537 425 while ((p = rb_next(lastp))) { 538 while ((p = rb_next(lastp))) { 426 region = rb_entry(p, struct vm 539 region = rb_entry(p, struct vm_region, vm_rb); 427 last = rb_entry(lastp, struct 540 last = rb_entry(lastp, struct vm_region, vm_rb); 428 541 429 BUG_ON(region->vm_end <= regio 542 BUG_ON(region->vm_end <= region->vm_start); 430 BUG_ON(region->vm_top < region 543 BUG_ON(region->vm_top < region->vm_end); 431 BUG_ON(region->vm_start < last 544 BUG_ON(region->vm_start < last->vm_top); 432 545 433 lastp = p; 546 lastp = p; 434 } 547 } 435 } 548 } 436 #else 549 #else 437 static void validate_nommu_regions(void) 550 static void validate_nommu_regions(void) 438 { 551 { 439 } 552 } 440 #endif 553 #endif 441 554 442 /* 555 /* 443 * add a region into the global tree 556 * add a region into the global tree 444 */ 557 */ 445 static void add_nommu_region(struct vm_region 558 static void add_nommu_region(struct vm_region *region) 446 { 559 { 447 struct vm_region *pregion; 560 struct vm_region *pregion; 448 struct rb_node **p, *parent; 561 struct rb_node **p, *parent; 449 562 450 validate_nommu_regions(); 563 validate_nommu_regions(); 
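/*
 * Illustrative user-space sketch, not part of this file: the brk()
 * handler above only moves the break within the window reserved by the
 * binary loader (mm->start_brk .. mm->context.end_brk).  Shrinking is
 * always allowed; growing fails with ENOMEM once that window is
 * exhausted.  On an MMU host the same program behaves differently.
 */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	printf("current break: %p\n", sbrk(0));

	if (sbrk(4096) == (void *)-1)
		perror("grow break by 4KiB");	/* expected once the reserved window is used up */
	else
		printf("new break:     %p\n", sbrk(0));

	return 0;
}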
451 564 452 parent = NULL; 565 parent = NULL; 453 p = &nommu_region_tree.rb_node; 566 p = &nommu_region_tree.rb_node; 454 while (*p) { 567 while (*p) { 455 parent = *p; 568 parent = *p; 456 pregion = rb_entry(parent, str 569 pregion = rb_entry(parent, struct vm_region, vm_rb); 457 if (region->vm_start < pregion 570 if (region->vm_start < pregion->vm_start) 458 p = &(*p)->rb_left; 571 p = &(*p)->rb_left; 459 else if (region->vm_start > pr 572 else if (region->vm_start > pregion->vm_start) 460 p = &(*p)->rb_right; 573 p = &(*p)->rb_right; 461 else if (pregion == region) 574 else if (pregion == region) 462 return; 575 return; 463 else 576 else 464 BUG(); 577 BUG(); 465 } 578 } 466 579 467 rb_link_node(®ion->vm_rb, parent, p 580 rb_link_node(®ion->vm_rb, parent, p); 468 rb_insert_color(®ion->vm_rb, &nommu 581 rb_insert_color(®ion->vm_rb, &nommu_region_tree); 469 582 470 validate_nommu_regions(); 583 validate_nommu_regions(); 471 } 584 } 472 585 473 /* 586 /* 474 * delete a region from the global tree 587 * delete a region from the global tree 475 */ 588 */ 476 static void delete_nommu_region(struct vm_regi 589 static void delete_nommu_region(struct vm_region *region) 477 { 590 { 478 BUG_ON(!nommu_region_tree.rb_node); 591 BUG_ON(!nommu_region_tree.rb_node); 479 592 480 validate_nommu_regions(); 593 validate_nommu_regions(); 481 rb_erase(®ion->vm_rb, &nommu_region 594 rb_erase(®ion->vm_rb, &nommu_region_tree); 482 validate_nommu_regions(); 595 validate_nommu_regions(); 483 } 596 } 484 597 485 /* 598 /* 486 * free a contiguous series of pages 599 * free a contiguous series of pages 487 */ 600 */ 488 static void free_page_series(unsigned long fro 601 static void free_page_series(unsigned long from, unsigned long to) 489 { 602 { 490 for (; from < to; from += PAGE_SIZE) { 603 for (; from < to; from += PAGE_SIZE) { 491 struct page *page = virt_to_pa !! 
604 struct page *page = virt_to_page(from); 492 605 493 atomic_long_dec(&mmap_pages_al 606 atomic_long_dec(&mmap_pages_allocated); 494 put_page(page); 607 put_page(page); 495 } 608 } 496 } 609 } 497 610 498 /* 611 /* 499 * release a reference to a region 612 * release a reference to a region 500 * - the caller must hold the region semaphore 613 * - the caller must hold the region semaphore for writing, which this releases 501 * - the region may not have been added to the 614 * - the region may not have been added to the tree yet, in which case vm_top 502 * will equal vm_start 615 * will equal vm_start 503 */ 616 */ 504 static void __put_nommu_region(struct vm_regio 617 static void __put_nommu_region(struct vm_region *region) 505 __releases(nommu_region_sem) 618 __releases(nommu_region_sem) 506 { 619 { 507 BUG_ON(!nommu_region_tree.rb_node); 620 BUG_ON(!nommu_region_tree.rb_node); 508 621 509 if (--region->vm_usage == 0) { 622 if (--region->vm_usage == 0) { 510 if (region->vm_top > region->v 623 if (region->vm_top > region->vm_start) 511 delete_nommu_region(re 624 delete_nommu_region(region); 512 up_write(&nommu_region_sem); 625 up_write(&nommu_region_sem); 513 626 514 if (region->vm_file) 627 if (region->vm_file) 515 fput(region->vm_file); 628 fput(region->vm_file); 516 629 517 /* IO memory and memory shared 630 /* IO memory and memory shared directly out of the pagecache 518 * from ramfs/tmpfs mustn't be 631 * from ramfs/tmpfs mustn't be released here */ 519 if (region->vm_flags & VM_MAPP 632 if (region->vm_flags & VM_MAPPED_COPY) 520 free_page_series(regio 633 free_page_series(region->vm_start, region->vm_top); 521 kmem_cache_free(vm_region_jar, 634 kmem_cache_free(vm_region_jar, region); 522 } else { 635 } else { 523 up_write(&nommu_region_sem); 636 up_write(&nommu_region_sem); 524 } 637 } 525 } 638 } 526 639 527 /* 640 /* 528 * release a reference to a region 641 * release a reference to a region 529 */ 642 */ 530 static void put_nommu_region(struct vm_region 643 static void put_nommu_region(struct vm_region *region) 531 { 644 { 532 down_write(&nommu_region_sem); 645 down_write(&nommu_region_sem); 533 __put_nommu_region(region); 646 __put_nommu_region(region); 534 } 647 } 535 648 536 static void setup_vma_to_mm(struct vm_area_str !! 649 /* 537 { !! 650 * add a VMA into a process's mm_struct in the appropriate place in the list >> 651 * and tree and add to the address space's page tree also if not an anonymous >> 652 * page >> 653 * - should be called with mm->mmap_sem held writelocked >> 654 */ >> 655 static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) >> 656 { >> 657 struct vm_area_struct *pvma, *prev; >> 658 struct address_space *mapping; >> 659 struct rb_node **p, *parent, *rb_prev; >> 660 >> 661 BUG_ON(!vma->vm_region); >> 662 >> 663 mm->map_count++; 538 vma->vm_mm = mm; 664 vma->vm_mm = mm; 539 665 540 /* add the VMA to the mapping */ 666 /* add the VMA to the mapping */ 541 if (vma->vm_file) { 667 if (vma->vm_file) { 542 struct address_space *mapping !! 
668 mapping = vma->vm_file->f_mapping; 543 669 544 i_mmap_lock_write(mapping); 670 i_mmap_lock_write(mapping); 545 flush_dcache_mmap_lock(mapping 671 flush_dcache_mmap_lock(mapping); 546 vma_interval_tree_insert(vma, 672 vma_interval_tree_insert(vma, &mapping->i_mmap); 547 flush_dcache_mmap_unlock(mappi 673 flush_dcache_mmap_unlock(mapping); 548 i_mmap_unlock_write(mapping); 674 i_mmap_unlock_write(mapping); 549 } 675 } >> 676 >> 677 /* add the VMA to the tree */ >> 678 parent = rb_prev = NULL; >> 679 p = &mm->mm_rb.rb_node; >> 680 while (*p) { >> 681 parent = *p; >> 682 pvma = rb_entry(parent, struct vm_area_struct, vm_rb); >> 683 >> 684 /* sort by: start addr, end addr, VMA struct addr in that order >> 685 * (the latter is necessary as we may get identical VMAs) */ >> 686 if (vma->vm_start < pvma->vm_start) >> 687 p = &(*p)->rb_left; >> 688 else if (vma->vm_start > pvma->vm_start) { >> 689 rb_prev = parent; >> 690 p = &(*p)->rb_right; >> 691 } else if (vma->vm_end < pvma->vm_end) >> 692 p = &(*p)->rb_left; >> 693 else if (vma->vm_end > pvma->vm_end) { >> 694 rb_prev = parent; >> 695 p = &(*p)->rb_right; >> 696 } else if (vma < pvma) >> 697 p = &(*p)->rb_left; >> 698 else if (vma > pvma) { >> 699 rb_prev = parent; >> 700 p = &(*p)->rb_right; >> 701 } else >> 702 BUG(); >> 703 } >> 704 >> 705 rb_link_node(&vma->vm_rb, parent, p); >> 706 rb_insert_color(&vma->vm_rb, &mm->mm_rb); >> 707 >> 708 /* add VMA to the VMA list also */ >> 709 prev = NULL; >> 710 if (rb_prev) >> 711 prev = rb_entry(rb_prev, struct vm_area_struct, vm_rb); >> 712 >> 713 __vma_link_list(mm, vma, prev, parent); 550 } 714 } 551 715 552 static void cleanup_vma_from_mm(struct vm_area !! 716 /* >> 717 * delete a VMA from its owning mm_struct and address space >> 718 */ >> 719 static void delete_vma_from_mm(struct vm_area_struct *vma) 553 { 720 { 554 vma->vm_mm->map_count--; !! 721 int i; >> 722 struct address_space *mapping; >> 723 struct mm_struct *mm = vma->vm_mm; >> 724 struct task_struct *curr = current; >> 725 >> 726 mm->map_count--; >> 727 for (i = 0; i < VMACACHE_SIZE; i++) { >> 728 /* if the vma is cached, invalidate the entire cache */ >> 729 if (curr->vmacache.vmas[i] == vma) { >> 730 vmacache_invalidate(mm); >> 731 break; >> 732 } >> 733 } >> 734 555 /* remove the VMA from the mapping */ 735 /* remove the VMA from the mapping */ 556 if (vma->vm_file) { 736 if (vma->vm_file) { 557 struct address_space *mapping; << 558 mapping = vma->vm_file->f_mapp 737 mapping = vma->vm_file->f_mapping; 559 738 560 i_mmap_lock_write(mapping); 739 i_mmap_lock_write(mapping); 561 flush_dcache_mmap_lock(mapping 740 flush_dcache_mmap_lock(mapping); 562 vma_interval_tree_remove(vma, 741 vma_interval_tree_remove(vma, &mapping->i_mmap); 563 flush_dcache_mmap_unlock(mappi 742 flush_dcache_mmap_unlock(mapping); 564 i_mmap_unlock_write(mapping); 743 i_mmap_unlock_write(mapping); 565 } 744 } 566 } << 567 745 568 /* !! 746 /* remove from the MM's tree and list */ 569 * delete a VMA from its owning mm_struct and !! 747 rb_erase(&vma->vm_rb, &mm->mm_rb); 570 */ << 571 static int delete_vma_from_mm(struct vm_area_s << 572 { << 573 VMA_ITERATOR(vmi, vma->vm_mm, vma->vm_ << 574 748 575 vma_iter_config(&vmi, vma->vm_start, v !! 749 if (vma->vm_prev) 576 if (vma_iter_prealloc(&vmi, vma)) { !! 750 vma->vm_prev->vm_next = vma->vm_next; 577 pr_warn("Allocation of vma tre !! 751 else 578 current->pid); !! 752 mm->mmap = vma->vm_next; 579 return -ENOMEM; << 580 } << 581 cleanup_vma_from_mm(vma); << 582 753 583 /* remove from the MM's tree and list !! 
754 if (vma->vm_next) 584 vma_iter_clear(&vmi); !! 755 vma->vm_next->vm_prev = vma->vm_prev; 585 return 0; << 586 } 756 } >> 757 587 /* 758 /* 588 * destroy a VMA record 759 * destroy a VMA record 589 */ 760 */ 590 static void delete_vma(struct mm_struct *mm, s 761 static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) 591 { 762 { 592 vma_close(vma); !! 763 if (vma->vm_ops && vma->vm_ops->close) >> 764 vma->vm_ops->close(vma); 593 if (vma->vm_file) 765 if (vma->vm_file) 594 fput(vma->vm_file); 766 fput(vma->vm_file); 595 put_nommu_region(vma->vm_region); 767 put_nommu_region(vma->vm_region); 596 vm_area_free(vma); 768 vm_area_free(vma); 597 } 769 } 598 770 599 struct vm_area_struct *find_vma_intersection(s << 600 u << 601 u << 602 { << 603 unsigned long index = start_addr; << 604 << 605 mmap_assert_locked(mm); << 606 return mt_find(&mm->mm_mt, &index, end << 607 } << 608 EXPORT_SYMBOL(find_vma_intersection); << 609 << 610 /* 771 /* 611 * look up the first VMA in which addr resides 772 * look up the first VMA in which addr resides, NULL if none 612 * - should be called with mm->mmap_lock at le !! 773 * - should be called with mm->mmap_sem at least held readlocked 613 */ 774 */ 614 struct vm_area_struct *find_vma(struct mm_stru 775 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) 615 { 776 { 616 VMA_ITERATOR(vmi, mm, addr); !! 777 struct vm_area_struct *vma; >> 778 >> 779 /* check the cache first */ >> 780 vma = vmacache_find(mm, addr); >> 781 if (likely(vma)) >> 782 return vma; >> 783 >> 784 /* trawl the list (there may be multiple mappings in which addr >> 785 * resides) */ >> 786 for (vma = mm->mmap; vma; vma = vma->vm_next) { >> 787 if (vma->vm_start > addr) >> 788 return NULL; >> 789 if (vma->vm_end > addr) { >> 790 vmacache_update(addr, vma); >> 791 return vma; >> 792 } >> 793 } 617 794 618 return vma_iter_load(&vmi); !! 795 return NULL; 619 } 796 } 620 EXPORT_SYMBOL(find_vma); 797 EXPORT_SYMBOL(find_vma); 621 798 622 /* 799 /* 623 * At least xtensa ends up having protection f !! 800 * find a VMA 624 * MMU.. No stack expansion, at least. !! 801 * - we don't extend stack VMAs under NOMMU conditions 625 */ 802 */ 626 struct vm_area_struct *lock_mm_and_find_vma(st !! 803 struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) 627 unsigned long addr, st << 628 { 804 { 629 struct vm_area_struct *vma; !! 805 return find_vma(mm, addr); 630 << 631 mmap_read_lock(mm); << 632 vma = vma_lookup(mm, addr); << 633 if (!vma) << 634 mmap_read_unlock(mm); << 635 return vma; << 636 } 806 } 637 807 638 /* 808 /* 639 * expand a stack to a given address 809 * expand a stack to a given address 640 * - not supported under NOMMU conditions 810 * - not supported under NOMMU conditions 641 */ 811 */ 642 int expand_stack_locked(struct vm_area_struct !! 812 int expand_stack(struct vm_area_struct *vma, unsigned long address) 643 { 813 { 644 return -ENOMEM; 814 return -ENOMEM; 645 } 815 } 646 816 647 struct vm_area_struct *expand_stack(struct mm_ << 648 { << 649 mmap_read_unlock(mm); << 650 return NULL; << 651 } << 652 << 653 /* 817 /* 654 * look up the first VMA exactly that exactly 818 * look up the first VMA exactly that exactly matches addr 655 * - should be called with mm->mmap_lock at le !! 
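/*
 * Illustrative sketch, not part of this file: the usual calling pattern
 * for find_vma() above - take the mmap lock for reading, do the lookup,
 * drop the lock.  The helper name is an assumption; older kernels take
 * down_read(&mm->mmap_sem), newer ones spell it mmap_read_lock(mm).
 */
#include <linux/mm.h>
#include <linux/mm_types.h>

static bool addr_is_mapped(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);
	up_read(&mm->mmap_sem);

	return vma != NULL;
}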
819 * - should be called with mm->mmap_sem at least held readlocked 656 */ 820 */ 657 static struct vm_area_struct *find_vma_exact(s 821 static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, 658 u 822 unsigned long addr, 659 u 823 unsigned long len) 660 { 824 { 661 struct vm_area_struct *vma; 825 struct vm_area_struct *vma; 662 unsigned long end = addr + len; 826 unsigned long end = addr + len; 663 VMA_ITERATOR(vmi, mm, addr); << 664 827 665 vma = vma_iter_load(&vmi); !! 828 /* check the cache first */ 666 if (!vma) !! 829 vma = vmacache_find_exact(mm, addr, end); 667 return NULL; !! 830 if (vma) 668 if (vma->vm_start != addr) !! 831 return vma; 669 return NULL; !! 832 670 if (vma->vm_end != end) !! 833 /* trawl the list (there may be multiple mappings in which addr 671 return NULL; !! 834 * resides) */ >> 835 for (vma = mm->mmap; vma; vma = vma->vm_next) { >> 836 if (vma->vm_start < addr) >> 837 continue; >> 838 if (vma->vm_start > addr) >> 839 return NULL; >> 840 if (vma->vm_end == end) { >> 841 vmacache_update(addr, vma); >> 842 return vma; >> 843 } >> 844 } 672 845 673 return vma; !! 846 return NULL; 674 } 847 } 675 848 676 /* 849 /* 677 * determine whether a mapping should be permi 850 * determine whether a mapping should be permitted and, if so, what sort of 678 * mapping we're capable of supporting 851 * mapping we're capable of supporting 679 */ 852 */ 680 static int validate_mmap_request(struct file * 853 static int validate_mmap_request(struct file *file, 681 unsigned long 854 unsigned long addr, 682 unsigned long 855 unsigned long len, 683 unsigned long 856 unsigned long prot, 684 unsigned long 857 unsigned long flags, 685 unsigned long 858 unsigned long pgoff, 686 unsigned long 859 unsigned long *_capabilities) 687 { 860 { 688 unsigned long capabilities, rlen; 861 unsigned long capabilities, rlen; 689 int ret; 862 int ret; 690 863 691 /* do the simple checks first */ 864 /* do the simple checks first */ 692 if (flags & MAP_FIXED) 865 if (flags & MAP_FIXED) 693 return -EINVAL; 866 return -EINVAL; 694 867 695 if ((flags & MAP_TYPE) != MAP_PRIVATE 868 if ((flags & MAP_TYPE) != MAP_PRIVATE && 696 (flags & MAP_TYPE) != MAP_SHARED) 869 (flags & MAP_TYPE) != MAP_SHARED) 697 return -EINVAL; 870 return -EINVAL; 698 871 699 if (!len) 872 if (!len) 700 return -EINVAL; 873 return -EINVAL; 701 874 702 /* Careful about overflows.. */ 875 /* Careful about overflows.. */ 703 rlen = PAGE_ALIGN(len); 876 rlen = PAGE_ALIGN(len); 704 if (!rlen || rlen > TASK_SIZE) 877 if (!rlen || rlen > TASK_SIZE) 705 return -ENOMEM; 878 return -ENOMEM; 706 879 707 /* offset overflow? */ 880 /* offset overflow? 
*/ 708 if ((pgoff + (rlen >> PAGE_SHIFT)) < p 881 if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff) 709 return -EOVERFLOW; 882 return -EOVERFLOW; 710 883 711 if (file) { 884 if (file) { 712 /* files must support mmap */ 885 /* files must support mmap */ 713 if (!file->f_op->mmap) 886 if (!file->f_op->mmap) 714 return -ENODEV; 887 return -ENODEV; 715 888 716 /* work out if what we've got 889 /* work out if what we've got could possibly be shared 717 * - we support chardevs that 890 * - we support chardevs that provide their own "memory" 718 * - we support files/blockdev 891 * - we support files/blockdevs that are memory backed 719 */ 892 */ 720 if (file->f_op->mmap_capabilit 893 if (file->f_op->mmap_capabilities) { 721 capabilities = file->f 894 capabilities = file->f_op->mmap_capabilities(file); 722 } else { 895 } else { 723 /* no explicit capabil 896 /* no explicit capabilities set, so assume some 724 * defaults */ 897 * defaults */ 725 switch (file_inode(fil 898 switch (file_inode(file)->i_mode & S_IFMT) { 726 case S_IFREG: 899 case S_IFREG: 727 case S_IFBLK: 900 case S_IFBLK: 728 capabilities = 901 capabilities = NOMMU_MAP_COPY; 729 break; 902 break; 730 903 731 case S_IFCHR: 904 case S_IFCHR: 732 capabilities = 905 capabilities = 733 NOMMU_ 906 NOMMU_MAP_DIRECT | 734 NOMMU_ 907 NOMMU_MAP_READ | 735 NOMMU_ 908 NOMMU_MAP_WRITE; 736 break; 909 break; 737 910 738 default: 911 default: 739 return -EINVAL 912 return -EINVAL; 740 } 913 } 741 } 914 } 742 915 743 /* eliminate any capabilities 916 /* eliminate any capabilities that we can't support on this 744 * device */ 917 * device */ 745 if (!file->f_op->get_unmapped_ 918 if (!file->f_op->get_unmapped_area) 746 capabilities &= ~NOMMU 919 capabilities &= ~NOMMU_MAP_DIRECT; 747 if (!(file->f_mode & FMODE_CAN 920 if (!(file->f_mode & FMODE_CAN_READ)) 748 capabilities &= ~NOMMU 921 capabilities &= ~NOMMU_MAP_COPY; 749 922 750 /* The file shall have been op 923 /* The file shall have been opened with read permission. 
*/ 751 if (!(file->f_mode & FMODE_REA 924 if (!(file->f_mode & FMODE_READ)) 752 return -EACCES; 925 return -EACCES; 753 926 754 if (flags & MAP_SHARED) { 927 if (flags & MAP_SHARED) { 755 /* do checks for writi 928 /* do checks for writing, appending and locking */ 756 if ((prot & PROT_WRITE 929 if ((prot & PROT_WRITE) && 757 !(file->f_mode & F 930 !(file->f_mode & FMODE_WRITE)) 758 return -EACCES 931 return -EACCES; 759 932 760 if (IS_APPEND(file_ino 933 if (IS_APPEND(file_inode(file)) && 761 (file->f_mode & FM 934 (file->f_mode & FMODE_WRITE)) 762 return -EACCES 935 return -EACCES; 763 936 >> 937 if (locks_verify_locked(file)) >> 938 return -EAGAIN; >> 939 764 if (!(capabilities & N 940 if (!(capabilities & NOMMU_MAP_DIRECT)) 765 return -ENODEV 941 return -ENODEV; 766 942 767 /* we mustn't privatis 943 /* we mustn't privatise shared mappings */ 768 capabilities &= ~NOMMU 944 capabilities &= ~NOMMU_MAP_COPY; 769 } else { 945 } else { 770 /* we're going to read 946 /* we're going to read the file into private memory we 771 * allocate */ 947 * allocate */ 772 if (!(capabilities & N 948 if (!(capabilities & NOMMU_MAP_COPY)) 773 return -ENODEV 949 return -ENODEV; 774 950 775 /* we don't permit a p 951 /* we don't permit a private writable mapping to be 776 * shared with the bac 952 * shared with the backing device */ 777 if (prot & PROT_WRITE) 953 if (prot & PROT_WRITE) 778 capabilities & 954 capabilities &= ~NOMMU_MAP_DIRECT; 779 } 955 } 780 956 781 if (capabilities & NOMMU_MAP_D 957 if (capabilities & NOMMU_MAP_DIRECT) { 782 if (((prot & PROT_READ 958 if (((prot & PROT_READ) && !(capabilities & NOMMU_MAP_READ)) || 783 ((prot & PROT_WRIT 959 ((prot & PROT_WRITE) && !(capabilities & NOMMU_MAP_WRITE)) || 784 ((prot & PROT_EXEC 960 ((prot & PROT_EXEC) && !(capabilities & NOMMU_MAP_EXEC)) 785 ) { 961 ) { 786 capabilities & 962 capabilities &= ~NOMMU_MAP_DIRECT; 787 if (flags & MA 963 if (flags & MAP_SHARED) { 788 pr_war 964 pr_warn("MAP_SHARED not completely supported on !MMU\n"); 789 return 965 return -EINVAL; 790 } 966 } 791 } 967 } 792 } 968 } 793 969 794 /* handle executable mappings 970 /* handle executable mappings and implied executable 795 * mappings */ 971 * mappings */ 796 if (path_noexec(&file->f_path) 972 if (path_noexec(&file->f_path)) { 797 if (prot & PROT_EXEC) 973 if (prot & PROT_EXEC) 798 return -EPERM; 974 return -EPERM; 799 } else if ((prot & PROT_READ) 975 } else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) { 800 /* handle implication 976 /* handle implication of PROT_EXEC by PROT_READ */ 801 if (current->personali 977 if (current->personality & READ_IMPLIES_EXEC) { 802 if (capabiliti 978 if (capabilities & NOMMU_MAP_EXEC) 803 prot | 979 prot |= PROT_EXEC; 804 } 980 } 805 } else if ((prot & PROT_READ) 981 } else if ((prot & PROT_READ) && 806 (prot & PROT_EXEC) && 982 (prot & PROT_EXEC) && 807 !(capabilities & NOMM 983 !(capabilities & NOMMU_MAP_EXEC) 808 ) { 984 ) { 809 /* backing file is not 985 /* backing file is not executable, try to copy */ 810 capabilities &= ~NOMMU 986 capabilities &= ~NOMMU_MAP_DIRECT; 811 } 987 } 812 } else { 988 } else { 813 /* anonymous mappings are alwa 989 /* anonymous mappings are always memory backed and can be 814 * privately mapped 990 * privately mapped 815 */ 991 */ 816 capabilities = NOMMU_MAP_COPY; 992 capabilities = NOMMU_MAP_COPY; 817 993 818 /* handle PROT_EXEC implicatio 994 /* handle PROT_EXEC implication by PROT_READ */ 819 if ((prot & PROT_READ) && 995 if ((prot & PROT_READ) && 820 (current->personality & RE 996 
(current->personality & READ_IMPLIES_EXEC)) 821 prot |= PROT_EXEC; 997 prot |= PROT_EXEC; 822 } 998 } 823 999 824 /* allow the security API to have its 1000 /* allow the security API to have its say */ 825 ret = security_mmap_addr(addr); 1001 ret = security_mmap_addr(addr); 826 if (ret < 0) 1002 if (ret < 0) 827 return ret; 1003 return ret; 828 1004 829 /* looks okay */ 1005 /* looks okay */ 830 *_capabilities = capabilities; 1006 *_capabilities = capabilities; 831 return 0; 1007 return 0; 832 } 1008 } 833 1009 834 /* 1010 /* 835 * we've determined that we can make the mappi 1011 * we've determined that we can make the mapping, now translate what we 836 * now know into VMA flags 1012 * now know into VMA flags 837 */ 1013 */ 838 static unsigned long determine_vm_flags(struct 1014 static unsigned long determine_vm_flags(struct file *file, 839 unsign 1015 unsigned long prot, 840 unsign 1016 unsigned long flags, 841 unsign 1017 unsigned long capabilities) 842 { 1018 { 843 unsigned long vm_flags; 1019 unsigned long vm_flags; 844 1020 845 vm_flags = calc_vm_prot_bits(prot, 0) !! 1021 vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags); >> 1022 /* vm_flags |= mm->def_flags; */ 846 1023 847 if (!file) { !! 1024 if (!(capabilities & NOMMU_MAP_DIRECT)) { 848 /* !! 1025 /* attempt to share read-only copies of mapped file chunks */ 849 * MAP_ANONYMOUS. MAP_SHARED i << 850 * there is no fork(). << 851 */ << 852 vm_flags |= VM_MAYREAD | VM_MA 1026 vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; 853 } else if (flags & MAP_PRIVATE) { !! 1027 if (file && !(prot & PROT_WRITE)) 854 /* MAP_PRIVATE file mapping */ !! 1028 vm_flags |= VM_MAYSHARE; 855 if (capabilities & NOMMU_MAP_D << 856 vm_flags |= (capabilit << 857 else << 858 vm_flags |= VM_MAYREAD << 859 << 860 if (!(prot & PROT_WRITE) && !c << 861 /* << 862 * R/O private file ma << 863 * modify memory, espe << 864 * (e.g., set breakpoi << 865 * permissions (no mpr << 866 * the file mapping, w << 867 * ramfs/tmpfs/shmfs a << 868 */ << 869 vm_flags |= VM_MAYOVER << 870 } else { 1029 } else { 871 /* MAP_SHARED file mapping: NO !! 1030 /* overlay a shareable mapping on the backing device or inode 872 vm_flags |= VM_SHARED | VM_MAY !! 1031 * if possible - used for chardevs, ramfs/tmpfs/shmfs and 873 (capabilities & NO !! 1032 * romfs/cramfs */ >> 1033 vm_flags |= VM_MAYSHARE | (capabilities & NOMMU_VMFLAGS); >> 1034 if (flags & MAP_SHARED) >> 1035 vm_flags |= VM_SHARED; 874 } 1036 } 875 1037 >> 1038 /* refuse to let anyone share private mappings with this process if >> 1039 * it's being traced - otherwise breakpoints set in it may interfere >> 1040 * with another untraced process >> 1041 */ >> 1042 if ((flags & MAP_PRIVATE) && current->ptrace) >> 1043 vm_flags &= ~VM_MAYSHARE; >> 1044 876 return vm_flags; 1045 return vm_flags; 877 } 1046 } 878 1047 879 /* 1048 /* 880 * set up a shared mapping on a file (the driv 1049 * set up a shared mapping on a file (the driver or filesystem provides and 881 * pins the storage) 1050 * pins the storage) 882 */ 1051 */ 883 static int do_mmap_shared_file(struct vm_area_ 1052 static int do_mmap_shared_file(struct vm_area_struct *vma) 884 { 1053 { 885 int ret; 1054 int ret; 886 1055 887 ret = mmap_file(vma->vm_file, vma); !! 
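/*
 * Illustrative user-space sketch, not part of this file: what the checks
 * in validate_mmap_request()/determine_vm_flags() above look like from
 * user space on a !MMU target.  A MAP_PRIVATE file mapping is satisfied
 * by copying the file into anonymous memory (NOMMU_MAP_COPY); a writable
 * MAP_SHARED mapping needs the filesystem or driver to hand out its
 * backing memory directly (NOMMU_MAP_DIRECT), which ramfs/tmpfs and
 * romfs can do but ordinary disk files usually cannot.  On an MMU host
 * both calls simply succeed.  The temporary path is made up.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/nommu-mmap-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
	void *p;

	if (fd < 0 || ftruncate(fd, 4096) < 0) {
		perror("setup");
		return 1;
	}

	/* private mapping: backed by a copy of the file on !MMU */
	p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);
	printf("MAP_PRIVATE:           %s\n", p == MAP_FAILED ? "failed" : "ok");

	/* shared writable mapping: requires NOMMU_MAP_DIRECT support */
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	printf("MAP_SHARED|PROT_WRITE: %s\n", p == MAP_FAILED ? "failed" : "ok");

	close(fd);
	unlink("/tmp/nommu-mmap-demo");
	return 0;
}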
1056 ret = call_mmap(vma->vm_file, vma); 888 if (ret == 0) { 1057 if (ret == 0) { 889 vma->vm_region->vm_top = vma-> 1058 vma->vm_region->vm_top = vma->vm_region->vm_end; 890 return 0; 1059 return 0; 891 } 1060 } 892 if (ret != -ENOSYS) 1061 if (ret != -ENOSYS) 893 return ret; 1062 return ret; 894 1063 895 /* getting -ENOSYS indicates that dire 1064 /* getting -ENOSYS indicates that direct mmap isn't possible (as 896 * opposed to tried but failed) so we 1065 * opposed to tried but failed) so we can only give a suitable error as 897 * it's not possible to make a private 1066 * it's not possible to make a private copy if MAP_SHARED was given */ 898 return -ENODEV; 1067 return -ENODEV; 899 } 1068 } 900 1069 901 /* 1070 /* 902 * set up a private mapping or an anonymous sh 1071 * set up a private mapping or an anonymous shared mapping 903 */ 1072 */ 904 static int do_mmap_private(struct vm_area_stru 1073 static int do_mmap_private(struct vm_area_struct *vma, 905 struct vm_region *r 1074 struct vm_region *region, 906 unsigned long len, 1075 unsigned long len, 907 unsigned long capab 1076 unsigned long capabilities) 908 { 1077 { 909 unsigned long total, point; 1078 unsigned long total, point; 910 void *base; 1079 void *base; 911 int ret, order; 1080 int ret, order; 912 1081 913 /* !! 1082 /* invoke the file's mapping function so that it can keep track of 914 * Invoke the file's mapping function !! 1083 * shared mappings on devices or memory 915 * shared mappings on devices or memor !! 1084 * - VM_MAYSHARE will be set if it may attempt to share 916 * it may attempt to share, which will << 917 * happy. << 918 */ 1085 */ 919 if (capabilities & NOMMU_MAP_DIRECT) { 1086 if (capabilities & NOMMU_MAP_DIRECT) { 920 ret = mmap_file(vma->vm_file, !! 1087 ret = call_mmap(vma->vm_file, vma); 921 /* shouldn't return success if << 922 if (WARN_ON_ONCE(!is_nommu_sha << 923 ret = -ENOSYS; << 924 if (ret == 0) { 1088 if (ret == 0) { >> 1089 /* shouldn't return success if we're not sharing */ >> 1090 BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); 925 vma->vm_region->vm_top 1091 vma->vm_region->vm_top = vma->vm_region->vm_end; 926 return 0; 1092 return 0; 927 } 1093 } 928 if (ret != -ENOSYS) 1094 if (ret != -ENOSYS) 929 return ret; 1095 return ret; 930 1096 931 /* getting an ENOSYS error ind 1097 /* getting an ENOSYS error indicates that direct mmap isn't 932 * possible (as opposed to tri 1098 * possible (as opposed to tried but failed) so we'll try to 933 * make a private copy of the 1099 * make a private copy of the data and map that instead */ 934 } 1100 } 935 1101 936 1102 937 /* allocate some memory to hold the ma 1103 /* allocate some memory to hold the mapping 938 * - note that this may not return a p 1104 * - note that this may not return a page-aligned address if the object 939 * we're allocating is smaller than 1105 * we're allocating is smaller than a page 940 */ 1106 */ 941 order = get_order(len); 1107 order = get_order(len); 942 total = 1 << order; 1108 total = 1 << order; 943 point = len >> PAGE_SHIFT; 1109 point = len >> PAGE_SHIFT; 944 1110 945 /* we don't want to allocate a power-o 1111 /* we don't want to allocate a power-of-2 sized page set */ 946 if (sysctl_nr_trim_pages && total - po 1112 if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) 947 total = point; 1113 total = point; 948 1114 949 base = alloc_pages_exact(total << PAGE 1115 base = alloc_pages_exact(total << PAGE_SHIFT, GFP_KERNEL); 950 if (!base) 1116 if (!base) 951 goto enomem; 1117 goto enomem; 952 1118 953 
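/*
 * Illustrative user-space sketch, not part of this file: the rounding
 * and trimming arithmetic used by do_mmap_private() above.  With 4KiB
 * pages, a 33-page (132KiB) request rounds up to a 64-page power-of-two
 * block; once the excess (31 pages) reaches sysctl_nr_trim_pages
 * (CONFIG_NOMMU_INITIAL_TRIM_EXCESS, commonly 1), the allocation is
 * trimmed back to exactly 33 pages.  The numbers are made up for the
 * example.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define NR_TRIM_PAGES	1	/* stand-in for sysctl_nr_trim_pages */

static unsigned long get_order(unsigned long len)
{
	unsigned long order = 0;

	len = (len - 1) >> PAGE_SHIFT;
	while (len) {
		order++;
		len >>= 1;
	}
	return order;
}

int main(void)
{
	unsigned long len = 33 * PAGE_SIZE;
	unsigned long total = 1UL << get_order(len);
	unsigned long point = len >> PAGE_SHIFT;

	if (NR_TRIM_PAGES && total - point >= NR_TRIM_PAGES)
		total = point;

	printf("len=%lu bytes -> %lu pages allocated\n", len, total);
	return 0;
}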
atomic_long_add(total, &mmap_pages_all 1119 atomic_long_add(total, &mmap_pages_allocated); 954 1120 955 vm_flags_set(vma, VM_MAPPED_COPY); !! 1121 region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY; 956 region->vm_flags = vma->vm_flags; << 957 region->vm_start = (unsigned long) bas 1122 region->vm_start = (unsigned long) base; 958 region->vm_end = region->vm_start + 1123 region->vm_end = region->vm_start + len; 959 region->vm_top = region->vm_start + 1124 region->vm_top = region->vm_start + (total << PAGE_SHIFT); 960 1125 961 vma->vm_start = region->vm_start; 1126 vma->vm_start = region->vm_start; 962 vma->vm_end = region->vm_start + len 1127 vma->vm_end = region->vm_start + len; 963 1128 964 if (vma->vm_file) { 1129 if (vma->vm_file) { 965 /* read the contents of a file 1130 /* read the contents of a file into the copy */ 966 loff_t fpos; 1131 loff_t fpos; 967 1132 968 fpos = vma->vm_pgoff; 1133 fpos = vma->vm_pgoff; 969 fpos <<= PAGE_SHIFT; 1134 fpos <<= PAGE_SHIFT; 970 1135 971 ret = kernel_read(vma->vm_file 1136 ret = kernel_read(vma->vm_file, base, len, &fpos); 972 if (ret < 0) 1137 if (ret < 0) 973 goto error_free; 1138 goto error_free; 974 1139 975 /* clear the last little bit * 1140 /* clear the last little bit */ 976 if (ret < len) 1141 if (ret < len) 977 memset(base + ret, 0, 1142 memset(base + ret, 0, len - ret); 978 1143 979 } else { 1144 } else { 980 vma_set_anonymous(vma); 1145 vma_set_anonymous(vma); 981 } 1146 } 982 1147 983 return 0; 1148 return 0; 984 1149 985 error_free: 1150 error_free: 986 free_page_series(region->vm_start, reg 1151 free_page_series(region->vm_start, region->vm_top); 987 region->vm_start = vma->vm_start = 0; 1152 region->vm_start = vma->vm_start = 0; 988 region->vm_end = vma->vm_end = 0; 1153 region->vm_end = vma->vm_end = 0; 989 region->vm_top = 0; 1154 region->vm_top = 0; 990 return ret; 1155 return ret; 991 1156 992 enomem: 1157 enomem: 993 pr_err("Allocation of length %lu from 1158 pr_err("Allocation of length %lu from process %d (%s) failed\n", 994 len, current->pid, current->com 1159 len, current->pid, current->comm); 995 show_mem(); !! 
1160 show_free_areas(0, NULL); 996 return -ENOMEM; 1161 return -ENOMEM; 997 } 1162 } 998 1163 999 /* 1164 /* 1000 * handle mapping creation for uClinux 1165 * handle mapping creation for uClinux 1001 */ 1166 */ 1002 unsigned long do_mmap(struct file *file, 1167 unsigned long do_mmap(struct file *file, 1003 unsigned long addr, 1168 unsigned long addr, 1004 unsigned long len, 1169 unsigned long len, 1005 unsigned long prot, 1170 unsigned long prot, 1006 unsigned long flags, 1171 unsigned long flags, 1007 vm_flags_t vm_flags, 1172 vm_flags_t vm_flags, 1008 unsigned long pgoff, 1173 unsigned long pgoff, 1009 unsigned long *popula 1174 unsigned long *populate, 1010 struct list_head *uf) 1175 struct list_head *uf) 1011 { 1176 { 1012 struct vm_area_struct *vma; 1177 struct vm_area_struct *vma; 1013 struct vm_region *region; 1178 struct vm_region *region; 1014 struct rb_node *rb; 1179 struct rb_node *rb; 1015 unsigned long capabilities, result; 1180 unsigned long capabilities, result; 1016 int ret; 1181 int ret; 1017 VMA_ITERATOR(vmi, current->mm, 0); << 1018 1182 1019 *populate = 0; 1183 *populate = 0; 1020 1184 1021 /* decide whether we should attempt t 1185 /* decide whether we should attempt the mapping, and if so what sort of 1022 * mapping */ 1186 * mapping */ 1023 ret = validate_mmap_request(file, add 1187 ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, 1024 &capabili 1188 &capabilities); 1025 if (ret < 0) 1189 if (ret < 0) 1026 return ret; 1190 return ret; 1027 1191 1028 /* we ignore the address hint */ 1192 /* we ignore the address hint */ 1029 addr = 0; 1193 addr = 0; 1030 len = PAGE_ALIGN(len); 1194 len = PAGE_ALIGN(len); 1031 1195 1032 /* we've determined that we can make 1196 /* we've determined that we can make the mapping, now translate what we 1033 * now know into VMA flags */ 1197 * now know into VMA flags */ 1034 vm_flags |= determine_vm_flags(file, 1198 vm_flags |= determine_vm_flags(file, prot, flags, capabilities); 1035 1199 1036 << 1037 /* we're going to need to record the 1200 /* we're going to need to record the mapping */ 1038 region = kmem_cache_zalloc(vm_region_ 1201 region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL); 1039 if (!region) 1202 if (!region) 1040 goto error_getting_region; 1203 goto error_getting_region; 1041 1204 1042 vma = vm_area_alloc(current->mm); 1205 vma = vm_area_alloc(current->mm); 1043 if (!vma) 1206 if (!vma) 1044 goto error_getting_vma; 1207 goto error_getting_vma; 1045 1208 1046 region->vm_usage = 1; 1209 region->vm_usage = 1; 1047 region->vm_flags = vm_flags; 1210 region->vm_flags = vm_flags; 1048 region->vm_pgoff = pgoff; 1211 region->vm_pgoff = pgoff; 1049 1212 1050 vm_flags_init(vma, vm_flags); !! 
1213 vma->vm_flags = vm_flags; 1051 vma->vm_pgoff = pgoff; 1214 vma->vm_pgoff = pgoff; 1052 1215 1053 if (file) { 1216 if (file) { 1054 region->vm_file = get_file(fi 1217 region->vm_file = get_file(file); 1055 vma->vm_file = get_file(file) 1218 vma->vm_file = get_file(file); 1056 } 1219 } 1057 1220 1058 down_write(&nommu_region_sem); 1221 down_write(&nommu_region_sem); 1059 1222 1060 /* if we want to share, we need to ch 1223 /* if we want to share, we need to check for regions created by other 1061 * mmap() calls that overlap with our 1224 * mmap() calls that overlap with our proposed mapping 1062 * - we can only share with a superse 1225 * - we can only share with a superset match on most regular files 1063 * - shared mappings on character dev 1226 * - shared mappings on character devices and memory backed files are 1064 * permitted to overlap inexactly a 1227 * permitted to overlap inexactly as far as we are concerned for in 1065 * these cases, sharing is handled 1228 * these cases, sharing is handled in the driver or filesystem rather 1066 * than here 1229 * than here 1067 */ 1230 */ 1068 if (is_nommu_shared_mapping(vm_flags) !! 1231 if (vm_flags & VM_MAYSHARE) { 1069 struct vm_region *pregion; 1232 struct vm_region *pregion; 1070 unsigned long pglen, rpglen, 1233 unsigned long pglen, rpglen, pgend, rpgend, start; 1071 1234 1072 pglen = (len + PAGE_SIZE - 1) 1235 pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1073 pgend = pgoff + pglen; 1236 pgend = pgoff + pglen; 1074 1237 1075 for (rb = rb_first(&nommu_reg 1238 for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) { 1076 pregion = rb_entry(rb 1239 pregion = rb_entry(rb, struct vm_region, vm_rb); 1077 1240 1078 if (!is_nommu_shared_ !! 1241 if (!(pregion->vm_flags & VM_MAYSHARE)) 1079 continue; 1242 continue; 1080 1243 1081 /* search for overlap 1244 /* search for overlapping mappings on the same file */ 1082 if (file_inode(pregio 1245 if (file_inode(pregion->vm_file) != 1083 file_inode(file)) 1246 file_inode(file)) 1084 continue; 1247 continue; 1085 1248 1086 if (pregion->vm_pgoff 1249 if (pregion->vm_pgoff >= pgend) 1087 continue; 1250 continue; 1088 1251 1089 rpglen = pregion->vm_ 1252 rpglen = pregion->vm_end - pregion->vm_start; 1090 rpglen = (rpglen + PA 1253 rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT; 1091 rpgend = pregion->vm_ 1254 rpgend = pregion->vm_pgoff + rpglen; 1092 if (pgoff >= rpgend) 1255 if (pgoff >= rpgend) 1093 continue; 1256 continue; 1094 1257 1095 /* handle inexactly o 1258 /* handle inexactly overlapping matches between 1096 * mappings */ 1259 * mappings */ 1097 if ((pregion->vm_pgof 1260 if ((pregion->vm_pgoff != pgoff || rpglen != pglen) && 1098 !(pgoff >= pregio 1261 !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) { 1099 /* new mappin 1262 /* new mapping is not a subset of the region */ 1100 if (!(capabil 1263 if (!(capabilities & NOMMU_MAP_DIRECT)) 1101 goto 1264 goto sharing_violation; 1102 continue; 1265 continue; 1103 } 1266 } 1104 1267 1105 /* we've found a regi 1268 /* we've found a region we can share */ 1106 pregion->vm_usage++; 1269 pregion->vm_usage++; 1107 vma->vm_region = preg 1270 vma->vm_region = pregion; 1108 start = pregion->vm_s 1271 start = pregion->vm_start; 1109 start += (pgoff - pre 1272 start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; 1110 vma->vm_start = start 1273 vma->vm_start = start; 1111 vma->vm_end = start + 1274 vma->vm_end = start + len; 1112 1275 1113 if (pregion->vm_flags 1276 if (pregion->vm_flags & VM_MAPPED_COPY) 1114 vm_flags_set( !! 
1277 vma->vm_flags |= VM_MAPPED_COPY; 1115 else { 1278 else { 1116 ret = do_mmap 1279 ret = do_mmap_shared_file(vma); 1117 if (ret < 0) 1280 if (ret < 0) { 1118 vma-> 1281 vma->vm_region = NULL; 1119 vma-> 1282 vma->vm_start = 0; 1120 vma-> 1283 vma->vm_end = 0; 1121 pregi 1284 pregion->vm_usage--; 1122 pregi 1285 pregion = NULL; 1123 goto 1286 goto error_just_free; 1124 } 1287 } 1125 } 1288 } 1126 fput(region->vm_file) 1289 fput(region->vm_file); 1127 kmem_cache_free(vm_re 1290 kmem_cache_free(vm_region_jar, region); 1128 region = pregion; 1291 region = pregion; 1129 result = start; 1292 result = start; 1130 goto share; 1293 goto share; 1131 } 1294 } 1132 1295 1133 /* obtain the address at whic 1296 /* obtain the address at which to make a shared mapping 1134 * - this is the hook for qua 1297 * - this is the hook for quasi-memory character devices to 1135 * tell us the location of 1298 * tell us the location of a shared mapping 1136 */ 1299 */ 1137 if (capabilities & NOMMU_MAP_ 1300 if (capabilities & NOMMU_MAP_DIRECT) { 1138 addr = file->f_op->ge 1301 addr = file->f_op->get_unmapped_area(file, addr, len, 1139 1302 pgoff, flags); 1140 if (IS_ERR_VALUE(addr 1303 if (IS_ERR_VALUE(addr)) { 1141 ret = addr; 1304 ret = addr; 1142 if (ret != -E 1305 if (ret != -ENOSYS) 1143 goto 1306 goto error_just_free; 1144 1307 1145 /* the driver 1308 /* the driver refused to tell us where to site 1146 * the mappin 1309 * the mapping so we'll have to attempt to copy 1147 * it */ 1310 * it */ 1148 ret = -ENODEV 1311 ret = -ENODEV; 1149 if (!(capabil 1312 if (!(capabilities & NOMMU_MAP_COPY)) 1150 goto 1313 goto error_just_free; 1151 1314 1152 capabilities 1315 capabilities &= ~NOMMU_MAP_DIRECT; 1153 } else { 1316 } else { 1154 vma->vm_start 1317 vma->vm_start = region->vm_start = addr; 1155 vma->vm_end = 1318 vma->vm_end = region->vm_end = addr + len; 1156 } 1319 } 1157 } 1320 } 1158 } 1321 } 1159 1322 1160 vma->vm_region = region; 1323 vma->vm_region = region; 1161 1324 1162 /* set up the mapping 1325 /* set up the mapping 1163 * - the region is filled in if NOMMU 1326 * - the region is filled in if NOMMU_MAP_DIRECT is still set 1164 */ 1327 */ 1165 if (file && vma->vm_flags & VM_SHARED 1328 if (file && vma->vm_flags & VM_SHARED) 1166 ret = do_mmap_shared_file(vma 1329 ret = do_mmap_shared_file(vma); 1167 else 1330 else 1168 ret = do_mmap_private(vma, re 1331 ret = do_mmap_private(vma, region, len, capabilities); 1169 if (ret < 0) 1332 if (ret < 0) 1170 goto error_just_free; 1333 goto error_just_free; 1171 add_nommu_region(region); 1334 add_nommu_region(region); 1172 1335 1173 /* clear anonymous mappings that don' 1336 /* clear anonymous mappings that don't ask for uninitialized data */ 1174 if (!vma->vm_file && !! 1337 if (!vma->vm_file && !(flags & MAP_UNINITIALIZED)) 1175 (!IS_ENABLED(CONFIG_MMAP_ALLOW_UN << 1176 !(flags & MAP_UNINITIALIZED))) << 1177 memset((void *)region->vm_sta 1338 memset((void *)region->vm_start, 0, 1178 region->vm_end - regio 1339 region->vm_end - region->vm_start); 1179 1340 1180 /* okay... we have a mapping; now we 1341 /* okay... we have a mapping; now we have to register it */ 1181 result = vma->vm_start; 1342 result = vma->vm_start; 1182 1343 1183 current->mm->total_vm += len >> PAGE_ 1344 current->mm->total_vm += len >> PAGE_SHIFT; 1184 1345 1185 share: 1346 share: 1186 BUG_ON(!vma->vm_region); !! 
1347 add_vma_to_mm(current->mm, vma); 1187 vma_iter_config(&vmi, vma->vm_start, << 1188 if (vma_iter_prealloc(&vmi, vma)) << 1189 goto error_just_free; << 1190 << 1191 setup_vma_to_mm(vma, current->mm); << 1192 current->mm->map_count++; << 1193 /* add the VMA to the tree */ << 1194 vma_iter_store(&vmi, vma); << 1195 1348 1196 /* we flush the region from the icach 1349 /* we flush the region from the icache only when the first executable 1197 * mapping of it is made */ 1350 * mapping of it is made */ 1198 if (vma->vm_flags & VM_EXEC && !regio 1351 if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) { 1199 flush_icache_user_range(regio !! 1352 flush_icache_range(region->vm_start, region->vm_end); 1200 region->vm_icache_flushed = t 1353 region->vm_icache_flushed = true; 1201 } 1354 } 1202 1355 1203 up_write(&nommu_region_sem); 1356 up_write(&nommu_region_sem); 1204 1357 1205 return result; 1358 return result; 1206 1359 1207 error_just_free: 1360 error_just_free: 1208 up_write(&nommu_region_sem); 1361 up_write(&nommu_region_sem); 1209 error: 1362 error: 1210 vma_iter_free(&vmi); << 1211 if (region->vm_file) 1363 if (region->vm_file) 1212 fput(region->vm_file); 1364 fput(region->vm_file); 1213 kmem_cache_free(vm_region_jar, region 1365 kmem_cache_free(vm_region_jar, region); 1214 if (vma->vm_file) 1366 if (vma->vm_file) 1215 fput(vma->vm_file); 1367 fput(vma->vm_file); 1216 vm_area_free(vma); 1368 vm_area_free(vma); 1217 return ret; 1369 return ret; 1218 1370 1219 sharing_violation: 1371 sharing_violation: 1220 up_write(&nommu_region_sem); 1372 up_write(&nommu_region_sem); 1221 pr_warn("Attempt to share mismatched 1373 pr_warn("Attempt to share mismatched mappings\n"); 1222 ret = -EINVAL; 1374 ret = -EINVAL; 1223 goto error; 1375 goto error; 1224 1376 1225 error_getting_vma: 1377 error_getting_vma: 1226 kmem_cache_free(vm_region_jar, region 1378 kmem_cache_free(vm_region_jar, region); 1227 pr_warn("Allocation of vma for %lu by 1379 pr_warn("Allocation of vma for %lu byte allocation from process %d failed\n", 1228 len, current->pid); 1380 len, current->pid); 1229 show_mem(); !! 1381 show_free_areas(0, NULL); 1230 return -ENOMEM; 1382 return -ENOMEM; 1231 1383 1232 error_getting_region: 1384 error_getting_region: 1233 pr_warn("Allocation of vm region for 1385 pr_warn("Allocation of vm region for %lu byte allocation from process %d failed\n", 1234 len, current->pid); 1386 len, current->pid); 1235 show_mem(); !! 
1387 show_free_areas(0, NULL); 1236 return -ENOMEM; 1388 return -ENOMEM; 1237 } 1389 } 1238 1390 1239 unsigned long ksys_mmap_pgoff(unsigned long a 1391 unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, 1240 unsigned long p 1392 unsigned long prot, unsigned long flags, 1241 unsigned long f 1393 unsigned long fd, unsigned long pgoff) 1242 { 1394 { 1243 struct file *file = NULL; 1395 struct file *file = NULL; 1244 unsigned long retval = -EBADF; 1396 unsigned long retval = -EBADF; 1245 1397 1246 audit_mmap_fd(fd, flags); 1398 audit_mmap_fd(fd, flags); 1247 if (!(flags & MAP_ANONYMOUS)) { 1399 if (!(flags & MAP_ANONYMOUS)) { 1248 file = fget(fd); 1400 file = fget(fd); 1249 if (!file) 1401 if (!file) 1250 goto out; 1402 goto out; 1251 } 1403 } 1252 1404 >> 1405 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); >> 1406 1253 retval = vm_mmap_pgoff(file, addr, le 1407 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); 1254 1408 1255 if (file) 1409 if (file) 1256 fput(file); 1410 fput(file); 1257 out: 1411 out: 1258 return retval; 1412 return retval; 1259 } 1413 } 1260 1414 1261 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, ad 1415 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, 1262 unsigned long, prot, unsigned 1416 unsigned long, prot, unsigned long, flags, 1263 unsigned long, fd, unsigned l 1417 unsigned long, fd, unsigned long, pgoff) 1264 { 1418 { 1265 return ksys_mmap_pgoff(addr, len, pro 1419 return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); 1266 } 1420 } 1267 1421 1268 #ifdef __ARCH_WANT_SYS_OLD_MMAP 1422 #ifdef __ARCH_WANT_SYS_OLD_MMAP 1269 struct mmap_arg_struct { 1423 struct mmap_arg_struct { 1270 unsigned long addr; 1424 unsigned long addr; 1271 unsigned long len; 1425 unsigned long len; 1272 unsigned long prot; 1426 unsigned long prot; 1273 unsigned long flags; 1427 unsigned long flags; 1274 unsigned long fd; 1428 unsigned long fd; 1275 unsigned long offset; 1429 unsigned long offset; 1276 }; 1430 }; 1277 1431 1278 SYSCALL_DEFINE1(old_mmap, struct mmap_arg_str 1432 SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) 1279 { 1433 { 1280 struct mmap_arg_struct a; 1434 struct mmap_arg_struct a; 1281 1435 1282 if (copy_from_user(&a, arg, sizeof(a) 1436 if (copy_from_user(&a, arg, sizeof(a))) 1283 return -EFAULT; 1437 return -EFAULT; 1284 if (offset_in_page(a.offset)) 1438 if (offset_in_page(a.offset)) 1285 return -EINVAL; 1439 return -EINVAL; 1286 1440 1287 return ksys_mmap_pgoff(a.addr, a.len, 1441 return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, 1288 a.offset >> PA 1442 a.offset >> PAGE_SHIFT); 1289 } 1443 } 1290 #endif /* __ARCH_WANT_SYS_OLD_MMAP */ 1444 #endif /* __ARCH_WANT_SYS_OLD_MMAP */ 1291 1445 1292 /* 1446 /* 1293 * split a vma into two pieces at address 'ad 1447 * split a vma into two pieces at address 'addr', a new vma is allocated either 1294 * for the first part or the tail. 1448 * for the first part or the tail. 1295 */ 1449 */ 1296 static int split_vma(struct vma_iterator *vmi !! 1450 int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, 1297 unsigned long addr, int !! 
1451 unsigned long addr, int new_below) 1298 { 1452 { 1299 struct vm_area_struct *new; 1453 struct vm_area_struct *new; 1300 struct vm_region *region; 1454 struct vm_region *region; 1301 unsigned long npages; 1455 unsigned long npages; 1302 struct mm_struct *mm; << 1303 1456 1304 /* we're only permitted to split anon 1457 /* we're only permitted to split anonymous regions (these should have 1305 * only a single usage on the region) 1458 * only a single usage on the region) */ 1306 if (vma->vm_file) 1459 if (vma->vm_file) 1307 return -ENOMEM; 1460 return -ENOMEM; 1308 1461 1309 mm = vma->vm_mm; << 1310 if (mm->map_count >= sysctl_max_map_c 1462 if (mm->map_count >= sysctl_max_map_count) 1311 return -ENOMEM; 1463 return -ENOMEM; 1312 1464 1313 region = kmem_cache_alloc(vm_region_j 1465 region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL); 1314 if (!region) 1466 if (!region) 1315 return -ENOMEM; 1467 return -ENOMEM; 1316 1468 1317 new = vm_area_dup(vma); 1469 new = vm_area_dup(vma); 1318 if (!new) !! 1470 if (!new) { 1319 goto err_vma_dup; !! 1471 kmem_cache_free(vm_region_jar, region); >> 1472 return -ENOMEM; >> 1473 } 1320 1474 1321 /* most fields are the same, copy all 1475 /* most fields are the same, copy all, and then fixup */ 1322 *region = *vma->vm_region; 1476 *region = *vma->vm_region; 1323 new->vm_region = region; 1477 new->vm_region = region; 1324 1478 1325 npages = (addr - vma->vm_start) >> PA 1479 npages = (addr - vma->vm_start) >> PAGE_SHIFT; 1326 1480 1327 if (new_below) { 1481 if (new_below) { 1328 region->vm_top = region->vm_e 1482 region->vm_top = region->vm_end = new->vm_end = addr; 1329 } else { 1483 } else { 1330 region->vm_start = new->vm_st 1484 region->vm_start = new->vm_start = addr; 1331 region->vm_pgoff = new->vm_pg 1485 region->vm_pgoff = new->vm_pgoff += npages; 1332 } 1486 } 1333 1487 1334 vma_iter_config(vmi, new->vm_start, n << 1335 if (vma_iter_prealloc(vmi, vma)) { << 1336 pr_warn("Allocation of vma tr << 1337 current->pid); << 1338 goto err_vmi_preallocate; << 1339 } << 1340 << 1341 if (new->vm_ops && new->vm_ops->open) 1488 if (new->vm_ops && new->vm_ops->open) 1342 new->vm_ops->open(new); 1489 new->vm_ops->open(new); 1343 1490 >> 1491 delete_vma_from_mm(vma); 1344 down_write(&nommu_region_sem); 1492 down_write(&nommu_region_sem); 1345 delete_nommu_region(vma->vm_region); 1493 delete_nommu_region(vma->vm_region); 1346 if (new_below) { 1494 if (new_below) { 1347 vma->vm_region->vm_start = vm 1495 vma->vm_region->vm_start = vma->vm_start = addr; 1348 vma->vm_region->vm_pgoff = vm 1496 vma->vm_region->vm_pgoff = vma->vm_pgoff += npages; 1349 } else { 1497 } else { 1350 vma->vm_region->vm_end = vma- 1498 vma->vm_region->vm_end = vma->vm_end = addr; 1351 vma->vm_region->vm_top = addr 1499 vma->vm_region->vm_top = addr; 1352 } 1500 } 1353 add_nommu_region(vma->vm_region); 1501 add_nommu_region(vma->vm_region); 1354 add_nommu_region(new->vm_region); 1502 add_nommu_region(new->vm_region); 1355 up_write(&nommu_region_sem); 1503 up_write(&nommu_region_sem); 1356 !! 1504 add_vma_to_mm(mm, vma); 1357 setup_vma_to_mm(vma, mm); !! 
1505 add_vma_to_mm(mm, new); 1358 setup_vma_to_mm(new, mm); << 1359 vma_iter_store(vmi, new); << 1360 mm->map_count++; << 1361 return 0; 1506 return 0; 1362 << 1363 err_vmi_preallocate: << 1364 vm_area_free(new); << 1365 err_vma_dup: << 1366 kmem_cache_free(vm_region_jar, region << 1367 return -ENOMEM; << 1368 } 1507 } 1369 1508 1370 /* 1509 /* 1371 * shrink a VMA by removing the specified chu 1510 * shrink a VMA by removing the specified chunk from either the beginning or 1372 * the end 1511 * the end 1373 */ 1512 */ 1374 static int vmi_shrink_vma(struct vma_iterator !! 1513 static int shrink_vma(struct mm_struct *mm, 1375 struct vm_area_struct * 1514 struct vm_area_struct *vma, 1376 unsigned long from, uns 1515 unsigned long from, unsigned long to) 1377 { 1516 { 1378 struct vm_region *region; 1517 struct vm_region *region; 1379 1518 1380 /* adjust the VMA's pointers, which m 1519 /* adjust the VMA's pointers, which may reposition it in the MM's tree 1381 * and list */ 1520 * and list */ 1382 if (from > vma->vm_start) { !! 1521 delete_vma_from_mm(vma); 1383 if (vma_iter_clear_gfp(vmi, f !! 1522 if (from > vma->vm_start) 1384 return -ENOMEM; << 1385 vma->vm_end = from; 1523 vma->vm_end = from; 1386 } else { !! 1524 else 1387 if (vma_iter_clear_gfp(vmi, v << 1388 return -ENOMEM; << 1389 vma->vm_start = to; 1525 vma->vm_start = to; 1390 } !! 1526 add_vma_to_mm(mm, vma); 1391 1527 1392 /* cut the backing region down to siz 1528 /* cut the backing region down to size */ 1393 region = vma->vm_region; 1529 region = vma->vm_region; 1394 BUG_ON(region->vm_usage != 1); 1530 BUG_ON(region->vm_usage != 1); 1395 1531 1396 down_write(&nommu_region_sem); 1532 down_write(&nommu_region_sem); 1397 delete_nommu_region(region); 1533 delete_nommu_region(region); 1398 if (from > region->vm_start) { 1534 if (from > region->vm_start) { 1399 to = region->vm_top; 1535 to = region->vm_top; 1400 region->vm_top = region->vm_e 1536 region->vm_top = region->vm_end = from; 1401 } else { 1537 } else { 1402 region->vm_start = to; 1538 region->vm_start = to; 1403 } 1539 } 1404 add_nommu_region(region); 1540 add_nommu_region(region); 1405 up_write(&nommu_region_sem); 1541 up_write(&nommu_region_sem); 1406 1542 1407 free_page_series(from, to); 1543 free_page_series(from, to); 1408 return 0; 1544 return 0; 1409 } 1545 } 1410 1546 1411 /* 1547 /* 1412 * release a mapping 1548 * release a mapping 1413 * - under NOMMU conditions the chunk to be u 1549 * - under NOMMU conditions the chunk to be unmapped must be backed by a single 1414 * VMA, though it need not cover the whole 1550 * VMA, though it need not cover the whole VMA 1415 */ 1551 */ 1416 int do_munmap(struct mm_struct *mm, unsigned 1552 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf) 1417 { 1553 { 1418 VMA_ITERATOR(vmi, mm, start); << 1419 struct vm_area_struct *vma; 1554 struct vm_area_struct *vma; 1420 unsigned long end; 1555 unsigned long end; 1421 int ret = 0; !! 1556 int ret; 1422 1557 1423 len = PAGE_ALIGN(len); 1558 len = PAGE_ALIGN(len); 1424 if (len == 0) 1559 if (len == 0) 1425 return -EINVAL; 1560 return -EINVAL; 1426 1561 1427 end = start + len; 1562 end = start + len; 1428 1563 1429 /* find the first potentially overlap 1564 /* find the first potentially overlapping VMA */ 1430 vma = vma_find(&vmi, end); !! 
1565 vma = find_vma(mm, start); 1431 if (!vma) { 1566 if (!vma) { 1432 static int limit; 1567 static int limit; 1433 if (limit < 5) { 1568 if (limit < 5) { 1434 pr_warn("munmap of me 1569 pr_warn("munmap of memory not mmapped by process %d (%s): 0x%lx-0x%lx\n", 1435 curre 1570 current->pid, current->comm, 1436 start 1571 start, start + len - 1); 1437 limit++; 1572 limit++; 1438 } 1573 } 1439 return -EINVAL; 1574 return -EINVAL; 1440 } 1575 } 1441 1576 1442 /* we're allowed to split an anonymou 1577 /* we're allowed to split an anonymous VMA but not a file-backed one */ 1443 if (vma->vm_file) { 1578 if (vma->vm_file) { 1444 do { 1579 do { 1445 if (start > vma->vm_s 1580 if (start > vma->vm_start) 1446 return -EINVA 1581 return -EINVAL; 1447 if (end == vma->vm_en 1582 if (end == vma->vm_end) 1448 goto erase_wh 1583 goto erase_whole_vma; 1449 vma = vma_find(&vmi, !! 1584 vma = vma->vm_next; 1450 } while (vma); 1585 } while (vma); 1451 return -EINVAL; 1586 return -EINVAL; 1452 } else { 1587 } else { 1453 /* the chunk must be a subset 1588 /* the chunk must be a subset of the VMA found */ 1454 if (start == vma->vm_start && 1589 if (start == vma->vm_start && end == vma->vm_end) 1455 goto erase_whole_vma; 1590 goto erase_whole_vma; 1456 if (start < vma->vm_start || 1591 if (start < vma->vm_start || end > vma->vm_end) 1457 return -EINVAL; 1592 return -EINVAL; 1458 if (offset_in_page(start)) 1593 if (offset_in_page(start)) 1459 return -EINVAL; 1594 return -EINVAL; 1460 if (end != vma->vm_end && off 1595 if (end != vma->vm_end && offset_in_page(end)) 1461 return -EINVAL; 1596 return -EINVAL; 1462 if (start != vma->vm_start && 1597 if (start != vma->vm_start && end != vma->vm_end) { 1463 ret = split_vma(&vmi, !! 1598 ret = split_vma(mm, vma, start, 1); 1464 if (ret < 0) 1599 if (ret < 0) 1465 return ret; 1600 return ret; 1466 } 1601 } 1467 return vmi_shrink_vma(&vmi, v !! 1602 return shrink_vma(mm, vma, start, end); 1468 } 1603 } 1469 1604 1470 erase_whole_vma: 1605 erase_whole_vma: 1471 if (delete_vma_from_mm(vma)) !! 1606 delete_vma_from_mm(vma); 1472 ret = -ENOMEM; !! 1607 delete_vma(mm, vma); 1473 else !! 1608 return 0; 1474 delete_vma(mm, vma); << 1475 return ret; << 1476 } 1609 } >> 1610 EXPORT_SYMBOL(do_munmap); 1477 1611 1478 int vm_munmap(unsigned long addr, size_t len) 1612 int vm_munmap(unsigned long addr, size_t len) 1479 { 1613 { 1480 struct mm_struct *mm = current->mm; 1614 struct mm_struct *mm = current->mm; 1481 int ret; 1615 int ret; 1482 1616 1483 mmap_write_lock(mm); !! 1617 down_write(&mm->mmap_sem); 1484 ret = do_munmap(mm, addr, len, NULL); 1618 ret = do_munmap(mm, addr, len, NULL); 1485 mmap_write_unlock(mm); !! 1619 up_write(&mm->mmap_sem); 1486 return ret; 1620 return ret; 1487 } 1621 } 1488 EXPORT_SYMBOL(vm_munmap); 1622 EXPORT_SYMBOL(vm_munmap); 1489 1623 1490 SYSCALL_DEFINE2(munmap, unsigned long, addr, 1624 SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) 1491 { 1625 { 1492 return vm_munmap(addr, len); 1626 return vm_munmap(addr, len); 1493 } 1627 } 1494 1628 1495 /* 1629 /* 1496 * release all the mappings made in a process 1630 * release all the mappings made in a process's VM space 1497 */ 1631 */ 1498 void exit_mmap(struct mm_struct *mm) 1632 void exit_mmap(struct mm_struct *mm) 1499 { 1633 { 1500 VMA_ITERATOR(vmi, mm, 0); << 1501 struct vm_area_struct *vma; 1634 struct vm_area_struct *vma; 1502 1635 1503 if (!mm) 1636 if (!mm) 1504 return; 1637 return; 1505 1638 1506 mm->total_vm = 0; 1639 mm->total_vm = 0; 1507 1640 1508 /* !! 
1641 while ((vma = mm->mmap)) { 1509 * Lock the mm to avoid assert compla !! 1642 mm->mmap = vma->vm_next; 1510 * user of the mm !! 1643 delete_vma_from_mm(vma); 1511 */ << 1512 mmap_write_lock(mm); << 1513 for_each_vma(vmi, vma) { << 1514 cleanup_vma_from_mm(vma); << 1515 delete_vma(mm, vma); 1644 delete_vma(mm, vma); 1516 cond_resched(); 1645 cond_resched(); 1517 } 1646 } 1518 __mt_destroy(&mm->mm_mt); !! 1647 } 1519 mmap_write_unlock(mm); !! 1648 >> 1649 int vm_brk(unsigned long addr, unsigned long len) >> 1650 { >> 1651 return -ENOMEM; 1520 } 1652 } 1521 1653 1522 /* 1654 /* 1523 * expand (or shrink) an existing mapping, po 1655 * expand (or shrink) an existing mapping, potentially moving it at the same 1524 * time (controlled by the MREMAP_MAYMOVE fla 1656 * time (controlled by the MREMAP_MAYMOVE flag and available VM space) 1525 * 1657 * 1526 * under NOMMU conditions, we only permit cha 1658 * under NOMMU conditions, we only permit changing a mapping's size, and only 1527 * as long as it stays within the region allo 1659 * as long as it stays within the region allocated by do_mmap_private() and the 1528 * block is not shareable 1660 * block is not shareable 1529 * 1661 * 1530 * MREMAP_FIXED is not supported under NOMMU 1662 * MREMAP_FIXED is not supported under NOMMU conditions 1531 */ 1663 */ 1532 static unsigned long do_mremap(unsigned long 1664 static unsigned long do_mremap(unsigned long addr, 1533 unsigned long old_len 1665 unsigned long old_len, unsigned long new_len, 1534 unsigned long flags, 1666 unsigned long flags, unsigned long new_addr) 1535 { 1667 { 1536 struct vm_area_struct *vma; 1668 struct vm_area_struct *vma; 1537 1669 1538 /* insanity checks first */ 1670 /* insanity checks first */ 1539 old_len = PAGE_ALIGN(old_len); 1671 old_len = PAGE_ALIGN(old_len); 1540 new_len = PAGE_ALIGN(new_len); 1672 new_len = PAGE_ALIGN(new_len); 1541 if (old_len == 0 || new_len == 0) 1673 if (old_len == 0 || new_len == 0) 1542 return (unsigned long) -EINVA 1674 return (unsigned long) -EINVAL; 1543 1675 1544 if (offset_in_page(addr)) 1676 if (offset_in_page(addr)) 1545 return -EINVAL; 1677 return -EINVAL; 1546 1678 1547 if (flags & MREMAP_FIXED && new_addr 1679 if (flags & MREMAP_FIXED && new_addr != addr) 1548 return (unsigned long) -EINVA 1680 return (unsigned long) -EINVAL; 1549 1681 1550 vma = find_vma_exact(current->mm, add 1682 vma = find_vma_exact(current->mm, addr, old_len); 1551 if (!vma) 1683 if (!vma) 1552 return (unsigned long) -EINVA 1684 return (unsigned long) -EINVAL; 1553 1685 1554 if (vma->vm_end != vma->vm_start + ol 1686 if (vma->vm_end != vma->vm_start + old_len) 1555 return (unsigned long) -EFAUL 1687 return (unsigned long) -EFAULT; 1556 1688 1557 if (is_nommu_shared_mapping(vma->vm_f !! 
1689 if (vma->vm_flags & VM_MAYSHARE) 1558 return (unsigned long) -EPERM 1690 return (unsigned long) -EPERM; 1559 1691 1560 if (new_len > vma->vm_region->vm_end 1692 if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start) 1561 return (unsigned long) -ENOME 1693 return (unsigned long) -ENOMEM; 1562 1694 1563 /* all checks complete - do it */ 1695 /* all checks complete - do it */ 1564 vma->vm_end = vma->vm_start + new_len 1696 vma->vm_end = vma->vm_start + new_len; 1565 return vma->vm_start; 1697 return vma->vm_start; 1566 } 1698 } 1567 1699 1568 SYSCALL_DEFINE5(mremap, unsigned long, addr, 1700 SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, 1569 unsigned long, new_len, unsig 1701 unsigned long, new_len, unsigned long, flags, 1570 unsigned long, new_addr) 1702 unsigned long, new_addr) 1571 { 1703 { 1572 unsigned long ret; 1704 unsigned long ret; 1573 1705 1574 mmap_write_lock(current->mm); !! 1706 down_write(¤t->mm->mmap_sem); 1575 ret = do_mremap(addr, old_len, new_le 1707 ret = do_mremap(addr, old_len, new_len, flags, new_addr); 1576 mmap_write_unlock(current->mm); !! 1708 up_write(¤t->mm->mmap_sem); 1577 return ret; 1709 return ret; 1578 } 1710 } 1579 1711 >> 1712 struct page *follow_page(struct vm_area_struct *vma, unsigned long address, >> 1713 unsigned int foll_flags) >> 1714 { >> 1715 return NULL; >> 1716 } >> 1717 1580 int remap_pfn_range(struct vm_area_struct *vm 1718 int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, 1581 unsigned long pfn, unsigned l 1719 unsigned long pfn, unsigned long size, pgprot_t prot) 1582 { 1720 { 1583 if (addr != (pfn << PAGE_SHIFT)) 1721 if (addr != (pfn << PAGE_SHIFT)) 1584 return -EINVAL; 1722 return -EINVAL; 1585 1723 1586 vm_flags_set(vma, VM_IO | VM_PFNMAP | !! 
1724 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; 1587 return 0; 1725 return 0; 1588 } 1726 } 1589 EXPORT_SYMBOL(remap_pfn_range); 1727 EXPORT_SYMBOL(remap_pfn_range); 1590 1728 1591 int vm_iomap_memory(struct vm_area_struct *vm 1729 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len) 1592 { 1730 { 1593 unsigned long pfn = start >> PAGE_SHI 1731 unsigned long pfn = start >> PAGE_SHIFT; 1594 unsigned long vm_len = vma->vm_end - 1732 unsigned long vm_len = vma->vm_end - vma->vm_start; 1595 1733 1596 pfn += vma->vm_pgoff; 1734 pfn += vma->vm_pgoff; 1597 return io_remap_pfn_range(vma, vma->v 1735 return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); 1598 } 1736 } 1599 EXPORT_SYMBOL(vm_iomap_memory); 1737 EXPORT_SYMBOL(vm_iomap_memory); 1600 1738 1601 int remap_vmalloc_range(struct vm_area_struct 1739 int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, 1602 unsigned long pgoff) 1740 unsigned long pgoff) 1603 { 1741 { 1604 unsigned int size = vma->vm_end - vma 1742 unsigned int size = vma->vm_end - vma->vm_start; 1605 1743 1606 if (!(vma->vm_flags & VM_USERMAP)) 1744 if (!(vma->vm_flags & VM_USERMAP)) 1607 return -EINVAL; 1745 return -EINVAL; 1608 1746 1609 vma->vm_start = (unsigned long)(addr 1747 vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT)); 1610 vma->vm_end = vma->vm_start + size; 1748 vma->vm_end = vma->vm_start + size; 1611 1749 1612 return 0; 1750 return 0; 1613 } 1751 } 1614 EXPORT_SYMBOL(remap_vmalloc_range); 1752 EXPORT_SYMBOL(remap_vmalloc_range); 1615 1753 >> 1754 unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, >> 1755 unsigned long len, unsigned long pgoff, unsigned long flags) >> 1756 { >> 1757 return -ENOMEM; >> 1758 } >> 1759 1616 vm_fault_t filemap_fault(struct vm_fault *vmf 1760 vm_fault_t filemap_fault(struct vm_fault *vmf) 1617 { 1761 { 1618 BUG(); 1762 BUG(); 1619 return 0; 1763 return 0; 1620 } 1764 } 1621 EXPORT_SYMBOL(filemap_fault); 1765 EXPORT_SYMBOL(filemap_fault); 1622 1766 1623 vm_fault_t filemap_map_pages(struct vm_fault !! 1767 void filemap_map_pages(struct vm_fault *vmf, 1624 pgoff_t start_pgoff, pgoff_t 1768 pgoff_t start_pgoff, pgoff_t end_pgoff) 1625 { 1769 { 1626 BUG(); 1770 BUG(); 1627 return 0; << 1628 } 1771 } 1629 EXPORT_SYMBOL(filemap_map_pages); 1772 EXPORT_SYMBOL(filemap_map_pages); 1630 1773 1631 static int __access_remote_vm(struct mm_struc !! 1774 int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, 1632 void *buf, int !! 1775 unsigned long addr, void *buf, int len, unsigned int gup_flags) 1633 { 1776 { 1634 struct vm_area_struct *vma; 1777 struct vm_area_struct *vma; 1635 int write = gup_flags & FOLL_WRITE; 1778 int write = gup_flags & FOLL_WRITE; 1636 1779 1637 if (mmap_read_lock_killable(mm)) !! 
1780 down_read(&mm->mmap_sem); 1638 return 0; << 1639 1781 1640 /* the access must start within one o 1782 /* the access must start within one of the target process's mappings */ 1641 vma = find_vma(mm, addr); 1783 vma = find_vma(mm, addr); 1642 if (vma) { 1784 if (vma) { 1643 /* don't overrun this mapping 1785 /* don't overrun this mapping */ 1644 if (addr + len >= vma->vm_end 1786 if (addr + len >= vma->vm_end) 1645 len = vma->vm_end - a 1787 len = vma->vm_end - addr; 1646 1788 1647 /* only read or write mapping 1789 /* only read or write mappings where it is permitted */ 1648 if (write && vma->vm_flags & 1790 if (write && vma->vm_flags & VM_MAYWRITE) 1649 copy_to_user_page(vma 1791 copy_to_user_page(vma, NULL, addr, 1650 (voi 1792 (void *) addr, buf, len); 1651 else if (!write && vma->vm_fl 1793 else if (!write && vma->vm_flags & VM_MAYREAD) 1652 copy_from_user_page(v 1794 copy_from_user_page(vma, NULL, addr, 1653 b 1795 buf, (void *) addr, len); 1654 else 1796 else 1655 len = 0; 1797 len = 0; 1656 } else { 1798 } else { 1657 len = 0; 1799 len = 0; 1658 } 1800 } 1659 1801 1660 mmap_read_unlock(mm); !! 1802 up_read(&mm->mmap_sem); 1661 1803 1662 return len; 1804 return len; 1663 } 1805 } 1664 1806 1665 /** 1807 /** 1666 * access_remote_vm - access another process' 1808 * access_remote_vm - access another process' address space 1667 * @mm: the mm_struct of the target a 1809 * @mm: the mm_struct of the target address space 1668 * @addr: start address to access 1810 * @addr: start address to access 1669 * @buf: source or destination buffer 1811 * @buf: source or destination buffer 1670 * @len: number of bytes to transfer 1812 * @len: number of bytes to transfer 1671 * @gup_flags: flags modifying lookup behavi 1813 * @gup_flags: flags modifying lookup behaviour 1672 * 1814 * 1673 * The caller must hold a reference on @mm. 1815 * The caller must hold a reference on @mm. 1674 */ 1816 */ 1675 int access_remote_vm(struct mm_struct *mm, un 1817 int access_remote_vm(struct mm_struct *mm, unsigned long addr, 1676 void *buf, int len, unsigned 1818 void *buf, int len, unsigned int gup_flags) 1677 { 1819 { 1678 return __access_remote_vm(mm, addr, b !! 1820 return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); 1679 } 1821 } 1680 1822 1681 /* 1823 /* 1682 * Access another process' address space. 1824 * Access another process' address space. 1683 * - source/target buffer must be kernel spac 1825 * - source/target buffer must be kernel space 1684 */ 1826 */ 1685 int access_process_vm(struct task_struct *tsk 1827 int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, 1686 unsigned int gup_flags) 1828 unsigned int gup_flags) 1687 { 1829 { 1688 struct mm_struct *mm; 1830 struct mm_struct *mm; 1689 1831 1690 if (addr + len < addr) 1832 if (addr + len < addr) 1691 return 0; 1833 return 0; 1692 1834 1693 mm = get_task_mm(tsk); 1835 mm = get_task_mm(tsk); 1694 if (!mm) 1836 if (!mm) 1695 return 0; 1837 return 0; 1696 1838 1697 len = __access_remote_vm(mm, addr, bu !! 
1839 len = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); 1698 1840 1699 mmput(mm); 1841 mmput(mm); 1700 return len; 1842 return len; 1701 } 1843 } 1702 EXPORT_SYMBOL_GPL(access_process_vm); 1844 EXPORT_SYMBOL_GPL(access_process_vm); 1703 1845 1704 /** 1846 /** 1705 * nommu_shrink_inode_mappings - Shrink the s 1847 * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode 1706 * @inode: The inode to check 1848 * @inode: The inode to check 1707 * @size: The current filesize of the inode 1849 * @size: The current filesize of the inode 1708 * @newsize: The proposed filesize of the ino 1850 * @newsize: The proposed filesize of the inode 1709 * 1851 * 1710 * Check the shared mappings on an inode on b 1852 * Check the shared mappings on an inode on behalf of a shrinking truncate to 1711 * make sure that any outstanding VMAs aren't !! 1853 * make sure that that any outstanding VMAs aren't broken and then shrink the 1712 * vm_regions that extend beyond so that do_m !! 1854 * vm_regions that extend that beyond so that do_mmap_pgoff() doesn't 1713 * automatically grant mappings that are too 1855 * automatically grant mappings that are too large. 1714 */ 1856 */ 1715 int nommu_shrink_inode_mappings(struct inode 1857 int nommu_shrink_inode_mappings(struct inode *inode, size_t size, 1716 size_t newsiz 1858 size_t newsize) 1717 { 1859 { 1718 struct vm_area_struct *vma; 1860 struct vm_area_struct *vma; 1719 struct vm_region *region; 1861 struct vm_region *region; 1720 pgoff_t low, high; 1862 pgoff_t low, high; 1721 size_t r_size, r_top; 1863 size_t r_size, r_top; 1722 1864 1723 low = newsize >> PAGE_SHIFT; 1865 low = newsize >> PAGE_SHIFT; 1724 high = (size + PAGE_SIZE - 1) >> PAGE 1866 high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 1725 1867 1726 down_write(&nommu_region_sem); 1868 down_write(&nommu_region_sem); 1727 i_mmap_lock_read(inode->i_mapping); 1869 i_mmap_lock_read(inode->i_mapping); 1728 1870 1729 /* search for VMAs that fall within t 1871 /* search for VMAs that fall within the dead zone */ 1730 vma_interval_tree_foreach(vma, &inode 1872 vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) { 1731 /* found one - only intereste 1873 /* found one - only interested if it's shared out of the page 1732 * cache */ 1874 * cache */ 1733 if (vma->vm_flags & VM_SHARED 1875 if (vma->vm_flags & VM_SHARED) { 1734 i_mmap_unlock_read(in 1876 i_mmap_unlock_read(inode->i_mapping); 1735 up_write(&nommu_regio 1877 up_write(&nommu_region_sem); 1736 return -ETXTBSY; /* n 1878 return -ETXTBSY; /* not quite true, but near enough */ 1737 } 1879 } 1738 } 1880 } 1739 1881 1740 /* reduce any regions that overlap th 1882 /* reduce any regions that overlap the dead zone - if in existence, 1741 * these will be pointed to by VMAs t 1883 * these will be pointed to by VMAs that don't overlap the dead zone 1742 * 1884 * 1743 * we don't check for any regions tha 1885 * we don't check for any regions that start beyond the EOF as there 1744 * shouldn't be any 1886 * shouldn't be any 1745 */ 1887 */ 1746 vma_interval_tree_foreach(vma, &inode 1888 vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, 0, ULONG_MAX) { 1747 if (!(vma->vm_flags & VM_SHAR 1889 if (!(vma->vm_flags & VM_SHARED)) 1748 continue; 1890 continue; 1749 1891 1750 region = vma->vm_region; 1892 region = vma->vm_region; 1751 r_size = region->vm_top - reg 1893 r_size = region->vm_top - region->vm_start; 1752 r_top = (region->vm_pgoff << 1894 r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size; 1753 1895 1754 if (r_top > newsize) 
{ 1896 if (r_top > newsize) { 1755 region->vm_top -= r_t 1897 region->vm_top -= r_top - newsize; 1756 if (region->vm_end > 1898 if (region->vm_end > region->vm_top) 1757 region->vm_en 1899 region->vm_end = region->vm_top; 1758 } 1900 } 1759 } 1901 } 1760 1902 1761 i_mmap_unlock_read(inode->i_mapping); 1903 i_mmap_unlock_read(inode->i_mapping); 1762 up_write(&nommu_region_sem); 1904 up_write(&nommu_region_sem); 1763 return 0; 1905 return 0; 1764 } 1906 } 1765 1907 1766 /* 1908 /* 1767 * Initialise sysctl_user_reserve_kbytes. 1909 * Initialise sysctl_user_reserve_kbytes. 1768 * 1910 * 1769 * This is intended to prevent a user from st 1911 * This is intended to prevent a user from starting a single memory hogging 1770 * process, such that they cannot recover (ki 1912 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER 1771 * mode. 1913 * mode. 1772 * 1914 * 1773 * The default value is min(3% of free memory 1915 * The default value is min(3% of free memory, 128MB) 1774 * 128MB is enough to recover with sshd/login 1916 * 128MB is enough to recover with sshd/login, bash, and top/kill. 1775 */ 1917 */ 1776 static int __meminit init_user_reserve(void) 1918 static int __meminit init_user_reserve(void) 1777 { 1919 { 1778 unsigned long free_kbytes; 1920 unsigned long free_kbytes; 1779 1921 1780 free_kbytes = K(global_zone_page_stat !! 1922 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); 1781 1923 1782 sysctl_user_reserve_kbytes = min(free 1924 sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); 1783 return 0; 1925 return 0; 1784 } 1926 } 1785 subsys_initcall(init_user_reserve); 1927 subsys_initcall(init_user_reserve); 1786 1928 1787 /* 1929 /* 1788 * Initialise sysctl_admin_reserve_kbytes. 1930 * Initialise sysctl_admin_reserve_kbytes. 1789 * 1931 * 1790 * The purpose of sysctl_admin_reserve_kbytes 1932 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin 1791 * to log in and kill a memory hogging proces 1933 * to log in and kill a memory hogging process. 1792 * 1934 * 1793 * Systems with more than 256MB will reserve 1935 * Systems with more than 256MB will reserve 8MB, enough to recover 1794 * with sshd, bash, and top in OVERCOMMIT_GUE 1936 * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will 1795 * only reserve 3% of free pages by default. 1937 * only reserve 3% of free pages by default. 1796 */ 1938 */ 1797 static int __meminit init_admin_reserve(void) 1939 static int __meminit init_admin_reserve(void) 1798 { 1940 { 1799 unsigned long free_kbytes; 1941 unsigned long free_kbytes; 1800 1942 1801 free_kbytes = K(global_zone_page_stat !! 1943 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); 1802 1944 1803 sysctl_admin_reserve_kbytes = min(fre 1945 sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); 1804 return 0; 1946 return 0; 1805 } 1947 } 1806 subsys_initcall(init_admin_reserve); 1948 subsys_initcall(init_admin_reserve); 1807 1949
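The semantics implemented above (the address hint ignored by do_mmap(), anonymous mappings cleared unless MAP_UNINITIALIZED is requested, and do_mremap() permitting only an in-place shrink of a non-shareable region) can be exercised from userspace. The following is a minimal sketch, assuming an ordinary POSIX/Linux C environment rather than anything specific to this file; it uses only the standard mmap(), mremap() and munmap() calls, and its comments restate the constraints visible in the source above rather than documenting reference behaviour.

/*
 * Illustrative userspace sketch (not part of nommu.c): map, shrink in
 * place, then unmap.  The behavioural notes in the comments are drawn
 * from the !MMU paths shown above and are assumptions, not a spec.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 8 * 4096;
	size_t cur;
	void *p, *q;

	/* the nommu do_mmap() ignores the address hint, so pass NULL */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	cur = len;

	/* anonymous memory arrives zeroed unless MAP_UNINITIALIZED was requested */
	memset(p, 0x5a, cur);

	/*
	 * do_mremap() above only resizes in place: MREMAP_FIXED to a new
	 * address is -EINVAL, shareable regions are -EPERM, and growing past
	 * the region set up by do_mmap_private() is -ENOMEM.  A shrink of a
	 * private anonymous mapping is the case that is expected to succeed.
	 */
	q = mremap(p, cur, len / 2, 0);
	if (q == MAP_FAILED) {
		perror("mremap (shrink)");
	} else {
		p = q;		/* do_mremap() hands back vma->vm_start */
		cur = len / 2;
	}

	return munmap(p, cur) ? 1 : 0;
}

The same program also runs on CONFIG_MMU kernels, but the shrink-only mremap() restriction and the MAP_UNINITIALIZED caveat in the comments are specific to the !MMU paths shown in this file.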