1 // SPDX-License-Identifier: GPL-2.0-only << 2 /* 1 /* 3 * linux/mm/nommu.c 2 * linux/mm/nommu.c 4 * 3 * 5 * Replacement code for mm functions to suppo 4 * Replacement code for mm functions to support CPU's that don't 6 * have any form of memory management unit (t 5 * have any form of memory management unit (thus no virtual memory). 7 * 6 * 8 * See Documentation/admin-guide/mm/nommu-mma !! 7 * See Documentation/nommu-mmap.txt 9 * 8 * 10 * Copyright (c) 2004-2008 David Howells <dho 9 * Copyright (c) 2004-2008 David Howells <dhowells@redhat.com> 11 * Copyright (c) 2000-2003 David McCullough < 10 * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> 12 * Copyright (c) 2000-2001 D Jeff Dionne <jef 11 * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> 13 * Copyright (c) 2002 Greg Ungerer <gerg 12 * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> 14 * Copyright (c) 2007-2010 Paul Mundt <lethal 13 * Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org> 15 */ 14 */ 16 15 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 16 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 17 19 #include <linux/export.h> 18 #include <linux/export.h> 20 #include <linux/mm.h> 19 #include <linux/mm.h> 21 #include <linux/sched/mm.h> !! 20 #include <linux/vmacache.h> 22 #include <linux/mman.h> 21 #include <linux/mman.h> 23 #include <linux/swap.h> 22 #include <linux/swap.h> 24 #include <linux/file.h> 23 #include <linux/file.h> 25 #include <linux/highmem.h> 24 #include <linux/highmem.h> 26 #include <linux/pagemap.h> 25 #include <linux/pagemap.h> 27 #include <linux/slab.h> 26 #include <linux/slab.h> 28 #include <linux/vmalloc.h> 27 #include <linux/vmalloc.h> >> 28 #include <linux/blkdev.h> 29 #include <linux/backing-dev.h> 29 #include <linux/backing-dev.h> 30 #include <linux/compiler.h> 30 #include <linux/compiler.h> 31 #include <linux/mount.h> 31 #include <linux/mount.h> 32 #include <linux/personality.h> 32 #include <linux/personality.h> 33 #include <linux/security.h> 33 #include <linux/security.h> 34 #include <linux/syscalls.h> 34 #include <linux/syscalls.h> 35 #include <linux/audit.h> 35 #include <linux/audit.h> >> 36 #include <linux/sched/sysctl.h> 36 #include <linux/printk.h> 37 #include <linux/printk.h> 37 38 38 #include <linux/uaccess.h> !! 
39 #include <asm/uaccess.h> 39 #include <linux/uio.h> << 40 #include <asm/tlb.h> 40 #include <asm/tlb.h> 41 #include <asm/tlbflush.h> 41 #include <asm/tlbflush.h> 42 #include <asm/mmu_context.h> 42 #include <asm/mmu_context.h> 43 #include "internal.h" 43 #include "internal.h" 44 44 45 void *high_memory; 45 void *high_memory; 46 EXPORT_SYMBOL(high_memory); 46 EXPORT_SYMBOL(high_memory); 47 struct page *mem_map; 47 struct page *mem_map; 48 unsigned long max_mapnr; 48 unsigned long max_mapnr; 49 EXPORT_SYMBOL(max_mapnr); 49 EXPORT_SYMBOL(max_mapnr); 50 unsigned long highest_memmap_pfn; 50 unsigned long highest_memmap_pfn; >> 51 struct percpu_counter vm_committed_as; >> 52 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ >> 53 int sysctl_overcommit_ratio = 50; /* default is 50% */ >> 54 unsigned long sysctl_overcommit_kbytes __read_mostly; >> 55 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; 51 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIA 56 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; >> 57 unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ >> 58 unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */ 52 int heap_stack_gap = 0; 59 int heap_stack_gap = 0; 53 60 54 atomic_long_t mmap_pages_allocated; 61 atomic_long_t mmap_pages_allocated; 55 62 >> 63 /* >> 64 * The global memory commitment made in the system can be a metric >> 65 * that can be used to drive ballooning decisions when Linux is hosted >> 66 * as a guest. On Hyper-V, the host implements a policy engine for dynamically >> 67 * balancing memory across competing virtual machines that are hosted. >> 68 * Several metrics drive this policy engine including the guest reported >> 69 * memory commitment. >> 70 */ >> 71 unsigned long vm_memory_committed(void) >> 72 { >> 73 return percpu_counter_read_positive(&vm_committed_as); >> 74 } >> 75 >> 76 EXPORT_SYMBOL_GPL(vm_memory_committed); >> 77 56 EXPORT_SYMBOL(mem_map); 78 EXPORT_SYMBOL(mem_map); 57 79 58 /* list of mapped, potentially shareable regio 80 /* list of mapped, potentially shareable regions */ 59 static struct kmem_cache *vm_region_jar; 81 static struct kmem_cache *vm_region_jar; 60 struct rb_root nommu_region_tree = RB_ROOT; 82 struct rb_root nommu_region_tree = RB_ROOT; 61 DECLARE_RWSEM(nommu_region_sem); 83 DECLARE_RWSEM(nommu_region_sem); 62 84 63 const struct vm_operations_struct generic_file 85 const struct vm_operations_struct generic_file_vm_ops = { 64 }; 86 }; 65 87 66 /* 88 /* 67 * Return the total memory allocated for this 89 * Return the total memory allocated for this pointer, not 68 * just what the caller asked for. 90 * just what the caller asked for. 69 * 91 * 70 * Doesn't have to be accurate, i.e. may have 92 * Doesn't have to be accurate, i.e. may have races. 71 */ 93 */ 72 unsigned int kobjsize(const void *objp) 94 unsigned int kobjsize(const void *objp) 73 { 95 { 74 struct page *page; 96 struct page *page; 75 97 76 /* 98 /* 77 * If the object we have should not ha 99 * If the object we have should not have ksize performed on it, 78 * return size of 0 100 * return size of 0 79 */ 101 */ 80 if (!objp || !virt_addr_valid(objp)) 102 if (!objp || !virt_addr_valid(objp)) 81 return 0; 103 return 0; 82 104 83 page = virt_to_head_page(objp); 105 page = virt_to_head_page(objp); 84 106 85 /* 107 /* 86 * If the allocator sets PageSlab, we 108 * If the allocator sets PageSlab, we know the pointer came from 87 * kmalloc(). 109 * kmalloc(). 
88 */ 110 */ 89 if (PageSlab(page)) 111 if (PageSlab(page)) 90 return ksize(objp); 112 return ksize(objp); 91 113 92 /* 114 /* 93 * If it's not a compound page, see if 115 * If it's not a compound page, see if we have a matching VMA 94 * region. This test is intentionally 116 * region. This test is intentionally done in reverse order, 95 * so if there's no VMA, we still fall 117 * so if there's no VMA, we still fall through and hand back 96 * PAGE_SIZE for 0-order pages. 118 * PAGE_SIZE for 0-order pages. 97 */ 119 */ 98 if (!PageCompound(page)) { 120 if (!PageCompound(page)) { 99 struct vm_area_struct *vma; 121 struct vm_area_struct *vma; 100 122 101 vma = find_vma(current->mm, (u 123 vma = find_vma(current->mm, (unsigned long)objp); 102 if (vma) 124 if (vma) 103 return vma->vm_end - v 125 return vma->vm_end - vma->vm_start; 104 } 126 } 105 127 106 /* 128 /* 107 * The ksize() function is only guaran 129 * The ksize() function is only guaranteed to work for pointers 108 * returned by kmalloc(). So handle ar 130 * returned by kmalloc(). So handle arbitrary pointers here. 109 */ 131 */ 110 return page_size(page); !! 132 return PAGE_SIZE << compound_order(page); 111 } 133 } 112 134 113 void vfree(const void *addr) !! 135 long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, >> 136 unsigned long start, unsigned long nr_pages, >> 137 unsigned int foll_flags, struct page **pages, >> 138 struct vm_area_struct **vmas, int *nonblocking) 114 { 139 { 115 kfree(addr); !! 140 struct vm_area_struct *vma; >> 141 unsigned long vm_flags; >> 142 int i; >> 143 >> 144 /* calculate required read or write permissions. >> 145 * If FOLL_FORCE is set, we only require the "MAY" flags. >> 146 */ >> 147 vm_flags = (foll_flags & FOLL_WRITE) ? >> 148 (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); >> 149 vm_flags &= (foll_flags & FOLL_FORCE) ? >> 150 (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); >> 151 >> 152 for (i = 0; i < nr_pages; i++) { >> 153 vma = find_vma(mm, start); >> 154 if (!vma) >> 155 goto finish_or_fault; >> 156 >> 157 /* protect what we can, including chardevs */ >> 158 if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) || >> 159 !(vm_flags & vma->vm_flags)) >> 160 goto finish_or_fault; >> 161 >> 162 if (pages) { >> 163 pages[i] = virt_to_page(start); >> 164 if (pages[i]) >> 165 page_cache_get(pages[i]); >> 166 } >> 167 if (vmas) >> 168 vmas[i] = vma; >> 169 start = (start + PAGE_SIZE) & PAGE_MASK; >> 170 } >> 171 >> 172 return i; >> 173 >> 174 finish_or_fault: >> 175 return i ? : -EFAULT; 116 } 176 } 117 EXPORT_SYMBOL(vfree); << 118 177 119 void *__vmalloc_noprof(unsigned long size, gfp !! 178 /* >> 179 * get a list of pages in an address range belonging to the specified process >> 180 * and indicate the VMA that covers each page >> 181 * - this is potentially dodgy as we may end incrementing the page count of a >> 182 * slab page or a secondary page from a compound page >> 183 * - don't permit access to VMAs that don't support it, such as I/O mappings >> 184 */ >> 185 long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, >> 186 unsigned long start, unsigned long nr_pages, >> 187 unsigned int gup_flags, struct page **pages, >> 188 struct vm_area_struct **vmas) 120 { 189 { 121 /* !! 190 return __get_user_pages(tsk, mm, start, nr_pages, 122 * You can't specify __GFP_HIGHMEM wi !! 191 gup_flags, pages, vmas, NULL); 123 * returns only a logical address. !! 192 } 124 */ !! 193 EXPORT_SYMBOL(get_user_pages); 125 return kmalloc_noprof(size, (gfp_mask !! 
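/*
 * Editor's sketch, not part of the original file: a hypothetical helper
 * showing how kobjsize() above classifies a pointer on !MMU. A kmalloc()
 * pointer lands in the PageSlab() branch and reports its slab-rounded
 * size; a pointer inside a mapped VMA reports the whole mapping; anything
 * else falls back to the size of the underlying (possibly compound) page.
 * Only kobjsize_demo() itself is invented here; every call it makes is to
 * helpers already used in this file.
 */
static void __maybe_unused kobjsize_demo(void)
{
	void *obj = kmalloc(100, GFP_KERNEL);

	if (!obj)
		return;

	/* >= 100: ksize() rounds the request up to the slab object size */
	pr_info("kmalloc object occupies %u bytes\n", kobjsize(obj));
	kfree(obj);
}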
194 >> 195 long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, >> 196 unsigned long start, unsigned long nr_pages, >> 197 unsigned int gup_flags, struct page **pages, >> 198 int *locked) >> 199 { >> 200 return get_user_pages(tsk, mm, start, nr_pages, gup_flags, >> 201 pages, NULL); 126 } 202 } 127 EXPORT_SYMBOL(__vmalloc_noprof); !! 203 EXPORT_SYMBOL(get_user_pages_locked); 128 204 129 void *vrealloc_noprof(const void *p, size_t si !! 205 long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, >> 206 unsigned long start, unsigned long nr_pages, >> 207 struct page **pages, unsigned int gup_flags) >> 208 { >> 209 long ret; >> 210 down_read(&mm->mmap_sem); >> 211 ret = __get_user_pages(tsk, mm, start, nr_pages, gup_flags, pages, >> 212 NULL, NULL); >> 213 up_read(&mm->mmap_sem); >> 214 return ret; >> 215 } >> 216 EXPORT_SYMBOL(__get_user_pages_unlocked); >> 217 >> 218 long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, >> 219 unsigned long start, unsigned long nr_pages, >> 220 struct page **pages, unsigned int gup_flags) >> 221 { >> 222 return __get_user_pages_unlocked(tsk, mm, start, nr_pages, >> 223 pages, gup_flags); >> 224 } >> 225 EXPORT_SYMBOL(get_user_pages_unlocked); >> 226 >> 227 /** >> 228 * follow_pfn - look up PFN at a user virtual address >> 229 * @vma: memory mapping >> 230 * @address: user virtual address >> 231 * @pfn: location to store found PFN >> 232 * >> 233 * Only IO mappings and raw PFN mappings are allowed. >> 234 * >> 235 * Returns zero and the pfn at @pfn on success, -ve otherwise. >> 236 */ >> 237 int follow_pfn(struct vm_area_struct *vma, unsigned long address, >> 238 unsigned long *pfn) 130 { 239 { 131 return krealloc_noprof(p, size, (flags !! 240 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) >> 241 return -EINVAL; >> 242 >> 243 *pfn = address >> PAGE_SHIFT; >> 244 return 0; 132 } 245 } >> 246 EXPORT_SYMBOL(follow_pfn); >> 247 >> 248 LIST_HEAD(vmap_area_list); 133 249 134 void *__vmalloc_node_range_noprof(unsigned lon !! 250 void vfree(const void *addr) 135 unsigned long start, unsigned << 136 pgprot_t prot, unsigned long v << 137 const void *caller) << 138 { 251 { 139 return __vmalloc_noprof(size, gfp_mask !! 252 kfree(addr); 140 } 253 } >> 254 EXPORT_SYMBOL(vfree); 141 255 142 void *__vmalloc_node_noprof(unsigned long size !! 256 void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) 143 int node, const void *caller) << 144 { 257 { 145 return __vmalloc_noprof(size, gfp_mask !! 258 /* >> 259 * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc() >> 260 * returns only a logical address. >> 261 */ >> 262 return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); 146 } 263 } >> 264 EXPORT_SYMBOL(__vmalloc); 147 265 148 static void *__vmalloc_user_flags(unsigned lon !! 266 void *vmalloc_user(unsigned long size) 149 { 267 { 150 void *ret; 268 void *ret; 151 269 152 ret = __vmalloc(size, flags); !! 270 ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, >> 271 PAGE_KERNEL); 153 if (ret) { 272 if (ret) { 154 struct vm_area_struct *vma; 273 struct vm_area_struct *vma; 155 274 156 mmap_write_lock(current->mm); !! 275 down_write(¤t->mm->mmap_sem); 157 vma = find_vma(current->mm, (u 276 vma = find_vma(current->mm, (unsigned long)ret); 158 if (vma) 277 if (vma) 159 vm_flags_set(vma, VM_U !! 278 vma->vm_flags |= VM_USERMAP; 160 mmap_write_unlock(current->mm) !! 279 up_write(¤t->mm->mmap_sem); 161 } 280 } 162 281 163 return ret; 282 return ret; 164 } 283 } 165 !! 
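/*
 * Editor's sketch, not part of the original file: because __vmalloc()
 * above simply falls back to kmalloc() when there is no MMU, a "vmalloc"
 * allocation is physically contiguous and its pages can be reached with
 * virt_to_page(), which is why vmalloc_to_page() further down is a
 * one-liner. The demo function name is made up; the allocator calls are
 * the ones defined in this file.
 */
static void __maybe_unused nommu_vmalloc_demo(void)
{
	void *buf = vmalloc(4 * PAGE_SIZE);

	if (!buf)
		return;

	/* same backing pages kmalloc() would have handed out */
	pr_info("vmalloc buffer starts at pfn %lu\n",
		page_to_pfn(virt_to_page(buf)));
	vfree(buf);		/* plain kfree() underneath */
}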
284 EXPORT_SYMBOL(vmalloc_user); 166 void *vmalloc_user_noprof(unsigned long size) << 167 { << 168 return __vmalloc_user_flags(size, GFP_ << 169 } << 170 EXPORT_SYMBOL(vmalloc_user_noprof); << 171 285 172 struct page *vmalloc_to_page(const void *addr) 286 struct page *vmalloc_to_page(const void *addr) 173 { 287 { 174 return virt_to_page(addr); 288 return virt_to_page(addr); 175 } 289 } 176 EXPORT_SYMBOL(vmalloc_to_page); 290 EXPORT_SYMBOL(vmalloc_to_page); 177 291 178 unsigned long vmalloc_to_pfn(const void *addr) 292 unsigned long vmalloc_to_pfn(const void *addr) 179 { 293 { 180 return page_to_pfn(virt_to_page(addr)) 294 return page_to_pfn(virt_to_page(addr)); 181 } 295 } 182 EXPORT_SYMBOL(vmalloc_to_pfn); 296 EXPORT_SYMBOL(vmalloc_to_pfn); 183 297 184 long vread_iter(struct iov_iter *iter, const c !! 298 long vread(char *buf, char *addr, unsigned long count) >> 299 { >> 300 /* Don't allow overflow */ >> 301 if ((unsigned long) buf + count < count) >> 302 count = -(unsigned long) buf; >> 303 >> 304 memcpy(buf, addr, count); >> 305 return count; >> 306 } >> 307 >> 308 long vwrite(char *buf, char *addr, unsigned long count) 185 { 309 { 186 /* Don't allow overflow */ 310 /* Don't allow overflow */ 187 if ((unsigned long) addr + count < cou 311 if ((unsigned long) addr + count < count) 188 count = -(unsigned long) addr; 312 count = -(unsigned long) addr; 189 313 190 return copy_to_iter(addr, count, iter) !! 314 memcpy(addr, buf, count); >> 315 return count; 191 } 316 } 192 317 193 /* 318 /* 194 * vmalloc - allocate virtually contigu 319 * vmalloc - allocate virtually contiguous memory 195 * 320 * 196 * @size: allocation size 321 * @size: allocation size 197 * 322 * 198 * Allocate enough pages to cover @size f 323 * Allocate enough pages to cover @size from the page level 199 * allocator and map them into contiguous 324 * allocator and map them into contiguous kernel virtual space. 200 * 325 * 201 * For tight control over page level allo 326 * For tight control over page level allocator and protection flags 202 * use __vmalloc() instead. 327 * use __vmalloc() instead. 203 */ 328 */ 204 void *vmalloc_noprof(unsigned long size) !! 329 void *vmalloc(unsigned long size) 205 { 330 { 206 return __vmalloc_noprof(size, GFP_KERN !! 331 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); 207 } 332 } 208 EXPORT_SYMBOL(vmalloc_noprof); !! 333 EXPORT_SYMBOL(vmalloc); 209 << 210 void *vmalloc_huge_noprof(unsigned long size, << 211 334 212 /* 335 /* 213 * vzalloc - allocate virtually contiguou 336 * vzalloc - allocate virtually contiguous memory with zero fill 214 * 337 * 215 * @size: allocation size 338 * @size: allocation size 216 * 339 * 217 * Allocate enough pages to cover @size f 340 * Allocate enough pages to cover @size from the page level 218 * allocator and map them into contiguous 341 * allocator and map them into contiguous kernel virtual space. 219 * The memory allocated is set to zero. 342 * The memory allocated is set to zero. 220 * 343 * 221 * For tight control over page level allo 344 * For tight control over page level allocator and protection flags 222 * use __vmalloc() instead. 345 * use __vmalloc() instead. 223 */ 346 */ 224 void *vzalloc_noprof(unsigned long size) !! 347 void *vzalloc(unsigned long size) 225 { 348 { 226 return __vmalloc_noprof(size, GFP_KERN !! 349 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, >> 350 PAGE_KERNEL); 227 } 351 } 228 EXPORT_SYMBOL(vzalloc_noprof); !! 
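/*
 * Editor's note, not part of the original file: the wrap-around guard in
 * vread_iter()/vread() above clamps @count so that addr + count can never
 * wrap past the top of the address space. A worked example and a
 * stand-alone copy of the check follow; the helper name is invented for
 * illustration only. On 32-bit, with addr = 0xfffff000 and count = 0x2000,
 * addr + count wraps to 0x1000 (which is < count), so count is clamped to
 * -(unsigned long)addr = 0x1000, i.e. exactly the bytes left between addr
 * and the end of the address space.
 */
static inline unsigned long __maybe_unused clamp_count_to_addr_space(
		unsigned long addr, unsigned long count)
{
	if (addr + count < count)	/* the sum wrapped around zero */
		count = -addr;		/* bytes remaining up to the top */
	return count;
}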
352 EXPORT_SYMBOL(vzalloc); 229 353 230 /** 354 /** 231 * vmalloc_node - allocate memory on a specifi 355 * vmalloc_node - allocate memory on a specific node 232 * @size: allocation size 356 * @size: allocation size 233 * @node: numa node 357 * @node: numa node 234 * 358 * 235 * Allocate enough pages to cover @size from t 359 * Allocate enough pages to cover @size from the page level 236 * allocator and map them into contiguous kern 360 * allocator and map them into contiguous kernel virtual space. 237 * 361 * 238 * For tight control over page level allocator 362 * For tight control over page level allocator and protection flags 239 * use __vmalloc() instead. 363 * use __vmalloc() instead. 240 */ 364 */ 241 void *vmalloc_node_noprof(unsigned long size, !! 365 void *vmalloc_node(unsigned long size, int node) 242 { 366 { 243 return vmalloc_noprof(size); !! 367 return vmalloc(size); 244 } 368 } 245 EXPORT_SYMBOL(vmalloc_node_noprof); !! 369 EXPORT_SYMBOL(vmalloc_node); 246 370 247 /** 371 /** 248 * vzalloc_node - allocate memory on a specifi 372 * vzalloc_node - allocate memory on a specific node with zero fill 249 * @size: allocation size 373 * @size: allocation size 250 * @node: numa node 374 * @node: numa node 251 * 375 * 252 * Allocate enough pages to cover @size from t 376 * Allocate enough pages to cover @size from the page level 253 * allocator and map them into contiguous kern 377 * allocator and map them into contiguous kernel virtual space. 254 * The memory allocated is set to zero. 378 * The memory allocated is set to zero. 255 * 379 * 256 * For tight control over page level allocator 380 * For tight control over page level allocator and protection flags 257 * use __vmalloc() instead. 381 * use __vmalloc() instead. 258 */ 382 */ 259 void *vzalloc_node_noprof(unsigned long size, !! 383 void *vzalloc_node(unsigned long size, int node) 260 { 384 { 261 return vzalloc_noprof(size); !! 385 return vzalloc(size); >> 386 } >> 387 EXPORT_SYMBOL(vzalloc_node); >> 388 >> 389 #ifndef PAGE_KERNEL_EXEC >> 390 # define PAGE_KERNEL_EXEC PAGE_KERNEL >> 391 #endif >> 392 >> 393 /** >> 394 * vmalloc_exec - allocate virtually contiguous, executable memory >> 395 * @size: allocation size >> 396 * >> 397 * Kernel-internal function to allocate enough pages to cover @size >> 398 * the page level allocator and map them into contiguous and >> 399 * executable kernel virtual space. >> 400 * >> 401 * For tight control over page level allocator and protection flags >> 402 * use __vmalloc() instead. >> 403 */ >> 404 >> 405 void *vmalloc_exec(unsigned long size) >> 406 { >> 407 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC); 262 } 408 } 263 EXPORT_SYMBOL(vzalloc_node_noprof); << 264 409 265 /** 410 /** 266 * vmalloc_32 - allocate virtually contiguou 411 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) 267 * @size: allocation size 412 * @size: allocation size 268 * 413 * 269 * Allocate enough 32bit PA addressable p 414 * Allocate enough 32bit PA addressable pages to cover @size from the 270 * page level allocator and map them into 415 * page level allocator and map them into contiguous kernel virtual space. 271 */ 416 */ 272 void *vmalloc_32_noprof(unsigned long size) !! 417 void *vmalloc_32(unsigned long size) 273 { 418 { 274 return __vmalloc_noprof(size, GFP_KERN !! 419 return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL); 275 } 420 } 276 EXPORT_SYMBOL(vmalloc_32_noprof); !! 
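/*
 * Editor's sketch, not part of the original file: with no MMU there is no
 * per-node remapping and no separate kernel page protections, so
 * vmalloc_node(), vzalloc_node() and the 32-bit addressable variants above
 * all collapse onto the same kmalloc()-backed path. The wrapper below is
 * invented purely to illustrate that the flavours are interchangeable
 * here; it is not kernel API.
 */
static void * __maybe_unused any_vmalloc_flavour(unsigned long size, int node)
{
	/* both calls end up in __vmalloc(), i.e. kmalloc(), on !MMU */
	void *p = vmalloc_node(size, node);

	if (!p)
		p = vmalloc_32(size);	/* same underlying allocator */
	return p;
}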
421 EXPORT_SYMBOL(vmalloc_32); 277 422 278 /** 423 /** 279 * vmalloc_32_user - allocate zeroed virtually 424 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory 280 * @size: allocation size 425 * @size: allocation size 281 * 426 * 282 * The resulting memory area is 32bit addressa 427 * The resulting memory area is 32bit addressable and zeroed so it can be 283 * mapped to userspace without leaking data. 428 * mapped to userspace without leaking data. 284 * 429 * 285 * VM_USERMAP is set on the corresponding VMA 430 * VM_USERMAP is set on the corresponding VMA so that subsequent calls to 286 * remap_vmalloc_range() are permissible. 431 * remap_vmalloc_range() are permissible. 287 */ 432 */ 288 void *vmalloc_32_user_noprof(unsigned long siz !! 433 void *vmalloc_32_user(unsigned long size) 289 { 434 { 290 /* 435 /* 291 * We'll have to sort out the ZONE_DMA 436 * We'll have to sort out the ZONE_DMA bits for 64-bit, 292 * but for now this can simply use vma 437 * but for now this can simply use vmalloc_user() directly. 293 */ 438 */ 294 return vmalloc_user_noprof(size); !! 439 return vmalloc_user(size); 295 } 440 } 296 EXPORT_SYMBOL(vmalloc_32_user_noprof); !! 441 EXPORT_SYMBOL(vmalloc_32_user); 297 442 298 void *vmap(struct page **pages, unsigned int c 443 void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot) 299 { 444 { 300 BUG(); 445 BUG(); 301 return NULL; 446 return NULL; 302 } 447 } 303 EXPORT_SYMBOL(vmap); 448 EXPORT_SYMBOL(vmap); 304 449 305 void vunmap(const void *addr) 450 void vunmap(const void *addr) 306 { 451 { 307 BUG(); 452 BUG(); 308 } 453 } 309 EXPORT_SYMBOL(vunmap); 454 EXPORT_SYMBOL(vunmap); 310 455 311 void *vm_map_ram(struct page **pages, unsigned !! 456 void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) 312 { 457 { 313 BUG(); 458 BUG(); 314 return NULL; 459 return NULL; 315 } 460 } 316 EXPORT_SYMBOL(vm_map_ram); 461 EXPORT_SYMBOL(vm_map_ram); 317 462 318 void vm_unmap_ram(const void *mem, unsigned in 463 void vm_unmap_ram(const void *mem, unsigned int count) 319 { 464 { 320 BUG(); 465 BUG(); 321 } 466 } 322 EXPORT_SYMBOL(vm_unmap_ram); 467 EXPORT_SYMBOL(vm_unmap_ram); 323 468 324 void vm_unmap_aliases(void) 469 void vm_unmap_aliases(void) 325 { 470 { 326 } 471 } 327 EXPORT_SYMBOL_GPL(vm_unmap_aliases); 472 EXPORT_SYMBOL_GPL(vm_unmap_aliases); 328 473 329 void free_vm_area(struct vm_struct *area) !! 474 /* >> 475 * Implement a stub for vmalloc_sync_[un]mapping() if the architecture >> 476 * chose not to have one. >> 477 */ >> 478 void __weak vmalloc_sync_mappings(void) 330 { 479 { 331 BUG(); << 332 } 480 } 333 EXPORT_SYMBOL_GPL(free_vm_area); << 334 481 335 int vm_insert_page(struct vm_area_struct *vma, !! 482 void __weak vmalloc_sync_unmappings(void) 336 struct page *page) << 337 { 483 { 338 return -EINVAL; << 339 } 484 } 340 EXPORT_SYMBOL(vm_insert_page); << 341 485 342 int vm_insert_pages(struct vm_area_struct *vma !! 486 /** 343 struct page **pages, u !! 487 * alloc_vm_area - allocate a range of kernel address space >> 488 * @size: size of the area >> 489 * >> 490 * Returns: NULL on failure, vm_struct on success >> 491 * >> 492 * This function reserves a range of kernel address space, and >> 493 * allocates pagetables to map that range. No actual mappings >> 494 * are created. If the kernel address space is not shared >> 495 * between processes, it syncs the pagetable across all >> 496 * processes. 
>> 497 */ >> 498 struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes) 344 { 499 { 345 return -EINVAL; !! 500 BUG(); >> 501 return NULL; 346 } 502 } 347 EXPORT_SYMBOL(vm_insert_pages); !! 503 EXPORT_SYMBOL_GPL(alloc_vm_area); 348 504 349 int vm_map_pages(struct vm_area_struct *vma, s !! 505 void free_vm_area(struct vm_struct *area) 350 unsigned long num) << 351 { 506 { 352 return -EINVAL; !! 507 BUG(); 353 } 508 } 354 EXPORT_SYMBOL(vm_map_pages); !! 509 EXPORT_SYMBOL_GPL(free_vm_area); 355 510 356 int vm_map_pages_zero(struct vm_area_struct *v !! 511 int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, 357 unsigned long !! 512 struct page *page) 358 { 513 { 359 return -EINVAL; 514 return -EINVAL; 360 } 515 } 361 EXPORT_SYMBOL(vm_map_pages_zero); !! 516 EXPORT_SYMBOL(vm_insert_page); 362 517 363 /* 518 /* 364 * sys_brk() for the most part doesn't need t 519 * sys_brk() for the most part doesn't need the global kernel 365 * lock, except when an application is doing 520 * lock, except when an application is doing something nasty 366 * like trying to un-brk an area that has alr 521 * like trying to un-brk an area that has already been mapped 367 * to a regular file. in this case, the unma 522 * to a regular file. in this case, the unmapping will need 368 * to invoke file system routines that need t 523 * to invoke file system routines that need the global lock. 369 */ 524 */ 370 SYSCALL_DEFINE1(brk, unsigned long, brk) 525 SYSCALL_DEFINE1(brk, unsigned long, brk) 371 { 526 { 372 struct mm_struct *mm = current->mm; 527 struct mm_struct *mm = current->mm; 373 528 374 if (brk < mm->start_brk || brk > mm->c 529 if (brk < mm->start_brk || brk > mm->context.end_brk) 375 return mm->brk; 530 return mm->brk; 376 531 377 if (mm->brk == brk) 532 if (mm->brk == brk) 378 return mm->brk; 533 return mm->brk; 379 534 380 /* 535 /* 381 * Always allow shrinking brk 536 * Always allow shrinking brk 382 */ 537 */ 383 if (brk <= mm->brk) { 538 if (brk <= mm->brk) { 384 mm->brk = brk; 539 mm->brk = brk; 385 return brk; 540 return brk; 386 } 541 } 387 542 388 /* 543 /* 389 * Ok, looks good - let it rip. 544 * Ok, looks good - let it rip. 390 */ 545 */ 391 flush_icache_user_range(mm->brk, brk); !! 546 flush_icache_range(mm->brk, brk); 392 return mm->brk = brk; 547 return mm->brk = brk; 393 } 548 } 394 549 395 /* 550 /* 396 * initialise the percpu counter for VM and re !! 551 * initialise the VMA and region record slabs 397 */ 552 */ 398 void __init mmap_init(void) 553 void __init mmap_init(void) 399 { 554 { 400 int ret; 555 int ret; 401 556 402 ret = percpu_counter_init(&vm_committe 557 ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); 403 VM_BUG_ON(ret); 558 VM_BUG_ON(ret); 404 vm_region_jar = KMEM_CACHE(vm_region, !! 
559 vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); 405 } 560 } 406 561 407 /* 562 /* 408 * validate the region tree 563 * validate the region tree 409 * - the caller must hold the region lock 564 * - the caller must hold the region lock 410 */ 565 */ 411 #ifdef CONFIG_DEBUG_NOMMU_REGIONS 566 #ifdef CONFIG_DEBUG_NOMMU_REGIONS 412 static noinline void validate_nommu_regions(vo 567 static noinline void validate_nommu_regions(void) 413 { 568 { 414 struct vm_region *region, *last; 569 struct vm_region *region, *last; 415 struct rb_node *p, *lastp; 570 struct rb_node *p, *lastp; 416 571 417 lastp = rb_first(&nommu_region_tree); 572 lastp = rb_first(&nommu_region_tree); 418 if (!lastp) 573 if (!lastp) 419 return; 574 return; 420 575 421 last = rb_entry(lastp, struct vm_regio 576 last = rb_entry(lastp, struct vm_region, vm_rb); 422 BUG_ON(last->vm_end <= last->vm_start) 577 BUG_ON(last->vm_end <= last->vm_start); 423 BUG_ON(last->vm_top < last->vm_end); 578 BUG_ON(last->vm_top < last->vm_end); 424 579 425 while ((p = rb_next(lastp))) { 580 while ((p = rb_next(lastp))) { 426 region = rb_entry(p, struct vm 581 region = rb_entry(p, struct vm_region, vm_rb); 427 last = rb_entry(lastp, struct 582 last = rb_entry(lastp, struct vm_region, vm_rb); 428 583 429 BUG_ON(region->vm_end <= regio 584 BUG_ON(region->vm_end <= region->vm_start); 430 BUG_ON(region->vm_top < region 585 BUG_ON(region->vm_top < region->vm_end); 431 BUG_ON(region->vm_start < last 586 BUG_ON(region->vm_start < last->vm_top); 432 587 433 lastp = p; 588 lastp = p; 434 } 589 } 435 } 590 } 436 #else 591 #else 437 static void validate_nommu_regions(void) 592 static void validate_nommu_regions(void) 438 { 593 { 439 } 594 } 440 #endif 595 #endif 441 596 442 /* 597 /* 443 * add a region into the global tree 598 * add a region into the global tree 444 */ 599 */ 445 static void add_nommu_region(struct vm_region 600 static void add_nommu_region(struct vm_region *region) 446 { 601 { 447 struct vm_region *pregion; 602 struct vm_region *pregion; 448 struct rb_node **p, *parent; 603 struct rb_node **p, *parent; 449 604 450 validate_nommu_regions(); 605 validate_nommu_regions(); 451 606 452 parent = NULL; 607 parent = NULL; 453 p = &nommu_region_tree.rb_node; 608 p = &nommu_region_tree.rb_node; 454 while (*p) { 609 while (*p) { 455 parent = *p; 610 parent = *p; 456 pregion = rb_entry(parent, str 611 pregion = rb_entry(parent, struct vm_region, vm_rb); 457 if (region->vm_start < pregion 612 if (region->vm_start < pregion->vm_start) 458 p = &(*p)->rb_left; 613 p = &(*p)->rb_left; 459 else if (region->vm_start > pr 614 else if (region->vm_start > pregion->vm_start) 460 p = &(*p)->rb_right; 615 p = &(*p)->rb_right; 461 else if (pregion == region) 616 else if (pregion == region) 462 return; 617 return; 463 else 618 else 464 BUG(); 619 BUG(); 465 } 620 } 466 621 467 rb_link_node(®ion->vm_rb, parent, p 622 rb_link_node(®ion->vm_rb, parent, p); 468 rb_insert_color(®ion->vm_rb, &nommu 623 rb_insert_color(®ion->vm_rb, &nommu_region_tree); 469 624 470 validate_nommu_regions(); 625 validate_nommu_regions(); 471 } 626 } 472 627 473 /* 628 /* 474 * delete a region from the global tree 629 * delete a region from the global tree 475 */ 630 */ 476 static void delete_nommu_region(struct vm_regi 631 static void delete_nommu_region(struct vm_region *region) 477 { 632 { 478 BUG_ON(!nommu_region_tree.rb_node); 633 BUG_ON(!nommu_region_tree.rb_node); 479 634 480 validate_nommu_regions(); 635 validate_nommu_regions(); 481 rb_erase(®ion->vm_rb, &nommu_region 636 
rb_erase(®ion->vm_rb, &nommu_region_tree); 482 validate_nommu_regions(); 637 validate_nommu_regions(); 483 } 638 } 484 639 485 /* 640 /* 486 * free a contiguous series of pages 641 * free a contiguous series of pages 487 */ 642 */ 488 static void free_page_series(unsigned long fro 643 static void free_page_series(unsigned long from, unsigned long to) 489 { 644 { 490 for (; from < to; from += PAGE_SIZE) { 645 for (; from < to; from += PAGE_SIZE) { 491 struct page *page = virt_to_pa !! 646 struct page *page = virt_to_page(from); 492 647 493 atomic_long_dec(&mmap_pages_al 648 atomic_long_dec(&mmap_pages_allocated); 494 put_page(page); 649 put_page(page); 495 } 650 } 496 } 651 } 497 652 498 /* 653 /* 499 * release a reference to a region 654 * release a reference to a region 500 * - the caller must hold the region semaphore 655 * - the caller must hold the region semaphore for writing, which this releases 501 * - the region may not have been added to the 656 * - the region may not have been added to the tree yet, in which case vm_top 502 * will equal vm_start 657 * will equal vm_start 503 */ 658 */ 504 static void __put_nommu_region(struct vm_regio 659 static void __put_nommu_region(struct vm_region *region) 505 __releases(nommu_region_sem) 660 __releases(nommu_region_sem) 506 { 661 { 507 BUG_ON(!nommu_region_tree.rb_node); 662 BUG_ON(!nommu_region_tree.rb_node); 508 663 509 if (--region->vm_usage == 0) { 664 if (--region->vm_usage == 0) { 510 if (region->vm_top > region->v 665 if (region->vm_top > region->vm_start) 511 delete_nommu_region(re 666 delete_nommu_region(region); 512 up_write(&nommu_region_sem); 667 up_write(&nommu_region_sem); 513 668 514 if (region->vm_file) 669 if (region->vm_file) 515 fput(region->vm_file); 670 fput(region->vm_file); 516 671 517 /* IO memory and memory shared 672 /* IO memory and memory shared directly out of the pagecache 518 * from ramfs/tmpfs mustn't be 673 * from ramfs/tmpfs mustn't be released here */ 519 if (region->vm_flags & VM_MAPP 674 if (region->vm_flags & VM_MAPPED_COPY) 520 free_page_series(regio 675 free_page_series(region->vm_start, region->vm_top); 521 kmem_cache_free(vm_region_jar, 676 kmem_cache_free(vm_region_jar, region); 522 } else { 677 } else { 523 up_write(&nommu_region_sem); 678 up_write(&nommu_region_sem); 524 } 679 } 525 } 680 } 526 681 527 /* 682 /* 528 * release a reference to a region 683 * release a reference to a region 529 */ 684 */ 530 static void put_nommu_region(struct vm_region 685 static void put_nommu_region(struct vm_region *region) 531 { 686 { 532 down_write(&nommu_region_sem); 687 down_write(&nommu_region_sem); 533 __put_nommu_region(region); 688 __put_nommu_region(region); 534 } 689 } 535 690 536 static void setup_vma_to_mm(struct vm_area_str !! 
691 /* >> 692 * update protection on a vma >> 693 */ >> 694 static void protect_vma(struct vm_area_struct *vma, unsigned long flags) 537 { 695 { >> 696 #ifdef CONFIG_MPU >> 697 struct mm_struct *mm = vma->vm_mm; >> 698 long start = vma->vm_start & PAGE_MASK; >> 699 while (start < vma->vm_end) { >> 700 protect_page(mm, start, flags); >> 701 start += PAGE_SIZE; >> 702 } >> 703 update_protections(mm); >> 704 #endif >> 705 } >> 706 >> 707 /* >> 708 * add a VMA into a process's mm_struct in the appropriate place in the list >> 709 * and tree and add to the address space's page tree also if not an anonymous >> 710 * page >> 711 * - should be called with mm->mmap_sem held writelocked >> 712 */ >> 713 static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) >> 714 { >> 715 struct vm_area_struct *pvma, *prev; >> 716 struct address_space *mapping; >> 717 struct rb_node **p, *parent, *rb_prev; >> 718 >> 719 BUG_ON(!vma->vm_region); >> 720 >> 721 mm->map_count++; 538 vma->vm_mm = mm; 722 vma->vm_mm = mm; 539 723 >> 724 protect_vma(vma, vma->vm_flags); >> 725 540 /* add the VMA to the mapping */ 726 /* add the VMA to the mapping */ 541 if (vma->vm_file) { 727 if (vma->vm_file) { 542 struct address_space *mapping !! 728 mapping = vma->vm_file->f_mapping; 543 729 544 i_mmap_lock_write(mapping); 730 i_mmap_lock_write(mapping); 545 flush_dcache_mmap_lock(mapping 731 flush_dcache_mmap_lock(mapping); 546 vma_interval_tree_insert(vma, 732 vma_interval_tree_insert(vma, &mapping->i_mmap); 547 flush_dcache_mmap_unlock(mappi 733 flush_dcache_mmap_unlock(mapping); 548 i_mmap_unlock_write(mapping); 734 i_mmap_unlock_write(mapping); 549 } 735 } >> 736 >> 737 /* add the VMA to the tree */ >> 738 parent = rb_prev = NULL; >> 739 p = &mm->mm_rb.rb_node; >> 740 while (*p) { >> 741 parent = *p; >> 742 pvma = rb_entry(parent, struct vm_area_struct, vm_rb); >> 743 >> 744 /* sort by: start addr, end addr, VMA struct addr in that order >> 745 * (the latter is necessary as we may get identical VMAs) */ >> 746 if (vma->vm_start < pvma->vm_start) >> 747 p = &(*p)->rb_left; >> 748 else if (vma->vm_start > pvma->vm_start) { >> 749 rb_prev = parent; >> 750 p = &(*p)->rb_right; >> 751 } else if (vma->vm_end < pvma->vm_end) >> 752 p = &(*p)->rb_left; >> 753 else if (vma->vm_end > pvma->vm_end) { >> 754 rb_prev = parent; >> 755 p = &(*p)->rb_right; >> 756 } else if (vma < pvma) >> 757 p = &(*p)->rb_left; >> 758 else if (vma > pvma) { >> 759 rb_prev = parent; >> 760 p = &(*p)->rb_right; >> 761 } else >> 762 BUG(); >> 763 } >> 764 >> 765 rb_link_node(&vma->vm_rb, parent, p); >> 766 rb_insert_color(&vma->vm_rb, &mm->mm_rb); >> 767 >> 768 /* add VMA to the VMA list also */ >> 769 prev = NULL; >> 770 if (rb_prev) >> 771 prev = rb_entry(rb_prev, struct vm_area_struct, vm_rb); >> 772 >> 773 __vma_link_list(mm, vma, prev, parent); 550 } 774 } 551 775 552 static void cleanup_vma_from_mm(struct vm_area !! 776 /* >> 777 * delete a VMA from its owning mm_struct and address space >> 778 */ >> 779 static void delete_vma_from_mm(struct vm_area_struct *vma) 553 { 780 { 554 vma->vm_mm->map_count--; !! 
781 int i; >> 782 struct address_space *mapping; >> 783 struct mm_struct *mm = vma->vm_mm; >> 784 struct task_struct *curr = current; >> 785 >> 786 protect_vma(vma, 0); >> 787 >> 788 mm->map_count--; >> 789 for (i = 0; i < VMACACHE_SIZE; i++) { >> 790 /* if the vma is cached, invalidate the entire cache */ >> 791 if (curr->vmacache[i] == vma) { >> 792 vmacache_invalidate(mm); >> 793 break; >> 794 } >> 795 } >> 796 555 /* remove the VMA from the mapping */ 797 /* remove the VMA from the mapping */ 556 if (vma->vm_file) { 798 if (vma->vm_file) { 557 struct address_space *mapping; << 558 mapping = vma->vm_file->f_mapp 799 mapping = vma->vm_file->f_mapping; 559 800 560 i_mmap_lock_write(mapping); 801 i_mmap_lock_write(mapping); 561 flush_dcache_mmap_lock(mapping 802 flush_dcache_mmap_lock(mapping); 562 vma_interval_tree_remove(vma, 803 vma_interval_tree_remove(vma, &mapping->i_mmap); 563 flush_dcache_mmap_unlock(mappi 804 flush_dcache_mmap_unlock(mapping); 564 i_mmap_unlock_write(mapping); 805 i_mmap_unlock_write(mapping); 565 } 806 } 566 } << 567 807 568 /* !! 808 /* remove from the MM's tree and list */ 569 * delete a VMA from its owning mm_struct and !! 809 rb_erase(&vma->vm_rb, &mm->mm_rb); 570 */ << 571 static int delete_vma_from_mm(struct vm_area_s << 572 { << 573 VMA_ITERATOR(vmi, vma->vm_mm, vma->vm_ << 574 810 575 vma_iter_config(&vmi, vma->vm_start, v !! 811 if (vma->vm_prev) 576 if (vma_iter_prealloc(&vmi, vma)) { !! 812 vma->vm_prev->vm_next = vma->vm_next; 577 pr_warn("Allocation of vma tre !! 813 else 578 current->pid); !! 814 mm->mmap = vma->vm_next; 579 return -ENOMEM; << 580 } << 581 cleanup_vma_from_mm(vma); << 582 815 583 /* remove from the MM's tree and list !! 816 if (vma->vm_next) 584 vma_iter_clear(&vmi); !! 817 vma->vm_next->vm_prev = vma->vm_prev; 585 return 0; << 586 } 818 } >> 819 587 /* 820 /* 588 * destroy a VMA record 821 * destroy a VMA record 589 */ 822 */ 590 static void delete_vma(struct mm_struct *mm, s 823 static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) 591 { 824 { 592 vma_close(vma); !! 825 if (vma->vm_ops && vma->vm_ops->close) >> 826 vma->vm_ops->close(vma); 593 if (vma->vm_file) 827 if (vma->vm_file) 594 fput(vma->vm_file); 828 fput(vma->vm_file); 595 put_nommu_region(vma->vm_region); 829 put_nommu_region(vma->vm_region); 596 vm_area_free(vma); !! 830 kmem_cache_free(vm_area_cachep, vma); 597 } << 598 << 599 struct vm_area_struct *find_vma_intersection(s << 600 u << 601 u << 602 { << 603 unsigned long index = start_addr; << 604 << 605 mmap_assert_locked(mm); << 606 return mt_find(&mm->mm_mt, &index, end << 607 } 831 } 608 EXPORT_SYMBOL(find_vma_intersection); << 609 832 610 /* 833 /* 611 * look up the first VMA in which addr resides 834 * look up the first VMA in which addr resides, NULL if none 612 * - should be called with mm->mmap_lock at le !! 835 * - should be called with mm->mmap_sem at least held readlocked 613 */ 836 */ 614 struct vm_area_struct *find_vma(struct mm_stru 837 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) 615 { 838 { 616 VMA_ITERATOR(vmi, mm, addr); !! 
839 struct vm_area_struct *vma; >> 840 >> 841 /* check the cache first */ >> 842 vma = vmacache_find(mm, addr); >> 843 if (likely(vma)) >> 844 return vma; >> 845 >> 846 /* trawl the list (there may be multiple mappings in which addr >> 847 * resides) */ >> 848 for (vma = mm->mmap; vma; vma = vma->vm_next) { >> 849 if (vma->vm_start > addr) >> 850 return NULL; >> 851 if (vma->vm_end > addr) { >> 852 vmacache_update(addr, vma); >> 853 return vma; >> 854 } >> 855 } 617 856 618 return vma_iter_load(&vmi); !! 857 return NULL; 619 } 858 } 620 EXPORT_SYMBOL(find_vma); 859 EXPORT_SYMBOL(find_vma); 621 860 622 /* 861 /* 623 * At least xtensa ends up having protection f !! 862 * find a VMA 624 * MMU.. No stack expansion, at least. !! 863 * - we don't extend stack VMAs under NOMMU conditions 625 */ 864 */ 626 struct vm_area_struct *lock_mm_and_find_vma(st !! 865 struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) 627 unsigned long addr, st << 628 { 866 { 629 struct vm_area_struct *vma; !! 867 return find_vma(mm, addr); 630 << 631 mmap_read_lock(mm); << 632 vma = vma_lookup(mm, addr); << 633 if (!vma) << 634 mmap_read_unlock(mm); << 635 return vma; << 636 } 868 } 637 869 638 /* 870 /* 639 * expand a stack to a given address 871 * expand a stack to a given address 640 * - not supported under NOMMU conditions 872 * - not supported under NOMMU conditions 641 */ 873 */ 642 int expand_stack_locked(struct vm_area_struct !! 874 int expand_stack(struct vm_area_struct *vma, unsigned long address) 643 { 875 { 644 return -ENOMEM; 876 return -ENOMEM; 645 } 877 } 646 878 647 struct vm_area_struct *expand_stack(struct mm_ << 648 { << 649 mmap_read_unlock(mm); << 650 return NULL; << 651 } << 652 << 653 /* 879 /* 654 * look up the first VMA exactly that exactly 880 * look up the first VMA exactly that exactly matches addr 655 * - should be called with mm->mmap_lock at le !! 881 * - should be called with mm->mmap_sem at least held readlocked 656 */ 882 */ 657 static struct vm_area_struct *find_vma_exact(s 883 static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, 658 u 884 unsigned long addr, 659 u 885 unsigned long len) 660 { 886 { 661 struct vm_area_struct *vma; 887 struct vm_area_struct *vma; 662 unsigned long end = addr + len; 888 unsigned long end = addr + len; 663 VMA_ITERATOR(vmi, mm, addr); << 664 889 665 vma = vma_iter_load(&vmi); !! 890 /* check the cache first */ 666 if (!vma) !! 891 vma = vmacache_find_exact(mm, addr, end); 667 return NULL; !! 892 if (vma) 668 if (vma->vm_start != addr) !! 893 return vma; 669 return NULL; !! 894 670 if (vma->vm_end != end) !! 895 /* trawl the list (there may be multiple mappings in which addr 671 return NULL; !! 896 * resides) */ >> 897 for (vma = mm->mmap; vma; vma = vma->vm_next) { >> 898 if (vma->vm_start < addr) >> 899 continue; >> 900 if (vma->vm_start > addr) >> 901 return NULL; >> 902 if (vma->vm_end == end) { >> 903 vmacache_update(addr, vma); >> 904 return vma; >> 905 } >> 906 } 672 907 673 return vma; !! 
908 return NULL; 674 } 909 } 675 910 676 /* 911 /* 677 * determine whether a mapping should be permi 912 * determine whether a mapping should be permitted and, if so, what sort of 678 * mapping we're capable of supporting 913 * mapping we're capable of supporting 679 */ 914 */ 680 static int validate_mmap_request(struct file * 915 static int validate_mmap_request(struct file *file, 681 unsigned long 916 unsigned long addr, 682 unsigned long 917 unsigned long len, 683 unsigned long 918 unsigned long prot, 684 unsigned long 919 unsigned long flags, 685 unsigned long 920 unsigned long pgoff, 686 unsigned long 921 unsigned long *_capabilities) 687 { 922 { 688 unsigned long capabilities, rlen; 923 unsigned long capabilities, rlen; 689 int ret; 924 int ret; 690 925 691 /* do the simple checks first */ 926 /* do the simple checks first */ 692 if (flags & MAP_FIXED) 927 if (flags & MAP_FIXED) 693 return -EINVAL; 928 return -EINVAL; 694 929 695 if ((flags & MAP_TYPE) != MAP_PRIVATE 930 if ((flags & MAP_TYPE) != MAP_PRIVATE && 696 (flags & MAP_TYPE) != MAP_SHARED) 931 (flags & MAP_TYPE) != MAP_SHARED) 697 return -EINVAL; 932 return -EINVAL; 698 933 699 if (!len) 934 if (!len) 700 return -EINVAL; 935 return -EINVAL; 701 936 702 /* Careful about overflows.. */ 937 /* Careful about overflows.. */ 703 rlen = PAGE_ALIGN(len); 938 rlen = PAGE_ALIGN(len); 704 if (!rlen || rlen > TASK_SIZE) 939 if (!rlen || rlen > TASK_SIZE) 705 return -ENOMEM; 940 return -ENOMEM; 706 941 707 /* offset overflow? */ 942 /* offset overflow? */ 708 if ((pgoff + (rlen >> PAGE_SHIFT)) < p 943 if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff) 709 return -EOVERFLOW; 944 return -EOVERFLOW; 710 945 711 if (file) { 946 if (file) { 712 /* files must support mmap */ 947 /* files must support mmap */ 713 if (!file->f_op->mmap) 948 if (!file->f_op->mmap) 714 return -ENODEV; 949 return -ENODEV; 715 950 716 /* work out if what we've got 951 /* work out if what we've got could possibly be shared 717 * - we support chardevs that 952 * - we support chardevs that provide their own "memory" 718 * - we support files/blockdev 953 * - we support files/blockdevs that are memory backed 719 */ 954 */ 720 if (file->f_op->mmap_capabilit 955 if (file->f_op->mmap_capabilities) { 721 capabilities = file->f 956 capabilities = file->f_op->mmap_capabilities(file); 722 } else { 957 } else { 723 /* no explicit capabil 958 /* no explicit capabilities set, so assume some 724 * defaults */ 959 * defaults */ 725 switch (file_inode(fil 960 switch (file_inode(file)->i_mode & S_IFMT) { 726 case S_IFREG: 961 case S_IFREG: 727 case S_IFBLK: 962 case S_IFBLK: 728 capabilities = 963 capabilities = NOMMU_MAP_COPY; 729 break; 964 break; 730 965 731 case S_IFCHR: 966 case S_IFCHR: 732 capabilities = 967 capabilities = 733 NOMMU_ 968 NOMMU_MAP_DIRECT | 734 NOMMU_ 969 NOMMU_MAP_READ | 735 NOMMU_ 970 NOMMU_MAP_WRITE; 736 break; 971 break; 737 972 738 default: 973 default: 739 return -EINVAL 974 return -EINVAL; 740 } 975 } 741 } 976 } 742 977 743 /* eliminate any capabilities 978 /* eliminate any capabilities that we can't support on this 744 * device */ 979 * device */ 745 if (!file->f_op->get_unmapped_ 980 if (!file->f_op->get_unmapped_area) 746 capabilities &= ~NOMMU 981 capabilities &= ~NOMMU_MAP_DIRECT; 747 if (!(file->f_mode & FMODE_CAN 982 if (!(file->f_mode & FMODE_CAN_READ)) 748 capabilities &= ~NOMMU 983 capabilities &= ~NOMMU_MAP_COPY; 749 984 750 /* The file shall have been op 985 /* The file shall have been opened with read permission. 
*/ 751 if (!(file->f_mode & FMODE_REA 986 if (!(file->f_mode & FMODE_READ)) 752 return -EACCES; 987 return -EACCES; 753 988 754 if (flags & MAP_SHARED) { 989 if (flags & MAP_SHARED) { 755 /* do checks for writi 990 /* do checks for writing, appending and locking */ 756 if ((prot & PROT_WRITE 991 if ((prot & PROT_WRITE) && 757 !(file->f_mode & F 992 !(file->f_mode & FMODE_WRITE)) 758 return -EACCES 993 return -EACCES; 759 994 760 if (IS_APPEND(file_ino 995 if (IS_APPEND(file_inode(file)) && 761 (file->f_mode & FM 996 (file->f_mode & FMODE_WRITE)) 762 return -EACCES 997 return -EACCES; 763 998 >> 999 if (locks_verify_locked(file)) >> 1000 return -EAGAIN; >> 1001 764 if (!(capabilities & N 1002 if (!(capabilities & NOMMU_MAP_DIRECT)) 765 return -ENODEV 1003 return -ENODEV; 766 1004 767 /* we mustn't privatis 1005 /* we mustn't privatise shared mappings */ 768 capabilities &= ~NOMMU 1006 capabilities &= ~NOMMU_MAP_COPY; 769 } else { 1007 } else { 770 /* we're going to read 1008 /* we're going to read the file into private memory we 771 * allocate */ 1009 * allocate */ 772 if (!(capabilities & N 1010 if (!(capabilities & NOMMU_MAP_COPY)) 773 return -ENODEV 1011 return -ENODEV; 774 1012 775 /* we don't permit a p 1013 /* we don't permit a private writable mapping to be 776 * shared with the bac 1014 * shared with the backing device */ 777 if (prot & PROT_WRITE) 1015 if (prot & PROT_WRITE) 778 capabilities & 1016 capabilities &= ~NOMMU_MAP_DIRECT; 779 } 1017 } 780 1018 781 if (capabilities & NOMMU_MAP_D 1019 if (capabilities & NOMMU_MAP_DIRECT) { 782 if (((prot & PROT_READ 1020 if (((prot & PROT_READ) && !(capabilities & NOMMU_MAP_READ)) || 783 ((prot & PROT_WRIT 1021 ((prot & PROT_WRITE) && !(capabilities & NOMMU_MAP_WRITE)) || 784 ((prot & PROT_EXEC 1022 ((prot & PROT_EXEC) && !(capabilities & NOMMU_MAP_EXEC)) 785 ) { 1023 ) { 786 capabilities & 1024 capabilities &= ~NOMMU_MAP_DIRECT; 787 if (flags & MA 1025 if (flags & MAP_SHARED) { 788 pr_war 1026 pr_warn("MAP_SHARED not completely supported on !MMU\n"); 789 return 1027 return -EINVAL; 790 } 1028 } 791 } 1029 } 792 } 1030 } 793 1031 794 /* handle executable mappings 1032 /* handle executable mappings and implied executable 795 * mappings */ 1033 * mappings */ 796 if (path_noexec(&file->f_path) 1034 if (path_noexec(&file->f_path)) { 797 if (prot & PROT_EXEC) 1035 if (prot & PROT_EXEC) 798 return -EPERM; 1036 return -EPERM; 799 } else if ((prot & PROT_READ) 1037 } else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) { 800 /* handle implication 1038 /* handle implication of PROT_EXEC by PROT_READ */ 801 if (current->personali 1039 if (current->personality & READ_IMPLIES_EXEC) { 802 if (capabiliti 1040 if (capabilities & NOMMU_MAP_EXEC) 803 prot | 1041 prot |= PROT_EXEC; 804 } 1042 } 805 } else if ((prot & PROT_READ) 1043 } else if ((prot & PROT_READ) && 806 (prot & PROT_EXEC) && 1044 (prot & PROT_EXEC) && 807 !(capabilities & NOMM 1045 !(capabilities & NOMMU_MAP_EXEC) 808 ) { 1046 ) { 809 /* backing file is not 1047 /* backing file is not executable, try to copy */ 810 capabilities &= ~NOMMU 1048 capabilities &= ~NOMMU_MAP_DIRECT; 811 } 1049 } 812 } else { 1050 } else { 813 /* anonymous mappings are alwa 1051 /* anonymous mappings are always memory backed and can be 814 * privately mapped 1052 * privately mapped 815 */ 1053 */ 816 capabilities = NOMMU_MAP_COPY; 1054 capabilities = NOMMU_MAP_COPY; 817 1055 818 /* handle PROT_EXEC implicatio 1056 /* handle PROT_EXEC implication by PROT_READ */ 819 if ((prot & PROT_READ) && 1057 if ((prot & 
PROT_READ) && 820 (current->personality & RE 1058 (current->personality & READ_IMPLIES_EXEC)) 821 prot |= PROT_EXEC; 1059 prot |= PROT_EXEC; 822 } 1060 } 823 1061 824 /* allow the security API to have its 1062 /* allow the security API to have its say */ 825 ret = security_mmap_addr(addr); 1063 ret = security_mmap_addr(addr); 826 if (ret < 0) 1064 if (ret < 0) 827 return ret; 1065 return ret; 828 1066 829 /* looks okay */ 1067 /* looks okay */ 830 *_capabilities = capabilities; 1068 *_capabilities = capabilities; 831 return 0; 1069 return 0; 832 } 1070 } 833 1071 834 /* 1072 /* 835 * we've determined that we can make the mappi 1073 * we've determined that we can make the mapping, now translate what we 836 * now know into VMA flags 1074 * now know into VMA flags 837 */ 1075 */ 838 static unsigned long determine_vm_flags(struct 1076 static unsigned long determine_vm_flags(struct file *file, 839 unsign 1077 unsigned long prot, 840 unsign 1078 unsigned long flags, 841 unsign 1079 unsigned long capabilities) 842 { 1080 { 843 unsigned long vm_flags; 1081 unsigned long vm_flags; 844 1082 845 vm_flags = calc_vm_prot_bits(prot, 0) !! 1083 vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags); >> 1084 /* vm_flags |= mm->def_flags; */ 846 1085 847 if (!file) { !! 1086 if (!(capabilities & NOMMU_MAP_DIRECT)) { 848 /* !! 1087 /* attempt to share read-only copies of mapped file chunks */ 849 * MAP_ANONYMOUS. MAP_SHARED i << 850 * there is no fork(). << 851 */ << 852 vm_flags |= VM_MAYREAD | VM_MA 1088 vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; 853 } else if (flags & MAP_PRIVATE) { !! 1089 if (file && !(prot & PROT_WRITE)) 854 /* MAP_PRIVATE file mapping */ !! 1090 vm_flags |= VM_MAYSHARE; 855 if (capabilities & NOMMU_MAP_D << 856 vm_flags |= (capabilit << 857 else << 858 vm_flags |= VM_MAYREAD << 859 << 860 if (!(prot & PROT_WRITE) && !c << 861 /* << 862 * R/O private file ma << 863 * modify memory, espe << 864 * (e.g., set breakpoi << 865 * permissions (no mpr << 866 * the file mapping, w << 867 * ramfs/tmpfs/shmfs a << 868 */ << 869 vm_flags |= VM_MAYOVER << 870 } else { 1091 } else { 871 /* MAP_SHARED file mapping: NO !! 1092 /* overlay a shareable mapping on the backing device or inode 872 vm_flags |= VM_SHARED | VM_MAY !! 1093 * if possible - used for chardevs, ramfs/tmpfs/shmfs and 873 (capabilities & NO !! 1094 * romfs/cramfs */ >> 1095 vm_flags |= VM_MAYSHARE | (capabilities & NOMMU_VMFLAGS); >> 1096 if (flags & MAP_SHARED) >> 1097 vm_flags |= VM_SHARED; 874 } 1098 } 875 1099 >> 1100 /* refuse to let anyone share private mappings with this process if >> 1101 * it's being traced - otherwise breakpoints set in it may interfere >> 1102 * with another untraced process >> 1103 */ >> 1104 if ((flags & MAP_PRIVATE) && current->ptrace) >> 1105 vm_flags &= ~VM_MAYSHARE; >> 1106 876 return vm_flags; 1107 return vm_flags; 877 } 1108 } 878 1109 879 /* 1110 /* 880 * set up a shared mapping on a file (the driv 1111 * set up a shared mapping on a file (the driver or filesystem provides and 881 * pins the storage) 1112 * pins the storage) 882 */ 1113 */ 883 static int do_mmap_shared_file(struct vm_area_ 1114 static int do_mmap_shared_file(struct vm_area_struct *vma) 884 { 1115 { 885 int ret; 1116 int ret; 886 1117 887 ret = mmap_file(vma->vm_file, vma); !! 
1118 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); 888 if (ret == 0) { 1119 if (ret == 0) { 889 vma->vm_region->vm_top = vma-> 1120 vma->vm_region->vm_top = vma->vm_region->vm_end; 890 return 0; 1121 return 0; 891 } 1122 } 892 if (ret != -ENOSYS) 1123 if (ret != -ENOSYS) 893 return ret; 1124 return ret; 894 1125 895 /* getting -ENOSYS indicates that dire 1126 /* getting -ENOSYS indicates that direct mmap isn't possible (as 896 * opposed to tried but failed) so we 1127 * opposed to tried but failed) so we can only give a suitable error as 897 * it's not possible to make a private 1128 * it's not possible to make a private copy if MAP_SHARED was given */ 898 return -ENODEV; 1129 return -ENODEV; 899 } 1130 } 900 1131 901 /* 1132 /* 902 * set up a private mapping or an anonymous sh 1133 * set up a private mapping or an anonymous shared mapping 903 */ 1134 */ 904 static int do_mmap_private(struct vm_area_stru 1135 static int do_mmap_private(struct vm_area_struct *vma, 905 struct vm_region *r 1136 struct vm_region *region, 906 unsigned long len, 1137 unsigned long len, 907 unsigned long capab 1138 unsigned long capabilities) 908 { 1139 { 909 unsigned long total, point; 1140 unsigned long total, point; 910 void *base; 1141 void *base; 911 int ret, order; 1142 int ret, order; 912 1143 913 /* !! 1144 /* invoke the file's mapping function so that it can keep track of 914 * Invoke the file's mapping function !! 1145 * shared mappings on devices or memory 915 * shared mappings on devices or memor !! 1146 * - VM_MAYSHARE will be set if it may attempt to share 916 * it may attempt to share, which will << 917 * happy. << 918 */ 1147 */ 919 if (capabilities & NOMMU_MAP_DIRECT) { 1148 if (capabilities & NOMMU_MAP_DIRECT) { 920 ret = mmap_file(vma->vm_file, !! 1149 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); 921 /* shouldn't return success if << 922 if (WARN_ON_ONCE(!is_nommu_sha << 923 ret = -ENOSYS; << 924 if (ret == 0) { 1150 if (ret == 0) { >> 1151 /* shouldn't return success if we're not sharing */ >> 1152 BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); 925 vma->vm_region->vm_top 1153 vma->vm_region->vm_top = vma->vm_region->vm_end; 926 return 0; 1154 return 0; 927 } 1155 } 928 if (ret != -ENOSYS) 1156 if (ret != -ENOSYS) 929 return ret; 1157 return ret; 930 1158 931 /* getting an ENOSYS error ind 1159 /* getting an ENOSYS error indicates that direct mmap isn't 932 * possible (as opposed to tri 1160 * possible (as opposed to tried but failed) so we'll try to 933 * make a private copy of the 1161 * make a private copy of the data and map that instead */ 934 } 1162 } 935 1163 936 1164 937 /* allocate some memory to hold the ma 1165 /* allocate some memory to hold the mapping 938 * - note that this may not return a p 1166 * - note that this may not return a page-aligned address if the object 939 * we're allocating is smaller than 1167 * we're allocating is smaller than a page 940 */ 1168 */ 941 order = get_order(len); 1169 order = get_order(len); 942 total = 1 << order; 1170 total = 1 << order; 943 point = len >> PAGE_SHIFT; 1171 point = len >> PAGE_SHIFT; 944 1172 945 /* we don't want to allocate a power-o 1173 /* we don't want to allocate a power-of-2 sized page set */ 946 if (sysctl_nr_trim_pages && total - po 1174 if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) 947 total = point; 1175 total = point; 948 1176 949 base = alloc_pages_exact(total << PAGE 1177 base = alloc_pages_exact(total << PAGE_SHIFT, GFP_KERNEL); 950 if (!base) 1178 if (!base) 951 goto enomem; 1179 goto enomem; 
952 1180 953 atomic_long_add(total, &mmap_pages_all 1181 atomic_long_add(total, &mmap_pages_allocated); 954 1182 955 vm_flags_set(vma, VM_MAPPED_COPY); !! 1183 region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY; 956 region->vm_flags = vma->vm_flags; << 957 region->vm_start = (unsigned long) bas 1184 region->vm_start = (unsigned long) base; 958 region->vm_end = region->vm_start + 1185 region->vm_end = region->vm_start + len; 959 region->vm_top = region->vm_start + 1186 region->vm_top = region->vm_start + (total << PAGE_SHIFT); 960 1187 961 vma->vm_start = region->vm_start; 1188 vma->vm_start = region->vm_start; 962 vma->vm_end = region->vm_start + len 1189 vma->vm_end = region->vm_start + len; 963 1190 964 if (vma->vm_file) { 1191 if (vma->vm_file) { 965 /* read the contents of a file 1192 /* read the contents of a file into the copy */ >> 1193 mm_segment_t old_fs; 966 loff_t fpos; 1194 loff_t fpos; 967 1195 968 fpos = vma->vm_pgoff; 1196 fpos = vma->vm_pgoff; 969 fpos <<= PAGE_SHIFT; 1197 fpos <<= PAGE_SHIFT; 970 1198 971 ret = kernel_read(vma->vm_file !! 1199 old_fs = get_fs(); >> 1200 set_fs(KERNEL_DS); >> 1201 ret = __vfs_read(vma->vm_file, base, len, &fpos); >> 1202 set_fs(old_fs); >> 1203 972 if (ret < 0) 1204 if (ret < 0) 973 goto error_free; 1205 goto error_free; 974 1206 975 /* clear the last little bit * 1207 /* clear the last little bit */ 976 if (ret < len) 1208 if (ret < len) 977 memset(base + ret, 0, 1209 memset(base + ret, 0, len - ret); 978 1210 979 } else { << 980 vma_set_anonymous(vma); << 981 } 1211 } 982 1212 983 return 0; 1213 return 0; 984 1214 985 error_free: 1215 error_free: 986 free_page_series(region->vm_start, reg 1216 free_page_series(region->vm_start, region->vm_top); 987 region->vm_start = vma->vm_start = 0; 1217 region->vm_start = vma->vm_start = 0; 988 region->vm_end = vma->vm_end = 0; 1218 region->vm_end = vma->vm_end = 0; 989 region->vm_top = 0; 1219 region->vm_top = 0; 990 return ret; 1220 return ret; 991 1221 992 enomem: 1222 enomem: 993 pr_err("Allocation of length %lu from 1223 pr_err("Allocation of length %lu from process %d (%s) failed\n", 994 len, current->pid, current->com 1224 len, current->pid, current->comm); 995 show_mem(); !! 1225 show_free_areas(0); 996 return -ENOMEM; 1226 return -ENOMEM; 997 } 1227 } 998 1228 999 /* 1229 /* 1000 * handle mapping creation for uClinux 1230 * handle mapping creation for uClinux 1001 */ 1231 */ 1002 unsigned long do_mmap(struct file *file, 1232 unsigned long do_mmap(struct file *file, 1003 unsigned long addr, 1233 unsigned long addr, 1004 unsigned long len, 1234 unsigned long len, 1005 unsigned long prot, 1235 unsigned long prot, 1006 unsigned long flags, 1236 unsigned long flags, 1007 vm_flags_t vm_flags, 1237 vm_flags_t vm_flags, 1008 unsigned long pgoff, 1238 unsigned long pgoff, 1009 unsigned long *popula !! 
1239 unsigned long *populate) 1010 struct list_head *uf) << 1011 { 1240 { 1012 struct vm_area_struct *vma; 1241 struct vm_area_struct *vma; 1013 struct vm_region *region; 1242 struct vm_region *region; 1014 struct rb_node *rb; 1243 struct rb_node *rb; 1015 unsigned long capabilities, result; 1244 unsigned long capabilities, result; 1016 int ret; 1245 int ret; 1017 VMA_ITERATOR(vmi, current->mm, 0); << 1018 1246 1019 *populate = 0; 1247 *populate = 0; 1020 1248 1021 /* decide whether we should attempt t 1249 /* decide whether we should attempt the mapping, and if so what sort of 1022 * mapping */ 1250 * mapping */ 1023 ret = validate_mmap_request(file, add 1251 ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, 1024 &capabili 1252 &capabilities); 1025 if (ret < 0) 1253 if (ret < 0) 1026 return ret; 1254 return ret; 1027 1255 1028 /* we ignore the address hint */ 1256 /* we ignore the address hint */ 1029 addr = 0; 1257 addr = 0; 1030 len = PAGE_ALIGN(len); 1258 len = PAGE_ALIGN(len); 1031 1259 1032 /* we've determined that we can make 1260 /* we've determined that we can make the mapping, now translate what we 1033 * now know into VMA flags */ 1261 * now know into VMA flags */ 1034 vm_flags |= determine_vm_flags(file, 1262 vm_flags |= determine_vm_flags(file, prot, flags, capabilities); 1035 1263 1036 << 1037 /* we're going to need to record the 1264 /* we're going to need to record the mapping */ 1038 region = kmem_cache_zalloc(vm_region_ 1265 region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL); 1039 if (!region) 1266 if (!region) 1040 goto error_getting_region; 1267 goto error_getting_region; 1041 1268 1042 vma = vm_area_alloc(current->mm); !! 1269 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); 1043 if (!vma) 1270 if (!vma) 1044 goto error_getting_vma; 1271 goto error_getting_vma; 1045 1272 1046 region->vm_usage = 1; 1273 region->vm_usage = 1; 1047 region->vm_flags = vm_flags; 1274 region->vm_flags = vm_flags; 1048 region->vm_pgoff = pgoff; 1275 region->vm_pgoff = pgoff; 1049 1276 1050 vm_flags_init(vma, vm_flags); !! 1277 INIT_LIST_HEAD(&vma->anon_vma_chain); >> 1278 vma->vm_flags = vm_flags; 1051 vma->vm_pgoff = pgoff; 1279 vma->vm_pgoff = pgoff; 1052 1280 1053 if (file) { 1281 if (file) { 1054 region->vm_file = get_file(fi 1282 region->vm_file = get_file(file); 1055 vma->vm_file = get_file(file) 1283 vma->vm_file = get_file(file); 1056 } 1284 } 1057 1285 1058 down_write(&nommu_region_sem); 1286 down_write(&nommu_region_sem); 1059 1287 1060 /* if we want to share, we need to ch 1288 /* if we want to share, we need to check for regions created by other 1061 * mmap() calls that overlap with our 1289 * mmap() calls that overlap with our proposed mapping 1062 * - we can only share with a superse 1290 * - we can only share with a superset match on most regular files 1063 * - shared mappings on character dev 1291 * - shared mappings on character devices and memory backed files are 1064 * permitted to overlap inexactly a 1292 * permitted to overlap inexactly as far as we are concerned for in 1065 * these cases, sharing is handled 1293 * these cases, sharing is handled in the driver or filesystem rather 1066 * than here 1294 * than here 1067 */ 1295 */ 1068 if (is_nommu_shared_mapping(vm_flags) !! 
1296 if (vm_flags & VM_MAYSHARE) { 1069 struct vm_region *pregion; 1297 struct vm_region *pregion; 1070 unsigned long pglen, rpglen, 1298 unsigned long pglen, rpglen, pgend, rpgend, start; 1071 1299 1072 pglen = (len + PAGE_SIZE - 1) 1300 pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1073 pgend = pgoff + pglen; 1301 pgend = pgoff + pglen; 1074 1302 1075 for (rb = rb_first(&nommu_reg 1303 for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) { 1076 pregion = rb_entry(rb 1304 pregion = rb_entry(rb, struct vm_region, vm_rb); 1077 1305 1078 if (!is_nommu_shared_ !! 1306 if (!(pregion->vm_flags & VM_MAYSHARE)) 1079 continue; 1307 continue; 1080 1308 1081 /* search for overlap 1309 /* search for overlapping mappings on the same file */ 1082 if (file_inode(pregio 1310 if (file_inode(pregion->vm_file) != 1083 file_inode(file)) 1311 file_inode(file)) 1084 continue; 1312 continue; 1085 1313 1086 if (pregion->vm_pgoff 1314 if (pregion->vm_pgoff >= pgend) 1087 continue; 1315 continue; 1088 1316 1089 rpglen = pregion->vm_ 1317 rpglen = pregion->vm_end - pregion->vm_start; 1090 rpglen = (rpglen + PA 1318 rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT; 1091 rpgend = pregion->vm_ 1319 rpgend = pregion->vm_pgoff + rpglen; 1092 if (pgoff >= rpgend) 1320 if (pgoff >= rpgend) 1093 continue; 1321 continue; 1094 1322 1095 /* handle inexactly o 1323 /* handle inexactly overlapping matches between 1096 * mappings */ 1324 * mappings */ 1097 if ((pregion->vm_pgof 1325 if ((pregion->vm_pgoff != pgoff || rpglen != pglen) && 1098 !(pgoff >= pregio 1326 !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) { 1099 /* new mappin 1327 /* new mapping is not a subset of the region */ 1100 if (!(capabil 1328 if (!(capabilities & NOMMU_MAP_DIRECT)) 1101 goto 1329 goto sharing_violation; 1102 continue; 1330 continue; 1103 } 1331 } 1104 1332 1105 /* we've found a regi 1333 /* we've found a region we can share */ 1106 pregion->vm_usage++; 1334 pregion->vm_usage++; 1107 vma->vm_region = preg 1335 vma->vm_region = pregion; 1108 start = pregion->vm_s 1336 start = pregion->vm_start; 1109 start += (pgoff - pre 1337 start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; 1110 vma->vm_start = start 1338 vma->vm_start = start; 1111 vma->vm_end = start + 1339 vma->vm_end = start + len; 1112 1340 1113 if (pregion->vm_flags 1341 if (pregion->vm_flags & VM_MAPPED_COPY) 1114 vm_flags_set( !! 
1342 vma->vm_flags |= VM_MAPPED_COPY; 1115 else { 1343 else { 1116 ret = do_mmap 1344 ret = do_mmap_shared_file(vma); 1117 if (ret < 0) 1345 if (ret < 0) { 1118 vma-> 1346 vma->vm_region = NULL; 1119 vma-> 1347 vma->vm_start = 0; 1120 vma-> 1348 vma->vm_end = 0; 1121 pregi 1349 pregion->vm_usage--; 1122 pregi 1350 pregion = NULL; 1123 goto 1351 goto error_just_free; 1124 } 1352 } 1125 } 1353 } 1126 fput(region->vm_file) 1354 fput(region->vm_file); 1127 kmem_cache_free(vm_re 1355 kmem_cache_free(vm_region_jar, region); 1128 region = pregion; 1356 region = pregion; 1129 result = start; 1357 result = start; 1130 goto share; 1358 goto share; 1131 } 1359 } 1132 1360 1133 /* obtain the address at whic 1361 /* obtain the address at which to make a shared mapping 1134 * - this is the hook for qua 1362 * - this is the hook for quasi-memory character devices to 1135 * tell us the location of 1363 * tell us the location of a shared mapping 1136 */ 1364 */ 1137 if (capabilities & NOMMU_MAP_ 1365 if (capabilities & NOMMU_MAP_DIRECT) { 1138 addr = file->f_op->ge 1366 addr = file->f_op->get_unmapped_area(file, addr, len, 1139 1367 pgoff, flags); 1140 if (IS_ERR_VALUE(addr 1368 if (IS_ERR_VALUE(addr)) { 1141 ret = addr; 1369 ret = addr; 1142 if (ret != -E 1370 if (ret != -ENOSYS) 1143 goto 1371 goto error_just_free; 1144 1372 1145 /* the driver 1373 /* the driver refused to tell us where to site 1146 * the mappin 1374 * the mapping so we'll have to attempt to copy 1147 * it */ 1375 * it */ 1148 ret = -ENODEV 1376 ret = -ENODEV; 1149 if (!(capabil 1377 if (!(capabilities & NOMMU_MAP_COPY)) 1150 goto 1378 goto error_just_free; 1151 1379 1152 capabilities 1380 capabilities &= ~NOMMU_MAP_DIRECT; 1153 } else { 1381 } else { 1154 vma->vm_start 1382 vma->vm_start = region->vm_start = addr; 1155 vma->vm_end = 1383 vma->vm_end = region->vm_end = addr + len; 1156 } 1384 } 1157 } 1385 } 1158 } 1386 } 1159 1387 1160 vma->vm_region = region; 1388 vma->vm_region = region; 1161 1389 1162 /* set up the mapping 1390 /* set up the mapping 1163 * - the region is filled in if NOMMU 1391 * - the region is filled in if NOMMU_MAP_DIRECT is still set 1164 */ 1392 */ 1165 if (file && vma->vm_flags & VM_SHARED 1393 if (file && vma->vm_flags & VM_SHARED) 1166 ret = do_mmap_shared_file(vma 1394 ret = do_mmap_shared_file(vma); 1167 else 1395 else 1168 ret = do_mmap_private(vma, re 1396 ret = do_mmap_private(vma, region, len, capabilities); 1169 if (ret < 0) 1397 if (ret < 0) 1170 goto error_just_free; 1398 goto error_just_free; 1171 add_nommu_region(region); 1399 add_nommu_region(region); 1172 1400 1173 /* clear anonymous mappings that don' 1401 /* clear anonymous mappings that don't ask for uninitialized data */ 1174 if (!vma->vm_file && !! 1402 if (!vma->vm_file && !(flags & MAP_UNINITIALIZED)) 1175 (!IS_ENABLED(CONFIG_MMAP_ALLOW_UN << 1176 !(flags & MAP_UNINITIALIZED))) << 1177 memset((void *)region->vm_sta 1403 memset((void *)region->vm_start, 0, 1178 region->vm_end - regio 1404 region->vm_end - region->vm_start); 1179 1405 1180 /* okay... we have a mapping; now we 1406 /* okay... we have a mapping; now we have to register it */ 1181 result = vma->vm_start; 1407 result = vma->vm_start; 1182 1408 1183 current->mm->total_vm += len >> PAGE_ 1409 current->mm->total_vm += len >> PAGE_SHIFT; 1184 1410 1185 share: 1411 share: 1186 BUG_ON(!vma->vm_region); !! 
1412 add_vma_to_mm(current->mm, vma); 1187 vma_iter_config(&vmi, vma->vm_start, << 1188 if (vma_iter_prealloc(&vmi, vma)) << 1189 goto error_just_free; << 1190 << 1191 setup_vma_to_mm(vma, current->mm); << 1192 current->mm->map_count++; << 1193 /* add the VMA to the tree */ << 1194 vma_iter_store(&vmi, vma); << 1195 1413 1196 /* we flush the region from the icach 1414 /* we flush the region from the icache only when the first executable 1197 * mapping of it is made */ 1415 * mapping of it is made */ 1198 if (vma->vm_flags & VM_EXEC && !regio 1416 if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) { 1199 flush_icache_user_range(regio !! 1417 flush_icache_range(region->vm_start, region->vm_end); 1200 region->vm_icache_flushed = t 1418 region->vm_icache_flushed = true; 1201 } 1419 } 1202 1420 1203 up_write(&nommu_region_sem); 1421 up_write(&nommu_region_sem); 1204 1422 1205 return result; 1423 return result; 1206 1424 1207 error_just_free: 1425 error_just_free: 1208 up_write(&nommu_region_sem); 1426 up_write(&nommu_region_sem); 1209 error: 1427 error: 1210 vma_iter_free(&vmi); << 1211 if (region->vm_file) 1428 if (region->vm_file) 1212 fput(region->vm_file); 1429 fput(region->vm_file); 1213 kmem_cache_free(vm_region_jar, region 1430 kmem_cache_free(vm_region_jar, region); 1214 if (vma->vm_file) 1431 if (vma->vm_file) 1215 fput(vma->vm_file); 1432 fput(vma->vm_file); 1216 vm_area_free(vma); !! 1433 kmem_cache_free(vm_area_cachep, vma); 1217 return ret; 1434 return ret; 1218 1435 1219 sharing_violation: 1436 sharing_violation: 1220 up_write(&nommu_region_sem); 1437 up_write(&nommu_region_sem); 1221 pr_warn("Attempt to share mismatched 1438 pr_warn("Attempt to share mismatched mappings\n"); 1222 ret = -EINVAL; 1439 ret = -EINVAL; 1223 goto error; 1440 goto error; 1224 1441 1225 error_getting_vma: 1442 error_getting_vma: 1226 kmem_cache_free(vm_region_jar, region 1443 kmem_cache_free(vm_region_jar, region); 1227 pr_warn("Allocation of vma for %lu by 1444 pr_warn("Allocation of vma for %lu byte allocation from process %d failed\n", 1228 len, current->pid); 1445 len, current->pid); 1229 show_mem(); !! 1446 show_free_areas(0); 1230 return -ENOMEM; 1447 return -ENOMEM; 1231 1448 1232 error_getting_region: 1449 error_getting_region: 1233 pr_warn("Allocation of vm region for 1450 pr_warn("Allocation of vm region for %lu byte allocation from process %d failed\n", 1234 len, current->pid); 1451 len, current->pid); 1235 show_mem(); !! 1452 show_free_areas(0); 1236 return -ENOMEM; 1453 return -ENOMEM; 1237 } 1454 } 1238 1455 1239 unsigned long ksys_mmap_pgoff(unsigned long a !! 1456 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, 1240 unsigned long p !! 1457 unsigned long, prot, unsigned long, flags, 1241 unsigned long f !! 
1458 unsigned long, fd, unsigned long, pgoff) 1242 { 1459 { 1243 struct file *file = NULL; 1460 struct file *file = NULL; 1244 unsigned long retval = -EBADF; 1461 unsigned long retval = -EBADF; 1245 1462 1246 audit_mmap_fd(fd, flags); 1463 audit_mmap_fd(fd, flags); 1247 if (!(flags & MAP_ANONYMOUS)) { 1464 if (!(flags & MAP_ANONYMOUS)) { 1248 file = fget(fd); 1465 file = fget(fd); 1249 if (!file) 1466 if (!file) 1250 goto out; 1467 goto out; 1251 } 1468 } 1252 1469 >> 1470 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); >> 1471 1253 retval = vm_mmap_pgoff(file, addr, le 1472 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); 1254 1473 1255 if (file) 1474 if (file) 1256 fput(file); 1475 fput(file); 1257 out: 1476 out: 1258 return retval; 1477 return retval; 1259 } 1478 } 1260 1479 1261 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, ad << 1262 unsigned long, prot, unsigned << 1263 unsigned long, fd, unsigned l << 1264 { << 1265 return ksys_mmap_pgoff(addr, len, pro << 1266 } << 1267 << 1268 #ifdef __ARCH_WANT_SYS_OLD_MMAP 1480 #ifdef __ARCH_WANT_SYS_OLD_MMAP 1269 struct mmap_arg_struct { 1481 struct mmap_arg_struct { 1270 unsigned long addr; 1482 unsigned long addr; 1271 unsigned long len; 1483 unsigned long len; 1272 unsigned long prot; 1484 unsigned long prot; 1273 unsigned long flags; 1485 unsigned long flags; 1274 unsigned long fd; 1486 unsigned long fd; 1275 unsigned long offset; 1487 unsigned long offset; 1276 }; 1488 }; 1277 1489 1278 SYSCALL_DEFINE1(old_mmap, struct mmap_arg_str 1490 SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) 1279 { 1491 { 1280 struct mmap_arg_struct a; 1492 struct mmap_arg_struct a; 1281 1493 1282 if (copy_from_user(&a, arg, sizeof(a) 1494 if (copy_from_user(&a, arg, sizeof(a))) 1283 return -EFAULT; 1495 return -EFAULT; 1284 if (offset_in_page(a.offset)) 1496 if (offset_in_page(a.offset)) 1285 return -EINVAL; 1497 return -EINVAL; 1286 1498 1287 return ksys_mmap_pgoff(a.addr, a.len, !! 1499 return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, 1288 a.offset >> PA !! 1500 a.offset >> PAGE_SHIFT); 1289 } 1501 } 1290 #endif /* __ARCH_WANT_SYS_OLD_MMAP */ 1502 #endif /* __ARCH_WANT_SYS_OLD_MMAP */ 1291 1503 1292 /* 1504 /* 1293 * split a vma into two pieces at address 'ad 1505 * split a vma into two pieces at address 'addr', a new vma is allocated either 1294 * for the first part or the tail. 1506 * for the first part or the tail. 1295 */ 1507 */ 1296 static int split_vma(struct vma_iterator *vmi !! 1508 int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, 1297 unsigned long addr, int !! 
1509 unsigned long addr, int new_below) 1298 { 1510 { 1299 struct vm_area_struct *new; 1511 struct vm_area_struct *new; 1300 struct vm_region *region; 1512 struct vm_region *region; 1301 unsigned long npages; 1513 unsigned long npages; 1302 struct mm_struct *mm; << 1303 1514 1304 /* we're only permitted to split anon 1515 /* we're only permitted to split anonymous regions (these should have 1305 * only a single usage on the region) 1516 * only a single usage on the region) */ 1306 if (vma->vm_file) 1517 if (vma->vm_file) 1307 return -ENOMEM; 1518 return -ENOMEM; 1308 1519 1309 mm = vma->vm_mm; << 1310 if (mm->map_count >= sysctl_max_map_c 1520 if (mm->map_count >= sysctl_max_map_count) 1311 return -ENOMEM; 1521 return -ENOMEM; 1312 1522 1313 region = kmem_cache_alloc(vm_region_j 1523 region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL); 1314 if (!region) 1524 if (!region) 1315 return -ENOMEM; 1525 return -ENOMEM; 1316 1526 1317 new = vm_area_dup(vma); !! 1527 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); 1318 if (!new) !! 1528 if (!new) { 1319 goto err_vma_dup; !! 1529 kmem_cache_free(vm_region_jar, region); >> 1530 return -ENOMEM; >> 1531 } 1320 1532 1321 /* most fields are the same, copy all 1533 /* most fields are the same, copy all, and then fixup */ >> 1534 *new = *vma; 1322 *region = *vma->vm_region; 1535 *region = *vma->vm_region; 1323 new->vm_region = region; 1536 new->vm_region = region; 1324 1537 1325 npages = (addr - vma->vm_start) >> PA 1538 npages = (addr - vma->vm_start) >> PAGE_SHIFT; 1326 1539 1327 if (new_below) { 1540 if (new_below) { 1328 region->vm_top = region->vm_e 1541 region->vm_top = region->vm_end = new->vm_end = addr; 1329 } else { 1542 } else { 1330 region->vm_start = new->vm_st 1543 region->vm_start = new->vm_start = addr; 1331 region->vm_pgoff = new->vm_pg 1544 region->vm_pgoff = new->vm_pgoff += npages; 1332 } 1545 } 1333 1546 1334 vma_iter_config(vmi, new->vm_start, n << 1335 if (vma_iter_prealloc(vmi, vma)) { << 1336 pr_warn("Allocation of vma tr << 1337 current->pid); << 1338 goto err_vmi_preallocate; << 1339 } << 1340 << 1341 if (new->vm_ops && new->vm_ops->open) 1547 if (new->vm_ops && new->vm_ops->open) 1342 new->vm_ops->open(new); 1548 new->vm_ops->open(new); 1343 1549 >> 1550 delete_vma_from_mm(vma); 1344 down_write(&nommu_region_sem); 1551 down_write(&nommu_region_sem); 1345 delete_nommu_region(vma->vm_region); 1552 delete_nommu_region(vma->vm_region); 1346 if (new_below) { 1553 if (new_below) { 1347 vma->vm_region->vm_start = vm 1554 vma->vm_region->vm_start = vma->vm_start = addr; 1348 vma->vm_region->vm_pgoff = vm 1555 vma->vm_region->vm_pgoff = vma->vm_pgoff += npages; 1349 } else { 1556 } else { 1350 vma->vm_region->vm_end = vma- 1557 vma->vm_region->vm_end = vma->vm_end = addr; 1351 vma->vm_region->vm_top = addr 1558 vma->vm_region->vm_top = addr; 1352 } 1559 } 1353 add_nommu_region(vma->vm_region); 1560 add_nommu_region(vma->vm_region); 1354 add_nommu_region(new->vm_region); 1561 add_nommu_region(new->vm_region); 1355 up_write(&nommu_region_sem); 1562 up_write(&nommu_region_sem); 1356 !! 1563 add_vma_to_mm(mm, vma); 1357 setup_vma_to_mm(vma, mm); !! 
1564 add_vma_to_mm(mm, new); 1358 setup_vma_to_mm(new, mm); << 1359 vma_iter_store(vmi, new); << 1360 mm->map_count++; << 1361 return 0; 1565 return 0; 1362 << 1363 err_vmi_preallocate: << 1364 vm_area_free(new); << 1365 err_vma_dup: << 1366 kmem_cache_free(vm_region_jar, region << 1367 return -ENOMEM; << 1368 } 1566 } 1369 1567 1370 /* 1568 /* 1371 * shrink a VMA by removing the specified chu 1569 * shrink a VMA by removing the specified chunk from either the beginning or 1372 * the end 1570 * the end 1373 */ 1571 */ 1374 static int vmi_shrink_vma(struct vma_iterator !! 1572 static int shrink_vma(struct mm_struct *mm, 1375 struct vm_area_struct * 1573 struct vm_area_struct *vma, 1376 unsigned long from, uns 1574 unsigned long from, unsigned long to) 1377 { 1575 { 1378 struct vm_region *region; 1576 struct vm_region *region; 1379 1577 1380 /* adjust the VMA's pointers, which m 1578 /* adjust the VMA's pointers, which may reposition it in the MM's tree 1381 * and list */ 1579 * and list */ 1382 if (from > vma->vm_start) { !! 1580 delete_vma_from_mm(vma); 1383 if (vma_iter_clear_gfp(vmi, f !! 1581 if (from > vma->vm_start) 1384 return -ENOMEM; << 1385 vma->vm_end = from; 1582 vma->vm_end = from; 1386 } else { !! 1583 else 1387 if (vma_iter_clear_gfp(vmi, v << 1388 return -ENOMEM; << 1389 vma->vm_start = to; 1584 vma->vm_start = to; 1390 } !! 1585 add_vma_to_mm(mm, vma); 1391 1586 1392 /* cut the backing region down to siz 1587 /* cut the backing region down to size */ 1393 region = vma->vm_region; 1588 region = vma->vm_region; 1394 BUG_ON(region->vm_usage != 1); 1589 BUG_ON(region->vm_usage != 1); 1395 1590 1396 down_write(&nommu_region_sem); 1591 down_write(&nommu_region_sem); 1397 delete_nommu_region(region); 1592 delete_nommu_region(region); 1398 if (from > region->vm_start) { 1593 if (from > region->vm_start) { 1399 to = region->vm_top; 1594 to = region->vm_top; 1400 region->vm_top = region->vm_e 1595 region->vm_top = region->vm_end = from; 1401 } else { 1596 } else { 1402 region->vm_start = to; 1597 region->vm_start = to; 1403 } 1598 } 1404 add_nommu_region(region); 1599 add_nommu_region(region); 1405 up_write(&nommu_region_sem); 1600 up_write(&nommu_region_sem); 1406 1601 1407 free_page_series(from, to); 1602 free_page_series(from, to); 1408 return 0; 1603 return 0; 1409 } 1604 } 1410 1605 1411 /* 1606 /* 1412 * release a mapping 1607 * release a mapping 1413 * - under NOMMU conditions the chunk to be u 1608 * - under NOMMU conditions the chunk to be unmapped must be backed by a single 1414 * VMA, though it need not cover the whole 1609 * VMA, though it need not cover the whole VMA 1415 */ 1610 */ 1416 int do_munmap(struct mm_struct *mm, unsigned !! 1611 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) 1417 { 1612 { 1418 VMA_ITERATOR(vmi, mm, start); << 1419 struct vm_area_struct *vma; 1613 struct vm_area_struct *vma; 1420 unsigned long end; 1614 unsigned long end; 1421 int ret = 0; !! 1615 int ret; 1422 1616 1423 len = PAGE_ALIGN(len); 1617 len = PAGE_ALIGN(len); 1424 if (len == 0) 1618 if (len == 0) 1425 return -EINVAL; 1619 return -EINVAL; 1426 1620 1427 end = start + len; 1621 end = start + len; 1428 1622 1429 /* find the first potentially overlap 1623 /* find the first potentially overlapping VMA */ 1430 vma = vma_find(&vmi, end); !! 
1624 vma = find_vma(mm, start); 1431 if (!vma) { 1625 if (!vma) { 1432 static int limit; 1626 static int limit; 1433 if (limit < 5) { 1627 if (limit < 5) { 1434 pr_warn("munmap of me 1628 pr_warn("munmap of memory not mmapped by process %d (%s): 0x%lx-0x%lx\n", 1435 curre 1629 current->pid, current->comm, 1436 start 1630 start, start + len - 1); 1437 limit++; 1631 limit++; 1438 } 1632 } 1439 return -EINVAL; 1633 return -EINVAL; 1440 } 1634 } 1441 1635 1442 /* we're allowed to split an anonymou 1636 /* we're allowed to split an anonymous VMA but not a file-backed one */ 1443 if (vma->vm_file) { 1637 if (vma->vm_file) { 1444 do { 1638 do { 1445 if (start > vma->vm_s 1639 if (start > vma->vm_start) 1446 return -EINVA 1640 return -EINVAL; 1447 if (end == vma->vm_en 1641 if (end == vma->vm_end) 1448 goto erase_wh 1642 goto erase_whole_vma; 1449 vma = vma_find(&vmi, !! 1643 vma = vma->vm_next; 1450 } while (vma); 1644 } while (vma); 1451 return -EINVAL; 1645 return -EINVAL; 1452 } else { 1646 } else { 1453 /* the chunk must be a subset 1647 /* the chunk must be a subset of the VMA found */ 1454 if (start == vma->vm_start && 1648 if (start == vma->vm_start && end == vma->vm_end) 1455 goto erase_whole_vma; 1649 goto erase_whole_vma; 1456 if (start < vma->vm_start || 1650 if (start < vma->vm_start || end > vma->vm_end) 1457 return -EINVAL; 1651 return -EINVAL; 1458 if (offset_in_page(start)) 1652 if (offset_in_page(start)) 1459 return -EINVAL; 1653 return -EINVAL; 1460 if (end != vma->vm_end && off 1654 if (end != vma->vm_end && offset_in_page(end)) 1461 return -EINVAL; 1655 return -EINVAL; 1462 if (start != vma->vm_start && 1656 if (start != vma->vm_start && end != vma->vm_end) { 1463 ret = split_vma(&vmi, !! 1657 ret = split_vma(mm, vma, start, 1); 1464 if (ret < 0) 1658 if (ret < 0) 1465 return ret; 1659 return ret; 1466 } 1660 } 1467 return vmi_shrink_vma(&vmi, v !! 1661 return shrink_vma(mm, vma, start, end); 1468 } 1662 } 1469 1663 1470 erase_whole_vma: 1664 erase_whole_vma: 1471 if (delete_vma_from_mm(vma)) !! 1665 delete_vma_from_mm(vma); 1472 ret = -ENOMEM; !! 1666 delete_vma(mm, vma); 1473 else !! 1667 return 0; 1474 delete_vma(mm, vma); << 1475 return ret; << 1476 } 1668 } >> 1669 EXPORT_SYMBOL(do_munmap); 1477 1670 1478 int vm_munmap(unsigned long addr, size_t len) 1671 int vm_munmap(unsigned long addr, size_t len) 1479 { 1672 { 1480 struct mm_struct *mm = current->mm; 1673 struct mm_struct *mm = current->mm; 1481 int ret; 1674 int ret; 1482 1675 1483 mmap_write_lock(mm); !! 1676 down_write(&mm->mmap_sem); 1484 ret = do_munmap(mm, addr, len, NULL); !! 1677 ret = do_munmap(mm, addr, len); 1485 mmap_write_unlock(mm); !! 1678 up_write(&mm->mmap_sem); 1486 return ret; 1679 return ret; 1487 } 1680 } 1488 EXPORT_SYMBOL(vm_munmap); 1681 EXPORT_SYMBOL(vm_munmap); 1489 1682 1490 SYSCALL_DEFINE2(munmap, unsigned long, addr, 1683 SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) 1491 { 1684 { 1492 return vm_munmap(addr, len); 1685 return vm_munmap(addr, len); 1493 } 1686 } 1494 1687 1495 /* 1688 /* 1496 * release all the mappings made in a process 1689 * release all the mappings made in a process's VM space 1497 */ 1690 */ 1498 void exit_mmap(struct mm_struct *mm) 1691 void exit_mmap(struct mm_struct *mm) 1499 { 1692 { 1500 VMA_ITERATOR(vmi, mm, 0); << 1501 struct vm_area_struct *vma; 1693 struct vm_area_struct *vma; 1502 1694 1503 if (!mm) 1695 if (!mm) 1504 return; 1696 return; 1505 1697 1506 mm->total_vm = 0; 1698 mm->total_vm = 0; 1507 1699 1508 /* !! 
1700 while ((vma = mm->mmap)) { 1509 * Lock the mm to avoid assert compla !! 1701 mm->mmap = vma->vm_next; 1510 * user of the mm !! 1702 delete_vma_from_mm(vma); 1511 */ << 1512 mmap_write_lock(mm); << 1513 for_each_vma(vmi, vma) { << 1514 cleanup_vma_from_mm(vma); << 1515 delete_vma(mm, vma); 1703 delete_vma(mm, vma); 1516 cond_resched(); 1704 cond_resched(); 1517 } 1705 } 1518 __mt_destroy(&mm->mm_mt); !! 1706 } 1519 mmap_write_unlock(mm); !! 1707 >> 1708 unsigned long vm_brk(unsigned long addr, unsigned long len) >> 1709 { >> 1710 return -ENOMEM; 1520 } 1711 } 1521 1712 1522 /* 1713 /* 1523 * expand (or shrink) an existing mapping, po 1714 * expand (or shrink) an existing mapping, potentially moving it at the same 1524 * time (controlled by the MREMAP_MAYMOVE fla 1715 * time (controlled by the MREMAP_MAYMOVE flag and available VM space) 1525 * 1716 * 1526 * under NOMMU conditions, we only permit cha 1717 * under NOMMU conditions, we only permit changing a mapping's size, and only 1527 * as long as it stays within the region allo 1718 * as long as it stays within the region allocated by do_mmap_private() and the 1528 * block is not shareable 1719 * block is not shareable 1529 * 1720 * 1530 * MREMAP_FIXED is not supported under NOMMU 1721 * MREMAP_FIXED is not supported under NOMMU conditions 1531 */ 1722 */ 1532 static unsigned long do_mremap(unsigned long 1723 static unsigned long do_mremap(unsigned long addr, 1533 unsigned long old_len 1724 unsigned long old_len, unsigned long new_len, 1534 unsigned long flags, 1725 unsigned long flags, unsigned long new_addr) 1535 { 1726 { 1536 struct vm_area_struct *vma; 1727 struct vm_area_struct *vma; 1537 1728 1538 /* insanity checks first */ 1729 /* insanity checks first */ 1539 old_len = PAGE_ALIGN(old_len); 1730 old_len = PAGE_ALIGN(old_len); 1540 new_len = PAGE_ALIGN(new_len); 1731 new_len = PAGE_ALIGN(new_len); 1541 if (old_len == 0 || new_len == 0) 1732 if (old_len == 0 || new_len == 0) 1542 return (unsigned long) -EINVA 1733 return (unsigned long) -EINVAL; 1543 1734 1544 if (offset_in_page(addr)) 1735 if (offset_in_page(addr)) 1545 return -EINVAL; 1736 return -EINVAL; 1546 1737 1547 if (flags & MREMAP_FIXED && new_addr 1738 if (flags & MREMAP_FIXED && new_addr != addr) 1548 return (unsigned long) -EINVA 1739 return (unsigned long) -EINVAL; 1549 1740 1550 vma = find_vma_exact(current->mm, add 1741 vma = find_vma_exact(current->mm, addr, old_len); 1551 if (!vma) 1742 if (!vma) 1552 return (unsigned long) -EINVA 1743 return (unsigned long) -EINVAL; 1553 1744 1554 if (vma->vm_end != vma->vm_start + ol 1745 if (vma->vm_end != vma->vm_start + old_len) 1555 return (unsigned long) -EFAUL 1746 return (unsigned long) -EFAULT; 1556 1747 1557 if (is_nommu_shared_mapping(vma->vm_f !! 
1748 if (vma->vm_flags & VM_MAYSHARE) 1558 return (unsigned long) -EPERM 1749 return (unsigned long) -EPERM; 1559 1750 1560 if (new_len > vma->vm_region->vm_end 1751 if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start) 1561 return (unsigned long) -ENOME 1752 return (unsigned long) -ENOMEM; 1562 1753 1563 /* all checks complete - do it */ 1754 /* all checks complete - do it */ 1564 vma->vm_end = vma->vm_start + new_len 1755 vma->vm_end = vma->vm_start + new_len; 1565 return vma->vm_start; 1756 return vma->vm_start; 1566 } 1757 } 1567 1758 1568 SYSCALL_DEFINE5(mremap, unsigned long, addr, 1759 SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, 1569 unsigned long, new_len, unsig 1760 unsigned long, new_len, unsigned long, flags, 1570 unsigned long, new_addr) 1761 unsigned long, new_addr) 1571 { 1762 { 1572 unsigned long ret; 1763 unsigned long ret; 1573 1764 1574 mmap_write_lock(current->mm); !! 1765 down_write(¤t->mm->mmap_sem); 1575 ret = do_mremap(addr, old_len, new_le 1766 ret = do_mremap(addr, old_len, new_len, flags, new_addr); 1576 mmap_write_unlock(current->mm); !! 1767 up_write(¤t->mm->mmap_sem); 1577 return ret; 1768 return ret; 1578 } 1769 } 1579 1770 >> 1771 struct page *follow_page_mask(struct vm_area_struct *vma, >> 1772 unsigned long address, unsigned int flags, >> 1773 unsigned int *page_mask) >> 1774 { >> 1775 *page_mask = 0; >> 1776 return NULL; >> 1777 } >> 1778 1580 int remap_pfn_range(struct vm_area_struct *vm 1779 int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, 1581 unsigned long pfn, unsigned l 1780 unsigned long pfn, unsigned long size, pgprot_t prot) 1582 { 1781 { 1583 if (addr != (pfn << PAGE_SHIFT)) 1782 if (addr != (pfn << PAGE_SHIFT)) 1584 return -EINVAL; 1783 return -EINVAL; 1585 1784 1586 vm_flags_set(vma, VM_IO | VM_PFNMAP | !! 1785 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; 1587 return 0; 1786 return 0; 1588 } 1787 } 1589 EXPORT_SYMBOL(remap_pfn_range); 1788 EXPORT_SYMBOL(remap_pfn_range); 1590 1789 1591 int vm_iomap_memory(struct vm_area_struct *vm 1790 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len) 1592 { 1791 { 1593 unsigned long pfn = start >> PAGE_SHI 1792 unsigned long pfn = start >> PAGE_SHIFT; 1594 unsigned long vm_len = vma->vm_end - 1793 unsigned long vm_len = vma->vm_end - vma->vm_start; 1595 1794 1596 pfn += vma->vm_pgoff; 1795 pfn += vma->vm_pgoff; 1597 return io_remap_pfn_range(vma, vma->v 1796 return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); 1598 } 1797 } 1599 EXPORT_SYMBOL(vm_iomap_memory); 1798 EXPORT_SYMBOL(vm_iomap_memory); 1600 1799 1601 int remap_vmalloc_range(struct vm_area_struct 1800 int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, 1602 unsigned long pgoff) 1801 unsigned long pgoff) 1603 { 1802 { 1604 unsigned int size = vma->vm_end - vma 1803 unsigned int size = vma->vm_end - vma->vm_start; 1605 1804 1606 if (!(vma->vm_flags & VM_USERMAP)) 1805 if (!(vma->vm_flags & VM_USERMAP)) 1607 return -EINVAL; 1806 return -EINVAL; 1608 1807 1609 vma->vm_start = (unsigned long)(addr 1808 vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT)); 1610 vma->vm_end = vma->vm_start + size; 1809 vma->vm_end = vma->vm_start + size; 1611 1810 1612 return 0; 1811 return 0; 1613 } 1812 } 1614 EXPORT_SYMBOL(remap_vmalloc_range); 1813 EXPORT_SYMBOL(remap_vmalloc_range); 1615 1814 1616 vm_fault_t filemap_fault(struct vm_fault *vmf !! 
1815 unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, >> 1816 unsigned long len, unsigned long pgoff, unsigned long flags) >> 1817 { >> 1818 return -ENOMEM; >> 1819 } >> 1820 >> 1821 void unmap_mapping_range(struct address_space *mapping, >> 1822 loff_t const holebegin, loff_t const holelen, >> 1823 int even_cows) >> 1824 { >> 1825 } >> 1826 EXPORT_SYMBOL(unmap_mapping_range); >> 1827 >> 1828 /* >> 1829 * Check that a process has enough memory to allocate a new virtual >> 1830 * mapping. 0 means there is enough memory for the allocation to >> 1831 * succeed and -ENOMEM implies there is not. >> 1832 * >> 1833 * We currently support three overcommit policies, which are set via the >> 1834 * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting >> 1835 * >> 1836 * Strict overcommit modes added 2002 Feb 26 by Alan Cox. >> 1837 * Additional code 2002 Jul 20 by Robert Love. >> 1838 * >> 1839 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise. >> 1840 * >> 1841 * Note this is a helper function intended to be used by LSMs which >> 1842 * wish to use this logic. >> 1843 */ >> 1844 int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) >> 1845 { >> 1846 long free, allowed, reserve; >> 1847 >> 1848 vm_acct_memory(pages); >> 1849 >> 1850 /* >> 1851 * Sometimes we want to use more memory than we have >> 1852 */ >> 1853 if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS) >> 1854 return 0; >> 1855 >> 1856 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { >> 1857 free = global_page_state(NR_FREE_PAGES); >> 1858 free += global_page_state(NR_FILE_PAGES); >> 1859 >> 1860 /* >> 1861 * shmem pages shouldn't be counted as free in this >> 1862 * case, they can't be purged, only swapped out, and >> 1863 * that won't affect the overall amount of available >> 1864 * memory in the system. >> 1865 */ >> 1866 free -= global_page_state(NR_SHMEM); >> 1867 >> 1868 free += get_nr_swap_pages(); >> 1869 >> 1870 /* >> 1871 * Any slabs which are created with the >> 1872 * SLAB_RECLAIM_ACCOUNT flag claim to have contents >> 1873 * which are reclaimable, under pressure. The dentry >> 1874 * cache and most inode caches should fall into this >> 1875 */ >> 1876 free += global_page_state(NR_SLAB_RECLAIMABLE); >> 1877 >> 1878 /* >> 1879 * Leave reserved pages. The pages are not for anonymous pages. 
>> 1880 */ >> 1881 if (free <= totalreserve_pages) >> 1882 goto error; >> 1883 else >> 1884 free -= totalreserve_pages; >> 1885 >> 1886 /* >> 1887 * Reserve some for root >> 1888 */ >> 1889 if (!cap_sys_admin) >> 1890 free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); >> 1891 >> 1892 if (free > pages) >> 1893 return 0; >> 1894 >> 1895 goto error; >> 1896 } >> 1897 >> 1898 allowed = vm_commit_limit(); >> 1899 /* >> 1900 * Reserve some 3% for root >> 1901 */ >> 1902 if (!cap_sys_admin) >> 1903 allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); >> 1904 >> 1905 /* >> 1906 * Don't let a single process grow so big a user can't recover >> 1907 */ >> 1908 if (mm) { >> 1909 reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10); >> 1910 allowed -= min_t(long, mm->total_vm / 32, reserve); >> 1911 } >> 1912 >> 1913 if (percpu_counter_read_positive(&vm_committed_as) < allowed) >> 1914 return 0; >> 1915 >> 1916 error: >> 1917 vm_unacct_memory(pages); >> 1918 >> 1919 return -ENOMEM; >> 1920 } >> 1921 >> 1922 int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1617 { 1923 { 1618 BUG(); 1924 BUG(); 1619 return 0; 1925 return 0; 1620 } 1926 } 1621 EXPORT_SYMBOL(filemap_fault); 1927 EXPORT_SYMBOL(filemap_fault); 1622 1928 1623 vm_fault_t filemap_map_pages(struct vm_fault !! 1929 void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf) 1624 pgoff_t start_pgoff, pgoff_t << 1625 { 1930 { 1626 BUG(); 1931 BUG(); 1627 return 0; << 1628 } 1932 } 1629 EXPORT_SYMBOL(filemap_map_pages); 1933 EXPORT_SYMBOL(filemap_map_pages); 1630 1934 1631 static int __access_remote_vm(struct mm_struc !! 1935 static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, 1632 void *buf, int !! 1936 unsigned long addr, void *buf, int len, unsigned int gup_flags) 1633 { 1937 { 1634 struct vm_area_struct *vma; 1938 struct vm_area_struct *vma; 1635 int write = gup_flags & FOLL_WRITE; 1939 int write = gup_flags & FOLL_WRITE; 1636 1940 1637 if (mmap_read_lock_killable(mm)) !! 1941 down_read(&mm->mmap_sem); 1638 return 0; << 1639 1942 1640 /* the access must start within one o 1943 /* the access must start within one of the target process's mappings */ 1641 vma = find_vma(mm, addr); 1944 vma = find_vma(mm, addr); 1642 if (vma) { 1945 if (vma) { 1643 /* don't overrun this mapping 1946 /* don't overrun this mapping */ 1644 if (addr + len >= vma->vm_end 1947 if (addr + len >= vma->vm_end) 1645 len = vma->vm_end - a 1948 len = vma->vm_end - addr; 1646 1949 1647 /* only read or write mapping 1950 /* only read or write mappings where it is permitted */ 1648 if (write && vma->vm_flags & 1951 if (write && vma->vm_flags & VM_MAYWRITE) 1649 copy_to_user_page(vma 1952 copy_to_user_page(vma, NULL, addr, 1650 (voi 1953 (void *) addr, buf, len); 1651 else if (!write && vma->vm_fl 1954 else if (!write && vma->vm_flags & VM_MAYREAD) 1652 copy_from_user_page(v 1955 copy_from_user_page(vma, NULL, addr, 1653 b 1956 buf, (void *) addr, len); 1654 else 1957 else 1655 len = 0; 1958 len = 0; 1656 } else { 1959 } else { 1657 len = 0; 1960 len = 0; 1658 } 1961 } 1659 1962 1660 mmap_read_unlock(mm); !! 1963 up_read(&mm->mmap_sem); 1661 1964 1662 return len; 1965 return len; 1663 } 1966 } 1664 1967 1665 /** 1968 /** 1666 * access_remote_vm - access another process' !! 
1969 * @access_remote_vm - access another process' address space 1667 * @mm: the mm_struct of the target a 1970 * @mm: the mm_struct of the target address space 1668 * @addr: start address to access 1971 * @addr: start address to access 1669 * @buf: source or destination buffer 1972 * @buf: source or destination buffer 1670 * @len: number of bytes to transfer 1973 * @len: number of bytes to transfer 1671 * @gup_flags: flags modifying lookup behavi 1974 * @gup_flags: flags modifying lookup behaviour 1672 * 1975 * 1673 * The caller must hold a reference on @mm. 1976 * The caller must hold a reference on @mm. 1674 */ 1977 */ 1675 int access_remote_vm(struct mm_struct *mm, un 1978 int access_remote_vm(struct mm_struct *mm, unsigned long addr, 1676 void *buf, int len, unsigned 1979 void *buf, int len, unsigned int gup_flags) 1677 { 1980 { 1678 return __access_remote_vm(mm, addr, b !! 1981 return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); 1679 } 1982 } 1680 1983 1681 /* 1984 /* 1682 * Access another process' address space. 1985 * Access another process' address space. 1683 * - source/target buffer must be kernel spac 1986 * - source/target buffer must be kernel space 1684 */ 1987 */ 1685 int access_process_vm(struct task_struct *tsk !! 1988 int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) 1686 unsigned int gup_flags) << 1687 { 1989 { 1688 struct mm_struct *mm; 1990 struct mm_struct *mm; 1689 1991 1690 if (addr + len < addr) 1992 if (addr + len < addr) 1691 return 0; 1993 return 0; 1692 1994 1693 mm = get_task_mm(tsk); 1995 mm = get_task_mm(tsk); 1694 if (!mm) 1996 if (!mm) 1695 return 0; 1997 return 0; 1696 1998 1697 len = __access_remote_vm(mm, addr, bu !! 1999 len = __access_remote_vm(tsk, mm, addr, buf, len, >> 2000 write ? FOLL_WRITE : 0); 1698 2001 1699 mmput(mm); 2002 mmput(mm); 1700 return len; 2003 return len; 1701 } 2004 } 1702 EXPORT_SYMBOL_GPL(access_process_vm); << 1703 2005 1704 /** 2006 /** 1705 * nommu_shrink_inode_mappings - Shrink the s 2007 * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode 1706 * @inode: The inode to check 2008 * @inode: The inode to check 1707 * @size: The current filesize of the inode 2009 * @size: The current filesize of the inode 1708 * @newsize: The proposed filesize of the ino 2010 * @newsize: The proposed filesize of the inode 1709 * 2011 * 1710 * Check the shared mappings on an inode on b 2012 * Check the shared mappings on an inode on behalf of a shrinking truncate to 1711 * make sure that any outstanding VMAs aren't !! 2013 * make sure that that any outstanding VMAs aren't broken and then shrink the 1712 * vm_regions that extend beyond so that do_m !! 2014 * vm_regions that extend that beyond so that do_mmap_pgoff() doesn't 1713 * automatically grant mappings that are too 2015 * automatically grant mappings that are too large. 
1714 */ 2016 */ 1715 int nommu_shrink_inode_mappings(struct inode 2017 int nommu_shrink_inode_mappings(struct inode *inode, size_t size, 1716 size_t newsiz 2018 size_t newsize) 1717 { 2019 { 1718 struct vm_area_struct *vma; 2020 struct vm_area_struct *vma; 1719 struct vm_region *region; 2021 struct vm_region *region; 1720 pgoff_t low, high; 2022 pgoff_t low, high; 1721 size_t r_size, r_top; 2023 size_t r_size, r_top; 1722 2024 1723 low = newsize >> PAGE_SHIFT; 2025 low = newsize >> PAGE_SHIFT; 1724 high = (size + PAGE_SIZE - 1) >> PAGE 2026 high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 1725 2027 1726 down_write(&nommu_region_sem); 2028 down_write(&nommu_region_sem); 1727 i_mmap_lock_read(inode->i_mapping); 2029 i_mmap_lock_read(inode->i_mapping); 1728 2030 1729 /* search for VMAs that fall within t 2031 /* search for VMAs that fall within the dead zone */ 1730 vma_interval_tree_foreach(vma, &inode 2032 vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) { 1731 /* found one - only intereste 2033 /* found one - only interested if it's shared out of the page 1732 * cache */ 2034 * cache */ 1733 if (vma->vm_flags & VM_SHARED 2035 if (vma->vm_flags & VM_SHARED) { 1734 i_mmap_unlock_read(in 2036 i_mmap_unlock_read(inode->i_mapping); 1735 up_write(&nommu_regio 2037 up_write(&nommu_region_sem); 1736 return -ETXTBSY; /* n 2038 return -ETXTBSY; /* not quite true, but near enough */ 1737 } 2039 } 1738 } 2040 } 1739 2041 1740 /* reduce any regions that overlap th 2042 /* reduce any regions that overlap the dead zone - if in existence, 1741 * these will be pointed to by VMAs t 2043 * these will be pointed to by VMAs that don't overlap the dead zone 1742 * 2044 * 1743 * we don't check for any regions tha 2045 * we don't check for any regions that start beyond the EOF as there 1744 * shouldn't be any 2046 * shouldn't be any 1745 */ 2047 */ 1746 vma_interval_tree_foreach(vma, &inode 2048 vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, 0, ULONG_MAX) { 1747 if (!(vma->vm_flags & VM_SHAR 2049 if (!(vma->vm_flags & VM_SHARED)) 1748 continue; 2050 continue; 1749 2051 1750 region = vma->vm_region; 2052 region = vma->vm_region; 1751 r_size = region->vm_top - reg 2053 r_size = region->vm_top - region->vm_start; 1752 r_top = (region->vm_pgoff << 2054 r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size; 1753 2055 1754 if (r_top > newsize) { 2056 if (r_top > newsize) { 1755 region->vm_top -= r_t 2057 region->vm_top -= r_top - newsize; 1756 if (region->vm_end > 2058 if (region->vm_end > region->vm_top) 1757 region->vm_en 2059 region->vm_end = region->vm_top; 1758 } 2060 } 1759 } 2061 } 1760 2062 1761 i_mmap_unlock_read(inode->i_mapping); 2063 i_mmap_unlock_read(inode->i_mapping); 1762 up_write(&nommu_region_sem); 2064 up_write(&nommu_region_sem); 1763 return 0; 2065 return 0; 1764 } 2066 } 1765 2067 1766 /* 2068 /* 1767 * Initialise sysctl_user_reserve_kbytes. 2069 * Initialise sysctl_user_reserve_kbytes. 1768 * 2070 * 1769 * This is intended to prevent a user from st 2071 * This is intended to prevent a user from starting a single memory hogging 1770 * process, such that they cannot recover (ki 2072 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER 1771 * mode. 2073 * mode. 1772 * 2074 * 1773 * The default value is min(3% of free memory 2075 * The default value is min(3% of free memory, 128MB) 1774 * 128MB is enough to recover with sshd/login 2076 * 128MB is enough to recover with sshd/login, bash, and top/kill. 
1775 */ 2077 */ 1776 static int __meminit init_user_reserve(void) 2078 static int __meminit init_user_reserve(void) 1777 { 2079 { 1778 unsigned long free_kbytes; 2080 unsigned long free_kbytes; 1779 2081 1780 free_kbytes = K(global_zone_page_stat !! 2082 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); 1781 2083 1782 sysctl_user_reserve_kbytes = min(free 2084 sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); 1783 return 0; 2085 return 0; 1784 } 2086 } 1785 subsys_initcall(init_user_reserve); 2087 subsys_initcall(init_user_reserve); 1786 2088 1787 /* 2089 /* 1788 * Initialise sysctl_admin_reserve_kbytes. 2090 * Initialise sysctl_admin_reserve_kbytes. 1789 * 2091 * 1790 * The purpose of sysctl_admin_reserve_kbytes 2092 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin 1791 * to log in and kill a memory hogging proces 2093 * to log in and kill a memory hogging process. 1792 * 2094 * 1793 * Systems with more than 256MB will reserve 2095 * Systems with more than 256MB will reserve 8MB, enough to recover 1794 * with sshd, bash, and top in OVERCOMMIT_GUE 2096 * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will 1795 * only reserve 3% of free pages by default. 2097 * only reserve 3% of free pages by default. 1796 */ 2098 */ 1797 static int __meminit init_admin_reserve(void) 2099 static int __meminit init_admin_reserve(void) 1798 { 2100 { 1799 unsigned long free_kbytes; 2101 unsigned long free_kbytes; 1800 2102 1801 free_kbytes = K(global_zone_page_stat !! 2103 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); 1802 2104 1803 sysctl_admin_reserve_kbytes = min(fre 2105 sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); 1804 return 0; 2106 return 0; 1805 } 2107 } 1806 subsys_initcall(init_admin_reserve); 2108 subsys_initcall(init_admin_reserve); 1807 2109
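The following sketches are editorial illustrations and are not part of nommu.c. This first one shows the user-visible shape of the do_mmap()/do_munmap() paths above: a MAP_PRIVATE mapping of a regular file is serviced by do_mmap_private(), which copies the file contents into freshly allocated pages (the address hint is ignored), and a file-backed mapping can only be unmapped as a whole. A minimal userspace sketch, assuming 4 KiB pages and using /etc/hostname purely as an example file:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	int fd = open("/etc/hostname", O_RDONLY);	/* example file only */
	if (fd < 0)
		return 1;

	/* on !MMU this is backed by a private copy of the file contents;
	 * the NULL address hint is ignored by do_mmap() anyway */
	char *p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED)
		return 1;

	printf("first byte: %c\n", p[0]);

	/* a partial munmap() of a file-backed VMA returns -EINVAL here,
	 * so the whole mapping is released in one go */
	munmap(p, 4096);
	close(fd);
	return 0;
}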
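The do_mremap() rules above can be exercised the same way: a mapping may only be resized in place, shared mappings and MREMAP_FIXED moves to a new address are rejected, and the mapping can never grow past the region that do_mmap_private() originally allocated. A hedged userspace sketch for an anonymous private mapping, again assuming 4 KiB pages:

#define _GNU_SOURCE		/* for mremap() */
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* three pages; the backing allocation made by do_mmap_private()
	 * may be rounded up further than this */
	char *p = mmap(NULL, 3 * 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	/* shrinking in place is allowed ... */
	char *q = mremap(p, 3 * 4096, 2 * 4096, 0);
	if (q == MAP_FAILED) {
		perror("shrink");
		q = p;
	}

	/* ... but growing past the backing region fails with ENOMEM on
	 * !MMU, and MREMAP_MAYMOVE would not help as nothing can move */
	if (mremap(q, 2 * 4096, 64 * 4096, 0) == MAP_FAILED)
		perror("grow");

	return munmap(q, 2 * 4096);
}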
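For the NOMMU_MAP_DIRECT path, do_mmap() above defers to the backing file's ->get_unmapped_area() to site the shared mapping and to its ->mmap() to accept it. Below is a hypothetical character-driver sketch of that pair; demo_*, DEV_PHYS_BASE and DEV_SIZE are made-up names, and the fops would still need to be wired into a real device registration:

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>

#define DEV_PHYS_BASE	0x20000000UL	/* hypothetical on-chip buffer */
#define DEV_SIZE	0x00100000UL	/* hypothetical size: 1 MiB */

/* tell do_mmap() where the shared mapping lives: the device memory itself */
static unsigned long demo_get_unmapped_area(struct file *file,
		unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	if ((pgoff << PAGE_SHIFT) + len > DEV_SIZE)
		return -EINVAL;
	return DEV_PHYS_BASE + (pgoff << PAGE_SHIFT);
}

/* accept the mapping; the nommu remap_pfn_range() above only sets vm_flags */
static int demo_mmap(struct file *file, struct vm_area_struct *vma)
{
	return remap_pfn_range(vma, vma->vm_start,
			       vma->vm_start >> PAGE_SHIFT,
			       vma->vm_end - vma->vm_start,
			       vma->vm_page_prot);
}

static const struct file_operations demo_fops = {
	.owner			= THIS_MODULE,
	.mmap			= demo_mmap,
	.get_unmapped_area	= demo_get_unmapped_area,
};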
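__access_remote_vm() and access_process_vm() above reduce, with no MMU, to a bounds check against the target's VMAs followed by a plain copy within the single flat address space. They back debugger-style interfaces such as ptrace() peek/poke and /proc/<pid>/mem; an illustrative userspace helper using the latter (read_remote() is just an example name, and the read is subject to the usual ptrace access checks):

#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

/* read len bytes at address addr in process pid via /proc/<pid>/mem */
ssize_t read_remote(pid_t pid, unsigned long addr, void *buf, size_t len)
{
	char path[64];
	int fd;
	ssize_t n;

	snprintf(path, sizeof(path), "/proc/%d/mem", (int)pid);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;

	n = pread(fd, buf, len, (off_t)addr);
	close(fd);
	return n;
}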
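The overcommit accounting in __vm_enough_memory() (present in the older of the two versions shown) and the reserves set up by init_user_reserve()/init_admin_reserve() are steered from userspace through the vm.* sysctls. A small illustrative reader; the overcommit_memory value maps 0/1/2 to OVERCOMMIT_GUESS/ALWAYS/NEVER:

#include <stdio.h>

static long read_sysctl(const char *path)
{
	long val = -1;
	FILE *f = fopen(path, "r");

	if (f) {
		if (fscanf(f, "%ld", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;
}

int main(void)
{
	printf("overcommit_memory    = %ld (0=guess, 1=always, 2=never)\n",
	       read_sysctl("/proc/sys/vm/overcommit_memory"));
	printf("user_reserve_kbytes  = %ld\n",
	       read_sysctl("/proc/sys/vm/user_reserve_kbytes"));
	printf("admin_reserve_kbytes = %ld\n",
	       read_sysctl("/proc/sys/vm/admin_reserve_kbytes"));
	return 0;
}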