1 // SPDX-License-Identifier: GPL-2.0-only << 2 /* 1 /* 3 * linux/fs/exec.c 2 * linux/fs/exec.c 4 * 3 * 5 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds 6 */ 5 */ 7 6 8 /* 7 /* 9 * #!-checking implemented by tytso. 8 * #!-checking implemented by tytso. 10 */ 9 */ 11 /* 10 /* 12 * Demand-loading implemented 01.12.91 - no ne 11 * Demand-loading implemented 01.12.91 - no need to read anything but 13 * the header into memory. The inode of the ex 12 * the header into memory. The inode of the executable is put into 14 * "current->executable", and page faults do t 13 * "current->executable", and page faults do the actual loading. Clean. 15 * 14 * 16 * Once more I can proudly say that linux stoo 15 * Once more I can proudly say that linux stood up to being changed: it 17 * was less than 2 hours work to get demand-lo 16 * was less than 2 hours work to get demand-loading completely implemented. 18 * 17 * 19 * Demand loading changed July 1993 by Eric Yo 18 * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead, 20 * current->executable is only used by the pro 19 * current->executable is only used by the procfs. This allows a dispatch 21 * table to check for several different types 20 * table to check for several different types of binary formats. We keep 22 * trying until we recognize the file or we ru 21 * trying until we recognize the file or we run out of supported binary 23 * formats. !! 22 * formats. 24 */ 23 */ 25 24 26 #include <linux/kernel_read_file.h> !! 25 #include <linux/config.h> 27 #include <linux/slab.h> 26 #include <linux/slab.h> 28 #include <linux/file.h> 27 #include <linux/file.h> 29 #include <linux/fdtable.h> !! 28 #include <linux/mman.h> 30 #include <linux/mm.h> !! 29 #include <linux/a.out.h> 31 #include <linux/stat.h> 30 #include <linux/stat.h> 32 #include <linux/fcntl.h> 31 #include <linux/fcntl.h> 33 #include <linux/swap.h> !! 32 #include <linux/smp_lock.h> 34 #include <linux/string.h> << 35 #include <linux/init.h> 33 #include <linux/init.h> 36 #include <linux/sched/mm.h> << 37 #include <linux/sched/coredump.h> << 38 #include <linux/sched/signal.h> << 39 #include <linux/sched/numa_balancing.h> << 40 #include <linux/sched/task.h> << 41 #include <linux/pagemap.h> 34 #include <linux/pagemap.h> 42 #include <linux/perf_event.h> << 43 #include <linux/highmem.h> 35 #include <linux/highmem.h> 44 #include <linux/spinlock.h> 36 #include <linux/spinlock.h> 45 #include <linux/key.h> << 46 #include <linux/personality.h> 37 #include <linux/personality.h> 47 #include <linux/binfmts.h> 38 #include <linux/binfmts.h> >> 39 #include <linux/swap.h> 48 #include <linux/utsname.h> 40 #include <linux/utsname.h> 49 #include <linux/pid_namespace.h> << 50 #include <linux/module.h> 41 #include <linux/module.h> 51 #include <linux/namei.h> 42 #include <linux/namei.h> >> 43 #include <linux/proc_fs.h> >> 44 #include <linux/ptrace.h> 52 #include <linux/mount.h> 45 #include <linux/mount.h> 53 #include <linux/security.h> 46 #include <linux/security.h> 54 #include <linux/syscalls.h> !! 47 #include <linux/rmap-locking.h> 55 #include <linux/tsacct_kern.h> << 56 #include <linux/cn_proc.h> << 57 #include <linux/audit.h> << 58 #include <linux/kmod.h> << 59 #include <linux/fsnotify.h> << 60 #include <linux/fs_struct.h> << 61 #include <linux/oom.h> << 62 #include <linux/compat.h> << 63 #include <linux/vmalloc.h> << 64 #include <linux/io_uring.h> << 65 #include <linux/syscall_user_dispatch.h> << 66 #include <linux/coredump.h> << 67 #include <linux/time_namespace.h> << 68 #include <linux/user_events.h> << 69 #include <linux/rseq.h> << 70 #include <linux/ksm.h> << 71 48 72 #include <linux/uaccess.h> !! 49 #include <asm/uaccess.h> >> 50 #include <asm/pgalloc.h> 73 #include <asm/mmu_context.h> 51 #include <asm/mmu_context.h> 74 #include <asm/tlb.h> << 75 52 76 #include <trace/events/task.h> !! 53 #ifdef CONFIG_KMOD 77 #include "internal.h" !! 54 #include <linux/kmod.h> 78 !! 55 #endif 79 #include <trace/events/sched.h> << 80 << 81 static int bprm_creds_from_file(struct linux_b << 82 56 83 int suid_dumpable = 0; !! 57 int core_uses_pid; >> 58 char core_pattern[65] = "core"; >> 59 /* The maximal length of core_pattern is also specified in sysctl.c */ 84 60 85 static LIST_HEAD(formats); !! 61 static struct linux_binfmt *formats; 86 static DEFINE_RWLOCK(binfmt_lock); !! 62 static rwlock_t binfmt_lock = RW_LOCK_UNLOCKED; 87 63 88 void __register_binfmt(struct linux_binfmt * f !! 64 int register_binfmt(struct linux_binfmt * fmt) 89 { 65 { >> 66 struct linux_binfmt ** tmp = &formats; >> 67 >> 68 if (!fmt) >> 69 return -EINVAL; >> 70 if (fmt->next) >> 71 return -EBUSY; 90 write_lock(&binfmt_lock); 72 write_lock(&binfmt_lock); 91 insert ? list_add(&fmt->lh, &formats) !! 73 while (*tmp) { 92 list_add_tail(&fmt->lh, &form !! 74 if (fmt == *tmp) { >> 75 write_unlock(&binfmt_lock); >> 76 return -EBUSY; >> 77 } >> 78 tmp = &(*tmp)->next; >> 79 } >> 80 fmt->next = formats; >> 81 formats = fmt; 93 write_unlock(&binfmt_lock); 82 write_unlock(&binfmt_lock); >> 83 return 0; 94 } 84 } 95 85 96 EXPORT_SYMBOL(__register_binfmt); !! 86 EXPORT_SYMBOL(register_binfmt); 97 87 98 void unregister_binfmt(struct linux_binfmt * f !! 88 int unregister_binfmt(struct linux_binfmt * fmt) 99 { 89 { >> 90 struct linux_binfmt ** tmp = &formats; >> 91 100 write_lock(&binfmt_lock); 92 write_lock(&binfmt_lock); 101 list_del(&fmt->lh); !! 93 while (*tmp) { >> 94 if (fmt == *tmp) { >> 95 *tmp = fmt->next; >> 96 write_unlock(&binfmt_lock); >> 97 return 0; >> 98 } >> 99 tmp = &(*tmp)->next; >> 100 } 102 write_unlock(&binfmt_lock); 101 write_unlock(&binfmt_lock); >> 102 return -EINVAL; 103 } 103 } 104 104 105 EXPORT_SYMBOL(unregister_binfmt); 105 EXPORT_SYMBOL(unregister_binfmt); 106 106 107 static inline void put_binfmt(struct linux_bin 107 static inline void put_binfmt(struct linux_binfmt * fmt) 108 { 108 { 109 module_put(fmt->module); 109 module_put(fmt->module); 110 } 110 } 111 111 112 bool path_noexec(const struct path *path) << 113 { << 114 return (path->mnt->mnt_flags & MNT_NOE << 115 (path->mnt->mnt_sb->s_iflags & << 116 } << 117 << 118 #ifdef CONFIG_USELIB << 119 /* 112 /* 120 * Note that a shared library must be both rea 113 * Note that a shared library must be both readable and executable due to 121 * security reasons. 114 * security reasons. 122 * 115 * 123 * Also note that we take the address to load !! 116 * Also note that we take the address to load from from the file itself. 124 */ 117 */ 125 SYSCALL_DEFINE1(uselib, const char __user *, l !! 118 asmlinkage long sys_uselib(const char __user * library) 126 { 119 { 127 struct linux_binfmt *fmt; !! 120 struct file * file; 128 struct file *file; !! 121 struct nameidata nd; 129 struct filename *tmp = getname(library !! 122 int error; 130 int error = PTR_ERR(tmp); << 131 static const struct open_flags uselib_ << 132 .open_flag = O_LARGEFILE | O_R << 133 .acc_mode = MAY_READ | MAY_EXE << 134 .intent = LOOKUP_OPEN, << 135 .lookup_flags = LOOKUP_FOLLOW, << 136 }; << 137 123 138 if (IS_ERR(tmp)) !! 124 nd.intent.open.flags = O_RDONLY; >> 125 error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); >> 126 if (error) 139 goto out; 127 goto out; 140 128 141 file = do_filp_open(AT_FDCWD, tmp, &us !! 129 error = -EINVAL; 142 putname(tmp); !! 130 if (!S_ISREG(nd.dentry->d_inode->i_mode)) >> 131 goto exit; >> 132 >> 133 error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC, &nd); >> 134 if (error) >> 135 goto exit; >> 136 >> 137 file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); 143 error = PTR_ERR(file); 138 error = PTR_ERR(file); 144 if (IS_ERR(file)) 139 if (IS_ERR(file)) 145 goto out; 140 goto out; 146 141 147 /* << 148 * Check do_open_execat() for an expla << 149 */ << 150 error = -EACCES; << 151 if (WARN_ON_ONCE(!S_ISREG(file_inode(f << 152 path_noexec(&file->f_path)) << 153 goto exit; << 154 << 155 error = -ENOEXEC; 142 error = -ENOEXEC; >> 143 if(file->f_op) { >> 144 struct linux_binfmt * fmt; 156 145 157 read_lock(&binfmt_lock); << 158 list_for_each_entry(fmt, &formats, lh) << 159 if (!fmt->load_shlib) << 160 continue; << 161 if (!try_module_get(fmt->modul << 162 continue; << 163 read_unlock(&binfmt_lock); << 164 error = fmt->load_shlib(file); << 165 read_lock(&binfmt_lock); 146 read_lock(&binfmt_lock); 166 put_binfmt(fmt); !! 147 for (fmt = formats ; fmt ; fmt = fmt->next) { 167 if (error != -ENOEXEC) !! 148 if (!fmt->load_shlib) 168 break; !! 149 continue; >> 150 if (!try_module_get(fmt->module)) >> 151 continue; >> 152 read_unlock(&binfmt_lock); >> 153 error = fmt->load_shlib(file); >> 154 read_lock(&binfmt_lock); >> 155 put_binfmt(fmt); >> 156 if (error != -ENOEXEC) >> 157 break; >> 158 } >> 159 read_unlock(&binfmt_lock); 169 } 160 } 170 read_unlock(&binfmt_lock); << 171 exit: << 172 fput(file); 161 fput(file); 173 out: 162 out: 174 return error; !! 163 return error; 175 } !! 164 exit: 176 #endif /* #ifdef CONFIG_USELIB */ !! 165 path_release(&nd); 177 !! 166 goto out; 178 #ifdef CONFIG_MMU << 179 /* << 180 * The nascent bprm->mm is not visible until e << 181 * use a lot of memory, account these pages in << 182 * for oom_badness()->get_mm_rss(). Once exec << 183 * change the counter back via acct_arg_size(0 << 184 */ << 185 static void acct_arg_size(struct linux_binprm << 186 { << 187 struct mm_struct *mm = current->mm; << 188 long diff = (long)(pages - bprm->vma_p << 189 << 190 if (!mm || !diff) << 191 return; << 192 << 193 bprm->vma_pages = pages; << 194 add_mm_counter(mm, MM_ANONPAGES, diff) << 195 } << 196 << 197 static struct page *get_arg_page(struct linux_ << 198 int write) << 199 { << 200 struct page *page; << 201 struct vm_area_struct *vma = bprm->vma << 202 struct mm_struct *mm = bprm->mm; << 203 int ret; << 204 << 205 /* << 206 * Avoid relying on expanding the stac << 207 * does not work for STACK_GROWSUP any << 208 * by hand ahead of time. << 209 */ << 210 if (write && pos < vma->vm_start) { << 211 mmap_write_lock(mm); << 212 ret = expand_downwards(vma, po << 213 if (unlikely(ret < 0)) { << 214 mmap_write_unlock(mm); << 215 return NULL; << 216 } << 217 mmap_write_downgrade(mm); << 218 } else << 219 mmap_read_lock(mm); << 220 << 221 /* << 222 * We are doing an exec(). 'current' << 223 * doing the exec and 'mm' is the new << 224 */ << 225 ret = get_user_pages_remote(mm, pos, 1 << 226 write ? FOLL_WRITE : 0 << 227 &page, NULL); << 228 mmap_read_unlock(mm); << 229 if (ret <= 0) << 230 return NULL; << 231 << 232 if (write) << 233 acct_arg_size(bprm, vma_pages( << 234 << 235 return page; << 236 } << 237 << 238 static void put_arg_page(struct page *page) << 239 { << 240 put_page(page); << 241 } << 242 << 243 static void free_arg_pages(struct linux_binprm << 244 { << 245 } << 246 << 247 static void flush_arg_page(struct linux_binprm << 248 struct page *page) << 249 { << 250 flush_cache_page(bprm->vma, pos, page_ << 251 } << 252 << 253 static int __bprm_mm_init(struct linux_binprm << 254 { << 255 int err; << 256 struct vm_area_struct *vma = NULL; << 257 struct mm_struct *mm = bprm->mm; << 258 << 259 bprm->vma = vma = vm_area_alloc(mm); << 260 if (!vma) << 261 return -ENOMEM; << 262 vma_set_anonymous(vma); << 263 << 264 if (mmap_write_lock_killable(mm)) { << 265 err = -EINTR; << 266 goto err_free; << 267 } << 268 << 269 /* << 270 * Need to be called with mmap write l << 271 * held, to avoid race with ksmd. << 272 */ << 273 err = ksm_execve(mm); << 274 if (err) << 275 goto err_ksm; << 276 << 277 /* << 278 * Place the stack at the largest stac << 279 * supports. Later, we'll move this to << 280 * use STACK_TOP because that can depe << 281 * configured yet. << 282 */ << 283 BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK << 284 vma->vm_end = STACK_TOP_MAX; << 285 vma->vm_start = vma->vm_end - PAGE_SIZ << 286 vm_flags_init(vma, VM_SOFTDIRTY | VM_S << 287 vma->vm_page_prot = vm_get_page_prot(v << 288 << 289 err = insert_vm_struct(mm, vma); << 290 if (err) << 291 goto err; << 292 << 293 mm->stack_vm = mm->total_vm = 1; << 294 mmap_write_unlock(mm); << 295 bprm->p = vma->vm_end - sizeof(void *) << 296 return 0; << 297 err: << 298 ksm_exit(mm); << 299 err_ksm: << 300 mmap_write_unlock(mm); << 301 err_free: << 302 bprm->vma = NULL; << 303 vm_area_free(vma); << 304 return err; << 305 } << 306 << 307 static bool valid_arg_len(struct linux_binprm << 308 { << 309 return len <= MAX_ARG_STRLEN; << 310 } << 311 << 312 #else << 313 << 314 static inline void acct_arg_size(struct linux_ << 315 { << 316 } << 317 << 318 static struct page *get_arg_page(struct linux_ << 319 int write) << 320 { << 321 struct page *page; << 322 << 323 page = bprm->page[pos / PAGE_SIZE]; << 324 if (!page && write) { << 325 page = alloc_page(GFP_HIGHUSER << 326 if (!page) << 327 return NULL; << 328 bprm->page[pos / PAGE_SIZE] = << 329 } << 330 << 331 return page; << 332 } << 333 << 334 static void put_arg_page(struct page *page) << 335 { << 336 } << 337 << 338 static void free_arg_page(struct linux_binprm << 339 { << 340 if (bprm->page[i]) { << 341 __free_page(bprm->page[i]); << 342 bprm->page[i] = NULL; << 343 } << 344 } << 345 << 346 static void free_arg_pages(struct linux_binprm << 347 { << 348 int i; << 349 << 350 for (i = 0; i < MAX_ARG_PAGES; i++) << 351 free_arg_page(bprm, i); << 352 } << 353 << 354 static void flush_arg_page(struct linux_binprm << 355 struct page *page) << 356 { << 357 } << 358 << 359 static int __bprm_mm_init(struct linux_binprm << 360 { << 361 bprm->p = PAGE_SIZE * MAX_ARG_PAGES - << 362 return 0; << 363 } << 364 << 365 static bool valid_arg_len(struct linux_binprm << 366 { << 367 return len <= bprm->p; << 368 } << 369 << 370 #endif /* CONFIG_MMU */ << 371 << 372 /* << 373 * Create a new mm_struct and populate it with << 374 * vm_area_struct. We don't have enough conte << 375 * flags, permissions, and offset, so we use t << 376 * them later in setup_arg_pages(). << 377 */ << 378 static int bprm_mm_init(struct linux_binprm *b << 379 { << 380 int err; << 381 struct mm_struct *mm = NULL; << 382 << 383 bprm->mm = mm = mm_alloc(); << 384 err = -ENOMEM; << 385 if (!mm) << 386 goto err; << 387 << 388 /* Save current stack limit for all ca << 389 task_lock(current->group_leader); << 390 bprm->rlim_stack = current->signal->rl << 391 task_unlock(current->group_leader); << 392 << 393 err = __bprm_mm_init(bprm); << 394 if (err) << 395 goto err; << 396 << 397 return 0; << 398 << 399 err: << 400 if (mm) { << 401 bprm->mm = NULL; << 402 mmdrop(mm); << 403 } << 404 << 405 return err; << 406 } << 407 << 408 struct user_arg_ptr { << 409 #ifdef CONFIG_COMPAT << 410 bool is_compat; << 411 #endif << 412 union { << 413 const char __user *const __use << 414 #ifdef CONFIG_COMPAT << 415 const compat_uptr_t __user *co << 416 #endif << 417 } ptr; << 418 }; << 419 << 420 static const char __user *get_user_arg_ptr(str << 421 { << 422 const char __user *native; << 423 << 424 #ifdef CONFIG_COMPAT << 425 if (unlikely(argv.is_compat)) { << 426 compat_uptr_t compat; << 427 << 428 if (get_user(compat, argv.ptr. << 429 return ERR_PTR(-EFAULT << 430 << 431 return compat_ptr(compat); << 432 } << 433 #endif << 434 << 435 if (get_user(native, argv.ptr.native + << 436 return ERR_PTR(-EFAULT); << 437 << 438 return native; << 439 } 167 } 440 168 441 /* 169 /* 442 * count() counts the number of strings in arr 170 * count() counts the number of strings in array ARGV. 443 */ 171 */ 444 static int count(struct user_arg_ptr argv, int !! 172 static int count(char __user * __user * argv, int max) 445 { 173 { 446 int i = 0; 174 int i = 0; 447 175 448 if (argv.ptr.native != NULL) { !! 176 if (argv != NULL) { 449 for (;;) { 177 for (;;) { 450 const char __user *p = !! 178 char __user * p; 451 179 >> 180 if (get_user(p, argv)) >> 181 return -EFAULT; 452 if (!p) 182 if (!p) 453 break; 183 break; 454 !! 184 argv++; 455 if (IS_ERR(p)) !! 185 if(++i > max) 456 return -EFAULT << 457 << 458 if (i >= max) << 459 return -E2BIG; 186 return -E2BIG; 460 ++i; << 461 << 462 if (fatal_signal_pendi << 463 return -ERESTA << 464 cond_resched(); << 465 } 187 } 466 } 188 } 467 return i; 189 return i; 468 } 190 } 469 191 470 static int count_strings_kernel(const char *co << 471 { << 472 int i; << 473 << 474 if (!argv) << 475 return 0; << 476 << 477 for (i = 0; argv[i]; ++i) { << 478 if (i >= MAX_ARG_STRINGS) << 479 return -E2BIG; << 480 if (fatal_signal_pending(curre << 481 return -ERESTARTNOHAND << 482 cond_resched(); << 483 } << 484 return i; << 485 } << 486 << 487 static inline int bprm_set_stack_limit(struct << 488 unsigne << 489 { << 490 #ifdef CONFIG_MMU << 491 /* Avoid a pathological bprm->p. */ << 492 if (bprm->p < limit) << 493 return -E2BIG; << 494 bprm->argmin = bprm->p - limit; << 495 #endif << 496 return 0; << 497 } << 498 static inline bool bprm_hit_stack_limit(struct << 499 { << 500 #ifdef CONFIG_MMU << 501 return bprm->p < bprm->argmin; << 502 #else << 503 return false; << 504 #endif << 505 } << 506 << 507 /* << 508 * Calculate bprm->argmin from: << 509 * - _STK_LIM << 510 * - ARG_MAX << 511 * - bprm->rlim_stack.rlim_cur << 512 * - bprm->argc << 513 * - bprm->envc << 514 * - bprm->p << 515 */ << 516 static int bprm_stack_limits(struct linux_binp << 517 { << 518 unsigned long limit, ptr_size; << 519 << 520 /* << 521 * Limit to 1/4 of the max stack size << 522 * (whichever is smaller) for the argv << 523 * This ensures that: << 524 * - the remaining binfmt code will n << 525 * - the program will have a reasonab << 526 * to work from. << 527 */ << 528 limit = _STK_LIM / 4 * 3; << 529 limit = min(limit, bprm->rlim_stack.rl << 530 /* << 531 * We've historically supported up to << 532 * of argument strings even with small << 533 */ << 534 limit = max_t(unsigned long, limit, AR << 535 /* Reject totally pathological counts. << 536 if (bprm->argc < 0 || bprm->envc < 0) << 537 return -E2BIG; << 538 /* << 539 * We must account for the size of all << 540 * the argv and envp strings, since th << 541 * the stack. They aren't stored until << 542 * signal to the parent that the child << 543 * Instead, calculate it here so it's << 544 * << 545 * In the case of argc = 0, make sure << 546 * empty string (which will bump argc << 547 * userspace programs don't start proc << 548 * argc can never be 0, to keep them f << 549 * See do_execveat_common(). << 550 */ << 551 if (check_add_overflow(max(bprm->argc, << 552 check_mul_overflow(ptr_size, sizeo << 553 return -E2BIG; << 554 if (limit <= ptr_size) << 555 return -E2BIG; << 556 limit -= ptr_size; << 557 << 558 return bprm_set_stack_limit(bprm, limi << 559 } << 560 << 561 /* 192 /* 562 * 'copy_strings()' copies argument/environmen !! 193 * 'copy_strings()' copies argument/environment strings from user 563 * processes's memory to the new process's sta !! 194 * memory to free pages in kernel mem. These are in a format ready 564 * ensures the destination page is created and !! 195 * to be put directly into the top of new user memory. 565 */ 196 */ 566 static int copy_strings(int argc, struct user_ !! 197 int copy_strings(int argc,char __user * __user * argv, struct linux_binprm *bprm) 567 struct linux_binprm *b << 568 { 198 { 569 struct page *kmapped_page = NULL; 199 struct page *kmapped_page = NULL; 570 char *kaddr = NULL; 200 char *kaddr = NULL; 571 unsigned long kpos = 0; << 572 int ret; 201 int ret; 573 202 574 while (argc-- > 0) { 203 while (argc-- > 0) { 575 const char __user *str; !! 204 char __user *str; 576 int len; 205 int len; 577 unsigned long pos; 206 unsigned long pos; 578 207 579 ret = -EFAULT; !! 208 if (get_user(str, argv+argc) || 580 str = get_user_arg_ptr(argv, a !! 209 !(len = strnlen_user(str, bprm->p))) { 581 if (IS_ERR(str)) !! 210 ret = -EFAULT; 582 goto out; << 583 << 584 len = strnlen_user(str, MAX_AR << 585 if (!len) << 586 goto out; 211 goto out; >> 212 } 587 213 588 ret = -E2BIG; !! 214 if (bprm->p < len) { 589 if (!valid_arg_len(bprm, len)) !! 215 ret = -E2BIG; 590 goto out; 216 goto out; >> 217 } 591 218 592 /* We're going to work our way << 593 pos = bprm->p; << 594 str += len; << 595 bprm->p -= len; 219 bprm->p -= len; 596 if (bprm_hit_stack_limit(bprm) !! 220 /* XXX: add architecture specific overflow check here. */ 597 goto out; !! 221 pos = bprm->p; 598 222 599 while (len > 0) { 223 while (len > 0) { >> 224 int i, new, err; 600 int offset, bytes_to_c 225 int offset, bytes_to_copy; 601 !! 226 struct page *page; 602 if (fatal_signal_pendi << 603 ret = -ERESTAR << 604 goto out; << 605 } << 606 cond_resched(); << 607 227 608 offset = pos % PAGE_SI 228 offset = pos % PAGE_SIZE; 609 if (offset == 0) !! 229 i = pos/PAGE_SIZE; 610 offset = PAGE_ !! 230 page = bprm->page[i]; 611 !! 231 new = 0; 612 bytes_to_copy = offset !! 232 if (!page) { 613 if (bytes_to_copy > le !! 233 page = alloc_page(GFP_HIGHUSER); 614 bytes_to_copy !! 234 bprm->page[i] = page; 615 << 616 offset -= bytes_to_cop << 617 pos -= bytes_to_copy; << 618 str -= bytes_to_copy; << 619 len -= bytes_to_copy; << 620 << 621 if (!kmapped_page || k << 622 struct page *p << 623 << 624 page = get_arg << 625 if (!page) { 235 if (!page) { 626 ret = !! 236 ret = -ENOMEM; 627 goto o 237 goto out; 628 } 238 } >> 239 new = 1; >> 240 } 629 241 630 if (kmapped_pa !! 242 if (page != kmapped_page) { 631 flush_ !! 243 if (kmapped_page) 632 kunmap !! 244 kunmap(kmapped_page); 633 put_ar << 634 } << 635 kmapped_page = 245 kmapped_page = page; 636 kaddr = kmap_l !! 246 kaddr = kmap(kmapped_page); 637 kpos = pos & P << 638 flush_arg_page << 639 } 247 } 640 if (copy_from_user(kad !! 248 if (new && offset) >> 249 memset(kaddr, 0, offset); >> 250 bytes_to_copy = PAGE_SIZE - offset; >> 251 if (bytes_to_copy > len) { >> 252 bytes_to_copy = len; >> 253 if (new) >> 254 memset(kaddr+offset+len, 0, >> 255 PAGE_SIZE-offset-len); >> 256 } >> 257 err = copy_from_user(kaddr+offset, str, bytes_to_copy); >> 258 if (err) { 641 ret = -EFAULT; 259 ret = -EFAULT; 642 goto out; 260 goto out; 643 } 261 } >> 262 >> 263 pos += bytes_to_copy; >> 264 str += bytes_to_copy; >> 265 len -= bytes_to_copy; 644 } 266 } 645 } 267 } 646 ret = 0; 268 ret = 0; 647 out: 269 out: 648 if (kmapped_page) { !! 270 if (kmapped_page) 649 flush_dcache_page(kmapped_page !! 271 kunmap(kmapped_page); 650 kunmap_local(kaddr); << 651 put_arg_page(kmapped_page); << 652 } << 653 return ret; 272 return ret; 654 } 273 } 655 274 656 /* 275 /* 657 * Copy and argument/environment string from t !! 276 * Like copy_strings, but get argv and its values from kernel memory. 658 */ 277 */ 659 int copy_string_kernel(const char *arg, struct !! 278 int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm) 660 { 279 { 661 int len = strnlen(arg, MAX_ARG_STRLEN) !! 280 int r; 662 unsigned long pos = bprm->p; !! 281 mm_segment_t oldfs = get_fs(); 663 !! 282 set_fs(KERNEL_DS); 664 if (len == 0) !! 283 r = copy_strings(argc, (char __user * __user *)argv, bprm); 665 return -EFAULT; !! 284 set_fs(oldfs); 666 if (!valid_arg_len(bprm, len)) !! 285 return r; 667 return -E2BIG; << 668 << 669 /* We're going to work our way backwar << 670 arg += len; << 671 bprm->p -= len; << 672 if (bprm_hit_stack_limit(bprm)) << 673 return -E2BIG; << 674 << 675 while (len > 0) { << 676 unsigned int bytes_to_copy = m << 677 min_not_zero(o << 678 struct page *page; << 679 << 680 pos -= bytes_to_copy; << 681 arg -= bytes_to_copy; << 682 len -= bytes_to_copy; << 683 << 684 page = get_arg_page(bprm, pos, << 685 if (!page) << 686 return -E2BIG; << 687 flush_arg_page(bprm, pos & PAG << 688 memcpy_to_page(page, offset_in << 689 put_arg_page(page); << 690 } << 691 << 692 return 0; << 693 } 286 } 694 EXPORT_SYMBOL(copy_string_kernel); << 695 287 696 static int copy_strings_kernel(int argc, const !! 288 EXPORT_SYMBOL(copy_strings_kernel); 697 struct linux_bi << 698 { << 699 while (argc-- > 0) { << 700 int ret = copy_string_kernel(a << 701 if (ret < 0) << 702 return ret; << 703 if (fatal_signal_pending(curre << 704 return -ERESTARTNOHAND << 705 cond_resched(); << 706 } << 707 return 0; << 708 } << 709 289 710 #ifdef CONFIG_MMU 290 #ifdef CONFIG_MMU 711 << 712 /* 291 /* 713 * Finalizes the stack vm_area_struct. The fla !! 292 * This routine is used to map in a page into an address space: needed by 714 * the stack is optionally relocated, and some !! 293 * execve() for the initial stack and environment pages. >> 294 * >> 295 * tsk->mmap_sem is held for writing. 715 */ 296 */ 716 int setup_arg_pages(struct linux_binprm *bprm, !! 297 void put_dirty_page(struct task_struct *tsk, struct page *page, 717 unsigned long stack_top, !! 298 unsigned long address, pgprot_t prot) 718 int executable_stack) !! 299 { >> 300 pgd_t * pgd; >> 301 pmd_t * pmd; >> 302 pte_t * pte; >> 303 struct pte_chain *pte_chain; >> 304 >> 305 if (page_count(page) != 1) >> 306 printk(KERN_ERR "mem_map disagrees with %p at %08lx\n", >> 307 page, address); >> 308 >> 309 pgd = pgd_offset(tsk->mm, address); >> 310 pte_chain = pte_chain_alloc(GFP_KERNEL); >> 311 if (!pte_chain) >> 312 goto out_sig; >> 313 spin_lock(&tsk->mm->page_table_lock); >> 314 pmd = pmd_alloc(tsk->mm, pgd, address); >> 315 if (!pmd) >> 316 goto out; >> 317 pte = pte_alloc_map(tsk->mm, pmd, address); >> 318 if (!pte) >> 319 goto out; >> 320 if (!pte_none(*pte)) { >> 321 pte_unmap(pte); >> 322 goto out; >> 323 } >> 324 lru_cache_add_active(page); >> 325 flush_dcache_page(page); >> 326 set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot)))); >> 327 pte_chain = page_add_rmap(page, pte, pte_chain); >> 328 pte_unmap(pte); >> 329 tsk->mm->rss++; >> 330 spin_unlock(&tsk->mm->page_table_lock); >> 331 >> 332 /* no need for flush_tlb */ >> 333 pte_chain_free(pte_chain); >> 334 return; >> 335 out: >> 336 spin_unlock(&tsk->mm->page_table_lock); >> 337 out_sig: >> 338 __free_page(page); >> 339 force_sig(SIGKILL, tsk); >> 340 pte_chain_free(pte_chain); >> 341 return; >> 342 } >> 343 >> 344 int setup_arg_pages(struct linux_binprm *bprm) 719 { 345 { 720 unsigned long ret; << 721 unsigned long stack_shift; << 722 struct mm_struct *mm = current->mm; << 723 struct vm_area_struct *vma = bprm->vma << 724 struct vm_area_struct *prev = NULL; << 725 unsigned long vm_flags; << 726 unsigned long stack_base; 346 unsigned long stack_base; 727 unsigned long stack_size; !! 347 struct vm_area_struct *mpnt; 728 unsigned long stack_expand; !! 348 struct mm_struct *mm = current->mm; 729 unsigned long rlim_stack; !! 349 int i; 730 struct mmu_gather tlb; !! 350 long arg_size; 731 struct vma_iterator vmi; << 732 351 733 #ifdef CONFIG_STACK_GROWSUP 352 #ifdef CONFIG_STACK_GROWSUP 734 /* Limit stack size */ !! 353 /* Move the argument and environment strings to the bottom of the 735 stack_base = bprm->rlim_stack.rlim_max !! 354 * stack space. 736 !! 355 */ 737 stack_base = calc_max_stack_size(stack !! 356 int offset, j; 738 !! 357 char *to, *from; 739 /* Add space for stack randomization. << 740 if (current->flags & PF_RANDOMIZE) << 741 stack_base += (STACK_RND_MASK << 742 << 743 /* Make sure we didn't let the argumen << 744 if (vma->vm_end - vma->vm_start > stac << 745 return -ENOMEM; << 746 358 747 stack_base = PAGE_ALIGN(stack_top - st !! 359 /* Start by shifting all the pages down */ >> 360 i = 0; >> 361 for (j = 0; j < MAX_ARG_PAGES; j++) { >> 362 struct page *page = bprm->page[j]; >> 363 if (!page) >> 364 continue; >> 365 bprm->page[i++] = page; >> 366 } 748 367 749 stack_shift = vma->vm_start - stack_ba !! 368 /* Now move them within their pages */ 750 mm->arg_start = bprm->p - stack_shift; !! 369 offset = bprm->p % PAGE_SIZE; 751 bprm->p = vma->vm_end - stack_shift; !! 370 to = kmap(bprm->page[0]); >> 371 for (j = 1; j < i; j++) { >> 372 memmove(to, to + offset, PAGE_SIZE - offset); >> 373 from = kmap(bprm->page[j]); >> 374 memcpy(to + PAGE_SIZE - offset, from, offset); >> 375 kunmap(bprm->page[j - 1]); >> 376 to = from; >> 377 } >> 378 memmove(to, to + offset, PAGE_SIZE - offset); >> 379 kunmap(bprm->page[j - 1]); >> 380 >> 381 /* Adjust bprm->p to point to the end of the strings. */ >> 382 bprm->p = PAGE_SIZE * i - offset; >> 383 >> 384 /* Limit stack size to 1GB */ >> 385 stack_base = current->rlim[RLIMIT_STACK].rlim_max; >> 386 if (stack_base > (1 << 30)) >> 387 stack_base = 1 << 30; >> 388 stack_base = PAGE_ALIGN(STACK_TOP - stack_base); >> 389 >> 390 mm->arg_start = stack_base; >> 391 arg_size = i << PAGE_SHIFT; >> 392 >> 393 /* zero pages that were copied above */ >> 394 while (i < MAX_ARG_PAGES) >> 395 bprm->page[i++] = NULL; 752 #else 396 #else 753 stack_top = arch_align_stack(stack_top !! 397 stack_base = STACK_TOP - MAX_ARG_PAGES * PAGE_SIZE; 754 stack_top = PAGE_ALIGN(stack_top); !! 398 mm->arg_start = bprm->p + stack_base; 755 !! 399 arg_size = STACK_TOP - (PAGE_MASK & (unsigned long) mm->arg_start); 756 if (unlikely(stack_top < mmap_min_addr << 757 unlikely(vma->vm_end - vma->vm_sta << 758 return -ENOMEM; << 759 << 760 stack_shift = vma->vm_end - stack_top; << 761 << 762 bprm->p -= stack_shift; << 763 mm->arg_start = bprm->p; << 764 #endif 400 #endif 765 401 >> 402 bprm->p += stack_base; 766 if (bprm->loader) 403 if (bprm->loader) 767 bprm->loader -= stack_shift; !! 404 bprm->loader += stack_base; 768 bprm->exec -= stack_shift; !! 405 bprm->exec += stack_base; 769 << 770 if (mmap_write_lock_killable(mm)) << 771 return -EINTR; << 772 << 773 vm_flags = VM_STACK_FLAGS; << 774 406 775 /* !! 407 mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); 776 * Adjust stack execute permissions; e !! 408 if (!mpnt) 777 * EXSTACK_ENABLE_X, disable for EXSTA !! 409 return -ENOMEM; 778 * (arch default) otherwise. << 779 */ << 780 if (unlikely(executable_stack == EXSTA << 781 vm_flags |= VM_EXEC; << 782 else if (executable_stack == EXSTACK_D << 783 vm_flags &= ~VM_EXEC; << 784 vm_flags |= mm->def_flags; << 785 vm_flags |= VM_STACK_INCOMPLETE_SETUP; << 786 << 787 vma_iter_init(&vmi, mm, vma->vm_start) << 788 << 789 tlb_gather_mmu(&tlb, mm); << 790 ret = mprotect_fixup(&vmi, &tlb, vma, << 791 vm_flags); << 792 tlb_finish_mmu(&tlb); << 793 << 794 if (ret) << 795 goto out_unlock; << 796 BUG_ON(prev != vma); << 797 << 798 if (unlikely(vm_flags & VM_EXEC)) { << 799 pr_warn_once("process '%pD4' s << 800 bprm->file); << 801 } << 802 410 803 /* Move stack pages down in memory. */ !! 411 if (security_vm_enough_memory(arg_size >> PAGE_SHIFT)) { 804 if (stack_shift) { !! 412 kmem_cache_free(vm_area_cachep, mpnt); 805 /* !! 413 return -ENOMEM; 806 * During bprm_mm_init(), we c << 807 * the binfmt code determines << 808 * its final location. << 809 */ << 810 ret = relocate_vma_down(vma, s << 811 if (ret) << 812 goto out_unlock; << 813 } 414 } 814 415 815 /* mprotect_fixup is overkill to remov !! 416 down_write(&mm->mmap_sem); 816 vm_flags_clear(vma, VM_STACK_INCOMPLET !! 417 { 817 !! 418 mpnt->vm_mm = mm; 818 stack_expand = 131072UL; /* randomly 3 << 819 stack_size = vma->vm_end - vma->vm_sta << 820 /* << 821 * Align this down to a page boundary << 822 * will align it up. << 823 */ << 824 rlim_stack = bprm->rlim_stack.rlim_cur << 825 << 826 stack_expand = min(rlim_stack, stack_s << 827 << 828 #ifdef CONFIG_STACK_GROWSUP 419 #ifdef CONFIG_STACK_GROWSUP 829 stack_base = vma->vm_start + stack_exp !! 420 mpnt->vm_start = stack_base; >> 421 mpnt->vm_end = PAGE_MASK & >> 422 (PAGE_SIZE - 1 + (unsigned long) bprm->p); 830 #else 423 #else 831 stack_base = vma->vm_end - stack_expan !! 424 mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; >> 425 mpnt->vm_end = STACK_TOP; 832 #endif 426 #endif 833 current->mm->start_stack = bprm->p; !! 427 mpnt->vm_page_prot = protection_map[VM_STACK_FLAGS & 0x7]; 834 ret = expand_stack_locked(vma, stack_b !! 428 mpnt->vm_flags = VM_STACK_FLAGS; 835 if (ret) !! 429 mpnt->vm_ops = NULL; 836 ret = -EFAULT; !! 430 mpnt->vm_pgoff = 0; 837 !! 431 mpnt->vm_file = NULL; 838 out_unlock: !! 432 INIT_LIST_HEAD(&mpnt->shared); 839 mmap_write_unlock(mm); !! 433 mpnt->vm_private_data = (void *) 0; 840 return ret; !! 434 insert_vm_struct(mm, mpnt); >> 435 mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; >> 436 } >> 437 >> 438 for (i = 0 ; i < MAX_ARG_PAGES ; i++) { >> 439 struct page *page = bprm->page[i]; >> 440 if (page) { >> 441 bprm->page[i] = NULL; >> 442 put_dirty_page(current, page, stack_base, >> 443 mpnt->vm_page_prot); >> 444 } >> 445 stack_base += PAGE_SIZE; >> 446 } >> 447 up_write(&mm->mmap_sem); >> 448 >> 449 return 0; 841 } 450 } >> 451 842 EXPORT_SYMBOL(setup_arg_pages); 452 EXPORT_SYMBOL(setup_arg_pages); 843 453 >> 454 #define free_arg_pages(bprm) do { } while (0) >> 455 844 #else 456 #else 845 457 846 /* !! 458 static inline void free_arg_pages(struct linux_binprm *bprm) 847 * Transfer the program arguments and environm << 848 * onto the stack. The provided stack pointer << 849 */ << 850 int transfer_args_to_stack(struct linux_binprm << 851 unsigned long *sp_l << 852 { 459 { 853 unsigned long index, stop, sp; !! 460 int i; 854 int ret = 0; << 855 << 856 stop = bprm->p >> PAGE_SHIFT; << 857 sp = *sp_location; << 858 461 859 for (index = MAX_ARG_PAGES - 1; index !! 462 for (i = 0; i < MAX_ARG_PAGES; i++) { 860 unsigned int offset = index == !! 463 if (bprm->page[i]) 861 char *src = kmap_local_page(bp !! 464 __free_page(bprm->page[i]); 862 sp -= PAGE_SIZE - offset; !! 465 bprm->page[i] = NULL; 863 if (copy_to_user((void *) sp, << 864 ret = -EFAULT; << 865 kunmap_local(src); << 866 if (ret) << 867 goto out; << 868 } 466 } 869 << 870 bprm->exec += *sp_location - MAX_ARG_P << 871 *sp_location = sp; << 872 << 873 out: << 874 return ret; << 875 } 467 } 876 EXPORT_SYMBOL(transfer_args_to_stack); << 877 468 878 #endif /* CONFIG_MMU */ 469 #endif /* CONFIG_MMU */ 879 470 880 /* << 881 * On success, caller must call do_close_execa << 882 * struct file to close it. << 883 */ << 884 static struct file *do_open_execat(int fd, str << 885 { << 886 struct file *file; << 887 struct open_flags open_exec_flags = { << 888 .open_flag = O_LARGEFILE | O_R << 889 .acc_mode = MAY_EXEC, << 890 .intent = LOOKUP_OPEN, << 891 .lookup_flags = LOOKUP_FOLLOW, << 892 }; << 893 << 894 if ((flags & ~(AT_SYMLINK_NOFOLLOW | A << 895 return ERR_PTR(-EINVAL); << 896 if (flags & AT_SYMLINK_NOFOLLOW) << 897 open_exec_flags.lookup_flags & << 898 if (flags & AT_EMPTY_PATH) << 899 open_exec_flags.lookup_flags | << 900 << 901 file = do_filp_open(fd, name, &open_ex << 902 if (IS_ERR(file)) << 903 return file; << 904 << 905 /* << 906 * In the past the regular type check << 907 * 633fb6ac3980 ("exec: move S_ISREG() << 908 * an invariant that all non-regular f << 909 */ << 910 if (WARN_ON_ONCE(!S_ISREG(file_inode(f << 911 path_noexec(&file->f_path)) { << 912 fput(file); << 913 return ERR_PTR(-EACCES); << 914 } << 915 << 916 return file; << 917 } << 918 << 919 /** << 920 * open_exec - Open a path name for execution << 921 * << 922 * @name: path name to open with the intent of << 923 * << 924 * Returns ERR_PTR on failure or allocated str << 925 * << 926 * As this is a wrapper for the internal do_op << 927 * do_close_execat(). << 928 */ << 929 struct file *open_exec(const char *name) 471 struct file *open_exec(const char *name) 930 { 472 { 931 struct filename *filename = getname_ke !! 473 struct nameidata nd; 932 struct file *f = ERR_CAST(filename); !! 474 int err = path_lookup(name, LOOKUP_FOLLOW, &nd); 933 !! 475 struct file *file = ERR_PTR(err); 934 if (!IS_ERR(filename)) { !! 476 935 f = do_open_execat(AT_FDCWD, f !! 477 if (!err) { 936 putname(filename); !! 478 struct inode *inode = nd.dentry->d_inode; >> 479 file = ERR_PTR(-EACCES); >> 480 if (!(nd.mnt->mnt_flags & MNT_NOEXEC) && >> 481 S_ISREG(inode->i_mode)) { >> 482 int err = permission(inode, MAY_EXEC, &nd); >> 483 if (!err && !(inode->i_mode & 0111)) >> 484 err = -EACCES; >> 485 file = ERR_PTR(err); >> 486 if (!err) { >> 487 file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); >> 488 if (!IS_ERR(file)) { >> 489 err = deny_write_access(file); >> 490 if (err) { >> 491 fput(file); >> 492 file = ERR_PTR(err); >> 493 } >> 494 } >> 495 out: >> 496 return file; >> 497 } >> 498 } >> 499 path_release(&nd); 937 } 500 } 938 return f; !! 501 goto out; 939 } 502 } >> 503 940 EXPORT_SYMBOL(open_exec); 504 EXPORT_SYMBOL(open_exec); 941 505 942 #if defined(CONFIG_BINFMT_FLAT) || defined(CON !! 506 int kernel_read(struct file *file, unsigned long offset, 943 ssize_t read_code(struct file *file, unsigned !! 507 char *addr, unsigned long count) 944 { 508 { 945 ssize_t res = vfs_read(file, (void __u !! 509 mm_segment_t old_fs; 946 if (res > 0) !! 510 loff_t pos = offset; 947 flush_icache_user_range(addr, !! 511 int result; 948 return res; !! 512 >> 513 old_fs = get_fs(); >> 514 set_fs(get_ds()); >> 515 /* The cast to a user pointer is valid due to the set_fs() */ >> 516 result = vfs_read(file, (void __user *)addr, count, &pos); >> 517 set_fs(old_fs); >> 518 return result; 949 } 519 } 950 EXPORT_SYMBOL(read_code); << 951 #endif << 952 520 953 /* !! 521 EXPORT_SYMBOL(kernel_read); 954 * Maps the mm_struct mm into the current task !! 522 955 * On success, this function returns with exec << 956 * held for writing. << 957 */ << 958 static int exec_mmap(struct mm_struct *mm) 523 static int exec_mmap(struct mm_struct *mm) 959 { 524 { 960 struct task_struct *tsk; 525 struct task_struct *tsk; 961 struct mm_struct *old_mm, *active_mm; !! 526 struct mm_struct * old_mm, *active_mm; 962 int ret; !! 527 >> 528 /* Add it to the list of mm's */ >> 529 spin_lock(&mmlist_lock); >> 530 list_add(&mm->mmlist, &init_mm.mmlist); >> 531 mmlist_nr++; >> 532 spin_unlock(&mmlist_lock); 963 533 964 /* Notify parent that we're no longer 534 /* Notify parent that we're no longer interested in the old VM */ 965 tsk = current; 535 tsk = current; 966 old_mm = current->mm; 536 old_mm = current->mm; 967 exec_mm_release(tsk, old_mm); !! 537 mm_release(tsk, old_mm); 968 << 969 ret = down_write_killable(&tsk->signal << 970 if (ret) << 971 return ret; << 972 << 973 if (old_mm) { << 974 /* << 975 * If there is a pending fatal << 976 * whose default action is to << 977 * out and die instead of goin << 978 */ << 979 ret = mmap_read_lock_killable( << 980 if (ret) { << 981 up_write(&tsk->signal- << 982 return ret; << 983 } << 984 } << 985 538 986 task_lock(tsk); 539 task_lock(tsk); 987 membarrier_exec_mmap(mm); << 988 << 989 local_irq_disable(); << 990 active_mm = tsk->active_mm; 540 active_mm = tsk->active_mm; 991 tsk->active_mm = mm; << 992 tsk->mm = mm; 541 tsk->mm = mm; 993 mm_init_cid(mm); !! 542 tsk->active_mm = mm; 994 /* << 995 * This prevents preemption while acti << 996 * it and mm are being updated, which << 997 * lazy tlb mm refcounting when these << 998 * switches. Not all architectures can << 999 * activate_mm yet. << 1000 */ << 1001 if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS << 1002 local_irq_enable(); << 1003 activate_mm(active_mm, mm); 543 activate_mm(active_mm, mm); 1004 if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_ << 1005 local_irq_enable(); << 1006 lru_gen_add_mm(mm); << 1007 task_unlock(tsk); 544 task_unlock(tsk); 1008 lru_gen_use_mm(mm); << 1009 if (old_mm) { 545 if (old_mm) { 1010 mmap_read_unlock(old_mm); !! 546 if (active_mm != old_mm) BUG(); 1011 BUG_ON(active_mm != old_mm); << 1012 setmax_mm_hiwater_rss(&tsk->s << 1013 mm_update_next_owner(old_mm); << 1014 mmput(old_mm); 547 mmput(old_mm); 1015 return 0; 548 return 0; 1016 } 549 } 1017 mmdrop_lazy_tlb(active_mm); !! 550 mmdrop(active_mm); 1018 return 0; 551 return 0; 1019 } 552 } 1020 553 1021 static int de_thread(struct task_struct *tsk) !! 554 /* >> 555 * This function makes sure the current process has its own signal table, >> 556 * so that flush_signal_handlers can later reset the handlers without >> 557 * disturbing other processes. (Other processes might share the signal >> 558 * table via the CLONE_SIGHAND option to clone().) >> 559 */ >> 560 static inline int de_thread(struct task_struct *tsk) 1022 { 561 { 1023 struct signal_struct *sig = tsk->sign !! 562 struct signal_struct *newsig, *oldsig = tsk->signal; 1024 struct sighand_struct *oldsighand = t !! 563 struct sighand_struct *newsighand, *oldsighand = tsk->sighand; 1025 spinlock_t *lock = &oldsighand->siglo 564 spinlock_t *lock = &oldsighand->siglock; >> 565 int count; 1026 566 1027 if (thread_group_empty(tsk)) !! 567 /* >> 568 * If we don't share sighandlers, then we aren't sharing anything >> 569 * and we can just re-use it all. >> 570 */ >> 571 if (atomic_read(&oldsighand->count) <= 1) >> 572 return 0; >> 573 >> 574 newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); >> 575 if (!newsighand) >> 576 return -ENOMEM; >> 577 >> 578 spin_lock_init(&newsighand->siglock); >> 579 atomic_set(&newsighand->count, 1); >> 580 memcpy(newsighand->action, oldsighand->action, sizeof(newsighand->action)); >> 581 >> 582 /* >> 583 * See if we need to allocate a new signal structure >> 584 */ >> 585 newsig = NULL; >> 586 if (atomic_read(&oldsig->count) > 1) { >> 587 newsig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); >> 588 if (!newsig) { >> 589 kmem_cache_free(sighand_cachep, newsighand); >> 590 return -ENOMEM; >> 591 } >> 592 atomic_set(&newsig->count, 1); >> 593 newsig->group_exit = 0; >> 594 newsig->group_exit_code = 0; >> 595 newsig->group_exit_task = NULL; >> 596 newsig->group_stop_count = 0; >> 597 newsig->curr_target = NULL; >> 598 init_sigpending(&newsig->shared_pending); >> 599 } >> 600 >> 601 if (thread_group_empty(current)) 1028 goto no_thread_group; 602 goto no_thread_group; 1029 603 1030 /* 604 /* 1031 * Kill all other threads in the thre 605 * Kill all other threads in the thread group. >> 606 * We must hold tasklist_lock to call zap_other_threads. 1032 */ 607 */ >> 608 read_lock(&tasklist_lock); 1033 spin_lock_irq(lock); 609 spin_lock_irq(lock); 1034 if ((sig->flags & SIGNAL_GROUP_EXIT) !! 610 if (oldsig->group_exit) { 1035 /* 611 /* 1036 * Another group action in pr 612 * Another group action in progress, just 1037 * return so that the signal 613 * return so that the signal is processed. 1038 */ 614 */ 1039 spin_unlock_irq(lock); 615 spin_unlock_irq(lock); >> 616 read_unlock(&tasklist_lock); >> 617 kmem_cache_free(sighand_cachep, newsighand); >> 618 if (newsig) >> 619 kmem_cache_free(signal_cachep, newsig); 1040 return -EAGAIN; 620 return -EAGAIN; 1041 } 621 } >> 622 oldsig->group_exit = 1; >> 623 zap_other_threads(current); >> 624 read_unlock(&tasklist_lock); 1042 625 1043 sig->group_exec_task = tsk; !! 626 /* 1044 sig->notify_count = zap_other_threads !! 627 * Account for the thread group leader hanging around: 1045 if (!thread_group_leader(tsk)) !! 628 */ 1046 sig->notify_count--; !! 629 count = 2; 1047 !! 630 if (current->pid == current->tgid) 1048 while (sig->notify_count) { !! 631 count = 1; 1049 __set_current_state(TASK_KILL !! 632 while (atomic_read(&oldsig->count) > count) { >> 633 oldsig->group_exit_task = current; >> 634 oldsig->notify_count = count; >> 635 __set_current_state(TASK_UNINTERRUPTIBLE); 1050 spin_unlock_irq(lock); 636 spin_unlock_irq(lock); 1051 schedule(); 637 schedule(); 1052 if (__fatal_signal_pending(ts << 1053 goto killed; << 1054 spin_lock_irq(lock); 638 spin_lock_irq(lock); 1055 } 639 } 1056 spin_unlock_irq(lock); 640 spin_unlock_irq(lock); 1057 641 1058 /* 642 /* 1059 * At this point all other threads ha 643 * At this point all other threads have exited, all we have to 1060 * do is to wait for the thread group 644 * do is to wait for the thread group leader to become inactive, 1061 * and to assume its PID: 645 * and to assume its PID: 1062 */ 646 */ 1063 if (!thread_group_leader(tsk)) { !! 647 if (current->pid != current->tgid) { 1064 struct task_struct *leader = !! 648 struct task_struct *leader = current->group_leader, *parent; 1065 !! 649 struct dentry *proc_dentry1, *proc_dentry2; 1066 for (;;) { !! 650 unsigned long state, ptrace; 1067 cgroup_threadgroup_ch << 1068 write_lock_irq(&taskl << 1069 /* << 1070 * Do this under task << 1071 * exit_notify() can' << 1072 */ << 1073 sig->notify_count = - << 1074 if (likely(leader->ex << 1075 break; << 1076 __set_current_state(T << 1077 write_unlock_irq(&tas << 1078 cgroup_threadgroup_ch << 1079 schedule(); << 1080 if (__fatal_signal_pe << 1081 goto killed; << 1082 } << 1083 651 1084 /* 652 /* 1085 * The only record we have of !! 653 * Wait for the thread group leader to be a zombie. 1086 * process, regardless of exe !! 654 * It should already be zombie at this point, most 1087 * All the past CPU time is a !! 655 * of the time. 1088 * from sister threads now de << 1089 * exec, nothing survives fro << 1090 * whose birth marks the true << 1091 * When we take on its identi << 1092 * also take its birthdate (a << 1093 */ 656 */ 1094 tsk->start_time = leader->sta !! 657 while (leader->state != TASK_ZOMBIE) 1095 tsk->start_boottime = leader- !! 658 yield(); 1096 659 1097 BUG_ON(!same_thread_group(lea !! 660 spin_lock(&leader->proc_lock); >> 661 spin_lock(¤t->proc_lock); >> 662 proc_dentry1 = proc_pid_unhash(current); >> 663 proc_dentry2 = proc_pid_unhash(leader); >> 664 write_lock_irq(&tasklist_lock); >> 665 >> 666 if (leader->tgid != current->tgid) >> 667 BUG(); >> 668 if (current->pid == current->tgid) >> 669 BUG(); 1098 /* 670 /* 1099 * An exec() starts a new thr 671 * An exec() starts a new thread group with the 1100 * TGID of the previous threa 672 * TGID of the previous thread group. Rehash the 1101 * two threads with a switche 673 * two threads with a switched PID, and release 1102 * the former thread group le 674 * the former thread group leader: 1103 */ 675 */ >> 676 ptrace = leader->ptrace; >> 677 parent = leader->parent; 1104 678 1105 /* Become a process group lea !! 679 ptrace_unlink(current); 1106 * The old leader becomes a t !! 680 ptrace_unlink(leader); 1107 */ !! 681 remove_parent(current); 1108 exchange_tids(tsk, leader); !! 682 remove_parent(leader); 1109 transfer_pid(leader, tsk, PID !! 683 1110 transfer_pid(leader, tsk, PID !! 684 switch_exec_pids(leader, current); 1111 transfer_pid(leader, tsk, PID !! 685 1112 !! 686 current->parent = current->real_parent = leader->real_parent; 1113 list_replace_rcu(&leader->tas !! 687 leader->parent = leader->real_parent = child_reaper; 1114 list_replace_init(&leader->si !! 688 current->group_leader = current; 1115 !! 689 leader->group_leader = leader; 1116 tsk->group_leader = tsk; !! 690 1117 leader->group_leader = tsk; !! 691 add_parent(current, current->parent); >> 692 add_parent(leader, leader->parent); >> 693 if (ptrace) { >> 694 current->ptrace = ptrace; >> 695 __ptrace_link(current, parent); >> 696 } 1118 697 1119 tsk->exit_signal = SIGCHLD; !! 698 list_del(¤t->tasks); 1120 leader->exit_signal = -1; !! 699 list_add_tail(¤t->tasks, &init_task.tasks); >> 700 current->exit_signal = SIGCHLD; >> 701 state = leader->state; 1121 702 1122 BUG_ON(leader->exit_state != << 1123 leader->exit_state = EXIT_DEA << 1124 /* << 1125 * We are going to release_ta << 1126 * the tracer can sleep in do << 1127 * the tracer won't block aga << 1128 */ << 1129 if (unlikely(leader->ptrace)) << 1130 __wake_up_parent(lead << 1131 write_unlock_irq(&tasklist_lo 703 write_unlock_irq(&tasklist_lock); 1132 cgroup_threadgroup_change_end !! 704 spin_unlock(&leader->proc_lock); >> 705 spin_unlock(¤t->proc_lock); >> 706 proc_pid_flush(proc_dentry1); >> 707 proc_pid_flush(proc_dentry2); 1133 708 >> 709 if (state != TASK_ZOMBIE) >> 710 BUG(); 1134 release_task(leader); 711 release_task(leader); 1135 } !! 712 } 1136 << 1137 sig->group_exec_task = NULL; << 1138 sig->notify_count = 0; << 1139 713 1140 no_thread_group: 714 no_thread_group: 1141 /* we have changed execution domain * << 1142 tsk->exit_signal = SIGCHLD; << 1143 << 1144 BUG_ON(!thread_group_leader(tsk)); << 1145 return 0; << 1146 << 1147 killed: << 1148 /* protects against exit_notify() and << 1149 read_lock(&tasklist_lock); << 1150 sig->group_exec_task = NULL; << 1151 sig->notify_count = 0; << 1152 read_unlock(&tasklist_lock); << 1153 return -EAGAIN; << 1154 } << 1155 << 1156 << 1157 /* << 1158 * This function makes sure the current proce << 1159 * so that flush_signal_handlers can later re << 1160 * disturbing other processes. (Other proces << 1161 * table via the CLONE_SIGHAND option to clon << 1162 */ << 1163 static int unshare_sighand(struct task_struct << 1164 { << 1165 struct sighand_struct *oldsighand = m << 1166 << 1167 if (refcount_read(&oldsighand->count) << 1168 struct sighand_struct *newsig << 1169 /* << 1170 * This ->sighand is shared w << 1171 * but not CLONE_THREAD task, << 1172 */ << 1173 newsighand = kmem_cache_alloc << 1174 if (!newsighand) << 1175 return -ENOMEM; << 1176 << 1177 refcount_set(&newsighand->cou << 1178 715 1179 write_lock_irq(&tasklist_lock !! 716 write_lock_irq(&tasklist_lock); 1180 spin_lock(&oldsighand->sigloc !! 717 spin_lock(&oldsighand->siglock); 1181 memcpy(newsighand->action, ol !! 718 spin_lock(&newsighand->siglock); 1182 sizeof(newsighand->act !! 719 1183 rcu_assign_pointer(me->sighan !! 720 if (current == oldsig->curr_target) 1184 spin_unlock(&oldsighand->sigl !! 721 oldsig->curr_target = next_thread(current); 1185 write_unlock_irq(&tasklist_lo !! 722 if (newsig) 1186 !! 723 current->signal = newsig; 1187 __cleanup_sighand(oldsighand) !! 724 current->sighand = newsighand; 1188 } !! 725 init_sigpending(¤t->pending); >> 726 recalc_sigpending(); >> 727 >> 728 spin_unlock(&newsighand->siglock); >> 729 spin_unlock(&oldsighand->siglock); >> 730 write_unlock_irq(&tasklist_lock); >> 731 >> 732 if (newsig && atomic_dec_and_test(&oldsig->count)) >> 733 kmem_cache_free(signal_cachep, oldsig); >> 734 >> 735 if (atomic_dec_and_test(&oldsighand->count)) >> 736 kmem_cache_free(sighand_cachep, oldsighand); >> 737 >> 738 if (!thread_group_empty(current)) >> 739 BUG(); >> 740 if (current->tgid != current->pid) >> 741 BUG(); 1189 return 0; 742 return 0; 1190 } 743 } 1191 !! 744 1192 char *__get_task_comm(char *buf, size_t buf_s << 1193 { << 1194 task_lock(tsk); << 1195 /* Always NUL terminated and zero-pad << 1196 strscpy_pad(buf, tsk->comm, buf_size) << 1197 task_unlock(tsk); << 1198 return buf; << 1199 } << 1200 EXPORT_SYMBOL_GPL(__get_task_comm); << 1201 << 1202 /* 745 /* 1203 * These functions flushes out all traces of 746 * These functions flushes out all traces of the currently running executable 1204 * so that a new one can be started 747 * so that a new one can be started 1205 */ 748 */ 1206 749 1207 void __set_task_comm(struct task_struct *tsk, !! 750 static inline void flush_old_files(struct files_struct * files) 1208 { << 1209 task_lock(tsk); << 1210 trace_task_rename(tsk, buf); << 1211 strscpy_pad(tsk->comm, buf, sizeof(ts << 1212 task_unlock(tsk); << 1213 perf_event_comm(tsk, exec); << 1214 } << 1215 << 1216 /* << 1217 * Calling this is the point of no return. No << 1218 * seen by userspace since either the process << 1219 * signal (via de_thread() or coredump), or w << 1220 * (after exec_mmap()) by search_binary_handl << 1221 */ << 1222 int begin_new_exec(struct linux_binprm * bprm << 1223 { 751 { 1224 struct task_struct *me = current; !! 752 long j = -1; 1225 int retval; << 1226 753 1227 /* Once we are committed compute the !! 754 spin_lock(&files->file_lock); 1228 retval = bprm_creds_from_file(bprm); !! 755 for (;;) { 1229 if (retval) !! 756 unsigned long set, i; 1230 return retval; << 1231 757 1232 /* !! 758 j++; 1233 * This tracepoint marks the point be !! 759 i = j * __NFDBITS; 1234 * the current task is still unchange !! 760 if (i >= files->max_fds || i >= files->max_fdset) 1235 * no return). The later "sched_proce !! 761 break; 1236 * the current task has successfully !! 762 set = files->close_on_exec->fds_bits[j]; 1237 */ !! 763 if (!set) 1238 trace_sched_prepare_exec(current, bpr !! 764 continue; 1239 !! 765 files->close_on_exec->fds_bits[j] = 0; 1240 /* !! 766 spin_unlock(&files->file_lock); 1241 * Ensure all future errors are fatal !! 767 for ( ; set ; i++,set >>= 1) { 1242 */ !! 768 if (set & 1) { 1243 bprm->point_of_no_return = true; !! 769 sys_close(i); 1244 !! 770 } 1245 /* !! 771 } 1246 * Make this the only thread in the t !! 772 spin_lock(&files->file_lock); 1247 */ << 1248 retval = de_thread(me); << 1249 if (retval) << 1250 goto out; << 1251 773 1252 /* !! 774 } 1253 * Cancel any io_uring activity acros !! 775 spin_unlock(&files->file_lock); 1254 */ !! 776 } 1255 io_uring_task_cancel(); << 1256 777 1257 /* Ensure the files table is not shar !! 778 int flush_old_exec(struct linux_binprm * bprm) 1258 retval = unshare_files(); !! 779 { 1259 if (retval) !! 780 char * name; 1260 goto out; !! 781 int i, ch, retval; 1261 782 1262 /* 783 /* 1263 * Must be called _before_ exec_mmap( !! 784 * Make sure we have a private signal table and that 1264 * not visible until then. Doing it h !! 785 * we are unassociated from the previous thread group. 1265 * we don't race against replace_mm_e << 1266 */ 786 */ 1267 retval = set_mm_exe_file(bprm->mm, bp !! 787 retval = de_thread(current); 1268 if (retval) 788 if (retval) 1269 goto out; 789 goto out; 1270 790 1271 /* If the binary is not readable then << 1272 would_dump(bprm, bprm->file); << 1273 if (bprm->have_execfd) << 1274 would_dump(bprm, bprm->execut << 1275 << 1276 /* 791 /* 1277 * Release all of the old mmap stuff 792 * Release all of the old mmap stuff 1278 */ 793 */ 1279 acct_arg_size(bprm, 0); << 1280 retval = exec_mmap(bprm->mm); 794 retval = exec_mmap(bprm->mm); 1281 if (retval) 795 if (retval) 1282 goto out; 796 goto out; 1283 797 1284 bprm->mm = NULL; !! 798 bprm->mm = NULL; /* We're using it now */ 1285 << 1286 retval = exec_task_namespaces(); << 1287 if (retval) << 1288 goto out_unlock; << 1289 << 1290 #ifdef CONFIG_POSIX_TIMERS << 1291 spin_lock_irq(&me->sighand->siglock); << 1292 posix_cpu_timers_exit(me); << 1293 spin_unlock_irq(&me->sighand->siglock << 1294 exit_itimers(me); << 1295 flush_itimer_signals(); << 1296 #endif << 1297 << 1298 /* << 1299 * Make the signal table private. << 1300 */ << 1301 retval = unshare_sighand(me); << 1302 if (retval) << 1303 goto out_unlock; << 1304 << 1305 me->flags &= ~(PF_RANDOMIZE | PF_FORK << 1306 PF_NO << 1307 flush_thread(); << 1308 me->personality &= ~bprm->per_clear; << 1309 << 1310 clear_syscall_work_syscall_user_dispa << 1311 799 1312 /* !! 800 /* This is the point of no return */ 1313 * We have to apply CLOEXEC before we << 1314 * dumpable (in setup_new_exec) to av << 1315 * trying to access the should-be-clo << 1316 * undergoing exec(2). << 1317 */ << 1318 do_close_on_exec(me->files); << 1319 801 1320 if (bprm->secureexec) { !! 802 current->sas_ss_sp = current->sas_ss_size = 0; 1321 /* Make sure parent cannot si << 1322 me->pdeath_signal = 0; << 1323 803 1324 /* !! 804 if (current->euid == current->uid && current->egid == current->gid) 1325 * For secureexec, reset the !! 805 current->mm->dumpable = 1; 1326 * avoid bad behavior from th !! 806 name = bprm->filename; 1327 * happen before arch_pick_mm !! 807 for (i=0; (ch = *(name++)) != '\0';) { 1328 * RLIMIT_STACK, but after th !! 808 if (ch == '/') 1329 * needing to clean up the ch !! 809 i = 0; 1330 */ !! 810 else 1331 if (bprm->rlim_stack.rlim_cur !! 811 if (i < 15) 1332 bprm->rlim_stack.rlim !! 812 current->comm[i++] = ch; 1333 } 813 } >> 814 current->comm[i] = '\0'; 1334 815 1335 me->sas_ss_sp = me->sas_ss_size = 0; !! 816 flush_thread(); 1336 << 1337 /* << 1338 * Figure out dumpability. Note that << 1339 * is wrong, but userspace depends on << 1340 * bprm->secureexec instead. << 1341 */ << 1342 if (bprm->interp_flags & BINPRM_FLAGS << 1343 !(uid_eq(current_euid(), current_ << 1344 gid_eq(current_egid(), current_ << 1345 set_dumpable(current->mm, sui << 1346 else << 1347 set_dumpable(current->mm, SUI << 1348 817 1349 perf_event_exec(); !! 818 if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || 1350 __set_task_comm(me, kbasename(bprm->f !! 819 permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL)) >> 820 current->mm->dumpable = 0; 1351 821 1352 /* An exec changes our domain. We are 822 /* An exec changes our domain. We are no longer part of the thread 1353 group */ 823 group */ 1354 WRITE_ONCE(me->self_exec_id, me->self << 1355 flush_signal_handlers(me, 0); << 1356 << 1357 retval = set_cred_ucounts(bprm->cred) << 1358 if (retval < 0) << 1359 goto out_unlock; << 1360 824 1361 /* !! 825 current->self_exec_id++; 1362 * install the new credentials for th !! 826 1363 */ !! 827 flush_signal_handlers(current, 0); 1364 security_bprm_committing_creds(bprm); !! 828 flush_old_files(current->files); 1365 !! 829 exit_itimers(current); 1366 commit_creds(bprm->cred); << 1367 bprm->cred = NULL; << 1368 << 1369 /* << 1370 * Disable monitoring for regular use << 1371 * when executing setuid binaries. Mu << 1372 * wait until new credentials are com << 1373 * by commit_creds() above << 1374 */ << 1375 if (get_dumpable(me->mm) != SUID_DUMP << 1376 perf_event_exit_task(me); << 1377 /* << 1378 * cred_guard_mutex must be held at l << 1379 * ptrace_attach() from altering our << 1380 * credentials; any time after this i << 1381 */ << 1382 security_bprm_committed_creds(bprm); << 1383 830 1384 /* Pass the opened binary to the inte << 1385 if (bprm->have_execfd) { << 1386 retval = get_unused_fd_flags( << 1387 if (retval < 0) << 1388 goto out_unlock; << 1389 fd_install(retval, bprm->exec << 1390 bprm->executable = NULL; << 1391 bprm->execfd = retval; << 1392 } << 1393 return 0; 831 return 0; 1394 832 1395 out_unlock: << 1396 up_write(&me->signal->exec_update_loc << 1397 if (!bprm->cred) << 1398 mutex_unlock(&me->signal->cre << 1399 << 1400 out: 833 out: 1401 return retval; 834 return retval; 1402 } 835 } 1403 EXPORT_SYMBOL(begin_new_exec); << 1404 << 1405 void would_dump(struct linux_binprm *bprm, st << 1406 { << 1407 struct inode *inode = file_inode(file << 1408 struct mnt_idmap *idmap = file_mnt_id << 1409 if (inode_permission(idmap, inode, MA << 1410 struct user_namespace *old, * << 1411 bprm->interp_flags |= BINPRM_ << 1412 << 1413 /* Ensure mm->user_ns contain << 1414 user_ns = old = bprm->mm->use << 1415 while ((user_ns != &init_user << 1416 !privileged_wrt_inode_ << 1417 user_ns = user_ns->pa << 1418 << 1419 if (old != user_ns) { << 1420 bprm->mm->user_ns = g << 1421 put_user_ns(old); << 1422 } << 1423 } << 1424 } << 1425 EXPORT_SYMBOL(would_dump); << 1426 << 1427 void setup_new_exec(struct linux_binprm * bpr << 1428 { << 1429 /* Setup things that can depend upon << 1430 struct task_struct *me = current; << 1431 << 1432 arch_pick_mmap_layout(me->mm, &bprm-> << 1433 << 1434 arch_setup_new_exec(); << 1435 << 1436 /* Set the new mm task size. We have << 1437 * depend on TIF_32BIT which is only << 1438 * some architectures like powerpc << 1439 */ << 1440 me->mm->task_size = TASK_SIZE; << 1441 up_write(&me->signal->exec_update_loc << 1442 mutex_unlock(&me->signal->cred_guard_ << 1443 } << 1444 EXPORT_SYMBOL(setup_new_exec); << 1445 836 1446 /* Runs immediately before start_thread() tak !! 837 EXPORT_SYMBOL(flush_old_exec); 1447 void finalize_exec(struct linux_binprm *bprm) << 1448 { << 1449 /* Store any stack rlimit changes bef << 1450 task_lock(current->group_leader); << 1451 current->signal->rlim[RLIMIT_STACK] = << 1452 task_unlock(current->group_leader); << 1453 } << 1454 EXPORT_SYMBOL(finalize_exec); << 1455 838 1456 /* 839 /* 1457 * Prepare credentials and lock ->cred_guard_ !! 840 * We mustn't allow tracing of suid binaries, unless 1458 * setup_new_exec() commits the new creds and !! 841 * the tracer has the capability to trace anything.. 1459 * Or, if exec fails before, free_bprm() shou << 1460 * and unlock. << 1461 */ 842 */ 1462 static int prepare_bprm_creds(struct linux_bi !! 843 static inline int must_not_trace_exec(struct task_struct * p) 1463 { << 1464 if (mutex_lock_interruptible(¤t << 1465 return -ERESTARTNOINTR; << 1466 << 1467 bprm->cred = prepare_exec_creds(); << 1468 if (likely(bprm->cred)) << 1469 return 0; << 1470 << 1471 mutex_unlock(¤t->signal->cred_g << 1472 return -ENOMEM; << 1473 } << 1474 << 1475 /* Matches do_open_execat() */ << 1476 static void do_close_execat(struct file *file << 1477 { << 1478 if (file) << 1479 fput(file); << 1480 } << 1481 << 1482 static void free_bprm(struct linux_binprm *bp << 1483 { 844 { 1484 if (bprm->mm) { !! 845 return (p->ptrace & PT_PTRACED) && !(p->ptrace & PT_PTRACE_CAP); 1485 acct_arg_size(bprm, 0); << 1486 mmput(bprm->mm); << 1487 } << 1488 free_arg_pages(bprm); << 1489 if (bprm->cred) { << 1490 mutex_unlock(¤t->signal << 1491 abort_creds(bprm->cred); << 1492 } << 1493 do_close_execat(bprm->file); << 1494 if (bprm->executable) << 1495 fput(bprm->executable); << 1496 /* If a binfmt changed the interp, fr << 1497 if (bprm->interp != bprm->filename) << 1498 kfree(bprm->interp); << 1499 kfree(bprm->fdpath); << 1500 kfree(bprm); << 1501 } 846 } 1502 847 1503 static struct linux_binprm *alloc_bprm(int fd !! 848 /* >> 849 * Fill the binprm structure from the inode. >> 850 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes >> 851 */ >> 852 int prepare_binprm(struct linux_binprm *bprm) 1504 { 853 { 1505 struct linux_binprm *bprm; !! 854 int mode; 1506 struct file *file; !! 855 struct inode * inode = bprm->file->f_dentry->d_inode; 1507 int retval = -ENOMEM; !! 856 int retval; 1508 << 1509 file = do_open_execat(fd, filename, f << 1510 if (IS_ERR(file)) << 1511 return ERR_CAST(file); << 1512 << 1513 bprm = kzalloc(sizeof(*bprm), GFP_KER << 1514 if (!bprm) { << 1515 do_close_execat(file); << 1516 return ERR_PTR(-ENOMEM); << 1517 } << 1518 << 1519 bprm->file = file; << 1520 857 1521 if (fd == AT_FDCWD || filename->name[ !! 858 mode = inode->i_mode; 1522 bprm->filename = filename->na !! 859 /* 1523 } else { !! 860 * Check execute perms again - if the caller has CAP_DAC_OVERRIDE, 1524 if (filename->name[0] == '\0' !! 861 * vfs_permission lets a non-executable through 1525 bprm->fdpath = kaspri !! 862 */ 1526 else !! 863 if (!(mode & 0111)) /* with at least _one_ execute bit set */ 1527 bprm->fdpath = kaspri !! 864 return -EACCES; 1528 !! 865 if (bprm->file->f_op == NULL) 1529 if (!bprm->fdpath) !! 866 return -EACCES; 1530 goto out_free; !! 867 >> 868 bprm->e_uid = current->euid; >> 869 bprm->e_gid = current->egid; >> 870 >> 871 if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) { >> 872 /* Set-uid? */ >> 873 if (mode & S_ISUID) >> 874 bprm->e_uid = inode->i_uid; 1531 875 >> 876 /* Set-gid? */ 1532 /* 877 /* 1533 * Record that a name derived !! 878 * If setgid is set but no group execute bit then this 1534 * inaccessible after exec. !! 879 * is a candidate for mandatory locking, not a setgid 1535 * choose to fail when the ex !! 880 * executable. 1536 * interpreter and an open fi << 1537 * the interpreter. This mak << 1538 * than having the interprete << 1539 * when it finds the executab << 1540 */ 881 */ 1541 if (get_close_on_exec(fd)) !! 882 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) 1542 bprm->interp_flags |= !! 883 bprm->e_gid = inode->i_gid; 1543 << 1544 bprm->filename = bprm->fdpath << 1545 } 884 } 1546 bprm->interp = bprm->filename; << 1547 885 1548 retval = bprm_mm_init(bprm); !! 886 /* fill in binprm security blob */ 1549 if (!retval) !! 887 retval = security_bprm_set(bprm); 1550 return bprm; !! 888 if (retval) >> 889 return retval; 1551 890 1552 out_free: !! 891 memset(bprm->buf,0,BINPRM_BUF_SIZE); 1553 free_bprm(bprm); !! 892 return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE); 1554 return ERR_PTR(retval); << 1555 } 893 } 1556 894 1557 int bprm_change_interp(const char *interp, st !! 895 EXPORT_SYMBOL(prepare_binprm); 1558 { << 1559 /* If a binfmt changed the interp, fr << 1560 if (bprm->interp != bprm->filename) << 1561 kfree(bprm->interp); << 1562 bprm->interp = kstrdup(interp, GFP_KE << 1563 if (!bprm->interp) << 1564 return -ENOMEM; << 1565 return 0; << 1566 } << 1567 EXPORT_SYMBOL(bprm_change_interp); << 1568 896 1569 /* 897 /* 1570 * determine how safe it is to execute the pr !! 898 * This function is used to produce the new IDs and capabilities 1571 * - the caller must hold ->cred_guard_mutex !! 899 * from the old ones and the file's capabilities. 1572 * PTRACE_ATTACH or seccomp thread-sync !! 900 * >> 901 * The formula used for evolving capabilities is: >> 902 * >> 903 * pI' = pI >> 904 * (***) pP' = (fP & X) | (fI & pI) >> 905 * pE' = pP' & fE [NB. fE is 0 or ~0] >> 906 * >> 907 * I=Inheritable, P=Permitted, E=Effective // p=process, f=file >> 908 * ' indicates post-exec(), and X is the global 'cap_bset'. >> 909 * 1573 */ 910 */ 1574 static void check_unsafe_exec(struct linux_bi << 1575 { << 1576 struct task_struct *p = current, *t; << 1577 unsigned n_fs; << 1578 << 1579 if (p->ptrace) << 1580 bprm->unsafe |= LSM_UNSAFE_PT << 1581 911 1582 /* !! 912 void compute_creds(struct linux_binprm *bprm) 1583 * This isn't strictly necessary, but << 1584 * mess up. << 1585 */ << 1586 if (task_no_new_privs(current)) << 1587 bprm->unsafe |= LSM_UNSAFE_NO << 1588 << 1589 /* << 1590 * If another task is sharing our fs, << 1591 * suid exec because the differently << 1592 * will be able to manipulate the cur << 1593 * It would be nice to force an unsha << 1594 */ << 1595 n_fs = 1; << 1596 spin_lock(&p->fs->lock); << 1597 rcu_read_lock(); << 1598 for_other_threads(p, t) { << 1599 if (t->fs == p->fs) << 1600 n_fs++; << 1601 } << 1602 rcu_read_unlock(); << 1603 << 1604 /* "users" and "in_exec" locked for c << 1605 if (p->fs->users > n_fs) << 1606 bprm->unsafe |= LSM_UNSAFE_SH << 1607 else << 1608 p->fs->in_exec = 1; << 1609 spin_unlock(&p->fs->lock); << 1610 } << 1611 << 1612 static void bprm_fill_uid(struct linux_binprm << 1613 { 913 { 1614 /* Handle suid and sgid on files */ !! 914 task_lock(current); 1615 struct mnt_idmap *idmap; !! 915 if (bprm->e_uid != current->uid || bprm->e_gid != current->gid) { 1616 struct inode *inode = file_inode(file !! 916 current->mm->dumpable = 0; 1617 unsigned int mode; !! 917 1618 vfsuid_t vfsuid; !! 918 if (must_not_trace_exec(current) 1619 vfsgid_t vfsgid; !! 919 || atomic_read(¤t->fs->count) > 1 1620 int err; !! 920 || atomic_read(¤t->files->count) > 1 1621 !! 921 || atomic_read(¤t->sighand->count) > 1) { 1622 if (!mnt_may_suid(file->f_path.mnt)) !! 922 if(!capable(CAP_SETUID)) { 1623 return; !! 923 bprm->e_uid = current->uid; 1624 !! 924 bprm->e_gid = current->gid; 1625 if (task_no_new_privs(current)) !! 925 } 1626 return; !! 926 } 1627 << 1628 mode = READ_ONCE(inode->i_mode); << 1629 if (!(mode & (S_ISUID|S_ISGID))) << 1630 return; << 1631 << 1632 idmap = file_mnt_idmap(file); << 1633 << 1634 /* Be careful if suid/sgid is set */ << 1635 inode_lock(inode); << 1636 << 1637 /* Atomically reload and check mode/u << 1638 mode = inode->i_mode; << 1639 vfsuid = i_uid_into_vfsuid(idmap, ino << 1640 vfsgid = i_gid_into_vfsgid(idmap, ino << 1641 err = inode_permission(idmap, inode, << 1642 inode_unlock(inode); << 1643 << 1644 /* Did the exec bit vanish out from u << 1645 if (err) << 1646 return; << 1647 << 1648 /* We ignore suid/sgid if there are n << 1649 if (!vfsuid_has_mapping(bprm->cred->u << 1650 !vfsgid_has_mapping(bprm->cred->u << 1651 return; << 1652 << 1653 if (mode & S_ISUID) { << 1654 bprm->per_clear |= PER_CLEAR_ << 1655 bprm->cred->euid = vfsuid_int << 1656 } << 1657 << 1658 if ((mode & (S_ISGID | S_IXGRP)) == ( << 1659 bprm->per_clear |= PER_CLEAR_ << 1660 bprm->cred->egid = vfsgid_int << 1661 } 927 } 1662 } << 1663 928 1664 /* !! 929 current->suid = current->euid = current->fsuid = bprm->e_uid; 1665 * Compute brpm->cred based upon the final bi !! 930 current->sgid = current->egid = current->fsgid = bprm->e_gid; 1666 */ << 1667 static int bprm_creds_from_file(struct linux_ << 1668 { << 1669 /* Compute creds based on which file? << 1670 struct file *file = bprm->execfd_cred << 1671 931 1672 bprm_fill_uid(bprm, file); !! 932 task_unlock(current); 1673 return security_bprm_creds_from_file( << 1674 } << 1675 933 1676 /* !! 934 security_bprm_compute_creds(bprm); 1677 * Fill the binprm structure from the inode. << 1678 * Read the first BINPRM_BUF_SIZE bytes << 1679 * << 1680 * This may be called multiple times for bina << 1681 */ << 1682 static int prepare_binprm(struct linux_binprm << 1683 { << 1684 loff_t pos = 0; << 1685 << 1686 memset(bprm->buf, 0, BINPRM_BUF_SIZE) << 1687 return kernel_read(bprm->file, bprm-> << 1688 } 935 } 1689 936 1690 /* !! 937 EXPORT_SYMBOL(compute_creds); 1691 * Arguments are '\0' separated strings found << 1692 * points to; chop off the first by relocatin << 1693 * the first '\0' encountered. << 1694 */ << 1695 int remove_arg_zero(struct linux_binprm *bprm << 1696 { << 1697 unsigned long offset; << 1698 char *kaddr; << 1699 struct page *page; << 1700 << 1701 if (!bprm->argc) << 1702 return 0; << 1703 << 1704 do { << 1705 offset = bprm->p & ~PAGE_MASK << 1706 page = get_arg_page(bprm, bpr << 1707 if (!page) << 1708 return -EFAULT; << 1709 kaddr = kmap_local_page(page) << 1710 938 1711 for (; offset < PAGE_SIZE && !! 939 void remove_arg_zero(struct linux_binprm *bprm) 1712 offset++, bpr !! 940 { 1713 ; !! 941 if (bprm->argc) { 1714 !! 942 unsigned long offset; 1715 kunmap_local(kaddr); !! 943 char * kaddr; 1716 put_arg_page(page); !! 944 struct page *page; 1717 } while (offset == PAGE_SIZE); << 1718 945 1719 bprm->p++; !! 946 offset = bprm->p % PAGE_SIZE; 1720 bprm->argc--; !! 947 goto inside; 1721 948 1722 return 0; !! 949 while (bprm->p++, *(kaddr+offset++)) { >> 950 if (offset != PAGE_SIZE) >> 951 continue; >> 952 offset = 0; >> 953 kunmap_atomic(kaddr, KM_USER0); >> 954 inside: >> 955 page = bprm->page[bprm->p/PAGE_SIZE]; >> 956 kaddr = kmap_atomic(page, KM_USER0); >> 957 } >> 958 kunmap_atomic(kaddr, KM_USER0); >> 959 bprm->argc--; >> 960 } 1723 } 961 } >> 962 1724 EXPORT_SYMBOL(remove_arg_zero); 963 EXPORT_SYMBOL(remove_arg_zero); 1725 964 1726 #define printable(c) (((c)=='\t') || ((c)=='\ << 1727 /* 965 /* 1728 * cycle the list of binary formats handler, 966 * cycle the list of binary formats handler, until one recognizes the image 1729 */ 967 */ 1730 static int search_binary_handler(struct linux !! 968 int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) 1731 { 969 { 1732 bool need_retry = IS_ENABLED(CONFIG_M !! 970 int try,retval=0; 1733 struct linux_binfmt *fmt; 971 struct linux_binfmt *fmt; 1734 int retval; !! 972 #ifdef __alpha__ >> 973 /* handle /sbin/loader.. */ >> 974 { >> 975 struct exec * eh = (struct exec *) bprm->buf; 1735 976 1736 retval = prepare_binprm(bprm); !! 977 if (!bprm->loader && eh->fh.f_magic == 0x183 && 1737 if (retval < 0) !! 978 (eh->fh.f_flags & 0x3000) == 0x3000) 1738 return retval; !! 979 { >> 980 struct file * file; >> 981 unsigned long loader; >> 982 >> 983 allow_write_access(bprm->file); >> 984 fput(bprm->file); >> 985 bprm->file = NULL; >> 986 >> 987 loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); >> 988 >> 989 file = open_exec("/sbin/loader"); >> 990 retval = PTR_ERR(file); >> 991 if (IS_ERR(file)) >> 992 return retval; >> 993 >> 994 /* Remember if the application is TASO. */ >> 995 bprm->sh_bang = eh->ah.entry < 0x100000000; 1739 996 >> 997 bprm->file = file; >> 998 bprm->loader = loader; >> 999 retval = prepare_binprm(bprm); >> 1000 if (retval<0) >> 1001 return retval; >> 1002 /* should call search_binary_handler recursively here, >> 1003 but it does not matter */ >> 1004 } >> 1005 } >> 1006 #endif 1740 retval = security_bprm_check(bprm); 1007 retval = security_bprm_check(bprm); 1741 if (retval) 1008 if (retval) 1742 return retval; 1009 return retval; 1743 1010 1744 retval = -ENOENT; !! 1011 /* kernel module loader fixup */ 1745 retry: !! 1012 /* so we don't try to load run modprobe in kernel space. */ 1746 read_lock(&binfmt_lock); !! 1013 set_fs(USER_DS); 1747 list_for_each_entry(fmt, &formats, lh !! 1014 for (try=0; try<2; try++) { 1748 if (!try_module_get(fmt->modu << 1749 continue; << 1750 read_unlock(&binfmt_lock); << 1751 << 1752 retval = fmt->load_binary(bpr << 1753 << 1754 read_lock(&binfmt_lock); 1015 read_lock(&binfmt_lock); 1755 put_binfmt(fmt); !! 1016 for (fmt = formats ; fmt ; fmt = fmt->next) { 1756 if (bprm->point_of_no_return !! 1017 int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary; >> 1018 if (!fn) >> 1019 continue; >> 1020 if (!try_module_get(fmt->module)) >> 1021 continue; 1757 read_unlock(&binfmt_l 1022 read_unlock(&binfmt_lock); 1758 return retval; !! 1023 retval = fn(bprm, regs); >> 1024 if (retval >= 0) { >> 1025 put_binfmt(fmt); >> 1026 allow_write_access(bprm->file); >> 1027 if (bprm->file) >> 1028 fput(bprm->file); >> 1029 bprm->file = NULL; >> 1030 current->did_exec = 1; >> 1031 return retval; >> 1032 } >> 1033 read_lock(&binfmt_lock); >> 1034 put_binfmt(fmt); >> 1035 if (retval != -ENOEXEC || bprm->mm == NULL) >> 1036 break; >> 1037 if (!bprm->file) { >> 1038 read_unlock(&binfmt_lock); >> 1039 return retval; >> 1040 } >> 1041 } >> 1042 read_unlock(&binfmt_lock); >> 1043 if (retval != -ENOEXEC || bprm->mm == NULL) { >> 1044 break; >> 1045 #ifdef CONFIG_KMOD >> 1046 }else{ >> 1047 #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) >> 1048 if (printable(bprm->buf[0]) && >> 1049 printable(bprm->buf[1]) && >> 1050 printable(bprm->buf[2]) && >> 1051 printable(bprm->buf[3])) >> 1052 break; /* -ENOEXEC */ >> 1053 request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2])); >> 1054 #endif 1759 } 1055 } 1760 } 1056 } 1761 read_unlock(&binfmt_lock); << 1762 << 1763 if (need_retry) { << 1764 if (printable(bprm->buf[0]) & << 1765 printable(bprm->buf[2]) & << 1766 return retval; << 1767 if (request_module("binfmt-%0 << 1768 return retval; << 1769 need_retry = false; << 1770 goto retry; << 1771 } << 1772 << 1773 return retval; 1057 return retval; 1774 } 1058 } 1775 1059 1776 /* binfmt handlers will call back into begin_ !! 1060 EXPORT_SYMBOL(search_binary_handler); 1777 static int exec_binprm(struct linux_binprm *b << 1778 { << 1779 pid_t old_pid, old_vpid; << 1780 int ret, depth; << 1781 << 1782 /* Need to fetch pid before load_bina << 1783 old_pid = current->pid; << 1784 rcu_read_lock(); << 1785 old_vpid = task_pid_nr_ns(current, ta << 1786 rcu_read_unlock(); << 1787 << 1788 /* This allows 4 levels of binfmt rew << 1789 for (depth = 0;; depth++) { << 1790 struct file *exec; << 1791 if (depth > 5) << 1792 return -ELOOP; << 1793 << 1794 ret = search_binary_handler(b << 1795 if (ret < 0) << 1796 return ret; << 1797 if (!bprm->interpreter) << 1798 break; << 1799 << 1800 exec = bprm->file; << 1801 bprm->file = bprm->interprete << 1802 bprm->interpreter = NULL; << 1803 << 1804 if (unlikely(bprm->have_execf << 1805 if (bprm->executable) << 1806 fput(exec); << 1807 return -ENOEX << 1808 } << 1809 bprm->executable = ex << 1810 } else << 1811 fput(exec); << 1812 } << 1813 << 1814 audit_bprm(bprm); << 1815 trace_sched_process_exec(current, old << 1816 ptrace_event(PTRACE_EVENT_EXEC, old_v << 1817 proc_exec_connector(current); << 1818 return 0; << 1819 } << 1820 1061 1821 static int bprm_execve(struct linux_binprm *b !! 1062 /* >> 1063 * sys_execve() executes a new program. >> 1064 */ >> 1065 int do_execve(char * filename, >> 1066 char __user *__user *argv, >> 1067 char __user *__user *envp, >> 1068 struct pt_regs * regs) 1822 { 1069 { >> 1070 struct linux_binprm bprm; >> 1071 struct file *file; 1823 int retval; 1072 int retval; 1824 1073 1825 retval = prepare_bprm_creds(bprm); !! 1074 sched_balance_exec(); 1826 if (retval) << 1827 return retval; << 1828 1075 1829 /* !! 1076 file = open_exec(filename); 1830 * Check for unsafe execution states << 1831 * will call back into begin_new_exec << 1832 * where setuid-ness is evaluated. << 1833 */ << 1834 check_unsafe_exec(bprm); << 1835 current->in_execve = 1; << 1836 sched_mm_cid_before_execve(current); << 1837 << 1838 sched_exec(); << 1839 << 1840 /* Set the unchanging part of bprm->c << 1841 retval = security_bprm_creds_for_exec << 1842 if (retval) << 1843 goto out; << 1844 1077 1845 retval = ccs_exec_binprm(bprm); !! 1078 retval = PTR_ERR(file); 1846 if (retval < 0) !! 1079 if (IS_ERR(file)) 1847 goto out; !! 1080 return retval; 1848 1081 1849 sched_mm_cid_after_execve(current); !! 1082 bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); 1850 /* execve succeeded */ !! 1083 memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); 1851 current->fs->in_exec = 0; << 1852 current->in_execve = 0; << 1853 rseq_execve(current); << 1854 user_events_execve(current); << 1855 acct_update_integrals(current); << 1856 task_numa_free(current, false); << 1857 return retval; << 1858 1084 1859 out: !! 1085 bprm.file = file; 1860 /* !! 1086 bprm.filename = filename; 1861 * If past the point of no return ens !! 1087 bprm.interp = filename; 1862 * returns to the userspace process. !! 1088 bprm.sh_bang = 0; 1863 * signal if present otherwise termin !! 1089 bprm.loader = 0; 1864 * SIGSEGV. !! 1090 bprm.exec = 0; 1865 */ !! 1091 bprm.security = NULL; 1866 if (bprm->point_of_no_return && !fata !! 1092 bprm.mm = mm_alloc(); 1867 force_fatal_sig(SIGSEGV); !! 1093 retval = -ENOMEM; 1868 !! 1094 if (!bprm.mm) 1869 sched_mm_cid_after_execve(current); !! 1095 goto out_file; 1870 current->fs->in_exec = 0; << 1871 current->in_execve = 0; << 1872 1096 1873 return retval; !! 1097 retval = init_new_context(current, bprm.mm); 1874 } !! 1098 if (retval < 0) >> 1099 goto out_mm; 1875 1100 1876 static int do_execveat_common(int fd, struct !! 1101 bprm.argc = count(argv, bprm.p / sizeof(void *)); 1877 struct user_arg !! 1102 if ((retval = bprm.argc) < 0) 1878 struct user_arg !! 1103 goto out_mm; 1879 int flags) << 1880 { << 1881 struct linux_binprm *bprm; << 1882 int retval; << 1883 1104 1884 if (IS_ERR(filename)) !! 1105 bprm.envc = count(envp, bprm.p / sizeof(void *)); 1885 return PTR_ERR(filename); !! 1106 if ((retval = bprm.envc) < 0) >> 1107 goto out_mm; 1886 1108 1887 /* !! 1109 retval = security_bprm_alloc(&bprm); 1888 * We move the actual failure in case !! 1110 if (retval) 1889 * set*uid() to execve() because too !! 1111 goto out; 1890 * don't check setuid() return code. << 1891 * whether NPROC limit is still excee << 1892 */ << 1893 if ((current->flags & PF_NPROC_EXCEED << 1894 is_rlimit_overlimit(current_ucoun << 1895 retval = -EAGAIN; << 1896 goto out_ret; << 1897 } << 1898 << 1899 /* We're below the limit (still or ag << 1900 * further execve() calls fail. */ << 1901 current->flags &= ~PF_NPROC_EXCEEDED; << 1902 << 1903 bprm = alloc_bprm(fd, filename, flags << 1904 if (IS_ERR(bprm)) { << 1905 retval = PTR_ERR(bprm); << 1906 goto out_ret; << 1907 } << 1908 << 1909 retval = count(argv, MAX_ARG_STRINGS) << 1910 if (retval == 0) << 1911 pr_warn_once("process '%s' la << 1912 current->comm, b << 1913 if (retval < 0) << 1914 goto out_free; << 1915 bprm->argc = retval; << 1916 1112 1917 retval = count(envp, MAX_ARG_STRINGS) !! 1113 retval = prepare_binprm(&bprm); 1918 if (retval < 0) 1114 if (retval < 0) 1919 goto out_free; !! 1115 goto out; 1920 bprm->envc = retval; << 1921 1116 1922 retval = bprm_stack_limits(bprm); !! 1117 retval = copy_strings_kernel(1, &bprm.filename, &bprm); 1923 if (retval < 0) 1118 if (retval < 0) 1924 goto out_free; !! 1119 goto out; 1925 1120 1926 retval = copy_string_kernel(bprm->fil !! 1121 bprm.exec = bprm.p; >> 1122 retval = copy_strings(bprm.envc, envp, &bprm); 1927 if (retval < 0) 1123 if (retval < 0) 1928 goto out_free; !! 1124 goto out; 1929 bprm->exec = bprm->p; << 1930 1125 1931 retval = copy_strings(bprm->envc, env !! 1126 retval = copy_strings(bprm.argc, argv, &bprm); 1932 if (retval < 0) 1127 if (retval < 0) 1933 goto out_free; !! 1128 goto out; 1934 1129 1935 retval = copy_strings(bprm->argc, arg !! 1130 retval = search_binary_handler(&bprm,regs); 1936 if (retval < 0) !! 1131 if (retval >= 0) { 1937 goto out_free; !! 1132 free_arg_pages(&bprm); 1938 1133 1939 /* !! 1134 /* execve success */ 1940 * When argv is empty, add an empty s !! 1135 security_bprm_free(&bprm); 1941 * ensure confused userspace programs !! 1136 return retval; 1942 * from argv[1] won't end up walking << 1943 * bprm_stack_limits(). << 1944 */ << 1945 if (bprm->argc == 0) { << 1946 retval = copy_string_kernel(" << 1947 if (retval < 0) << 1948 goto out_free; << 1949 bprm->argc = 1; << 1950 } 1137 } 1951 1138 1952 retval = bprm_execve(bprm); !! 1139 out: 1953 out_free: !! 1140 /* Something went wrong, return the inode and free the argument pages*/ 1954 free_bprm(bprm); !! 1141 free_arg_pages(&bprm); 1955 << 1956 out_ret: << 1957 putname(filename); << 1958 return retval; << 1959 } << 1960 << 1961 int kernel_execve(const char *kernel_filename << 1962 const char *const *argv, co << 1963 { << 1964 struct filename *filename; << 1965 struct linux_binprm *bprm; << 1966 int fd = AT_FDCWD; << 1967 int retval; << 1968 1142 1969 /* It is non-sense for kernel threads !! 1143 if (bprm.security) 1970 if (WARN_ON_ONCE(current->flags & PF_ !! 1144 security_bprm_free(&bprm); 1971 return -EINVAL; << 1972 1145 1973 filename = getname_kernel(kernel_file !! 1146 out_mm: 1974 if (IS_ERR(filename)) !! 1147 if (bprm.mm) 1975 return PTR_ERR(filename); !! 1148 mmdrop(bprm.mm); 1976 !! 1149 1977 bprm = alloc_bprm(fd, filename, 0); !! 1150 out_file: 1978 if (IS_ERR(bprm)) { !! 1151 if (bprm.file) { 1979 retval = PTR_ERR(bprm); !! 1152 allow_write_access(bprm.file); 1980 goto out_ret; !! 1153 fput(bprm.file); 1981 } 1154 } 1982 << 1983 retval = count_strings_kernel(argv); << 1984 if (WARN_ON_ONCE(retval == 0)) << 1985 retval = -EINVAL; << 1986 if (retval < 0) << 1987 goto out_free; << 1988 bprm->argc = retval; << 1989 << 1990 retval = count_strings_kernel(envp); << 1991 if (retval < 0) << 1992 goto out_free; << 1993 bprm->envc = retval; << 1994 << 1995 retval = bprm_stack_limits(bprm); << 1996 if (retval < 0) << 1997 goto out_free; << 1998 << 1999 retval = copy_string_kernel(bprm->fil << 2000 if (retval < 0) << 2001 goto out_free; << 2002 bprm->exec = bprm->p; << 2003 << 2004 retval = copy_strings_kernel(bprm->en << 2005 if (retval < 0) << 2006 goto out_free; << 2007 << 2008 retval = copy_strings_kernel(bprm->ar << 2009 if (retval < 0) << 2010 goto out_free; << 2011 << 2012 retval = bprm_execve(bprm); << 2013 out_free: << 2014 free_bprm(bprm); << 2015 out_ret: << 2016 putname(filename); << 2017 return retval; 1155 return retval; 2018 } 1156 } 2019 1157 2020 static int do_execve(struct filename *filenam !! 1158 EXPORT_SYMBOL(do_execve); 2021 const char __user *const __user *__ar << 2022 const char __user *const __user *__en << 2023 { << 2024 struct user_arg_ptr argv = { .ptr.nat << 2025 struct user_arg_ptr envp = { .ptr.nat << 2026 return do_execveat_common(AT_FDCWD, f << 2027 } << 2028 << 2029 static int do_execveat(int fd, struct filenam << 2030 const char __user *const __us << 2031 const char __user *const __us << 2032 int flags) << 2033 { << 2034 struct user_arg_ptr argv = { .ptr.nat << 2035 struct user_arg_ptr envp = { .ptr.nat << 2036 << 2037 return do_execveat_common(fd, filenam << 2038 } << 2039 << 2040 #ifdef CONFIG_COMPAT << 2041 static int compat_do_execve(struct filename * << 2042 const compat_uptr_t __user *__argv, << 2043 const compat_uptr_t __user *__envp) << 2044 { << 2045 struct user_arg_ptr argv = { << 2046 .is_compat = true, << 2047 .ptr.compat = __argv, << 2048 }; << 2049 struct user_arg_ptr envp = { << 2050 .is_compat = true, << 2051 .ptr.compat = __envp, << 2052 }; << 2053 return do_execveat_common(AT_FDCWD, f << 2054 } << 2055 << 2056 static int compat_do_execveat(int fd, struct << 2057 const compat_up << 2058 const compat_up << 2059 int flags) << 2060 { << 2061 struct user_arg_ptr argv = { << 2062 .is_compat = true, << 2063 .ptr.compat = __argv, << 2064 }; << 2065 struct user_arg_ptr envp = { << 2066 .is_compat = true, << 2067 .ptr.compat = __envp, << 2068 }; << 2069 return do_execveat_common(fd, filenam << 2070 } << 2071 #endif << 2072 1159 2073 void set_binfmt(struct linux_binfmt *new) !! 1160 int set_binfmt(struct linux_binfmt *new) 2074 { 1161 { 2075 struct mm_struct *mm = current->mm; !! 1162 struct linux_binfmt *old = current->binfmt; 2076 << 2077 if (mm->binfmt) << 2078 module_put(mm->binfmt->module << 2079 1163 2080 mm->binfmt = new; !! 1164 if (new) { 2081 if (new) !! 1165 if (!try_module_get(new->module)) 2082 __module_get(new->module); !! 1166 return -1; >> 1167 } >> 1168 current->binfmt = new; >> 1169 if (old) >> 1170 module_put(old->module); >> 1171 return 0; 2083 } 1172 } >> 1173 2084 EXPORT_SYMBOL(set_binfmt); 1174 EXPORT_SYMBOL(set_binfmt); 2085 1175 2086 /* !! 1176 #define CORENAME_MAX_SIZE 64 2087 * set_dumpable stores three-value SUID_DUMP_ << 2088 */ << 2089 void set_dumpable(struct mm_struct *mm, int v << 2090 { << 2091 if (WARN_ON((unsigned)value > SUID_DU << 2092 return; << 2093 1177 2094 set_mask_bits(&mm->flags, MMF_DUMPABL !! 1178 /* format_corename will inspect the pattern parameter, and output a >> 1179 * name into corename, which must have space for at least >> 1180 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. >> 1181 */ >> 1182 void format_corename(char *corename, const char *pattern, long signr) >> 1183 { >> 1184 const char *pat_ptr = pattern; >> 1185 char *out_ptr = corename; >> 1186 char *const out_end = corename + CORENAME_MAX_SIZE; >> 1187 int rc; >> 1188 int pid_in_pattern = 0; >> 1189 >> 1190 /* Repeat as long as we have more pattern to process and more output >> 1191 space */ >> 1192 while (*pat_ptr) { >> 1193 if (*pat_ptr != '%') { >> 1194 if (out_ptr == out_end) >> 1195 goto out; >> 1196 *out_ptr++ = *pat_ptr++; >> 1197 } else { >> 1198 switch (*++pat_ptr) { >> 1199 case 0: >> 1200 goto out; >> 1201 /* Double percent, output one percent */ >> 1202 case '%': >> 1203 if (out_ptr == out_end) >> 1204 goto out; >> 1205 *out_ptr++ = '%'; >> 1206 break; >> 1207 /* pid */ >> 1208 case 'p': >> 1209 pid_in_pattern = 1; >> 1210 rc = snprintf(out_ptr, out_end - out_ptr, >> 1211 "%d", current->tgid); >> 1212 if (rc > out_end - out_ptr) >> 1213 goto out; >> 1214 out_ptr += rc; >> 1215 break; >> 1216 /* uid */ >> 1217 case 'u': >> 1218 rc = snprintf(out_ptr, out_end - out_ptr, >> 1219 "%d", current->uid); >> 1220 if (rc > out_end - out_ptr) >> 1221 goto out; >> 1222 out_ptr += rc; >> 1223 break; >> 1224 /* gid */ >> 1225 case 'g': >> 1226 rc = snprintf(out_ptr, out_end - out_ptr, >> 1227 "%d", current->gid); >> 1228 if (rc > out_end - out_ptr) >> 1229 goto out; >> 1230 out_ptr += rc; >> 1231 break; >> 1232 /* signal that caused the coredump */ >> 1233 case 's': >> 1234 rc = snprintf(out_ptr, out_end - out_ptr, >> 1235 "%ld", signr); >> 1236 if (rc > out_end - out_ptr) >> 1237 goto out; >> 1238 out_ptr += rc; >> 1239 break; >> 1240 /* UNIX time of coredump */ >> 1241 case 't': { >> 1242 struct timeval tv; >> 1243 do_gettimeofday(&tv); >> 1244 rc = snprintf(out_ptr, out_end - out_ptr, >> 1245 "%lu", tv.tv_sec); >> 1246 if (rc > out_end - out_ptr) >> 1247 goto out; >> 1248 out_ptr += rc; >> 1249 break; >> 1250 } >> 1251 /* hostname */ >> 1252 case 'h': >> 1253 down_read(&uts_sem); >> 1254 rc = snprintf(out_ptr, out_end - out_ptr, >> 1255 "%s", system_utsname.nodename); >> 1256 up_read(&uts_sem); >> 1257 if (rc > out_end - out_ptr) >> 1258 goto out; >> 1259 out_ptr += rc; >> 1260 break; >> 1261 /* executable */ >> 1262 case 'e': >> 1263 rc = snprintf(out_ptr, out_end - out_ptr, >> 1264 "%s", current->comm); >> 1265 if (rc > out_end - out_ptr) >> 1266 goto out; >> 1267 out_ptr += rc; >> 1268 break; >> 1269 default: >> 1270 break; >> 1271 } >> 1272 ++pat_ptr; >> 1273 } >> 1274 } >> 1275 /* Backward compatibility with core_uses_pid: >> 1276 * >> 1277 * If core_pattern does not include a %p (as is the default) >> 1278 * and core_uses_pid is set, then .%pid will be appended to >> 1279 * the filename */ >> 1280 if (!pid_in_pattern >> 1281 && (core_uses_pid || atomic_read(¤t->mm->mm_users) != 1)) { >> 1282 rc = snprintf(out_ptr, out_end - out_ptr, >> 1283 ".%d", current->tgid); >> 1284 if (rc > out_end - out_ptr) >> 1285 goto out; >> 1286 out_ptr += rc; >> 1287 } >> 1288 out: >> 1289 *out_ptr = 0; 2095 } 1290 } 2096 1291 2097 SYSCALL_DEFINE3(execve, !! 1292 static void zap_threads (struct mm_struct *mm) 2098 const char __user *, filename << 2099 const char __user *const __us << 2100 const char __user *const __us << 2101 { 1293 { 2102 return do_execve(getname(filename), a !! 1294 struct task_struct *g, *p; 2103 } !! 1295 struct task_struct *tsk = current; >> 1296 struct completion *vfork_done = tsk->vfork_done; 2104 1297 2105 SYSCALL_DEFINE5(execveat, !! 1298 /* 2106 int, fd, const char __user *, !! 1299 * Make sure nobody is waiting for us to release the VM, 2107 const char __user *const __us !! 1300 * otherwise we can deadlock when we wait on each other 2108 const char __user *const __us !! 1301 */ 2109 int, flags) !! 1302 if (vfork_done) { 2110 { !! 1303 tsk->vfork_done = NULL; 2111 return do_execveat(fd, !! 1304 complete(vfork_done); 2112 getname_uflags(fil !! 1305 } 2113 argv, envp, flags) << 2114 } << 2115 1306 2116 #ifdef CONFIG_COMPAT !! 1307 read_lock(&tasklist_lock); 2117 COMPAT_SYSCALL_DEFINE3(execve, const char __u !! 1308 do_each_thread(g,p) 2118 const compat_uptr_t __user *, argv, !! 1309 if (mm == p->mm && p != tsk) { 2119 const compat_uptr_t __user *, envp) !! 1310 force_sig_specific(SIGKILL, p); 2120 { !! 1311 mm->core_waiters++; 2121 return compat_do_execve(getname(filen !! 1312 } >> 1313 while_each_thread(g,p); >> 1314 >> 1315 read_unlock(&tasklist_lock); 2122 } 1316 } 2123 1317 2124 COMPAT_SYSCALL_DEFINE5(execveat, int, fd, !! 1318 static void coredump_wait(struct mm_struct *mm) 2125 const char __user *, f << 2126 const compat_uptr_t __ << 2127 const compat_uptr_t __ << 2128 int, flags) << 2129 { 1319 { 2130 return compat_do_execveat(fd, !! 1320 DECLARE_COMPLETION(startup_done); 2131 getname_ufl << 2132 argv, envp, << 2133 } << 2134 #endif << 2135 1321 2136 #ifdef CONFIG_SYSCTL !! 1322 mm->core_waiters++; /* let other threads block */ >> 1323 mm->core_startup_done = &startup_done; 2137 1324 2138 static int proc_dointvec_minmax_coredump(cons !! 1325 /* give other threads a chance to run: */ 2139 void *buffer, size_t *lenp, l !! 1326 yield(); 2140 { << 2141 int error = proc_dointvec_minmax(tabl << 2142 1327 2143 if (!error) !! 1328 zap_threads(mm); 2144 validate_coredump_safety(); !! 1329 if (--mm->core_waiters) { 2145 return error; !! 1330 up_write(&mm->mmap_sem); >> 1331 wait_for_completion(&startup_done); >> 1332 } else >> 1333 up_write(&mm->mmap_sem); >> 1334 BUG_ON(mm->core_waiters); 2146 } 1335 } 2147 1336 2148 static struct ctl_table fs_exec_sysctls[] = { !! 1337 int do_coredump(long signr, int exit_code, struct pt_regs * regs) 2149 { << 2150 .procname = "suid_dumpa << 2151 .data = &suid_dumpa << 2152 .maxlen = sizeof(int) << 2153 .mode = 0644, << 2154 .proc_handler = proc_dointv << 2155 .extra1 = SYSCTL_ZERO << 2156 .extra2 = SYSCTL_TWO, << 2157 }, << 2158 }; << 2159 << 2160 static int __init init_fs_exec_sysctls(void) << 2161 { 1338 { 2162 register_sysctl_init("fs", fs_exec_sy !! 1339 char corename[CORENAME_MAX_SIZE + 1]; 2163 return 0; !! 1340 struct mm_struct *mm = current->mm; 2164 } !! 1341 struct linux_binfmt * binfmt; >> 1342 struct inode * inode; >> 1343 struct file * file; >> 1344 int retval = 0; >> 1345 >> 1346 lock_kernel(); >> 1347 binfmt = current->binfmt; >> 1348 if (!binfmt || !binfmt->core_dump) >> 1349 goto fail; >> 1350 down_write(&mm->mmap_sem); >> 1351 if (!mm->dumpable) { >> 1352 up_write(&mm->mmap_sem); >> 1353 goto fail; >> 1354 } >> 1355 mm->dumpable = 0; >> 1356 init_completion(&mm->core_done); >> 1357 current->signal->group_exit = 1; >> 1358 current->signal->group_exit_code = exit_code; >> 1359 coredump_wait(mm); 2165 1360 2166 fs_initcall(init_fs_exec_sysctls); !! 1361 if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump) 2167 #endif /* CONFIG_SYSCTL */ !! 1362 goto fail_unlock; 2168 1363 2169 #ifdef CONFIG_EXEC_KUNIT_TEST !! 1364 format_corename(corename, core_pattern, signr); 2170 #include "tests/exec_kunit.c" !! 1365 file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW, 0600); 2171 #endif !! 1366 if (IS_ERR(file)) >> 1367 goto fail_unlock; >> 1368 inode = file->f_dentry->d_inode; >> 1369 if (inode->i_nlink > 1) >> 1370 goto close_fail; /* multiple links - don't dump */ >> 1371 if (d_unhashed(file->f_dentry)) >> 1372 goto close_fail; >> 1373 >> 1374 if (!S_ISREG(inode->i_mode)) >> 1375 goto close_fail; >> 1376 if (!file->f_op) >> 1377 goto close_fail; >> 1378 if (!file->f_op->write) >> 1379 goto close_fail; >> 1380 if (do_truncate(file->f_dentry, 0) != 0) >> 1381 goto close_fail; >> 1382 >> 1383 retval = binfmt->core_dump(signr, regs, file); >> 1384 >> 1385 current->signal->group_exit_code |= 0x80; >> 1386 close_fail: >> 1387 filp_close(file, NULL); >> 1388 fail_unlock: >> 1389 complete_all(&mm->core_done); >> 1390 fail: >> 1391 unlock_kernel(); >> 1392 return retval; >> 1393 } 2172 1394
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.