// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/kernel/sys.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/export.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/kmod.h>
#include <linux/ksm.h>
#include <linux/perf_event.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/key.h>
#include <linux/times.h>
#include <linux/posix-timers.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/suspend.h>
#include <linux/tty.h>
#include <linux/signal.h>
#include <linux/cn_proc.h>
#include <linux/getcpu.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/seccomp.h>
#include <linux/cpu.h>
#include <linux/personality.h>
#include <linux/ptrace.h>
#include <linux/fs_struct.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/gfp.h>
#include <linux/syscore_ops.h>
#include <linux/version.h>
#include <linux/ctype.h>
#include <linux/syscall_user_dispatch.h>

#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>
#include <linux/time_namespace.h>
#include <linux/binfmts.h>

#include <linux/sched.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/stat.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task.h>
#include <linux/sched/cputime.h>
#include <linux/rcupdate.h>
#include <linux/uidgid.h>
#include <linux/cred.h>

#include <linux/nospec.h>

#include <linux/kmsg_dump.h>
/* Move somewhere else to avoid recompiling? */
#include <generated/utsrelease.h>

#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/unistd.h>

#include "uid16.h"

#ifndef SET_UNALIGN_CTL
# define SET_UNALIGN_CTL(a, b)	(-EINVAL)
#endif
#ifndef GET_UNALIGN_CTL
# define GET_UNALIGN_CTL(a, b)	(-EINVAL)
#endif
#ifndef SET_FPEMU_CTL
# define SET_FPEMU_CTL(a, b)	(-EINVAL)
#endif
#ifndef GET_FPEMU_CTL
# define GET_FPEMU_CTL(a, b)	(-EINVAL)
#endif
#ifndef SET_FPEXC_CTL
# define SET_FPEXC_CTL(a, b)	(-EINVAL)
#endif
#ifndef GET_FPEXC_CTL
# define GET_FPEXC_CTL(a, b)	(-EINVAL)
#endif
#ifndef GET_ENDIAN
# define GET_ENDIAN(a, b)	(-EINVAL)
#endif
#ifndef SET_ENDIAN
# define SET_ENDIAN(a, b)	(-EINVAL)
#endif
#ifndef GET_TSC_CTL
# define GET_TSC_CTL(a)		(-EINVAL)
#endif
#ifndef SET_TSC_CTL
# define SET_TSC_CTL(a)		(-EINVAL)
#endif
#ifndef GET_FP_MODE
# define GET_FP_MODE(a)		(-EINVAL)
#endif
#ifndef SET_FP_MODE
# define SET_FP_MODE(a, b)	(-EINVAL)
#endif
#ifndef SVE_SET_VL
# define SVE_SET_VL(a)		(-EINVAL)
#endif
#ifndef SVE_GET_VL
# define SVE_GET_VL()		(-EINVAL)
#endif
#ifndef SME_SET_VL
# define SME_SET_VL(a)		(-EINVAL)
#endif
#ifndef SME_GET_VL
# define SME_GET_VL()		(-EINVAL)
#endif
#ifndef PAC_RESET_KEYS
# define PAC_RESET_KEYS(a, b)	(-EINVAL)
#endif
#ifndef PAC_SET_ENABLED_KEYS
# define PAC_SET_ENABLED_KEYS(a, b, c)	(-EINVAL)
#endif
#ifndef PAC_GET_ENABLED_KEYS
# define PAC_GET_ENABLED_KEYS(a)	(-EINVAL)
#endif
#ifndef SET_TAGGED_ADDR_CTRL
# define SET_TAGGED_ADDR_CTRL(a)	(-EINVAL)
#endif
#ifndef GET_TAGGED_ADDR_CTRL
# define GET_TAGGED_ADDR_CTRL()		(-EINVAL)
#endif
#ifndef RISCV_V_SET_CONTROL
# define RISCV_V_SET_CONTROL(a)		(-EINVAL)
#endif
#ifndef RISCV_V_GET_CONTROL
# define RISCV_V_GET_CONTROL()		(-EINVAL)
#endif
#ifndef RISCV_SET_ICACHE_FLUSH_CTX
# define RISCV_SET_ICACHE_FLUSH_CTX(a, b)	(-EINVAL)
#endif
#ifndef PPC_GET_DEXCR_ASPECT
# define PPC_GET_DEXCR_ASPECT(a, b)	(-EINVAL)
#endif
#ifndef PPC_SET_DEXCR_ASPECT
# define PPC_SET_DEXCR_ASPECT(a, b, c)	(-EINVAL)
#endif

/*
 * this is where the system-wide overflow UID and GID are defined, for
 * architectures that now have 32-bit UID/GID but didn't in the past
 */

int overflowuid = DEFAULT_OVERFLOWUID;
int overflowgid = DEFAULT_OVERFLOWGID;

EXPORT_SYMBOL(overflowuid);
EXPORT_SYMBOL(overflowgid);

/*
 * the same as above, but for filesystems which can only store a 16-bit
 * UID and GID. as such, this is needed on all architectures
 */

int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
int fs_overflowgid = DEFAULT_FS_OVERFLOWGID;

EXPORT_SYMBOL(fs_overflowuid);
EXPORT_SYMBOL(fs_overflowgid);

/*
 * Returns true if current's euid is the same as p's uid or euid,
 * or if current has CAP_SYS_NICE in p's user_ns.
 *
 * Called with rcu_read_lock, creds are safe
 */
static bool set_one_prio_perm(struct task_struct *p)
{
	const struct cred *cred = current_cred(), *pcred = __task_cred(p);

	if (uid_eq(pcred->uid, cred->euid) ||
	    uid_eq(pcred->euid, cred->euid))
		return true;
	if (ns_capable(pcred->user_ns, CAP_SYS_NICE))
		return true;
	return false;
}

/*
 * set the priority of a task
 * - the caller must hold the RCU read lock
 */
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
	int no_nice;

	if (!set_one_prio_perm(p)) {
		error = -EPERM;
		goto out;
	}
	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
		error = -EACCES;
		goto out;
	}
	no_nice = security_task_setnice(p, niceval);
	if (no_nice) {
		error = no_nice;
		goto out;
	}
	if (error == -ESRCH)
		error = 0;
	set_user_nice(p, niceval);
out:
	return error;
}

SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	int error = -EINVAL;
	struct pid *pgrp;
	kuid_t uid;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		goto out;
	if (!ccs_capable(CCS_SYS_NICE)) {
		error = -EPERM;
		goto out;
	}

	/* normalize: avoid signed division (rounding problems) */
	error = -ESRCH;
	if (niceval < MIN_NICE)
		niceval = MIN_NICE;
	if (niceval > MAX_NICE)
		niceval = MAX_NICE;

	rcu_read_lock();
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p)
			error = set_one_prio(p, niceval, error);
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		read_lock(&tasklist_lock);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			error = set_one_prio(p, niceval, error);
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		read_unlock(&tasklist_lock);
		break;
	case PRIO_USER:
		uid = make_kuid(cred->user_ns, who);
		user = cred->user;
		if (!who)
			uid = cred->uid;
		else if (!uid_eq(uid, cred->uid)) {
			user = find_user(uid);
			if (!user)
				goto out_unlock;	/* No processes for this user */
		}
		for_each_process_thread(g, p) {
			if (uid_eq(task_uid(p), uid) && task_pid_vnr(p))
				error = set_one_prio(p, niceval, error);
		}
		if (!uid_eq(uid, cred->uid))
			free_uid(user);		/* For find_user() */
		break;
	}
out_unlock:
	rcu_read_unlock();
out:
	return error;
}
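/*
 * Illustrative userspace sketch (not part of this kernel source): how a
 * caller reaches the setpriority() path above.  The which/who pair selects
 * a single task, a process group, or all processes of a user, and niceval
 * is clamped to [MIN_NICE, MAX_NICE] before being applied.
 *
 *	#include <sys/resource.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		// Renice the calling process (who == 0) to nice 10.
 *		if (setpriority(PRIO_PROCESS, 0, 10) == -1)
 *			perror("setpriority");
 *		return 0;
 *	}
 */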
/*
 * Ugh. To avoid negative return values, "getpriority()" will
 * not return the normal nice-value, but a negated value that
 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 * to stay compatible.
 */
SYSCALL_DEFINE2(getpriority, int, which, int, who)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	long niceval, retval = -ESRCH;
	struct pid *pgrp;
	kuid_t uid;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		return -EINVAL;

	rcu_read_lock();
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p) {
			niceval = nice_to_rlimit(task_nice(p));
			if (niceval > retval)
				retval = niceval;
		}
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		read_lock(&tasklist_lock);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			niceval = nice_to_rlimit(task_nice(p));
			if (niceval > retval)
				retval = niceval;
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		read_unlock(&tasklist_lock);
		break;
	case PRIO_USER:
		uid = make_kuid(cred->user_ns, who);
		user = cred->user;
		if (!who)
			uid = cred->uid;
		else if (!uid_eq(uid, cred->uid)) {
			user = find_user(uid);
			if (!user)
				goto out_unlock;	/* No processes for this user */
		}
		for_each_process_thread(g, p) {
			if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
				niceval = nice_to_rlimit(task_nice(p));
				if (niceval > retval)
					retval = niceval;
			}
		}
		if (!uid_eq(uid, cred->uid))
			free_uid(user);		/* for find_user() */
		break;
	}
out_unlock:
	rcu_read_unlock();

	return retval;
}
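/*
 * Illustrative sketch (not part of this kernel source): because of the
 * offset encoding above, the raw getpriority() syscall returns 40..1 for
 * nice values -20..19, and the C library undoes this with "nice = 20 - ret".
 *
 *	#include <sys/resource.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		long ret = syscall(SYS_getpriority, PRIO_PROCESS, 0);
 *
 *		if (ret > 0)	// 40..1, never negative on success
 *			printf("nice value: %ld\n", 20 - ret);
 *		return 0;
 *	}
 */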
/*
 * Unprivileged users may change the real gid to the effective gid
 * or vice versa.  (BSD-style)
 *
 * If you set the real gid at all, or set the effective gid to a value not
 * equal to the real gid, then the saved gid is set to the new effective gid.
 *
 * This makes it possible for a setgid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setregid() will be
 * 100% compatible with BSD.  A program which uses just setgid() will be
 * 100% compatible with POSIX with saved IDs.
 *
 * SMP: There are no races, the GIDs are checked only by filesystem
 *      operations (as far as semantic preservation is concerned).
 */
#ifdef CONFIG_MULTIUSER
long __sys_setregid(gid_t rgid, gid_t egid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t krgid, kegid;

	krgid = make_kgid(ns, rgid);
	kegid = make_kgid(ns, egid);

	if ((rgid != (gid_t) -1) && !gid_valid(krgid))
		return -EINVAL;
	if ((egid != (gid_t) -1) && !gid_valid(kegid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (rgid != (gid_t) -1) {
		if (gid_eq(old->gid, krgid) ||
		    gid_eq(old->egid, krgid) ||
		    ns_capable_setid(old->user_ns, CAP_SETGID))
			new->gid = krgid;
		else
			goto error;
	}
	if (egid != (gid_t) -1) {
		if (gid_eq(old->gid, kegid) ||
		    gid_eq(old->egid, kegid) ||
		    gid_eq(old->sgid, kegid) ||
		    ns_capable_setid(old->user_ns, CAP_SETGID))
			new->egid = kegid;
		else
			goto error;
	}

	if (rgid != (gid_t) -1 ||
	    (egid != (gid_t) -1 && !gid_eq(kegid, old->gid)))
		new->sgid = new->egid;
	new->fsgid = new->egid;

	retval = security_task_fix_setgid(new, old, LSM_SETID_RE);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
{
	return __sys_setregid(rgid, egid);
}

/*
 * setgid() is implemented like SysV w/ SAVED_IDS
 *
 * SMP: Same implicit races as above.
 */
long __sys_setgid(gid_t gid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t kgid;

	kgid = make_kgid(ns, gid);
	if (!gid_valid(kgid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ns_capable_setid(old->user_ns, CAP_SETGID))
		new->gid = new->egid = new->sgid = new->fsgid = kgid;
	else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
		new->egid = new->fsgid = kgid;
	else
		goto error;

	retval = security_task_fix_setgid(new, old, LSM_SETID_ID);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE1(setgid, gid_t, gid)
{
	return __sys_setgid(gid);
}
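/*
 * Illustrative userspace sketch (not part of this kernel source): per the
 * setregid() comment above, a setgid program can permanently drop its
 * elevated group by setting both the real and effective gid, which also
 * overwrites the saved gid.
 *
 *	#include <unistd.h>
 *	#include <stdlib.h>
 *
 *	void drop_group_privs(void)
 *	{
 *		gid_t rgid = getgid();
 *
 *		// rgid != -1, so the saved gid is set to the new egid too.
 *		if (setregid(rgid, rgid) == -1)
 *			exit(1);
 *	}
 */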
/*
 * change the user struct in a credentials set to match the new UID
 */
static int set_user(struct cred *new)
{
	struct user_struct *new_user;

	new_user = alloc_uid(new->uid);
	if (!new_user)
		return -EAGAIN;

	free_uid(new->user);
	new->user = new_user;
	return 0;
}

static void flag_nproc_exceeded(struct cred *new)
{
	if (new->ucounts == current_ucounts())
		return;

	/*
	 * We don't fail in case of NPROC limit excess here because too many
	 * poorly written programs don't check set*uid() return code, assuming
	 * it never fails if called by root.  We may still enforce NPROC limit
	 * for programs doing set*uid()+execve() by harmlessly deferring the
	 * failure to the execve() stage.
	 */
	if (is_rlimit_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) &&
			new->user != INIT_USER)
		current->flags |= PF_NPROC_EXCEEDED;
	else
		current->flags &= ~PF_NPROC_EXCEEDED;
}

/*
 * Unprivileged users may change the real uid to the effective uid
 * or vice versa.  (BSD-style)
 *
 * If you set the real uid at all, or set the effective uid to a value not
 * equal to the real uid, then the saved uid is set to the new effective uid.
 *
 * This makes it possible for a setuid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setreuid() will be
 * 100% compatible with BSD.  A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs.
 */
long __sys_setreuid(uid_t ruid, uid_t euid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kruid, keuid;

	kruid = make_kuid(ns, ruid);
	keuid = make_kuid(ns, euid);

	if ((ruid != (uid_t) -1) && !uid_valid(kruid))
		return -EINVAL;
	if ((euid != (uid_t) -1) && !uid_valid(keuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ruid != (uid_t) -1) {
		new->uid = kruid;
		if (!uid_eq(old->uid, kruid) &&
		    !uid_eq(old->euid, kruid) &&
		    !ns_capable_setid(old->user_ns, CAP_SETUID))
			goto error;
	}

	if (euid != (uid_t) -1) {
		new->euid = keuid;
		if (!uid_eq(old->uid, keuid) &&
		    !uid_eq(old->euid, keuid) &&
		    !uid_eq(old->suid, keuid) &&
		    !ns_capable_setid(old->user_ns, CAP_SETUID))
			goto error;
	}

	if (!uid_eq(new->uid, old->uid)) {
		retval = set_user(new);
		if (retval < 0)
			goto error;
	}
	if (ruid != (uid_t) -1 ||
	    (euid != (uid_t) -1 && !uid_eq(keuid, old->uid)))
		new->suid = new->euid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
	if (retval < 0)
		goto error;

	retval = set_cred_ucounts(new);
	if (retval < 0)
		goto error;

	flag_nproc_exceeded(new);
	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
{
	return __sys_setreuid(ruid, euid);
}
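/*
 * Illustrative sketch (not part of this kernel source): the BSD-style swap
 * referred to in the setuid() comment below - a setuid-root program can
 * temporarily assume the invoking user's uid and later regain privilege by
 * swapping the real and effective uids back (modern code usually prefers
 * setresuid() for this):
 *
 *	#include <unistd.h>
 *
 *	static uid_t ruid, euid;
 *
 *	void privs_init(void)  { ruid = getuid(); euid = geteuid(); }
 *	int privs_drop(void)   { return setreuid(euid, ruid); }
 *	int privs_regain(void) { return setreuid(ruid, euid); }
 */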
/*
 * setuid() is implemented like SysV with SAVED_IDS
 *
 * Note that SAVED_IDS is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too.  If you don't like this, blame the bright people
 * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.
 */
long __sys_setuid(uid_t uid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kuid;

	kuid = make_kuid(ns, uid);
	if (!uid_valid(kuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ns_capable_setid(old->user_ns, CAP_SETUID)) {
		new->suid = new->uid = kuid;
		if (!uid_eq(kuid, old->uid)) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	} else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) {
		goto error;
	}

	new->fsuid = new->euid = kuid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
	if (retval < 0)
		goto error;

	retval = set_cred_ucounts(new);
	if (retval < 0)
		goto error;

	flag_nproc_exceeded(new);
	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE1(setuid, uid_t, uid)
{
	return __sys_setuid(uid);
}


/*
 * This function implements a generic ability to update ruid, euid,
 * and suid.  This allows you to implement the 4.4BSD-compatible seteuid().
 */
long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kruid, keuid, ksuid;
	bool ruid_new, euid_new, suid_new;

	kruid = make_kuid(ns, ruid);
	keuid = make_kuid(ns, euid);
	ksuid = make_kuid(ns, suid);

	if ((ruid != (uid_t) -1) && !uid_valid(kruid))
		return -EINVAL;

	if ((euid != (uid_t) -1) && !uid_valid(keuid))
		return -EINVAL;

	if ((suid != (uid_t) -1) && !uid_valid(ksuid))
		return -EINVAL;

	old = current_cred();

	/* check for no-op */
	if ((ruid == (uid_t) -1 || uid_eq(kruid, old->uid)) &&
	    (euid == (uid_t) -1 || (uid_eq(keuid, old->euid) &&
				    uid_eq(keuid, old->fsuid))) &&
	    (suid == (uid_t) -1 || uid_eq(ksuid, old->suid)))
		return 0;

	ruid_new = ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
		   !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid);
	euid_new = euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
		   !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid);
	suid_new = suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
		   !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid);
	if ((ruid_new || euid_new || suid_new) &&
	    !ns_capable_setid(old->user_ns, CAP_SETUID))
		return -EPERM;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	if (ruid != (uid_t) -1) {
		new->uid = kruid;
		if (!uid_eq(kruid, old->uid)) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	}
	if (euid != (uid_t) -1)
		new->euid = keuid;
	if (suid != (uid_t) -1)
		new->suid = ksuid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
	if (retval < 0)
		goto error;

	retval = set_cred_ucounts(new);
	if (retval < 0)
		goto error;

	flag_nproc_exceeded(new);
	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
{
	return __sys_setresuid(ruid, euid, suid);
}
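/*
 * Illustrative sketch (not part of this kernel source): as the comment
 * above says, setresuid() is the general primitive; seteuid(e) behaves
 * like setresuid(-1, e, -1), and a permanent privilege drop sets all
 * three ids at once:
 *
 *	#define _GNU_SOURCE
 *	#include <unistd.h>
 *	#include <stdlib.h>
 *
 *	void drop_privs_permanently(uid_t u)
 *	{
 *		if (setresuid(u, u, u) == -1)
 *			abort();
 *		// Paranoia: regaining privilege must now fail.
 *		if (seteuid(0) != -1)
 *			abort();
 *	}
 */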
SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp)
{
	const struct cred *cred = current_cred();
	int retval;
	uid_t ruid, euid, suid;

	ruid = from_kuid_munged(cred->user_ns, cred->uid);
	euid = from_kuid_munged(cred->user_ns, cred->euid);
	suid = from_kuid_munged(cred->user_ns, cred->suid);

	retval = put_user(ruid, ruidp);
	if (!retval) {
		retval = put_user(euid, euidp);
		if (!retval)
			return put_user(suid, suidp);
	}
	return retval;
}

/*
 * Same as above, but for rgid, egid, sgid.
 */
long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t krgid, kegid, ksgid;
	bool rgid_new, egid_new, sgid_new;

	krgid = make_kgid(ns, rgid);
	kegid = make_kgid(ns, egid);
	ksgid = make_kgid(ns, sgid);

	if ((rgid != (gid_t) -1) && !gid_valid(krgid))
		return -EINVAL;
	if ((egid != (gid_t) -1) && !gid_valid(kegid))
		return -EINVAL;
	if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
		return -EINVAL;

	old = current_cred();

	/* check for no-op */
	if ((rgid == (gid_t) -1 || gid_eq(krgid, old->gid)) &&
	    (egid == (gid_t) -1 || (gid_eq(kegid, old->egid) &&
				    gid_eq(kegid, old->fsgid))) &&
	    (sgid == (gid_t) -1 || gid_eq(ksgid, old->sgid)))
		return 0;

	rgid_new = rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
		   !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid);
	egid_new = egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
		   !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid);
	sgid_new = sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
		   !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid);
	if ((rgid_new || egid_new || sgid_new) &&
	    !ns_capable_setid(old->user_ns, CAP_SETGID))
		return -EPERM;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	if (rgid != (gid_t) -1)
		new->gid = krgid;
	if (egid != (gid_t) -1)
		new->egid = kegid;
	if (sgid != (gid_t) -1)
		new->sgid = ksgid;
	new->fsgid = new->egid;

	retval = security_task_fix_setgid(new, old, LSM_SETID_RES);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
{
	return __sys_setresgid(rgid, egid, sgid);
}

SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp)
{
	const struct cred *cred = current_cred();
	int retval;
	gid_t rgid, egid, sgid;

	rgid = from_kgid_munged(cred->user_ns, cred->gid);
	egid = from_kgid_munged(cred->user_ns, cred->egid);
	sgid = from_kgid_munged(cred->user_ns, cred->sgid);

	retval = put_user(rgid, rgidp);
	if (!retval) {
		retval = put_user(egid, egidp);
		if (!retval)
			retval = put_user(sgid, sgidp);
	}

	return retval;
}
/*
 * "setfsuid()" sets the fsuid - the uid used for filesystem checks.  This
 * is used for "access()" and for the NFS daemon (letting nfsd stay at
 * whatever uid it wants to).  It normally shadows "euid", except when
 * explicitly set by setfsuid() or for access..
 */
long __sys_setfsuid(uid_t uid)
{
	const struct cred *old;
	struct cred *new;
	uid_t old_fsuid;
	kuid_t kuid;

	old = current_cred();
	old_fsuid = from_kuid_munged(old->user_ns, old->fsuid);

	kuid = make_kuid(old->user_ns, uid);
	if (!uid_valid(kuid))
		return old_fsuid;

	new = prepare_creds();
	if (!new)
		return old_fsuid;

	if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) ||
	    uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
	    ns_capable_setid(old->user_ns, CAP_SETUID)) {
		if (!uid_eq(kuid, old->fsuid)) {
			new->fsuid = kuid;
			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
				goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsuid;

change_okay:
	commit_creds(new);
	return old_fsuid;
}

SYSCALL_DEFINE1(setfsuid, uid_t, uid)
{
	return __sys_setfsuid(uid);
}

/*
 * Samma på svenska.. (Swedish: "same thing in Swedish" - setfsgid() is
 * the group analogue of setfsuid() above)
 */
long __sys_setfsgid(gid_t gid)
{
	const struct cred *old;
	struct cred *new;
	gid_t old_fsgid;
	kgid_t kgid;

	old = current_cred();
	old_fsgid = from_kgid_munged(old->user_ns, old->fsgid);

	kgid = make_kgid(old->user_ns, gid);
	if (!gid_valid(kgid))
		return old_fsgid;

	new = prepare_creds();
	if (!new)
		return old_fsgid;

	if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) ||
	    gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) ||
	    ns_capable_setid(old->user_ns, CAP_SETGID)) {
		if (!gid_eq(kgid, old->fsgid)) {
			new->fsgid = kgid;
			if (security_task_fix_setgid(new, old, LSM_SETID_FS) == 0)
				goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsgid;

change_okay:
	commit_creds(new);
	return old_fsgid;
}

SYSCALL_DEFINE1(setfsgid, gid_t, gid)
{
	return __sys_setfsgid(gid);
}
#endif /* CONFIG_MULTIUSER */
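/*
 * Illustrative userspace sketch (not part of this kernel source): the
 * classic consumer of setfsuid()/setfsgid() is a file-serving daemon
 * (running with euid 0 in this sketch) that performs filesystem access on
 * behalf of different users without changing its euid:
 *
 *	#include <sys/fsuid.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int open_as_user(const char *path, uid_t u, gid_t g)
 *	{
 *		int fd;
 *
 *		setfsgid(g);
 *		setfsuid(u);
 *		fd = open(path, O_RDONLY);	// permission-checked as u/g
 *		setfsuid(0);			// back to the daemon's fsuid
 *		setfsgid(0);
 *		return fd;
 *	}
 */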
/**
 * sys_getpid - return the thread group id of the current process
 *
 * Note, despite the name, this returns the tgid not the pid.  The tgid and
 * the pid are identical unless CLONE_THREAD was specified on clone() in
 * which case the tgid is the same in all threads of the same group.
 *
 * This is SMP safe as current->tgid does not change.
 */
SYSCALL_DEFINE0(getpid)
{
	return task_tgid_vnr(current);
}

/* Thread ID - the internal kernel "pid" */
SYSCALL_DEFINE0(gettid)
{
	return task_pid_vnr(current);
}

/*
 * Accessing ->real_parent is not SMP-safe, it could
 * change from under us.  However, we can use a stale
 * value of ->real_parent under rcu_read_lock(), see
 * release_task()->call_rcu(delayed_put_task_struct).
 */
SYSCALL_DEFINE0(getppid)
{
	int pid;

	rcu_read_lock();
	pid = task_tgid_vnr(rcu_dereference(current->real_parent));
	rcu_read_unlock();

	return pid;
}

SYSCALL_DEFINE0(getuid)
{
	/* Only we change this so SMP safe */
	return from_kuid_munged(current_user_ns(), current_uid());
}

SYSCALL_DEFINE0(geteuid)
{
	/* Only we change this so SMP safe */
	return from_kuid_munged(current_user_ns(), current_euid());
}

SYSCALL_DEFINE0(getgid)
{
	/* Only we change this so SMP safe */
	return from_kgid_munged(current_user_ns(), current_gid());
}

SYSCALL_DEFINE0(getegid)
{
	/* Only we change this so SMP safe */
	return from_kgid_munged(current_user_ns(), current_egid());
}

static void do_sys_times(struct tms *tms)
{
	u64 tgutime, tgstime, cutime, cstime;

	thread_group_cputime_adjusted(current, &tgutime, &tgstime);
	cutime = current->signal->cutime;
	cstime = current->signal->cstime;
	tms->tms_utime = nsec_to_clock_t(tgutime);
	tms->tms_stime = nsec_to_clock_t(tgstime);
	tms->tms_cutime = nsec_to_clock_t(cutime);
	tms->tms_cstime = nsec_to_clock_t(cstime);
}

SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
{
	if (tbuf) {
		struct tms tmp;

		do_sys_times(&tmp);
		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
			return -EFAULT;
	}
	force_successful_syscall_return();
	return (long) jiffies_64_to_clock_t(get_jiffies_64());
}

#ifdef CONFIG_COMPAT
static compat_clock_t clock_t_to_compat_clock_t(clock_t x)
{
	return compat_jiffies_to_clock_t(clock_t_to_jiffies(x));
}

COMPAT_SYSCALL_DEFINE1(times, struct compat_tms __user *, tbuf)
{
	if (tbuf) {
		struct tms tms;
		struct compat_tms tmp;

		do_sys_times(&tms);
		/* Convert our struct tms to the compat version. */
		tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime);
		tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime);
		tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime);
		tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime);
		if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
			return -EFAULT;
	}
	force_successful_syscall_return();
	return compat_jiffies_to_clock_t(jiffies);
}
#endif
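/*
 * Illustrative userspace sketch (not part of this kernel source):
 * userspace converts the clock_t values produced by do_sys_times() into
 * seconds by dividing by sysconf(_SC_CLK_TCK); the return value of times()
 * itself is an opaque point in time, only useful for computing elapsed
 * ticks between two calls:
 *
 *	#include <sys/times.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		struct tms t;
 *		long hz = sysconf(_SC_CLK_TCK);
 *
 *		times(&t);
 *		printf("user: %.2fs sys: %.2fs\n",
 *		       (double)t.tms_utime / hz, (double)t.tms_stime / hz);
 *		return 0;
 *	}
 */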
/*
 * This needs some heavy checking ...
 * I just haven't the stomach for it.  I also don't fully
 * understand sessions/pgrp etc.  Let somebody who does explain it.
 *
 * OK, I think I have the protection semantics right.... this is really
 * only important on a multi-user system anyway, to make sure one user
 * can't send a signal to a process owned by another.  -TYT, 12/12/91
 *
 * !PF_FORKNOEXEC check to conform completely to POSIX.
 */
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
	struct task_struct *p;
	struct task_struct *group_leader = current->group_leader;
	struct pid *pgrp;
	int err;

	if (!pid)
		pid = task_pid_vnr(group_leader);
	if (!pgid)
		pgid = pid;
	if (pgid < 0)
		return -EINVAL;
	rcu_read_lock();

	/* From this point forward we keep holding onto the tasklist lock
	 * so that our parent does not change from under us. -DaveM
	 */
	write_lock_irq(&tasklist_lock);

	err = -ESRCH;
	p = find_task_by_vpid(pid);
	if (!p)
		goto out;

	err = -EINVAL;
	if (!thread_group_leader(p))
		goto out;

	if (same_thread_group(p->real_parent, group_leader)) {
		err = -EPERM;
		if (task_session(p) != task_session(group_leader))
			goto out;
		err = -EACCES;
		if (!(p->flags & PF_FORKNOEXEC))
			goto out;
	} else {
		err = -ESRCH;
		if (p != group_leader)
			goto out;
	}

	err = -EPERM;
	if (p->signal->leader)
		goto out;

	pgrp = task_pid(p);
	if (pgid != pid) {
		struct task_struct *g;

		pgrp = find_vpid(pgid);
		g = pid_task(pgrp, PIDTYPE_PGID);
		if (!g || task_session(g) != task_session(group_leader))
			goto out;
	}

	err = security_task_setpgid(p, pgid);
	if (err)
		goto out;

	if (task_pgrp(p) != pgrp)
		change_pid(p, PIDTYPE_PGID, pgrp);

	err = 0;
out:
	/* All paths lead to here, thus we are safe. -DaveM */
	write_unlock_irq(&tasklist_lock);
	rcu_read_unlock();
	return err;
}

static int do_getpgid(pid_t pid)
{
	struct task_struct *p;
	struct pid *grp;
	int retval;

	rcu_read_lock();
	if (!pid)
		grp = task_pgrp(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		grp = task_pgrp(p);
		if (!grp)
			goto out;

		retval = security_task_getpgid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(grp);
out:
	rcu_read_unlock();
	return retval;
}

SYSCALL_DEFINE1(getpgid, pid_t, pid)
{
	return do_getpgid(pid);
}

#ifdef __ARCH_WANT_SYS_GETPGRP

SYSCALL_DEFINE0(getpgrp)
{
	return do_getpgid(0);
}

#endif

SYSCALL_DEFINE1(getsid, pid_t, pid)
{
	struct task_struct *p;
	struct pid *sid;
	int retval;

	rcu_read_lock();
	if (!pid)
		sid = task_session(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		sid = task_session(p);
		if (!sid)
			goto out;

		retval = security_task_getsid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(sid);
out:
	rcu_read_unlock();
	return retval;
}
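/*
 * Illustrative userspace sketch (not part of this kernel source): the
 * main user of setpgid() is a job-control shell, which moves each pipeline
 * into its own process group; calling it in both parent and child closes
 * the fork/exec race:
 *
 *	#include <unistd.h>
 *
 *	pid_t spawn_job(void)
 *	{
 *		pid_t pid = fork();
 *
 *		if (pid == 0) {
 *			setpgid(0, 0);		// child: new group, pgid = pid
 *			// execvp(...);
 *			_exit(127);
 *		}
 *		if (pid > 0)
 *			setpgid(pid, pid);	// parent: same, whoever runs first wins
 *		return pid;
 *	}
 */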
static void set_special_pids(struct pid *pid)
{
	struct task_struct *curr = current->group_leader;

	if (task_session(curr) != pid)
		change_pid(curr, PIDTYPE_SID, pid);

	if (task_pgrp(curr) != pid)
		change_pid(curr, PIDTYPE_PGID, pid);
}

int ksys_setsid(void)
{
	struct task_struct *group_leader = current->group_leader;
	struct pid *sid = task_pid(group_leader);
	pid_t session = pid_vnr(sid);
	int err = -EPERM;

	write_lock_irq(&tasklist_lock);
	/* Fail if I am already a session leader */
	if (group_leader->signal->leader)
		goto out;

	/* Fail if a process group id already exists that equals the
	 * proposed session id.
	 */
	if (pid_task(sid, PIDTYPE_PGID))
		goto out;

	group_leader->signal->leader = 1;
	set_special_pids(sid);

	proc_clear_tty(group_leader);

	err = session;
out:
	write_unlock_irq(&tasklist_lock);
	if (err > 0) {
		proc_sid_connector(group_leader);
		sched_autogroup_create_attach(group_leader);
	}
	return err;
}

SYSCALL_DEFINE0(setsid)
{
	return ksys_setsid();
}

DECLARE_RWSEM(uts_sem);

#ifdef COMPAT_UTS_MACHINE
#define override_architecture(name) \
	(personality(current->personality) == PER_LINUX32 && \
	 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \
		      sizeof(COMPAT_UTS_MACHINE)))
#else
#define override_architecture(name)	0
#endif

/*
 * Work around broken programs that cannot handle "Linux 3.0".
 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40.
 * And we map 4.x and later versions to 2.6.60+x, so 4.0/5.0/6.0/...
 * would be 2.6.60.
 */
static int override_release(char __user *release, size_t len)
{
	int ret = 0;

	if (current->personality & UNAME26) {
		const char *rest = UTS_RELEASE;
		char buf[65] = { 0 };
		int ndots = 0;
		unsigned v;
		size_t copy;

		while (*rest) {
			if (*rest == '.' && ++ndots >= 3)
				break;
			if (!isdigit(*rest) && *rest != '.')
				break;
			rest++;
		}
		v = LINUX_VERSION_PATCHLEVEL + 60;
		copy = clamp_t(size_t, len, 1, sizeof(buf));
		copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
		ret = copy_to_user(release, buf, copy + 1);
	}
	return ret;
}

SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
	struct new_utsname tmp;

	down_read(&uts_sem);
	memcpy(&tmp, utsname(), sizeof(tmp));
	up_read(&uts_sem);
	if (copy_to_user(name, &tmp, sizeof(tmp)))
		return -EFAULT;

	if (override_release(name->release, sizeof(name->release)))
		return -EFAULT;
	if (override_architecture(name))
		return -EFAULT;
	return 0;
}

#ifdef __ARCH_WANT_SYS_OLD_UNAME
/*
 * Old cruft
 */
SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
{
	struct old_utsname tmp;

	if (!name)
		return -EFAULT;

	down_read(&uts_sem);
	memcpy(&tmp, utsname(), sizeof(tmp));
	up_read(&uts_sem);
	if (copy_to_user(name, &tmp, sizeof(tmp)))
		return -EFAULT;

	if (override_release(name->release, sizeof(name->release)))
		return -EFAULT;
	if (override_architecture(name))
		return -EFAULT;
	return 0;
}

SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
	struct oldold_utsname tmp;

	if (!name)
		return -EFAULT;

	memset(&tmp, 0, sizeof(tmp));

	down_read(&uts_sem);
	memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN);
	memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN);
	memcpy(&tmp.release, &utsname()->release, __OLD_UTS_LEN);
	memcpy(&tmp.version, &utsname()->version, __OLD_UTS_LEN);
	memcpy(&tmp.machine, &utsname()->machine, __OLD_UTS_LEN);
	up_read(&uts_sem);
	if (copy_to_user(name, &tmp, sizeof(tmp)))
		return -EFAULT;

	if (override_architecture(name))
		return -EFAULT;
	if (override_release(name->release, sizeof(name->release)))
		return -EFAULT;
	return 0;
}
#endif
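/*
 * Illustrative userspace sketch (not part of this kernel source):
 * override_release() only fires for tasks with the UNAME26 personality,
 * e.g. what "setarch --uname-2.6" sets up; uname() then reports
 * 2.6.<LINUX_VERSION_PATCHLEVEL + 60>:
 *
 *	#include <sys/personality.h>
 *	#include <sys/utsname.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		struct utsname u;
 *
 *		personality(PER_LINUX | UNAME26);
 *		uname(&u);
 *		printf("%s\n", u.release);	// e.g. "2.6.66" on a 6.6 kernel
 *		return 0;
 *	}
 */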
SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;
	if (!ccs_capable(CCS_SYS_SETHOSTNAME))
		return -EPERM;
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u;

		add_device_randomness(tmp, len);
		down_write(&uts_sem);
		u = utsname();
		memcpy(u->nodename, tmp, len);
		memset(u->nodename + len, 0, sizeof(u->nodename) - len);
		errno = 0;
		uts_proc_notify(UTS_PROC_HOSTNAME);
		up_write(&uts_sem);
	}
	return errno;
}

#ifdef __ARCH_WANT_SYS_GETHOSTNAME

SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
	int i;
	struct new_utsname *u;
	char tmp[__NEW_UTS_LEN + 1];

	if (len < 0)
		return -EINVAL;
	down_read(&uts_sem);
	u = utsname();
	i = 1 + strlen(u->nodename);
	if (i > len)
		i = len;
	memcpy(tmp, u->nodename, i);
	up_read(&uts_sem);
	if (copy_to_user(name, tmp, i))
		return -EFAULT;
	return 0;
}

#endif

/*
 * Only setdomainname; getdomainname can be implemented by calling
 * uname()
 */
SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;
	if (!ccs_capable(CCS_SYS_SETHOSTNAME))
		return -EPERM;

	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u;

		add_device_randomness(tmp, len);
		down_write(&uts_sem);
		u = utsname();
		memcpy(u->domainname, tmp, len);
		memset(u->domainname + len, 0, sizeof(u->domainname) - len);
		errno = 0;
		uts_proc_notify(UTS_PROC_DOMAINNAME);
		up_write(&uts_sem);
	}
	return errno;
}

/* make sure you are allowed to change @tsk limits before calling this */
static int do_prlimit(struct task_struct *tsk, unsigned int resource,
		      struct rlimit *new_rlim, struct rlimit *old_rlim)
{
	struct rlimit *rlim;
	int retval = 0;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	resource = array_index_nospec(resource, RLIM_NLIMITS);

	if (new_rlim) {
		if (new_rlim->rlim_cur > new_rlim->rlim_max)
			return -EINVAL;
		if (resource == RLIMIT_NOFILE &&
		    new_rlim->rlim_max > sysctl_nr_open)
			return -EPERM;
	}

	/* Holding a refcount on tsk protects tsk->signal from disappearing. */
	rlim = tsk->signal->rlim + resource;
	task_lock(tsk->group_leader);
	if (new_rlim) {
		/*
		 * Keep the capable check against init_user_ns until cgroups can
		 * contain all limits.
		 */
		if (new_rlim->rlim_max > rlim->rlim_max &&
		    !capable(CAP_SYS_RESOURCE))
			retval = -EPERM;
		if (!retval)
			retval = security_task_setrlimit(tsk, resource, new_rlim);
	}
	if (!retval) {
		if (old_rlim)
			*old_rlim = *rlim;
		if (new_rlim)
			*rlim = *new_rlim;
	}
	task_unlock(tsk->group_leader);
	/*
	 * RLIMIT_CPU handling.  Arm the posix CPU timer if the limit is not
	 * infinite.  In case of RLIM_INFINITY the posix CPU timer code
	 * ignores the rlimit.
	 */
	if (!retval && new_rlim && resource == RLIMIT_CPU &&
	    new_rlim->rlim_cur != RLIM_INFINITY &&
	    IS_ENABLED(CONFIG_POSIX_TIMERS)) {
		/*
		 * update_rlimit_cpu can fail if the task is exiting, but there
		 * may be other tasks in the thread group that are not exiting,
		 * and they need their cpu timers adjusted.
		 *
		 * The group_leader is the last task to be released, so if we
		 * cannot update_rlimit_cpu on it, then the entire process is
		 * exiting and we do not need to update at all.
		 */
		update_rlimit_cpu(tsk->group_leader, new_rlim->rlim_cur);
	}

	return retval;
}

SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit value;
	int ret;

	ret = do_prlimit(current, resource, NULL, &value);
	if (!ret)
		ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;

	return ret;
}

#ifdef CONFIG_COMPAT

COMPAT_SYSCALL_DEFINE2(setrlimit, unsigned int, resource,
		       struct compat_rlimit __user *, rlim)
{
	struct rlimit r;
	struct compat_rlimit r32;

	if (copy_from_user(&r32, rlim, sizeof(struct compat_rlimit)))
		return -EFAULT;

	if (r32.rlim_cur == COMPAT_RLIM_INFINITY)
		r.rlim_cur = RLIM_INFINITY;
	else
		r.rlim_cur = r32.rlim_cur;
	if (r32.rlim_max == COMPAT_RLIM_INFINITY)
		r.rlim_max = RLIM_INFINITY;
	else
		r.rlim_max = r32.rlim_max;
	return do_prlimit(current, resource, &r, NULL);
}

COMPAT_SYSCALL_DEFINE2(getrlimit, unsigned int, resource,
		       struct compat_rlimit __user *, rlim)
{
	struct rlimit r;
	int ret;

	ret = do_prlimit(current, resource, NULL, &r);
	if (!ret) {
		struct compat_rlimit r32;

		if (r.rlim_cur > COMPAT_RLIM_INFINITY)
			r32.rlim_cur = COMPAT_RLIM_INFINITY;
		else
			r32.rlim_cur = r.rlim_cur;
		if (r.rlim_max > COMPAT_RLIM_INFINITY)
			r32.rlim_max = COMPAT_RLIM_INFINITY;
		else
			r32.rlim_max = r.rlim_max;

		if (copy_to_user(rlim, &r32, sizeof(struct compat_rlimit)))
			return -EFAULT;
	}
	return ret;
}

#endif
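/*
 * Illustrative userspace sketch (not part of this kernel source): the
 * usual unprivileged use of do_prlimit() via setrlimit() is raising the
 * soft limit up to the hard limit (raising the hard limit itself needs
 * CAP_SYS_RESOURCE):
 *
 *	#include <sys/resource.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		struct rlimit rl;
 *
 *		if (getrlimit(RLIMIT_NOFILE, &rl) == 0) {
 *			rl.rlim_cur = rl.rlim_max;
 *			if (setrlimit(RLIMIT_NOFILE, &rl) == -1)
 *				perror("setrlimit");
 *		}
 *		return 0;
 *	}
 */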
#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT

/*
 * Back compatibility for getrlimit.  Needed for some apps.
 */
SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
		struct rlimit __user *, rlim)
{
	struct rlimit x;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;

	resource = array_index_nospec(resource, RLIM_NLIMITS);
	task_lock(current->group_leader);
	x = current->signal->rlim[resource];
	task_unlock(current->group_leader);
	if (x.rlim_cur > 0x7FFFFFFF)
		x.rlim_cur = 0x7FFFFFFF;
	if (x.rlim_max > 0x7FFFFFFF)
		x.rlim_max = 0x7FFFFFFF;
	return copy_to_user(rlim, &x, sizeof(x)) ? -EFAULT : 0;
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
		       struct compat_rlimit __user *, rlim)
{
	struct rlimit r;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;

	resource = array_index_nospec(resource, RLIM_NLIMITS);
	task_lock(current->group_leader);
	r = current->signal->rlim[resource];
	task_unlock(current->group_leader);
	if (r.rlim_cur > 0x7FFFFFFF)
		r.rlim_cur = 0x7FFFFFFF;
	if (r.rlim_max > 0x7FFFFFFF)
		r.rlim_max = 0x7FFFFFFF;

	if (put_user(r.rlim_cur, &rlim->rlim_cur) ||
	    put_user(r.rlim_max, &rlim->rlim_max))
		return -EFAULT;
	return 0;
}
#endif

#endif

static inline bool rlim64_is_infinity(__u64 rlim64)
{
#if BITS_PER_LONG < 64
	return rlim64 >= ULONG_MAX;
#else
	return rlim64 == RLIM64_INFINITY;
#endif
}

static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
{
	if (rlim->rlim_cur == RLIM_INFINITY)
		rlim64->rlim_cur = RLIM64_INFINITY;
	else
		rlim64->rlim_cur = rlim->rlim_cur;
	if (rlim->rlim_max == RLIM_INFINITY)
		rlim64->rlim_max = RLIM64_INFINITY;
	else
		rlim64->rlim_max = rlim->rlim_max;
}

static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
{
	if (rlim64_is_infinity(rlim64->rlim_cur))
		rlim->rlim_cur = RLIM_INFINITY;
	else
		rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
	if (rlim64_is_infinity(rlim64->rlim_max))
		rlim->rlim_max = RLIM_INFINITY;
	else
		rlim->rlim_max = (unsigned long)rlim64->rlim_max;
}

/* rcu lock must be held */
static int check_prlimit_permission(struct task_struct *task,
				    unsigned int flags)
{
	const struct cred *cred = current_cred(), *tcred;
	bool id_match;

	if (current == task)
		return 0;

	tcred = __task_cred(task);
	id_match = (uid_eq(cred->uid, tcred->euid) &&
		    uid_eq(cred->uid, tcred->suid) &&
		    uid_eq(cred->uid, tcred->uid) &&
		    gid_eq(cred->gid, tcred->egid) &&
		    gid_eq(cred->gid, tcred->sgid) &&
		    gid_eq(cred->gid, tcred->gid));
	if (!id_match && !ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
		return -EPERM;

	return security_task_prlimit(cred, tcred, flags);
}
SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
		const struct rlimit64 __user *, new_rlim,
		struct rlimit64 __user *, old_rlim)
{
	struct rlimit64 old64, new64;
	struct rlimit old, new;
	struct task_struct *tsk;
	unsigned int checkflags = 0;
	int ret;

	if (old_rlim)
		checkflags |= LSM_PRLIMIT_READ;

	if (new_rlim) {
		if (copy_from_user(&new64, new_rlim, sizeof(new64)))
			return -EFAULT;
		rlim64_to_rlim(&new64, &new);
		checkflags |= LSM_PRLIMIT_WRITE;
	}

	rcu_read_lock();
	tsk = pid ? find_task_by_vpid(pid) : current;
	if (!tsk) {
		rcu_read_unlock();
		return -ESRCH;
	}
	ret = check_prlimit_permission(tsk, checkflags);
	if (ret) {
		rcu_read_unlock();
		return ret;
	}
	get_task_struct(tsk);
	rcu_read_unlock();

	ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
			 old_rlim ? &old : NULL);

	if (!ret && old_rlim) {
		rlim_to_rlim64(&old, &old64);
		if (copy_to_user(old_rlim, &old64, sizeof(old64)))
			ret = -EFAULT;
	}

	put_task_struct(tsk);
	return ret;
}

SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit new_rlim;

	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
		return -EFAULT;
	return do_prlimit(current, resource, &new_rlim, NULL);
}
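/*
 * Illustrative userspace sketch (not part of this kernel source):
 * prlimit64() is the syscall behind the prlimit(1) tool and glibc's
 * prlimit(); it can atomically read and write another process's limit,
 * subject to check_prlimit_permission() above:
 *
 *	#define _GNU_SOURCE
 *	#include <sys/resource.h>
 *	#include <stdio.h>
 *
 *	int bump_nofile(pid_t pid, rlim_t n)
 *	{
 *		struct rlimit new = { .rlim_cur = n, .rlim_max = n }, old;
 *
 *		if (prlimit(pid, RLIMIT_NOFILE, &new, &old) == -1)
 *			return -1;
 *		printf("old: %llu/%llu\n",
 *		       (unsigned long long)old.rlim_cur,
 *		       (unsigned long long)old.rlim_max);
 *		return 0;
 *	}
 */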
1787 * 1788 */ 1789 1790 static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r) 1791 { 1792 r->ru_nvcsw += t->nvcsw; 1793 r->ru_nivcsw += t->nivcsw; 1794 r->ru_minflt += t->min_flt; 1795 r->ru_majflt += t->maj_flt; 1796 r->ru_inblock += task_io_get_inblock(t); 1797 r->ru_oublock += task_io_get_oublock(t); 1798 } 1799 1800 void getrusage(struct task_struct *p, int who, struct rusage *r) 1801 { 1802 struct task_struct *t; 1803 unsigned long flags; 1804 u64 tgutime, tgstime, utime, stime; 1805 unsigned long maxrss; 1806 struct mm_struct *mm; 1807 struct signal_struct *sig = p->signal; 1808 unsigned int seq = 0; 1809 1810 retry: 1811 memset(r, 0, sizeof(*r)); 1812 utime = stime = 0; 1813 maxrss = 0; 1814 1815 if (who == RUSAGE_THREAD) { 1816 task_cputime_adjusted(current, &utime, &stime); 1817 accumulate_thread_rusage(p, r); 1818 maxrss = sig->maxrss; 1819 goto out_thread; 1820 } 1821 1822 flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); 1823 1824 switch (who) { 1825 case RUSAGE_BOTH: 1826 case RUSAGE_CHILDREN: 1827 utime = sig->cutime; 1828 stime = sig->cstime; 1829 r->ru_nvcsw = sig->cnvcsw; 1830 r->ru_nivcsw = sig->cnivcsw; 1831 r->ru_minflt = sig->cmin_flt; 1832 r->ru_majflt = sig->cmaj_flt; 1833 r->ru_inblock = sig->cinblock; 1834 r->ru_oublock = sig->coublock; 1835 maxrss = sig->cmaxrss; 1836 1837 if (who == RUSAGE_CHILDREN) 1838 break; 1839 fallthrough; 1840 1841 case RUSAGE_SELF: 1842 r->ru_nvcsw += sig->nvcsw; 1843 r->ru_nivcsw += sig->nivcsw; 1844 r->ru_minflt += sig->min_flt; 1845 r->ru_majflt += sig->maj_flt; 1846 r->ru_inblock += sig->inblock; 1847 r->ru_oublock += sig->oublock; 1848 if (maxrss < sig->maxrss) 1849 maxrss = sig->maxrss; 1850 1851 rcu_read_lock(); 1852 __for_each_thread(sig, t) 1853 accumulate_thread_rusage(t, r); 1854 rcu_read_unlock(); 1855 1856 break; 1857 1858 default: 1859 BUG(); 1860 } 1861 1862 if (need_seqretry(&sig->stats_lock, seq)) { 1863 seq = 1; 1864 goto retry; 1865 } 1866 done_seqretry_irqrestore(&sig->stats_lock, seq, flags); 1867 1868 if (who == RUSAGE_CHILDREN) 1869 goto out_children; 1870 1871 thread_group_cputime_adjusted(p, &tgutime, &tgstime); 1872 utime += tgutime; 1873 stime += tgstime; 1874 1875 out_thread: 1876 mm = get_task_mm(p); 1877 if (mm) { 1878 setmax_mm_hiwater_rss(&maxrss, mm); 1879 mmput(mm); 1880 } 1881 1882 out_children: 1883 r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ 1884 r->ru_utime = ns_to_kernel_old_timeval(utime); 1885 r->ru_stime = ns_to_kernel_old_timeval(stime); 1886 } 1887 1888 SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) 1889 { 1890 struct rusage r; 1891 1892 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && 1893 who != RUSAGE_THREAD) 1894 return -EINVAL; 1895 1896 getrusage(current, who, &r); 1897 return copy_to_user(ru, &r, sizeof(r)) ? 
SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
{
	struct rusage r;

	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
	    who != RUSAGE_THREAD)
		return -EINVAL;

	getrusage(current, who, &r);
	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(getrusage, int, who, struct compat_rusage __user *, ru)
{
	struct rusage r;

	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
	    who != RUSAGE_THREAD)
		return -EINVAL;

	getrusage(current, who, &r);
	return put_compat_rusage(&r, ru);
}
#endif

SYSCALL_DEFINE1(umask, int, mask)
{
	mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
	return mask;
}

static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
{
	struct fd exe;
	struct inode *inode;
	int err;

	exe = fdget(fd);
	if (!exe.file)
		return -EBADF;

	inode = file_inode(exe.file);

	/*
	 * Because the original mm->exe_file points to an executable file,
	 * make sure that this one is executable as well, to avoid breaking
	 * the overall picture.
	 */
	err = -EACCES;
	if (!S_ISREG(inode->i_mode) || path_noexec(&exe.file->f_path))
		goto exit;

	err = file_permission(exe.file, MAY_EXEC);
	if (err)
		goto exit;

	err = replace_mm_exe_file(mm, exe.file);
exit:
	fdput(exe);
	return err;
}
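/*
 * Illustrative userspace sketch (not part of this kernel source):
 * checkpoint/restore tools use PR_SET_MM_EXE_FILE to point /proc/pid/exe
 * at the restored binary; the fd must refer to an executable regular
 * file, and the caller needs the capability checked in prctl_set_mm()
 * below:
 *
 *	#include <sys/prctl.h>
 *	#include <linux/prctl.h>
 *	#include <fcntl.h>
 *
 *	int set_exe_link(const char *path)
 *	{
 *		int fd = open(path, O_RDONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		return prctl(PR_SET_MM, PR_SET_MM_EXE_FILE, fd, 0, 0);
 *	}
 */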
2007 */ 2008 if (check_data_rlimit(rlimit(RLIMIT_DATA), prctl_map->brk, 2009 prctl_map->start_brk, prctl_map->end_data, 2010 prctl_map->start_data)) 2011 goto out; 2012 2013 error = 0; 2014 out: 2015 return error; 2016 } 2017 2018 #ifdef CONFIG_CHECKPOINT_RESTORE 2019 static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size) 2020 { 2021 struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, }; 2022 unsigned long user_auxv[AT_VECTOR_SIZE]; 2023 struct mm_struct *mm = current->mm; 2024 int error; 2025 2026 BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); 2027 BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256); 2028 2029 if (opt == PR_SET_MM_MAP_SIZE) 2030 return put_user((unsigned int)sizeof(prctl_map), 2031 (unsigned int __user *)addr); 2032 2033 if (data_size != sizeof(prctl_map)) 2034 return -EINVAL; 2035 2036 if (copy_from_user(&prctl_map, addr, sizeof(prctl_map))) 2037 return -EFAULT; 2038 2039 error = validate_prctl_map_addr(&prctl_map); 2040 if (error) 2041 return error; 2042 2043 if (prctl_map.auxv_size) { 2044 /* 2045 * Someone is trying to cheat the auxv vector. 2046 */ 2047 if (!prctl_map.auxv || 2048 prctl_map.auxv_size > sizeof(mm->saved_auxv)) 2049 return -EINVAL; 2050 2051 memset(user_auxv, 0, sizeof(user_auxv)); 2052 if (copy_from_user(user_auxv, 2053 (const void __user *)prctl_map.auxv, 2054 prctl_map.auxv_size)) 2055 return -EFAULT; 2056 2057 /* Last entry must be AT_NULL as specification requires */ 2058 user_auxv[AT_VECTOR_SIZE - 2] = AT_NULL; 2059 user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL; 2060 } 2061 2062 if (prctl_map.exe_fd != (u32)-1) { 2063 /* 2064 * Check if the current user is checkpoint/restore capable. 2065 * At the time of this writing, it checks for CAP_SYS_ADMIN 2066 * or CAP_CHECKPOINT_RESTORE. 2067 * Note that a user with access to ptrace can masquerade an 2068 * arbitrary program as any executable, even setuid ones. 2069 * This may have implications in the tomoyo subsystem. 2070 */ 2071 if (!checkpoint_restore_ns_capable(current_user_ns())) 2072 return -EPERM; 2073 2074 error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd); 2075 if (error) 2076 return error; 2077 } 2078 2079 /* 2080 * arg_lock protects concurrent updates but we still need mmap_lock for 2081 * read to exclude races with sys_brk. 2082 */ 2083 mmap_read_lock(mm); 2084 2085 /* 2086 * We don't validate if these members are pointing to 2087 * real present VMAs because application may have correspond 2088 * VMAs already unmapped and kernel uses these members for statistics 2089 * output in procfs mostly, except 2090 * 2091 * - @start_brk/@brk which are used in do_brk_flags but kernel lookups 2092 * for VMAs when updating these members so anything wrong written 2093 * here cause kernel to swear at userspace program but won't lead 2094 * to any problem in kernel itself 2095 */ 2096 2097 spin_lock(&mm->arg_lock); 2098 mm->start_code = prctl_map.start_code; 2099 mm->end_code = prctl_map.end_code; 2100 mm->start_data = prctl_map.start_data; 2101 mm->end_data = prctl_map.end_data; 2102 mm->start_brk = prctl_map.start_brk; 2103 mm->brk = prctl_map.brk; 2104 mm->start_stack = prctl_map.start_stack; 2105 mm->arg_start = prctl_map.arg_start; 2106 mm->arg_end = prctl_map.arg_end; 2107 mm->env_start = prctl_map.env_start; 2108 mm->env_end = prctl_map.env_end; 2109 spin_unlock(&mm->arg_lock); 2110 2111 /* 2112 * Note this update of @saved_auxv is lockless thus 2113 * if someone reads this member in procfs while we're 2114 * updating -- it may get partly updated results. 
#ifdef CONFIG_CHECKPOINT_RESTORE
static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size)
{
	struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, };
	unsigned long user_auxv[AT_VECTOR_SIZE];
	struct mm_struct *mm = current->mm;
	int error;

	BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
	BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256);

	if (opt == PR_SET_MM_MAP_SIZE)
		return put_user((unsigned int)sizeof(prctl_map),
				(unsigned int __user *)addr);

	if (data_size != sizeof(prctl_map))
		return -EINVAL;

	if (copy_from_user(&prctl_map, addr, sizeof(prctl_map)))
		return -EFAULT;

	error = validate_prctl_map_addr(&prctl_map);
	if (error)
		return error;

	if (prctl_map.auxv_size) {
		/*
		 * Someone is trying to cheat the auxv vector.
		 */
		if (!prctl_map.auxv ||
		    prctl_map.auxv_size > sizeof(mm->saved_auxv))
			return -EINVAL;

		memset(user_auxv, 0, sizeof(user_auxv));
		if (copy_from_user(user_auxv,
				   (const void __user *)prctl_map.auxv,
				   prctl_map.auxv_size))
			return -EFAULT;

		/* The last entry must be AT_NULL, as the specification requires */
		user_auxv[AT_VECTOR_SIZE - 2] = AT_NULL;
		user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
	}

	if (prctl_map.exe_fd != (u32)-1) {
		/*
		 * Check if the current user is checkpoint/restore capable.
		 * At the time of this writing, it checks for CAP_SYS_ADMIN
		 * or CAP_CHECKPOINT_RESTORE.
		 * Note that a user with access to ptrace can masquerade an
		 * arbitrary program as any executable, even setuid ones.
		 * This may have implications for the tomoyo subsystem.
		 */
		if (!checkpoint_restore_ns_capable(current_user_ns()))
			return -EPERM;

		error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd);
		if (error)
			return error;
	}

	/*
	 * arg_lock protects concurrent updates, but we still need mmap_lock
	 * for read to exclude races with sys_brk.
	 */
	mmap_read_lock(mm);

	/*
	 * We don't validate that these members point to real, present VMAs:
	 * the application may already have unmapped the corresponding VMAs,
	 * and the kernel uses these members mostly for statistics output in
	 * procfs. The exception is:
	 *
	 *  - @start_brk/@brk, which are used in do_brk_flags. The kernel does
	 *    look up VMAs when updating these members, so a bogus value
	 *    written here makes the kernel swear at the userspace program,
	 *    but won't lead to any problem in the kernel itself.
	 */

	spin_lock(&mm->arg_lock);
	mm->start_code = prctl_map.start_code;
	mm->end_code = prctl_map.end_code;
	mm->start_data = prctl_map.start_data;
	mm->end_data = prctl_map.end_data;
	mm->start_brk = prctl_map.start_brk;
	mm->brk = prctl_map.brk;
	mm->start_stack = prctl_map.start_stack;
	mm->arg_start = prctl_map.arg_start;
	mm->arg_end = prctl_map.arg_end;
	mm->env_start = prctl_map.env_start;
	mm->env_end = prctl_map.env_end;
	spin_unlock(&mm->arg_lock);

	/*
	 * Note this update of @saved_auxv is lockless, so if someone reads
	 * this member in procfs while we're updating it, they may get partly
	 * updated results. It's a known and acceptable trade-off: we leave
	 * it as is to avoid introducing additional locks here, which would
	 * make the kernel more complex.
	 */
	if (prctl_map.auxv_size)
		memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));

	mmap_read_unlock(mm);
	return 0;
}
#endif /* CONFIG_CHECKPOINT_RESTORE */

static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
			  unsigned long len)
{
	/*
	 * This doesn't move the auxiliary vector itself since it's pinned to
	 * mm_struct, but it permits filling the vector with new values.  It's
	 * up to the caller to provide sane values here, otherwise userspace
	 * tools which use this vector might be unhappy.
	 */
	unsigned long user_auxv[AT_VECTOR_SIZE] = {};

	if (len > sizeof(user_auxv))
		return -EINVAL;

	if (copy_from_user(user_auxv, (const void __user *)addr, len))
		return -EFAULT;

	/* Make sure the last entry is always AT_NULL */
	user_auxv[AT_VECTOR_SIZE - 2] = 0;
	user_auxv[AT_VECTOR_SIZE - 1] = 0;

	BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));

	task_lock(current);
	memcpy(mm->saved_auxv, user_auxv, len);
	task_unlock(current);

	return 0;
}
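
/*
 * Usage sketch (userspace, illustrative only -- field values are
 * placeholders): a checkpoint/restore tool queries the expected struct
 * size first, then submits a fully populated map in one call. struct
 * prctl_mm_map is exported through <linux/prctl.h>.
 *
 *	#include <sys/prctl.h>
 *	#include <linux/prctl.h>
 *
 *	unsigned int size;
 *	prctl(PR_SET_MM, PR_SET_MM_MAP_SIZE, (unsigned long)&size, 0, 0);
 *
 *	struct prctl_mm_map map = { .exe_fd = (unsigned int)-1, };
 *	// ... fill in start_code/end_code/start_data/... as restored ...
 *	prctl(PR_SET_MM, PR_SET_MM_MAP, (unsigned long)&map, size, 0);
 */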
static int prctl_set_mm(int opt, unsigned long addr,
			unsigned long arg4, unsigned long arg5)
{
	struct mm_struct *mm = current->mm;
	struct prctl_mm_map prctl_map = {
		.auxv = NULL,
		.auxv_size = 0,
		.exe_fd = -1,
	};
	struct vm_area_struct *vma;
	int error;

	if (arg5 || (arg4 && (opt != PR_SET_MM_AUXV &&
			      opt != PR_SET_MM_MAP &&
			      opt != PR_SET_MM_MAP_SIZE)))
		return -EINVAL;

#ifdef CONFIG_CHECKPOINT_RESTORE
	if (opt == PR_SET_MM_MAP || opt == PR_SET_MM_MAP_SIZE)
		return prctl_set_mm_map(opt, (const void __user *)addr, arg4);
#endif

	if (!capable(CAP_SYS_RESOURCE))
		return -EPERM;

	if (opt == PR_SET_MM_EXE_FILE)
		return prctl_set_mm_exe_file(mm, (unsigned int)addr);

	if (opt == PR_SET_MM_AUXV)
		return prctl_set_auxv(mm, addr, arg4);

	if (addr >= TASK_SIZE || addr < mmap_min_addr)
		return -EINVAL;

	error = -EINVAL;

	/*
	 * arg_lock protects concurrent updates of the arg boundaries; we
	 * need mmap_lock for a) concurrent sys_brk, b) finding the VMA for
	 * addr validation.
	 */
	mmap_read_lock(mm);
	vma = find_vma(mm, addr);

	spin_lock(&mm->arg_lock);
	prctl_map.start_code = mm->start_code;
	prctl_map.end_code = mm->end_code;
	prctl_map.start_data = mm->start_data;
	prctl_map.end_data = mm->end_data;
	prctl_map.start_brk = mm->start_brk;
	prctl_map.brk = mm->brk;
	prctl_map.start_stack = mm->start_stack;
	prctl_map.arg_start = mm->arg_start;
	prctl_map.arg_end = mm->arg_end;
	prctl_map.env_start = mm->env_start;
	prctl_map.env_end = mm->env_end;

	switch (opt) {
	case PR_SET_MM_START_CODE:
		prctl_map.start_code = addr;
		break;
	case PR_SET_MM_END_CODE:
		prctl_map.end_code = addr;
		break;
	case PR_SET_MM_START_DATA:
		prctl_map.start_data = addr;
		break;
	case PR_SET_MM_END_DATA:
		prctl_map.end_data = addr;
		break;
	case PR_SET_MM_START_STACK:
		prctl_map.start_stack = addr;
		break;
	case PR_SET_MM_START_BRK:
		prctl_map.start_brk = addr;
		break;
	case PR_SET_MM_BRK:
		prctl_map.brk = addr;
		break;
	case PR_SET_MM_ARG_START:
		prctl_map.arg_start = addr;
		break;
	case PR_SET_MM_ARG_END:
		prctl_map.arg_end = addr;
		break;
	case PR_SET_MM_ENV_START:
		prctl_map.env_start = addr;
		break;
	case PR_SET_MM_ENV_END:
		prctl_map.env_end = addr;
		break;
	default:
		goto out;
	}

	error = validate_prctl_map_addr(&prctl_map);
	if (error)
		goto out;

	switch (opt) {
	/*
	 * If the command line arguments and environment are placed
	 * elsewhere on the stack, they can be updated here:
	 * ARG_START/END to set up the command line arguments and
	 * ENV_START/END for the environment.
	 */
	case PR_SET_MM_START_STACK:
	case PR_SET_MM_ARG_START:
	case PR_SET_MM_ARG_END:
	case PR_SET_MM_ENV_START:
	case PR_SET_MM_ENV_END:
		if (!vma) {
			error = -EFAULT;
			goto out;
		}
	}

	mm->start_code = prctl_map.start_code;
	mm->end_code = prctl_map.end_code;
	mm->start_data = prctl_map.start_data;
	mm->end_data = prctl_map.end_data;
	mm->start_brk = prctl_map.start_brk;
	mm->brk = prctl_map.brk;
	mm->start_stack = prctl_map.start_stack;
	mm->arg_start = prctl_map.arg_start;
	mm->arg_end = prctl_map.arg_end;
	mm->env_start = prctl_map.env_start;
	mm->env_end = prctl_map.env_end;

	error = 0;
out:
	spin_unlock(&mm->arg_lock);
	mmap_read_unlock(mm);
	return error;
}
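
/*
 * Usage sketch (userspace, illustrative only): a process rewriting its
 * own command line, as seen in /proc/self/cmdline, by pointing
 * arg_start/arg_end at a private buffer. This needs CAP_SYS_RESOURCE,
 * the buffer must live in a mapped VMA, and since each call is checked
 * by validate_prctl_map_addr(), the new lower bound is set first so
 * arg_start <= arg_end holds at every step.
 *
 *	#include <sys/prctl.h>
 *
 *	static char new_cmdline[] = "myproc\0--restored";
 *
 *	prctl(PR_SET_MM, PR_SET_MM_ARG_START,
 *	      (unsigned long)new_cmdline, 0, 0);
 *	prctl(PR_SET_MM, PR_SET_MM_ARG_END,
 *	      (unsigned long)new_cmdline + sizeof(new_cmdline), 0, 0);
 */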
#ifdef CONFIG_CHECKPOINT_RESTORE
static int prctl_get_tid_address(struct task_struct *me, int __user * __user *tid_addr)
{
	return put_user(me->clear_child_tid, tid_addr);
}
#else
static int prctl_get_tid_address(struct task_struct *me, int __user * __user *tid_addr)
{
	return -EINVAL;
}
#endif

static int propagate_has_child_subreaper(struct task_struct *p, void *data)
{
	/*
	 * If the task already has has_child_subreaper set, all its
	 * descendants have the flag too and new descendants will inherit
	 * it on fork, so skip them.
	 *
	 * If we've found the child_reaper, skip the descendants in its
	 * subtree as they will never leave their pidns.
	 */
	if (p->signal->has_child_subreaper ||
	    is_child_reaper(task_pid(p)))
		return 0;

	p->signal->has_child_subreaper = 1;
	return 1;
}

int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
{
	return -EINVAL;
}

int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
				    unsigned long ctrl)
{
	return -EINVAL;
}

#define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE)

#ifdef CONFIG_ANON_VMA_NAME

#define ANON_VMA_NAME_MAX_LEN		80
#define ANON_VMA_NAME_INVALID_CHARS	"\\`$[]"

static inline bool is_valid_name_char(char ch)
{
	/* printable ASCII characters, excluding ANON_VMA_NAME_INVALID_CHARS */
	return ch > 0x1f && ch < 0x7f &&
		!strchr(ANON_VMA_NAME_INVALID_CHARS, ch);
}

static int prctl_set_vma(unsigned long opt, unsigned long addr,
			 unsigned long size, unsigned long arg)
{
	struct mm_struct *mm = current->mm;
	const char __user *uname;
	struct anon_vma_name *anon_name = NULL;
	int error;

	switch (opt) {
	case PR_SET_VMA_ANON_NAME:
		uname = (const char __user *)arg;
		if (uname) {
			char *name, *pch;

			name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN);
			if (IS_ERR(name))
				return PTR_ERR(name);

			for (pch = name; *pch != '\0'; pch++) {
				if (!is_valid_name_char(*pch)) {
					kfree(name);
					return -EINVAL;
				}
			}
			/* anon_vma has its own copy */
			anon_name = anon_vma_name_alloc(name);
			kfree(name);
			if (!anon_name)
				return -ENOMEM;

		}

		mmap_write_lock(mm);
		error = madvise_set_anon_name(mm, addr, size, anon_name);
		mmap_write_unlock(mm);
		anon_vma_name_put(anon_name);
		break;
	default:
		error = -EINVAL;
	}

	return error;
}

#else /* CONFIG_ANON_VMA_NAME */
static int prctl_set_vma(unsigned long opt, unsigned long start,
			 unsigned long size, unsigned long arg)
{
	return -EINVAL;
}
#endif /* CONFIG_ANON_VMA_NAME */
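
/*
 * Usage sketch (userspace, illustrative only): naming an anonymous
 * mapping so it shows up as "[anon:my heap arena]" in /proc/self/maps.
 * The name must be printable ASCII without the characters "\`$[]".
 *
 *	#include <sys/mman.h>
 *	#include <sys/prctl.h>
 *
 *	size_t len = 1 << 20;
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
 *	      (unsigned long)p, len, (unsigned long)"my heap arena");
 */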
static inline unsigned long get_current_mdwe(void)
{
	unsigned long ret = 0;

	if (test_bit(MMF_HAS_MDWE, &current->mm->flags))
		ret |= PR_MDWE_REFUSE_EXEC_GAIN;
	if (test_bit(MMF_HAS_MDWE_NO_INHERIT, &current->mm->flags))
		ret |= PR_MDWE_NO_INHERIT;

	return ret;
}

static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
				 unsigned long arg4, unsigned long arg5)
{
	unsigned long current_bits;

	if (arg3 || arg4 || arg5)
		return -EINVAL;

	if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT))
		return -EINVAL;

	/* NO_INHERIT only makes sense with REFUSE_EXEC_GAIN */
	if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
		return -EINVAL;

	/*
	 * EOPNOTSUPP might be more appropriate here in principle, but
	 * existing userspace depends on EINVAL specifically.
	 */
	if (!arch_memory_deny_write_exec_supported())
		return -EINVAL;

	current_bits = get_current_mdwe();
	if (current_bits && current_bits != bits)
		return -EPERM; /* Cannot unset the flags */

	if (bits & PR_MDWE_NO_INHERIT)
		set_bit(MMF_HAS_MDWE_NO_INHERIT, &current->mm->flags);
	if (bits & PR_MDWE_REFUSE_EXEC_GAIN)
		set_bit(MMF_HAS_MDWE, &current->mm->flags);

	return 0;
}

static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3,
				 unsigned long arg4, unsigned long arg5)
{
	if (arg2 || arg3 || arg4 || arg5)
		return -EINVAL;
	return get_current_mdwe();
}

static int prctl_get_auxv(void __user *addr, unsigned long len)
{
	struct mm_struct *mm = current->mm;
	unsigned long size = min_t(unsigned long, sizeof(mm->saved_auxv), len);

	if (size && copy_to_user(addr, mm->saved_auxv, size))
		return -EFAULT;
	return sizeof(mm->saved_auxv);
}
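
/*
 * Usage sketch (userspace, illustrative only): opt the process in to
 * memory-deny-write-execute before loading untrusted plugins. Once set,
 * the bits cannot be cleared again (-EPERM above).
 *
 *	#include <sys/prctl.h>
 *
 *	if (prctl(PR_SET_MDWE, PR_MDWE_REFUSE_EXEC_GAIN, 0, 0, 0))
 *		;	// unsupported arch, or flags already set differently
 *
 *	long bits = prctl(PR_GET_MDWE, 0, 0, 0, 0);
 *	// bits now carries PR_MDWE_REFUSE_EXEC_GAIN
 */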
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		unsigned long, arg4, unsigned long, arg5)
{
	struct task_struct *me = current;
	unsigned char comm[sizeof(me->comm)];
	long error;

	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
	if (error != -ENOSYS)
		return error;

	error = 0;
	switch (option) {
	case PR_SET_PDEATHSIG:
		if (!valid_signal(arg2)) {
			error = -EINVAL;
			break;
		}
		me->pdeath_signal = arg2;
		break;
	case PR_GET_PDEATHSIG:
		error = put_user(me->pdeath_signal, (int __user *)arg2);
		break;
	case PR_GET_DUMPABLE:
		error = get_dumpable(me->mm);
		break;
	case PR_SET_DUMPABLE:
		if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) {
			error = -EINVAL;
			break;
		}
		set_dumpable(me->mm, arg2);
		break;

	case PR_SET_UNALIGN:
		error = SET_UNALIGN_CTL(me, arg2);
		break;
	case PR_GET_UNALIGN:
		error = GET_UNALIGN_CTL(me, arg2);
		break;
	case PR_SET_FPEMU:
		error = SET_FPEMU_CTL(me, arg2);
		break;
	case PR_GET_FPEMU:
		error = GET_FPEMU_CTL(me, arg2);
		break;
	case PR_SET_FPEXC:
		error = SET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_FPEXC:
		error = GET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_TIMING:
		error = PR_TIMING_STATISTICAL;
		break;
	case PR_SET_TIMING:
		if (arg2 != PR_TIMING_STATISTICAL)
			error = -EINVAL;
		break;
	case PR_SET_NAME:
		comm[sizeof(me->comm) - 1] = 0;
		if (strncpy_from_user(comm, (char __user *)arg2,
				      sizeof(me->comm) - 1) < 0)
			return -EFAULT;
		set_task_comm(me, comm);
		proc_comm_connector(me);
		break;
	case PR_GET_NAME:
		get_task_comm(comm, me);
		if (copy_to_user((char __user *)arg2, comm, sizeof(comm)))
			return -EFAULT;
		break;
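	/*
	 * Usage sketch (userspace, illustrative only): the task name is
	 * truncated to TASK_COMM_LEN - 1 (15) bytes and is what appears in
	 * /proc/<pid>/comm and in ps/top thread listings.
	 *
	 *	#include <sys/prctl.h>
	 *
	 *	char name[16];
	 *	prctl(PR_SET_NAME, (unsigned long)"worker-7", 0, 0, 0);
	 *	prctl(PR_GET_NAME, (unsigned long)name, 0, 0, 0);
	 */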
	case PR_GET_ENDIAN:
		error = GET_ENDIAN(me, arg2);
		break;
	case PR_SET_ENDIAN:
		error = SET_ENDIAN(me, arg2);
		break;
	case PR_GET_SECCOMP:
		error = prctl_get_seccomp();
		break;
	case PR_SET_SECCOMP:
		error = prctl_set_seccomp(arg2, (char __user *)arg3);
		break;
	case PR_GET_TSC:
		error = GET_TSC_CTL(arg2);
		break;
	case PR_SET_TSC:
		error = SET_TSC_CTL(arg2);
		break;
	case PR_TASK_PERF_EVENTS_DISABLE:
		error = perf_event_task_disable();
		break;
	case PR_TASK_PERF_EVENTS_ENABLE:
		error = perf_event_task_enable();
		break;
	case PR_GET_TIMERSLACK:
		if (current->timer_slack_ns > ULONG_MAX)
			error = ULONG_MAX;
		else
			error = current->timer_slack_ns;
		break;
	case PR_SET_TIMERSLACK:
		if (arg2 <= 0)
			current->timer_slack_ns =
					current->default_timer_slack_ns;
		else
			current->timer_slack_ns = arg2;
		break;
	case PR_MCE_KILL:
		if (arg4 | arg5)
			return -EINVAL;
		switch (arg2) {
		case PR_MCE_KILL_CLEAR:
			if (arg3 != 0)
				return -EINVAL;
			current->flags &= ~PF_MCE_PROCESS;
			break;
		case PR_MCE_KILL_SET:
			current->flags |= PF_MCE_PROCESS;
			if (arg3 == PR_MCE_KILL_EARLY)
				current->flags |= PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_LATE)
				current->flags &= ~PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_DEFAULT)
				current->flags &=
						~(PF_MCE_EARLY|PF_MCE_PROCESS);
			else
				return -EINVAL;
			break;
		default:
			return -EINVAL;
		}
		break;
	case PR_MCE_KILL_GET:
		if (arg2 | arg3 | arg4 | arg5)
			return -EINVAL;
		if (current->flags & PF_MCE_PROCESS)
			error = (current->flags & PF_MCE_EARLY) ?
				PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
		else
			error = PR_MCE_KILL_DEFAULT;
		break;
	case PR_SET_MM:
		error = prctl_set_mm(arg2, arg3, arg4, arg5);
		break;
	case PR_GET_TID_ADDRESS:
		error = prctl_get_tid_address(me, (int __user * __user *)arg2);
		break;
	case PR_SET_CHILD_SUBREAPER:
		me->signal->is_child_subreaper = !!arg2;
		if (!arg2)
			break;

		walk_process_tree(me, propagate_has_child_subreaper, NULL);
		break;
	case PR_GET_CHILD_SUBREAPER:
		error = put_user(me->signal->is_child_subreaper,
				 (int __user *)arg2);
		break;
	case PR_SET_NO_NEW_PRIVS:
		if (arg2 != 1 || arg3 || arg4 || arg5)
			return -EINVAL;

		task_set_no_new_privs(current);
		break;
	case PR_GET_NO_NEW_PRIVS:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		return task_no_new_privs(current) ? 1 : 0;
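	/*
	 * Usage sketch (userspace, illustrative only): no_new_privs is a
	 * one-way switch; it is typically set before installing a seccomp
	 * filter without CAP_SYS_ADMIN.
	 *
	 *	#include <sys/prctl.h>
	 *
	 *	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	 *	// now execve() can no longer grant privileges (setuid,
	 *	// file capabilities, ...) and seccomp filters may be loaded
	 */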
	case PR_GET_THP_DISABLE:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		error = !!test_bit(MMF_DISABLE_THP, &me->mm->flags);
		break;
	case PR_SET_THP_DISABLE:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		if (mmap_write_lock_killable(me->mm))
			return -EINTR;
		if (arg2)
			set_bit(MMF_DISABLE_THP, &me->mm->flags);
		else
			clear_bit(MMF_DISABLE_THP, &me->mm->flags);
		mmap_write_unlock(me->mm);
		break;
	case PR_MPX_ENABLE_MANAGEMENT:
	case PR_MPX_DISABLE_MANAGEMENT:
		/* No longer implemented: */
		return -EINVAL;
	case PR_SET_FP_MODE:
		error = SET_FP_MODE(me, arg2);
		break;
	case PR_GET_FP_MODE:
		error = GET_FP_MODE(me);
		break;
	case PR_SVE_SET_VL:
		error = SVE_SET_VL(arg2);
		break;
	case PR_SVE_GET_VL:
		error = SVE_GET_VL();
		break;
	case PR_SME_SET_VL:
		error = SME_SET_VL(arg2);
		break;
	case PR_SME_GET_VL:
		error = SME_GET_VL();
		break;
	case PR_GET_SPECULATION_CTRL:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		error = arch_prctl_spec_ctrl_get(me, arg2);
		break;
	case PR_SET_SPECULATION_CTRL:
		if (arg4 || arg5)
			return -EINVAL;
		error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
		break;
	case PR_PAC_RESET_KEYS:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		error = PAC_RESET_KEYS(me, arg2);
		break;
	case PR_PAC_SET_ENABLED_KEYS:
		if (arg4 || arg5)
			return -EINVAL;
		error = PAC_SET_ENABLED_KEYS(me, arg2, arg3);
		break;
	case PR_PAC_GET_ENABLED_KEYS:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		error = PAC_GET_ENABLED_KEYS(me);
		break;
	case PR_SET_TAGGED_ADDR_CTRL:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		error = SET_TAGGED_ADDR_CTRL(arg2);
		break;
	case PR_GET_TAGGED_ADDR_CTRL:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		error = GET_TAGGED_ADDR_CTRL();
		break;
	case PR_SET_IO_FLUSHER:
		if (!capable(CAP_SYS_RESOURCE))
			return -EPERM;

		if (arg3 || arg4 || arg5)
			return -EINVAL;

		if (arg2 == 1)
			current->flags |= PR_IO_FLUSHER;
		else if (!arg2)
			current->flags &= ~PR_IO_FLUSHER;
		else
			return -EINVAL;
		break;
	case PR_GET_IO_FLUSHER:
		if (!capable(CAP_SYS_RESOURCE))
			return -EPERM;

		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;

		error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER;
		break;
	case PR_SET_SYSCALL_USER_DISPATCH:
		error = set_syscall_user_dispatch(arg2, arg3, arg4,
						  (char __user *) arg5);
		break;
#ifdef CONFIG_SCHED_CORE
	case PR_SCHED_CORE:
		error = sched_core_share_pid(arg2, arg3, arg4, arg5);
		break;
#endif
	case PR_SET_MDWE:
		error = prctl_set_mdwe(arg2, arg3, arg4, arg5);
		break;
	case PR_GET_MDWE:
		error = prctl_get_mdwe(arg2, arg3, arg4, arg5);
		break;
	case PR_PPC_GET_DEXCR:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		error = PPC_GET_DEXCR_ASPECT(me, arg2);
		break;
	case PR_PPC_SET_DEXCR:
		if (arg4 || arg5)
			return -EINVAL;
		error = PPC_SET_DEXCR_ASPECT(me, arg2, arg3);
		break;
	case PR_SET_VMA:
		error = prctl_set_vma(arg2, arg3, arg4, arg5);
		break;
	case PR_GET_AUXV:
		if (arg4 || arg5)
			return -EINVAL;
		error = prctl_get_auxv((void __user *)arg2, arg3);
		break;
#ifdef CONFIG_KSM
	case PR_SET_MEMORY_MERGE:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		if (mmap_write_lock_killable(me->mm))
			return -EINTR;

		if (arg2)
			error = ksm_enable_merge_any(me->mm);
		else
			error = ksm_disable_merge_any(me->mm);
		mmap_write_unlock(me->mm);
		break;
	case PR_GET_MEMORY_MERGE:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;

		error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
		break;
#endif
	case PR_RISCV_V_SET_CONTROL:
		error = RISCV_V_SET_CONTROL(arg2);
		break;
	case PR_RISCV_V_GET_CONTROL:
		error = RISCV_V_GET_CONTROL();
		break;
	case PR_RISCV_SET_ICACHE_FLUSH_CTX:
		error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3);
		break;
	default:
		error = -EINVAL;
		break;
	}
	return error;
}

SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
		struct getcpu_cache __user *, unused)
{
	int err = 0;
	int cpu = raw_smp_processor_id();

	if (cpup)
		err |= put_user(cpu, cpup);
	if (nodep)
		err |= put_user(cpu_to_node(cpu), nodep);
	return err ? -EFAULT : 0;
}
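
/*
 * Usage sketch (userspace, illustrative only): getcpu() reports where
 * the caller is running right now; the result is only a hint, since the
 * scheduler may migrate the task immediately afterwards. The third
 * argument has been unused since Linux 2.6.24 and may be NULL.
 *
 *	#define _GNU_SOURCE
 *	#include <sched.h>
 *
 *	unsigned int cpu, node;
 *	getcpu(&cpu, &node);	// glibc wrapper over this syscall
 */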
/**
 * do_sysinfo - fill in sysinfo struct
 * @info: pointer to buffer to fill
 */
static int do_sysinfo(struct sysinfo *info)
{
	unsigned long mem_total, sav_total;
	unsigned int mem_unit, bitcount;
	struct timespec64 tp;

	memset(info, 0, sizeof(struct sysinfo));

	ktime_get_boottime_ts64(&tp);
	timens_add_boottime(&tp);
	info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);

	get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);

	info->procs = nr_threads;

	si_meminfo(info);
	si_swapinfo(info);

	/*
	 * If the sum of all the available memory (i.e. ram + swap)
	 * is less than can be stored in a 32 bit unsigned long then
	 * we can be binary compatible with 2.2.x kernels.  If not,
	 * well, in that case 2.2.x was broken anyways...
	 *
	 *  -Erik Andersen <andersee@debian.org>
	 */

	mem_total = info->totalram + info->totalswap;
	if (mem_total < info->totalram || mem_total < info->totalswap)
		goto out;
	bitcount = 0;
	mem_unit = info->mem_unit;
	while (mem_unit > 1) {
		bitcount++;
		mem_unit >>= 1;
		sav_total = mem_total;
		mem_total <<= 1;
		if (mem_total < sav_total)
			goto out;
	}

	/*
	 * If mem_total did not overflow, multiply all memory values by
	 * info->mem_unit and set it to 1.  This leaves things compatible
	 * with 2.2.x, and also retains compatibility with earlier 2.4.x
	 * kernels...
	 */

	info->mem_unit = 1;
	info->totalram <<= bitcount;
	info->freeram <<= bitcount;
	info->sharedram <<= bitcount;
	info->bufferram <<= bitcount;
	info->totalswap <<= bitcount;
	info->freeswap <<= bitcount;
	info->totalhigh <<= bitcount;
	info->freehigh <<= bitcount;

out:
	return 0;
}

SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
{
	struct sysinfo val;

	do_sysinfo(&val);

	if (copy_to_user(info, &val, sizeof(struct sysinfo)))
		return -EFAULT;

	return 0;
}

#ifdef CONFIG_COMPAT
struct compat_sysinfo {
	s32 uptime;
	u32 loads[3];
	u32 totalram;
	u32 freeram;
	u32 sharedram;
	u32 bufferram;
	u32 totalswap;
	u32 freeswap;
	u16 procs;
	u16 pad;
	u32 totalhigh;
	u32 freehigh;
	u32 mem_unit;
	char _f[20 - 2 * sizeof(u32) - sizeof(int)];
};

COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
{
	struct sysinfo s;
	struct compat_sysinfo s_32;

	do_sysinfo(&s);

	/*
	 * Check to see if any memory value is too large for 32-bit and
	 * scale down if needed.
	 */
	if (upper_32_bits(s.totalram) || upper_32_bits(s.totalswap)) {
		int bitcount = 0;

		while (s.mem_unit < PAGE_SIZE) {
			s.mem_unit <<= 1;
			bitcount++;
		}

		s.totalram >>= bitcount;
		s.freeram >>= bitcount;
		s.sharedram >>= bitcount;
		s.bufferram >>= bitcount;
		s.totalswap >>= bitcount;
		s.freeswap >>= bitcount;
		s.totalhigh >>= bitcount;
		s.freehigh >>= bitcount;
	}

	memset(&s_32, 0, sizeof(s_32));
	s_32.uptime = s.uptime;
	s_32.loads[0] = s.loads[0];
	s_32.loads[1] = s.loads[1];
	s_32.loads[2] = s.loads[2];
	s_32.totalram = s.totalram;
	s_32.freeram = s.freeram;
	s_32.sharedram = s.sharedram;
	s_32.bufferram = s.bufferram;
	s_32.totalswap = s.totalswap;
	s_32.freeswap = s.freeswap;
	s_32.procs = s.procs;
	s_32.totalhigh = s.totalhigh;
	s_32.freehigh = s.freehigh;
	s_32.mem_unit = s.mem_unit;
	if (copy_to_user(info, &s_32, sizeof(s_32)))
		return -EFAULT;
	return 0;
}
#endif /* CONFIG_COMPAT */
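
/*
 * Usage sketch (userspace, illustrative only): the memory fields are
 * expressed in units of mem_unit bytes, so multiply before printing.
 *
 *	#include <stdio.h>
 *	#include <sys/sysinfo.h>
 *
 *	struct sysinfo si;
 *	sysinfo(&si);
 *	unsigned long long total =
 *		(unsigned long long)si.totalram * si.mem_unit;
 *	printf("up %ld s, %llu bytes RAM\n", si.uptime, total);
 */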