1 // SPDX-License-Identifier: GPL-2.0 2 3 #define _GNU_SOURCE 4 #include <errno.h> 5 #include <fcntl.h> 6 #include <linux/kernel.h> 7 #include <limits.h> 8 #include <stdbool.h> 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <syscall.h> 13 #include <unistd.h> 14 #include <sys/resource.h> 15 #include <linux/close_range.h> 16 17 #include "../kselftest_harness.h" 18 #include "../clone3/clone3_selftests.h" 19 20 21 #ifndef F_LINUX_SPECIFIC_BASE 22 #define F_LINUX_SPECIFIC_BASE 1024 23 #endif 24 25 #ifndef F_DUPFD_QUERY 26 #define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3) 27 #endif 28 29 static inline int sys_close_range(unsigned int fd, unsigned int max_fd, 30 unsigned int flags) 31 { 32 return syscall(__NR_close_range, fd, max_fd, flags); 33 } 34 35 TEST(core_close_range) 36 { 37 int i, ret; 38 int open_fds[101]; 39 40 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 41 int fd; 42 43 fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 44 ASSERT_GE(fd, 0) { 45 if (errno == ENOENT) 46 SKIP(return, "Skipping test since /dev/null does not exist"); 47 } 48 49 open_fds[i] = fd; 50 } 51 52 EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) { 53 if (errno == ENOSYS) 54 SKIP(return, "close_range() syscall not supported"); 55 } 56 57 for (i = 0; i < 100; i++) { 58 ret = fcntl(open_fds[i], F_DUPFD_QUERY, open_fds[i + 1]); 59 if (ret < 0) { 60 EXPECT_EQ(errno, EINVAL); 61 } else { 62 EXPECT_EQ(ret, 0); 63 } 64 } 65 66 EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); 67 68 for (i = 0; i <= 50; i++) 69 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); 70 71 for (i = 51; i <= 100; i++) 72 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); 73 74 /* create a couple of gaps */ 75 close(57); 76 close(78); 77 close(81); 78 close(82); 79 close(84); 80 close(90); 81 82 EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0)); 83 84 for (i = 51; i <= 92; i++) 85 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); 86 87 for (i = 93; i <= 100; i++) 88 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); 89 90 /* test that the kernel caps and still closes all fds */ 91 EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0)); 92 93 for (i = 93; i <= 99; i++) 94 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); 95 96 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); 97 98 EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0)); 99 100 EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL)); 101 } 102 103 TEST(close_range_unshare) 104 { 105 int i, ret, status; 106 pid_t pid; 107 int open_fds[101]; 108 struct __clone_args args = { 109 .flags = CLONE_FILES, 110 .exit_signal = SIGCHLD, 111 }; 112 113 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 114 int fd; 115 116 fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 117 ASSERT_GE(fd, 0) { 118 if (errno == ENOENT) 119 SKIP(return, "Skipping test since /dev/null does not exist"); 120 } 121 122 open_fds[i] = fd; 123 } 124 125 pid = sys_clone3(&args, sizeof(args)); 126 ASSERT_GE(pid, 0); 127 128 if (pid == 0) { 129 ret = sys_close_range(open_fds[0], open_fds[50], 130 CLOSE_RANGE_UNSHARE); 131 if (ret) 132 exit(EXIT_FAILURE); 133 134 for (i = 0; i <= 50; i++) 135 if (fcntl(open_fds[i], F_GETFL) != -1) 136 exit(EXIT_FAILURE); 137 138 for (i = 51; i <= 100; i++) 139 if (fcntl(open_fds[i], F_GETFL) == -1) 140 exit(EXIT_FAILURE); 141 142 /* create a couple of gaps */ 143 close(57); 144 close(78); 145 close(81); 146 close(82); 147 close(84); 148 close(90); 149 150 ret = sys_close_range(open_fds[51], open_fds[92], 151 CLOSE_RANGE_UNSHARE); 152 if (ret) 153 exit(EXIT_FAILURE); 154 155 for (i = 51; i <= 92; i++) 156 if (fcntl(open_fds[i], F_GETFL) != -1) 157 exit(EXIT_FAILURE); 158 159 for (i = 93; i <= 100; i++) 160 if (fcntl(open_fds[i], F_GETFL) == -1) 161 exit(EXIT_FAILURE); 162 163 /* test that the kernel caps and still closes all fds */ 164 ret = sys_close_range(open_fds[93], open_fds[99], 165 CLOSE_RANGE_UNSHARE); 166 if (ret) 167 exit(EXIT_FAILURE); 168 169 for (i = 93; i <= 99; i++) 170 if (fcntl(open_fds[i], F_GETFL) != -1) 171 exit(EXIT_FAILURE); 172 173 if (fcntl(open_fds[100], F_GETFL) == -1) 174 exit(EXIT_FAILURE); 175 176 ret = sys_close_range(open_fds[100], open_fds[100], 177 CLOSE_RANGE_UNSHARE); 178 if (ret) 179 exit(EXIT_FAILURE); 180 181 if (fcntl(open_fds[100], F_GETFL) != -1) 182 exit(EXIT_FAILURE); 183 184 exit(EXIT_SUCCESS); 185 } 186 187 EXPECT_EQ(waitpid(pid, &status, 0), pid); 188 EXPECT_EQ(true, WIFEXITED(status)); 189 EXPECT_EQ(0, WEXITSTATUS(status)); 190 } 191 192 TEST(close_range_unshare_capped) 193 { 194 int i, ret, status; 195 pid_t pid; 196 int open_fds[101]; 197 struct __clone_args args = { 198 .flags = CLONE_FILES, 199 .exit_signal = SIGCHLD, 200 }; 201 202 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 203 int fd; 204 205 fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 206 ASSERT_GE(fd, 0) { 207 if (errno == ENOENT) 208 SKIP(return, "Skipping test since /dev/null does not exist"); 209 } 210 211 open_fds[i] = fd; 212 } 213 214 pid = sys_clone3(&args, sizeof(args)); 215 ASSERT_GE(pid, 0); 216 217 if (pid == 0) { 218 ret = sys_close_range(open_fds[0], UINT_MAX, 219 CLOSE_RANGE_UNSHARE); 220 if (ret) 221 exit(EXIT_FAILURE); 222 223 for (i = 0; i <= 100; i++) 224 if (fcntl(open_fds[i], F_GETFL) != -1) 225 exit(EXIT_FAILURE); 226 227 exit(EXIT_SUCCESS); 228 } 229 230 EXPECT_EQ(waitpid(pid, &status, 0), pid); 231 EXPECT_EQ(true, WIFEXITED(status)); 232 EXPECT_EQ(0, WEXITSTATUS(status)); 233 } 234 235 TEST(close_range_cloexec) 236 { 237 int i, ret; 238 int open_fds[101]; 239 struct rlimit rlimit; 240 241 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 242 int fd; 243 244 fd = open("/dev/null", O_RDONLY); 245 ASSERT_GE(fd, 0) { 246 if (errno == ENOENT) 247 SKIP(return, "Skipping test since /dev/null does not exist"); 248 } 249 250 open_fds[i] = fd; 251 } 252 253 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC); 254 if (ret < 0) { 255 if (errno == ENOSYS) 256 SKIP(return, "close_range() syscall not supported"); 257 if (errno == EINVAL) 258 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC"); 259 } 260 261 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */ 262 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit)); 263 rlimit.rlim_cur = 25; 264 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)); 265 266 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */ 267 ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC); 268 ASSERT_EQ(0, ret); 269 ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC); 270 ASSERT_EQ(0, ret); 271 272 for (i = 0; i <= 50; i++) { 273 int flags = fcntl(open_fds[i], F_GETFD); 274 275 EXPECT_GT(flags, -1); 276 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 277 } 278 279 for (i = 51; i <= 74; i++) { 280 int flags = fcntl(open_fds[i], F_GETFD); 281 282 EXPECT_GT(flags, -1); 283 EXPECT_EQ(flags & FD_CLOEXEC, 0); 284 } 285 286 for (i = 75; i <= 100; i++) { 287 int flags = fcntl(open_fds[i], F_GETFD); 288 289 EXPECT_GT(flags, -1); 290 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 291 } 292 293 /* Test a common pattern. */ 294 ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC); 295 for (i = 0; i <= 100; i++) { 296 int flags = fcntl(open_fds[i], F_GETFD); 297 298 EXPECT_GT(flags, -1); 299 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 300 } 301 } 302 303 TEST(close_range_cloexec_unshare) 304 { 305 int i, ret; 306 int open_fds[101]; 307 struct rlimit rlimit; 308 309 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 310 int fd; 311 312 fd = open("/dev/null", O_RDONLY); 313 ASSERT_GE(fd, 0) { 314 if (errno == ENOENT) 315 SKIP(return, "Skipping test since /dev/null does not exist"); 316 } 317 318 open_fds[i] = fd; 319 } 320 321 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC); 322 if (ret < 0) { 323 if (errno == ENOSYS) 324 SKIP(return, "close_range() syscall not supported"); 325 if (errno == EINVAL) 326 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC"); 327 } 328 329 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */ 330 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit)); 331 rlimit.rlim_cur = 25; 332 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)); 333 334 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */ 335 ret = sys_close_range(open_fds[0], open_fds[50], 336 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); 337 ASSERT_EQ(0, ret); 338 ret = sys_close_range(open_fds[75], open_fds[100], 339 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); 340 ASSERT_EQ(0, ret); 341 342 for (i = 0; i <= 50; i++) { 343 int flags = fcntl(open_fds[i], F_GETFD); 344 345 EXPECT_GT(flags, -1); 346 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 347 } 348 349 for (i = 51; i <= 74; i++) { 350 int flags = fcntl(open_fds[i], F_GETFD); 351 352 EXPECT_GT(flags, -1); 353 EXPECT_EQ(flags & FD_CLOEXEC, 0); 354 } 355 356 for (i = 75; i <= 100; i++) { 357 int flags = fcntl(open_fds[i], F_GETFD); 358 359 EXPECT_GT(flags, -1); 360 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 361 } 362 363 /* Test a common pattern. */ 364 ret = sys_close_range(3, UINT_MAX, 365 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); 366 for (i = 0; i <= 100; i++) { 367 int flags = fcntl(open_fds[i], F_GETFD); 368 369 EXPECT_GT(flags, -1); 370 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 371 } 372 } 373 374 /* 375 * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com 376 */ 377 TEST(close_range_cloexec_syzbot) 378 { 379 int fd1, fd2, fd3, fd4, flags, ret, status; 380 pid_t pid; 381 struct __clone_args args = { 382 .flags = CLONE_FILES, 383 .exit_signal = SIGCHLD, 384 }; 385 386 /* Create a huge gap in the fd table. */ 387 fd1 = open("/dev/null", O_RDWR); 388 EXPECT_GT(fd1, 0); 389 390 fd2 = dup2(fd1, 1000); 391 EXPECT_GT(fd2, 0); 392 393 flags = fcntl(fd1, F_DUPFD_QUERY, fd2); 394 if (flags < 0) { 395 EXPECT_EQ(errno, EINVAL); 396 } else { 397 EXPECT_EQ(flags, 1); 398 } 399 400 pid = sys_clone3(&args, sizeof(args)); 401 ASSERT_GE(pid, 0); 402 403 if (pid == 0) { 404 ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC); 405 if (ret) 406 exit(EXIT_FAILURE); 407 408 /* 409 * We now have a private file descriptor table and all 410 * our open fds should still be open but made 411 * close-on-exec. 412 */ 413 flags = fcntl(fd1, F_GETFD); 414 EXPECT_GT(flags, -1); 415 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 416 417 flags = fcntl(fd2, F_GETFD); 418 EXPECT_GT(flags, -1); 419 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 420 421 fd3 = dup2(fd1, 42); 422 EXPECT_GT(fd3, 0); 423 424 flags = fcntl(fd1, F_DUPFD_QUERY, fd3); 425 if (flags < 0) { 426 EXPECT_EQ(errno, EINVAL); 427 } else { 428 EXPECT_EQ(flags, 1); 429 } 430 431 432 433 /* 434 * Duplicating the file descriptor must remove the 435 * FD_CLOEXEC flag. 436 */ 437 flags = fcntl(fd3, F_GETFD); 438 EXPECT_GT(flags, -1); 439 EXPECT_EQ(flags & FD_CLOEXEC, 0); 440 441 exit(EXIT_SUCCESS); 442 } 443 444 EXPECT_EQ(waitpid(pid, &status, 0), pid); 445 EXPECT_EQ(true, WIFEXITED(status)); 446 EXPECT_EQ(0, WEXITSTATUS(status)); 447 448 /* 449 * We had a shared file descriptor table before along with requesting 450 * close-on-exec so the original fds must not be close-on-exec. 451 */ 452 flags = fcntl(fd1, F_GETFD); 453 EXPECT_GT(flags, -1); 454 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 455 456 flags = fcntl(fd2, F_GETFD); 457 EXPECT_GT(flags, -1); 458 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 459 460 fd3 = dup2(fd1, 42); 461 EXPECT_GT(fd3, 0); 462 463 flags = fcntl(fd1, F_DUPFD_QUERY, fd3); 464 if (flags < 0) { 465 EXPECT_EQ(errno, EINVAL); 466 } else { 467 EXPECT_EQ(flags, 1); 468 } 469 470 fd4 = open("/dev/null", O_RDWR); 471 EXPECT_GT(fd4, 0); 472 473 /* Same inode, different file pointers. */ 474 flags = fcntl(fd1, F_DUPFD_QUERY, fd4); 475 if (flags < 0) { 476 EXPECT_EQ(errno, EINVAL); 477 } else { 478 EXPECT_EQ(flags, 0); 479 } 480 481 flags = fcntl(fd3, F_GETFD); 482 EXPECT_GT(flags, -1); 483 EXPECT_EQ(flags & FD_CLOEXEC, 0); 484 485 EXPECT_EQ(close(fd1), 0); 486 EXPECT_EQ(close(fd2), 0); 487 EXPECT_EQ(close(fd3), 0); 488 EXPECT_EQ(close(fd4), 0); 489 } 490 491 /* 492 * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com 493 */ 494 TEST(close_range_cloexec_unshare_syzbot) 495 { 496 int i, fd1, fd2, fd3, flags, ret, status; 497 pid_t pid; 498 struct __clone_args args = { 499 .flags = CLONE_FILES, 500 .exit_signal = SIGCHLD, 501 }; 502 503 /* 504 * Create a huge gap in the fd table. When we now call 505 * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper 506 * bound the kernel will only copy up to fd1 file descriptors into the 507 * new fd table. If the kernel is buggy and doesn't handle 508 * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file 509 * descriptors and we will oops! 510 * 511 * On a buggy kernel this should immediately oops. But let's loop just 512 * to be sure. 513 */ 514 fd1 = open("/dev/null", O_RDWR); 515 EXPECT_GT(fd1, 0); 516 517 fd2 = dup2(fd1, 1000); 518 EXPECT_GT(fd2, 0); 519 520 for (i = 0; i < 100; i++) { 521 522 pid = sys_clone3(&args, sizeof(args)); 523 ASSERT_GE(pid, 0); 524 525 if (pid == 0) { 526 ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE | 527 CLOSE_RANGE_CLOEXEC); 528 if (ret) 529 exit(EXIT_FAILURE); 530 531 /* 532 * We now have a private file descriptor table and all 533 * our open fds should still be open but made 534 * close-on-exec. 535 */ 536 flags = fcntl(fd1, F_GETFD); 537 EXPECT_GT(flags, -1); 538 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 539 540 flags = fcntl(fd2, F_GETFD); 541 EXPECT_GT(flags, -1); 542 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 543 544 fd3 = dup2(fd1, 42); 545 EXPECT_GT(fd3, 0); 546 547 /* 548 * Duplicating the file descriptor must remove the 549 * FD_CLOEXEC flag. 550 */ 551 flags = fcntl(fd3, F_GETFD); 552 EXPECT_GT(flags, -1); 553 EXPECT_EQ(flags & FD_CLOEXEC, 0); 554 555 EXPECT_EQ(close(fd1), 0); 556 EXPECT_EQ(close(fd2), 0); 557 EXPECT_EQ(close(fd3), 0); 558 559 exit(EXIT_SUCCESS); 560 } 561 562 EXPECT_EQ(waitpid(pid, &status, 0), pid); 563 EXPECT_EQ(true, WIFEXITED(status)); 564 EXPECT_EQ(0, WEXITSTATUS(status)); 565 } 566 567 /* 568 * We created a private file descriptor table before along with 569 * requesting close-on-exec so the original fds must not be 570 * close-on-exec. 571 */ 572 flags = fcntl(fd1, F_GETFD); 573 EXPECT_GT(flags, -1); 574 EXPECT_EQ(flags & FD_CLOEXEC, 0); 575 576 flags = fcntl(fd2, F_GETFD); 577 EXPECT_GT(flags, -1); 578 EXPECT_EQ(flags & FD_CLOEXEC, 0); 579 580 fd3 = dup2(fd1, 42); 581 EXPECT_GT(fd3, 0); 582 583 flags = fcntl(fd3, F_GETFD); 584 EXPECT_GT(flags, -1); 585 EXPECT_EQ(flags & FD_CLOEXEC, 0); 586 587 EXPECT_EQ(close(fd1), 0); 588 EXPECT_EQ(close(fd2), 0); 589 EXPECT_EQ(close(fd3), 0); 590 } 591 592 TEST(close_range_bitmap_corruption) 593 { 594 pid_t pid; 595 int status; 596 struct __clone_args args = { 597 .flags = CLONE_FILES, 598 .exit_signal = SIGCHLD, 599 }; 600 601 /* get the first 128 descriptors open */ 602 for (int i = 2; i < 128; i++) 603 EXPECT_GE(dup2(0, i), 0); 604 605 /* get descriptor table shared */ 606 pid = sys_clone3(&args, sizeof(args)); 607 ASSERT_GE(pid, 0); 608 609 if (pid == 0) { 610 /* unshare and truncate descriptor table down to 64 */ 611 if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE)) 612 exit(EXIT_FAILURE); 613 614 ASSERT_EQ(fcntl(64, F_GETFD), -1); 615 /* ... and verify that the range 64..127 is not 616 stuck "fully used" according to secondary bitmap */ 617 EXPECT_EQ(dup(0), 64) 618 exit(EXIT_FAILURE); 619 exit(EXIT_SUCCESS); 620 } 621 622 EXPECT_EQ(waitpid(pid, &status, 0), pid); 623 EXPECT_EQ(true, WIFEXITED(status)); 624 EXPECT_EQ(0, WEXITSTATUS(status)); 625 } 626 627 TEST_HARNESS_MAIN 628
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.