1 // SPDX-License-Identifier: GPL-2.0 2 // Copyright (c) 2020 Cloudflare 3 /* 4 * Test suite for SOCKMAP/SOCKHASH holding listening sockets. 5 * Covers: 6 * 1. BPF map operations - bpf_map_{update,lookup delete}_elem 7 * 2. BPF redirect helpers - bpf_{sk,msg}_redirect_map 8 * 3. BPF reuseport helper - bpf_sk_select_reuseport 9 */ 10 11 #include <linux/compiler.h> 12 #include <errno.h> 13 #include <error.h> 14 #include <limits.h> 15 #include <netinet/in.h> 16 #include <pthread.h> 17 #include <stdlib.h> 18 #include <string.h> 19 #include <sys/select.h> 20 #include <unistd.h> 21 #include <linux/vm_sockets.h> 22 23 #include <bpf/bpf.h> 24 #include <bpf/libbpf.h> 25 26 #include "bpf_util.h" 27 #include "test_progs.h" 28 #include "test_sockmap_listen.skel.h" 29 30 #include "sockmap_helpers.h" 31 32 #define NO_FLAGS 0 33 34 static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused, 35 int family, int sotype, int mapfd) 36 { 37 u32 key = 0; 38 u64 value; 39 int err; 40 41 value = -1; 42 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 43 if (!err || errno != EINVAL) 44 FAIL_ERRNO("map_update: expected EINVAL"); 45 46 value = INT_MAX; 47 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 48 if (!err || errno != EBADF) 49 FAIL_ERRNO("map_update: expected EBADF"); 50 } 51 52 static void test_insert_opened(struct test_sockmap_listen *skel __always_unused, 53 int family, int sotype, int mapfd) 54 { 55 u32 key = 0; 56 u64 value; 57 int err, s; 58 59 s = xsocket(family, sotype, 0); 60 if (s == -1) 61 return; 62 63 errno = 0; 64 value = s; 65 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 66 if (sotype == SOCK_STREAM) { 67 if (!err || errno != EOPNOTSUPP) 68 FAIL_ERRNO("map_update: expected EOPNOTSUPP"); 69 } else if (err) 70 FAIL_ERRNO("map_update: expected success"); 71 xclose(s); 72 } 73 74 static void test_insert_bound(struct test_sockmap_listen *skel __always_unused, 75 int family, int sotype, int mapfd) 76 { 77 
struct sockaddr_storage addr; 78 socklen_t len = 0; 79 u32 key = 0; 80 u64 value; 81 int err, s; 82 83 init_addr_loopback(family, &addr, &len); 84 85 s = xsocket(family, sotype, 0); 86 if (s == -1) 87 return; 88 89 err = xbind(s, sockaddr(&addr), len); 90 if (err) 91 goto close; 92 93 errno = 0; 94 value = s; 95 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 96 if (!err || errno != EOPNOTSUPP) 97 FAIL_ERRNO("map_update: expected EOPNOTSUPP"); 98 close: 99 xclose(s); 100 } 101 102 static void test_insert(struct test_sockmap_listen *skel __always_unused, 103 int family, int sotype, int mapfd) 104 { 105 u64 value; 106 u32 key; 107 int s; 108 109 s = socket_loopback(family, sotype); 110 if (s < 0) 111 return; 112 113 key = 0; 114 value = s; 115 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 116 xclose(s); 117 } 118 119 static void test_delete_after_insert(struct test_sockmap_listen *skel __always_unused, 120 int family, int sotype, int mapfd) 121 { 122 u64 value; 123 u32 key; 124 int s; 125 126 s = socket_loopback(family, sotype); 127 if (s < 0) 128 return; 129 130 key = 0; 131 value = s; 132 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 133 xbpf_map_delete_elem(mapfd, &key); 134 xclose(s); 135 } 136 137 static void test_delete_after_close(struct test_sockmap_listen *skel __always_unused, 138 int family, int sotype, int mapfd) 139 { 140 int err, s; 141 u64 value; 142 u32 key; 143 144 s = socket_loopback(family, sotype); 145 if (s < 0) 146 return; 147 148 key = 0; 149 value = s; 150 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 151 152 xclose(s); 153 154 errno = 0; 155 err = bpf_map_delete_elem(mapfd, &key); 156 if (!err || (errno != EINVAL && errno != ENOENT)) 157 /* SOCKMAP and SOCKHASH return different error codes */ 158 FAIL_ERRNO("map_delete: expected EINVAL/EINVAL"); 159 } 160 161 static void test_lookup_after_insert(struct test_sockmap_listen *skel __always_unused, 162 int family, int sotype, int mapfd) 163 { 164 u64 
cookie, value; 165 socklen_t len; 166 u32 key; 167 int s; 168 169 s = socket_loopback(family, sotype); 170 if (s < 0) 171 return; 172 173 key = 0; 174 value = s; 175 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 176 177 len = sizeof(cookie); 178 xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len); 179 180 xbpf_map_lookup_elem(mapfd, &key, &value); 181 182 if (value != cookie) { 183 FAIL("map_lookup: have %#llx, want %#llx", 184 (unsigned long long)value, (unsigned long long)cookie); 185 } 186 187 xclose(s); 188 } 189 190 static void test_lookup_after_delete(struct test_sockmap_listen *skel __always_unused, 191 int family, int sotype, int mapfd) 192 { 193 int err, s; 194 u64 value; 195 u32 key; 196 197 s = socket_loopback(family, sotype); 198 if (s < 0) 199 return; 200 201 key = 0; 202 value = s; 203 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 204 xbpf_map_delete_elem(mapfd, &key); 205 206 errno = 0; 207 err = bpf_map_lookup_elem(mapfd, &key, &value); 208 if (!err || errno != ENOENT) 209 FAIL_ERRNO("map_lookup: expected ENOENT"); 210 211 xclose(s); 212 } 213 214 static void test_lookup_32_bit_value(struct test_sockmap_listen *skel __always_unused, 215 int family, int sotype, int mapfd) 216 { 217 u32 key, value32; 218 int err, s; 219 220 s = socket_loopback(family, sotype); 221 if (s < 0) 222 return; 223 224 mapfd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(key), 225 sizeof(value32), 1, NULL); 226 if (mapfd < 0) { 227 FAIL_ERRNO("map_create"); 228 goto close; 229 } 230 231 key = 0; 232 value32 = s; 233 xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST); 234 235 errno = 0; 236 err = bpf_map_lookup_elem(mapfd, &key, &value32); 237 if (!err || errno != ENOSPC) 238 FAIL_ERRNO("map_lookup: expected ENOSPC"); 239 240 xclose(mapfd); 241 close: 242 xclose(s); 243 } 244 245 static void test_update_existing(struct test_sockmap_listen *skel __always_unused, 246 int family, int sotype, int mapfd) 247 { 248 int s1, s2; 249 u64 value; 250 u32 
key; 251 252 s1 = socket_loopback(family, sotype); 253 if (s1 < 0) 254 return; 255 256 s2 = socket_loopback(family, sotype); 257 if (s2 < 0) 258 goto close_s1; 259 260 key = 0; 261 value = s1; 262 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 263 264 value = s2; 265 xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST); 266 xclose(s2); 267 close_s1: 268 xclose(s1); 269 } 270 271 /* Exercise the code path where we destroy child sockets that never 272 * got accept()'ed, aka orphans, when parent socket gets closed. 273 */ 274 static void do_destroy_orphan_child(int family, int sotype, int mapfd) 275 { 276 struct sockaddr_storage addr; 277 socklen_t len; 278 int err, s, c; 279 u64 value; 280 u32 key; 281 282 s = socket_loopback(family, sotype); 283 if (s < 0) 284 return; 285 286 len = sizeof(addr); 287 err = xgetsockname(s, sockaddr(&addr), &len); 288 if (err) 289 goto close_srv; 290 291 key = 0; 292 value = s; 293 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 294 295 c = xsocket(family, sotype, 0); 296 if (c == -1) 297 goto close_srv; 298 299 xconnect(c, sockaddr(&addr), len); 300 xclose(c); 301 close_srv: 302 xclose(s); 303 } 304 305 static void test_destroy_orphan_child(struct test_sockmap_listen *skel, 306 int family, int sotype, int mapfd) 307 { 308 int msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict); 309 int skb_verdict = bpf_program__fd(skel->progs.prog_skb_verdict); 310 const struct test { 311 int progfd; 312 enum bpf_attach_type atype; 313 } tests[] = { 314 { -1, -1 }, 315 { msg_verdict, BPF_SK_MSG_VERDICT }, 316 { skb_verdict, BPF_SK_SKB_VERDICT }, 317 }; 318 const struct test *t; 319 320 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 321 if (t->progfd != -1 && 322 xbpf_prog_attach(t->progfd, mapfd, t->atype, 0) != 0) 323 return; 324 325 do_destroy_orphan_child(family, sotype, mapfd); 326 327 if (t->progfd != -1) 328 xbpf_prog_detach2(t->progfd, mapfd, t->atype); 329 } 330 } 331 332 /* Perform a passive open after 
removing listening socket from SOCKMAP
 * to ensure that callbacks get restored properly.
 */
static void test_clone_after_delete(struct test_sockmap_listen *skel __always_unused,
				    int family, int sotype, int mapfd)
{
	struct sockaddr_storage srv_addr;
	socklen_t srv_addrlen = sizeof(srv_addr);
	u32 slot = 0;
	u64 entry;
	int srv, cli;

	srv = socket_loopback(family, sotype);
	if (srv < 0)
		return;

	if (xgetsockname(srv, sockaddr(&srv_addr), &srv_addrlen))
		goto close_srv;

	/* Put the server socket into the map and take it right back out */
	entry = srv;
	xbpf_map_update_elem(mapfd, &slot, &entry, BPF_NOEXIST);
	xbpf_map_delete_elem(mapfd, &slot);

	/* Connect a client now that the server is no longer in the map */
	cli = xsocket(family, sotype, 0);
	if (cli < 0)
		goto close_srv;

	xconnect(cli, sockaddr(&srv_addr), srv_addrlen);
	xclose(cli);
close_srv:
	xclose(srv);
}

/* Check that child socket that got created while parent was in a
 * SOCKMAP, but got accept()'ed only after the parent has been removed
 * from SOCKMAP, gets cloned without parent psock state or callbacks.
371 */ 372 static void test_accept_after_delete(struct test_sockmap_listen *skel __always_unused, 373 int family, int sotype, int mapfd) 374 { 375 struct sockaddr_storage addr; 376 const u32 zero = 0; 377 int err, s, c, p; 378 socklen_t len; 379 u64 value; 380 381 s = socket_loopback(family, sotype | SOCK_NONBLOCK); 382 if (s == -1) 383 return; 384 385 len = sizeof(addr); 386 err = xgetsockname(s, sockaddr(&addr), &len); 387 if (err) 388 goto close_srv; 389 390 value = s; 391 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); 392 if (err) 393 goto close_srv; 394 395 c = xsocket(family, sotype, 0); 396 if (c == -1) 397 goto close_srv; 398 399 /* Create child while parent is in sockmap */ 400 err = xconnect(c, sockaddr(&addr), len); 401 if (err) 402 goto close_cli; 403 404 /* Remove parent from sockmap */ 405 err = xbpf_map_delete_elem(mapfd, &zero); 406 if (err) 407 goto close_cli; 408 409 p = xaccept_nonblock(s, NULL, NULL); 410 if (p == -1) 411 goto close_cli; 412 413 /* Check that child sk_user_data is not set */ 414 value = p; 415 xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); 416 417 xclose(p); 418 close_cli: 419 xclose(c); 420 close_srv: 421 xclose(s); 422 } 423 424 /* Check that child socket that got created and accepted while parent 425 * was in a SOCKMAP is cloned without parent psock state or callbacks. 
426 */ 427 static void test_accept_before_delete(struct test_sockmap_listen *skel __always_unused, 428 int family, int sotype, int mapfd) 429 { 430 struct sockaddr_storage addr; 431 const u32 zero = 0, one = 1; 432 int err, s, c, p; 433 socklen_t len; 434 u64 value; 435 436 s = socket_loopback(family, sotype | SOCK_NONBLOCK); 437 if (s == -1) 438 return; 439 440 len = sizeof(addr); 441 err = xgetsockname(s, sockaddr(&addr), &len); 442 if (err) 443 goto close_srv; 444 445 value = s; 446 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); 447 if (err) 448 goto close_srv; 449 450 c = xsocket(family, sotype, 0); 451 if (c == -1) 452 goto close_srv; 453 454 /* Create & accept child while parent is in sockmap */ 455 err = xconnect(c, sockaddr(&addr), len); 456 if (err) 457 goto close_cli; 458 459 p = xaccept_nonblock(s, NULL, NULL); 460 if (p == -1) 461 goto close_cli; 462 463 /* Check that child sk_user_data is not set */ 464 value = p; 465 xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST); 466 467 xclose(p); 468 close_cli: 469 xclose(c); 470 close_srv: 471 xclose(s); 472 } 473 474 struct connect_accept_ctx { 475 int sockfd; 476 unsigned int done; 477 unsigned int nr_iter; 478 }; 479 480 static bool is_thread_done(struct connect_accept_ctx *ctx) 481 { 482 return READ_ONCE(ctx->done); 483 } 484 485 static void *connect_accept_thread(void *arg) 486 { 487 struct connect_accept_ctx *ctx = arg; 488 struct sockaddr_storage addr; 489 int family, socktype; 490 socklen_t len; 491 int err, i, s; 492 493 s = ctx->sockfd; 494 495 len = sizeof(addr); 496 err = xgetsockname(s, sockaddr(&addr), &len); 497 if (err) 498 goto done; 499 500 len = sizeof(family); 501 err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len); 502 if (err) 503 goto done; 504 505 len = sizeof(socktype); 506 err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len); 507 if (err) 508 goto done; 509 510 for (i = 0; i < ctx->nr_iter; i++) { 511 int c, p; 512 513 c = xsocket(family, socktype, 0); 
514 if (c < 0) 515 break; 516 517 err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr)); 518 if (err) { 519 xclose(c); 520 break; 521 } 522 523 p = xaccept_nonblock(s, NULL, NULL); 524 if (p < 0) { 525 xclose(c); 526 break; 527 } 528 529 xclose(p); 530 xclose(c); 531 } 532 done: 533 WRITE_ONCE(ctx->done, 1); 534 return NULL; 535 } 536 537 static void test_syn_recv_insert_delete(struct test_sockmap_listen *skel __always_unused, 538 int family, int sotype, int mapfd) 539 { 540 struct connect_accept_ctx ctx = { 0 }; 541 struct sockaddr_storage addr; 542 socklen_t len; 543 u32 zero = 0; 544 pthread_t t; 545 int err, s; 546 u64 value; 547 548 s = socket_loopback(family, sotype | SOCK_NONBLOCK); 549 if (s < 0) 550 return; 551 552 len = sizeof(addr); 553 err = xgetsockname(s, sockaddr(&addr), &len); 554 if (err) 555 goto close; 556 557 ctx.sockfd = s; 558 ctx.nr_iter = 1000; 559 560 err = xpthread_create(&t, NULL, connect_accept_thread, &ctx); 561 if (err) 562 goto close; 563 564 value = s; 565 while (!is_thread_done(&ctx)) { 566 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); 567 if (err) 568 break; 569 570 err = xbpf_map_delete_elem(mapfd, &zero); 571 if (err) 572 break; 573 } 574 575 xpthread_join(t, NULL); 576 close: 577 xclose(s); 578 } 579 580 static void *listen_thread(void *arg) 581 { 582 struct sockaddr unspec = { AF_UNSPEC }; 583 struct connect_accept_ctx *ctx = arg; 584 int err, i, s; 585 586 s = ctx->sockfd; 587 588 for (i = 0; i < ctx->nr_iter; i++) { 589 err = xlisten(s, 1); 590 if (err) 591 break; 592 err = xconnect(s, &unspec, sizeof(unspec)); 593 if (err) 594 break; 595 } 596 597 WRITE_ONCE(ctx->done, 1); 598 return NULL; 599 } 600 601 static void test_race_insert_listen(struct test_sockmap_listen *skel __always_unused, 602 int family, int socktype, int mapfd) 603 { 604 struct connect_accept_ctx ctx = { 0 }; 605 const u32 zero = 0; 606 const int one = 1; 607 pthread_t t; 608 int err, s; 609 u64 value; 610 611 s = xsocket(family, 
socktype, 0); 612 if (s < 0) 613 return; 614 615 err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); 616 if (err) 617 goto close; 618 619 ctx.sockfd = s; 620 ctx.nr_iter = 10000; 621 622 err = pthread_create(&t, NULL, listen_thread, &ctx); 623 if (err) 624 goto close; 625 626 value = s; 627 while (!is_thread_done(&ctx)) { 628 err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); 629 /* Expecting EOPNOTSUPP before listen() */ 630 if (err && errno != EOPNOTSUPP) { 631 FAIL_ERRNO("map_update"); 632 break; 633 } 634 635 err = bpf_map_delete_elem(mapfd, &zero); 636 /* Expecting no entry after unhash on connect(AF_UNSPEC) */ 637 if (err && errno != EINVAL && errno != ENOENT) { 638 FAIL_ERRNO("map_delete"); 639 break; 640 } 641 } 642 643 xpthread_join(t, NULL); 644 close: 645 xclose(s); 646 } 647 648 static void zero_verdict_count(int mapfd) 649 { 650 unsigned int zero = 0; 651 int key; 652 653 key = SK_DROP; 654 xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY); 655 key = SK_PASS; 656 xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY); 657 } 658 659 enum redir_mode { 660 REDIR_INGRESS, 661 REDIR_EGRESS, 662 }; 663 664 static const char *redir_mode_str(enum redir_mode mode) 665 { 666 switch (mode) { 667 case REDIR_INGRESS: 668 return "ingress"; 669 case REDIR_EGRESS: 670 return "egress"; 671 default: 672 return "unknown"; 673 } 674 } 675 676 static void redir_to_connected(int family, int sotype, int sock_mapfd, 677 int verd_mapfd, enum redir_mode mode) 678 { 679 const char *log_prefix = redir_mode_str(mode); 680 int s, c0, c1, p0, p1; 681 unsigned int pass; 682 int err, n; 683 u32 key; 684 char b; 685 686 zero_verdict_count(verd_mapfd); 687 688 s = socket_loopback(family, sotype | SOCK_NONBLOCK); 689 if (s < 0) 690 return; 691 692 err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1); 693 if (err) 694 goto close_srv; 695 696 err = add_to_sockmap(sock_mapfd, p0, p1); 697 if (err) 698 goto close; 699 700 n = write(mode == REDIR_INGRESS ? 
c1 : p1, "a", 1); 701 if (n < 0) 702 FAIL_ERRNO("%s: write", log_prefix); 703 if (n == 0) 704 FAIL("%s: incomplete write", log_prefix); 705 if (n < 1) 706 goto close; 707 708 key = SK_PASS; 709 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); 710 if (err) 711 goto close; 712 if (pass != 1) 713 FAIL("%s: want pass count 1, have %d", log_prefix, pass); 714 n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC); 715 if (n < 0) 716 FAIL_ERRNO("%s: recv_timeout", log_prefix); 717 if (n == 0) 718 FAIL("%s: incomplete recv", log_prefix); 719 720 close: 721 xclose(p1); 722 xclose(c1); 723 xclose(p0); 724 xclose(c0); 725 close_srv: 726 xclose(s); 727 } 728 729 static void test_skb_redir_to_connected(struct test_sockmap_listen *skel, 730 struct bpf_map *inner_map, int family, 731 int sotype) 732 { 733 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict); 734 int parser = bpf_program__fd(skel->progs.prog_stream_parser); 735 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 736 int sock_map = bpf_map__fd(inner_map); 737 int err; 738 739 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0); 740 if (err) 741 return; 742 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0); 743 if (err) 744 goto detach; 745 746 redir_to_connected(family, sotype, sock_map, verdict_map, 747 REDIR_INGRESS); 748 749 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT); 750 detach: 751 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER); 752 } 753 754 static void test_msg_redir_to_connected(struct test_sockmap_listen *skel, 755 struct bpf_map *inner_map, int family, 756 int sotype) 757 { 758 int verdict = bpf_program__fd(skel->progs.prog_msg_verdict); 759 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 760 int sock_map = bpf_map__fd(inner_map); 761 int err; 762 763 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0); 764 if (err) 765 return; 766 767 redir_to_connected(family, sotype, sock_map, verdict_map, 
REDIR_EGRESS); 768 769 xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT); 770 } 771 772 static void test_msg_redir_to_connected_with_link(struct test_sockmap_listen *skel, 773 struct bpf_map *inner_map, int family, 774 int sotype) 775 { 776 int prog_msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict); 777 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 778 int sock_map = bpf_map__fd(inner_map); 779 int link_fd; 780 781 link_fd = bpf_link_create(prog_msg_verdict, sock_map, BPF_SK_MSG_VERDICT, NULL); 782 if (!ASSERT_GE(link_fd, 0, "bpf_link_create")) 783 return; 784 785 redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS); 786 787 close(link_fd); 788 } 789 790 static void redir_to_listening(int family, int sotype, int sock_mapfd, 791 int verd_mapfd, enum redir_mode mode) 792 { 793 const char *log_prefix = redir_mode_str(mode); 794 struct sockaddr_storage addr; 795 int s, c, p, err, n; 796 unsigned int drop; 797 socklen_t len; 798 u32 key; 799 800 zero_verdict_count(verd_mapfd); 801 802 s = socket_loopback(family, sotype | SOCK_NONBLOCK); 803 if (s < 0) 804 return; 805 806 len = sizeof(addr); 807 err = xgetsockname(s, sockaddr(&addr), &len); 808 if (err) 809 goto close_srv; 810 811 c = xsocket(family, sotype, 0); 812 if (c < 0) 813 goto close_srv; 814 err = xconnect(c, sockaddr(&addr), len); 815 if (err) 816 goto close_cli; 817 818 p = xaccept_nonblock(s, NULL, NULL); 819 if (p < 0) 820 goto close_cli; 821 822 err = add_to_sockmap(sock_mapfd, s, p); 823 if (err) 824 goto close_peer; 825 826 n = write(mode == REDIR_INGRESS ? 
c : p, "a", 1); 827 if (n < 0 && errno != EACCES) 828 FAIL_ERRNO("%s: write", log_prefix); 829 if (n == 0) 830 FAIL("%s: incomplete write", log_prefix); 831 if (n < 1) 832 goto close_peer; 833 834 key = SK_DROP; 835 err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop); 836 if (err) 837 goto close_peer; 838 if (drop != 1) 839 FAIL("%s: want drop count 1, have %d", log_prefix, drop); 840 841 close_peer: 842 xclose(p); 843 close_cli: 844 xclose(c); 845 close_srv: 846 xclose(s); 847 } 848 849 static void test_skb_redir_to_listening(struct test_sockmap_listen *skel, 850 struct bpf_map *inner_map, int family, 851 int sotype) 852 { 853 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict); 854 int parser = bpf_program__fd(skel->progs.prog_stream_parser); 855 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 856 int sock_map = bpf_map__fd(inner_map); 857 int err; 858 859 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0); 860 if (err) 861 return; 862 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0); 863 if (err) 864 goto detach; 865 866 redir_to_listening(family, sotype, sock_map, verdict_map, 867 REDIR_INGRESS); 868 869 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT); 870 detach: 871 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER); 872 } 873 874 static void test_msg_redir_to_listening(struct test_sockmap_listen *skel, 875 struct bpf_map *inner_map, int family, 876 int sotype) 877 { 878 int verdict = bpf_program__fd(skel->progs.prog_msg_verdict); 879 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 880 int sock_map = bpf_map__fd(inner_map); 881 int err; 882 883 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0); 884 if (err) 885 return; 886 887 redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS); 888 889 xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT); 890 } 891 892 static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen 
*skel, 893 struct bpf_map *inner_map, int family, 894 int sotype) 895 { 896 struct bpf_program *verdict = skel->progs.prog_msg_verdict; 897 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 898 int sock_map = bpf_map__fd(inner_map); 899 struct bpf_link *link; 900 901 link = bpf_program__attach_sockmap(verdict, sock_map); 902 if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap")) 903 return; 904 905 redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS); 906 907 bpf_link__detach(link); 908 } 909 910 static void redir_partial(int family, int sotype, int sock_map, int parser_map) 911 { 912 int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1; 913 int err, n, key, value; 914 char buf[] = "abc"; 915 916 key = 0; 917 value = sizeof(buf) - 1; 918 err = xbpf_map_update_elem(parser_map, &key, &value, 0); 919 if (err) 920 return; 921 922 s = socket_loopback(family, sotype | SOCK_NONBLOCK); 923 if (s < 0) 924 goto clean_parser_map; 925 926 err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1); 927 if (err) 928 goto close_srv; 929 930 err = add_to_sockmap(sock_map, p0, p1); 931 if (err) 932 goto close; 933 934 n = xsend(c1, buf, sizeof(buf), 0); 935 if (n < sizeof(buf)) 936 FAIL("incomplete write"); 937 938 n = xrecv_nonblock(c0, buf, sizeof(buf), 0); 939 if (n != sizeof(buf) - 1) 940 FAIL("expect %zu, received %d", sizeof(buf) - 1, n); 941 942 close: 943 xclose(c0); 944 xclose(p0); 945 xclose(c1); 946 xclose(p1); 947 close_srv: 948 xclose(s); 949 950 clean_parser_map: 951 key = 0; 952 value = 0; 953 xbpf_map_update_elem(parser_map, &key, &value, 0); 954 } 955 956 static void test_skb_redir_partial(struct test_sockmap_listen *skel, 957 struct bpf_map *inner_map, int family, 958 int sotype) 959 { 960 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict); 961 int parser = bpf_program__fd(skel->progs.prog_stream_parser); 962 int parser_map = bpf_map__fd(skel->maps.parser_map); 963 int sock_map = bpf_map__fd(inner_map); 964 int err; 965 966 err = 
xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0); 967 if (err) 968 return; 969 970 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0); 971 if (err) 972 goto detach; 973 974 redir_partial(family, sotype, sock_map, parser_map); 975 976 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT); 977 detach: 978 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER); 979 } 980 981 static void test_reuseport_select_listening(int family, int sotype, 982 int sock_map, int verd_map, 983 int reuseport_prog) 984 { 985 struct sockaddr_storage addr; 986 unsigned int pass; 987 int s, c, err; 988 socklen_t len; 989 u64 value; 990 u32 key; 991 992 zero_verdict_count(verd_map); 993 994 s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK, 995 reuseport_prog); 996 if (s < 0) 997 return; 998 999 len = sizeof(addr); 1000 err = xgetsockname(s, sockaddr(&addr), &len); 1001 if (err) 1002 goto close_srv; 1003 1004 key = 0; 1005 value = s; 1006 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); 1007 if (err) 1008 goto close_srv; 1009 1010 c = xsocket(family, sotype, 0); 1011 if (c < 0) 1012 goto close_srv; 1013 err = xconnect(c, sockaddr(&addr), len); 1014 if (err) 1015 goto close_cli; 1016 1017 if (sotype == SOCK_STREAM) { 1018 int p; 1019 1020 p = xaccept_nonblock(s, NULL, NULL); 1021 if (p < 0) 1022 goto close_cli; 1023 xclose(p); 1024 } else { 1025 char b = 'a'; 1026 ssize_t n; 1027 1028 n = xsend(c, &b, sizeof(b), 0); 1029 if (n == -1) 1030 goto close_cli; 1031 1032 n = xrecv_nonblock(s, &b, sizeof(b), 0); 1033 if (n == -1) 1034 goto close_cli; 1035 } 1036 1037 key = SK_PASS; 1038 err = xbpf_map_lookup_elem(verd_map, &key, &pass); 1039 if (err) 1040 goto close_cli; 1041 if (pass != 1) 1042 FAIL("want pass count 1, have %d", pass); 1043 1044 close_cli: 1045 xclose(c); 1046 close_srv: 1047 xclose(s); 1048 } 1049 1050 static void test_reuseport_select_connected(int family, int sotype, 1051 int sock_map, int verd_map, 
1052 int reuseport_prog) 1053 { 1054 struct sockaddr_storage addr; 1055 int s, c0, c1, p0, err; 1056 unsigned int drop; 1057 socklen_t len; 1058 u64 value; 1059 u32 key; 1060 1061 zero_verdict_count(verd_map); 1062 1063 s = socket_loopback_reuseport(family, sotype, reuseport_prog); 1064 if (s < 0) 1065 return; 1066 1067 /* Populate sock_map[0] to avoid ENOENT on first connection */ 1068 key = 0; 1069 value = s; 1070 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); 1071 if (err) 1072 goto close_srv; 1073 1074 len = sizeof(addr); 1075 err = xgetsockname(s, sockaddr(&addr), &len); 1076 if (err) 1077 goto close_srv; 1078 1079 c0 = xsocket(family, sotype, 0); 1080 if (c0 < 0) 1081 goto close_srv; 1082 1083 err = xconnect(c0, sockaddr(&addr), len); 1084 if (err) 1085 goto close_cli0; 1086 1087 if (sotype == SOCK_STREAM) { 1088 p0 = xaccept_nonblock(s, NULL, NULL); 1089 if (p0 < 0) 1090 goto close_cli0; 1091 } else { 1092 p0 = xsocket(family, sotype, 0); 1093 if (p0 < 0) 1094 goto close_cli0; 1095 1096 len = sizeof(addr); 1097 err = xgetsockname(c0, sockaddr(&addr), &len); 1098 if (err) 1099 goto close_cli0; 1100 1101 err = xconnect(p0, sockaddr(&addr), len); 1102 if (err) 1103 goto close_cli0; 1104 } 1105 1106 /* Update sock_map[0] to redirect to a connected socket */ 1107 key = 0; 1108 value = p0; 1109 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST); 1110 if (err) 1111 goto close_peer0; 1112 1113 c1 = xsocket(family, sotype, 0); 1114 if (c1 < 0) 1115 goto close_peer0; 1116 1117 len = sizeof(addr); 1118 err = xgetsockname(s, sockaddr(&addr), &len); 1119 if (err) 1120 goto close_srv; 1121 1122 errno = 0; 1123 err = connect(c1, sockaddr(&addr), len); 1124 if (sotype == SOCK_DGRAM) { 1125 char b = 'a'; 1126 ssize_t n; 1127 1128 n = xsend(c1, &b, sizeof(b), 0); 1129 if (n == -1) 1130 goto close_cli1; 1131 1132 n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC); 1133 err = n == -1; 1134 } 1135 if (!err || errno != ECONNREFUSED) 1136 
FAIL_ERRNO("connect: expected ECONNREFUSED"); 1137 1138 key = SK_DROP; 1139 err = xbpf_map_lookup_elem(verd_map, &key, &drop); 1140 if (err) 1141 goto close_cli1; 1142 if (drop != 1) 1143 FAIL("want drop count 1, have %d", drop); 1144 1145 close_cli1: 1146 xclose(c1); 1147 close_peer0: 1148 xclose(p0); 1149 close_cli0: 1150 xclose(c0); 1151 close_srv: 1152 xclose(s); 1153 } 1154 1155 /* Check that redirecting across reuseport groups is not allowed. */ 1156 static void test_reuseport_mixed_groups(int family, int sotype, int sock_map, 1157 int verd_map, int reuseport_prog) 1158 { 1159 struct sockaddr_storage addr; 1160 int s1, s2, c, err; 1161 unsigned int drop; 1162 socklen_t len; 1163 u32 key; 1164 1165 zero_verdict_count(verd_map); 1166 1167 /* Create two listeners, each in its own reuseport group */ 1168 s1 = socket_loopback_reuseport(family, sotype, reuseport_prog); 1169 if (s1 < 0) 1170 return; 1171 1172 s2 = socket_loopback_reuseport(family, sotype, reuseport_prog); 1173 if (s2 < 0) 1174 goto close_srv1; 1175 1176 err = add_to_sockmap(sock_map, s1, s2); 1177 if (err) 1178 goto close_srv2; 1179 1180 /* Connect to s2, reuseport BPF selects s1 via sock_map[0] */ 1181 len = sizeof(addr); 1182 err = xgetsockname(s2, sockaddr(&addr), &len); 1183 if (err) 1184 goto close_srv2; 1185 1186 c = xsocket(family, sotype, 0); 1187 if (c < 0) 1188 goto close_srv2; 1189 1190 err = connect(c, sockaddr(&addr), len); 1191 if (sotype == SOCK_DGRAM) { 1192 char b = 'a'; 1193 ssize_t n; 1194 1195 n = xsend(c, &b, sizeof(b), 0); 1196 if (n == -1) 1197 goto close_cli; 1198 1199 n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC); 1200 err = n == -1; 1201 } 1202 if (!err || errno != ECONNREFUSED) { 1203 FAIL_ERRNO("connect: expected ECONNREFUSED"); 1204 goto close_cli; 1205 } 1206 1207 /* Expect drop, can't redirect outside of reuseport group */ 1208 key = SK_DROP; 1209 err = xbpf_map_lookup_elem(verd_map, &key, &drop); 1210 if (err) 1211 goto close_cli; 1212 if (drop != 1) 1213 
FAIL("want drop count 1, have %d", drop); 1214 1215 close_cli: 1216 xclose(c); 1217 close_srv2: 1218 xclose(s2); 1219 close_srv1: 1220 xclose(s1); 1221 } 1222 1223 #define TEST(fn, ...) \ 1224 { \ 1225 fn, #fn, __VA_ARGS__ \ 1226 } 1227 1228 static void test_ops_cleanup(const struct bpf_map *map) 1229 { 1230 int err, mapfd; 1231 u32 key; 1232 1233 mapfd = bpf_map__fd(map); 1234 1235 for (key = 0; key < bpf_map__max_entries(map); key++) { 1236 err = bpf_map_delete_elem(mapfd, &key); 1237 if (err && errno != EINVAL && errno != ENOENT) 1238 FAIL_ERRNO("map_delete: expected EINVAL/ENOENT"); 1239 } 1240 } 1241 1242 static const char *family_str(sa_family_t family) 1243 { 1244 switch (family) { 1245 case AF_INET: 1246 return "IPv4"; 1247 case AF_INET6: 1248 return "IPv6"; 1249 case AF_UNIX: 1250 return "Unix"; 1251 case AF_VSOCK: 1252 return "VSOCK"; 1253 default: 1254 return "unknown"; 1255 } 1256 } 1257 1258 static const char *map_type_str(const struct bpf_map *map) 1259 { 1260 int type; 1261 1262 if (!map) 1263 return "invalid"; 1264 type = bpf_map__type(map); 1265 1266 switch (type) { 1267 case BPF_MAP_TYPE_SOCKMAP: 1268 return "sockmap"; 1269 case BPF_MAP_TYPE_SOCKHASH: 1270 return "sockhash"; 1271 default: 1272 return "unknown"; 1273 } 1274 } 1275 1276 static const char *sotype_str(int sotype) 1277 { 1278 switch (sotype) { 1279 case SOCK_DGRAM: 1280 return "UDP"; 1281 case SOCK_STREAM: 1282 return "TCP"; 1283 default: 1284 return "unknown"; 1285 } 1286 } 1287 1288 static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map, 1289 int family, int sotype) 1290 { 1291 const struct op_test { 1292 void (*fn)(struct test_sockmap_listen *skel, 1293 int family, int sotype, int mapfd); 1294 const char *name; 1295 int sotype; 1296 } tests[] = { 1297 /* insert */ 1298 TEST(test_insert_invalid), 1299 TEST(test_insert_opened), 1300 TEST(test_insert_bound, SOCK_STREAM), 1301 TEST(test_insert), 1302 /* delete */ 1303 TEST(test_delete_after_insert), 1304 
TEST(test_delete_after_close), 1305 /* lookup */ 1306 TEST(test_lookup_after_insert), 1307 TEST(test_lookup_after_delete), 1308 TEST(test_lookup_32_bit_value), 1309 /* update */ 1310 TEST(test_update_existing), 1311 /* races with insert/delete */ 1312 TEST(test_destroy_orphan_child, SOCK_STREAM), 1313 TEST(test_syn_recv_insert_delete, SOCK_STREAM), 1314 TEST(test_race_insert_listen, SOCK_STREAM), 1315 /* child clone */ 1316 TEST(test_clone_after_delete, SOCK_STREAM), 1317 TEST(test_accept_after_delete, SOCK_STREAM), 1318 TEST(test_accept_before_delete, SOCK_STREAM), 1319 }; 1320 const char *family_name, *map_name, *sotype_name; 1321 const struct op_test *t; 1322 char s[MAX_TEST_NAME]; 1323 int map_fd; 1324 1325 family_name = family_str(family); 1326 map_name = map_type_str(map); 1327 sotype_name = sotype_str(sotype); 1328 map_fd = bpf_map__fd(map); 1329 1330 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 1331 snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name, 1332 sotype_name, t->name); 1333 1334 if (t->sotype != 0 && t->sotype != sotype) 1335 continue; 1336 1337 if (!test__start_subtest(s)) 1338 continue; 1339 1340 t->fn(skel, family, sotype, map_fd); 1341 test_ops_cleanup(map); 1342 } 1343 } 1344 1345 static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, 1346 int family, int sotype) 1347 { 1348 const struct redir_test { 1349 void (*fn)(struct test_sockmap_listen *skel, 1350 struct bpf_map *map, int family, int sotype); 1351 const char *name; 1352 } tests[] = { 1353 TEST(test_skb_redir_to_connected), 1354 TEST(test_skb_redir_to_listening), 1355 TEST(test_skb_redir_partial), 1356 TEST(test_msg_redir_to_connected), 1357 TEST(test_msg_redir_to_connected_with_link), 1358 TEST(test_msg_redir_to_listening), 1359 TEST(test_msg_redir_to_listening_with_link), 1360 }; 1361 const char *family_name, *map_name; 1362 const struct redir_test *t; 1363 char s[MAX_TEST_NAME]; 1364 1365 family_name = family_str(family); 1366 map_name = 
map_type_str(map); 1367 1368 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 1369 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, 1370 t->name); 1371 1372 if (!test__start_subtest(s)) 1373 continue; 1374 1375 t->fn(skel, map, family, sotype); 1376 } 1377 } 1378 1379 static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, 1380 int sock_mapfd, int nop_mapfd, 1381 int verd_mapfd, enum redir_mode mode, 1382 int send_flags) 1383 { 1384 const char *log_prefix = redir_mode_str(mode); 1385 unsigned int pass; 1386 int err, n; 1387 u32 key; 1388 char b; 1389 1390 zero_verdict_count(verd_mapfd); 1391 1392 err = add_to_sockmap(sock_mapfd, peer0, peer1); 1393 if (err) 1394 return; 1395 1396 if (nop_mapfd >= 0) { 1397 err = add_to_sockmap(nop_mapfd, cli0, cli1); 1398 if (err) 1399 return; 1400 } 1401 1402 /* Last byte is OOB data when send_flags has MSG_OOB bit set */ 1403 n = xsend(cli1, "ab", 2, send_flags); 1404 if (n >= 0 && n < 2) 1405 FAIL("%s: incomplete send", log_prefix); 1406 if (n < 2) 1407 return; 1408 1409 key = SK_PASS; 1410 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); 1411 if (err) 1412 return; 1413 if (pass != 1) 1414 FAIL("%s: want pass count 1, have %d", log_prefix, pass); 1415 1416 n = recv_timeout(mode == REDIR_INGRESS ? 
peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC); 1417 if (n < 0) 1418 FAIL_ERRNO("%s: recv_timeout", log_prefix); 1419 if (n == 0) 1420 FAIL("%s: incomplete recv", log_prefix); 1421 1422 if (send_flags & MSG_OOB) { 1423 /* Check that we can't read OOB while in sockmap */ 1424 errno = 0; 1425 n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT); 1426 if (n != -1 || errno != EOPNOTSUPP) 1427 FAIL("%s: recv(MSG_OOB): expected EOPNOTSUPP: retval=%d errno=%d", 1428 log_prefix, n, errno); 1429 1430 /* Remove peer1 from sockmap */ 1431 xbpf_map_delete_elem(sock_mapfd, &(int){ 1 }); 1432 1433 /* Check that OOB was dropped on redirect */ 1434 errno = 0; 1435 n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT); 1436 if (n != -1 || errno != EINVAL) 1437 FAIL("%s: recv(MSG_OOB): expected EINVAL: retval=%d errno=%d", 1438 log_prefix, n, errno); 1439 } 1440 } 1441 1442 static void unix_redir_to_connected(int sotype, int sock_mapfd, 1443 int verd_mapfd, enum redir_mode mode) 1444 { 1445 int c0, c1, p0, p1; 1446 int sfd[2]; 1447 1448 if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) 1449 return; 1450 c0 = sfd[0], p0 = sfd[1]; 1451 1452 if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) 1453 goto close0; 1454 c1 = sfd[0], p1 = sfd[1]; 1455 1456 pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, 1457 mode, NO_FLAGS); 1458 1459 xclose(c1); 1460 xclose(p1); 1461 close0: 1462 xclose(c0); 1463 xclose(p0); 1464 } 1465 1466 static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel, 1467 struct bpf_map *inner_map, int sotype) 1468 { 1469 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); 1470 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 1471 int sock_map = bpf_map__fd(inner_map); 1472 int err; 1473 1474 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); 1475 if (err) 1476 return; 1477 1478 skel->bss->test_ingress = false; 1479 unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS); 1480 skel->bss->test_ingress = 
true; 1481 unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS); 1482 1483 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); 1484 } 1485 1486 static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map, 1487 int sotype) 1488 { 1489 const char *family_name, *map_name; 1490 char s[MAX_TEST_NAME]; 1491 1492 family_name = family_str(AF_UNIX); 1493 map_name = map_type_str(map); 1494 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); 1495 if (!test__start_subtest(s)) 1496 return; 1497 unix_skb_redir_to_connected(skel, map, sotype); 1498 } 1499 1500 /* Returns two connected loopback vsock sockets */ 1501 static int vsock_socketpair_connectible(int sotype, int *v0, int *v1) 1502 { 1503 struct sockaddr_storage addr; 1504 socklen_t len = sizeof(addr); 1505 int s, p, c; 1506 1507 s = socket_loopback(AF_VSOCK, sotype); 1508 if (s < 0) 1509 return -1; 1510 1511 c = xsocket(AF_VSOCK, sotype | SOCK_NONBLOCK, 0); 1512 if (c == -1) 1513 goto close_srv; 1514 1515 if (getsockname(s, sockaddr(&addr), &len) < 0) 1516 goto close_cli; 1517 1518 if (connect(c, sockaddr(&addr), len) < 0 && errno != EINPROGRESS) { 1519 FAIL_ERRNO("connect"); 1520 goto close_cli; 1521 } 1522 1523 len = sizeof(addr); 1524 p = accept_timeout(s, sockaddr(&addr), &len, IO_TIMEOUT_SEC); 1525 if (p < 0) 1526 goto close_cli; 1527 1528 if (poll_connect(c, IO_TIMEOUT_SEC) < 0) { 1529 FAIL_ERRNO("poll_connect"); 1530 goto close_acc; 1531 } 1532 1533 *v0 = p; 1534 *v1 = c; 1535 1536 return 0; 1537 1538 close_acc: 1539 close(p); 1540 close_cli: 1541 close(c); 1542 close_srv: 1543 close(s); 1544 1545 return -1; 1546 } 1547 1548 static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd, 1549 enum redir_mode mode, int sotype) 1550 { 1551 const char *log_prefix = redir_mode_str(mode); 1552 char a = 'a', b = 'b'; 1553 int u0, u1, v0, v1; 1554 int sfd[2]; 1555 unsigned int pass; 1556 int err, n; 1557 u32 key; 1558 1559 
zero_verdict_count(verd_mapfd); 1560 1561 if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd)) 1562 return; 1563 1564 u0 = sfd[0]; 1565 u1 = sfd[1]; 1566 1567 err = vsock_socketpair_connectible(sotype, &v0, &v1); 1568 if (err) { 1569 FAIL("vsock_socketpair_connectible() failed"); 1570 goto close_uds; 1571 } 1572 1573 err = add_to_sockmap(sock_mapfd, u0, v0); 1574 if (err) { 1575 FAIL("add_to_sockmap failed"); 1576 goto close_vsock; 1577 } 1578 1579 n = write(v1, &a, sizeof(a)); 1580 if (n < 0) 1581 FAIL_ERRNO("%s: write", log_prefix); 1582 if (n == 0) 1583 FAIL("%s: incomplete write", log_prefix); 1584 if (n < 1) 1585 goto out; 1586 1587 n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0); 1588 if (n < 0) 1589 FAIL("%s: recv() err, errno=%d", log_prefix, errno); 1590 if (n == 0) 1591 FAIL("%s: incomplete recv", log_prefix); 1592 if (b != a) 1593 FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b); 1594 1595 key = SK_PASS; 1596 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); 1597 if (err) 1598 goto out; 1599 if (pass != 1) 1600 FAIL("%s: want pass count 1, have %d", log_prefix, pass); 1601 out: 1602 key = 0; 1603 bpf_map_delete_elem(sock_mapfd, &key); 1604 key = 1; 1605 bpf_map_delete_elem(sock_mapfd, &key); 1606 1607 close_vsock: 1608 close(v0); 1609 close(v1); 1610 1611 close_uds: 1612 close(u0); 1613 close(u1); 1614 } 1615 1616 static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel, 1617 struct bpf_map *inner_map, 1618 int sotype) 1619 { 1620 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); 1621 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 1622 int sock_map = bpf_map__fd(inner_map); 1623 int err; 1624 1625 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); 1626 if (err) 1627 return; 1628 1629 skel->bss->test_ingress = false; 1630 vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype); 1631 skel->bss->test_ingress = true; 1632 
vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype); 1633 1634 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); 1635 } 1636 1637 static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map) 1638 { 1639 const char *family_name, *map_name; 1640 char s[MAX_TEST_NAME]; 1641 1642 family_name = family_str(AF_VSOCK); 1643 map_name = map_type_str(map); 1644 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); 1645 if (!test__start_subtest(s)) 1646 return; 1647 1648 vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM); 1649 vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET); 1650 } 1651 1652 static void test_reuseport(struct test_sockmap_listen *skel, 1653 struct bpf_map *map, int family, int sotype) 1654 { 1655 const struct reuseport_test { 1656 void (*fn)(int family, int sotype, int socket_map, 1657 int verdict_map, int reuseport_prog); 1658 const char *name; 1659 int sotype; 1660 } tests[] = { 1661 TEST(test_reuseport_select_listening), 1662 TEST(test_reuseport_select_connected), 1663 TEST(test_reuseport_mixed_groups), 1664 }; 1665 int socket_map, verdict_map, reuseport_prog; 1666 const char *family_name, *map_name, *sotype_name; 1667 const struct reuseport_test *t; 1668 char s[MAX_TEST_NAME]; 1669 1670 family_name = family_str(family); 1671 map_name = map_type_str(map); 1672 sotype_name = sotype_str(sotype); 1673 1674 socket_map = bpf_map__fd(map); 1675 verdict_map = bpf_map__fd(skel->maps.verdict_map); 1676 reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport); 1677 1678 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 1679 snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name, 1680 sotype_name, t->name); 1681 1682 if (t->sotype != 0 && t->sotype != sotype) 1683 continue; 1684 1685 if (!test__start_subtest(s)) 1686 continue; 1687 1688 t->fn(family, sotype, socket_map, verdict_map, reuseport_prog); 1689 } 1690 } 1691 1692 static int inet_socketpair(int family, int type, 
int *s, int *c) 1693 { 1694 struct sockaddr_storage addr; 1695 socklen_t len; 1696 int p0, c0; 1697 int err; 1698 1699 p0 = socket_loopback(family, type | SOCK_NONBLOCK); 1700 if (p0 < 0) 1701 return p0; 1702 1703 len = sizeof(addr); 1704 err = xgetsockname(p0, sockaddr(&addr), &len); 1705 if (err) 1706 goto close_peer0; 1707 1708 c0 = xsocket(family, type | SOCK_NONBLOCK, 0); 1709 if (c0 < 0) { 1710 err = c0; 1711 goto close_peer0; 1712 } 1713 err = xconnect(c0, sockaddr(&addr), len); 1714 if (err) 1715 goto close_cli0; 1716 err = xgetsockname(c0, sockaddr(&addr), &len); 1717 if (err) 1718 goto close_cli0; 1719 err = xconnect(p0, sockaddr(&addr), len); 1720 if (err) 1721 goto close_cli0; 1722 1723 *s = p0; 1724 *c = c0; 1725 return 0; 1726 1727 close_cli0: 1728 xclose(c0); 1729 close_peer0: 1730 xclose(p0); 1731 return err; 1732 } 1733 1734 static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, 1735 enum redir_mode mode) 1736 { 1737 int c0, c1, p0, p1; 1738 int err; 1739 1740 err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0); 1741 if (err) 1742 return; 1743 err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1); 1744 if (err) 1745 goto close_cli0; 1746 1747 pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, 1748 mode, NO_FLAGS); 1749 1750 xclose(c1); 1751 xclose(p1); 1752 close_cli0: 1753 xclose(c0); 1754 xclose(p0); 1755 } 1756 1757 static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel, 1758 struct bpf_map *inner_map, int family) 1759 { 1760 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); 1761 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 1762 int sock_map = bpf_map__fd(inner_map); 1763 int err; 1764 1765 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); 1766 if (err) 1767 return; 1768 1769 skel->bss->test_ingress = false; 1770 udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS); 1771 skel->bss->test_ingress = true; 1772 udp_redir_to_connected(family, 
sock_map, verdict_map, REDIR_INGRESS); 1773 1774 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); 1775 } 1776 1777 static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map, 1778 int family) 1779 { 1780 const char *family_name, *map_name; 1781 char s[MAX_TEST_NAME]; 1782 1783 family_name = family_str(family); 1784 map_name = map_type_str(map); 1785 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); 1786 if (!test__start_subtest(s)) 1787 return; 1788 udp_skb_redir_to_connected(skel, map, family); 1789 } 1790 1791 static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, 1792 int verd_mapfd, enum redir_mode mode) 1793 { 1794 int c0, c1, p0, p1; 1795 int sfd[2]; 1796 int err; 1797 1798 if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd)) 1799 return; 1800 c0 = sfd[0], p0 = sfd[1]; 1801 1802 err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1); 1803 if (err) 1804 goto close; 1805 1806 pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, 1807 mode, NO_FLAGS); 1808 1809 xclose(c1); 1810 xclose(p1); 1811 close: 1812 xclose(c0); 1813 xclose(p0); 1814 } 1815 1816 static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel, 1817 struct bpf_map *inner_map, int family) 1818 { 1819 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); 1820 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 1821 int sock_map = bpf_map__fd(inner_map); 1822 int err; 1823 1824 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); 1825 if (err) 1826 return; 1827 1828 skel->bss->test_ingress = false; 1829 inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, 1830 REDIR_EGRESS); 1831 inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map, 1832 REDIR_EGRESS); 1833 skel->bss->test_ingress = true; 1834 inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, 1835 REDIR_INGRESS); 1836 inet_unix_redir_to_connected(family, 
SOCK_STREAM, sock_map, verdict_map, 1837 REDIR_INGRESS); 1838 1839 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); 1840 } 1841 1842 static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, 1843 int nop_mapfd, int verd_mapfd, 1844 enum redir_mode mode, int send_flags) 1845 { 1846 int c0, c1, p0, p1; 1847 int sfd[2]; 1848 int err; 1849 1850 err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0); 1851 if (err) 1852 return; 1853 1854 if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd)) 1855 goto close_cli0; 1856 c1 = sfd[0], p1 = sfd[1]; 1857 1858 pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, nop_mapfd, 1859 verd_mapfd, mode, send_flags); 1860 1861 xclose(c1); 1862 xclose(p1); 1863 close_cli0: 1864 xclose(c0); 1865 xclose(p0); 1866 } 1867 1868 static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel, 1869 struct bpf_map *inner_map, int family) 1870 { 1871 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); 1872 int nop_map = bpf_map__fd(skel->maps.nop_map); 1873 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 1874 int sock_map = bpf_map__fd(inner_map); 1875 int err; 1876 1877 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); 1878 if (err) 1879 return; 1880 1881 skel->bss->test_ingress = false; 1882 unix_inet_redir_to_connected(family, SOCK_DGRAM, 1883 sock_map, -1, verdict_map, 1884 REDIR_EGRESS, NO_FLAGS); 1885 unix_inet_redir_to_connected(family, SOCK_DGRAM, 1886 sock_map, -1, verdict_map, 1887 REDIR_EGRESS, NO_FLAGS); 1888 1889 unix_inet_redir_to_connected(family, SOCK_DGRAM, 1890 sock_map, nop_map, verdict_map, 1891 REDIR_EGRESS, NO_FLAGS); 1892 unix_inet_redir_to_connected(family, SOCK_STREAM, 1893 sock_map, nop_map, verdict_map, 1894 REDIR_EGRESS, NO_FLAGS); 1895 1896 /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */ 1897 unix_inet_redir_to_connected(family, SOCK_STREAM, 1898 sock_map, nop_map, verdict_map, 1899 REDIR_EGRESS, MSG_OOB); 1900 1901 skel->bss->test_ingress = 
true; 1902 unix_inet_redir_to_connected(family, SOCK_DGRAM, 1903 sock_map, -1, verdict_map, 1904 REDIR_INGRESS, NO_FLAGS); 1905 unix_inet_redir_to_connected(family, SOCK_STREAM, 1906 sock_map, -1, verdict_map, 1907 REDIR_INGRESS, NO_FLAGS); 1908 1909 unix_inet_redir_to_connected(family, SOCK_DGRAM, 1910 sock_map, nop_map, verdict_map, 1911 REDIR_INGRESS, NO_FLAGS); 1912 unix_inet_redir_to_connected(family, SOCK_STREAM, 1913 sock_map, nop_map, verdict_map, 1914 REDIR_INGRESS, NO_FLAGS); 1915 1916 /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */ 1917 unix_inet_redir_to_connected(family, SOCK_STREAM, 1918 sock_map, nop_map, verdict_map, 1919 REDIR_INGRESS, MSG_OOB); 1920 1921 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); 1922 } 1923 1924 static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map, 1925 int family) 1926 { 1927 const char *family_name, *map_name; 1928 char s[MAX_TEST_NAME]; 1929 1930 family_name = family_str(family); 1931 map_name = map_type_str(map); 1932 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); 1933 if (!test__start_subtest(s)) 1934 return; 1935 inet_unix_skb_redir_to_connected(skel, map, family); 1936 unix_inet_skb_redir_to_connected(skel, map, family); 1937 } 1938 1939 static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, 1940 int family) 1941 { 1942 test_ops(skel, map, family, SOCK_STREAM); 1943 test_ops(skel, map, family, SOCK_DGRAM); 1944 test_redir(skel, map, family, SOCK_STREAM); 1945 test_reuseport(skel, map, family, SOCK_STREAM); 1946 test_reuseport(skel, map, family, SOCK_DGRAM); 1947 test_udp_redir(skel, map, family); 1948 test_udp_unix_redir(skel, map, family); 1949 } 1950 1951 void serial_test_sockmap_listen(void) 1952 { 1953 struct test_sockmap_listen *skel; 1954 1955 skel = test_sockmap_listen__open_and_load(); 1956 if (!skel) { 1957 FAIL("skeleton open/load failed"); 1958 return; 1959 } 1960 1961 skel->bss->test_sockmap = true; 1962 
run_tests(skel, skel->maps.sock_map, AF_INET); 1963 run_tests(skel, skel->maps.sock_map, AF_INET6); 1964 test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM); 1965 test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM); 1966 test_vsock_redir(skel, skel->maps.sock_map); 1967 1968 skel->bss->test_sockmap = false; 1969 run_tests(skel, skel->maps.sock_hash, AF_INET); 1970 run_tests(skel, skel->maps.sock_hash, AF_INET6); 1971 test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM); 1972 test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM); 1973 test_vsock_redir(skel, skel->maps.sock_hash); 1974 1975 test_sockmap_listen__destroy(skel); 1976 } 1977
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.