// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */

#include <stddef.h>
#include <errno.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <linux/tcp.h>
#include <linux/socket.h>
#include <linux/bpf.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#define BPF_PROG_TEST_TCP_HDR_OPTIONS
#include "test_tcp_hdr_options.h"

#ifndef sizeof_field
#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
#endif

__u8 test_kind = TCPOPT_EXP;
__u16 test_magic = 0xeB9F;
__u32 inherit_cb_flags = 0;

struct bpf_test_option passive_synack_out = {};
struct bpf_test_option passive_fin_out = {};

struct bpf_test_option passive_estab_in = {};
struct bpf_test_option passive_fin_in = {};

struct bpf_test_option active_syn_out = {};
struct bpf_test_option active_fin_out = {};

struct bpf_test_option active_estab_in = {};
struct bpf_test_option active_fin_in = {};

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct hdr_stg);
} hdr_stg_map SEC(".maps");

static bool skops_want_cookie(const struct bpf_sock_ops *skops)
{
	return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
}

static bool skops_current_mss(const struct bpf_sock_ops *skops)
{
	return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS;
}

static __u8 option_total_len(__u8 flags)
{
	__u8 i, len = 1; /* +1 for flags */

	if (!flags)
		return 0;

	/* RESEND bit does not use a byte */
	for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++)
		len += !!TEST_OPTION_FLAGS(flags, i);

	if (test_kind == TCPOPT_EXP)
		return len + TCP_BPF_EXPOPT_BASE_LEN;
	else
		return len + 2; /* +1 kind, +1 kind-len */
}

static void write_test_option(const struct bpf_test_option *test_opt,
			      __u8 *data)
{
	__u8 offset = 0;

	data[offset++] = test_opt->flags;
	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS))
		data[offset++] = test_opt->max_delack_ms;

	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND))
		data[offset++] = test_opt->rand;
}

static int store_option(struct bpf_sock_ops *skops,
			const struct bpf_test_option *test_opt)
{
	union {
		struct tcp_exprm_opt exprm;
		struct tcp_opt regular;
	} write_opt;
	int err;

	if (test_kind == TCPOPT_EXP) {
		write_opt.exprm.kind = TCPOPT_EXP;
		write_opt.exprm.len = option_total_len(test_opt->flags);
		write_opt.exprm.magic = __bpf_htons(test_magic);
		write_opt.exprm.data32 = 0;
		write_test_option(test_opt, write_opt.exprm.data);
		err = bpf_store_hdr_opt(skops, &write_opt.exprm,
					sizeof(write_opt.exprm), 0);
	} else {
		write_opt.regular.kind = test_kind;
		write_opt.regular.len = option_total_len(test_opt->flags);
		write_opt.regular.data32 = 0;
		write_test_option(test_opt, write_opt.regular.data);
		err = bpf_store_hdr_opt(skops, &write_opt.regular,
					sizeof(write_opt.regular), 0);
	}

	if (err)
		RET_CG_ERR(err);

	return CG_OK;
}

static int parse_test_option(struct bpf_test_option *opt, const __u8 *start)
{
	opt->flags = *start++;

	if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS))
		opt->max_delack_ms = *start++;

	if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND))
		opt->rand = *start++;

	return 0;
}

static int
load_option(struct bpf_sock_ops *skops,
	    struct bpf_test_option *test_opt, bool from_syn)
{
	union {
		struct tcp_exprm_opt exprm;
		struct tcp_opt regular;
	} search_opt;
	int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;

	if (test_kind == TCPOPT_EXP) {
		search_opt.exprm.kind = TCPOPT_EXP;
		search_opt.exprm.len = 4;
		search_opt.exprm.magic = __bpf_htons(test_magic);
		search_opt.exprm.data32 = 0;
		ret = bpf_load_hdr_opt(skops, &search_opt.exprm,
				       sizeof(search_opt.exprm), load_flags);
		if (ret < 0)
			return ret;
		return parse_test_option(test_opt, search_opt.exprm.data);
	} else {
		search_opt.regular.kind = test_kind;
		search_opt.regular.len = 0;
		search_opt.regular.data32 = 0;
		ret = bpf_load_hdr_opt(skops, &search_opt.regular,
				       sizeof(search_opt.regular), load_flags);
		if (ret < 0)
			return ret;
		return parse_test_option(test_opt, search_opt.regular.data);
	}
}

static int synack_opt_len(struct bpf_sock_ops *skops)
{
	struct bpf_test_option test_opt = {};
	__u8 optlen;
	int err;

	if (!passive_synack_out.flags)
		return CG_OK;

	err = load_option(skops, &test_opt, true);

	/* bpf_test_option is not found */
	if (err == -ENOMSG)
		return CG_OK;

	if (err)
		RET_CG_ERR(err);

	optlen = option_total_len(passive_synack_out.flags);
	if (optlen) {
		err = bpf_reserve_hdr_opt(skops, optlen, 0);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

static int write_synack_opt(struct bpf_sock_ops *skops)
{
	struct bpf_test_option opt;

	if (!passive_synack_out.flags)
		/* We should not even be called since no header
		 * space has been reserved.
		 */
		RET_CG_ERR(0);

	opt = passive_synack_out;
	if (skops_want_cookie(skops))
		SET_OPTION_FLAGS(opt.flags, OPTION_RESEND);

	return store_option(skops, &opt);
}

static int syn_opt_len(struct bpf_sock_ops *skops)
{
	__u8 optlen;
	int err;

	if (!active_syn_out.flags)
		return CG_OK;

	optlen = option_total_len(active_syn_out.flags);
	if (optlen) {
		err = bpf_reserve_hdr_opt(skops, optlen, 0);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

static int write_syn_opt(struct bpf_sock_ops *skops)
{
	if (!active_syn_out.flags)
		RET_CG_ERR(0);

	return store_option(skops, &active_syn_out);
}

static int fin_opt_len(struct bpf_sock_ops *skops)
{
	struct bpf_test_option *opt;
	struct hdr_stg *hdr_stg;
	__u8 optlen;
	int err;

	if (!skops->sk)
		RET_CG_ERR(0);

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		RET_CG_ERR(0);

	if (hdr_stg->active)
		opt = &active_fin_out;
	else
		opt = &passive_fin_out;

	optlen = option_total_len(opt->flags);
	if (optlen) {
		err = bpf_reserve_hdr_opt(skops, optlen, 0);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

static int write_fin_opt(struct bpf_sock_ops *skops)
{
	struct bpf_test_option *opt;
	struct hdr_stg *hdr_stg;

	if (!skops->sk)
		RET_CG_ERR(0);

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		RET_CG_ERR(0);

	if (hdr_stg->active)
		opt = &active_fin_out;
	else
		opt = &passive_fin_out;

	if (!opt->flags)
		RET_CG_ERR(0);

	return store_option(skops, opt);
}

static int
resend_in_ack(struct bpf_sock_ops *skops)
{
	struct hdr_stg *hdr_stg;

	if (!skops->sk)
		return -1;

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		return -1;

	return !!hdr_stg->resend_syn;
}

static int nodata_opt_len(struct bpf_sock_ops *skops)
{
	int resend;

	resend = resend_in_ack(skops);
	if (resend < 0)
		RET_CG_ERR(0);

	if (resend)
		return syn_opt_len(skops);

	return CG_OK;
}

static int write_nodata_opt(struct bpf_sock_ops *skops)
{
	int resend;

	resend = resend_in_ack(skops);
	if (resend < 0)
		RET_CG_ERR(0);

	if (resend)
		return write_syn_opt(skops);

	return CG_OK;
}

static int data_opt_len(struct bpf_sock_ops *skops)
{
	/* Same as the nodata version. Mostly to show
	 * an example usage of skops->skb_len.
	 */
	return nodata_opt_len(skops);
}

static int write_data_opt(struct bpf_sock_ops *skops)
{
	return write_nodata_opt(skops);
}

static int current_mss_opt_len(struct bpf_sock_ops *skops)
{
	/* Reserve the maximum that may be needed */
	int err;

	err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0);
	if (err)
		RET_CG_ERR(err);

	return CG_OK;
}

static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
{
	__u8 tcp_flags = skops_tcp_flags(skops);

	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
		return synack_opt_len(skops);

	if (tcp_flags & TCPHDR_SYN)
		return syn_opt_len(skops);

	if (tcp_flags & TCPHDR_FIN)
		return fin_opt_len(skops);

	if (skops_current_mss(skops))
		/* The kernel is calculating the MSS */
		return current_mss_opt_len(skops);

	if (skops->skb_len)
		return data_opt_len(skops);

	return nodata_opt_len(skops);
}

static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
{
	__u8 tcp_flags = skops_tcp_flags(skops);
	struct tcphdr *th;

	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
		return write_synack_opt(skops);

	if (tcp_flags & TCPHDR_SYN)
		return write_syn_opt(skops);

	if (tcp_flags & TCPHDR_FIN)
		return write_fin_opt(skops);

	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	if (skops->skb_len > tcp_hdrlen(th))
		return write_data_opt(skops);

	return write_nodata_opt(skops);
}

static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms)
{
	__u32 max_delack_us = max_delack_ms * 1000;

	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX,
			      &max_delack_us, sizeof(max_delack_us));
}

static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms)
{
	__u32 min_rto_us = peer_max_delack_ms * 1000;

	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us,
			      sizeof(min_rto_us));
}

static int handle_active_estab(struct bpf_sock_ops *skops)
{
	struct hdr_stg init_stg = {
		.active = true,
	};
	int err;

	err = load_option(skops, &active_estab_in, false);
	if (err && err != -ENOMSG)
		RET_CG_ERR(err);

	init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags,
						OPTION_RESEND);
	if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk,
					      &init_stg,
					      BPF_SK_STORAGE_GET_F_CREATE))
		RET_CG_ERR(0);

	if (init_stg.resend_syn)
		/* Don't clear the write_hdr cb now because
		 * the ACK may get lost and retransmit may
		 * be needed.
		 *
		 * PARSE_ALL_HDR cb flag is set to learn if this
		 * resend_syn option has been received by the peer.
		 *
		 * The header option will be resent until a valid
		 * packet is received at handle_parse_hdr()
		 * and all hdr cb flags will be cleared in
		 * handle_parse_hdr().
		 */
		set_parse_all_hdr_cb_flags(skops);
	else if (!active_fin_out.flags)
		/* No options will be written from now on */
		clear_hdr_cb_flags(skops);

	if (active_syn_out.max_delack_ms) {
		err = set_delack_max(skops, active_syn_out.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	if (active_estab_in.max_delack_ms) {
		err = set_rto_min(skops, active_estab_in.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

static int handle_passive_estab(struct bpf_sock_ops *skops)
{
	struct hdr_stg init_stg = {};
	struct tcphdr *th;
	int err;

	inherit_cb_flags = skops->bpf_sock_ops_cb_flags;

	err = load_option(skops, &passive_estab_in, true);
	if (err == -ENOENT) {
		/* saved_syn is not found. It was in syncookie mode.
		 * We have asked the active side to resend the options
		 * in ACK, so try to find the bpf_test_option from ACK now.
		 */
		err = load_option(skops, &passive_estab_in, false);
		init_stg.syncookie = true;
	}

	/* ENOMSG: The bpf_test_option is not found, which is fine.
	 * Bail out now for all other errors.
	 */
	if (err && err != -ENOMSG)
		RET_CG_ERR(err);

	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	if (th->syn) {
		/* Fastopen */

		/* Cannot clear cb_flags to stop write_hdr cb.
		 * synack is not sent yet for fast open.
		 * Even if it was, the synack may need to be retransmitted.
		 *
		 * PARSE_ALL_HDR cb flag is set to learn
		 * if synack has reached the peer.
		 * All cb_flags will be cleared in handle_parse_hdr().
		 */
		set_parse_all_hdr_cb_flags(skops);
		init_stg.fastopen = true;
	} else if (!passive_fin_out.flags) {
		/* No options will be written from now on */
		clear_hdr_cb_flags(skops);
	}

	if (!skops->sk ||
	    !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg,
				BPF_SK_STORAGE_GET_F_CREATE))
		RET_CG_ERR(0);

	if (passive_synack_out.max_delack_ms) {
		err = set_delack_max(skops, passive_synack_out.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	if (passive_estab_in.max_delack_ms) {
		err = set_rto_min(skops, passive_estab_in.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

static int handle_parse_hdr(struct bpf_sock_ops *skops)
{
	struct hdr_stg *hdr_stg;
	struct tcphdr *th;

	if (!skops->sk)
		RET_CG_ERR(0);

	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		RET_CG_ERR(0);

	if (hdr_stg->resend_syn || hdr_stg->fastopen)
		/* The PARSE_ALL_HDR cb flag was turned on
		 * to ensure that the previously written
		 * options have reached the peer.
		 * Those previously written options include:
		 * - Active side: resend_syn in ACK during syncookie
		 *   or
		 * - Passive side: SYNACK during fastopen
		 *
		 * A valid packet has been received here after
		 * the 3WHS, so the PARSE_ALL_HDR cb flag
		 * can be cleared now.
		 */
		clear_parse_all_hdr_cb_flags(skops);

	if (hdr_stg->resend_syn && !active_fin_out.flags)
		/* Active side resent the syn option in ACK
		 * because the server was in syncookie mode.
		 * A valid packet has been received, so
		 * clear header cb flags if there is no
		 * more option to send.
		 */
		clear_hdr_cb_flags(skops);

	if (hdr_stg->fastopen && !passive_fin_out.flags)
		/* Passive side was in fastopen.
		 * A valid packet has been received, so
		 * the SYNACK has reached the peer.
		 * Clear header cb flags if there is no more
		 * option to send.
		 */
		clear_hdr_cb_flags(skops);

	if (th->fin) {
		struct bpf_test_option *fin_opt;
		int err;

		if (hdr_stg->active)
			fin_opt = &active_fin_in;
		else
			fin_opt = &passive_fin_in;

		err = load_option(skops, fin_opt, false);
		if (err && err != -ENOMSG)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

SEC("sockops")
int estab(struct bpf_sock_ops *skops)
{
	int true_val = 1;

	switch (skops->op) {
	case BPF_SOCK_OPS_TCP_LISTEN_CB:
		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
			       &true_val, sizeof(true_val));
		set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
		break;
	case BPF_SOCK_OPS_TCP_CONNECT_CB:
		set_hdr_cb_flags(skops, 0);
		break;
	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
		return handle_parse_hdr(skops);
	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
		return handle_hdr_opt_len(skops);
	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
		return handle_write_hdr_opt(skops);
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		return handle_passive_estab(skops);
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
		return handle_active_estab(skops);
	}

	return CG_OK;
}

char _license[] SEC("license") = "GPL";
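
The sockops program above only takes effect once it is attached to a cgroup; the selftest drives this from its own userspace harness. Below is a minimal, hypothetical libbpf loader sketch that is not part of the selftest: the object file name "test_tcp_hdr_options.bpf.o" and the cgroup v2 path passed in argv[1] are assumptions, and error handling is kept to the bare minimum.

/* Hypothetical loader sketch -- not taken from the selftest harness.
 * Assumes the BPF program above was compiled to "test_tcp_hdr_options.bpf.o"
 * and that argv[1] names an existing cgroup v2 directory.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <bpf/libbpf.h>

int main(int argc, char **argv)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	struct bpf_link *link;
	int cg_fd, err = 1;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <cgroup-v2 path>\n", argv[0]);
		return 1;
	}

	/* Only sockets created inside this cgroup will hit the program */
	cg_fd = open(argv[1], O_RDONLY | O_DIRECTORY);
	if (cg_fd < 0) {
		perror("open cgroup");
		return 1;
	}

	obj = bpf_object__open_file("test_tcp_hdr_options.bpf.o", NULL);
	if (libbpf_get_error(obj))
		goto out_close;

	if (bpf_object__load(obj))
		goto out_obj;

	/* "estab" is the SEC("sockops") program defined above */
	prog = bpf_object__find_program_by_name(obj, "estab");
	if (!prog)
		goto out_obj;

	link = bpf_program__attach_cgroup(prog, cg_fd);
	if (libbpf_get_error(link))
		goto out_obj;

	printf("attached; press enter to detach\n");
	getchar();

	bpf_link__destroy(link);
	err = 0;
out_obj:
	bpf_object__close(obj);
out_close:
	close(cg_fd);
	return err;
}

Once attached, TCP connections established from within that cgroup exercise the HDR_OPT_LEN, WRITE_HDR_OPT, and PARSE_HDR_OPT callbacks above, and the global bpf_test_option variables can be set (for example via the skeleton's BSS) before the connection is made.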