// SPDX-License-Identifier: GPL-2.0
/* BPF flow dissector: parses L2/L3/L4 headers into skb->flow_keys, using a
 * PROG_ARRAY of tail calls (one program per protocol layer) and recording
 * every dissection result in the last_dissection hash map for userspace
 * inspection, keyed by (sport << 16 | dport).
 */
#include <limits.h>
#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <linux/pkt_cls.h>
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/icmp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_packet.h>
#include <sys/socket.h>
#include <linux/if_tunnel.h>
#include <linux/mpls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

/* Expands to the SEC("flow_dissector") prologue of a tail-called program,
 * e.g. PROG(IP) defines flow_dissector_0.
 */
#define PROG(F) PROG_(F, _##F)
#define PROG_(NUM, NAME) SEC("flow_dissector") int flow_dissector_##NUM

#define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */

/* These are the identifiers of the BPF programs that will be used in tail
 * calls. Name is limited to 16 characters, with the terminating character and
 * bpf_func_ above, we have only 6 to work with, anything after will be cropped.
 */
#define IP	0
#define IPV6	1
#define IPV6OP	2	/* Destination/Hop-by-Hop Options IPv6 Ext. Header */
#define IPV6FR	3	/* Fragmentation IPv6 Extension Header */
#define MPLS	4
#define VLAN	5
#define MAX_PROG 6

/* IPv4 frag_off field: More-Fragments flag and 13-bit fragment offset */
#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF
/* IPv6 fragment header frag_off field: M flag and offset mask */
#define IP6_MF		0x0001
#define IP6_OFFSET	0xFFF8

struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

/* First 4 bytes of a GRE header (RFC 2784): flags/version and protocol */
struct gre_hdr {
	__be16 flags;
	__be16 proto;
};

/* IPv6 Fragment extension header (RFC 8200 section 4.5) */
struct frag_hdr {
	__u8 nexthdr;
	__u8 reserved;
	__be16 frag_off;
	__be32 identification;
};

/* Tail-call table: indexed by the IP/IPV6/.../VLAN identifiers above */
struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, MAX_PROG);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

/* Userspace reads back the most recent flow_keys for each (sport, dport) */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, __u32);
	__type(value, struct bpf_flow_keys);
} last_dissection SEC(".maps");

/* Snapshot the current flow keys into last_dissection, then pass through
 * @ret so callers can use this on every exit path:
 * return export_flow_keys(keys, BPF_OK/BPF_DROP);
 */
static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
					    int ret)
{
	__u32 key = (__u32)(keys->sport) << 16 | keys->dport;
	struct bpf_flow_keys val;

	memcpy(&val, keys, sizeof(val));
	bpf_map_update_elem(&last_dissection, &key, &val, BPF_ANY);
	return ret;
}

/* The first 4 bytes of the IPv6 header carry version/traffic-class/flow-label;
 * mask everything but the low 20 bits (flow label, network byte order).
 */
#define IPV6_FLOWLABEL_MASK		__bpf_constant_htonl(0x000FFFFF)
static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr)
{
	return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK;
}

/* Return a pointer to hdr_size bytes of header at the current transport
 * offset (flow_keys->thoff): directly into the linear packet data when the
 * bytes are there, otherwise copied into @buffer via bpf_skb_load_bytes().
 * Returns NULL when thoff would overflow or the bytes cannot be read.
 */
static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
							 __u16 hdr_size,
							 void *buffer)
{
	void *data_end = (void *)(long)skb->data_end;
	void *data = (void *)(long)skb->data;
	__u16 thoff = skb->flow_keys->thoff;
	__u8 *hdr;

	/* Verifies this variable offset does not overflow */
	if (thoff > (USHRT_MAX - hdr_size))
		return NULL;

	hdr = data + thoff;
	if (hdr + hdr_size <= data_end)
		return hdr;

	if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size))
		return NULL;

	return buffer;
}

/* Dispatches on ETHERTYPE */
static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
{
	struct bpf_flow_keys *keys = skb->flow_keys;

	switch (proto) {
	case bpf_htons(ETH_P_IP):
		bpf_tail_call_static(skb, &jmp_table, IP);
		break;
	case bpf_htons(ETH_P_IPV6):
		bpf_tail_call_static(skb, &jmp_table, IPV6);
		break;
	case bpf_htons(ETH_P_MPLS_MC):
	case bpf_htons(ETH_P_MPLS_UC):
		bpf_tail_call_static(skb, &jmp_table, MPLS);
		break;
	case bpf_htons(ETH_P_8021Q):
	case bpf_htons(ETH_P_8021AD):
		bpf_tail_call_static(skb, &jmp_table, VLAN);
		break;
	default:
		/* Protocol not supported */
		return export_flow_keys(keys, BPF_DROP);
	}

	/* A successful bpf_tail_call() does not return; reaching here means
	 * the tail call failed (e.g. slot not populated), so drop.
	 */
	return export_flow_keys(keys, BPF_DROP);
}

/* Entry point: the kernel sets keys->n_proto before invoking us */
SEC("flow_dissector")
int _dissect(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;

	if (keys->n_proto == bpf_htons(ETH_P_IP)) {
		/* IP traffic from FLOW_CONTINUE_SADDR falls-back to
		 * standard dissector
		 */
		struct iphdr *iph, _iph;

		iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
		if (iph && iph->ihl == 5 &&
		    iph->saddr == bpf_htonl(FLOW_CONTINUE_SADDR)) {
			return BPF_FLOW_DISSECTOR_CONTINUE;
		}
	}

	return parse_eth_proto(skb, keys->n_proto);
}

/* Parses on IPPROTO_* */
static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	void *data_end = (void *)(long)skb->data_end;
	struct icmphdr *icmp, _icmp;
	struct gre_hdr *gre, _gre;
	struct ethhdr *eth, _eth;
	struct tcphdr *tcp, _tcp;
	struct udphdr *udp, _udp;

	switch (proto) {
	case IPPROTO_ICMP:
		icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
		if (!icmp)
			return export_flow_keys(keys, BPF_DROP);
		return export_flow_keys(keys, BPF_OK);
	case IPPROTO_IPIP:
		/* IPv4-in-IPv4 tunnel: recurse into inner header unless the
		 * caller asked to stop at encapsulation.
		 */
		keys->is_encap = true;
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return export_flow_keys(keys, BPF_OK);

		return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
	case IPPROTO_IPV6:
		/* IPv6-in-IPv4 tunnel, same stop-at-encap handling */
		keys->is_encap = true;
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return export_flow_keys(keys, BPF_OK);

		return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
	case IPPROTO_GRE:
		gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
		if (!gre)
			return export_flow_keys(keys, BPF_DROP);

		if (bpf_htons(gre->flags & GRE_VERSION))
			/* Only inspect standard GRE packets with version 0 */
			return export_flow_keys(keys, BPF_OK);

		keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
		if (GRE_IS_CSUM(gre->flags))
			keys->thoff += 4; /* Step over chksum and Padding */
		if (GRE_IS_KEY(gre->flags))
			keys->thoff += 4; /* Step over key */
		if (GRE_IS_SEQ(gre->flags))
			keys->thoff += 4; /* Step over sequence number */

		keys->is_encap = true;
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return export_flow_keys(keys, BPF_OK);

		if (gre->proto == bpf_htons(ETH_P_TEB)) {
			/* Transparent Ethernet Bridging: an inner Ethernet
			 * header precedes the inner network header.
			 */
			eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
							  &_eth);
			if (!eth)
				return export_flow_keys(keys, BPF_DROP);

			keys->thoff += sizeof(*eth);

			return parse_eth_proto(skb, eth->h_proto);
		} else {
			return parse_eth_proto(skb, gre->proto);
		}
	case IPPROTO_TCP:
		tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
		if (!tcp)
			return export_flow_keys(keys, BPF_DROP);

		/* doff is the header length in 32-bit words; minimum 5 */
		if (tcp->doff < 5)
			return export_flow_keys(keys, BPF_DROP);

		if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
			return export_flow_keys(keys, BPF_DROP);

		keys->sport = tcp->source;
		keys->dport = tcp->dest;
		return export_flow_keys(keys, BPF_OK);
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
		if (!udp)
			return export_flow_keys(keys, BPF_DROP);

		keys->sport = udp->source;
		keys->dport = udp->dest;
		return export_flow_keys(keys, BPF_OK);
	default:
		return export_flow_keys(keys, BPF_DROP);
	}

	return export_flow_keys(keys, BPF_DROP);
}

/* Dispatch IPv6 next-header: extension headers go through their own
 * tail-called programs; anything else is a transport protocol.
 */
static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
{
	struct bpf_flow_keys *keys = skb->flow_keys;

	switch (nexthdr) {
	case IPPROTO_HOPOPTS:
	case IPPROTO_DSTOPTS:
		bpf_tail_call_static(skb, &jmp_table, IPV6OP);
		break;
	case IPPROTO_FRAGMENT:
		bpf_tail_call_static(skb, &jmp_table, IPV6FR);
		break;
	default:
		return parse_ip_proto(skb, nexthdr);
	}

	/* Only reached if the tail call above failed */
	return export_flow_keys(keys, BPF_DROP);
}

PROG(IP)(struct __sk_buff *skb)
{
	void *data_end = (void *)(long)skb->data_end;
	struct bpf_flow_keys *keys = skb->flow_keys;
	void *data = (void *)(long)skb->data;
	struct iphdr *iph, _iph;
	bool done = false;

	iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
	if (!iph)
		return export_flow_keys(keys, BPF_DROP);

	/* IP header cannot be smaller than 20 bytes */
	if (iph->ihl < 5)
		return export_flow_keys(keys, BPF_DROP);

	keys->addr_proto = ETH_P_IP;
	keys->ipv4_src = iph->saddr;
	keys->ipv4_dst = iph->daddr;
	keys->ip_proto = iph->protocol;

	/* ihl is in 32-bit words; advance thoff past the full IPv4 header */
	keys->thoff += iph->ihl << 2;
	if (data + keys->thoff > data_end)
		return export_flow_keys(keys, BPF_DROP);

	if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
		keys->is_frag = true;
		if (iph->frag_off & bpf_htons(IP_OFFSET)) {
			/* From second fragment on, packets do not have headers
			 * we can parse.
			 */
			done = true;
		} else {
			keys->is_first_frag = true;
			/* No need to parse fragmented packet unless
			 * explicitly asked for.
			 */
			if (!(keys->flags &
			      BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
				done = true;
		}
	}

	if (done)
		return export_flow_keys(keys, BPF_OK);

	return parse_ip_proto(skb, iph->protocol);
}

PROG(IPV6)(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct ipv6hdr *ip6h, _ip6h;

	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
	if (!ip6h)
		return export_flow_keys(keys, BPF_DROP);

	keys->addr_proto = ETH_P_IPV6;
	/* saddr and daddr are contiguous in ipv6hdr, as are ipv6_src and
	 * ipv6_dst in flow_keys: copy both addresses in one memcpy.
	 */
	memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));

	keys->thoff += sizeof(struct ipv6hdr);
	keys->ip_proto = ip6h->nexthdr;
	keys->flow_label = ip6_flowlabel(ip6h);

	if (keys->flow_label && keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
		return export_flow_keys(keys, BPF_OK);

	return parse_ipv6_proto(skb, ip6h->nexthdr);
}

PROG(IPV6OP)(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct ipv6_opt_hdr *ip6h, _ip6h;

	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
	if (!ip6h)
		return export_flow_keys(keys, BPF_DROP);

	/* hlen is in 8-octets and does not include the first 8 bytes
	 * of the header
	 */
	keys->thoff += (1 + ip6h->hdrlen) << 3;
	keys->ip_proto = ip6h->nexthdr;

	return parse_ipv6_proto(skb, ip6h->nexthdr);
}

PROG(IPV6FR)(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct frag_hdr *fragh, _fragh;

	fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
	if (!fragh)
		return export_flow_keys(keys, BPF_DROP);

	keys->thoff += sizeof(*fragh);
	keys->is_frag = true;
	keys->ip_proto = fragh->nexthdr;

	if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) {
		keys->is_first_frag = true;

		/* No need to parse fragmented packet unless
		 * explicitly asked for.
		 */
		if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
			return export_flow_keys(keys, BPF_OK);
	} else {
		/* Non-first fragment: no parseable upper-layer headers */
		return export_flow_keys(keys, BPF_OK);
	}

	return parse_ipv6_proto(skb, fragh->nexthdr);
}

PROG(MPLS)(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct mpls_label *mpls, _mpls;

	/* Only validate that an MPLS label is present; no deeper parsing */
	mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
	if (!mpls)
		return export_flow_keys(keys, BPF_DROP);

	return export_flow_keys(keys, BPF_OK);
}

PROG(VLAN)(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct vlan_hdr *vlan, _vlan;

	/* Account for double-tagging */
	if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
		vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
		if (!vlan)
			return export_flow_keys(keys, BPF_DROP);

		/* An 802.1ad outer tag must encapsulate an 802.1Q tag */
		if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
			return export_flow_keys(keys, BPF_DROP);

		keys->nhoff += sizeof(*vlan);
		keys->thoff += sizeof(*vlan);
	}

	vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
	if (!vlan)
		return export_flow_keys(keys, BPF_DROP);

	keys->nhoff += sizeof(*vlan);
	keys->thoff += sizeof(*vlan);
	/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
	if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
	    vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
		return export_flow_keys(keys, BPF_DROP);

	keys->n_proto = vlan->h_vlan_encapsulated_proto;
	return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
}

char __license[] SEC("license") = "GPL";
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.