1 /* Copyright (c) 2015 PLUMgrid, http://plumgri 1 /* Copyright (c) 2015 PLUMgrid, http://plumgrid.com 2 * 2 * 3 * This program is free software; you can redi 3 * This program is free software; you can redistribute it and/or 4 * modify it under the terms of version 2 of t 4 * modify it under the terms of version 2 of the GNU General Public 5 * License as published by the Free Software F 5 * License as published by the Free Software Foundation. 6 */ 6 */ 7 #include <uapi/linux/bpf.h> 7 #include <uapi/linux/bpf.h> >> 8 #include "bpf_helpers.h" 8 #include <uapi/linux/in.h> 9 #include <uapi/linux/in.h> 9 #include <uapi/linux/if.h> 10 #include <uapi/linux/if.h> 10 #include <uapi/linux/if_ether.h> 11 #include <uapi/linux/if_ether.h> 11 #include <uapi/linux/ip.h> 12 #include <uapi/linux/ip.h> 12 #include <uapi/linux/ipv6.h> 13 #include <uapi/linux/ipv6.h> 13 #include <uapi/linux/if_tunnel.h> 14 #include <uapi/linux/if_tunnel.h> 14 #include <uapi/linux/mpls.h> 15 #include <uapi/linux/mpls.h> 15 #include <bpf/bpf_helpers.h> << 16 #include "bpf_legacy.h" << 17 #define IP_MF 0x2000 16 #define IP_MF 0x2000 18 #define IP_OFFSET 0x1FFF 17 #define IP_OFFSET 0x1FFF 19 18 >> 19 #define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F >> 20 >> 21 struct bpf_map_def SEC("maps") jmp_table = { >> 22 .type = BPF_MAP_TYPE_PROG_ARRAY, >> 23 .key_size = sizeof(u32), >> 24 .value_size = sizeof(u32), >> 25 .max_entries = 8, >> 26 }; >> 27 20 #define PARSE_VLAN 1 28 #define PARSE_VLAN 1 21 #define PARSE_MPLS 2 29 #define PARSE_MPLS 2 22 #define PARSE_IP 3 30 #define PARSE_IP 3 23 #define PARSE_IPV6 4 31 #define PARSE_IPV6 4 24 32 >> 33 /* protocol dispatch routine. >> 34 * It tail-calls next BPF program depending on eth proto >> 35 * Note, we could have used: >> 36 * bpf_tail_call(skb, &jmp_table, proto); >> 37 * but it would need large prog_array >> 38 */ >> 39 static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto) >> 40 { >> 41 switch (proto) { >> 42 case ETH_P_8021Q: >> 43 case ETH_P_8021AD: >> 44 bpf_tail_call(skb, &jmp_table, PARSE_VLAN); >> 45 break; >> 46 case ETH_P_MPLS_UC: >> 47 case ETH_P_MPLS_MC: >> 48 bpf_tail_call(skb, &jmp_table, PARSE_MPLS); >> 49 break; >> 50 case ETH_P_IP: >> 51 bpf_tail_call(skb, &jmp_table, PARSE_IP); >> 52 break; >> 53 case ETH_P_IPV6: >> 54 bpf_tail_call(skb, &jmp_table, PARSE_IPV6); >> 55 break; >> 56 } >> 57 } >> 58 25 struct vlan_hdr { 59 struct vlan_hdr { 26 __be16 h_vlan_TCI; 60 __be16 h_vlan_TCI; 27 __be16 h_vlan_encapsulated_proto; 61 __be16 h_vlan_encapsulated_proto; 28 }; 62 }; 29 63 30 struct flow_key_record { 64 struct flow_key_record { 31 __be32 src; 65 __be32 src; 32 __be32 dst; 66 __be32 dst; 33 union { 67 union { 34 __be32 ports; 68 __be32 ports; 35 __be16 port16[2]; 69 __be16 port16[2]; 36 }; 70 }; 37 __u32 ip_proto; 71 __u32 ip_proto; 38 }; 72 }; 39 73 40 static inline void parse_eth_proto(struct __sk << 41 << 42 static inline int ip_is_fragment(struct __sk_b 74 static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff) 43 { 75 { 44 return load_half(ctx, nhoff + offsetof 76 return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off)) 45 & (IP_MF | IP_OFFSET); 77 & (IP_MF | IP_OFFSET); 46 } 78 } 47 79 48 static inline __u32 ipv6_addr_hash(struct __sk 80 static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) 49 { 81 { 50 __u64 w0 = load_word(ctx, off); 82 __u64 w0 = load_word(ctx, off); 51 __u64 w1 = load_word(ctx, off + 4); 83 __u64 w1 = load_word(ctx, off + 4); 52 __u64 w2 = load_word(ctx, off + 8); 84 __u64 w2 = load_word(ctx, off + 8); 53 __u64 w3 = load_word(ctx, off + 12); 85 __u64 w3 = load_word(ctx, off + 12); 54 86 55 return (__u32)(w0 ^ w1 ^ w2 ^ w3); 87 return (__u32)(w0 ^ w1 ^ w2 ^ w3); 56 } 88 } 57 89 58 struct globals { 90 struct globals { 59 struct flow_key_record flow; 91 struct flow_key_record flow; 60 }; 92 }; 61 93 62 struct { !! 94 struct bpf_map_def SEC("maps") percpu_map = { 63 __uint(type, BPF_MAP_TYPE_ARRAY); !! 95 .type = BPF_MAP_TYPE_ARRAY, 64 __type(key, __u32); !! 96 .key_size = sizeof(__u32), 65 __type(value, struct globals); !! 97 .value_size = sizeof(struct globals), 66 __uint(max_entries, 32); !! 98 .max_entries = 32, 67 } percpu_map SEC(".maps"); !! 99 }; 68 100 69 /* user poor man's per_cpu until native suppor 101 /* user poor man's per_cpu until native support is ready */ 70 static struct globals *this_cpu_globals(void) 102 static struct globals *this_cpu_globals(void) 71 { 103 { 72 u32 key = bpf_get_smp_processor_id(); 104 u32 key = bpf_get_smp_processor_id(); 73 105 74 return bpf_map_lookup_elem(&percpu_map 106 return bpf_map_lookup_elem(&percpu_map, &key); 75 } 107 } 76 108 77 /* some simple stats for user space consumptio 109 /* some simple stats for user space consumption */ 78 struct pair { 110 struct pair { 79 __u64 packets; 111 __u64 packets; 80 __u64 bytes; 112 __u64 bytes; 81 }; 113 }; 82 114 83 struct { !! 115 struct bpf_map_def SEC("maps") hash_map = { 84 __uint(type, BPF_MAP_TYPE_HASH); !! 116 .type = BPF_MAP_TYPE_HASH, 85 __type(key, struct flow_key_record); !! 117 .key_size = sizeof(struct flow_key_record), 86 __type(value, struct pair); !! 118 .value_size = sizeof(struct pair), 87 __uint(max_entries, 1024); !! 119 .max_entries = 1024, 88 } hash_map SEC(".maps"); !! 120 }; 89 121 90 static void update_stats(struct __sk_buff *skb 122 static void update_stats(struct __sk_buff *skb, struct globals *g) 91 { 123 { 92 struct flow_key_record key = g->flow; 124 struct flow_key_record key = g->flow; 93 struct pair *value; 125 struct pair *value; 94 126 95 value = bpf_map_lookup_elem(&hash_map, 127 value = bpf_map_lookup_elem(&hash_map, &key); 96 if (value) { 128 if (value) { 97 __sync_fetch_and_add(&value->p 129 __sync_fetch_and_add(&value->packets, 1); 98 __sync_fetch_and_add(&value->b 130 __sync_fetch_and_add(&value->bytes, skb->len); 99 } else { 131 } else { 100 struct pair val = {1, skb->len 132 struct pair val = {1, skb->len}; 101 133 102 bpf_map_update_elem(&hash_map, 134 bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY); 103 } 135 } 104 } 136 } 105 137 106 static __always_inline void parse_ip_proto(str 138 static __always_inline void parse_ip_proto(struct __sk_buff *skb, 107 str 139 struct globals *g, __u32 ip_proto) 108 { 140 { 109 __u32 nhoff = skb->cb[0]; 141 __u32 nhoff = skb->cb[0]; 110 int poff; 142 int poff; 111 143 112 switch (ip_proto) { 144 switch (ip_proto) { 113 case IPPROTO_GRE: { 145 case IPPROTO_GRE: { 114 struct gre_hdr { 146 struct gre_hdr { 115 __be16 flags; 147 __be16 flags; 116 __be16 proto; 148 __be16 proto; 117 }; 149 }; 118 150 119 __u32 gre_flags = load_half(sk 151 __u32 gre_flags = load_half(skb, 120 nh 152 nhoff + offsetof(struct gre_hdr, flags)); 121 __u32 gre_proto = load_half(sk 153 __u32 gre_proto = load_half(skb, 122 nh 154 nhoff + offsetof(struct gre_hdr, proto)); 123 155 124 if (gre_flags & (GRE_VERSION|G 156 if (gre_flags & (GRE_VERSION|GRE_ROUTING)) 125 break; 157 break; 126 158 127 nhoff += 4; 159 nhoff += 4; 128 if (gre_flags & GRE_CSUM) 160 if (gre_flags & GRE_CSUM) 129 nhoff += 4; 161 nhoff += 4; 130 if (gre_flags & GRE_KEY) 162 if (gre_flags & GRE_KEY) 131 nhoff += 4; 163 nhoff += 4; 132 if (gre_flags & GRE_SEQ) 164 if (gre_flags & GRE_SEQ) 133 nhoff += 4; 165 nhoff += 4; 134 166 135 skb->cb[0] = nhoff; 167 skb->cb[0] = nhoff; 136 parse_eth_proto(skb, gre_proto 168 parse_eth_proto(skb, gre_proto); 137 break; 169 break; 138 } 170 } 139 case IPPROTO_IPIP: 171 case IPPROTO_IPIP: 140 parse_eth_proto(skb, ETH_P_IP) 172 parse_eth_proto(skb, ETH_P_IP); 141 break; 173 break; 142 case IPPROTO_IPV6: 174 case IPPROTO_IPV6: 143 parse_eth_proto(skb, ETH_P_IPV 175 parse_eth_proto(skb, ETH_P_IPV6); 144 break; 176 break; 145 case IPPROTO_TCP: 177 case IPPROTO_TCP: 146 case IPPROTO_UDP: 178 case IPPROTO_UDP: 147 g->flow.ports = load_word(skb, 179 g->flow.ports = load_word(skb, nhoff); 148 case IPPROTO_ICMP: 180 case IPPROTO_ICMP: 149 g->flow.ip_proto = ip_proto; 181 g->flow.ip_proto = ip_proto; 150 update_stats(skb, g); 182 update_stats(skb, g); 151 break; 183 break; 152 default: 184 default: 153 break; 185 break; 154 } 186 } 155 } 187 } 156 188 157 SEC("socket") !! 189 PROG(PARSE_IP)(struct __sk_buff *skb) 158 int bpf_func_ip(struct __sk_buff *skb) << 159 { 190 { 160 struct globals *g = this_cpu_globals() 191 struct globals *g = this_cpu_globals(); 161 __u32 nhoff, verlen, ip_proto; 192 __u32 nhoff, verlen, ip_proto; 162 193 163 if (!g) 194 if (!g) 164 return 0; 195 return 0; 165 196 166 nhoff = skb->cb[0]; 197 nhoff = skb->cb[0]; 167 198 168 if (unlikely(ip_is_fragment(skb, nhoff 199 if (unlikely(ip_is_fragment(skb, nhoff))) 169 return 0; 200 return 0; 170 201 171 ip_proto = load_byte(skb, nhoff + offs 202 ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol)); 172 203 173 if (ip_proto != IPPROTO_GRE) { 204 if (ip_proto != IPPROTO_GRE) { 174 g->flow.src = load_word(skb, n 205 g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr)); 175 g->flow.dst = load_word(skb, n 206 g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr)); 176 } 207 } 177 208 178 verlen = load_byte(skb, nhoff + 0/*off 209 verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/); 179 nhoff += (verlen & 0xF) << 2; 210 nhoff += (verlen & 0xF) << 2; 180 211 181 skb->cb[0] = nhoff; 212 skb->cb[0] = nhoff; 182 parse_ip_proto(skb, g, ip_proto); 213 parse_ip_proto(skb, g, ip_proto); 183 return 0; 214 return 0; 184 } 215 } 185 216 186 SEC("socket") !! 217 PROG(PARSE_IPV6)(struct __sk_buff *skb) 187 int bpf_func_ipv6(struct __sk_buff *skb) << 188 { 218 { 189 struct globals *g = this_cpu_globals() 219 struct globals *g = this_cpu_globals(); 190 __u32 nhoff, ip_proto; 220 __u32 nhoff, ip_proto; 191 221 192 if (!g) 222 if (!g) 193 return 0; 223 return 0; 194 224 195 nhoff = skb->cb[0]; 225 nhoff = skb->cb[0]; 196 226 197 ip_proto = load_byte(skb, 227 ip_proto = load_byte(skb, 198 nhoff + offsetof( 228 nhoff + offsetof(struct ipv6hdr, nexthdr)); 199 g->flow.src = ipv6_addr_hash(skb, 229 g->flow.src = ipv6_addr_hash(skb, 200 nhoff + o 230 nhoff + offsetof(struct ipv6hdr, saddr)); 201 g->flow.dst = ipv6_addr_hash(skb, 231 g->flow.dst = ipv6_addr_hash(skb, 202 nhoff + o 232 nhoff + offsetof(struct ipv6hdr, daddr)); 203 nhoff += sizeof(struct ipv6hdr); 233 nhoff += sizeof(struct ipv6hdr); 204 234 205 skb->cb[0] = nhoff; 235 skb->cb[0] = nhoff; 206 parse_ip_proto(skb, g, ip_proto); 236 parse_ip_proto(skb, g, ip_proto); 207 return 0; 237 return 0; 208 } 238 } 209 239 210 SEC("socket") !! 240 PROG(PARSE_VLAN)(struct __sk_buff *skb) 211 int bpf_func_vlan(struct __sk_buff *skb) << 212 { 241 { 213 __u32 nhoff, proto; 242 __u32 nhoff, proto; 214 243 215 nhoff = skb->cb[0]; 244 nhoff = skb->cb[0]; 216 245 217 proto = load_half(skb, nhoff + offseto 246 proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, 218 247 h_vlan_encapsulated_proto)); 219 nhoff += sizeof(struct vlan_hdr); 248 nhoff += sizeof(struct vlan_hdr); 220 skb->cb[0] = nhoff; 249 skb->cb[0] = nhoff; 221 250 222 parse_eth_proto(skb, proto); 251 parse_eth_proto(skb, proto); 223 252 224 return 0; 253 return 0; 225 } 254 } 226 255 227 SEC("socket") !! 256 PROG(PARSE_MPLS)(struct __sk_buff *skb) 228 int bpf_func_mpls(struct __sk_buff *skb) << 229 { 257 { 230 __u32 nhoff, label; 258 __u32 nhoff, label; 231 259 232 nhoff = skb->cb[0]; 260 nhoff = skb->cb[0]; 233 261 234 label = load_word(skb, nhoff); 262 label = load_word(skb, nhoff); 235 nhoff += sizeof(struct mpls_label); 263 nhoff += sizeof(struct mpls_label); 236 skb->cb[0] = nhoff; 264 skb->cb[0] = nhoff; 237 265 238 if (label & MPLS_LS_S_MASK) { 266 if (label & MPLS_LS_S_MASK) { 239 __u8 verlen = load_byte(skb, n 267 __u8 verlen = load_byte(skb, nhoff); 240 if ((verlen & 0xF0) == 4) 268 if ((verlen & 0xF0) == 4) 241 parse_eth_proto(skb, E 269 parse_eth_proto(skb, ETH_P_IP); 242 else 270 else 243 parse_eth_proto(skb, E 271 parse_eth_proto(skb, ETH_P_IPV6); 244 } else { 272 } else { 245 parse_eth_proto(skb, ETH_P_MPL 273 parse_eth_proto(skb, ETH_P_MPLS_UC); 246 } 274 } 247 275 248 return 0; 276 return 0; 249 } 277 } 250 278 251 struct { !! 279 SEC("socket/0") 252 __uint(type, BPF_MAP_TYPE_PROG_ARRAY); << 253 __uint(key_size, sizeof(u32)); << 254 __uint(max_entries, 8); << 255 __array(values, u32 (void *)); << 256 } prog_array_init SEC(".maps") = { << 257 .values = { << 258 [PARSE_VLAN] = (void *)&bpf_fu << 259 [PARSE_IP] = (void *)&bpf_fu << 260 [PARSE_IPV6] = (void *)&bpf_fu << 261 [PARSE_MPLS] = (void *)&bpf_fu << 262 }, << 263 }; << 264 << 265 /* Protocol dispatch routine. It tail-calls ne << 266 * on eth proto. Note, we could have used ... << 267 * << 268 * bpf_tail_call(skb, &prog_array_init, prot << 269 * << 270 * ... but it would need large prog_array and << 271 * the map key is not static. << 272 */ << 273 static inline void parse_eth_proto(struct __sk << 274 { << 275 switch (proto) { << 276 case ETH_P_8021Q: << 277 case ETH_P_8021AD: << 278 bpf_tail_call(skb, &prog_array << 279 break; << 280 case ETH_P_MPLS_UC: << 281 case ETH_P_MPLS_MC: << 282 bpf_tail_call(skb, &prog_array << 283 break; << 284 case ETH_P_IP: << 285 bpf_tail_call(skb, &prog_array << 286 break; << 287 case ETH_P_IPV6: << 288 bpf_tail_call(skb, &prog_array << 289 break; << 290 } << 291 } << 292 << 293 SEC("socket") << 294 int main_prog(struct __sk_buff *skb) 280 int main_prog(struct __sk_buff *skb) 295 { 281 { 296 __u32 nhoff = ETH_HLEN; 282 __u32 nhoff = ETH_HLEN; 297 __u32 proto = load_half(skb, 12); 283 __u32 proto = load_half(skb, 12); 298 284 299 skb->cb[0] = nhoff; 285 skb->cb[0] = nhoff; 300 parse_eth_proto(skb, proto); 286 parse_eth_proto(skb, proto); 301 return 0; 287 return 0; 302 } 288 } 303 289 304 char _license[] SEC("license") = "GPL"; 290 char _license[] SEC("license") = "GPL"; 305 291
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.