/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <uapi/linux/bpf.h>
#include <uapi/linux/in.h>
#include <uapi/linux/if.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/if_tunnel.h>
#include <uapi/linux/mpls.h>
#include <bpf/bpf_helpers.h>
#include "bpf_legacy.h"
#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF

#define PARSE_VLAN 1
#define PARSE_MPLS 2
#define PARSE_IP   3
#define PARSE_IPV6 4

struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

struct flow_key_record {
	__be32 src;
	__be32 dst;
	union {
		__be32 ports;
		__be16 port16[2];
	};
	__u32 ip_proto;
};

static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto);

static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
{
	return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
		& (IP_MF | IP_OFFSET);
}

static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
{
	__u64 w0 = load_word(ctx, off);
	__u64 w1 = load_word(ctx, off + 4);
	__u64 w2 = load_word(ctx, off + 8);
	__u64 w3 = load_word(ctx, off + 12);

	return (__u32)(w0 ^ w1 ^ w2 ^ w3);
}

struct globals {
	struct flow_key_record flow;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, __u32);
	__type(value, struct globals);
	__uint(max_entries, 32);
} percpu_map SEC(".maps");

/* use poor man's per_cpu until native support is ready */
static struct globals *this_cpu_globals(void)
{
	u32 key = bpf_get_smp_processor_id();

	return bpf_map_lookup_elem(&percpu_map, &key);
}
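
/* Note: the 32-entry array above only emulates per-CPU data; on a
 * machine with more than 32 CPUs, this_cpu_globals() returns NULL for
 * the extra CPUs and their packets are skipped. A minimal sketch of
 * the native alternative (assuming a kernel with
 * BPF_MAP_TYPE_PERCPU_ARRAY, i.e. the "native support" the comment
 * above anticipates) would replace the definition above with:
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
 *		__type(key, __u32);
 *		__type(value, struct globals);
 *		__uint(max_entries, 1);
 *	} percpu_map SEC(".maps");
 *
 * Lookups then always use key 0: the kernel hands each program the
 * copy of the value belonging to the CPU it is running on.
 */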

/* some simple stats for user space consumption */
struct pair {
	__u64 packets;
	__u64 bytes;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct flow_key_record);
	__type(value, struct pair);
	__uint(max_entries, 1024);
} hash_map SEC(".maps");

static void update_stats(struct __sk_buff *skb, struct globals *g)
{
	struct flow_key_record key = g->flow;
	struct pair *value;

	value = bpf_map_lookup_elem(&hash_map, &key);
	if (value) {
		__sync_fetch_and_add(&value->packets, 1);
		__sync_fetch_and_add(&value->bytes, skb->len);
	} else {
		/* first packet of the flow; a concurrent insert from
		 * another CPU may be overwritten here, losing one
		 * packet's counts, which is acceptable for sample stats
		 */
		struct pair val = {1, skb->len};

		bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
	}
}

static __always_inline void parse_ip_proto(struct __sk_buff *skb,
					   struct globals *g, __u32 ip_proto)
{
	__u32 nhoff = skb->cb[0];

	switch (ip_proto) {
	case IPPROTO_GRE: {
		struct gre_hdr {
			__be16 flags;
			__be16 proto;
		};

		__u32 gre_flags = load_half(skb,
					    nhoff + offsetof(struct gre_hdr, flags));
		__u32 gre_proto = load_half(skb,
					    nhoff + offsetof(struct gre_hdr, proto));

		if (gre_flags & (GRE_VERSION|GRE_ROUTING))
			break;

		/* step over the 4-byte base header plus each optional
		 * field announced by the flag bits (RFC 2784/2890)
		 */
		nhoff += 4;
		if (gre_flags & GRE_CSUM)
			nhoff += 4;
		if (gre_flags & GRE_KEY)
			nhoff += 4;
		if (gre_flags & GRE_SEQ)
			nhoff += 4;

		skb->cb[0] = nhoff;
		parse_eth_proto(skb, gre_proto);
		break;
	}
	case IPPROTO_IPIP:
		parse_eth_proto(skb, ETH_P_IP);
		break;
	case IPPROTO_IPV6:
		parse_eth_proto(skb, ETH_P_IPV6);
		break;
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		/* one 32-bit load grabs both source and dest ports */
		g->flow.ports = load_word(skb, nhoff);
		/* fall through */
	case IPPROTO_ICMP:
		g->flow.ip_proto = ip_proto;
		update_stats(skb, g);
		break;
	default:
		break;
	}
}
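
/* Offset handoff convention used throughout this file: each parser
 * reads the current header offset from skb->cb[0], advances it past
 * the header it consumed, writes it back, and then tail-calls the
 * next parser via parse_eth_proto(). cb[] is per-packet scratch
 * space that socket filter programs may read and write, which makes
 * it a convenient channel for passing state across tail calls:
 * tail-called programs do not inherit the caller's stack or
 * registers, so per-packet state must travel through cb[] or maps.
 */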
156 156 157 SEC("socket") 157 SEC("socket") 158 int bpf_func_ip(struct __sk_buff *skb) 158 int bpf_func_ip(struct __sk_buff *skb) 159 { 159 { 160 struct globals *g = this_cpu_globals() 160 struct globals *g = this_cpu_globals(); 161 __u32 nhoff, verlen, ip_proto; 161 __u32 nhoff, verlen, ip_proto; 162 162 163 if (!g) 163 if (!g) 164 return 0; 164 return 0; 165 165 166 nhoff = skb->cb[0]; 166 nhoff = skb->cb[0]; 167 167 168 if (unlikely(ip_is_fragment(skb, nhoff 168 if (unlikely(ip_is_fragment(skb, nhoff))) 169 return 0; 169 return 0; 170 170 171 ip_proto = load_byte(skb, nhoff + offs 171 ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol)); 172 172 173 if (ip_proto != IPPROTO_GRE) { 173 if (ip_proto != IPPROTO_GRE) { 174 g->flow.src = load_word(skb, n 174 g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr)); 175 g->flow.dst = load_word(skb, n 175 g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr)); 176 } 176 } 177 177 178 verlen = load_byte(skb, nhoff + 0/*off 178 verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/); 179 nhoff += (verlen & 0xF) << 2; 179 nhoff += (verlen & 0xF) << 2; 180 180 181 skb->cb[0] = nhoff; 181 skb->cb[0] = nhoff; 182 parse_ip_proto(skb, g, ip_proto); 182 parse_ip_proto(skb, g, ip_proto); 183 return 0; 183 return 0; 184 } 184 } 185 185 186 SEC("socket") 186 SEC("socket") 187 int bpf_func_ipv6(struct __sk_buff *skb) 187 int bpf_func_ipv6(struct __sk_buff *skb) 188 { 188 { 189 struct globals *g = this_cpu_globals() 189 struct globals *g = this_cpu_globals(); 190 __u32 nhoff, ip_proto; 190 __u32 nhoff, ip_proto; 191 191 192 if (!g) 192 if (!g) 193 return 0; 193 return 0; 194 194 195 nhoff = skb->cb[0]; 195 nhoff = skb->cb[0]; 196 196 197 ip_proto = load_byte(skb, 197 ip_proto = load_byte(skb, 198 nhoff + offsetof( 198 nhoff + offsetof(struct ipv6hdr, nexthdr)); 199 g->flow.src = ipv6_addr_hash(skb, 199 g->flow.src = ipv6_addr_hash(skb, 200 nhoff + o 200 nhoff + offsetof(struct ipv6hdr, saddr)); 201 g->flow.dst = ipv6_addr_hash(skb, 201 g->flow.dst = ipv6_addr_hash(skb, 202 nhoff + o 202 nhoff + offsetof(struct ipv6hdr, daddr)); 203 nhoff += sizeof(struct ipv6hdr); 203 nhoff += sizeof(struct ipv6hdr); 204 204 205 skb->cb[0] = nhoff; 205 skb->cb[0] = nhoff; 206 parse_ip_proto(skb, g, ip_proto); 206 parse_ip_proto(skb, g, ip_proto); 207 return 0; 207 return 0; 208 } 208 } 209 209 210 SEC("socket") 210 SEC("socket") 211 int bpf_func_vlan(struct __sk_buff *skb) 211 int bpf_func_vlan(struct __sk_buff *skb) 212 { 212 { 213 __u32 nhoff, proto; 213 __u32 nhoff, proto; 214 214 215 nhoff = skb->cb[0]; 215 nhoff = skb->cb[0]; 216 216 217 proto = load_half(skb, nhoff + offseto 217 proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, 218 218 h_vlan_encapsulated_proto)); 219 nhoff += sizeof(struct vlan_hdr); 219 nhoff += sizeof(struct vlan_hdr); 220 skb->cb[0] = nhoff; 220 skb->cb[0] = nhoff; 221 221 222 parse_eth_proto(skb, proto); 222 parse_eth_proto(skb, proto); 223 223 224 return 0; 224 return 0; 225 } 225 } 226 226 227 SEC("socket") 227 SEC("socket") 228 int bpf_func_mpls(struct __sk_buff *skb) 228 int bpf_func_mpls(struct __sk_buff *skb) 229 { 229 { 230 __u32 nhoff, label; 230 __u32 nhoff, label; 231 231 232 nhoff = skb->cb[0]; 232 nhoff = skb->cb[0]; 233 233 234 label = load_word(skb, nhoff); 234 label = load_word(skb, nhoff); 235 nhoff += sizeof(struct mpls_label); 235 nhoff += sizeof(struct mpls_label); 236 skb->cb[0] = nhoff; 236 skb->cb[0] = nhoff; 237 237 238 if (label & MPLS_LS_S_MASK) { 238 if (label & 

SEC("socket")
int bpf_func_mpls(struct __sk_buff *skb)
{
	__u32 nhoff, label;

	nhoff = skb->cb[0];

	label = load_word(skb, nhoff);
	nhoff += sizeof(struct mpls_label);
	skb->cb[0] = nhoff;

	if (label & MPLS_LS_S_MASK) {
		/* bottom of the MPLS stack: guess the payload type
		 * from the IP version nibble
		 */
		__u8 verlen = load_byte(skb, nhoff);

		if ((verlen & 0xF0) == 0x40)
			parse_eth_proto(skb, ETH_P_IP);
		else
			parse_eth_proto(skb, ETH_P_IPV6);
	} else {
		parse_eth_proto(skb, ETH_P_MPLS_UC);
	}

	return 0;
}

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(max_entries, 8);
	__array(values, u32 (void *));
} prog_array_init SEC(".maps") = {
	.values = {
		[PARSE_VLAN] = (void *)&bpf_func_vlan,
		[PARSE_IP]   = (void *)&bpf_func_ip,
		[PARSE_IPV6] = (void *)&bpf_func_ipv6,
		[PARSE_MPLS] = (void *)&bpf_func_mpls,
	},
};

/* Protocol dispatch routine. It tail-calls the next BPF program
 * depending on the eth proto. Note, we could have used ...
 *
 *   bpf_tail_call(skb, &prog_array_init, proto);
 *
 * ... but it would need a large prog_array and cannot be optimised
 * given the map key is not static.
 */
static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
{
	switch (proto) {
	case ETH_P_8021Q:
	case ETH_P_8021AD:
		bpf_tail_call(skb, &prog_array_init, PARSE_VLAN);
		break;
	case ETH_P_MPLS_UC:
	case ETH_P_MPLS_MC:
		bpf_tail_call(skb, &prog_array_init, PARSE_MPLS);
		break;
	case ETH_P_IP:
		bpf_tail_call(skb, &prog_array_init, PARSE_IP);
		break;
	case ETH_P_IPV6:
		bpf_tail_call(skb, &prog_array_init, PARSE_IPV6);
		break;
	}
}

SEC("socket")
int main_prog(struct __sk_buff *skb)
{
	__u32 nhoff = ETH_HLEN;
	__u32 proto = load_half(skb, 12); /* ethertype at offset 12 */

	skb->cb[0] = nhoff;
	parse_eth_proto(skb, proto);
	return 0;
}

char _license[] SEC("license") = "GPL";
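
/* Userspace attach sketch. The real loader for this sample lives in a
 * separate file; what follows is only a minimal, illustrative libbpf
 * sequence (error handling and includes omitted, object file name
 * assumed):
 *
 *	struct bpf_object *obj = bpf_object__open_file("sockex3_kern.o", NULL);
 *	struct bpf_program *prog;
 *	int sock, prog_fd;
 *
 *	bpf_object__load(obj);		// also populates prog_array_init
 *	prog = bpf_object__find_program_by_name(obj, "main_prog");
 *	prog_fd = bpf_program__fd(prog);
 *
 *	sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, sizeof(prog_fd));
 *
 * Every packet seen on the socket then enters main_prog(), and the
 * per-flow counters accumulate in hash_map, which userspace can read
 * with bpf_map_lookup_elem() on the map's fd.
 */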