~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/testing/selftests/bpf/progs/test_l4lb.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* Copyright (c) 2017 Facebook
  2  *
  3  * This program is free software; you can redistribute it and/or
  4  * modify it under the terms of version 2 of the GNU General Public
  5  * License as published by the Free Software Foundation.
  6  */
  7 #include <stddef.h>
  8 #include <stdbool.h>
  9 #include <string.h>
 10 #include <linux/pkt_cls.h>
 11 #include <linux/bpf.h>
 12 #include <linux/in.h>
 13 #include <linux/if_ether.h>
 14 #include <linux/ip.h>
 15 #include <linux/ipv6.h>
 16 #include <linux/icmp.h>
 17 #include <linux/icmpv6.h>
 18 #include <linux/tcp.h>
 19 #include <linux/udp.h>
 20 #include <bpf/bpf_helpers.h>
 21 #include "test_iptunnel_common.h"
 22 #include <bpf/bpf_endian.h>
 23 
 24 static inline __u32 rol32(__u32 word, unsigned int shift)
 25 {
 26         return (word << shift) | (word >> ((-shift) & 31));
 27 }
 28 
 29 /* copy paste of jhash from kernel sources to make sure llvm
 30  * can compile it into valid sequence of bpf instructions
 31  */
 32 #define __jhash_mix(a, b, c)                    \
 33 {                                               \
 34         a -= c;  a ^= rol32(c, 4);  c += b;     \
 35         b -= a;  b ^= rol32(a, 6);  a += c;     \
 36         c -= b;  c ^= rol32(b, 8);  b += a;     \
 37         a -= c;  a ^= rol32(c, 16); c += b;     \
 38         b -= a;  b ^= rol32(a, 19); a += c;     \
 39         c -= b;  c ^= rol32(b, 4);  b += a;     \
 40 }
 41 
 42 #define __jhash_final(a, b, c)                  \
 43 {                                               \
 44         c ^= b; c -= rol32(b, 14);              \
 45         a ^= c; a -= rol32(c, 11);              \
 46         b ^= a; b -= rol32(a, 25);              \
 47         c ^= b; c -= rol32(b, 16);              \
 48         a ^= c; a -= rol32(c, 4);               \
 49         b ^= a; b -= rol32(a, 14);              \
 50         c ^= b; c -= rol32(b, 24);              \
 51 }
 52 
 53 #define JHASH_INITVAL           0xdeadbeef
 54 
 55 typedef unsigned int u32;
 56 
 57 static inline u32 jhash(const void *key, u32 length, u32 initval)
 58 {
 59         u32 a, b, c;
 60         const unsigned char *k = key;
 61 
 62         a = b = c = JHASH_INITVAL + length + initval;
 63 
 64         while (length > 12) {
 65                 a += *(u32 *)(k);
 66                 b += *(u32 *)(k + 4);
 67                 c += *(u32 *)(k + 8);
 68                 __jhash_mix(a, b, c);
 69                 length -= 12;
 70                 k += 12;
 71         }
 72         switch (length) {
 73         case 12: c += (u32)k[11]<<24;
 74         case 11: c += (u32)k[10]<<16;
 75         case 10: c += (u32)k[9]<<8;
 76         case 9:  c += k[8];
 77         case 8:  b += (u32)k[7]<<24;
 78         case 7:  b += (u32)k[6]<<16;
 79         case 6:  b += (u32)k[5]<<8;
 80         case 5:  b += k[4];
 81         case 4:  a += (u32)k[3]<<24;
 82         case 3:  a += (u32)k[2]<<16;
 83         case 2:  a += (u32)k[1]<<8;
 84         case 1:  a += k[0];
 85                  __jhash_final(a, b, c);
 86         case 0: /* Nothing left to add */
 87                 break;
 88         }
 89 
 90         return c;
 91 }
 92 
 93 static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
 94 {
 95         a += initval;
 96         b += initval;
 97         c += initval;
 98         __jhash_final(a, b, c);
 99         return c;
100 }
101 
102 static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
103 {
104         return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
105 }
106 
107 #define PCKT_FRAGMENTED 65343
108 #define IPV4_HDR_LEN_NO_OPT 20
109 #define IPV4_PLUS_ICMP_HDR 28
110 #define IPV6_PLUS_ICMP_HDR 48
111 #define RING_SIZE 2
112 #define MAX_VIPS 12
113 #define MAX_REALS 5
114 #define CTL_MAP_SIZE 16
115 #define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
116 #define F_IPV6 (1 << 0)
117 #define F_HASH_NO_SRC_PORT (1 << 0)
118 #define F_ICMP (1 << 0)
119 #define F_SYN_SET (1 << 1)
120 
121 struct packet_description {
122         union {
123                 __be32 src;
124                 __be32 srcv6[4];
125         };
126         union {
127                 __be32 dst;
128                 __be32 dstv6[4];
129         };
130         union {
131                 __u32 ports;
132                 __u16 port16[2];
133         };
134         __u8 proto;
135         __u8 flags;
136 };
137 
138 struct ctl_value {
139         union {
140                 __u64 value;
141                 __u32 ifindex;
142                 __u8 mac[6];
143         };
144 };
145 
146 struct vip_meta {
147         __u32 flags;
148         __u32 vip_num;
149 };
150 
151 struct real_definition {
152         union {
153                 __be32 dst;
154                 __be32 dstv6[4];
155         };
156         __u8 flags;
157 };
158 
159 struct vip_stats {
160         __u64 bytes;
161         __u64 pkts;
162 };
163 
164 struct eth_hdr {
165         unsigned char eth_dest[ETH_ALEN];
166         unsigned char eth_source[ETH_ALEN];
167         unsigned short eth_proto;
168 };
169 
170 struct {
171         __uint(type, BPF_MAP_TYPE_HASH);
172         __uint(max_entries, MAX_VIPS);
173         __type(key, struct vip);
174         __type(value, struct vip_meta);
175 } vip_map SEC(".maps");
176 
177 struct {
178         __uint(type, BPF_MAP_TYPE_ARRAY);
179         __uint(max_entries, CH_RINGS_SIZE);
180         __type(key, __u32);
181         __type(value, __u32);
182 } ch_rings SEC(".maps");
183 
184 struct {
185         __uint(type, BPF_MAP_TYPE_ARRAY);
186         __uint(max_entries, MAX_REALS);
187         __type(key, __u32);
188         __type(value, struct real_definition);
189 } reals SEC(".maps");
190 
191 struct {
192         __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
193         __uint(max_entries, MAX_VIPS);
194         __type(key, __u32);
195         __type(value, struct vip_stats);
196 } stats SEC(".maps");
197 
198 struct {
199         __uint(type, BPF_MAP_TYPE_ARRAY);
200         __uint(max_entries, CTL_MAP_SIZE);
201         __type(key, __u32);
202         __type(value, struct ctl_value);
203 } ctl_array SEC(".maps");
204 
205 static __always_inline __u32 get_packet_hash(struct packet_description *pckt,
206                                              bool ipv6)
207 {
208         if (ipv6)
209                 return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
210                                     pckt->ports, CH_RINGS_SIZE);
211         else
212                 return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
213 }
214 
215 static __always_inline bool get_packet_dst(struct real_definition **real,
216                                            struct packet_description *pckt,
217                                            struct vip_meta *vip_info,
218                                            bool is_ipv6)
219 {
220         __u32 hash = get_packet_hash(pckt, is_ipv6) % RING_SIZE;
221         __u32 key = RING_SIZE * vip_info->vip_num + hash;
222         __u32 *real_pos;
223 
224         real_pos = bpf_map_lookup_elem(&ch_rings, &key);
225         if (!real_pos)
226                 return false;
227         key = *real_pos;
228         *real = bpf_map_lookup_elem(&reals, &key);
229         if (!(*real))
230                 return false;
231         return true;
232 }
233 
234 static __always_inline int parse_icmpv6(void *data, void *data_end, __u64 off,
235                                         struct packet_description *pckt)
236 {
237         struct icmp6hdr *icmp_hdr;
238         struct ipv6hdr *ip6h;
239 
240         icmp_hdr = data + off;
241         if (icmp_hdr + 1 > data_end)
242                 return TC_ACT_SHOT;
243         if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
244                 return TC_ACT_OK;
245         off += sizeof(struct icmp6hdr);
246         ip6h = data + off;
247         if (ip6h + 1 > data_end)
248                 return TC_ACT_SHOT;
249         pckt->proto = ip6h->nexthdr;
250         pckt->flags |= F_ICMP;
251         memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
252         memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
253         return TC_ACT_UNSPEC;
254 }
255 
256 static __always_inline int parse_icmp(void *data, void *data_end, __u64 off,
257                                       struct packet_description *pckt)
258 {
259         struct icmphdr *icmp_hdr;
260         struct iphdr *iph;
261 
262         icmp_hdr = data + off;
263         if (icmp_hdr + 1 > data_end)
264                 return TC_ACT_SHOT;
265         if (icmp_hdr->type != ICMP_DEST_UNREACH ||
266             icmp_hdr->code != ICMP_FRAG_NEEDED)
267                 return TC_ACT_OK;
268         off += sizeof(struct icmphdr);
269         iph = data + off;
270         if (iph + 1 > data_end)
271                 return TC_ACT_SHOT;
272         if (iph->ihl != 5)
273                 return TC_ACT_SHOT;
274         pckt->proto = iph->protocol;
275         pckt->flags |= F_ICMP;
276         pckt->src = iph->daddr;
277         pckt->dst = iph->saddr;
278         return TC_ACT_UNSPEC;
279 }
280 
281 static __always_inline bool parse_udp(void *data, __u64 off, void *data_end,
282                                       struct packet_description *pckt)
283 {
284         struct udphdr *udp;
285         udp = data + off;
286 
287         if (udp + 1 > data_end)
288                 return false;
289 
290         if (!(pckt->flags & F_ICMP)) {
291                 pckt->port16[0] = udp->source;
292                 pckt->port16[1] = udp->dest;
293         } else {
294                 pckt->port16[0] = udp->dest;
295                 pckt->port16[1] = udp->source;
296         }
297         return true;
298 }
299 
300 static __always_inline bool parse_tcp(void *data, __u64 off, void *data_end,
301                                       struct packet_description *pckt)
302 {
303         struct tcphdr *tcp;
304 
305         tcp = data + off;
306         if (tcp + 1 > data_end)
307                 return false;
308 
309         if (tcp->syn)
310                 pckt->flags |= F_SYN_SET;
311 
312         if (!(pckt->flags & F_ICMP)) {
313                 pckt->port16[0] = tcp->source;
314                 pckt->port16[1] = tcp->dest;
315         } else {
316                 pckt->port16[0] = tcp->dest;
317                 pckt->port16[1] = tcp->source;
318         }
319         return true;
320 }
321 
322 static __always_inline int process_packet(void *data, __u64 off, void *data_end,
323                                           bool is_ipv6, struct __sk_buff *skb)
324 {
325         void *pkt_start = (void *)(long)skb->data;
326         struct packet_description pckt = {};
327         struct eth_hdr *eth = pkt_start;
328         struct bpf_tunnel_key tkey = {};
329         struct vip_stats *data_stats;
330         struct real_definition *dst;
331         struct vip_meta *vip_info;
332         struct ctl_value *cval;
333         __u32 v4_intf_pos = 1;
334         __u32 v6_intf_pos = 2;
335         struct ipv6hdr *ip6h;
336         struct vip vip = {};
337         struct iphdr *iph;
338         int tun_flag = 0;
339         __u16 pkt_bytes;
340         __u64 iph_len;
341         __u32 ifindex;
342         __u8 protocol;
343         __u32 vip_num;
344         int action;
345 
346         tkey.tunnel_ttl = 64;
347         if (is_ipv6) {
348                 ip6h = data + off;
349                 if (ip6h + 1 > data_end)
350                         return TC_ACT_SHOT;
351 
352                 iph_len = sizeof(struct ipv6hdr);
353                 protocol = ip6h->nexthdr;
354                 pckt.proto = protocol;
355                 pkt_bytes = bpf_ntohs(ip6h->payload_len);
356                 off += iph_len;
357                 if (protocol == IPPROTO_FRAGMENT) {
358                         return TC_ACT_SHOT;
359                 } else if (protocol == IPPROTO_ICMPV6) {
360                         action = parse_icmpv6(data, data_end, off, &pckt);
361                         if (action >= 0)
362                                 return action;
363                         off += IPV6_PLUS_ICMP_HDR;
364                 } else {
365                         memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
366                         memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
367                 }
368         } else {
369                 iph = data + off;
370                 if (iph + 1 > data_end)
371                         return TC_ACT_SHOT;
372                 if (iph->ihl != 5)
373                         return TC_ACT_SHOT;
374 
375                 protocol = iph->protocol;
376                 pckt.proto = protocol;
377                 pkt_bytes = bpf_ntohs(iph->tot_len);
378                 off += IPV4_HDR_LEN_NO_OPT;
379 
380                 if (iph->frag_off & PCKT_FRAGMENTED)
381                         return TC_ACT_SHOT;
382                 if (protocol == IPPROTO_ICMP) {
383                         action = parse_icmp(data, data_end, off, &pckt);
384                         if (action >= 0)
385                                 return action;
386                         off += IPV4_PLUS_ICMP_HDR;
387                 } else {
388                         pckt.src = iph->saddr;
389                         pckt.dst = iph->daddr;
390                 }
391         }
392         protocol = pckt.proto;
393 
394         if (protocol == IPPROTO_TCP) {
395                 if (!parse_tcp(data, off, data_end, &pckt))
396                         return TC_ACT_SHOT;
397         } else if (protocol == IPPROTO_UDP) {
398                 if (!parse_udp(data, off, data_end, &pckt))
399                         return TC_ACT_SHOT;
400         } else {
401                 return TC_ACT_SHOT;
402         }
403 
404         if (is_ipv6)
405                 memcpy(vip.daddr.v6, pckt.dstv6, 16);
406         else
407                 vip.daddr.v4 = pckt.dst;
408 
409         vip.dport = pckt.port16[1];
410         vip.protocol = pckt.proto;
411         vip_info = bpf_map_lookup_elem(&vip_map, &vip);
412         if (!vip_info) {
413                 vip.dport = 0;
414                 vip_info = bpf_map_lookup_elem(&vip_map, &vip);
415                 if (!vip_info)
416                         return TC_ACT_SHOT;
417                 pckt.port16[1] = 0;
418         }
419 
420         if (vip_info->flags & F_HASH_NO_SRC_PORT)
421                 pckt.port16[0] = 0;
422 
423         if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
424                 return TC_ACT_SHOT;
425 
426         if (dst->flags & F_IPV6) {
427                 cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
428                 if (!cval)
429                         return TC_ACT_SHOT;
430                 ifindex = cval->ifindex;
431                 memcpy(tkey.remote_ipv6, dst->dstv6, 16);
432                 tun_flag = BPF_F_TUNINFO_IPV6;
433         } else {
434                 cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
435                 if (!cval)
436                         return TC_ACT_SHOT;
437                 ifindex = cval->ifindex;
438                 tkey.remote_ipv4 = dst->dst;
439         }
440         vip_num = vip_info->vip_num;
441         data_stats = bpf_map_lookup_elem(&stats, &vip_num);
442         if (!data_stats)
443                 return TC_ACT_SHOT;
444         data_stats->pkts++;
445         data_stats->bytes += pkt_bytes;
446         bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
447         *(u32 *)eth->eth_dest = tkey.remote_ipv4;
448         return bpf_redirect(ifindex, 0);
449 }
450 
451 SEC("tc")
452 int balancer_ingress(struct __sk_buff *ctx)
453 {
454         void *data_end = (void *)(long)ctx->data_end;
455         void *data = (void *)(long)ctx->data;
456         struct eth_hdr *eth = data;
457         __u32 eth_proto;
458         __u32 nh_off;
459 
460         nh_off = sizeof(struct eth_hdr);
461         if (data + nh_off > data_end)
462                 return TC_ACT_SHOT;
463         eth_proto = eth->eth_proto;
464         if (eth_proto == bpf_htons(ETH_P_IP))
465                 return process_packet(data, nh_off, data_end, false, ctx);
466         else if (eth_proto == bpf_htons(ETH_P_IPV6))
467                 return process_packet(data, nh_off, data_end, true, ctx);
468         else
469                 return TC_ACT_SHOT;
470 }
471 char _license[] SEC("license") = "GPL";
472 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php