1 // SPDX-License-Identifier: GPL-2.0-only 1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 2 /* 3 * This code is taken from the Android Open So 3 * This code is taken from the Android Open Source Project and the author 4 * (Maciej Żenczykowski) has gave permission 4 * (Maciej Żenczykowski) has gave permission to relicense it under the 5 * GPLv2. Therefore this program is free softw 5 * GPLv2. Therefore this program is free software; 6 * You can redistribute it and/or modify it un 6 * You can redistribute it and/or modify it under the terms of the GNU 7 * General Public License version 2 as publish 7 * General Public License version 2 as published by the Free Software 8 * Foundation 8 * Foundation 9 9 10 * The original headers, including the origina 10 * The original headers, including the original license headers, are 11 * included below for completeness. 11 * included below for completeness. 12 * 12 * 13 * Copyright (C) 2019 The Android Open Source 13 * Copyright (C) 2019 The Android Open Source Project 14 * 14 * 15 * Licensed under the Apache License, Version 15 * Licensed under the Apache License, Version 2.0 (the "License"); 16 * you may not use this file except in complia 16 * you may not use this file except in compliance with the License. 17 * You may obtain a copy of the License at 17 * You may obtain a copy of the License at 18 * 18 * 19 * http://www.apache.org/licenses/LICENSE 19 * http://www.apache.org/licenses/LICENSE-2.0 20 * 20 * 21 * Unless required by applicable law or agreed 21 * Unless required by applicable law or agreed to in writing, software 22 * distributed under the License is distribute 22 * distributed under the License is distributed on an "AS IS" BASIS, 23 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIN 23 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 24 * See the License for the specific language g 24 * See the License for the specific language governing permissions and 25 * limitations under the License. 25 * limitations under the License. 26 */ 26 */ 27 #include <linux/bpf.h> 27 #include <linux/bpf.h> 28 #include <linux/if.h> 28 #include <linux/if.h> 29 #include <linux/if_ether.h> 29 #include <linux/if_ether.h> 30 #include <linux/if_packet.h> 30 #include <linux/if_packet.h> 31 #include <linux/in.h> 31 #include <linux/in.h> 32 #include <linux/in6.h> 32 #include <linux/in6.h> 33 #include <linux/ip.h> 33 #include <linux/ip.h> 34 #include <linux/ipv6.h> 34 #include <linux/ipv6.h> 35 #include <linux/pkt_cls.h> 35 #include <linux/pkt_cls.h> 36 #include <linux/swab.h> 36 #include <linux/swab.h> 37 #include <stdbool.h> 37 #include <stdbool.h> 38 #include <stdint.h> 38 #include <stdint.h> 39 39 40 40 41 #include <linux/udp.h> 41 #include <linux/udp.h> 42 42 43 #include <bpf/bpf_helpers.h> 43 #include <bpf/bpf_helpers.h> 44 #include <bpf/bpf_endian.h> 44 #include <bpf/bpf_endian.h> 45 45 46 #define IP_DF 0x4000 // Flag: "Don't Fragment 46 #define IP_DF 0x4000 // Flag: "Don't Fragment" 47 47 48 SEC("schedcls/ingress6/nat_6") 48 SEC("schedcls/ingress6/nat_6") 49 int sched_cls_ingress6_nat_6_prog(struct __sk_ 49 int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb) 50 { 50 { 51 const int l2_header_size = sizeof(str 51 const int l2_header_size = sizeof(struct ethhdr); 52 void *data = (void *)(long)skb->data; 52 void *data = (void *)(long)skb->data; 53 const void *data_end = (void *)(long)s 53 const void *data_end = (void *)(long)skb->data_end; 54 const struct ethhdr * const eth = data 54 const struct ethhdr * const eth = data; // used iff is_ethernet 55 const struct ipv6hdr * const ip6 = (v 55 const struct ipv6hdr * const ip6 = (void *)(eth + 1); 56 56 57 // Require ethernet dst mac address to 57 // Require ethernet dst mac address to be our unicast address. 58 if (skb->pkt_type != PACKET_HOST) 58 if (skb->pkt_type != PACKET_HOST) 59 return TC_ACT_OK; 59 return TC_ACT_OK; 60 60 61 // Must be meta-ethernet IPv6 frame 61 // Must be meta-ethernet IPv6 frame 62 if (skb->protocol != bpf_htons(ETH_P_I 62 if (skb->protocol != bpf_htons(ETH_P_IPV6)) 63 return TC_ACT_OK; 63 return TC_ACT_OK; 64 64 65 // Must have (ethernet and) ipv6 heade 65 // Must have (ethernet and) ipv6 header 66 if (data + l2_header_size + sizeof(*ip 66 if (data + l2_header_size + sizeof(*ip6) > data_end) 67 return TC_ACT_OK; 67 return TC_ACT_OK; 68 68 69 // Ethertype - if present - must be IP 69 // Ethertype - if present - must be IPv6 70 if (eth->h_proto != bpf_htons(ETH_P_IP 70 if (eth->h_proto != bpf_htons(ETH_P_IPV6)) 71 return TC_ACT_OK; 71 return TC_ACT_OK; 72 72 73 // IP version must be 6 73 // IP version must be 6 74 if (ip6->version != 6) 74 if (ip6->version != 6) 75 return TC_ACT_OK; 75 return TC_ACT_OK; 76 // Maximum IPv6 payload length that ca 76 // Maximum IPv6 payload length that can be translated to IPv4 77 if (bpf_ntohs(ip6->payload_len) > 0xFF 77 if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr)) 78 return TC_ACT_OK; 78 return TC_ACT_OK; 79 switch (ip6->nexthdr) { 79 switch (ip6->nexthdr) { 80 case IPPROTO_TCP: // For TCP & UDP th 80 case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6 81 case IPPROTO_UDP: // address means th 81 case IPPROTO_UDP: // address means there is no need to update their checksums. 82 case IPPROTO_GRE: // We do not need t 82 case IPPROTO_GRE: // We do not need to bother looking at GRE/ESP headers, 83 case IPPROTO_ESP: // since there is n 83 case IPPROTO_ESP: // since there is never a checksum to update. 84 break; 84 break; 85 default: // do not know how to handle 85 default: // do not know how to handle anything else 86 return TC_ACT_OK; 86 return TC_ACT_OK; 87 } 87 } 88 88 89 struct ethhdr eth2; // used iff is_et 89 struct ethhdr eth2; // used iff is_ethernet 90 90 91 eth2 = *eth; // Co 91 eth2 = *eth; // Copy over the ethernet header (src/dst mac) 92 eth2.h_proto = bpf_htons(ETH_P_IP); / 92 eth2.h_proto = bpf_htons(ETH_P_IP); // But replace the ethertype 93 93 94 struct iphdr ip = { 94 struct iphdr ip = { 95 .version = 4, 95 .version = 4, // u4 96 .ihl = sizeof(struct iphdr) / 96 .ihl = sizeof(struct iphdr) / sizeof(__u32), // u4 97 .tos = (ip6->priority << 4) + 97 .tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4), // u8 98 .tot_len = bpf_htons(bpf_ntohs 98 .tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)), // u16 99 .id = 0, 99 .id = 0, // u16 100 .frag_off = bpf_htons(IP_DF), 100 .frag_off = bpf_htons(IP_DF), // u16 101 .ttl = ip6->hop_limit, 101 .ttl = ip6->hop_limit, // u8 102 .protocol = ip6->nexthdr, 102 .protocol = ip6->nexthdr, // u8 103 .check = 0, 103 .check = 0, // u16 104 .saddr = 0x0201a8c0, 104 .saddr = 0x0201a8c0, // u32 105 .daddr = 0x0101a8c0, 105 .daddr = 0x0101a8c0, // u32 106 }; 106 }; 107 107 108 // Calculate the IPv4 one's complement 108 // Calculate the IPv4 one's complement checksum of the IPv4 header. 109 __wsum sum4 = 0; 109 __wsum sum4 = 0; 110 110 111 for (int i = 0; i < sizeof(ip) / sizeo 111 for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i) 112 sum4 += ((__u16 *)&ip)[i]; 112 sum4 += ((__u16 *)&ip)[i]; 113 113 114 // Note that sum4 is guaranteed to be 114 // Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4 115 sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); 115 sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE 116 sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); 116 sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16 117 ip.check = (__u16)~sum4; 117 ip.check = (__u16)~sum4; // sum4 cannot be zero, so this is never 0xFFFF 118 118 119 // Calculate the *negative* IPv6 16-bi 119 // Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header. 120 __wsum sum6 = 0; 120 __wsum sum6 = 0; 121 // We'll end up with a non-zero sum du 121 // We'll end up with a non-zero sum due to ip6->version == 6 (which has '' bits) 122 for (int i = 0; i < sizeof(*ip6) / siz 122 for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i) 123 sum6 += ~((__u16 *)ip6)[i]; / 123 sum6 += ~((__u16 *)ip6)[i]; // note the bitwise negation 124 124 125 // Note that there is no L4 checksum u 125 // Note that there is no L4 checksum update: we are relying on the checksum neutrality 126 // of the ipv6 address chosen by netd' 126 // of the ipv6 address chosen by netd's ClatdController. 127 127 128 // Packet mutations begin - point of n 128 // Packet mutations begin - point of no return, but if this first modification fails 129 // the packet is probably still pristi 129 // the packet is probably still pristine, so let clatd handle it. 130 if (bpf_skb_change_proto(skb, bpf_hton 130 if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0)) 131 return TC_ACT_OK; 131 return TC_ACT_OK; 132 bpf_csum_update(skb, sum6); 132 bpf_csum_update(skb, sum6); 133 133 134 data = (void *)(long)skb->data; 134 data = (void *)(long)skb->data; 135 data_end = (void *)(long)skb->data_end 135 data_end = (void *)(long)skb->data_end; 136 if (data + l2_header_size + sizeof(str 136 if (data + l2_header_size + sizeof(struct iphdr) > data_end) 137 return TC_ACT_SHOT; 137 return TC_ACT_SHOT; 138 138 139 struct ethhdr *new_eth = data; 139 struct ethhdr *new_eth = data; 140 140 141 // Copy over the updated ethernet head 141 // Copy over the updated ethernet header 142 *new_eth = eth2; 142 *new_eth = eth2; 143 143 144 // Copy over the new ipv4 header. 144 // Copy over the new ipv4 header. 145 *(struct iphdr *)(new_eth + 1) = ip; 145 *(struct iphdr *)(new_eth + 1) = ip; 146 return bpf_redirect(skb->ifindex, BPF_ 146 return bpf_redirect(skb->ifindex, BPF_F_INGRESS); 147 } 147 } 148 148 149 SEC("schedcls/egress4/snat4") 149 SEC("schedcls/egress4/snat4") 150 int sched_cls_egress4_snat4_prog(struct __sk_b 150 int sched_cls_egress4_snat4_prog(struct __sk_buff *skb) 151 { 151 { 152 const int l2_header_size = sizeof(str 152 const int l2_header_size = sizeof(struct ethhdr); 153 void *data = (void *)(long)skb->data; 153 void *data = (void *)(long)skb->data; 154 const void *data_end = (void *)(long)s 154 const void *data_end = (void *)(long)skb->data_end; 155 const struct ethhdr *const eth = data; 155 const struct ethhdr *const eth = data; // used iff is_ethernet 156 const struct iphdr *const ip4 = (void 156 const struct iphdr *const ip4 = (void *)(eth + 1); 157 157 158 // Must be meta-ethernet IPv4 frame 158 // Must be meta-ethernet IPv4 frame 159 if (skb->protocol != bpf_htons(ETH_P_I 159 if (skb->protocol != bpf_htons(ETH_P_IP)) 160 return TC_ACT_OK; 160 return TC_ACT_OK; 161 161 162 // Must have ipv4 header 162 // Must have ipv4 header 163 if (data + l2_header_size + sizeof(str 163 if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end) 164 return TC_ACT_OK; 164 return TC_ACT_OK; 165 165 166 // Ethertype - if present - must be IP 166 // Ethertype - if present - must be IPv4 167 if (eth->h_proto != bpf_htons(ETH_P_IP 167 if (eth->h_proto != bpf_htons(ETH_P_IP)) 168 return TC_ACT_OK; 168 return TC_ACT_OK; 169 169 170 // IP version must be 4 170 // IP version must be 4 171 if (ip4->version != 4) 171 if (ip4->version != 4) 172 return TC_ACT_OK; 172 return TC_ACT_OK; 173 173 174 // We cannot handle IP options, just s 174 // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header 175 if (ip4->ihl != 5) 175 if (ip4->ihl != 5) 176 return TC_ACT_OK; 176 return TC_ACT_OK; 177 177 178 // Maximum IPv6 payload length that ca 178 // Maximum IPv6 payload length that can be translated to IPv4 179 if (bpf_htons(ip4->tot_len) > 0xFFFF - 179 if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr)) 180 return TC_ACT_OK; 180 return TC_ACT_OK; 181 181 182 // Calculate the IPv4 one's complement 182 // Calculate the IPv4 one's complement checksum of the IPv4 header. 183 __wsum sum4 = 0; 183 __wsum sum4 = 0; 184 184 185 for (int i = 0; i < sizeof(*ip4) / siz 185 for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i) 186 sum4 += ((__u16 *)ip4)[i]; 186 sum4 += ((__u16 *)ip4)[i]; 187 187 188 // Note that sum4 is guaranteed to be 188 // Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4 189 sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); 189 sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE 190 sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); 190 sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16 191 // for a correct checksum we should ge 191 // for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF 192 if (sum4 != 0xFFFF) 192 if (sum4 != 0xFFFF) 193 return TC_ACT_OK; 193 return TC_ACT_OK; 194 194 195 // Minimum IPv4 total length is the si 195 // Minimum IPv4 total length is the size of the header 196 if (bpf_ntohs(ip4->tot_len) < sizeof(* 196 if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4)) 197 return TC_ACT_OK; 197 return TC_ACT_OK; 198 198 199 // We are incapable of dealing with IP 199 // We are incapable of dealing with IPv4 fragments 200 if (ip4->frag_off & ~bpf_htons(IP_DF)) 200 if (ip4->frag_off & ~bpf_htons(IP_DF)) 201 return TC_ACT_OK; 201 return TC_ACT_OK; 202 202 203 switch (ip4->protocol) { 203 switch (ip4->protocol) { 204 case IPPROTO_TCP: // For TCP & UDP th 204 case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6 205 case IPPROTO_GRE: // address means th 205 case IPPROTO_GRE: // address means there is no need to update their checksums. 206 case IPPROTO_ESP: // We do not need t 206 case IPPROTO_ESP: // We do not need to bother looking at GRE/ESP headers, 207 break; // since there 207 break; // since there is never a checksum to update. 208 208 209 case IPPROTO_UDP: // See above commen 209 case IPPROTO_UDP: // See above comment, but must also have UDP header... 210 if (data + sizeof(*ip4) + size 210 if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end) 211 return TC_ACT_OK; 211 return TC_ACT_OK; 212 const struct udphdr *uh = (con 212 const struct udphdr *uh = (const struct udphdr *)(ip4 + 1); 213 // If IPv4/UDP checksum is 0 t 213 // If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the 214 // checksum. Otherwise the ne 214 // checksum. Otherwise the network or more likely the NAT64 gateway might 215 // drop the packet because in 215 // drop the packet because in most cases IPv6/UDP packets with a zero checksum 216 // are invalid. See RFC 6935. 216 // are invalid. See RFC 6935. TODO: calculate checksum via bpf_csum_diff() 217 if (!uh->check) 217 if (!uh->check) 218 return TC_ACT_OK; 218 return TC_ACT_OK; 219 break; 219 break; 220 220 221 default: // do not know how to handle 221 default: // do not know how to handle anything else 222 return TC_ACT_OK; 222 return TC_ACT_OK; 223 } 223 } 224 struct ethhdr eth2; // used iff is_et 224 struct ethhdr eth2; // used iff is_ethernet 225 225 226 eth2 = *eth; // Co 226 eth2 = *eth; // Copy over the ethernet header (src/dst mac) 227 eth2.h_proto = bpf_htons(ETH_P_IPV6); 227 eth2.h_proto = bpf_htons(ETH_P_IPV6); // But replace the ethertype 228 228 229 struct ipv6hdr ip6 = { 229 struct ipv6hdr ip6 = { 230 .version = 6, 230 .version = 6, // __u8:4 231 .priority = ip4->tos >> 4, 231 .priority = ip4->tos >> 4, // __u8:4 232 .flow_lbl = {(ip4->tos & 0xF) 232 .flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0}, // __u8[3] 233 .payload_len = bpf_htons(bpf_n 233 .payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20), // __be16 234 .nexthdr = ip4->protocol, 234 .nexthdr = ip4->protocol, // __u8 235 .hop_limit = ip4->ttl, 235 .hop_limit = ip4->ttl, // __u8 236 }; 236 }; 237 ip6.saddr.in6_u.u6_addr32[0] = bpf_hto 237 ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8); 238 ip6.saddr.in6_u.u6_addr32[1] = 0; 238 ip6.saddr.in6_u.u6_addr32[1] = 0; 239 ip6.saddr.in6_u.u6_addr32[2] = 0; 239 ip6.saddr.in6_u.u6_addr32[2] = 0; 240 ip6.saddr.in6_u.u6_addr32[3] = bpf_hto 240 ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1); 241 ip6.daddr.in6_u.u6_addr32[0] = bpf_hto 241 ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8); 242 ip6.daddr.in6_u.u6_addr32[1] = 0; 242 ip6.daddr.in6_u.u6_addr32[1] = 0; 243 ip6.daddr.in6_u.u6_addr32[2] = 0; 243 ip6.daddr.in6_u.u6_addr32[2] = 0; 244 ip6.daddr.in6_u.u6_addr32[3] = bpf_hto 244 ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2); 245 245 246 // Calculate the IPv6 16-bit one's com 246 // Calculate the IPv6 16-bit one's complement checksum of the IPv6 header. 247 __wsum sum6 = 0; 247 __wsum sum6 = 0; 248 // We'll end up with a non-zero sum du 248 // We'll end up with a non-zero sum due to ip6.version == 6 249 for (int i = 0; i < sizeof(ip6) / size 249 for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i) 250 sum6 += ((__u16 *)&ip6)[i]; 250 sum6 += ((__u16 *)&ip6)[i]; 251 251 252 // Packet mutations begin - point of n 252 // Packet mutations begin - point of no return, but if this first modification fails 253 // the packet is probably still pristi 253 // the packet is probably still pristine, so let clatd handle it. 254 if (bpf_skb_change_proto(skb, bpf_hton 254 if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0)) 255 return TC_ACT_OK; 255 return TC_ACT_OK; 256 256 257 // This takes care of updating the skb 257 // This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet. 258 // In such a case, skb->csum is a 16-b 258 // In such a case, skb->csum is a 16-bit one's complement sum of the entire payload, 259 // thus we need to subtract out the ip 259 // thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum. 260 // However, we've already verified the 260 // However, we've already verified the ipv4 checksum is correct and thus 0. 261 // Thus we only need to add the ipv6 h 261 // Thus we only need to add the ipv6 header's sum. 262 // 262 // 263 // bpf_csum_update() always succeeds i 263 // bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error 264 // (-ENOTSUPP) if it isn't. So we jus 264 // (-ENOTSUPP) if it isn't. So we just ignore the return code (see above for more details). 265 bpf_csum_update(skb, sum6); 265 bpf_csum_update(skb, sum6); 266 266 267 // bpf_skb_change_proto() invalidates 267 // bpf_skb_change_proto() invalidates all pointers - reload them. 268 data = (void *)(long)skb->data; 268 data = (void *)(long)skb->data; 269 data_end = (void *)(long)skb->data_end 269 data_end = (void *)(long)skb->data_end; 270 270 271 // I cannot think of any valid way for 271 // I cannot think of any valid way for this error condition to trigger, however I do 272 // believe the explicit check is requi 272 // believe the explicit check is required to keep the in kernel ebpf verifier happy. 273 if (data + l2_header_size + sizeof(ip6 273 if (data + l2_header_size + sizeof(ip6) > data_end) 274 return TC_ACT_SHOT; 274 return TC_ACT_SHOT; 275 275 276 struct ethhdr *new_eth = data; 276 struct ethhdr *new_eth = data; 277 277 278 // Copy over the updated ethernet head 278 // Copy over the updated ethernet header 279 *new_eth = eth2; 279 *new_eth = eth2; 280 // Copy over the new ipv4 header. 280 // Copy over the new ipv4 header. 281 *(struct ipv6hdr *)(new_eth + 1) = ip6 281 *(struct ipv6hdr *)(new_eth + 1) = ip6; 282 return TC_ACT_OK; 282 return TC_ACT_OK; 283 } 283 } 284 284 285 char _license[] SEC("license") = ("GPL"); 285 char _license[] SEC("license") = ("GPL"); 286 286
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.