
TOMOYO Linux Cross Reference
Linux/net/ipv4/ip_tunnel.c


  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * Copyright (c) 2013 Nicira, Inc.
  4  */
  5 
  6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  7 
  8 #include <linux/capability.h>
  9 #include <linux/module.h>
 10 #include <linux/types.h>
 11 #include <linux/kernel.h>
 12 #include <linux/slab.h>
 13 #include <linux/uaccess.h>
 14 #include <linux/skbuff.h>
 15 #include <linux/netdevice.h>
 16 #include <linux/in.h>
 17 #include <linux/tcp.h>
 18 #include <linux/udp.h>
 19 #include <linux/if_arp.h>
 20 #include <linux/init.h>
 21 #include <linux/in6.h>
 22 #include <linux/inetdevice.h>
 23 #include <linux/igmp.h>
 24 #include <linux/netfilter_ipv4.h>
 25 #include <linux/etherdevice.h>
 26 #include <linux/if_ether.h>
 27 #include <linux/if_vlan.h>
 28 #include <linux/rculist.h>
 29 #include <linux/err.h>
 30 
 31 #include <net/sock.h>
 32 #include <net/ip.h>
 33 #include <net/icmp.h>
 34 #include <net/protocol.h>
 35 #include <net/ip_tunnels.h>
 36 #include <net/arp.h>
 37 #include <net/checksum.h>
 38 #include <net/dsfield.h>
 39 #include <net/inet_ecn.h>
 40 #include <net/xfrm.h>
 41 #include <net/net_namespace.h>
 42 #include <net/netns/generic.h>
 43 #include <net/rtnetlink.h>
 44 #include <net/udp.h>
 45 #include <net/dst_metadata.h>
 46 
 47 #if IS_ENABLED(CONFIG_IPV6)
 48 #include <net/ipv6.h>
 49 #include <net/ip6_fib.h>
 50 #include <net/ip6_route.h>
 51 #endif
 52 
 53 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
 54 {
 55         return hash_32((__force u32)key ^ (__force u32)remote,
 56                          IP_TNL_HASH_BITS);
 57 }
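/* Note: hash_32() folds (key ^ remote) into an IP_TNL_HASH_BITS-wide bucket
 * index (7 bits, i.e. 128 buckets per netns in current trees), so tunnels
 * are spread across the table by their (key, peer address) pair.
 */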
 58 
 59 static bool ip_tunnel_key_match(const struct ip_tunnel_parm_kern *p,
 60                                 const unsigned long *flags, __be32 key)
 61 {
 62         if (!test_bit(IP_TUNNEL_KEY_BIT, flags))
 63                 return !test_bit(IP_TUNNEL_KEY_BIT, p->i_flags);
 64 
 65         return test_bit(IP_TUNNEL_KEY_BIT, p->i_flags) && p->i_key == key;
 66 }
 67 
 68 /* Fallback tunnel: no source, no destination, no key, no options
 69 
 70    Tunnel hash table:
 71    We require an exact key match, i.e. if a key is present in the packet
 72    it will match only a tunnel with the same key; if no key is present,
 73    it will match only a keyless tunnel.
 74 
 75    All keyless packets, if not matched by a configured keyless tunnel,
 76    will match the fallback tunnel.
 77    Given src, dst and key, find the tunnel appropriate for the input.
 78 */
 79 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
 80                                    int link, const unsigned long *flags,
 81                                    __be32 remote, __be32 local,
 82                                    __be32 key)
 83 {
 84         struct ip_tunnel *t, *cand = NULL;
 85         struct hlist_head *head;
 86         struct net_device *ndev;
 87         unsigned int hash;
 88 
 89         hash = ip_tunnel_hash(key, remote);
 90         head = &itn->tunnels[hash];
 91 
 92         hlist_for_each_entry_rcu(t, head, hash_node) {
 93                 if (local != t->parms.iph.saddr ||
 94                     remote != t->parms.iph.daddr ||
 95                     !(t->dev->flags & IFF_UP))
 96                         continue;
 97 
 98                 if (!ip_tunnel_key_match(&t->parms, flags, key))
 99                         continue;
100 
101                 if (READ_ONCE(t->parms.link) == link)
102                         return t;
103                 cand = t;
104         }
105 
106         hlist_for_each_entry_rcu(t, head, hash_node) {
107                 if (remote != t->parms.iph.daddr ||
108                     t->parms.iph.saddr != 0 ||
109                     !(t->dev->flags & IFF_UP))
110                         continue;
111 
112                 if (!ip_tunnel_key_match(&t->parms, flags, key))
113                         continue;
114 
115                 if (READ_ONCE(t->parms.link) == link)
116                         return t;
117                 if (!cand)
118                         cand = t;
119         }
120 
121         hash = ip_tunnel_hash(key, 0);
122         head = &itn->tunnels[hash];
123 
124         hlist_for_each_entry_rcu(t, head, hash_node) {
125                 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
126                     (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
127                         continue;
128 
129                 if (!(t->dev->flags & IFF_UP))
130                         continue;
131 
132                 if (!ip_tunnel_key_match(&t->parms, flags, key))
133                         continue;
134 
135                 if (READ_ONCE(t->parms.link) == link)
136                         return t;
137                 if (!cand)
138                         cand = t;
139         }
140 
141         hlist_for_each_entry_rcu(t, head, hash_node) {
142                 if ((!test_bit(IP_TUNNEL_NO_KEY_BIT, flags) &&
143                      t->parms.i_key != key) ||
144                     t->parms.iph.saddr != 0 ||
145                     t->parms.iph.daddr != 0 ||
146                     !(t->dev->flags & IFF_UP))
147                         continue;
148 
149                 if (READ_ONCE(t->parms.link) == link)
150                         return t;
151                 if (!cand)
152                         cand = t;
153         }
154 
155         if (cand)
156                 return cand;
157 
158         t = rcu_dereference(itn->collect_md_tun);
159         if (t && t->dev->flags & IFF_UP)
160                 return t;
161 
162         ndev = READ_ONCE(itn->fb_tunnel_dev);
163         if (ndev && ndev->flags & IFF_UP)
164                 return netdev_priv(ndev);
165 
166         return NULL;
167 }
168 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
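/* The lookup above makes four passes in decreasing order of specificity
 * (exact saddr/daddr, daddr only, local/multicast address, key only),
 * preferring an exact parms.link match before any wildcard-link candidate,
 * then falls back to the collect_md and fallback devices. A minimal sketch
 * of a hypothetical receive handler resolving a tunnel this way; the pernet
 * id and the key/has_key values parsed from the packet are assumptions made
 * for illustration:
 */
static unsigned int example_net_id __read_mostly;

static struct ip_tunnel *example_resolve(struct sk_buff *skb,
					 __be32 key, bool has_key)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct ip_tunnel_net *itn;

	IP_TUNNEL_DECLARE_FLAGS(flags) = { };

	if (has_key)
		__set_bit(IP_TUNNEL_KEY_BIT, flags);

	itn = net_generic(dev_net(skb->dev), example_net_id);

	/* The RX path already runs inside an RCU read-side section. */
	return ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
				iph->saddr, iph->daddr, key);
}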
169 
170 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
171                                     struct ip_tunnel_parm_kern *parms)
172 {
173         unsigned int h;
174         __be32 remote;
175         __be32 i_key = parms->i_key;
176 
177         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
178                 remote = parms->iph.daddr;
179         else
180                 remote = 0;
181 
182         if (!test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags) &&
183             test_bit(IP_TUNNEL_VTI_BIT, parms->i_flags))
184                 i_key = 0;
185 
186         h = ip_tunnel_hash(i_key, remote);
187         return &itn->tunnels[h];
188 }
189 
190 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
191 {
192         struct hlist_head *head = ip_bucket(itn, &t->parms);
193 
194         if (t->collect_md)
195                 rcu_assign_pointer(itn->collect_md_tun, t);
196         hlist_add_head_rcu(&t->hash_node, head);
197 }
198 
199 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
200 {
201         if (t->collect_md)
202                 rcu_assign_pointer(itn->collect_md_tun, NULL);
203         hlist_del_init_rcu(&t->hash_node);
204 }
205 
206 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
207                                         struct ip_tunnel_parm_kern *parms,
208                                         int type)
209 {
210         __be32 remote = parms->iph.daddr;
211         __be32 local = parms->iph.saddr;
212         IP_TUNNEL_DECLARE_FLAGS(flags);
213         __be32 key = parms->i_key;
214         int link = parms->link;
215         struct ip_tunnel *t = NULL;
216         struct hlist_head *head = ip_bucket(itn, parms);
217 
218         ip_tunnel_flags_copy(flags, parms->i_flags);
219 
220         hlist_for_each_entry_rcu(t, head, hash_node) {
221                 if (local == t->parms.iph.saddr &&
222                     remote == t->parms.iph.daddr &&
223                     link == READ_ONCE(t->parms.link) &&
224                     type == t->dev->type &&
225                     ip_tunnel_key_match(&t->parms, flags, key))
226                         break;
227         }
228         return t;
229 }
230 
231 static struct net_device *__ip_tunnel_create(struct net *net,
232                                              const struct rtnl_link_ops *ops,
233                                              struct ip_tunnel_parm_kern *parms)
234 {
235         int err;
236         struct ip_tunnel *tunnel;
237         struct net_device *dev;
238         char name[IFNAMSIZ];
239 
240         err = -E2BIG;
241         if (parms->name[0]) {
242                 if (!dev_valid_name(parms->name))
243                         goto failed;
244                 strscpy(name, parms->name, IFNAMSIZ);
245         } else {
246                 if (strlen(ops->kind) > (IFNAMSIZ - 3))
247                         goto failed;
248                 strcpy(name, ops->kind);
249                 strcat(name, "%d");
250         }
251 
252         ASSERT_RTNL();
253         dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
254         if (!dev) {
255                 err = -ENOMEM;
256                 goto failed;
257         }
258         dev_net_set(dev, net);
259 
260         dev->rtnl_link_ops = ops;
261 
262         tunnel = netdev_priv(dev);
263         tunnel->parms = *parms;
264         tunnel->net = net;
265 
266         err = register_netdevice(dev);
267         if (err)
268                 goto failed_free;
269 
270         return dev;
271 
272 failed_free:
273         free_netdev(dev);
274 failed:
275         return ERR_PTR(err);
276 }
277 
278 static int ip_tunnel_bind_dev(struct net_device *dev)
279 {
280         struct net_device *tdev = NULL;
281         struct ip_tunnel *tunnel = netdev_priv(dev);
282         const struct iphdr *iph;
283         int hlen = LL_MAX_HEADER;
284         int mtu = ETH_DATA_LEN;
285         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
286 
287         iph = &tunnel->parms.iph;
288 
 289         /* Guess the output device to choose a reasonable mtu and needed_headroom */
290         if (iph->daddr) {
291                 struct flowi4 fl4;
292                 struct rtable *rt;
293 
294                 ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
295                                     iph->saddr, tunnel->parms.o_key,
296                                     RT_TOS(iph->tos), dev_net(dev),
297                                     tunnel->parms.link, tunnel->fwmark, 0, 0);
298                 rt = ip_route_output_key(tunnel->net, &fl4);
299 
300                 if (!IS_ERR(rt)) {
301                         tdev = rt->dst.dev;
302                         ip_rt_put(rt);
303                 }
304                 if (dev->type != ARPHRD_ETHER)
305                         dev->flags |= IFF_POINTOPOINT;
306 
307                 dst_cache_reset(&tunnel->dst_cache);
308         }
309 
310         if (!tdev && tunnel->parms.link)
311                 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
312 
313         if (tdev) {
314                 hlen = tdev->hard_header_len + tdev->needed_headroom;
315                 mtu = min(tdev->mtu, IP_MAX_MTU);
316         }
317 
318         dev->needed_headroom = t_hlen + hlen;
319         mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
320 
321         if (mtu < IPV4_MIN_MTU)
322                 mtu = IPV4_MIN_MTU;
323 
324         return mtu;
325 }
326 
327 static struct ip_tunnel *ip_tunnel_create(struct net *net,
328                                           struct ip_tunnel_net *itn,
329                                           struct ip_tunnel_parm_kern *parms)
330 {
331         struct ip_tunnel *nt;
332         struct net_device *dev;
333         int t_hlen;
334         int mtu;
335         int err;
336 
337         dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
338         if (IS_ERR(dev))
339                 return ERR_CAST(dev);
340 
341         mtu = ip_tunnel_bind_dev(dev);
342         err = dev_set_mtu(dev, mtu);
343         if (err)
344                 goto err_dev_set_mtu;
345 
346         nt = netdev_priv(dev);
347         t_hlen = nt->hlen + sizeof(struct iphdr);
348         dev->min_mtu = ETH_MIN_MTU;
349         dev->max_mtu = IP_MAX_MTU - t_hlen;
350         if (dev->type == ARPHRD_ETHER)
351                 dev->max_mtu -= dev->hard_header_len;
352 
353         ip_tunnel_add(itn, nt);
354         return nt;
355 
356 err_dev_set_mtu:
357         unregister_netdevice(dev);
358         return ERR_PTR(err);
359 }
360 
361 void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info)
362 {
363         const struct iphdr *iph = ip_hdr(skb);
364         const struct udphdr *udph;
365 
366         if (iph->protocol != IPPROTO_UDP)
367                 return;
368 
369         udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2));
370         info->encap.sport = udph->source;
371         info->encap.dport = udph->dest;
372 }
373 EXPORT_SYMBOL(ip_tunnel_md_udp_encap);
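/* In the helper above, iph->ihl counts the IPv4 header length in 32-bit
 * words, so (iph->ihl << 2) converts it to bytes (ihl = 5 gives the minimal
 * 20-byte header), landing the udphdr pointer just past any IP options.
 */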
374 
375 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
376                   const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
377                   bool log_ecn_error)
378 {
379         const struct iphdr *iph = ip_hdr(skb);
380         int nh, err;
381 
382 #ifdef CONFIG_NET_IPGRE_BROADCAST
383         if (ipv4_is_multicast(iph->daddr)) {
384                 DEV_STATS_INC(tunnel->dev, multicast);
385                 skb->pkt_type = PACKET_BROADCAST;
386         }
387 #endif
388 
389         if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) !=
390             test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) {
391                 DEV_STATS_INC(tunnel->dev, rx_crc_errors);
392                 DEV_STATS_INC(tunnel->dev, rx_errors);
393                 goto drop;
394         }
395 
396         if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) {
397                 if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) ||
398                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
399                         DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
400                         DEV_STATS_INC(tunnel->dev, rx_errors);
401                         goto drop;
402                 }
403                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
404         }
405 
406         /* Save offset of outer header relative to skb->head,
407          * because we are going to reset the network header to the inner header
408          * and might change skb->head.
409          */
410         nh = skb_network_header(skb) - skb->head;
411 
412         skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
413 
414         if (!pskb_inet_may_pull(skb)) {
415                 DEV_STATS_INC(tunnel->dev, rx_length_errors);
416                 DEV_STATS_INC(tunnel->dev, rx_errors);
417                 goto drop;
418         }
419         iph = (struct iphdr *)(skb->head + nh);
420 
421         err = IP_ECN_decapsulate(iph, skb);
422         if (unlikely(err)) {
423                 if (log_ecn_error)
424                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
425                                         &iph->saddr, iph->tos);
426                 if (err > 1) {
427                         DEV_STATS_INC(tunnel->dev, rx_frame_errors);
428                         DEV_STATS_INC(tunnel->dev, rx_errors);
429                         goto drop;
430                 }
431         }
432 
433         dev_sw_netstats_rx_add(tunnel->dev, skb->len);
434         skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
435 
436         if (tunnel->dev->type == ARPHRD_ETHER) {
437                 skb->protocol = eth_type_trans(skb, tunnel->dev);
438                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
439         } else {
440                 skb->dev = tunnel->dev;
441         }
442 
443         if (tun_dst)
444                 skb_dst_set(skb, (struct dst_entry *)tun_dst);
445 
446         gro_cells_receive(&tunnel->gro_cells, skb);
447         return 0;
448 
449 drop:
450         if (tun_dst)
451                 dst_release((struct dst_entry *)tun_dst);
452         kfree_skb(skb);
453         return 0;
454 }
455 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
456 
457 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
458                             unsigned int num)
459 {
460         if (num >= MAX_IPTUN_ENCAP_OPS)
461                 return -ERANGE;
462 
463         return !cmpxchg((const struct ip_tunnel_encap_ops **)
464                         &iptun_encaps[num],
465                         NULL, ops) ? 0 : -1;
466 }
467 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
468 
469 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
470                             unsigned int num)
471 {
472         int ret;
473 
474         if (num >= MAX_IPTUN_ENCAP_OPS)
475                 return -ERANGE;
476 
477         ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
478                        &iptun_encaps[num],
479                        ops, NULL) == ops) ? 0 : -1;
480 
481         synchronize_net();
482 
483         return ret;
484 }
485 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
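/* A minimal sketch of the registration pattern served by the two helpers
 * above (hypothetical provider: the "example_*" names are placeholders and
 * the callback signatures follow the FOU provider under net/ipv4/):
 */
static size_t example_encap_hlen(struct ip_tunnel_encap *e)
{
	return sizeof(struct udphdr);	/* fixed 8-byte UDP shim */
}

static int example_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
				u8 *protocol, struct flowi4 *fl4)
{
	return 0;	/* a real provider would prepend its header here */
}

static const struct ip_tunnel_encap_ops example_encap_ops = {
	.encap_hlen	= example_encap_hlen,
	.build_header	= example_build_header,
};

static int __init example_encap_init(void)
{
	/* 0 on success, -1 if the slot is already taken, -ERANGE if num
	 * is out of range.
	 */
	return ip_tunnel_encap_add_ops(&example_encap_ops, TUNNEL_ENCAP_FOU);
}

static void __exit example_encap_exit(void)
{
	/* synchronize_net() inside the helper makes removal safe vs. TX */
	ip_tunnel_encap_del_ops(&example_encap_ops, TUNNEL_ENCAP_FOU);
}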
486 
487 int ip_tunnel_encap_setup(struct ip_tunnel *t,
488                           struct ip_tunnel_encap *ipencap)
489 {
490         int hlen;
491 
492         memset(&t->encap, 0, sizeof(t->encap));
493 
494         hlen = ip_encap_hlen(ipencap);
495         if (hlen < 0)
496                 return hlen;
497 
498         t->encap.type = ipencap->type;
499         t->encap.sport = ipencap->sport;
500         t->encap.dport = ipencap->dport;
501         t->encap.flags = ipencap->flags;
502 
503         t->encap_hlen = hlen;
504         t->hlen = t->encap_hlen + t->tun_hlen;
505 
506         return 0;
507 }
508 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
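/* A minimal sketch of the helper above in use, assuming a configuration
 * path that already owns tunnel "t" under RTNL; the FOU encap type and the
 * destination port are arbitrary choices for illustration:
 */
static int example_enable_udp_encap(struct ip_tunnel *t)
{
	struct ip_tunnel_encap ipencap = {
		.type	= TUNNEL_ENCAP_FOU,
		.sport	= 0,			/* 0: source port chosen per flow */
		.dport	= htons(5555),
		.flags	= TUNNEL_ENCAP_FLAG_CSUM,
	};

	/* On success this also refreshes t->encap_hlen and t->hlen;
	 * an unknown encap type yields a negative errno.
	 */
	return ip_tunnel_encap_setup(t, &ipencap);
}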
509 
510 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
511                             struct rtable *rt, __be16 df,
512                             const struct iphdr *inner_iph,
513                             int tunnel_hlen, __be32 dst, bool md)
514 {
515         struct ip_tunnel *tunnel = netdev_priv(dev);
516         int pkt_size;
517         int mtu;
518 
519         tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
520         pkt_size = skb->len - tunnel_hlen;
521         pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
522 
523         if (df) {
524                 mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
525                 mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
526         } else {
527                 mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
528         }
529 
530         if (skb_valid_dst(skb))
531                 skb_dst_update_pmtu_no_confirm(skb, mtu);
532 
533         if (skb->protocol == htons(ETH_P_IP)) {
534                 if (!skb_is_gso(skb) &&
535                     (inner_iph->frag_off & htons(IP_DF)) &&
536                     mtu < pkt_size) {
537                         icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
538                         return -E2BIG;
539                 }
540         }
541 #if IS_ENABLED(CONFIG_IPV6)
542         else if (skb->protocol == htons(ETH_P_IPV6)) {
543                 struct rt6_info *rt6;
544                 __be32 daddr;
545 
546                 rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) :
547                                            NULL;
548                 daddr = md ? dst : tunnel->parms.iph.daddr;
549 
550                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
551                            mtu >= IPV6_MIN_MTU) {
552                         if ((daddr && !ipv4_is_multicast(daddr)) ||
553                             rt6->rt6i_dst.plen == 128) {
554                                 rt6->rt6i_flags |= RTF_MODIFIED;
555                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
556                         }
557                 }
558 
559                 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
560                                         mtu < pkt_size) {
561                         icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
562                         return -E2BIG;
563                 }
564         }
565 #endif
566         return 0;
567 }
568 
569 static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
570 {
 571         /* we must cap headroom to some upper limit, else pskb_expand_head
572          * will overflow header offsets in skb_headers_offset_update().
573          */
574         static const unsigned int max_allowed = 512;
575 
576         if (headroom > max_allowed)
577                 headroom = max_allowed;
578 
579         if (headroom > READ_ONCE(dev->needed_headroom))
580                 WRITE_ONCE(dev->needed_headroom, headroom);
581 }
582 
583 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
584                        u8 proto, int tunnel_hlen)
585 {
586         struct ip_tunnel *tunnel = netdev_priv(dev);
587         u32 headroom = sizeof(struct iphdr);
588         struct ip_tunnel_info *tun_info;
589         const struct ip_tunnel_key *key;
590         const struct iphdr *inner_iph;
591         struct rtable *rt = NULL;
592         struct flowi4 fl4;
593         __be16 df = 0;
594         u8 tos, ttl;
595         bool use_cache;
596 
597         tun_info = skb_tunnel_info(skb);
598         if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
599                      ip_tunnel_info_af(tun_info) != AF_INET))
600                 goto tx_error;
601         key = &tun_info->key;
602         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
603         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
604         tos = key->tos;
605         if (tos == 1) {
606                 if (skb->protocol == htons(ETH_P_IP))
607                         tos = inner_iph->tos;
608                 else if (skb->protocol == htons(ETH_P_IPV6))
609                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
610         }
611         ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
612                             tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
613                             dev_net(dev), 0, skb->mark, skb_get_hash(skb),
614                             key->flow_flags);
615 
616         if (!tunnel_hlen)
617                 tunnel_hlen = ip_encap_hlen(&tun_info->encap);
618 
619         if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0)
620                 goto tx_error;
621 
622         use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
623         if (use_cache)
624                 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
625         if (!rt) {
626                 rt = ip_route_output_key(tunnel->net, &fl4);
627                 if (IS_ERR(rt)) {
628                         DEV_STATS_INC(dev, tx_carrier_errors);
629                         goto tx_error;
630                 }
631                 if (use_cache)
632                         dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
633                                           fl4.saddr);
634         }
635         if (rt->dst.dev == dev) {
636                 ip_rt_put(rt);
637                 DEV_STATS_INC(dev, collisions);
638                 goto tx_error;
639         }
640 
641         if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags))
642                 df = htons(IP_DF);
643         if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
644                             key->u.ipv4.dst, true)) {
645                 ip_rt_put(rt);
646                 goto tx_error;
647         }
648 
649         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
650         ttl = key->ttl;
651         if (ttl == 0) {
652                 if (skb->protocol == htons(ETH_P_IP))
653                         ttl = inner_iph->ttl;
654                 else if (skb->protocol == htons(ETH_P_IPV6))
655                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
656                 else
657                         ttl = ip4_dst_hoplimit(&rt->dst);
658         }
659 
660         headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
661         if (skb_cow_head(skb, headroom)) {
662                 ip_rt_put(rt);
663                 goto tx_dropped;
664         }
665 
666         ip_tunnel_adj_headroom(dev, headroom);
667 
668         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
669                       df, !net_eq(tunnel->net, dev_net(dev)));
670         return;
671 tx_error:
672         DEV_STATS_INC(dev, tx_errors);
673         goto kfree;
674 tx_dropped:
675         DEV_STATS_INC(dev, tx_dropped);
676 kfree:
677         kfree_skb(skb);
678 }
679 EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
680 
681 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
682                     const struct iphdr *tnl_params, u8 protocol)
683 {
684         struct ip_tunnel *tunnel = netdev_priv(dev);
685         struct ip_tunnel_info *tun_info = NULL;
686         const struct iphdr *inner_iph;
687         unsigned int max_headroom;      /* The extra header space needed */
688         struct rtable *rt = NULL;               /* Route to the other host */
689         __be16 payload_protocol;
690         bool use_cache = false;
691         struct flowi4 fl4;
692         bool md = false;
693         bool connected;
694         u8 tos, ttl;
695         __be32 dst;
696         __be16 df;
697 
698         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
699         connected = (tunnel->parms.iph.daddr != 0);
700         payload_protocol = skb_protocol(skb, true);
701 
702         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
703 
704         dst = tnl_params->daddr;
705         if (dst == 0) {
706                 /* NBMA tunnel */
707 
708                 if (!skb_dst(skb)) {
709                         DEV_STATS_INC(dev, tx_fifo_errors);
710                         goto tx_error;
711                 }
712 
713                 tun_info = skb_tunnel_info(skb);
714                 if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
715                     ip_tunnel_info_af(tun_info) == AF_INET &&
716                     tun_info->key.u.ipv4.dst) {
717                         dst = tun_info->key.u.ipv4.dst;
718                         md = true;
719                         connected = true;
720                 } else if (payload_protocol == htons(ETH_P_IP)) {
721                         rt = skb_rtable(skb);
722                         dst = rt_nexthop(rt, inner_iph->daddr);
723                 }
724 #if IS_ENABLED(CONFIG_IPV6)
725                 else if (payload_protocol == htons(ETH_P_IPV6)) {
726                         const struct in6_addr *addr6;
727                         struct neighbour *neigh;
728                         bool do_tx_error_icmp;
729                         int addr_type;
730 
731                         neigh = dst_neigh_lookup(skb_dst(skb),
732                                                  &ipv6_hdr(skb)->daddr);
733                         if (!neigh)
734                                 goto tx_error;
735 
736                         addr6 = (const struct in6_addr *)&neigh->primary_key;
737                         addr_type = ipv6_addr_type(addr6);
738 
739                         if (addr_type == IPV6_ADDR_ANY) {
740                                 addr6 = &ipv6_hdr(skb)->daddr;
741                                 addr_type = ipv6_addr_type(addr6);
742                         }
743 
744                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
745                                 do_tx_error_icmp = true;
746                         else {
747                                 do_tx_error_icmp = false;
748                                 dst = addr6->s6_addr32[3];
749                         }
750                         neigh_release(neigh);
751                         if (do_tx_error_icmp)
752                                 goto tx_error_icmp;
753                 }
754 #endif
755                 else
756                         goto tx_error;
757 
758                 if (!md)
759                         connected = false;
760         }
761 
762         tos = tnl_params->tos;
763         if (tos & 0x1) {
764                 tos &= ~0x1;
765                 if (payload_protocol == htons(ETH_P_IP)) {
766                         tos = inner_iph->tos;
767                         connected = false;
768                 } else if (payload_protocol == htons(ETH_P_IPV6)) {
769                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
770                         connected = false;
771                 }
772         }
773 
774         ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
775                             tunnel->parms.o_key, RT_TOS(tos),
776                             dev_net(dev), READ_ONCE(tunnel->parms.link),
777                             tunnel->fwmark, skb_get_hash(skb), 0);
778 
779         if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
780                 goto tx_error;
781 
782         if (connected && md) {
783                 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
784                 if (use_cache)
785                         rt = dst_cache_get_ip4(&tun_info->dst_cache,
786                                                &fl4.saddr);
787         } else {
788                 rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
789                                                 &fl4.saddr) : NULL;
790         }
791 
792         if (!rt) {
793                 rt = ip_route_output_key(tunnel->net, &fl4);
794 
795                 if (IS_ERR(rt)) {
796                         DEV_STATS_INC(dev, tx_carrier_errors);
797                         goto tx_error;
798                 }
799                 if (use_cache)
800                         dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
801                                           fl4.saddr);
802                 else if (!md && connected)
803                         dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
804                                           fl4.saddr);
805         }
806 
807         if (rt->dst.dev == dev) {
808                 ip_rt_put(rt);
809                 DEV_STATS_INC(dev, collisions);
810                 goto tx_error;
811         }
812 
813         df = tnl_params->frag_off;
814         if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
815                 df |= (inner_iph->frag_off & htons(IP_DF));
816 
817         if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
818                 ip_rt_put(rt);
819                 goto tx_error;
820         }
821 
822         if (tunnel->err_count > 0) {
823                 if (time_before(jiffies,
824                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
825                         tunnel->err_count--;
826 
827                         dst_link_failure(skb);
828                 } else
829                         tunnel->err_count = 0;
830         }
831 
832         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
833         ttl = tnl_params->ttl;
834         if (ttl == 0) {
835                 if (payload_protocol == htons(ETH_P_IP))
836                         ttl = inner_iph->ttl;
837 #if IS_ENABLED(CONFIG_IPV6)
838                 else if (payload_protocol == htons(ETH_P_IPV6))
839                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
840 #endif
841                 else
842                         ttl = ip4_dst_hoplimit(&rt->dst);
843         }
844 
845         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
846                         + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
847 
848         if (skb_cow_head(skb, max_headroom)) {
849                 ip_rt_put(rt);
850                 DEV_STATS_INC(dev, tx_dropped);
851                 kfree_skb(skb);
852                 return;
853         }
854 
855         ip_tunnel_adj_headroom(dev, max_headroom);
856 
857         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
858                       df, !net_eq(tunnel->net, dev_net(dev)));
859         return;
860 
861 #if IS_ENABLED(CONFIG_IPV6)
862 tx_error_icmp:
863         dst_link_failure(skb);
864 #endif
865 tx_error:
866         DEV_STATS_INC(dev, tx_errors);
867         kfree_skb(skb);
868 }
869 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
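/* A minimal sketch of a driver TX hook feeding the transmit path above,
 * loosely modeled on the IPIP driver (net/ipv4/ipip.c), with the protocol
 * checks and error handling trimmed for illustration:
 */
static netdev_tx_t example_start_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tiph = &tunnel->parms.iph;

	if (!pskb_inet_may_pull(skb)) {
		DEV_STATS_INC(dev, tx_errors);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	/* ip_tunnel_xmit() consumes the skb on both success and failure */
	ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
	return NETDEV_TX_OK;
}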
870 
871 static void ip_tunnel_update(struct ip_tunnel_net *itn,
872                              struct ip_tunnel *t,
873                              struct net_device *dev,
874                              struct ip_tunnel_parm_kern *p,
875                              bool set_mtu,
876                              __u32 fwmark)
877 {
878         ip_tunnel_del(itn, t);
879         t->parms.iph.saddr = p->iph.saddr;
880         t->parms.iph.daddr = p->iph.daddr;
881         t->parms.i_key = p->i_key;
882         t->parms.o_key = p->o_key;
883         if (dev->type != ARPHRD_ETHER) {
884                 __dev_addr_set(dev, &p->iph.saddr, 4);
885                 memcpy(dev->broadcast, &p->iph.daddr, 4);
886         }
887         ip_tunnel_add(itn, t);
888 
889         t->parms.iph.ttl = p->iph.ttl;
890         t->parms.iph.tos = p->iph.tos;
891         t->parms.iph.frag_off = p->iph.frag_off;
892 
893         if (t->parms.link != p->link || t->fwmark != fwmark) {
894                 int mtu;
895 
896                 WRITE_ONCE(t->parms.link, p->link);
897                 t->fwmark = fwmark;
898                 mtu = ip_tunnel_bind_dev(dev);
899                 if (set_mtu)
900                         WRITE_ONCE(dev->mtu, mtu);
901         }
902         dst_cache_reset(&t->dst_cache);
903         netdev_state_change(dev);
904 }
905 
906 int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p,
907                   int cmd)
908 {
909         int err = 0;
910         struct ip_tunnel *t = netdev_priv(dev);
911         struct net *net = t->net;
912         struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
913 
914         switch (cmd) {
915         case SIOCGETTUNNEL:
916                 if (dev == itn->fb_tunnel_dev) {
917                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
918                         if (!t)
919                                 t = netdev_priv(dev);
920                 }
921                 memcpy(p, &t->parms, sizeof(*p));
922                 break;
923 
924         case SIOCADDTUNNEL:
925         case SIOCCHGTUNNEL:
926                 err = -EPERM;
927                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
928                         goto done;
929                 if (p->iph.ttl)
930                         p->iph.frag_off |= htons(IP_DF);
931                 if (!test_bit(IP_TUNNEL_VTI_BIT, p->i_flags)) {
932                         if (!test_bit(IP_TUNNEL_KEY_BIT, p->i_flags))
933                                 p->i_key = 0;
934                         if (!test_bit(IP_TUNNEL_KEY_BIT, p->o_flags))
935                                 p->o_key = 0;
936                 }
937 
938                 t = ip_tunnel_find(itn, p, itn->type);
939 
940                 if (cmd == SIOCADDTUNNEL) {
941                         if (!t) {
942                                 t = ip_tunnel_create(net, itn, p);
943                                 err = PTR_ERR_OR_ZERO(t);
944                                 break;
945                         }
946 
947                         err = -EEXIST;
948                         break;
949                 }
950                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
951                         if (t) {
952                                 if (t->dev != dev) {
953                                         err = -EEXIST;
954                                         break;
955                                 }
956                         } else {
957                                 unsigned int nflags = 0;
958 
959                                 if (ipv4_is_multicast(p->iph.daddr))
960                                         nflags = IFF_BROADCAST;
961                                 else if (p->iph.daddr)
962                                         nflags = IFF_POINTOPOINT;
963 
 964                                 if ((dev->flags ^ nflags) & (IFF_POINTOPOINT | IFF_BROADCAST)) {
965                                         err = -EINVAL;
966                                         break;
967                                 }
968 
969                                 t = netdev_priv(dev);
970                         }
971                 }
972 
973                 if (t) {
974                         err = 0;
975                         ip_tunnel_update(itn, t, dev, p, true, 0);
976                 } else {
977                         err = -ENOENT;
978                 }
979                 break;
980 
981         case SIOCDELTUNNEL:
982                 err = -EPERM;
983                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
984                         goto done;
985 
986                 if (dev == itn->fb_tunnel_dev) {
987                         err = -ENOENT;
988                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
989                         if (!t)
990                                 goto done;
991                         err = -EPERM;
992                         if (t == netdev_priv(itn->fb_tunnel_dev))
993                                 goto done;
994                         dev = t->dev;
995                 }
996                 unregister_netdevice(dev);
997                 err = 0;
998                 break;
999 
1000         default:
1001                 err = -EINVAL;
1002         }
1003 
1004 done:
1005         return err;
1006 }
1007 EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
1008 
1009 bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp,
1010                               const void __user *data)
1011 {
1012         struct ip_tunnel_parm p;
1013 
1014         if (copy_from_user(&p, data, sizeof(p)))
1015                 return false;
1016 
1017         strscpy(kp->name, p.name);
1018         kp->link = p.link;
1019         ip_tunnel_flags_from_be16(kp->i_flags, p.i_flags);
1020         ip_tunnel_flags_from_be16(kp->o_flags, p.o_flags);
1021         kp->i_key = p.i_key;
1022         kp->o_key = p.o_key;
1023         memcpy(&kp->iph, &p.iph, min(sizeof(kp->iph), sizeof(p.iph)));
1024 
1025         return true;
1026 }
1027 EXPORT_SYMBOL_GPL(ip_tunnel_parm_from_user);
1028 
1029 bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp)
1030 {
1031         struct ip_tunnel_parm p;
1032 
1033         if (!ip_tunnel_flags_is_be16_compat(kp->i_flags) ||
1034             !ip_tunnel_flags_is_be16_compat(kp->o_flags))
1035                 return false;
1036 
1037         memset(&p, 0, sizeof(p));
1038 
1039         strscpy(p.name, kp->name);
1040         p.link = kp->link;
1041         p.i_flags = ip_tunnel_flags_to_be16(kp->i_flags);
1042         p.o_flags = ip_tunnel_flags_to_be16(kp->o_flags);
1043         p.i_key = kp->i_key;
1044         p.o_key = kp->o_key;
1045         memcpy(&p.iph, &kp->iph, min(sizeof(p.iph), sizeof(kp->iph)));
1046 
1047         return !copy_to_user(data, &p, sizeof(p));
1048 }
1049 EXPORT_SYMBOL_GPL(ip_tunnel_parm_to_user);
1050 
1051 int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
1052                              void __user *data, int cmd)
1053 {
1054         struct ip_tunnel_parm_kern p;
1055         int err;
1056 
1057         if (!ip_tunnel_parm_from_user(&p, data))
1058                 return -EFAULT;
1059         err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
1060         if (!err && !ip_tunnel_parm_to_user(data, &p))
1061                 return -EFAULT;
1062         return err;
1063 }
1064 EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
1065 
1066 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
1067 {
1068         struct ip_tunnel *tunnel = netdev_priv(dev);
1069         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
1070         int max_mtu = IP_MAX_MTU - t_hlen;
1071 
1072         if (dev->type == ARPHRD_ETHER)
1073                 max_mtu -= dev->hard_header_len;
1074 
1075         if (new_mtu < ETH_MIN_MTU)
1076                 return -EINVAL;
1077 
1078         if (new_mtu > max_mtu) {
1079                 if (strict)
1080                         return -EINVAL;
1081 
1082                 new_mtu = max_mtu;
1083         }
1084 
1085         WRITE_ONCE(dev->mtu, new_mtu);
1086         return 0;
1087 }
1088 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
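/* A worked example of the bound above: for a hypothetical GRE tunnel
 * carrying a 4-byte key, tunnel->hlen is 8, so t_hlen = 8 + 20 = 28 and
 * new_mtu is capped at IP_MAX_MTU - 28 = 65507 (further reduced by
 * hard_header_len on ARPHRD_ETHER devices).
 */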
1089 
1090 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1091 {
1092         return __ip_tunnel_change_mtu(dev, new_mtu, true);
1093 }
1094 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
1095 
1096 static void ip_tunnel_dev_free(struct net_device *dev)
1097 {
1098         struct ip_tunnel *tunnel = netdev_priv(dev);
1099 
1100         gro_cells_destroy(&tunnel->gro_cells);
1101         dst_cache_destroy(&tunnel->dst_cache);
1102 }
1103 
1104 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1105 {
1106         struct ip_tunnel *tunnel = netdev_priv(dev);
1107         struct ip_tunnel_net *itn;
1108 
1109         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
1110 
1111         if (itn->fb_tunnel_dev != dev) {
1112                 ip_tunnel_del(itn, netdev_priv(dev));
1113                 unregister_netdevice_queue(dev, head);
1114         }
1115 }
1116 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1117 
1118 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1119 {
1120         struct ip_tunnel *tunnel = netdev_priv(dev);
1121 
1122         return READ_ONCE(tunnel->net);
1123 }
1124 EXPORT_SYMBOL(ip_tunnel_get_link_net);
1125 
1126 int ip_tunnel_get_iflink(const struct net_device *dev)
1127 {
1128         const struct ip_tunnel *tunnel = netdev_priv(dev);
1129 
1130         return READ_ONCE(tunnel->parms.link);
1131 }
1132 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1133 
1134 int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1135                                   struct rtnl_link_ops *ops, char *devname)
1136 {
1137         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1138         struct ip_tunnel_parm_kern parms;
1139         unsigned int i;
1140 
1141         itn->rtnl_link_ops = ops;
1142         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1143                 INIT_HLIST_HEAD(&itn->tunnels[i]);
1144 
1145         if (!ops || !net_has_fallback_tunnels(net)) {
1146                 struct ip_tunnel_net *it_init_net;
1147 
1148                 it_init_net = net_generic(&init_net, ip_tnl_net_id);
1149                 itn->type = it_init_net->type;
1150                 itn->fb_tunnel_dev = NULL;
1151                 return 0;
1152         }
1153 
1154         memset(&parms, 0, sizeof(parms));
1155         if (devname)
1156                 strscpy(parms.name, devname, IFNAMSIZ);
1157 
1158         rtnl_lock();
1159         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1160         /* FB netdevice is special: we have one, and only one per netns.
1161          * Allowing it to be moved to another netns is clearly unsafe.
1162          */
1163         if (!IS_ERR(itn->fb_tunnel_dev)) {
1164                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1165                 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1166                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1167                 itn->type = itn->fb_tunnel_dev->type;
1168         }
1169         rtnl_unlock();
1170 
1171         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1172 }
1173 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1174 
1175 static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1176                               struct list_head *head,
1177                               struct rtnl_link_ops *ops)
1178 {
1179         struct net_device *dev, *aux;
1180         int h;
1181 
1182         for_each_netdev_safe(net, dev, aux)
1183                 if (dev->rtnl_link_ops == ops)
1184                         unregister_netdevice_queue(dev, head);
1185 
1186         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1187                 struct ip_tunnel *t;
1188                 struct hlist_node *n;
1189                 struct hlist_head *thead = &itn->tunnels[h];
1190 
1191                 hlist_for_each_entry_safe(t, n, thead, hash_node)
1192                         /* If dev is in the same netns, it has already
1193                          * been added to the list by the previous loop.
1194                          */
1195                         if (!net_eq(dev_net(t->dev), net))
1196                                 unregister_netdevice_queue(t->dev, head);
1197         }
1198 }
1199 
1200 void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1201                            struct rtnl_link_ops *ops,
1202                            struct list_head *dev_to_kill)
1203 {
1204         struct ip_tunnel_net *itn;
1205         struct net *net;
1206 
1207         ASSERT_RTNL();
1208         list_for_each_entry(net, net_list, exit_list) {
1209                 itn = net_generic(net, id);
1210                 ip_tunnel_destroy(net, itn, dev_to_kill, ops);
1211         }
1212 }
1213 EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
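/* A minimal sketch of how a driver wires the two pernet helpers above,
 * loosely modeled on the IPIP driver; every "example_*" name, the device
 * name and the .exit_batch_rtnl hookup are assumptions for illustration:
 */
static unsigned int example_pernet_id __read_mostly;

static struct rtnl_link_ops example_link_ops = {
	.kind	= "example",
};

static int __net_init example_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, example_pernet_id,
				  &example_link_ops, "example0");
}

static void __net_exit example_exit_rtnl(struct list_head *net_list,
					 struct list_head *dev_to_kill)
{
	ip_tunnel_delete_nets(net_list, example_pernet_id,
			      &example_link_ops, dev_to_kill);
}

static struct pernet_operations example_net_ops = {
	.init		= example_init_net,
	.exit_batch_rtnl = example_exit_rtnl,
	.id		= &example_pernet_id,
	.size		= sizeof(struct ip_tunnel_net),
};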
1214 
1215 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1216                       struct ip_tunnel_parm_kern *p, __u32 fwmark)
1217 {
1218         struct ip_tunnel *nt;
1219         struct net *net = dev_net(dev);
1220         struct ip_tunnel_net *itn;
1221         int mtu;
1222         int err;
1223 
1224         nt = netdev_priv(dev);
1225         itn = net_generic(net, nt->ip_tnl_net_id);
1226 
1227         if (nt->collect_md) {
1228                 if (rtnl_dereference(itn->collect_md_tun))
1229                         return -EEXIST;
1230         } else {
1231                 if (ip_tunnel_find(itn, p, dev->type))
1232                         return -EEXIST;
1233         }
1234 
1235         nt->net = net;
1236         nt->parms = *p;
1237         nt->fwmark = fwmark;
1238         err = register_netdevice(dev);
1239         if (err)
1240                 goto err_register_netdevice;
1241 
1242         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1243                 eth_hw_addr_random(dev);
1244 
1245         mtu = ip_tunnel_bind_dev(dev);
1246         if (tb[IFLA_MTU]) {
1247                 unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
1248 
1249                 if (dev->type == ARPHRD_ETHER)
1250                         max -= dev->hard_header_len;
1251 
1252                 mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
1253         }
1254 
1255         err = dev_set_mtu(dev, mtu);
1256         if (err)
1257                 goto err_dev_set_mtu;
1258 
1259         ip_tunnel_add(itn, nt);
1260         return 0;
1261 
1262 err_dev_set_mtu:
1263         unregister_netdevice(dev);
1264 err_register_netdevice:
1265         return err;
1266 }
1267 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1268 
1269 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1270                          struct ip_tunnel_parm_kern *p, __u32 fwmark)
1271 {
1272         struct ip_tunnel *t;
1273         struct ip_tunnel *tunnel = netdev_priv(dev);
1274         struct net *net = tunnel->net;
1275         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1276 
1277         if (dev == itn->fb_tunnel_dev)
1278                 return -EINVAL;
1279 
1280         t = ip_tunnel_find(itn, p, dev->type);
1281 
1282         if (t) {
1283                 if (t->dev != dev)
1284                         return -EEXIST;
1285         } else {
1286                 t = tunnel;
1287 
1288                 if (dev->type != ARPHRD_ETHER) {
1289                         unsigned int nflags = 0;
1290 
1291                         if (ipv4_is_multicast(p->iph.daddr))
1292                                 nflags = IFF_BROADCAST;
1293                         else if (p->iph.daddr)
1294                                 nflags = IFF_POINTOPOINT;
1295 
1296                         if ((dev->flags ^ nflags) &
1297                             (IFF_POINTOPOINT | IFF_BROADCAST))
1298                                 return -EINVAL;
1299                 }
1300         }
1301 
1302         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1303         return 0;
1304 }
1305 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1306 
1307 int ip_tunnel_init(struct net_device *dev)
1308 {
1309         struct ip_tunnel *tunnel = netdev_priv(dev);
1310         struct iphdr *iph = &tunnel->parms.iph;
1311         int err;
1312 
1313         dev->needs_free_netdev = true;
1314         dev->priv_destructor = ip_tunnel_dev_free;
1315         dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
1316 
1317         err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1318         if (err)
1319                 return err;
1320 
1321         err = gro_cells_init(&tunnel->gro_cells, dev);
1322         if (err) {
1323                 dst_cache_destroy(&tunnel->dst_cache);
1324                 return err;
1325         }
1326 
1327         tunnel->dev = dev;
1328         tunnel->net = dev_net(dev);
1329         strcpy(tunnel->parms.name, dev->name);
1330         iph->version            = 4;
1331         iph->ihl                = 5;
1332 
1333         if (tunnel->collect_md)
1334                 netif_keep_dst(dev);
1335         netdev_lockdep_set_classes(dev);
1336         return 0;
1337 }
1338 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1339 
1340 void ip_tunnel_uninit(struct net_device *dev)
1341 {
1342         struct ip_tunnel *tunnel = netdev_priv(dev);
1343         struct net *net = tunnel->net;
1344         struct ip_tunnel_net *itn;
1345 
1346         itn = net_generic(net, tunnel->ip_tnl_net_id);
1347         ip_tunnel_del(itn, netdev_priv(dev));
1348         if (itn->fb_tunnel_dev == dev)
1349                 WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1350 
1351         dst_cache_reset(&tunnel->dst_cache);
1352 }
1353 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1354 
1355 /* Do the least required initialization; the rest is done in the tunnel_init call */
1356 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1357 {
1358         struct ip_tunnel *tunnel = netdev_priv(dev);
1359         tunnel->ip_tnl_net_id = net_id;
1360 }
1361 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1362 
1363 MODULE_DESCRIPTION("IPv4 tunnel implementation library");
1364 MODULE_LICENSE("GPL");
1365 
