// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Internet Control Message Protocol (ICMPv6)
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on net/ipv4/icmp.c
 *
 *	RFC 1885
 */

/*
 *	Changes:
 *
 *	Andi Kleen		:	exception handling
 *	Andi Kleen			add rate limits. never reply to a icmp.
 *					add more length checks and other fixes.
 *	yoshfuji		:	ensure to sent parameter problem for
 *					fragments.
 *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
 *	Randy Dunlap and
 *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
 *	Kazunori MIYAZAWA @USAGI:	change output process to use ip6_append_data
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/netfilter.h>
#include <linux/slab.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/icmpv6.h>

#include <net/ip.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/ip6_checksum.h>
#include <net/ping.h>
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
#include <net/seg6.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/dsfield.h>
#include <net/l3mdev.h>

#include <linux/uaccess.h>

/* Per-CPU kernel control socket used to transmit ICMPv6 messages. */
static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);

/* Error handler invoked when an ICMPv6 error arrives in response to an
 * ICMPv6 packet we sent: update PMTU / redirect state, and forward errors
 * about our own echo requests to the ping socket layer.
 */
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
	struct net *net = dev_net(skb->dev);

	if (type == ICMPV6_PKT_TOOBIG)
		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
	else if (type == NDISC_REDIRECT)
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));

	/* Only propagate real errors (not informational messages), and only
	 * when the embedded packet was one of our echo requests.
	 */
	if (!(type & ICMPV6_INFOMSG_MASK))
		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
			ping_err(skb, offset, ntohl(info));

	return 0;
}

static int icmpv6_rcv(struct sk_buff *skb);

static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

/* Called with BH disabled */
static struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk;

	sk = this_cpu_read(ipv6_icmp_sk);
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		return NULL;
	}
	/* Re-home the shared per-CPU socket into the caller's netns for the
	 * duration of the transmission; icmpv6_xmit_unlock() restores it.
	 */
	sock_net_set(sk, net);
	return sk;
}

static void icmpv6_xmit_unlock(struct sock *sk)
{
	sock_net_set(sk, &init_net);
	spin_unlock(&sk->sk_lock.slock);
}

/*
 * Figure out, may we reply to this packet with icmp error.
 *
 * We do not reply, if:
 *	- it was icmp error message.
 *	- it is truncated, so that it is known, that protocol is ICMPV6
 *	  (i.e.
in the middle of some exthdr) 132 * 133 * --ANK (980726) 134 */ 135 136 static bool is_ineligible(const struct sk_buff *skb) 137 { 138 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; 139 int len = skb->len - ptr; 140 __u8 nexthdr = ipv6_hdr(skb)->nexthdr; 141 __be16 frag_off; 142 143 if (len < 0) 144 return true; 145 146 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off); 147 if (ptr < 0) 148 return false; 149 if (nexthdr == IPPROTO_ICMPV6) { 150 u8 _type, *tp; 151 tp = skb_header_pointer(skb, 152 ptr+offsetof(struct icmp6hdr, icmp6_type), 153 sizeof(_type), &_type); 154 155 /* Based on RFC 8200, Section 4.5 Fragment Header, return 156 * false if this is a fragment packet with no icmp header info. 157 */ 158 if (!tp && frag_off != 0) 159 return false; 160 else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK)) 161 return true; 162 } 163 return false; 164 } 165 166 static bool icmpv6_mask_allow(struct net *net, int type) 167 { 168 if (type > ICMPV6_MSG_MAX) 169 return true; 170 171 /* Limit if icmp type is set in ratemask. */ 172 if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask)) 173 return true; 174 175 return false; 176 } 177 178 static bool icmpv6_global_allow(struct net *net, int type, 179 bool *apply_ratelimit) 180 { 181 if (icmpv6_mask_allow(net, type)) 182 return true; 183 184 if (icmp_global_allow()) { 185 *apply_ratelimit = true; 186 return true; 187 } 188 __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL); 189 return false; 190 } 191 192 /* 193 * Check the ICMP output rate limit 194 */ 195 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, 196 struct flowi6 *fl6, bool apply_ratelimit) 197 { 198 struct net *net = sock_net(sk); 199 struct dst_entry *dst; 200 bool res = false; 201 202 if (!apply_ratelimit) 203 return true; 204 205 /* 206 * Look up the output route. 207 * XXX: perhaps the expire for routing entries cloned by 208 * this lookup should be more aggressive (not longer than timeout). 
209 */ 210 dst = ip6_route_output(net, sk, fl6); 211 if (dst->error) { 212 IP6_INC_STATS(net, ip6_dst_idev(dst), 213 IPSTATS_MIB_OUTNOROUTES); 214 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { 215 res = true; 216 } else { 217 struct rt6_info *rt = dst_rt6_info(dst); 218 int tmo = net->ipv6.sysctl.icmpv6_time; 219 struct inet_peer *peer; 220 221 /* Give more bandwidth to wider prefixes. */ 222 if (rt->rt6i_dst.plen < 128) 223 tmo >>= ((128 - rt->rt6i_dst.plen)>>5); 224 225 peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1); 226 res = inet_peer_xrlim_allow(peer, tmo); 227 if (peer) 228 inet_putpeer(peer); 229 } 230 if (!res) 231 __ICMP6_INC_STATS(net, ip6_dst_idev(dst), 232 ICMP6_MIB_RATELIMITHOST); 233 else 234 icmp_global_consume(); 235 dst_release(dst); 236 return res; 237 } 238 239 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type, 240 struct flowi6 *fl6) 241 { 242 struct net *net = sock_net(sk); 243 struct dst_entry *dst; 244 bool res = false; 245 246 dst = ip6_route_output(net, sk, fl6); 247 if (!dst->error) { 248 struct rt6_info *rt = dst_rt6_info(dst); 249 struct in6_addr prefsrc; 250 251 rt6_get_prefsrc(rt, &prefsrc); 252 res = !ipv6_addr_any(&prefsrc); 253 } 254 dst_release(dst); 255 return res; 256 } 257 258 /* 259 * an inline helper for the "simple" if statement below 260 * checks if parameter problem report is caused by an 261 * unrecognized IPv6 option that has the Option Type 262 * highest-order two bits set to 10 263 */ 264 265 static bool opt_unrec(struct sk_buff *skb, __u32 offset) 266 { 267 u8 _optval, *op; 268 269 offset += skb_network_offset(skb); 270 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval); 271 if (!op) 272 return true; 273 return (*op & 0xC0) == 0x80; 274 } 275 276 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, 277 struct icmp6hdr *thdr, int len) 278 { 279 struct sk_buff *skb; 280 struct icmp6hdr *icmp6h; 281 282 skb = skb_peek(&sk->sk_write_queue); 283 if (!skb) 284 return; 
285 286 icmp6h = icmp6_hdr(skb); 287 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr)); 288 icmp6h->icmp6_cksum = 0; 289 290 if (skb_queue_len(&sk->sk_write_queue) == 1) { 291 skb->csum = csum_partial(icmp6h, 292 sizeof(struct icmp6hdr), skb->csum); 293 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr, 294 &fl6->daddr, 295 len, fl6->flowi6_proto, 296 skb->csum); 297 } else { 298 __wsum tmp_csum = 0; 299 300 skb_queue_walk(&sk->sk_write_queue, skb) { 301 tmp_csum = csum_add(tmp_csum, skb->csum); 302 } 303 304 tmp_csum = csum_partial(icmp6h, 305 sizeof(struct icmp6hdr), tmp_csum); 306 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr, 307 &fl6->daddr, 308 len, fl6->flowi6_proto, 309 tmp_csum); 310 } 311 ip6_push_pending_frames(sk); 312 } 313 314 struct icmpv6_msg { 315 struct sk_buff *skb; 316 int offset; 317 uint8_t type; 318 }; 319 320 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) 321 { 322 struct icmpv6_msg *msg = (struct icmpv6_msg *) from; 323 struct sk_buff *org_skb = msg->skb; 324 __wsum csum; 325 326 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset, 327 to, len); 328 skb->csum = csum_block_add(skb->csum, csum, odd); 329 if (!(msg->type & ICMPV6_INFOMSG_MASK)) 330 nf_ct_attach(skb, org_skb); 331 return 0; 332 } 333 334 #if IS_ENABLED(CONFIG_IPV6_MIP6) 335 static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) 336 { 337 struct ipv6hdr *iph = ipv6_hdr(skb); 338 struct ipv6_destopt_hao *hao; 339 int off; 340 341 if (opt->dsthao) { 342 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO); 343 if (likely(off >= 0)) { 344 hao = (struct ipv6_destopt_hao *) 345 (skb_network_header(skb) + off); 346 swap(iph->saddr, hao->addr); 347 } 348 } 349 } 350 #else 351 static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {} 352 #endif 353 354 static struct dst_entry *icmpv6_route_lookup(struct net *net, 355 struct sk_buff *skb, 356 struct sock *sk, 357 struct 
flowi6 *fl6) 358 { 359 struct dst_entry *dst, *dst2; 360 struct flowi6 fl2; 361 int err; 362 363 err = ip6_dst_lookup(net, sk, &dst, fl6); 364 if (err) 365 return ERR_PTR(err); 366 367 /* 368 * We won't send icmp if the destination is known 369 * anycast unless we need to treat anycast as unicast. 370 */ 371 if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) && 372 ipv6_anycast_destination(dst, &fl6->daddr)) { 373 net_dbg_ratelimited("icmp6_send: acast source\n"); 374 dst_release(dst); 375 return ERR_PTR(-EINVAL); 376 } 377 378 /* No need to clone since we're just using its address. */ 379 dst2 = dst; 380 381 dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0); 382 if (!IS_ERR(dst)) { 383 if (dst != dst2) 384 return dst; 385 } else { 386 if (PTR_ERR(dst) == -EPERM) 387 dst = NULL; 388 else 389 return dst; 390 } 391 392 err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6); 393 if (err) 394 goto relookup_failed; 395 396 err = ip6_dst_lookup(net, sk, &dst2, &fl2); 397 if (err) 398 goto relookup_failed; 399 400 dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP); 401 if (!IS_ERR(dst2)) { 402 dst_release(dst); 403 dst = dst2; 404 } else { 405 err = PTR_ERR(dst2); 406 if (err == -EPERM) { 407 dst_release(dst); 408 return dst2; 409 } else 410 goto relookup_failed; 411 } 412 413 relookup_failed: 414 if (dst) 415 return dst; 416 return ERR_PTR(err); 417 } 418 419 static struct net_device *icmp6_dev(const struct sk_buff *skb) 420 { 421 struct net_device *dev = skb->dev; 422 423 /* for local traffic to local address, skb dev is the loopback 424 * device. Check if there is a dst attached to the skb and if so 425 * get the real device index. 
Same is needed for replies to a link 426 * local address on a device enslaved to an L3 master device 427 */ 428 if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) { 429 const struct rt6_info *rt6 = skb_rt6_info(skb); 430 431 /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.), 432 * and ip6_null_entry could be set to skb if no route is found. 433 */ 434 if (rt6 && rt6->rt6i_idev) 435 dev = rt6->rt6i_idev->dev; 436 } 437 438 return dev; 439 } 440 441 static int icmp6_iif(const struct sk_buff *skb) 442 { 443 return icmp6_dev(skb)->ifindex; 444 } 445 446 /* 447 * Send an ICMP message in response to a packet in error 448 */ 449 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, 450 const struct in6_addr *force_saddr, 451 const struct inet6_skb_parm *parm) 452 { 453 struct inet6_dev *idev = NULL; 454 struct ipv6hdr *hdr = ipv6_hdr(skb); 455 struct sock *sk; 456 struct net *net; 457 struct ipv6_pinfo *np; 458 const struct in6_addr *saddr = NULL; 459 bool apply_ratelimit = false; 460 struct dst_entry *dst; 461 struct icmp6hdr tmp_hdr; 462 struct flowi6 fl6; 463 struct icmpv6_msg msg; 464 struct ipcm6_cookie ipc6; 465 int iif = 0; 466 int addr_type = 0; 467 int len; 468 u32 mark; 469 470 if ((u8 *)hdr < skb->head || 471 (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) 472 return; 473 474 if (!skb->dev) 475 return; 476 net = dev_net(skb->dev); 477 mark = IP6_REPLY_MARK(net, skb->mark); 478 /* 479 * Make sure we respect the rules 480 * i.e. RFC 1885 2.4(e) 481 * Rule (e.1) is enforced by not using icmp6_send 482 * in any code that processes icmp errors. 
483 */ 484 addr_type = ipv6_addr_type(&hdr->daddr); 485 486 if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) || 487 ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr)) 488 saddr = &hdr->daddr; 489 490 /* 491 * Dest addr check 492 */ 493 494 if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) { 495 if (type != ICMPV6_PKT_TOOBIG && 496 !(type == ICMPV6_PARAMPROB && 497 code == ICMPV6_UNK_OPTION && 498 (opt_unrec(skb, info)))) 499 return; 500 501 saddr = NULL; 502 } 503 504 addr_type = ipv6_addr_type(&hdr->saddr); 505 506 /* 507 * Source addr check 508 */ 509 510 if (__ipv6_addr_needs_scope_id(addr_type)) { 511 iif = icmp6_iif(skb); 512 } else { 513 /* 514 * The source device is used for looking up which routing table 515 * to use for sending an ICMP error. 516 */ 517 iif = l3mdev_master_ifindex(skb->dev); 518 } 519 520 /* 521 * Must not send error if the source does not uniquely 522 * identify a single node (RFC2463 Section 2.4). 523 * We check unspecified / multicast addresses here, 524 * and anycast addresses will be checked later. 525 */ 526 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { 527 net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n", 528 &hdr->saddr, &hdr->daddr); 529 return; 530 } 531 532 /* 533 * Never answer to a ICMP packet. 
534 */ 535 if (is_ineligible(skb)) { 536 net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n", 537 &hdr->saddr, &hdr->daddr); 538 return; 539 } 540 541 /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */ 542 local_bh_disable(); 543 544 /* Check global sysctl_icmp_msgs_per_sec ratelimit */ 545 if (!(skb->dev->flags & IFF_LOOPBACK) && 546 !icmpv6_global_allow(net, type, &apply_ratelimit)) 547 goto out_bh_enable; 548 549 mip6_addr_swap(skb, parm); 550 551 sk = icmpv6_xmit_lock(net); 552 if (!sk) 553 goto out_bh_enable; 554 555 memset(&fl6, 0, sizeof(fl6)); 556 fl6.flowi6_proto = IPPROTO_ICMPV6; 557 fl6.daddr = hdr->saddr; 558 if (force_saddr) 559 saddr = force_saddr; 560 if (saddr) { 561 fl6.saddr = *saddr; 562 } else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) { 563 /* select a more meaningful saddr from input if */ 564 struct net_device *in_netdev; 565 566 in_netdev = dev_get_by_index(net, parm->iif); 567 if (in_netdev) { 568 ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr, 569 inet6_sk(sk)->srcprefs, 570 &fl6.saddr); 571 dev_put(in_netdev); 572 } 573 } 574 fl6.flowi6_mark = mark; 575 fl6.flowi6_oif = iif; 576 fl6.fl6_icmp_type = type; 577 fl6.fl6_icmp_code = code; 578 fl6.flowi6_uid = sock_net_uid(net, NULL); 579 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL); 580 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); 581 582 np = inet6_sk(sk); 583 584 if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit)) 585 goto out; 586 587 tmp_hdr.icmp6_type = type; 588 tmp_hdr.icmp6_code = code; 589 tmp_hdr.icmp6_cksum = 0; 590 tmp_hdr.icmp6_pointer = htonl(info); 591 592 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) 593 fl6.flowi6_oif = READ_ONCE(np->mcast_oif); 594 else if (!fl6.flowi6_oif) 595 fl6.flowi6_oif = READ_ONCE(np->ucast_oif); 596 597 ipcm6_init_sk(&ipc6, sk); 598 ipc6.sockc.mark = mark; 599 fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); 600 601 dst = icmpv6_route_lookup(net, skb, sk, 
&fl6); 602 if (IS_ERR(dst)) 603 goto out; 604 605 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); 606 607 msg.skb = skb; 608 msg.offset = skb_network_offset(skb); 609 msg.type = type; 610 611 len = skb->len - msg.offset; 612 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr)); 613 if (len < 0) { 614 net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n", 615 &hdr->saddr, &hdr->daddr); 616 goto out_dst_release; 617 } 618 619 rcu_read_lock(); 620 idev = __in6_dev_get(skb->dev); 621 622 if (ip6_append_data(sk, icmpv6_getfrag, &msg, 623 len + sizeof(struct icmp6hdr), 624 sizeof(struct icmp6hdr), 625 &ipc6, &fl6, dst_rt6_info(dst), 626 MSG_DONTWAIT)) { 627 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); 628 ip6_flush_pending_frames(sk); 629 } else { 630 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, 631 len + sizeof(struct icmp6hdr)); 632 } 633 rcu_read_unlock(); 634 out_dst_release: 635 dst_release(dst); 636 out: 637 icmpv6_xmit_unlock(sk); 638 out_bh_enable: 639 local_bh_enable(); 640 } 641 EXPORT_SYMBOL(icmp6_send); 642 643 /* Slightly more convenient version of icmp6_send with drop reasons. 
 */
void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
			      enum skb_drop_reason reason)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
	kfree_skb_reason(skb, reason);
}

/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available
 * @nhs is the size of the tunnel header(s) :
 *  Either an IPv4 header for SIT encap
 *         an IPv4 header + GRE header for GRE encap
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	/* A deep copy is needed only when we will rewrite the payload below. */
	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
			skb, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr, IP6CB(skb2));
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr, IP6CB(skb2));
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);

/* Reply to an (extended) echo request.  Returns SKB_CONSUMED on success,
 * otherwise a drop reason for the caller to free the skb with.
 */
static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	bool apply_ratelimit = false;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	SKB_DR(reason);
	bool acast;
	u8 type;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return reason;

	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return reason;

	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		type = ICMPV6_EXT_ECHO_REPLY;
	else
		type = ICMPV6_ECHO_REPLY;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = type;

	memset(&fl6, 0, sizeof(fl6));
	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));

	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = type;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = READ_ONCE(np->ucast_oif);

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = type;

	ipcm6_init_sk(&ipc6, sk);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.sockc.mark = mark;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
			goto out_dst_release;

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    dst_rt6_info(dst), MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
		reason = SKB_CONSUMED;
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
	return reason;
}

/* Deliver a received ICMPv6 error to the upper-layer protocol quoted inside
 * it (via inet6_protos err_handler) and to matching raw sockets.
 */
enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
				   u8 code, __be32 info)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(skb->dev);
	const struct inet6_protocol *ipprot;
	enum skb_drop_reason reason;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;

	reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr));
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	seg6_icmp_srh(skb, opt);

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0) {
			SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
			goto out;
		}
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Checkin header including 8 bytes of inner protocol header. */
	reason = pskb_may_pull_reason(skb, inner_offset + 8);
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, opt, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return SKB_CONSUMED;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
	return reason;
}

/*
 *	Handle icmp messages
 */

static int icmpv6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	struct net *net = dev_net(skb->dev);
	struct net_device *dev = icmp6_dev(skb);
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Temporarily point the network header at the quoted inner
		 * packet for the reverse policy check, then restore it.
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
						skb)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			reason = icmpv6_echo_reply(skb);
		break;
	case ICMPV6_EXT_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
			reason = icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		reason = ping_rcv(skb);
		break;

	case ICMPV6_EXT_ECHO_REPLY:
		reason = ping_rcv(skb);
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/* to notify */
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		reason = ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		return 0;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		return 0;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (reason)
		kfree_skb_reason(skb, reason);
	else
		consume_skb(skb);

	return 0;

csum_error:
	reason = SKB_DROP_REASON_ICMP_CSUM;
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb_reason(skb, reason);
	return 0;
}

/* Initialize a flowi6 for an ICMPv6 transmission with the given type,
 * addresses and outgoing interface.
 */
void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
		      const struct in6_addr *saddr,
		      const struct in6_addr *daddr, int oif)
{
	memset(fl6, 0, sizeof(*fl6));
	fl6->saddr = *saddr;
	fl6->daddr = *daddr;
	fl6->flowi6_proto	= IPPROTO_ICMPV6;
	fl6->fl6_icmp_type	= type;
	fl6->fl6_icmp_code	= 0;
	fl6->flowi6_oif		= oif;
	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
}

int __init icmpv6_init(void)
{
	struct sock *sk;
	int err, i;

	/* Create one control socket per possible CPU. */
	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			return err;
		}

		per_cpu(ipv6_icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	return err;
}

void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}


/* Mapping of ICMPV6_DEST_UNREACH codes to errno / fatality, indexed by code. */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};

/* Translate an ICMPv6 type/code into an errno value (*err); returns
 * non-zero when the error is fatal for the connection.
 */
int icmpv6_err_convert(u8 type, u8 code, int *err)
{
	int fatal = 0;

	*err = EPROTO;

	switch (type) {
	case ICMPV6_DEST_UNREACH:
		fatal = 1;
		if (code < ARRAY_SIZE(tab_unreach)) {
			*err  = tab_unreach[code].err;
			fatal = tab_unreach[code].fatal;
		}
		break;

	case ICMPV6_PKT_TOOBIG:
		*err = EMSGSIZE;
		break;

	case ICMPV6_PARAMPROB:
		*err = EPROTO;
		fatal = 1;
		break;

	case ICMPV6_TIME_EXCEED:
		*err = EHOSTUNREACH;
		break;
	}

	return fatal;
}
EXPORT_SYMBOL(icmpv6_err_convert);

#ifdef CONFIG_SYSCTL
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler = proc_do_large_bitmap,
	},
	{
		.procname	= "error_anycast_as_unicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
};

/* Duplicate the sysctl template for a netns and point each entry's data
 * at that netns's fields; index order must match the template above.
 */
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_icmp_table_template,
			sizeof(ipv6_icmp_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.icmpv6_time;
		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
		table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast;
	}
	return table;
}

size_t ipv6_icmp_sysctl_table_size(void)
{
	return ARRAY_SIZE(ipv6_icmp_table_template);
}
#endif
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.