~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~
af_inet.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~
  1 // SPDX-License-Identifier: GPL-2.0-or-later
  2 /*
  3  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  4  *              operating system.  INET is implemented using the  BSD Socket
  5  *              interface as the means of communication with the user level.
  6  *
  7  *              PF_INET protocol family socket handler.
  8  *
  9  * Authors:     Ross Biro
 10  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 11  *              Florian La Roche, <flla@stud.uni-sb.de>
 12  *              Alan Cox, <A.Cox@swansea.ac.uk>
 13  *
 14  * Changes (see also sock.c)
 15  *
 16  *              piggy,
 17  *              Karl Knutson    :       Socket protocol table
 18  *              A.N.Kuznetsov   :       Socket death error in accept().
 19  *              John Richardson :       Fix non blocking error in connect()
 20  *                                      so sockets that fail to connect
 21  *                                      don't return -EINPROGRESS.
 22  *              Alan Cox        :       Asynchronous I/O support
 23  *              Alan Cox        :       Keep correct socket pointer on sock
 24  *                                      structures
 25  *                                      when accept() ed
 26  *              Alan Cox        :       Semantics of SO_LINGER aren't state
 27  *                                      moved to close when you look carefully.
 28  *                                      With this fixed and the accept bug fixed
 29  *                                      some RPC stuff seems happier.
 30  *              Niibe Yutaka    :       4.4BSD style write async I/O
 31  *              Alan Cox,
 32  *              Tony Gale       :       Fixed reuse semantics.
 33  *              Alan Cox        :       bind() shouldn't abort existing but dead
 34  *                                      sockets. Stops FTP netin:.. I hope.
 35  *              Alan Cox        :       bind() works correctly for RAW sockets.
 36  *                                      Note that FreeBSD at least was broken
 37  *                                      in this respect so be careful with
 38  *                                      compatibility tests...
 39  *              Alan Cox        :       routing cache support
 40  *              Alan Cox        :       memzero the socket structure for
 41  *                                      compactness.
 42  *              Matt Day        :       nonblock connect error handler
 43  *              Alan Cox        :       Allow large numbers of pending sockets
 44  *                                      (eg for big web sites), but only if
 45  *                                      specifically application requested.
 46  *              Alan Cox        :       New buffering throughout IP. Used
 47  *                                      dumbly.
 48  *              Alan Cox        :       New buffering now used smartly.
 49  *              Alan Cox        :       BSD rather than common sense
 50  *                                      interpretation of listen.
 51  *              Germano Caronni :       Assorted small races.
 52  *              Alan Cox        :       sendmsg/recvmsg basic support.
 53  *              Alan Cox        :       Only sendmsg/recvmsg now supported.
 54  *              Alan Cox        :       Locked down bind (see security list).
 55  *              Alan Cox        :       Loosened bind a little.
 56  *              Mike McLagan    :       ADD/DEL DLCI Ioctls
 57  *      Willy Konynenberg       :       Transparent proxying support.
 58  *              David S. Miller :       New socket lookup architecture.
 59  *                                      Some other random speedups.
 60  *              Cyrus Durgin    :       Cleaned up file for kmod hacks.
 61  *              Andi Kleen      :       Fix inet_stream_connect TCP race.
 62  */
 63 
 64 #define pr_fmt(fmt) "IPv4: " fmt
 65 
 66 #include <linux/err.h>
 67 #include <linux/errno.h>
 68 #include <linux/types.h>
 69 #include <linux/socket.h>
 70 #include <linux/in.h>
 71 #include <linux/kernel.h>
 72 #include <linux/kmod.h>
 73 #include <linux/sched.h>
 74 #include <linux/timer.h>
 75 #include <linux/string.h>
 76 #include <linux/sockios.h>
 77 #include <linux/net.h>
 78 #include <linux/capability.h>
 79 #include <linux/fcntl.h>
 80 #include <linux/mm.h>
 81 #include <linux/interrupt.h>
 82 #include <linux/stat.h>
 83 #include <linux/init.h>
 84 #include <linux/poll.h>
 85 #include <linux/netfilter_ipv4.h>
 86 #include <linux/random.h>
 87 #include <linux/slab.h>
 88 
 89 #include <linux/uaccess.h>
 90 
 91 #include <linux/inet.h>
 92 #include <linux/igmp.h>
 93 #include <linux/inetdevice.h>
 94 #include <linux/netdevice.h>
 95 #include <net/checksum.h>
 96 #include <net/ip.h>
 97 #include <net/protocol.h>
 98 #include <net/arp.h>
 99 #include <net/route.h>
100 #include <net/ip_fib.h>
101 #include <net/inet_connection_sock.h>
102 #include <net/gro.h>
103 #include <net/gso.h>
104 #include <net/tcp.h>
105 #include <net/udp.h>
106 #include <net/udplite.h>
107 #include <net/ping.h>
108 #include <linux/skbuff.h>
109 #include <net/sock.h>
110 #include <net/raw.h>
111 #include <net/icmp.h>
112 #include <net/inet_common.h>
113 #include <net/ip_tunnels.h>
114 #include <net/xfrm.h>
115 #include <net/net_namespace.h>
116 #include <net/secure_seq.h>
117 #ifdef CONFIG_IP_MROUTE
118 #include <linux/mroute.h>
119 #endif
120 #include <net/l3mdev.h>
121 #include <net/compat.h>
122 #include <net/rps.h>
123 
124 #include <trace/events/sock.h>
125 
126 /* The inetsw table contains everything that inet_create needs to
127  * build a new socket.
128  */
/* One list head per socket type: inet_create() walks inetsw[sock->type]
 * under rcu_read_lock() to find the matching inet_protosw entry.
 */
129 static struct list_head inetsw[SOCK_MAX];
/* NOTE(review): inetsw_lock is not taken anywhere in the visible part of
 * this file.  Presumably it serializes the writers that add/remove entries
 * on the inetsw[] lists -- confirm against the protosw (un)registration code.
 */
130 static DEFINE_SPINLOCK(inetsw_lock);
131 
132 /* New destruction routine */
133 
134 void inet_sock_destruct(struct sock *sk)
135 {
136         struct inet_sock *inet = inet_sk(sk);
137 
138         __skb_queue_purge(&sk->sk_receive_queue);
139         __skb_queue_purge(&sk->sk_error_queue);
140 
141         sk_mem_reclaim_final(sk);
142 
143         if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
144                 pr_err("Attempt to release TCP socket in state %d %p\n",
145                        sk->sk_state, sk);
146                 return;
147         }
148         if (!sock_flag(sk, SOCK_DEAD)) {
149                 pr_err("Attempt to release alive inet socket %p\n", sk);
150                 return;
151         }
152 
153         WARN_ON_ONCE(atomic_read(&sk->sk_rmem_alloc));
154         WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
155         WARN_ON_ONCE(sk->sk_wmem_queued);
156         WARN_ON_ONCE(sk_forward_alloc_get(sk));
157 
158         kfree(rcu_dereference_protected(inet->inet_opt, 1));
159         dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
160         dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1));
161 }
162 EXPORT_SYMBOL(inet_sock_destruct);
163 
164 /*
165  *      The routines beyond this point handle the behaviour of an AF_INET
166  *      socket object. Mostly it punts to the subprotocols of IP to do
167  *      the work.
168  */
169 
170 /*
171  *      Automatically bind an unbound socket.
172  */
173 
174 static int inet_autobind(struct sock *sk)
175 {
176         struct inet_sock *inet;
177         /* We may need to bind the socket. */
178         lock_sock(sk);
179         inet = inet_sk(sk);
180         if (!inet->inet_num) {
181                 if (sk->sk_prot->get_port(sk, 0)) {
182                         release_sock(sk);
183                         return -EAGAIN;
184                 }
185                 inet->inet_sport = htons(inet->inet_num);
186         }
187         release_sock(sk);
188         return 0;
189 }
190 
191 int __inet_listen_sk(struct sock *sk, int backlog)
192 {
193         unsigned char old_state = sk->sk_state;
194         int err, tcp_fastopen;
195 
196         if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
197                 return -EINVAL;
198 
199         WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
200         /* Really, if the socket is already in listen state
201          * we can only allow the backlog to be adjusted.
202          */
203         if (old_state != TCP_LISTEN) {
204                 /* Enable TFO w/o requiring TCP_FASTOPEN socket option.
205                  * Note that only TCP sockets (SOCK_STREAM) will reach here.
206                  * Also fastopen backlog may already been set via the option
207                  * because the socket was in TCP_LISTEN state previously but
208                  * was shutdown() rather than close().
209                  */
210                 tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
211                 if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
212                     (tcp_fastopen & TFO_SERVER_ENABLE) &&
213                     !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
214                         fastopen_queue_tune(sk, backlog);
215                         tcp_fastopen_init_key_once(sock_net(sk));
216                 }
217 
218                 err = inet_csk_listen_start(sk);
219                 if (err)
220                         return err;
221 
222                 tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL);
223         }
224         return 0;
225 }
226 
227 /*
228  *      Move a socket into listening state.
229  */
230 int inet_listen(struct socket *sock, int backlog)
231 {
232         struct sock *sk = sock->sk;
233         int err = -EINVAL;
234 
235         lock_sock(sk);
236 
237         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
238                 goto out;
239 
240         err = __inet_listen_sk(sk, backlog);
241 
242 out:
243         release_sock(sk);
244         return err;
245 }
246 EXPORT_SYMBOL(inet_listen);
247 
248 /*
249  *      Create an inet socket.
250  */
251 
252 static int inet_create(struct net *net, struct socket *sock, int protocol,
253                        int kern)
254 {
255         struct sock *sk;
256         struct inet_protosw *answer;
257         struct inet_sock *inet;
258         struct proto *answer_prot;
259         unsigned char answer_flags;
260         int try_loading_module = 0;
261         int err;
262 
263         if (protocol < 0 || protocol >= IPPROTO_MAX)
264                 return -EINVAL;
265 
266         sock->state = SS_UNCONNECTED;
267 
268         /* Look for the requested type/protocol pair. */
269 lookup_protocol:
270         err = -ESOCKTNOSUPPORT;
271         rcu_read_lock();
272         list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
273 
274                 err = 0;
275                 /* Check the non-wild match. */
276                 if (protocol == answer->protocol) {
277                         if (protocol != IPPROTO_IP)
278                                 break;
279                 } else {
280                         /* Check for the two wild cases. */
281                         if (IPPROTO_IP == protocol) {
282                                 protocol = answer->protocol;
283                                 break;
284                         }
285                         if (IPPROTO_IP == answer->protocol)
286                                 break;
287                 }
288                 err = -EPROTONOSUPPORT;
289         }
290 
291         if (unlikely(err)) {
292                 if (try_loading_module < 2) {
293                         rcu_read_unlock();
294                         /*
295                          * Be more specific, e.g. net-pf-2-proto-132-type-1
296                          * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
297                          */
298                         if (++try_loading_module == 1)
299                                 request_module("net-pf-%d-proto-%d-type-%d",
300                                                PF_INET, protocol, sock->type);
301                         /*
302                          * Fall back to generic, e.g. net-pf-2-proto-132
303                          * (net-pf-PF_INET-proto-IPPROTO_SCTP)
304                          */
305                         else
306                                 request_module("net-pf-%d-proto-%d",
307                                                PF_INET, protocol);
308                         goto lookup_protocol;
309                 } else
310                         goto out_rcu_unlock;
311         }
312 
313         err = -EPERM;
314         if (sock->type == SOCK_RAW && !kern &&
315             !ns_capable(net->user_ns, CAP_NET_RAW))
316                 goto out_rcu_unlock;
317 
318         sock->ops = answer->ops;
319         answer_prot = answer->prot;
320         answer_flags = answer->flags;
321         rcu_read_unlock();
322 
323         WARN_ON(!answer_prot->slab);
324 
325         err = -ENOMEM;
326         sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
327         if (!sk)
328                 goto out;
329 
330         err = 0;
331         if (INET_PROTOSW_REUSE & answer_flags)
332                 sk->sk_reuse = SK_CAN_REUSE;
333 
334         if (INET_PROTOSW_ICSK & answer_flags)
335                 inet_init_csk_locks(sk);
336 
337         inet = inet_sk(sk);
338         inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
339 
340         inet_clear_bit(NODEFRAG, sk);
341 
342         if (SOCK_RAW == sock->type) {
343                 inet->inet_num = protocol;
344                 if (IPPROTO_RAW == protocol)
345                         inet_set_bit(HDRINCL, sk);
346         }
347 
348         if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
349                 inet->pmtudisc = IP_PMTUDISC_DONT;
350         else
351                 inet->pmtudisc = IP_PMTUDISC_WANT;
352 
353         atomic_set(&inet->inet_id, 0);
354 
355         sock_init_data(sock, sk);
356 
357         sk->sk_destruct    = inet_sock_destruct;
358         sk->sk_protocol    = protocol;
359         sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
360         sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
361 
362         inet->uc_ttl    = -1;
363         inet_set_bit(MC_LOOP, sk);
364         inet->mc_ttl    = 1;
365         inet_set_bit(MC_ALL, sk);
366         inet->mc_index  = 0;
367         inet->mc_list   = NULL;
368         inet->rcv_tos   = 0;
369 
370         if (inet->inet_num) {
371                 /* It assumes that any protocol which allows
372                  * the user to assign a number at socket
373                  * creation time automatically
374                  * shares.
375                  */
376                 inet->inet_sport = htons(inet->inet_num);
377                 /* Add to protocol hash chains. */
378                 err = sk->sk_prot->hash(sk);
379                 if (err) {
380                         sk_common_release(sk);
381                         goto out;
382                 }
383         }
384 
385         if (sk->sk_prot->init) {
386                 err = sk->sk_prot->init(sk);
387                 if (err) {
388                         sk_common_release(sk);
389                         goto out;
390                 }
391         }
392 
393         if (!kern) {
394                 err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
395                 if (err) {
396                         sk_common_release(sk);
397                         goto out;
398                 }
399         }
400 out:
401         return err;
402 out_rcu_unlock:
403         rcu_read_unlock();
404         goto out;
405 }
406 
407 
408 /*
409  *      The peer socket should always be NULL (or else). When we call this
410  *      function we are destroying the object and from then on nobody
411  *      should refer to it.
412  */
413 int inet_release(struct socket *sock)
414 {
415         struct sock *sk = sock->sk;
416 
417         if (sk) {
418                 long timeout;
419 
420                 if (!sk->sk_kern_sock)
421                         BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk);
422 
423                 /* Applications forget to leave groups before exiting */
424                 ip_mc_drop_socket(sk);
425 
426                 /* If linger is set, we don't return until the close
427                  * is complete.  Otherwise we return immediately. The
428                  * actually closing is done the same either way.
429                  *
430                  * If the close is due to the process exiting, we never
431                  * linger..
432                  */
433                 timeout = 0;
434                 if (sock_flag(sk, SOCK_LINGER) &&
435                     !(current->flags & PF_EXITING))
436                         timeout = sk->sk_lingertime;
437                 sk->sk_prot->close(sk, timeout);
438                 sock->sk = NULL;
439         }
440         return 0;
441 }
442 EXPORT_SYMBOL(inet_release);
443 
444 int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
445 {
446         u32 flags = BIND_WITH_LOCK;
447         int err;
448 
449         /* If the socket has its own bind function then use it. (RAW) */
450         if (sk->sk_prot->bind) {
451                 return sk->sk_prot->bind(sk, uaddr, addr_len);
452         }
453         if (addr_len < sizeof(struct sockaddr_in))
454                 return -EINVAL;
455 
456         /* BPF prog is run before any checks are done so that if the prog
457          * changes context in a wrong way it will be caught.
458          */
459         err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len,
460                                                  CGROUP_INET4_BIND, &flags);
461         if (err)
462                 return err;
463 
464         return __inet_bind(sk, uaddr, addr_len, flags);
465 }
466 
467 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
468 {
469         return inet_bind_sk(sock->sk, uaddr, addr_len);
470 }
471 EXPORT_SYMBOL(inet_bind);
472 
/*
 *      Bind @sk to the IPv4 address and port carried in @uaddr.
 *
 *      @addr_len is not examined here; inet_bind_sk() checks it against
 *      sizeof(struct sockaddr_in) before calling.
 *
 *      @flags:
 *        BIND_WITH_LOCK                - take and release the socket lock here
 *        BIND_NO_CAP_NET_BIND_SERVICE  - skip the CAP_NET_BIND_SERVICE check
 *        BIND_FORCE_ADDRESS_NO_PORT    - with port 0, do not call get_port()
 *        BIND_FROM_BPF                 - skip the cgroup post-bind hook
 *
 *      Returns 0 on success, or:
 *        -EAFNOSUPPORT   family is neither AF_INET nor (AF_UNSPEC + INADDR_ANY)
 *        -EADDRNOTAVAIL  address rejected by inet_addr_valid_or_nonlocal()
 *        -EACCES         port needs CAP_NET_BIND_SERVICE and caller lacks it
 *        -EINVAL         socket is not in TCP_CLOSE or already has a port
 *        or the error from get_port() / the post-bind BPF hook.
 */
473 int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
474                 u32 flags)
475 {
476         struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
477         struct inet_sock *inet = inet_sk(sk);
478         struct net *net = sock_net(sk);
479         unsigned short snum;
480         int chk_addr_ret;
481         u32 tb_id = RT_TABLE_LOCAL;
482         int err;
483 
484         if (addr->sin_family != AF_INET) {
485                 /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
486                  * only if s_addr is INADDR_ANY.
487                  */
488                 err = -EAFNOSUPPORT;
489                 if (addr->sin_family != AF_UNSPEC ||
490                     addr->sin_addr.s_addr != htonl(INADDR_ANY))
491                         goto out;
492         }
493 
        /* Use the l3mdev FIB table for the bound device if there is one,
         * otherwise keep RT_TABLE_LOCAL, and classify the address in it.
         */
494         tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
495         chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
496 
497         /* Not specified by any standard per-se, however it breaks too
498          * many applications when removed.  It is unfortunate since
499          * allowing applications to make a non-local bind solves
500          * several problems with systems using dynamic addressing.
501          * (ie. your servers still start up even if your ISDN link
502          *  is temporarily down)
503          */
504         err = -EADDRNOTAVAIL;
505         if (!inet_addr_valid_or_nonlocal(net, inet, addr->sin_addr.s_addr,
506                                          chk_addr_ret))
507                 goto out;
508 
509         snum = ntohs(addr->sin_port);
510         err = -EACCES;
511         if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
512             snum && inet_port_requires_bind_service(net, snum) &&
513             !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
514                 goto out;
515 
516         /*      We keep a pair of addresses. rcv_saddr is the one
517          *      used by hash lookups, and saddr is used for transmit.
518          *
519          *      In the BSD API these are the same except where it
520          *      would be illegal to use them (multicast/broadcast) in
521          *      which case the sending device address is used.
522          */
        /* Everything up to here only inspected the request; from here on
         * socket state is read and modified, so the lock is taken (unless
         * the caller did not pass BIND_WITH_LOCK).
         */
523         if (flags & BIND_WITH_LOCK)
524                 lock_sock(sk);
525 
526         /* Check these errors (active socket, double bind). */
527         err = -EINVAL;
528         if (sk->sk_state != TCP_CLOSE || inet->inet_num)
529                 goto out_release_sock;
530 
531         inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
532         if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
533                 inet->inet_saddr = 0;  /* Use device */
534 
535         /* Make sure we are allowed to bind here. */
        /* get_port() is called when an explicit port was given, or when
         * neither the socket's BIND_ADDRESS_NO_PORT bit nor
         * BIND_FORCE_ADDRESS_NO_PORT is set.
         */
536         if (snum || !(inet_test_bit(BIND_ADDRESS_NO_PORT, sk) ||
537                       (flags & BIND_FORCE_ADDRESS_NO_PORT))) {
538                 err = sk->sk_prot->get_port(sk, snum);
539                 if (err) {
                        /* Undo the address assignment made above. */
540                         inet->inet_saddr = inet->inet_rcv_saddr = 0;
541                         goto out_release_sock;
542                 }
543                 if (!(flags & BIND_FROM_BPF)) {
544                         err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
545                         if (err) {
                                /* Hook rejected the bind: clear the
                                 * addresses and give the port back.
                                 */
546                                 inet->inet_saddr = inet->inet_rcv_saddr = 0;
547                                 if (sk->sk_prot->put_port)
548                                         sk->sk_prot->put_port(sk);
549                                 goto out_release_sock;
550                         }
551                 }
552         }
553 
        /* Record which parts of the local endpoint were explicitly bound. */
554         if (inet->inet_rcv_saddr)
555                 sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
556         if (snum)
557                 sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
558         inet->inet_sport = htons(inet->inet_num);
        /* Clear the remote endpoint and drop the cached route. */
559         inet->inet_daddr = 0;
560         inet->inet_dport = 0;
561         sk_dst_reset(sk);
562         err = 0;
563 out_release_sock:
564         if (flags & BIND_WITH_LOCK)
565                 release_sock(sk);
566 out:
567         return err;
568 }
569 
570 int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
571                        int addr_len, int flags)
572 {
573         struct sock *sk = sock->sk;
574         const struct proto *prot;
575         int err;
576 
577         if (addr_len < sizeof(uaddr->sa_family))
578                 return -EINVAL;
579 
580         /* IPV6_ADDRFORM can change sk->sk_prot under us. */
581         prot = READ_ONCE(sk->sk_prot);
582 
583         if (uaddr->sa_family == AF_UNSPEC)
584                 return prot->disconnect(sk, flags);
585 
586         if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
587                 err = prot->pre_connect(sk, uaddr, addr_len);
588                 if (err)
589                         return err;
590         }
591 
592         if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk))
593                 return -EAGAIN;
594         return prot->connect(sk, uaddr, addr_len);
595 }
596 EXPORT_SYMBOL(inet_dgram_connect);
597 
598 static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
599 {
600         DEFINE_WAIT_FUNC(wait, woken_wake_function);
601 
602         add_wait_queue(sk_sleep(sk), &wait);
603         sk->sk_write_pending += writebias;
604 
605         /* Basic assumption: if someone sets sk->sk_err, he _must_
606          * change state of the socket from TCP_SYN_*.
607          * Connect() does not allow to get error notifications
608          * without closing the socket.
609          */
610         while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
611                 release_sock(sk);
612                 timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
613                 lock_sock(sk);
614                 if (signal_pending(current) || !timeo)
615                         break;
616         }
617         remove_wait_queue(sk_sleep(sk), &wait);
618         sk->sk_write_pending -= writebias;
619         return timeo;
620 }
621 
622 /*
623  *      Connect to a remote host. There is regrettably still a little
624  *      TCP 'magic' in here.
625  */
/*
 *      Core of connect() for stream sockets.  inet_stream_connect() calls
 *      this with the socket lock held; the lock is dropped and retaken only
 *      inside inet_wait_for_connect().
 *
 *      @is_sendmsg only matters for a socket that is SS_CONNECTING with
 *      DEFER_CONNECT set: it selects -EINPROGRESS instead of -EISCONN.
 *
 *      Returns 0 once the socket is connected, or:
 *        -EINVAL       @addr_len too small, or unexpected sock->state
 *        -EISCONN      already connected (see the switch for the variants)
 *        -EALREADY     a connect is already in progress
 *        -EINPROGRESS  connect started but not finished (non-blocking, or
 *                      the wait timed out)
 *        -EPIPE        the socket was disconnected while we slept
 *        the sock_intr_errno() value if a signal interrupted the wait,
 *        the pending socket error (or -ECONNABORTED) if the socket ended
 *        up in TCP_CLOSE, or the error from the protocol's
 *        disconnect()/pre_connect()/connect() callbacks.
 */
626 int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
627                           int addr_len, int flags, int is_sendmsg)
628 {
629         struct sock *sk = sock->sk;
630         int err;
631         long timeo;
632 
633         /*
634          * uaddr can be NULL and addr_len can be 0 if:
635          * sk is a TCP fastopen active socket and
636          * TCP_FASTOPEN_CONNECT sockopt is set and
637          * we already have a valid cookie for this socket.
638          * In this case, user can call write() after connect().
639          * write() will invoke tcp_sendmsg_fastopen() which calls
640          * __inet_stream_connect().
641          */
642         if (uaddr) {
643                 if (addr_len < sizeof(uaddr->sa_family))
644                         return -EINVAL;
645 
                /* connect() to AF_UNSPEC means "disconnect".  The counter
                 * bump lets a sleeper below notice it was disconnected.
                 */
646                 if (uaddr->sa_family == AF_UNSPEC) {
647                         sk->sk_disconnects++;
648                         err = sk->sk_prot->disconnect(sk, flags);
649                         sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
650                         goto out;
651                 }
652         }
653 
654         switch (sock->state) {
655         default:
656                 err = -EINVAL;
657                 goto out;
658         case SS_CONNECTED:
659                 err = -EISCONN;
660                 goto out;
661         case SS_CONNECTING:
662                 if (inet_test_bit(DEFER_CONNECT, sk))
663                         err = is_sendmsg ? -EINPROGRESS : -EISCONN;
664                 else
665                         err = -EALREADY;
666                 /* Fall out of switch with err, set for this state */
667                 break;
668         case SS_UNCONNECTED:
669                 err = -EISCONN;
670                 if (sk->sk_state != TCP_CLOSE)
671                         goto out;
672 
673                 if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
674                         err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
675                         if (err)
676                                 goto out;
677                 }
678 
679                 err = sk->sk_prot->connect(sk, uaddr, addr_len);
680                 if (err < 0)
681                         goto out;
682 
683                 sock->state = SS_CONNECTING;
684 
                /* With DEFER_CONNECT set, connect() returns 0 right here
                 * while sock->state stays SS_CONNECTING.
                 */
685                 if (!err && inet_test_bit(DEFER_CONNECT, sk))
686                         goto out;
687 
688                 /* Just entered SS_CONNECTING state; the only
689                  * difference is that return value in non-blocking
690                  * case is EINPROGRESS, rather than EALREADY.
691                  */
692                 err = -EINPROGRESS;
693                 break;
694         }
695 
        /* timeo is the send timeout, forced to "no wait" for O_NONBLOCK. */
696         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
697 
698         if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
                /* writebias is 1 only for a TCP socket whose pending
                 * fastopen request carries data.
                 */
699                 int writebias = (sk->sk_protocol == IPPROTO_TCP) &&
700                                 tcp_sk(sk)->fastopen_req &&
701                                 tcp_sk(sk)->fastopen_req->data ? 1 : 0;
                /* Snapshot, compared after the sleep to detect a disconnect
                 * that happened while the lock was dropped.
                 */
702                 int dis = sk->sk_disconnects;
703 
704                 /* Error code is set above */
705                 if (!timeo || !inet_wait_for_connect(sk, timeo, writebias))
706                         goto out;
707 
708                 err = sock_intr_errno(timeo);
709                 if (signal_pending(current))
710                         goto out;
711 
712                 if (dis != sk->sk_disconnects) {
713                         err = -EPIPE;
714                         goto out;
715                 }
716         }
717 
718         /* Connection was closed by RST, timeout, ICMP error
719          * or another process disconnected us.
720          */
721         if (sk->sk_state == TCP_CLOSE)
722                 goto sock_error;
723 
724         /* sk->sk_err may be not zero now, if RECVERR was ordered by user
725          * and error was received after socket entered established state.
726          * Hence, it is handled normally after connect() return successfully.
727          */
728 
729         sock->state = SS_CONNECTED;
730         err = 0;
731 out:
732         return err;
733 
        /* Failed connect: report the pending socket error (-ECONNABORTED if
         * there is none) and disconnect so the socket can be reused.
         */
734 sock_error:
735         err = sock_error(sk) ? : -ECONNABORTED;
736         sock->state = SS_UNCONNECTED;
737         sk->sk_disconnects++;
738         if (sk->sk_prot->disconnect(sk, flags))
739                 sock->state = SS_DISCONNECTING;
740         goto out;
741 }
742 EXPORT_SYMBOL(__inet_stream_connect);
743 
744 int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
745                         int addr_len, int flags)
746 {
747         int err;
748 
749         lock_sock(sock->sk);
750         err = __inet_stream_connect(sock, uaddr, addr_len, flags, 0);
751         release_sock(sock->sk);
752         return err;
753 }
754 EXPORT_SYMBOL(inet_stream_connect);
755 
756 void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *newsk)
757 {
758         sock_rps_record_flow(newsk);
759         WARN_ON(!((1 << newsk->sk_state) &
760                   (TCPF_ESTABLISHED | TCPF_SYN_RECV |
761                    TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 |
762                    TCPF_CLOSING | TCPF_CLOSE_WAIT |
763                    TCPF_CLOSE)));
764 
765         if (test_bit(SOCK_SUPPORT_ZC, &sock->flags))
766                 set_bit(SOCK_SUPPORT_ZC, &newsock->flags);
767         sock_graft(newsk, newsock);
768 
769         newsock->state = SS_CONNECTED;
770 }
771 
772 /*
773  *      Accept a pending connection. The TCP layer now gives BSD semantics.
774  */
775 
776 int inet_accept(struct socket *sock, struct socket *newsock,
777                 struct proto_accept_arg *arg)
778 {
779         struct sock *sk1 = sock->sk, *sk2;
780 
781         /* IPV6_ADDRFORM can change sk->sk_prot under us. */
782         arg->err = -EINVAL;
783         sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, arg);
784         if (!sk2)
785                 return arg->err;
786 
787         lock_sock(sk2);
788         __inet_accept(sock, newsock, sk2);
789         release_sock(sk2);
790         return 0;
791 }
792 EXPORT_SYMBOL(inet_accept);
793 
794 /*
795  *      This does both peername and sockname.
796  */
797 int inet_getname(struct socket *sock, struct sockaddr *uaddr,
798                  int peer)
799 {
800         struct sock *sk         = sock->sk;
801         struct inet_sock *inet  = inet_sk(sk);
802         DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr);
803         int sin_addr_len = sizeof(*sin);
804 
805         sin->sin_family = AF_INET;
806         lock_sock(sk);
807         if (peer) {
808                 if (!inet->inet_dport ||
809                     (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
810                      peer == 1)) {
811                         release_sock(sk);
812                         return -ENOTCONN;
813                 }
814                 sin->sin_port = inet->inet_dport;
815                 sin->sin_addr.s_addr = inet->inet_daddr;
816                 BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
817                                        CGROUP_INET4_GETPEERNAME);
818         } else {
819                 __be32 addr = inet->inet_rcv_saddr;
820                 if (!addr)
821                         addr = inet->inet_saddr;
822                 sin->sin_port = inet->inet_sport;
823                 sin->sin_addr.s_addr = addr;
824                 BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
825                                        CGROUP_INET4_GETSOCKNAME);
826         }
827         release_sock(sk);
828         memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
829         return sin_addr_len;
830 }
831 EXPORT_SYMBOL(inet_getname);
832 
833 int inet_send_prepare(struct sock *sk)
834 {
835         sock_rps_record_flow(sk);
836 
837         /* We may need to bind the socket. */
838         if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind &&
839             inet_autobind(sk))
840                 return -EAGAIN;
841 
842         return 0;
843 }
844 EXPORT_SYMBOL_GPL(inet_send_prepare);
845 
846 int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
847 {
848         struct sock *sk = sock->sk;
849 
850         if (unlikely(inet_send_prepare(sk)))
851                 return -EAGAIN;
852 
853         return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udp_sendmsg,
854                                sk, msg, size);
855 }
856 EXPORT_SYMBOL(inet_sendmsg);
857 
858 void inet_splice_eof(struct socket *sock)
859 {
860         const struct proto *prot;
861         struct sock *sk = sock->sk;
862 
863         if (unlikely(inet_send_prepare(sk)))
864                 return;
865 
866         /* IPV6_ADDRFORM can change sk->sk_prot under us. */
867         prot = READ_ONCE(sk->sk_prot);
868         if (prot->splice_eof)
869                 prot->splice_eof(sock);
870 }
871 EXPORT_SYMBOL_GPL(inet_splice_eof);
872 
873 INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *,
874                                           size_t, int, int *));
875 int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
876                  int flags)
877 {
878         struct sock *sk = sock->sk;
879         int addr_len = 0;
880         int err;
881 
882         if (likely(!(flags & MSG_ERRQUEUE)))
883                 sock_rps_record_flow(sk);
884 
885         err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg,
886                               sk, msg, size, flags, &addr_len);
887         if (err >= 0)
888                 msg->msg_namelen = addr_len;
889         return err;
890 }
891 EXPORT_SYMBOL(inet_recvmsg);
892 
893 int inet_shutdown(struct socket *sock, int how)
894 {
895         struct sock *sk = sock->sk;
896         int err = 0;
897 
898         /* This should really check to make sure
899          * the socket is a TCP socket. (WHY AC...)
900          */
901         how++; /* maps 0->1 has the advantage of making bit 1 rcvs and
902                        1->2 bit 2 snds.
903                        2->3 */
904         if ((how & ~SHUTDOWN_MASK) || !how)     /* MAXINT->0 */
905                 return -EINVAL;
906 
907         lock_sock(sk);
908         if (sock->state == SS_CONNECTING) {
909                 if ((1 << sk->sk_state) &
910                     (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
911                         sock->state = SS_DISCONNECTING;
912                 else
913                         sock->state = SS_CONNECTED;
914         }
915 
916         switch (sk->sk_state) {
917         case TCP_CLOSE:
918                 err = -ENOTCONN;
919                 /* Hack to wake up other listeners, who can poll for
920                    EPOLLHUP, even on eg. unconnected UDP sockets -- RR */
921                 fallthrough;
922         default:
923                 WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | how);
924                 if (sk->sk_prot->shutdown)
925                         sk->sk_prot->shutdown(sk, how);
926                 break;
927 
928         /* Remaining two branches are temporary solution for missing
929          * close() in multithreaded environment. It is _not_ a good idea,
930          * but we have no choice until close() is repaired at VFS level.
931          */
932         case TCP_LISTEN:
933                 if (!(how & RCV_SHUTDOWN))
934                         break;
935                 fallthrough;
936         case TCP_SYN_SENT:
937                 err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
938                 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
939                 break;
940         }
941 
942         /* Wake up anyone sleeping in poll. */
943         sk->sk_state_change(sk);
944         release_sock(sk);
945         return err;
946 }
947 EXPORT_SYMBOL(inet_shutdown);
948 
949 /*
950  *      ioctl() calls you can issue on an INET socket. Most of these are
951  *      device configuration and stuff and very rarely used. Some ioctls
952  *      pass on to the socket itself.
953  *
954  *      NOTE: I like the idea of a module for the config stuff. ie ifconfig
955  *      loads the devconfigure module does its configuring and unloads it.
956  *      There's a good 20K of config code hanging around the kernel.
957  */
958 
959 int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
960 {
961         struct sock *sk = sock->sk;
962         int err = 0;
963         struct net *net = sock_net(sk);
964         void __user *p = (void __user *)arg;
965         struct ifreq ifr;
966         struct rtentry rt;
967 
968         switch (cmd) {
969         case SIOCADDRT:
970         case SIOCDELRT:
971                 if (copy_from_user(&rt, p, sizeof(struct rtentry)))
972                         return -EFAULT;
973                 err = ip_rt_ioctl(net, cmd, &rt);
974                 break;
975         case SIOCRTMSG:
976                 err = -EINVAL;
977                 break;
978         case SIOCDARP:
979         case SIOCGARP:
980         case SIOCSARP:
981                 err = arp_ioctl(net, cmd, (void __user *)arg);
982                 break;
983         case SIOCGIFADDR:
984         case SIOCGIFBRDADDR:
985         case SIOCGIFNETMASK:
986         case SIOCGIFDSTADDR:
987         case SIOCGIFPFLAGS:
988                 if (get_user_ifreq(&ifr, NULL, p))
989                         return -EFAULT;
990                 err = devinet_ioctl(net, cmd, &ifr);
991                 if (!err && put_user_ifreq(&ifr, p))
992                         err = -EFAULT;
993                 break;
994 
995         case SIOCSIFADDR:
996         case SIOCSIFBRDADDR:
997         case SIOCSIFNETMASK:
998         case SIOCSIFDSTADDR:
999         case SIOCSIFPFLAGS:
1000         case SIOCSIFFLAGS:
1001                 if (get_user_ifreq(&ifr, NULL, p))
1002                         return -EFAULT;
1003                 err = devinet_ioctl(net, cmd, &ifr);
1004                 break;
1005         default:
1006                 if (sk->sk_prot->ioctl)
1007                         err = sk_ioctl(sk, cmd, (void __user *)arg);
1008                 else
1009                         err = -ENOIOCTLCMD;
1010                 break;
1011         }
1012         return err;
1013 }
1014 EXPORT_SYMBOL(inet_ioctl);
1015 
1016 #ifdef CONFIG_COMPAT
1017 static int inet_compat_routing_ioctl(struct sock *sk, unsigned int cmd,
1018                 struct compat_rtentry __user *ur)
1019 {
1020         compat_uptr_t rtdev;
1021         struct rtentry rt;
1022 
1023         if (copy_from_user(&rt.rt_dst, &ur->rt_dst,
1024                         3 * sizeof(struct sockaddr)) ||
1025             get_user(rt.rt_flags, &ur->rt_flags) ||
1026             get_user(rt.rt_metric, &ur->rt_metric) ||
1027             get_user(rt.rt_mtu, &ur->rt_mtu) ||
1028             get_user(rt.rt_window, &ur->rt_window) ||
1029             get_user(rt.rt_irtt, &ur->rt_irtt) ||
1030             get_user(rtdev, &ur->rt_dev))
1031                 return -EFAULT;
1032 
1033         rt.rt_dev = compat_ptr(rtdev);
1034         return ip_rt_ioctl(sock_net(sk), cmd, &rt);
1035 }
1036 
1037 static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1038 {
1039         void __user *argp = compat_ptr(arg);
1040         struct sock *sk = sock->sk;
1041 
1042         switch (cmd) {
1043         case SIOCADDRT:
1044         case SIOCDELRT:
1045                 return inet_compat_routing_ioctl(sk, cmd, argp);
1046         default:
1047                 if (!sk->sk_prot->compat_ioctl)
1048                         return -ENOIOCTLCMD;
1049                 return sk->sk_prot->compat_ioctl(sk, cmd, arg);
1050         }
1051 }
1052 #endif /* CONFIG_COMPAT */
1053 
1054 const struct proto_ops inet_stream_ops = {
1055         .family            = PF_INET,
1056         .owner             = THIS_MODULE,
1057         .release           = inet_release,
1058         .bind              = inet_bind,
1059         .connect           = inet_stream_connect,
1060         .socketpair        = sock_no_socketpair,
1061         .accept            = inet_accept,
1062         .getname           = inet_getname,
1063         .poll              = tcp_poll,
1064         .ioctl             = inet_ioctl,
1065         .gettstamp         = sock_gettstamp,
1066         .listen            = inet_listen,
1067         .shutdown          = inet_shutdown,
1068         .setsockopt        = sock_common_setsockopt,
1069         .getsockopt        = sock_common_getsockopt,
1070         .sendmsg           = inet_sendmsg,
1071         .recvmsg           = inet_recvmsg,
1072 #ifdef CONFIG_MMU
1073         .mmap              = tcp_mmap,
1074 #endif
1075         .splice_eof        = inet_splice_eof,
1076         .splice_read       = tcp_splice_read,
1077         .set_peek_off      = sk_set_peek_off,
1078         .read_sock         = tcp_read_sock,
1079         .read_skb          = tcp_read_skb,
1080         .sendmsg_locked    = tcp_sendmsg_locked,
1081         .peek_len          = tcp_peek_len,
1082 #ifdef CONFIG_COMPAT
1083         .compat_ioctl      = inet_compat_ioctl,
1084 #endif
1085         .set_rcvlowat      = tcp_set_rcvlowat,
1086 };
1087 EXPORT_SYMBOL(inet_stream_ops);
1088 
1089 const struct proto_ops inet_dgram_ops = {
1090         .family            = PF_INET,
1091         .owner             = THIS_MODULE,
1092         .release           = inet_release,
1093         .bind              = inet_bind,
1094         .connect           = inet_dgram_connect,
1095         .socketpair        = sock_no_socketpair,
1096         .accept            = sock_no_accept,
1097         .getname           = inet_getname,
1098         .poll              = udp_poll,
1099         .ioctl             = inet_ioctl,
1100         .gettstamp         = sock_gettstamp,
1101         .listen            = sock_no_listen,
1102         .shutdown          = inet_shutdown,
1103         .setsockopt        = sock_common_setsockopt,
1104         .getsockopt        = sock_common_getsockopt,
1105         .sendmsg           = inet_sendmsg,
1106         .read_skb          = udp_read_skb,
1107         .recvmsg           = inet_recvmsg,
1108         .mmap              = sock_no_mmap,
1109         .splice_eof        = inet_splice_eof,
1110         .set_peek_off      = udp_set_peek_off,
1111 #ifdef CONFIG_COMPAT
1112         .compat_ioctl      = inet_compat_ioctl,
1113 #endif
1114 };
1115 EXPORT_SYMBOL(inet_dgram_ops);
1116 
1117 /*
1118  * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
1119  * udp_poll
1120  */
1121 static const struct proto_ops inet_sockraw_ops = {
1122         .family            = PF_INET,
1123         .owner             = THIS_MODULE,
1124         .release           = inet_release,
1125         .bind              = inet_bind,
1126         .connect           = inet_dgram_connect,
1127         .socketpair        = sock_no_socketpair,
1128         .accept            = sock_no_accept,
1129         .getname           = inet_getname,
1130         .poll              = datagram_poll,
1131         .ioctl             = inet_ioctl,
1132         .gettstamp         = sock_gettstamp,
1133         .listen            = sock_no_listen,
1134         .shutdown          = inet_shutdown,
1135         .setsockopt        = sock_common_setsockopt,
1136         .getsockopt        = sock_common_getsockopt,
1137         .sendmsg           = inet_sendmsg,
1138         .recvmsg           = inet_recvmsg,
1139         .mmap              = sock_no_mmap,
1140         .splice_eof        = inet_splice_eof,
1141 #ifdef CONFIG_COMPAT
1142         .compat_ioctl      = inet_compat_ioctl,
1143 #endif
1144 };
1145 
1146 static const struct net_proto_family inet_family_ops = {
1147         .family = PF_INET,
1148         .create = inet_create,
1149         .owner  = THIS_MODULE,
1150 };
1151 
1152 /* Upon startup we insert all the elements in inetsw_array[] into
1153  * the linked list inetsw.
1154  */
1155 static struct inet_protosw inetsw_array[] =
1156 {
1157         {
1158                 .type =       SOCK_STREAM,
1159                 .protocol =   IPPROTO_TCP,
1160                 .prot =       &tcp_prot,
1161                 .ops =        &inet_stream_ops,
1162                 .flags =      INET_PROTOSW_PERMANENT |
1163                               INET_PROTOSW_ICSK,
1164         },
1165 
1166         {
1167                 .type =       SOCK_DGRAM,
1168                 .protocol =   IPPROTO_UDP,
1169                 .prot =       &udp_prot,
1170                 .ops =        &inet_dgram_ops,
1171                 .flags =      INET_PROTOSW_PERMANENT,
1172        },
1173 
1174        {
1175                 .type =       SOCK_DGRAM,
1176                 .protocol =   IPPROTO_ICMP,
1177                 .prot =       &ping_prot,
1178                 .ops =        &inet_sockraw_ops,
1179                 .flags =      INET_PROTOSW_REUSE,
1180        },
1181 
1182        {
1183                .type =       SOCK_RAW,
1184                .protocol =   IPPROTO_IP,        /* wild card */
1185                .prot =       &raw_prot,
1186                .ops =        &inet_sockraw_ops,
1187                .flags =      INET_PROTOSW_REUSE,
1188        }
1189 };
1190 
1191 #define INETSW_ARRAY_LEN ARRAY_SIZE(inetsw_array)
1192 
1193 void inet_register_protosw(struct inet_protosw *p)
1194 {
1195         struct list_head *lh;
1196         struct inet_protosw *answer;
1197         int protocol = p->protocol;
1198         struct list_head *last_perm;
1199 
1200         spin_lock_bh(&inetsw_lock);
1201 
1202         if (p->type >= SOCK_MAX)
1203                 goto out_illegal;
1204 
1205         /* If we are trying to override a permanent protocol, bail. */
1206         last_perm = &inetsw[p->type];
1207         list_for_each(lh, &inetsw[p->type]) {
1208                 answer = list_entry(lh, struct inet_protosw, list);
1209                 /* Check only the non-wild match. */
1210                 if ((INET_PROTOSW_PERMANENT & answer->flags) == 0)
1211                         break;
1212                 if (protocol == answer->protocol)
1213                         goto out_permanent;
1214                 last_perm = lh;
1215         }
1216 
1217         /* Add the new entry after the last permanent entry if any, so that
1218          * the new entry does not override a permanent entry when matched with
1219          * a wild-card protocol. But it is allowed to override any existing
1220          * non-permanent entry.  This means that when we remove this entry, the
1221          * system automatically returns to the old behavior.
1222          */
1223         list_add_rcu(&p->list, last_perm);
1224 out:
1225         spin_unlock_bh(&inetsw_lock);
1226 
1227         return;
1228 
1229 out_permanent:
1230         pr_err("Attempt to override permanent protocol %d\n", protocol);
1231         goto out;
1232 
1233 out_illegal:
1234         pr_err("Ignoring attempt to register invalid socket type %d\n",
1235                p->type);
1236         goto out;
1237 }
1238 EXPORT_SYMBOL(inet_register_protosw);
1239 
1240 void inet_unregister_protosw(struct inet_protosw *p)
1241 {
1242         if (INET_PROTOSW_PERMANENT & p->flags) {
1243                 pr_err("Attempt to unregister permanent protocol %d\n",
1244                        p->protocol);
1245         } else {
1246                 spin_lock_bh(&inetsw_lock);
1247                 list_del_rcu(&p->list);
1248                 spin_unlock_bh(&inetsw_lock);
1249 
1250                 synchronize_net();
1251         }
1252 }
1253 EXPORT_SYMBOL(inet_unregister_protosw);
1254 
1255 static int inet_sk_reselect_saddr(struct sock *sk)
1256 {
1257         struct inet_sock *inet = inet_sk(sk);
1258         __be32 old_saddr = inet->inet_saddr;
1259         __be32 daddr = inet->inet_daddr;
1260         struct flowi4 *fl4;
1261         struct rtable *rt;
1262         __be32 new_saddr;
1263         struct ip_options_rcu *inet_opt;
1264         int err;
1265 
1266         inet_opt = rcu_dereference_protected(inet->inet_opt,
1267                                              lockdep_sock_is_held(sk));
1268         if (inet_opt && inet_opt->opt.srr)
1269                 daddr = inet_opt->opt.faddr;
1270 
1271         /* Query new route. */
1272         fl4 = &inet->cork.fl.u.ip4;
1273         rt = ip_route_connect(fl4, daddr, 0, sk->sk_bound_dev_if,
1274                               sk->sk_protocol, inet->inet_sport,
1275                               inet->inet_dport, sk);
1276         if (IS_ERR(rt))
1277                 return PTR_ERR(rt);
1278 
1279         new_saddr = fl4->saddr;
1280 
1281         if (new_saddr == old_saddr) {
1282                 sk_setup_caps(sk, &rt->dst);
1283                 return 0;
1284         }
1285 
1286         err = inet_bhash2_update_saddr(sk, &new_saddr, AF_INET);
1287         if (err) {
1288                 ip_rt_put(rt);
1289                 return err;
1290         }
1291 
1292         sk_setup_caps(sk, &rt->dst);
1293 
1294         if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) {
1295                 pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
1296                         __func__, &old_saddr, &new_saddr);
1297         }
1298 
1299         /*
1300          * XXX The only one ugly spot where we need to
1301          * XXX really change the sockets identity after
1302          * XXX it has entered the hashes. -DaveM
1303          *
1304          * Besides that, it does not check for connection
1305          * uniqueness. Wait for troubles.
1306          */
1307         return __sk_prot_rehash(sk);
1308 }
1309 
1310 int inet_sk_rebuild_header(struct sock *sk)
1311 {
1312         struct rtable *rt = dst_rtable(__sk_dst_check(sk, 0));
1313         struct inet_sock *inet = inet_sk(sk);
1314         __be32 daddr;
1315         struct ip_options_rcu *inet_opt;
1316         struct flowi4 *fl4;
1317         int err;
1318 
1319         /* Route is OK, nothing to do. */
1320         if (rt)
1321                 return 0;
1322 
1323         /* Reroute. */
1324         rcu_read_lock();
1325         inet_opt = rcu_dereference(inet->inet_opt);
1326         daddr = inet->inet_daddr;
1327         if (inet_opt && inet_opt->opt.srr)
1328                 daddr = inet_opt->opt.faddr;
1329         rcu_read_unlock();
1330         fl4 = &inet->cork.fl.u.ip4;
1331         rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr,
1332                                    inet->inet_dport, inet->inet_sport,
1333                                    sk->sk_protocol, ip_sock_rt_tos(sk),
1334                                    sk->sk_bound_dev_if);
1335         if (!IS_ERR(rt)) {
1336                 err = 0;
1337                 sk_setup_caps(sk, &rt->dst);
1338         } else {
1339                 err = PTR_ERR(rt);
1340 
1341                 /* Routing failed... */
1342                 sk->sk_route_caps = 0;
1343                 /*
1344                  * Other protocols have to map its equivalent state to TCP_SYN_SENT.
1345                  * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
1346                  */
1347                 if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) ||
1348                     sk->sk_state != TCP_SYN_SENT ||
1349                     (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
1350                     (err = inet_sk_reselect_saddr(sk)) != 0)
1351                         WRITE_ONCE(sk->sk_err_soft, -err);
1352         }
1353 
1354         return err;
1355 }
1356 EXPORT_SYMBOL(inet_sk_rebuild_header);
1357 
1358 void inet_sk_set_state(struct sock *sk, int state)
1359 {
1360         trace_inet_sock_set_state(sk, sk->sk_state, state);
1361         sk->sk_state = state;
1362 }
1363 EXPORT_SYMBOL(inet_sk_set_state);
1364 
1365 void inet_sk_state_store(struct sock *sk, int newstate)
1366 {
1367         trace_inet_sock_set_state(sk, sk->sk_state, newstate);
1368         smp_store_release(&sk->sk_state, newstate);
1369 }
1370 
/*
 * inet_gso_segment - IPv4 step of GSO segmentation.
 * @skb:      packet to segment; its network header is reset to the current
 *            data pointer on entry
 * @features: feature mask handed on to the next-level gso_segment callback
 *
 * Pulls the IPv4 header, passes the rest to the gso_segment callback
 * registered in inet_offloads[] for the header's protocol, then walks the
 * returned segment list and rewrites each segment's IPv4 header
 * (frag_off or id, tot_len, checksum).
 *
 * Returns the callback's result (segment list, NULL or ERR_PTR), or
 * ERR_PTR(-EINVAL) when the header cannot be pulled or ihl is too small,
 * or ERR_PTR(-EPROTONOSUPPORT) when no callback exists or a fixed-ID
 * request comes without the DF bit.
 */
1371 struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1372                                  netdev_features_t features)
1373 {
1374         bool udpfrag = false, fixedid = false, gso_partial, encap;
1375         struct sk_buff *segs = ERR_PTR(-EINVAL);
1376         const struct net_offload *ops;
1377         unsigned int offset = 0;
1378         struct iphdr *iph;
1379         int proto, tot_len;
1380         int nhoff;
1381         int ihl;
1382         int id;
1383 
1384         skb_reset_network_header(skb);
             /* Distance from the MAC header to the IPv4 header; used below to
              * find the IPv4 header again in every output segment.
              */
1385         nhoff = skb_network_header(skb) - skb_mac_header(skb);
1386         if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
1387                 goto out;
1388 
1389         iph = ip_hdr(skb);
1390         ihl = iph->ihl * 4;
1391         if (ihl < sizeof(*iph))
1392                 goto out;
1393 
             /* Cache what is needed from the header before the next pull. */
1394         id = ntohs(iph->id);
1395         proto = iph->protocol;
1396 
1397         /* Warning: after this point, iph might be no longer valid */
1398         if (unlikely(!pskb_may_pull(skb, ihl)))
1399                 goto out;
1400         __skb_pull(skb, ihl);
1401 
             /* A non-zero encap_level on entry marks this call as handling an
              * encapsulated header: features are then limited to the device's
              * hw_enc_features.
              */
1402         encap = SKB_GSO_CB(skb)->encap_level > 0;
1403         if (encap)
1404                 features &= skb->dev->hw_enc_features;
1405         SKB_GSO_CB(skb)->encap_level += ihl;
1406 
1407         skb_reset_transport_header(skb);
1408 
             /* From here on, early exits report "protocol not supported". */
1409         segs = ERR_PTR(-EPROTONOSUPPORT);
1410 
             /* The UDP-fragment and fixed-ID flags are only looked at when the
              * skb is not marked encapsulated, or when encap_level was already
              * non-zero on entry; otherwise both stay false.
              */
1411         if (!skb->encapsulation || encap) {
1412                 udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
1413                 fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
1414 
1415                 /* fixed ID is invalid if DF bit is not set */
1416                 if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF)))
1417                         goto out;
1418         }
1419 
1420         ops = rcu_dereference(inet_offloads[proto]);
1421         if (likely(ops && ops->callbacks.gso_segment)) {
1422                 segs = ops->callbacks.gso_segment(skb, features);
                     /* NULL result: put network_header back to MAC header + nhoff. */
1423                 if (!segs)
1424                         skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
1425         }
1426 
1427         if (IS_ERR_OR_NULL(segs))
1428                 goto out;
1429 
1430         gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL);
1431 
             /* Fix up the IPv4 header of every segment in the returned list. */
1432         skb = segs;
1433         do {
1434                 iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
1435                 if (udpfrag) {
                             /* frag_off is offset >> 3; IP_MF is set on every
                              * segment that has a successor.
                              */
1436                         iph->frag_off = htons(offset >> 3);
1437                         if (skb->next)
1438                                 iph->frag_off |= htons(IP_MF);
1439                         offset += skb->len - nhoff - ihl;
1440                         tot_len = skb->len - nhoff;
1441                 } else if (skb_is_gso(skb)) {
                             /* Segment is still GSO itself: unless the ID is
                              * fixed, advance the ID by its gso_segs count.
                              */
1442                         if (!fixedid) {
1443                                 iph->id = htons(id);
1444                                 id += skb_shinfo(skb)->gso_segs;
1445                         }
1446 
1447                         if (gso_partial)
1448                                 tot_len = skb_shinfo(skb)->gso_size +
1449                                           SKB_GSO_CB(skb)->data_offset +
1450                                           skb->head - (unsigned char *)iph;
1451                         else
1452                                 tot_len = skb->len - nhoff;
1453                 } else {
                             /* Plain segment: one ID per packet unless fixed. */
1454                         if (!fixedid)
1455                                 iph->id = htons(id++);
1456                         tot_len = skb->len - nhoff;
1457                 }
1458                 iph->tot_len = htons(tot_len);
1459                 ip_send_check(iph);
1460                 if (encap)
1461                         skb_reset_inner_headers(skb);
1462                 skb->network_header = (u8 *)iph - skb->head;
1463                 skb_reset_mac_len(skb);
1464         } while ((skb = skb->next));
1465 
1466 out:
1467         return segs;
1468 }
1469 
1470 static struct sk_buff *ipip_gso_segment(struct sk_buff *skb,
1471                                         netdev_features_t features)
1472 {
1473         if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4))
1474                 return ERR_PTR(-EINVAL);
1475 
1476         return inet_gso_segment(skb, features);
1477 }
1478 
/*
 * inet_gro_receive - IPv4 step of GRO receive.
 * @head: list of packets currently held for aggregation
 * @skb:  incoming packet, with its GRO offset at the IPv4 header
 *
 * Validates the IPv4 header found at the GRO offset, computes a flush
 * flag for @skb, clears same_flow on held packets whose protocol, source
 * or destination address differ, and then hands @skb to the gro_receive
 * callback registered in inet_offloads[] for the header's protocol.
 *
 * Returns what that callback returned, or NULL when the packet was not
 * passed on.  skb_gro_flush_final() is called on every path.
 */
1479 struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
1480 {
1481         const struct net_offload *ops;
1482         struct sk_buff *pp = NULL;
1483         const struct iphdr *iph;
1484         struct sk_buff *p;
1485         unsigned int hlen;
1486         unsigned int off;
             /* Starts at 1, so every early "goto out" below reports flush = 1. */
1487         int flush = 1;
1488         int proto;
1489 
1490         off = skb_gro_offset(skb);
1491         hlen = off + sizeof(*iph);
1492         iph = skb_gro_header(skb, hlen, off);
1493         if (unlikely(!iph))
1494                 goto out;
1495 
1496         proto = iph->protocol;
1497 
1498         ops = rcu_dereference(inet_offloads[proto]);
1499         if (!ops || !ops->callbacks.gro_receive)
1500                 goto out;
1501 
             /* First header byte must be 0x45: version 4 with ihl 5, i.e. a
              * header without options.
              */
1502         if (*(u8 *)iph != 0x45)
1503                 goto out;
1504 
1505         if (ip_is_fragment(iph))
1506                 goto out;
1507 
             /* Header checksum over the 5 header words must come out as zero. */
1508         if (unlikely(ip_fast_csum((u8 *)iph, 5)))
1509                 goto out;
1510 
1511         NAPI_GRO_CB(skb)->proto = proto;
             /* Only the low 16 bits survive the (u16) cast.  For the first
              * header word those bits are tot_len, XORed with the GRO length;
              * for the word starting at iph->id they are frag_off with IP_DF
              * masked out.  flush is non-zero if either part is non-zero.
              */
1512         flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (ntohl(*(__be32 *)&iph->id) & ~IP_DF));
1513 
1514         list_for_each_entry(p, head, list) {
1515                 struct iphdr *iph2;
1516 
1517                 if (!NAPI_GRO_CB(p)->same_flow)
1518                         continue;
1519 
1520                 iph2 = (struct iphdr *)(p->data + off);
1521                 /* The above works because, with the exception of the top
1522                  * (inner most) layer, we only aggregate pkts with the same
1523                  * hdr length so all the hdrs we'll need to verify will start
1524                  * at the same offset.
1525                  */
1526                 if ((iph->protocol ^ iph2->protocol) |
1527                     ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
1528                     ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
1529                         NAPI_GRO_CB(p)->same_flow = 0;
1530                         continue;
1531                 }
1532         }
1533 
1534         NAPI_GRO_CB(skb)->flush |= flush;
             /* Remember where this IPv4 header starts, indexed by encap_mark. */
1535         NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = off;
1536 
1537         /* Note : No need to call skb_gro_postpull_rcsum() here,
1538          * as we already checked checksum over ipv4 header was 0
1539          */
1540         skb_gro_pull(skb, sizeof(*iph));
1541         skb_set_transport_header(skb, skb_gro_offset(skb));
1542 
1543         pp = indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive,
1544                                        ops->callbacks.gro_receive, head, skb);
1545 
1546 out:
1547         skb_gro_flush_final(skb, pp, flush);
1548 
1549         return pp;
1550 }
1551 
1552 static struct sk_buff *ipip_gro_receive(struct list_head *head,
1553                                         struct sk_buff *skb)
1554 {
1555         if (NAPI_GRO_CB(skb)->encap_mark) {
1556                 NAPI_GRO_CB(skb)->flush = 1;
1557                 return NULL;
1558         }
1559 
1560         NAPI_GRO_CB(skb)->encap_mark = 1;
1561 
1562         return inet_gro_receive(head, skb);
1563 }
1564 
1565 #define SECONDS_PER_DAY 86400
1566 
1567 /* inet_current_timestamp - Return IP network timestamp
1568  *
1569  * Return milliseconds since midnight in network byte order.
1570  */
1571 __be32 inet_current_timestamp(void)
1572 {
1573         u32 secs;
1574         u32 msecs;
1575         struct timespec64 ts;
1576 
1577         ktime_get_real_ts64(&ts);
1578 
1579         /* Get secs since midnight. */
1580         (void)div_u64_rem(ts.tv_sec, SECONDS_PER_DAY, &secs);
1581         /* Convert to msecs. */
1582         msecs = secs * MSEC_PER_SEC;
1583         /* Convert nsec to msec. */
1584         msecs += (u32)ts.tv_nsec / NSEC_PER_MSEC;
1585 
1586         /* Convert to network byte order. */
1587         return htonl(msecs);
1588 }
1589 EXPORT_SYMBOL(inet_current_timestamp);
1590 
1591 int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
1592 {
1593         unsigned int family = READ_ONCE(sk->sk_family);
1594 
1595         if (family == AF_INET)
1596                 return ip_recv_error(sk, msg, len, addr_len);
1597 #if IS_ENABLED(CONFIG_IPV6)
1598         if (family == AF_INET6)
1599                 return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len);
1600 #endif
1601         return -EINVAL;
1602 }
1603 EXPORT_SYMBOL(inet_recv_error);
1604 
1605 int inet_gro_complete(struct sk_buff *skb, int nhoff)
1606 {
1607         struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);
1608         const struct net_offload *ops;
1609         __be16 totlen = iph->tot_len;
1610         int proto = iph->protocol;
1611         int err = -ENOSYS;
1612 
1613         if (skb->encapsulation) {
1614                 skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP));
1615                 skb_set_inner_network_header(skb, nhoff);
1616         }
1617 
1618         iph_set_totlen(iph, skb->len - nhoff);
1619         csum_replace2(&iph->check, totlen, iph->tot_len);
1620 
1621         ops = rcu_dereference(inet_offloads[proto]);
1622         if (WARN_ON(!ops || !ops->callbacks.gro_complete))
1623                 goto out;
1624 
1625         /* Only need to add sizeof(*iph) to get to the next hdr below
1626          * because any hdr with option will have been flushed in
1627          * inet_gro_receive().
1628          */
1629         err = INDIRECT_CALL_2(ops->callbacks.gro_complete,
1630                               tcp4_gro_complete, udp4_gro_complete,
1631                               skb, nhoff + sizeof(*iph));
1632 
1633 out:
1634         return err;
1635 }
1636 
1637 static int ipip_gro_complete(struct sk_buff *skb, int nhoff)
1638 {
1639         skb->encapsulation = 1;
1640         skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4;
1641         return inet_gro_complete(skb, nhoff);
1642 }
1643 
1644 int inet_ctl_sock_create(struct sock **sk, unsigned short family,
1645                          unsigned short type, unsigned char protocol,
1646                          struct net *net)
1647 {
1648         struct socket *sock;
1649         int rc = sock_create_kern(net, family, type, protocol, &sock);
1650 
1651         if (rc == 0) {
1652                 *sk = sock->sk;
1653                 (*sk)->sk_allocation = GFP_ATOMIC;
1654                 (*sk)->sk_use_task_frag = false;
1655                 /*
1656                  * Unhash it so that IP input processing does not even see it,
1657                  * we do not wish this socket to see incoming packets.
1658                  */
1659                 (*sk)->sk_prot->unhash(*sk);
1660         }
1661         return rc;
1662 }
1663 EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
1664 
1665 unsigned long snmp_fold_field(void __percpu *mib, int offt)
1666 {
1667         unsigned long res = 0;
1668         int i;
1669 
1670         for_each_possible_cpu(i)
1671                 res += snmp_get_cpu_field(mib, i, offt);
1672         return res;
1673 }
1674 EXPORT_SYMBOL_GPL(snmp_fold_field);
1675 
1676 #if BITS_PER_LONG==32
1677 
1678 u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offt,
1679                          size_t syncp_offset)
1680 {
1681         void *bhptr;
1682         struct u64_stats_sync *syncp;
1683         u64 v;
1684         unsigned int start;
1685 
1686         bhptr = per_cpu_ptr(mib, cpu);
1687         syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
1688         do {
1689                 start = u64_stats_fetch_begin(syncp);
1690                 v = *(((u64 *)bhptr) + offt);
1691         } while (u64_stats_fetch_retry(syncp, start));
1692 
1693         return v;
1694 }
1695 EXPORT_SYMBOL_GPL(snmp_get_cpu_field64);
1696 
1697 u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
1698 {
1699         u64 res = 0;
1700         int cpu;
1701 
1702         for_each_possible_cpu(cpu) {
1703                 res += snmp_get_cpu_field64(mib, cpu, offt, syncp_offset);
1704         }
1705         return res;
1706 }
1707 EXPORT_SYMBOL_GPL(snmp_fold_field64);
1708 #endif
1709 
#ifdef CONFIG_IP_MULTICAST
/* IGMP protocol entry; only built when CONFIG_IP_MULTICAST is set. */
static const struct net_protocol igmp_protocol = {
	.handler = igmp_rcv,
};
#endif
1715 
1716 static const struct net_protocol icmp_protocol = {
1717         .handler =      icmp_rcv,
1718         .err_handler =  icmp_err,
1719         .no_policy =    1,
1720 };
1721 
1722 static __net_init int ipv4_mib_init_net(struct net *net)
1723 {
1724         int i;
1725 
1726         net->mib.tcp_statistics = alloc_percpu(struct tcp_mib);
1727         if (!net->mib.tcp_statistics)
1728                 goto err_tcp_mib;
1729         net->mib.ip_statistics = alloc_percpu(struct ipstats_mib);
1730         if (!net->mib.ip_statistics)
1731                 goto err_ip_mib;
1732 
1733         for_each_possible_cpu(i) {
1734                 struct ipstats_mib *af_inet_stats;
1735                 af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i);
1736                 u64_stats_init(&af_inet_stats->syncp);
1737         }
1738 
1739         net->mib.net_statistics = alloc_percpu(struct linux_mib);
1740         if (!net->mib.net_statistics)
1741                 goto err_net_mib;
1742         net->mib.udp_statistics = alloc_percpu(struct udp_mib);
1743         if (!net->mib.udp_statistics)
1744                 goto err_udp_mib;
1745         net->mib.udplite_statistics = alloc_percpu(struct udp_mib);
1746         if (!net->mib.udplite_statistics)
1747                 goto err_udplite_mib;
1748         net->mib.icmp_statistics = alloc_percpu(struct icmp_mib);
1749         if (!net->mib.icmp_statistics)
1750                 goto err_icmp_mib;
1751         net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib),
1752                                               GFP_KERNEL);
1753         if (!net->mib.icmpmsg_statistics)
1754                 goto err_icmpmsg_mib;
1755 
1756         tcp_mib_init(net);
1757         return 0;
1758 
1759 err_icmpmsg_mib:
1760         free_percpu(net->mib.icmp_statistics);
1761 err_icmp_mib:
1762         free_percpu(net->mib.udplite_statistics);
1763 err_udplite_mib:
1764         free_percpu(net->mib.udp_statistics);
1765 err_udp_mib:
1766         free_percpu(net->mib.net_statistics);
1767 err_net_mib:
1768         free_percpu(net->mib.ip_statistics);
1769 err_ip_mib:
1770         free_percpu(net->mib.tcp_statistics);
1771 err_tcp_mib:
1772         return -ENOMEM;
1773 }
1774 
1775 static __net_exit void ipv4_mib_exit_net(struct net *net)
1776 {
1777         kfree(net->mib.icmpmsg_statistics);
1778         free_percpu(net->mib.icmp_statistics);
1779         free_percpu(net->mib.udplite_statistics);
1780         free_percpu(net->mib.udp_statistics);
1781         free_percpu(net->mib.net_statistics);
1782         free_percpu(net->mib.ip_statistics);
1783         free_percpu(net->mib.tcp_statistics);
1784 #ifdef CONFIG_MPTCP
1785         /* allocated on demand, see mptcp_init_sock() */
1786         free_percpu(net->mib.mptcp_statistics);
1787 #endif
1788 }
1789 
1790 static __net_initdata struct pernet_operations ipv4_mib_ops = {
1791         .init = ipv4_mib_init_net,
1792         .exit = ipv4_mib_exit_net,
1793 };
1794 
1795 static int __init init_ipv4_mibs(void)
1796 {
1797         return register_pernet_subsys(&ipv4_mib_ops);
1798 }
1799 
1800 static __net_init int inet_init_net(struct net *net)
1801 {
1802         /*
1803          * Set defaults for local port range
1804          */
1805         net->ipv4.ip_local_ports.range = 60999u << 16 | 32768u;
1806 
1807         seqlock_init(&net->ipv4.ping_group_range.lock);
1808         /*
1809          * Sane defaults - nobody may create ping sockets.
1810          * Boot scripts should set this to distro-specific group.
1811          */
1812         net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1);
1813         net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0);
1814 
1815         /* Default values for sysctl-controlled parameters.
1816          * We set them here, in case sysctl is not compiled.
1817          */
1818         net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
1819         net->ipv4.sysctl_ip_fwd_update_priority = 1;
1820         net->ipv4.sysctl_ip_dynaddr = 0;
1821         net->ipv4.sysctl_ip_early_demux = 1;
1822         net->ipv4.sysctl_udp_early_demux = 1;
1823         net->ipv4.sysctl_tcp_early_demux = 1;
1824         net->ipv4.sysctl_nexthop_compat_mode = 1;
1825 #ifdef CONFIG_SYSCTL
1826         net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
1827 #endif
1828 
1829         /* Some igmp sysctl, whose values are always used */
1830         net->ipv4.sysctl_igmp_max_memberships = 20;
1831         net->ipv4.sysctl_igmp_max_msf = 10;
1832         /* IGMP reports for link-local multicast groups are enabled by default */
1833         net->ipv4.sysctl_igmp_llm_reports = 1;
1834         net->ipv4.sysctl_igmp_qrv = 2;
1835 
1836         net->ipv4.sysctl_fib_notify_on_flag_change = 0;
1837 
1838         return 0;
1839 }
1840 
1841 static __net_initdata struct pernet_operations af_inet_ops = {
1842         .init = inet_init_net,
1843 };
1844 
1845 static int __init init_inet_pernet_ops(void)
1846 {
1847         return register_pernet_subsys(&af_inet_ops);
1848 }
1849 
1850 static int ipv4_proc_init(void);
1851 
1852 /*
1853  *      IP protocol layer initialiser
1854  */
1855 
1856 
1857 static const struct net_offload ipip_offload = {
1858         .callbacks = {
1859                 .gso_segment    = ipip_gso_segment,
1860                 .gro_receive    = ipip_gro_receive,
1861                 .gro_complete   = ipip_gro_complete,
1862         },
1863 };
1864 
1865 static int __init ipip_offload_init(void)
1866 {
1867         return inet_add_offload(&ipip_offload, IPPROTO_IPIP);
1868 }
1869 
1870 static int __init ipv4_offload_init(void)
1871 {
1872         /*
1873          * Add offloads
1874          */
1875         if (udpv4_offload_init() < 0)
1876                 pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
1877         if (tcpv4_offload_init() < 0)
1878                 pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
1879         if (ipip_offload_init() < 0)
1880                 pr_crit("%s: Cannot add IPIP protocol offload\n", __func__);
1881 
1882         net_hotdata.ip_packet_offload = (struct packet_offload) {
1883                 .type = cpu_to_be16(ETH_P_IP),
1884                 .callbacks = {
1885                         .gso_segment = inet_gso_segment,
1886                         .gro_receive = inet_gro_receive,
1887                         .gro_complete = inet_gro_complete,
1888                 },
1889         };
1890         dev_add_offload(&net_hotdata.ip_packet_offload);
1891         return 0;
1892 }
1893 
1894 fs_initcall(ipv4_offload_init);
1895 
1896 static struct packet_type ip_packet_type __read_mostly = {
1897         .type = cpu_to_be16(ETH_P_IP),
1898         .func = ip_rcv,
1899         .list_func = ip_list_rcv,
1900 };
1901 
1902 static int __init inet_init(void)
1903 {
1904         struct inet_protosw *q;
1905         struct list_head *r;
1906         int rc;
1907 
1908         sock_skb_cb_check_size(sizeof(struct inet_skb_parm));
1909 
1910         raw_hashinfo_init(&raw_v4_hashinfo);
1911 
1912         rc = proto_register(&tcp_prot, 1);
1913         if (rc)
1914                 goto out;
1915 
1916         rc = proto_register(&udp_prot, 1);
1917         if (rc)
1918                 goto out_unregister_tcp_proto;
1919 
1920         rc = proto_register(&raw_prot, 1);
1921         if (rc)
1922                 goto out_unregister_udp_proto;
1923 
1924         rc = proto_register(&ping_prot, 1);
1925         if (rc)
1926                 goto out_unregister_raw_proto;
1927 
1928         /*
1929          *      Tell SOCKET that we are alive...
1930          */
1931 
1932         (void)sock_register(&inet_family_ops);
1933 
1934 #ifdef CONFIG_SYSCTL
1935         ip_static_sysctl_init();
1936 #endif
1937 
1938         /*
1939          *      Add all the base protocols.
1940          */
1941 
1942         if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
1943                 pr_crit("%s: Cannot add ICMP protocol\n", __func__);
1944 
1945         net_hotdata.udp_protocol = (struct net_protocol) {
1946                 .handler =      udp_rcv,
1947                 .err_handler =  udp_err,
1948                 .no_policy =    1,
1949         };
1950         if (inet_add_protocol(&net_hotdata.udp_protocol, IPPROTO_UDP) < 0)
1951                 pr_crit("%s: Cannot add UDP protocol\n", __func__);
1952 
1953         net_hotdata.tcp_protocol = (struct net_protocol) {
1954                 .handler        =       tcp_v4_rcv,
1955                 .err_handler    =       tcp_v4_err,
1956                 .no_policy      =       1,
1957                 .icmp_strict_tag_validation = 1,
1958         };
1959         if (inet_add_protocol(&net_hotdata.tcp_protocol, IPPROTO_TCP) < 0)
1960                 pr_crit("%s: Cannot add TCP protocol\n", __func__);
1961 #ifdef CONFIG_IP_MULTICAST
1962         if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
1963                 pr_crit("%s: Cannot add IGMP protocol\n", __func__);
1964 #endif
1965 
1966         /* Register the socket-side information for inet_create. */
1967         for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
1968                 INIT_LIST_HEAD(r);
1969 
1970         for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
1971                 inet_register_protosw(q);
1972 
1973         /*
1974          *      Set the ARP module up
1975          */
1976 
1977         arp_init();
1978 
1979         /*
1980          *      Set the IP module up
1981          */
1982 
1983         ip_init();
1984 
1985         /* Initialise per-cpu ipv4 mibs */
1986         if (init_ipv4_mibs())
1987                 panic("%s: Cannot init ipv4 mibs\n", __func__);
1988 
1989         /* Setup TCP slab cache for open requests. */
1990         tcp_init();
1991 
1992         /* Setup UDP memory threshold */
1993         udp_init();
1994 
1995         /* Add UDP-Lite (RFC 3828) */
1996         udplite4_register();
1997 
1998         raw_init();
1999 
2000         ping_init();
2001 
2002         /*
2003          *      Set the ICMP layer up
2004          */
2005 
2006         if (icmp_init() < 0)
2007                 panic("Failed to create the ICMP control socket.\n");
2008 
2009         /*
2010          *      Initialise the multicast router
2011          */
2012 #if defined(CONFIG_IP_MROUTE)
2013         if (ip_mr_init())
2014                 pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
2015 #endif
2016 
2017         if (init_inet_pernet_ops())
2018                 pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
2019 
2020         ipv4_proc_init();
2021 
2022         ipfrag_init();
2023 
2024         dev_add_pack(&ip_packet_type);
2025 
2026         ip_tunnel_core_init();
2027 
2028         rc = 0;
2029 out:
2030         return rc;
2031 out_unregister_raw_proto:
2032         proto_unregister(&raw_prot);
2033 out_unregister_udp_proto:
2034         proto_unregister(&udp_prot);
2035 out_unregister_tcp_proto:
2036         proto_unregister(&tcp_prot);
2037         goto out;
2038 }
2039 
2040 fs_initcall(inet_init);
2041 
2042 /* ------------------------------------------------------------------------ */
2043 
2044 #ifdef CONFIG_PROC_FS
2045 static int __init ipv4_proc_init(void)
2046 {
2047         int rc = 0;
2048 
2049         if (raw_proc_init())
2050                 goto out_raw;
2051         if (tcp4_proc_init())
2052                 goto out_tcp;
2053         if (udp4_proc_init())
2054                 goto out_udp;
2055         if (ping_proc_init())
2056                 goto out_ping;
2057         if (ip_misc_proc_init())
2058                 goto out_misc;
2059 out:
2060         return rc;
2061 out_misc:
2062         ping_proc_exit();
2063 out_ping:
2064         udp4_proc_exit();
2065 out_udp:
2066         tcp4_proc_exit();
2067 out_tcp:
2068         raw_proc_exit();
2069 out_raw:
2070         rc = -ENOMEM;
2071         goto out;
2072 }
2073 
2074 #else /* CONFIG_PROC_FS */
2075 static int __init ipv4_proc_init(void)
2076 {
2077         return 0;
2078 }
2079 #endif /* CONFIG_PROC_FS */
2080
~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.
TOMOYO Linux Cross Reference Linux/net/ipv4/af_inet.c

TOMOYO Linux Cross Reference
Linux/net/ipv4/af_inet.c