
TOMOYO Linux Cross Reference
Linux/net/core/sock.c


  1 // SPDX-License-Identifier: GPL-2.0-or-later
  2 /*
  3  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  4  *              operating system.  INET is implemented using the  BSD Socket
  5  *              interface as the means of communication with the user level.
  6  *
  7  *              Generic socket support routines. Memory allocators, socket lock/release
  8  *              handler for protocols to use and generic option handler.
  9  *
 10  * Authors:     Ross Biro
 11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 12  *              Florian La Roche, <flla@stud.uni-sb.de>
 13  *              Alan Cox, <A.Cox@swansea.ac.uk>
 14  *
 15  * Fixes:
 16  *              Alan Cox        :       Numerous verify_area() problems
 17  *              Alan Cox        :       Connecting on a connecting socket
 18  *                                      now returns an error for tcp.
 19  *              Alan Cox        :       sock->protocol is set correctly.
 20  *                                      and is not sometimes left as 0.
 21  *              Alan Cox        :       connect handles icmp errors on a
 22  *                                      connect properly. Unfortunately there
 23  *                                      is a restart syscall nasty there. I
 24  *                                      can't match BSD without hacking the C
 25  *                                      library. Ideas urgently sought!
 26  *              Alan Cox        :       Disallow bind() to addresses that are
 27  *                                      not ours - especially broadcast ones!!
 28  *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
 29  *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
 30  *                                      instead they leave that for the DESTROY timer.
 31  *              Alan Cox        :       Clean up error flag in accept
 32  *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
 33  *                                      was buggy. Put a remove_sock() in the handler
 34  *                                      for memory when we hit 0. Also altered the timer
 35  *                                      code. The ACK stuff can wait and needs major
 36  *                                      TCP layer surgery.
 37  *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
 38  *                                      and fixed timer/inet_bh race.
 39  *              Alan Cox        :       Added zapped flag for TCP
 40  *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
 41  *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 42  *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
 43  *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
 44  *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 45  *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
 46  *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
 47  *      Pauline Middelink       :       identd support
 48  *              Alan Cox        :       Fixed connect() taking signals I think.
 49  *              Alan Cox        :       SO_LINGER supported
 50  *              Alan Cox        :       Error reporting fixes
 51  *              Anonymous       :       inet_create tidied up (sk->reuse setting)
 52  *              Alan Cox        :       inet sockets don't set sk->type!
 53  *              Alan Cox        :       Split socket option code
 54  *              Alan Cox        :       Callbacks
 55  *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
 56  *              Alex            :       Removed restriction on inet fioctl
 57  *              Alan Cox        :       Splitting INET from NET core
 58  *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
 59  *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
 60  *              Alan Cox        :       Split IP from generic code
 61  *              Alan Cox        :       New kfree_skbmem()
 62  *              Alan Cox        :       Make SO_DEBUG superuser only.
 63  *              Alan Cox        :       Allow anyone to clear SO_DEBUG
 64  *                                      (compatibility fix)
 65  *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
 66  *              Alan Cox        :       Allocator for a socket is settable.
 67  *              Alan Cox        :       SO_ERROR includes soft errors.
 68  *              Alan Cox        :       Allow NULL arguments on some SO_ opts
 69  *              Alan Cox        :       Generic socket allocation to make hooks
 70  *                                      easier (suggested by Craig Metz).
 71  *              Michael Pall    :       SO_ERROR returns positive errno again
 72  *              Steve Whitehouse:       Added default destructor to free
 73  *                                      protocol private data.
 74  *              Steve Whitehouse:       Added various other default routines
 75  *                                      common to several socket families.
 76  *              Chris Evans     :       Call suser() check last on F_SETOWN
 77  *              Jay Schulist    :       Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 78  *              Andi Kleen      :       Add sock_kmalloc()/sock_kfree_s()
 79  *              Andi Kleen      :       Fix write_space callback
 80  *              Chris Evans     :       Security fixes - signedness again
 81  *              Arnaldo C. Melo :       cleanups, use skb_queue_purge
 82  *
 83  * To Fix:
 84  */
 85 
 86 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 87 
 88 #include <asm/unaligned.h>
 89 #include <linux/capability.h>
 90 #include <linux/errno.h>
 91 #include <linux/errqueue.h>
 92 #include <linux/types.h>
 93 #include <linux/socket.h>
 94 #include <linux/in.h>
 95 #include <linux/kernel.h>
 96 #include <linux/module.h>
 97 #include <linux/proc_fs.h>
 98 #include <linux/seq_file.h>
 99 #include <linux/sched.h>
100 #include <linux/sched/mm.h>
101 #include <linux/timer.h>
102 #include <linux/string.h>
103 #include <linux/sockios.h>
104 #include <linux/net.h>
105 #include <linux/mm.h>
106 #include <linux/slab.h>
107 #include <linux/interrupt.h>
108 #include <linux/poll.h>
109 #include <linux/tcp.h>
110 #include <linux/udp.h>
111 #include <linux/init.h>
112 #include <linux/highmem.h>
113 #include <linux/user_namespace.h>
114 #include <linux/static_key.h>
115 #include <linux/memcontrol.h>
116 #include <linux/prefetch.h>
117 #include <linux/compat.h>
118 #include <linux/mroute.h>
119 #include <linux/mroute6.h>
120 #include <linux/icmpv6.h>
121 
122 #include <linux/uaccess.h>
123 
124 #include <linux/netdevice.h>
125 #include <net/protocol.h>
126 #include <linux/skbuff.h>
127 #include <net/net_namespace.h>
128 #include <net/request_sock.h>
129 #include <net/sock.h>
130 #include <net/proto_memory.h>
131 #include <linux/net_tstamp.h>
132 #include <net/xfrm.h>
133 #include <linux/ipsec.h>
134 #include <net/cls_cgroup.h>
135 #include <net/netprio_cgroup.h>
136 #include <linux/sock_diag.h>
137 
138 #include <linux/filter.h>
139 #include <net/sock_reuseport.h>
140 #include <net/bpf_sk_storage.h>
141 
142 #include <trace/events/sock.h>
143 
144 #include <net/tcp.h>
145 #include <net/busy_poll.h>
146 #include <net/phonet/phonet.h>
147 
148 #include <linux/ethtool.h>
149 
150 #include "dev.h"
151 
152 static DEFINE_MUTEX(proto_list_mutex);
153 static LIST_HEAD(proto_list);
154 
155 static void sock_def_write_space_wfree(struct sock *sk);
156 static void sock_def_write_space(struct sock *sk);
157 
158 /**
159  * sk_ns_capable - General socket capability test
160  * @sk: Socket to use a capability on or through
161  * @user_ns: The user namespace of the capability to use
162  * @cap: The capability to use
163  *
164  * Test to see if the opener of the socket had the capability @cap when
165  * the socket was created and whether the current process has it in the
166  * user namespace @user_ns.
167  */
168 bool sk_ns_capable(const struct sock *sk,
169                    struct user_namespace *user_ns, int cap)
170 {
171         return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
172                 ns_capable(user_ns, cap);
173 }
174 EXPORT_SYMBOL(sk_ns_capable);
175 
176 /**
177  * sk_capable - Socket global capability test
178  * @sk: Socket to use a capability on or through
179  * @cap: The global capability to use
180  *
181  * Test to see if the opener of the socket had the capability @cap when
182  * the socket was created and whether the current process has it in all
183  * user namespaces.
184  */
185 bool sk_capable(const struct sock *sk, int cap)
186 {
187         return sk_ns_capable(sk, &init_user_ns, cap);
188 }
189 EXPORT_SYMBOL(sk_capable);
190 
191 /**
192  * sk_net_capable - Network namespace socket capability test
193  * @sk: Socket to use a capability on or through
194  * @cap: The capability to use
195  *
196  * Test to see if the opener of the socket had the capability @cap when the
197  * socket was created and whether the current process has it over the
198  * network namespace the socket is a member of.
199  */
200 bool sk_net_capable(const struct sock *sk, int cap)
201 {
202         return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
203 }
204 EXPORT_SYMBOL(sk_net_capable);
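
/*
 * Usage sketch (hypothetical caller): a protocol gating a privileged
 * operation on both the socket opener's credentials and the current task
 * could do
 *
 *	if (!sk_net_capable(sk, CAP_NET_ADMIN))
 *		return -EPERM;
 *
 * sk_capable() runs the same test against &init_user_ns, and
 * sk_ns_capable() lets the caller pick the user namespace explicitly.
 */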
205 
206 /*
207  * Each address family might have different locking rules, so we have
208  * one slock key per address family and separate keys for internal and
209  * userspace sockets.
210  */
211 static struct lock_class_key af_family_keys[AF_MAX];
212 static struct lock_class_key af_family_kern_keys[AF_MAX];
213 static struct lock_class_key af_family_slock_keys[AF_MAX];
214 static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
215 
216 /*
217  * Make lock validator output more readable. (We pre-construct these
218  * strings at build time, so that runtime initialization of socket
219  * locks is fast.)
220  */
221 
222 #define _sock_locks(x)                                            \
223   x "AF_UNSPEC",        x "AF_UNIX"     ,       x "AF_INET"     , \
224   x "AF_AX25"  ,        x "AF_IPX"      ,       x "AF_APPLETALK", \
225   x "AF_NETROM",        x "AF_BRIDGE"   ,       x "AF_ATMPVC"   , \
226   x "AF_X25"   ,        x "AF_INET6"    ,       x "AF_ROSE"     , \
227   x "AF_DECnet",        x "AF_NETBEUI"  ,       x "AF_SECURITY" , \
228   x "AF_KEY"   ,        x "AF_NETLINK"  ,       x "AF_PACKET"   , \
229   x "AF_ASH"   ,        x "AF_ECONET"   ,       x "AF_ATMSVC"   , \
230   x "AF_RDS"   ,        x "AF_SNA"      ,       x "AF_IRDA"     , \
231   x "AF_PPPOX" ,        x "AF_WANPIPE"  ,       x "AF_LLC"      , \
232   x "27"       ,        x "28"          ,       x "AF_CAN"      , \
233   x "AF_TIPC"  ,        x "AF_BLUETOOTH",       x "IUCV"        , \
234   x "AF_RXRPC" ,        x "AF_ISDN"     ,       x "AF_PHONET"   , \
235   x "AF_IEEE802154",    x "AF_CAIF"     ,       x "AF_ALG"      , \
236   x "AF_NFC"   ,        x "AF_VSOCK"    ,       x "AF_KCM"      , \
237   x "AF_QIPCRTR",       x "AF_SMC"      ,       x "AF_XDP"      , \
238   x "AF_MCTP"  , \
239   x "AF_MAX"
240 
241 static const char *const af_family_key_strings[AF_MAX+1] = {
242         _sock_locks("sk_lock-")
243 };
244 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
245         _sock_locks("slock-")
246 };
247 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
248         _sock_locks("clock-")
249 };
250 
251 static const char *const af_family_kern_key_strings[AF_MAX+1] = {
252         _sock_locks("k-sk_lock-")
253 };
254 static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
255         _sock_locks("k-slock-")
256 };
257 static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
258         _sock_locks("k-clock-")
259 };
260 static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
261         _sock_locks("rlock-")
262 };
263 static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
264         _sock_locks("wlock-")
265 };
266 static const char *const af_family_elock_key_strings[AF_MAX+1] = {
267         _sock_locks("elock-")
268 };
269 
270 /*
271  * sk_callback_lock and sk queues locking rules are per-address-family,
272  * so split the lock classes by using a per-AF key:
273  */
274 static struct lock_class_key af_callback_keys[AF_MAX];
275 static struct lock_class_key af_rlock_keys[AF_MAX];
276 static struct lock_class_key af_wlock_keys[AF_MAX];
277 static struct lock_class_key af_elock_keys[AF_MAX];
278 static struct lock_class_key af_kern_callback_keys[AF_MAX];
279 
280 /* Run time adjustable parameters. */
281 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
282 EXPORT_SYMBOL(sysctl_wmem_max);
283 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
284 EXPORT_SYMBOL(sysctl_rmem_max);
285 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
286 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
287 
288 int sysctl_tstamp_allow_data __read_mostly = 1;
289 
290 DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
291 EXPORT_SYMBOL_GPL(memalloc_socks_key);
292 
293 /**
294  * sk_set_memalloc - sets %SOCK_MEMALLOC
295  * @sk: socket to set it on
296  *
297  * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
298  * It's the responsibility of the admin to adjust min_free_kbytes
299  * to meet the requirements.
300  */
301 void sk_set_memalloc(struct sock *sk)
302 {
303         sock_set_flag(sk, SOCK_MEMALLOC);
304         sk->sk_allocation |= __GFP_MEMALLOC;
305         static_branch_inc(&memalloc_socks_key);
306 }
307 EXPORT_SYMBOL_GPL(sk_set_memalloc);
308 
309 void sk_clear_memalloc(struct sock *sk)
310 {
311         sock_reset_flag(sk, SOCK_MEMALLOC);
312         sk->sk_allocation &= ~__GFP_MEMALLOC;
313         static_branch_dec(&memalloc_socks_key);
314 
315         /*
316          * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
317          * progress of swapping. SOCK_MEMALLOC may be cleared while
318          * it has rmem allocations due to the last swapfile being deactivated
319          * but there is a risk that the socket is unusable due to exceeding
320          * the rmem limits. Reclaim the reserves and obey rmem limits again.
321          */
322         sk_mem_reclaim(sk);
323 }
324 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
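
/*
 * Usage sketch (hypothetical caller, e.g. a transport carrying swap-out
 * traffic): the two helpers are paired around the lifetime of the backing
 * swapfile, so the socket may dip into emergency reserves only while it
 * has to:
 *
 *	sk_set_memalloc(sk);	- when the swapfile is activated
 *	...
 *	sk_clear_memalloc(sk);	- when the last swapfile is deactivated
 */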
325 
326 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
327 {
328         int ret;
329         unsigned int noreclaim_flag;
330 
331         /* these should have been dropped before queueing */
332         BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
333 
334         noreclaim_flag = memalloc_noreclaim_save();
335         ret = INDIRECT_CALL_INET(sk->sk_backlog_rcv,
336                                  tcp_v6_do_rcv,
337                                  tcp_v4_do_rcv,
338                                  sk, skb);
339         memalloc_noreclaim_restore(noreclaim_flag);
340 
341         return ret;
342 }
343 EXPORT_SYMBOL(__sk_backlog_rcv);
344 
345 void sk_error_report(struct sock *sk)
346 {
347         sk->sk_error_report(sk);
348 
349         switch (sk->sk_family) {
350         case AF_INET:
351                 fallthrough;
352         case AF_INET6:
353                 trace_inet_sk_error_report(sk);
354                 break;
355         default:
356                 break;
357         }
358 }
359 EXPORT_SYMBOL(sk_error_report);
360 
361 int sock_get_timeout(long timeo, void *optval, bool old_timeval)
362 {
363         struct __kernel_sock_timeval tv;
364 
365         if (timeo == MAX_SCHEDULE_TIMEOUT) {
366                 tv.tv_sec = 0;
367                 tv.tv_usec = 0;
368         } else {
369                 tv.tv_sec = timeo / HZ;
370                 tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
371         }
372 
373         if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
374                 struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
375                 *(struct old_timeval32 *)optval = tv32;
376                 return sizeof(tv32);
377         }
378 
379         if (old_timeval) {
380                 struct __kernel_old_timeval old_tv;
381                 old_tv.tv_sec = tv.tv_sec;
382                 old_tv.tv_usec = tv.tv_usec;
383                 *(struct __kernel_old_timeval *)optval = old_tv;
384                 return sizeof(old_tv);
385         }
386 
387         *(struct __kernel_sock_timeval *)optval = tv;
388         return sizeof(tv);
389 }
390 EXPORT_SYMBOL(sock_get_timeout);
391 
392 int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
393                            sockptr_t optval, int optlen, bool old_timeval)
394 {
395         if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
396                 struct old_timeval32 tv32;
397 
398                 if (optlen < sizeof(tv32))
399                         return -EINVAL;
400 
401                 if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
402                         return -EFAULT;
403                 tv->tv_sec = tv32.tv_sec;
404                 tv->tv_usec = tv32.tv_usec;
405         } else if (old_timeval) {
406                 struct __kernel_old_timeval old_tv;
407 
408                 if (optlen < sizeof(old_tv))
409                         return -EINVAL;
410                 if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
411                         return -EFAULT;
412                 tv->tv_sec = old_tv.tv_sec;
413                 tv->tv_usec = old_tv.tv_usec;
414         } else {
415                 if (optlen < sizeof(*tv))
416                         return -EINVAL;
417                 if (copy_from_sockptr(tv, optval, sizeof(*tv)))
418                         return -EFAULT;
419         }
420 
421         return 0;
422 }
423 EXPORT_SYMBOL(sock_copy_user_timeval);
424 
425 static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
426                             bool old_timeval)
427 {
428         struct __kernel_sock_timeval tv;
429         int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
430         long val;
431 
432         if (err)
433                 return err;
434 
435         if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
436                 return -EDOM;
437 
438         if (tv.tv_sec < 0) {
439                 static int warned __read_mostly;
440 
441                 WRITE_ONCE(*timeo_p, 0);
442                 if (warned < 10 && net_ratelimit()) {
443                         warned++;
444                         pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
445                                 __func__, current->comm, task_pid_nr(current));
446                 }
447                 return 0;
448         }
449         val = MAX_SCHEDULE_TIMEOUT;
450         if ((tv.tv_sec || tv.tv_usec) &&
451             (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)))
452                 val = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec,
453                                                     USEC_PER_SEC / HZ);
454         WRITE_ONCE(*timeo_p, val);
455         return 0;
456 }
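
/*
 * Worked example for sock_set_timeout(), assuming HZ == 1000: a user value
 * of tv_sec = 2, tv_usec = 500000 gives
 *
 *	val = 2 * HZ + DIV_ROUND_UP(500000, USEC_PER_SEC / HZ)
 *	    = 2000 + 500 = 2500 jiffies,
 *
 * while an all-zero timeval leaves val at MAX_SCHEDULE_TIMEOUT, i.e. the
 * operation blocks without a timeout.
 */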
457 
458 static bool sock_needs_netstamp(const struct sock *sk)
459 {
460         switch (sk->sk_family) {
461         case AF_UNSPEC:
462         case AF_UNIX:
463                 return false;
464         default:
465                 return true;
466         }
467 }
468 
469 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
470 {
471         if (sk->sk_flags & flags) {
472                 sk->sk_flags &= ~flags;
473                 if (sock_needs_netstamp(sk) &&
474                     !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
475                         net_disable_timestamp();
476         }
477 }
478 
479 
480 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
481 {
482         unsigned long flags;
483         struct sk_buff_head *list = &sk->sk_receive_queue;
484 
485         if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) {
486                 atomic_inc(&sk->sk_drops);
487                 trace_sock_rcvqueue_full(sk, skb);
488                 return -ENOMEM;
489         }
490 
491         if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
492                 atomic_inc(&sk->sk_drops);
493                 return -ENOBUFS;
494         }
495 
496         skb->dev = NULL;
497         skb_set_owner_r(skb, sk);
498 
499         /* We escape from the RCU-protected region, so make sure we don't
500          * leak a non-refcounted dst.
501          */
502         skb_dst_force(skb);
503 
504         spin_lock_irqsave(&list->lock, flags);
505         sock_skb_set_dropcount(sk, skb);
506         __skb_queue_tail(list, skb);
507         spin_unlock_irqrestore(&list->lock, flags);
508 
509         if (!sock_flag(sk, SOCK_DEAD))
510                 sk->sk_data_ready(sk);
511         return 0;
512 }
513 EXPORT_SYMBOL(__sock_queue_rcv_skb);
514 
515 int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
516                               enum skb_drop_reason *reason)
517 {
518         enum skb_drop_reason drop_reason;
519         int err;
520 
521         err = sk_filter(sk, skb);
522         if (err) {
523                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
524                 goto out;
525         }
526         err = __sock_queue_rcv_skb(sk, skb);
527         switch (err) {
528         case -ENOMEM:
529                 drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
530                 break;
531         case -ENOBUFS:
532                 drop_reason = SKB_DROP_REASON_PROTO_MEM;
533                 break;
534         default:
535                 drop_reason = SKB_NOT_DROPPED_YET;
536                 break;
537         }
538 out:
539         if (reason)
540                 *reason = drop_reason;
541         return err;
542 }
543 EXPORT_SYMBOL(sock_queue_rcv_skb_reason);
544 
545 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
546                      const int nested, unsigned int trim_cap, bool refcounted)
547 {
548         int rc = NET_RX_SUCCESS;
549 
550         if (sk_filter_trim_cap(sk, skb, trim_cap))
551                 goto discard_and_relse;
552 
553         skb->dev = NULL;
554 
555         if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
556                 atomic_inc(&sk->sk_drops);
557                 goto discard_and_relse;
558         }
559         if (nested)
560                 bh_lock_sock_nested(sk);
561         else
562                 bh_lock_sock(sk);
563         if (!sock_owned_by_user(sk)) {
564                 /*
565                  * trylock + unlock semantics:
566                  */
567                 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
568 
569                 rc = sk_backlog_rcv(sk, skb);
570 
571                 mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
572         } else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
573                 bh_unlock_sock(sk);
574                 atomic_inc(&sk->sk_drops);
575                 goto discard_and_relse;
576         }
577 
578         bh_unlock_sock(sk);
579 out:
580         if (refcounted)
581                 sock_put(sk);
582         return rc;
583 discard_and_relse:
584         kfree_skb(skb);
585         goto out;
586 }
587 EXPORT_SYMBOL(__sk_receive_skb);
588 
589 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
590                                                           u32));
591 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
592                                                            u32));
593 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
594 {
595         struct dst_entry *dst = __sk_dst_get(sk);
596 
597         if (dst && dst->obsolete &&
598             INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
599                                dst, cookie) == NULL) {
600                 sk_tx_queue_clear(sk);
601                 WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
602                 RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
603                 dst_release(dst);
604                 return NULL;
605         }
606 
607         return dst;
608 }
609 EXPORT_SYMBOL(__sk_dst_check);
610 
611 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
612 {
613         struct dst_entry *dst = sk_dst_get(sk);
614 
615         if (dst && dst->obsolete &&
616             INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
617                                dst, cookie) == NULL) {
618                 sk_dst_reset(sk);
619                 dst_release(dst);
620                 return NULL;
621         }
622 
623         return dst;
624 }
625 EXPORT_SYMBOL(sk_dst_check);
626 
627 static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
628 {
629         int ret = -ENOPROTOOPT;
630 #ifdef CONFIG_NETDEVICES
631         struct net *net = sock_net(sk);
632 
633         /* Sorry... */
634         ret = -EPERM;
635         if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
636                 goto out;
637 
638         ret = -EINVAL;
639         if (ifindex < 0)
640                 goto out;
641 
642         /* Paired with all READ_ONCE() done locklessly. */
643         WRITE_ONCE(sk->sk_bound_dev_if, ifindex);
644 
645         if (sk->sk_prot->rehash)
646                 sk->sk_prot->rehash(sk);
647         sk_dst_reset(sk);
648 
649         ret = 0;
650 
651 out:
652 #endif
653 
654         return ret;
655 }
656 
657 int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
658 {
659         int ret;
660 
661         if (lock_sk)
662                 lock_sock(sk);
663         ret = sock_bindtoindex_locked(sk, ifindex);
664         if (lock_sk)
665                 release_sock(sk);
666 
667         return ret;
668 }
669 EXPORT_SYMBOL(sock_bindtoindex);
670 
671 static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
672 {
673         int ret = -ENOPROTOOPT;
674 #ifdef CONFIG_NETDEVICES
675         struct net *net = sock_net(sk);
676         char devname[IFNAMSIZ];
677         int index;
678 
679         ret = -EINVAL;
680         if (optlen < 0)
681                 goto out;
682 
683         /* Bind this socket to a particular device like "eth0",
684          * as specified in the passed interface name. If the
685          * name is "" or the option length is zero, any existing
686          * binding is cleared.
687          */
688         if (optlen > IFNAMSIZ - 1)
689                 optlen = IFNAMSIZ - 1;
690         memset(devname, 0, sizeof(devname));
691 
692         ret = -EFAULT;
693         if (copy_from_sockptr(devname, optval, optlen))
694                 goto out;
695 
696         index = 0;
697         if (devname[0] != '\0') {
698                 struct net_device *dev;
699 
700                 rcu_read_lock();
701                 dev = dev_get_by_name_rcu(net, devname);
702                 if (dev)
703                         index = dev->ifindex;
704                 rcu_read_unlock();
705                 ret = -ENODEV;
706                 if (!dev)
707                         goto out;
708         }
709 
710         sockopt_lock_sock(sk);
711         ret = sock_bindtoindex_locked(sk, index);
712         sockopt_release_sock(sk);
713 out:
714 #endif
715 
716         return ret;
717 }
718 
719 static int sock_getbindtodevice(struct sock *sk, sockptr_t optval,
720                                 sockptr_t optlen, int len)
721 {
722         int ret = -ENOPROTOOPT;
723 #ifdef CONFIG_NETDEVICES
724         int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
725         struct net *net = sock_net(sk);
726         char devname[IFNAMSIZ];
727 
728         if (bound_dev_if == 0) {
729                 len = 0;
730                 goto zero;
731         }
732 
733         ret = -EINVAL;
734         if (len < IFNAMSIZ)
735                 goto out;
736 
737         ret = netdev_get_name(net, devname, bound_dev_if);
738         if (ret)
739                 goto out;
740 
741         len = strlen(devname) + 1;
742 
743         ret = -EFAULT;
744         if (copy_to_sockptr(optval, devname, len))
745                 goto out;
746 
747 zero:
748         ret = -EFAULT;
749         if (copy_to_sockptr(optlen, &len, sizeof(int)))
750                 goto out;
751 
752         ret = 0;
753 
754 out:
755 #endif
756 
757         return ret;
758 }
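
/*
 * Userspace sketch (hypothetical, assumes an interface named "eth0"
 * exists):
 *
 *	setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0", strlen("eth0"));
 *
 * lands in sock_setbindtodevice(), which resolves the name to an ifindex
 * and then takes the sock_bindtoindex_locked() path; an empty name (or a
 * zero option length) clears any existing binding.
 */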
759 
760 bool sk_mc_loop(const struct sock *sk)
761 {
762         if (dev_recursion_level())
763                 return false;
764         if (!sk)
765                 return true;
766         /* IPV6_ADDRFORM can change sk->sk_family under us. */
767         switch (READ_ONCE(sk->sk_family)) {
768         case AF_INET:
769                 return inet_test_bit(MC_LOOP, sk);
770 #if IS_ENABLED(CONFIG_IPV6)
771         case AF_INET6:
772                 return inet6_test_bit(MC6_LOOP, sk);
773 #endif
774         }
775         WARN_ON_ONCE(1);
776         return true;
777 }
778 EXPORT_SYMBOL(sk_mc_loop);
779 
780 void sock_set_reuseaddr(struct sock *sk)
781 {
782         lock_sock(sk);
783         sk->sk_reuse = SK_CAN_REUSE;
784         release_sock(sk);
785 }
786 EXPORT_SYMBOL(sock_set_reuseaddr);
787 
788 void sock_set_reuseport(struct sock *sk)
789 {
790         lock_sock(sk);
791         sk->sk_reuseport = true;
792         release_sock(sk);
793 }
794 EXPORT_SYMBOL(sock_set_reuseport);
795 
796 void sock_no_linger(struct sock *sk)
797 {
798         lock_sock(sk);
799         WRITE_ONCE(sk->sk_lingertime, 0);
800         sock_set_flag(sk, SOCK_LINGER);
801         release_sock(sk);
802 }
803 EXPORT_SYMBOL(sock_no_linger);
804 
805 void sock_set_priority(struct sock *sk, u32 priority)
806 {
807         WRITE_ONCE(sk->sk_priority, priority);
808 }
809 EXPORT_SYMBOL(sock_set_priority);
810 
811 void sock_set_sndtimeo(struct sock *sk, s64 secs)
812 {
813         lock_sock(sk);
814         if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
815                 WRITE_ONCE(sk->sk_sndtimeo, secs * HZ);
816         else
817                 WRITE_ONCE(sk->sk_sndtimeo, MAX_SCHEDULE_TIMEOUT);
818         release_sock(sk);
819 }
820 EXPORT_SYMBOL(sock_set_sndtimeo);
821 
822 static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
823 {
824         if (val)  {
825                 sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
826                 sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
827                 sock_set_flag(sk, SOCK_RCVTSTAMP);
828                 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
829         } else {
830                 sock_reset_flag(sk, SOCK_RCVTSTAMP);
831                 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
832         }
833 }
834 
835 void sock_enable_timestamps(struct sock *sk)
836 {
837         lock_sock(sk);
838         __sock_set_timestamps(sk, true, false, true);
839         release_sock(sk);
840 }
841 EXPORT_SYMBOL(sock_enable_timestamps);
842 
843 void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
844 {
845         switch (optname) {
846         case SO_TIMESTAMP_OLD:
847                 __sock_set_timestamps(sk, valbool, false, false);
848                 break;
849         case SO_TIMESTAMP_NEW:
850                 __sock_set_timestamps(sk, valbool, true, false);
851                 break;
852         case SO_TIMESTAMPNS_OLD:
853                 __sock_set_timestamps(sk, valbool, false, true);
854                 break;
855         case SO_TIMESTAMPNS_NEW:
856                 __sock_set_timestamps(sk, valbool, true, true);
857                 break;
858         }
859 }
860 
861 static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
862 {
863         struct net *net = sock_net(sk);
864         struct net_device *dev = NULL;
865         bool match = false;
866         int *vclock_index;
867         int i, num;
868 
869         if (sk->sk_bound_dev_if)
870                 dev = dev_get_by_index(net, sk->sk_bound_dev_if);
871 
872         if (!dev) {
873                 pr_err("%s: sock not bound to a device\n", __func__);
874                 return -EOPNOTSUPP;
875         }
876 
877         num = ethtool_get_phc_vclocks(dev, &vclock_index);
878         dev_put(dev);
879 
880         for (i = 0; i < num; i++) {
881                 if (*(vclock_index + i) == phc_index) {
882                         match = true;
883                         break;
884                 }
885         }
886 
887         if (num > 0)
888                 kfree(vclock_index);
889 
890         if (!match)
891                 return -EINVAL;
892 
893         WRITE_ONCE(sk->sk_bind_phc, phc_index);
894 
895         return 0;
896 }
897 
898 int sock_set_timestamping(struct sock *sk, int optname,
899                           struct so_timestamping timestamping)
900 {
901         int val = timestamping.flags;
902         int ret;
903 
904         if (val & ~SOF_TIMESTAMPING_MASK)
905                 return -EINVAL;
906 
907         if (val & SOF_TIMESTAMPING_OPT_ID_TCP &&
908             !(val & SOF_TIMESTAMPING_OPT_ID))
909                 return -EINVAL;
910 
911         if (val & SOF_TIMESTAMPING_OPT_ID &&
912             !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
913                 if (sk_is_tcp(sk)) {
914                         if ((1 << sk->sk_state) &
915                             (TCPF_CLOSE | TCPF_LISTEN))
916                                 return -EINVAL;
917                         if (val & SOF_TIMESTAMPING_OPT_ID_TCP)
918                                 atomic_set(&sk->sk_tskey, tcp_sk(sk)->write_seq);
919                         else
920                                 atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
921                 } else {
922                         atomic_set(&sk->sk_tskey, 0);
923                 }
924         }
925 
926         if (val & SOF_TIMESTAMPING_OPT_STATS &&
927             !(val & SOF_TIMESTAMPING_OPT_TSONLY))
928                 return -EINVAL;
929 
930         if (val & SOF_TIMESTAMPING_BIND_PHC) {
931                 ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc);
932                 if (ret)
933                         return ret;
934         }
935 
936         WRITE_ONCE(sk->sk_tsflags, val);
937         sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
938 
939         if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
940                 sock_enable_timestamp(sk,
941                                       SOCK_TIMESTAMPING_RX_SOFTWARE);
942         else
943                 sock_disable_timestamp(sk,
944                                        (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
945         return 0;
946 }
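
/*
 * Userspace sketch (hypothetical): requesting software receive timestamps,
 * which makes sock_set_timestamping() enable SOCK_TIMESTAMPING_RX_SOFTWARE
 * via sock_enable_timestamp():
 *
 *	struct so_timestamping ts = {
 *		.flags = SOF_TIMESTAMPING_RX_SOFTWARE |
 *			 SOF_TIMESTAMPING_SOFTWARE,
 *	};
 *
 *	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &ts, sizeof(ts));
 */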
947 
948 void sock_set_keepalive(struct sock *sk)
949 {
950         lock_sock(sk);
951         if (sk->sk_prot->keepalive)
952                 sk->sk_prot->keepalive(sk, true);
953         sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
954         release_sock(sk);
955 }
956 EXPORT_SYMBOL(sock_set_keepalive);
957 
958 static void __sock_set_rcvbuf(struct sock *sk, int val)
959 {
960         /* Ensure val * 2 fits into an int, to prevent max_t() from treating it
961          * as a negative value.
962          */
963         val = min_t(int, val, INT_MAX / 2);
964         sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
965 
966         /* We double it on the way in to account for "struct sk_buff" etc.
967          * overhead.   Applications assume that the SO_RCVBUF setting they make
968          * will allow that much actual data to be received on that socket.
969          *
970          * Applications are unaware that "struct sk_buff" and other overheads
971          * allocate from the receive buffer during socket buffer allocation.
972          *
973          * And after considering the possible alternatives, returning the value
974          * we actually used in getsockopt is the most desirable behavior.
975          */
976         WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
977 }
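
/*
 * Worked example: setsockopt(SO_RCVBUF) with val = 65536 (and val within
 * net.core.rmem_max) stores sk_rcvbuf = 131072; the doubling accounts for
 * struct sk_buff and other per-packet overhead, and the doubled value is
 * what a later getsockopt(SO_RCVBUF) reports. Anything below
 * SOCK_MIN_RCVBUF / 2 is rounded up to SOCK_MIN_RCVBUF.
 */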
978 
979 void sock_set_rcvbuf(struct sock *sk, int val)
980 {
981         lock_sock(sk);
982         __sock_set_rcvbuf(sk, val);
983         release_sock(sk);
984 }
985 EXPORT_SYMBOL(sock_set_rcvbuf);
986 
987 static void __sock_set_mark(struct sock *sk, u32 val)
988 {
989         if (val != sk->sk_mark) {
990                 WRITE_ONCE(sk->sk_mark, val);
991                 sk_dst_reset(sk);
992         }
993 }
994 
995 void sock_set_mark(struct sock *sk, u32 val)
996 {
997         lock_sock(sk);
998         __sock_set_mark(sk, val);
999         release_sock(sk);
1000 }
1001 EXPORT_SYMBOL(sock_set_mark);
1002 
1003 static void sock_release_reserved_memory(struct sock *sk, int bytes)
1004 {
1005         /* Round down bytes to multiple of pages */
1006         bytes = round_down(bytes, PAGE_SIZE);
1007 
1008         WARN_ON(bytes > sk->sk_reserved_mem);
1009         WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes);
1010         sk_mem_reclaim(sk);
1011 }
1012 
1013 static int sock_reserve_memory(struct sock *sk, int bytes)
1014 {
1015         long allocated;
1016         bool charged;
1017         int pages;
1018 
1019         if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk))
1020                 return -EOPNOTSUPP;
1021 
1022         if (!bytes)
1023                 return 0;
1024 
1025         pages = sk_mem_pages(bytes);
1026 
1027         /* pre-charge to memcg */
1028         charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages,
1029                                           GFP_KERNEL | __GFP_RETRY_MAYFAIL);
1030         if (!charged)
1031                 return -ENOMEM;
1032 
1033         /* pre-charge to forward_alloc */
1034         sk_memory_allocated_add(sk, pages);
1035         allocated = sk_memory_allocated(sk);
1036         /* If the system goes into memory pressure with this
1037          * precharge, give up and return an error.
1038          */
1039         if (allocated > sk_prot_mem_limits(sk, 1)) {
1040                 sk_memory_allocated_sub(sk, pages);
1041                 mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
1042                 return -ENOMEM;
1043         }
1044         sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
1045 
1046         WRITE_ONCE(sk->sk_reserved_mem,
1047                    sk->sk_reserved_mem + (pages << PAGE_SHIFT));
1048 
1049         return 0;
1050 }
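
/*
 * Worked example (assuming PAGE_SIZE == 4096, a memcg-accounted socket and
 * no prior reservation): SO_RESERVE_MEM with val = 10000 reserves
 * sk_mem_pages(10000) = 3 pages, so 12288 bytes are pre-charged to
 * forward_alloc and recorded in sk_reserved_mem; releases in
 * sock_release_reserved_memory() are rounded down to whole pages.
 */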
1051 
1052 void sockopt_lock_sock(struct sock *sk)
1053 {
1054         /* When current->bpf_ctx is set, setsockopt() is being called
1055          * from a BPF program, which has already ensured that the sk
1056          * lock is held before making the call.
1057          */
1058         if (has_current_bpf_ctx())
1059                 return;
1060 
1061         lock_sock(sk);
1062 }
1063 EXPORT_SYMBOL(sockopt_lock_sock);
1064 
1065 void sockopt_release_sock(struct sock *sk)
1066 {
1067         if (has_current_bpf_ctx())
1068                 return;
1069 
1070         release_sock(sk);
1071 }
1072 EXPORT_SYMBOL(sockopt_release_sock);
1073 
1074 bool sockopt_ns_capable(struct user_namespace *ns, int cap)
1075 {
1076         return has_current_bpf_ctx() || ns_capable(ns, cap);
1077 }
1078 EXPORT_SYMBOL(sockopt_ns_capable);
1079 
1080 bool sockopt_capable(int cap)
1081 {
1082         return has_current_bpf_ctx() || capable(cap);
1083 }
1084 EXPORT_SYMBOL(sockopt_capable);
1085 
1086 static int sockopt_validate_clockid(__kernel_clockid_t value)
1087 {
1088         switch (value) {
1089         case CLOCK_REALTIME:
1090         case CLOCK_MONOTONIC:
1091         case CLOCK_TAI:
1092                 return 0;
1093         }
1094         return -EINVAL;
1095 }
1096 
1097 /*
1098  *      This is meant for all protocols to use and covers goings on
1099  *      at the socket level. Everything here is generic.
1100  */
1101 
1102 int sk_setsockopt(struct sock *sk, int level, int optname,
1103                   sockptr_t optval, unsigned int optlen)
1104 {
1105         struct so_timestamping timestamping;
1106         struct socket *sock = sk->sk_socket;
1107         struct sock_txtime sk_txtime;
1108         int val;
1109         int valbool;
1110         struct linger ling;
1111         int ret = 0;
1112 
1113         /*
1114          *      Options without arguments
1115          */
1116 
1117         if (optname == SO_BINDTODEVICE)
1118                 return sock_setbindtodevice(sk, optval, optlen);
1119 
1120         if (optlen < sizeof(int))
1121                 return -EINVAL;
1122 
1123         if (copy_from_sockptr(&val, optval, sizeof(val)))
1124                 return -EFAULT;
1125 
1126         valbool = val ? 1 : 0;
1127 
1128         /* handle options which do not require locking the socket. */
1129         switch (optname) {
1130         case SO_PRIORITY:
1131                 if ((val >= 0 && val <= 6) ||
1132                     sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
1133                     sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1134                         sock_set_priority(sk, val);
1135                         return 0;
1136                 }
1137                 return -EPERM;
1138         case SO_PASSSEC:
1139                 assign_bit(SOCK_PASSSEC, &sock->flags, valbool);
1140                 return 0;
1141         case SO_PASSCRED:
1142                 assign_bit(SOCK_PASSCRED, &sock->flags, valbool);
1143                 return 0;
1144         case SO_PASSPIDFD:
1145                 assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool);
1146                 return 0;
1147         case SO_TYPE:
1148         case SO_PROTOCOL:
1149         case SO_DOMAIN:
1150         case SO_ERROR:
1151                 return -ENOPROTOOPT;
1152 #ifdef CONFIG_NET_RX_BUSY_POLL
1153         case SO_BUSY_POLL:
1154                 if (val < 0)
1155                         return -EINVAL;
1156                 WRITE_ONCE(sk->sk_ll_usec, val);
1157                 return 0;
1158         case SO_PREFER_BUSY_POLL:
1159                 if (valbool && !sockopt_capable(CAP_NET_ADMIN))
1160                         return -EPERM;
1161                 WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
1162                 return 0;
1163         case SO_BUSY_POLL_BUDGET:
1164                 if (val > READ_ONCE(sk->sk_busy_poll_budget) &&
1165                     !sockopt_capable(CAP_NET_ADMIN))
1166                         return -EPERM;
1167                 if (val < 0 || val > U16_MAX)
1168                         return -EINVAL;
1169                 WRITE_ONCE(sk->sk_busy_poll_budget, val);
1170                 return 0;
1171 #endif
1172         case SO_MAX_PACING_RATE:
1173                 {
1174                 unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
1175                 unsigned long pacing_rate;
1176 
1177                 if (sizeof(ulval) != sizeof(val) &&
1178                     optlen >= sizeof(ulval) &&
1179                     copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
1180                         return -EFAULT;
1181                 }
1182                 if (ulval != ~0UL)
1183                         cmpxchg(&sk->sk_pacing_status,
1184                                 SK_PACING_NONE,
1185                                 SK_PACING_NEEDED);
1186                 /* Pairs with READ_ONCE() from sk_getsockopt() */
1187                 WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
1188                 pacing_rate = READ_ONCE(sk->sk_pacing_rate);
1189                 if (ulval < pacing_rate)
1190                         WRITE_ONCE(sk->sk_pacing_rate, ulval);
1191                 return 0;
1192                 }
1193         case SO_TXREHASH:
1194                 if (val < -1 || val > 1)
1195                         return -EINVAL;
1196                 if ((u8)val == SOCK_TXREHASH_DEFAULT)
1197                         val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
1198                 /* Paired with READ_ONCE() in tcp_rtx_synack()
1199                  * and sk_getsockopt().
1200                  */
1201                 WRITE_ONCE(sk->sk_txrehash, (u8)val);
1202                 return 0;
1203         case SO_PEEK_OFF:
1204                 {
1205                 int (*set_peek_off)(struct sock *sk, int val);
1206 
1207                 set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
1208                 if (set_peek_off)
1209                         ret = set_peek_off(sk, val);
1210                 else
1211                         ret = -EOPNOTSUPP;
1212                 return ret;
1213                 }
1214         }
1215 
1216         sockopt_lock_sock(sk);
1217 
1218         switch (optname) {
1219         case SO_DEBUG:
1220                 if (val && !sockopt_capable(CAP_NET_ADMIN))
1221                         ret = -EACCES;
1222                 else
1223                         sock_valbool_flag(sk, SOCK_DBG, valbool);
1224                 break;
1225         case SO_REUSEADDR:
1226                 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
1227                 break;
1228         case SO_REUSEPORT:
1229                 sk->sk_reuseport = valbool;
1230                 break;
1231         case SO_DONTROUTE:
1232                 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
1233                 sk_dst_reset(sk);
1234                 break;
1235         case SO_BROADCAST:
1236                 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
1237                 break;
1238         case SO_SNDBUF:
1239                 /* Don't return an error on this; BSD doesn't, and if you
1240                  * think about it, this is right. Otherwise apps have to
1241                  * play 'guess the biggest size' games. RCVBUF/SNDBUF
1242                  * are treated in BSD as hints.
1243                  */
1244                 val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
1245 set_sndbuf:
1246                 /* Ensure val * 2 fits into an int, to prevent max_t()
1247                  * from treating it as a negative value.
1248                  */
1249                 val = min_t(int, val, INT_MAX / 2);
1250                 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1251                 WRITE_ONCE(sk->sk_sndbuf,
1252                            max_t(int, val * 2, SOCK_MIN_SNDBUF));
1253                 /* Wake up sending tasks if we upped the value. */
1254                 sk->sk_write_space(sk);
1255                 break;
1256 
1257         case SO_SNDBUFFORCE:
1258                 if (!sockopt_capable(CAP_NET_ADMIN)) {
1259                         ret = -EPERM;
1260                         break;
1261                 }
1262 
1263                 /* No negative values (to prevent underflow, as val will be
1264                  * multiplied by 2).
1265                  */
1266                 if (val < 0)
1267                         val = 0;
1268                 goto set_sndbuf;
1269 
1270         case SO_RCVBUF:
1271                 /* Don't return an error on this; BSD doesn't, and if you
1272                  * think about it, this is right. Otherwise apps have to
1273                  * play 'guess the biggest size' games. RCVBUF/SNDBUF
1274                  * are treated in BSD as hints.
1275                  */
1276                 __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
1277                 break;
1278 
1279         case SO_RCVBUFFORCE:
1280                 if (!sockopt_capable(CAP_NET_ADMIN)) {
1281                         ret = -EPERM;
1282                         break;
1283                 }
1284 
1285                 /* No negative values (to prevent underflow, as val will be
1286                  * multiplied by 2).
1287                  */
1288                 __sock_set_rcvbuf(sk, max(val, 0));
1289                 break;
1290 
1291         case SO_KEEPALIVE:
1292                 if (sk->sk_prot->keepalive)
1293                         sk->sk_prot->keepalive(sk, valbool);
1294                 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
1295                 break;
1296 
1297         case SO_OOBINLINE:
1298                 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
1299                 break;
1300 
1301         case SO_NO_CHECK:
1302                 sk->sk_no_check_tx = valbool;
1303                 break;
1304 
1305         case SO_LINGER:
1306                 if (optlen < sizeof(ling)) {
1307                         ret = -EINVAL;  /* 1003.1g */
1308                         break;
1309                 }
1310                 if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
1311                         ret = -EFAULT;
1312                         break;
1313                 }
1314                 if (!ling.l_onoff) {
1315                         sock_reset_flag(sk, SOCK_LINGER);
1316                 } else {
1317                         unsigned long t_sec = ling.l_linger;
1318 
1319                         if (t_sec >= MAX_SCHEDULE_TIMEOUT / HZ)
1320                                 WRITE_ONCE(sk->sk_lingertime, MAX_SCHEDULE_TIMEOUT);
1321                         else
1322                                 WRITE_ONCE(sk->sk_lingertime, t_sec * HZ);
1323                         sock_set_flag(sk, SOCK_LINGER);
1324                 }
1325                 break;
1326 
1327         case SO_BSDCOMPAT:
1328                 break;
1329 
1330         case SO_TIMESTAMP_OLD:
1331         case SO_TIMESTAMP_NEW:
1332         case SO_TIMESTAMPNS_OLD:
1333         case SO_TIMESTAMPNS_NEW:
1334                 sock_set_timestamp(sk, optname, valbool);
1335                 break;
1336 
1337         case SO_TIMESTAMPING_NEW:
1338         case SO_TIMESTAMPING_OLD:
1339                 if (optlen == sizeof(timestamping)) {
1340                         if (copy_from_sockptr(&timestamping, optval,
1341                                               sizeof(timestamping))) {
1342                                 ret = -EFAULT;
1343                                 break;
1344                         }
1345                 } else {
1346                         memset(&timestamping, 0, sizeof(timestamping));
1347                         timestamping.flags = val;
1348                 }
1349                 ret = sock_set_timestamping(sk, optname, timestamping);
1350                 break;
1351 
1352         case SO_RCVLOWAT:
1353                 {
1354                 int (*set_rcvlowat)(struct sock *sk, int val) = NULL;
1355 
1356                 if (val < 0)
1357                         val = INT_MAX;
1358                 if (sock)
1359                         set_rcvlowat = READ_ONCE(sock->ops)->set_rcvlowat;
1360                 if (set_rcvlowat)
1361                         ret = set_rcvlowat(sk, val);
1362                 else
1363                         WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
1364                 break;
1365                 }
1366         case SO_RCVTIMEO_OLD:
1367         case SO_RCVTIMEO_NEW:
1368                 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
1369                                        optlen, optname == SO_RCVTIMEO_OLD);
1370                 break;
1371 
1372         case SO_SNDTIMEO_OLD:
1373         case SO_SNDTIMEO_NEW:
1374                 ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
1375                                        optlen, optname == SO_SNDTIMEO_OLD);
1376                 break;
1377 
1378         case SO_ATTACH_FILTER: {
1379                 struct sock_fprog fprog;
1380 
1381                 ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
1382                 if (!ret)
1383                         ret = sk_attach_filter(&fprog, sk);
1384                 break;
1385         }
1386         case SO_ATTACH_BPF:
1387                 ret = -EINVAL;
1388                 if (optlen == sizeof(u32)) {
1389                         u32 ufd;
1390 
1391                         ret = -EFAULT;
1392                         if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
1393                                 break;
1394 
1395                         ret = sk_attach_bpf(ufd, sk);
1396                 }
1397                 break;
1398 
1399         case SO_ATTACH_REUSEPORT_CBPF: {
1400                 struct sock_fprog fprog;
1401 
1402                 ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
1403                 if (!ret)
1404                         ret = sk_reuseport_attach_filter(&fprog, sk);
1405                 break;
1406         }
1407         case SO_ATTACH_REUSEPORT_EBPF:
1408                 ret = -EINVAL;
1409                 if (optlen == sizeof(u32)) {
1410                         u32 ufd;
1411 
1412                         ret = -EFAULT;
1413                         if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
1414                                 break;
1415 
1416                         ret = sk_reuseport_attach_bpf(ufd, sk);
1417                 }
1418                 break;
1419 
1420         case SO_DETACH_REUSEPORT_BPF:
1421                 ret = reuseport_detach_prog(sk);
1422                 break;
1423 
1424         case SO_DETACH_FILTER:
1425                 ret = sk_detach_filter(sk);
1426                 break;
1427 
1428         case SO_LOCK_FILTER:
1429                 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
1430                         ret = -EPERM;
1431                 else
1432                         sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
1433                 break;
1434 
1435         case SO_MARK:
1436                 if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
1437                     !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1438                         ret = -EPERM;
1439                         break;
1440                 }
1441 
1442                 __sock_set_mark(sk, val);
1443                 break;
1444         case SO_RCVMARK:
1445                 sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
1446                 break;
1447 
1448         case SO_RXQ_OVFL:
1449                 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
1450                 break;
1451 
1452         case SO_WIFI_STATUS:
1453                 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
1454                 break;
1455 
1456         case SO_NOFCS:
1457                 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
1458                 break;
1459 
1460         case SO_SELECT_ERR_QUEUE:
1461                 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
1462                 break;
1463 
1464 
1465         case SO_INCOMING_CPU:
1466                 reuseport_update_incoming_cpu(sk, val);
1467                 break;
1468 
1469         case SO_CNX_ADVICE:
1470                 if (val == 1)
1471                         dst_negative_advice(sk);
1472                 break;
1473 
1474         case SO_ZEROCOPY:
1475                 if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
1476                         if (!(sk_is_tcp(sk) ||
1477                               (sk->sk_type == SOCK_DGRAM &&
1478                                sk->sk_protocol == IPPROTO_UDP)))
1479                                 ret = -EOPNOTSUPP;
1480                 } else if (sk->sk_family != PF_RDS) {
1481                         ret = -EOPNOTSUPP;
1482                 }
1483                 if (!ret) {
1484                         if (val < 0 || val > 1)
1485                                 ret = -EINVAL;
1486                         else
1487                                 sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
1488                 }
1489                 break;
1490 
1491         case SO_TXTIME:
1492                 if (optlen != sizeof(struct sock_txtime)) {
1493                         ret = -EINVAL;
1494                         break;
1495                 } else if (copy_from_sockptr(&sk_txtime, optval,
1496                            sizeof(struct sock_txtime))) {
1497                         ret = -EFAULT;
1498                         break;
1499                 } else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
1500                         ret = -EINVAL;
1501                         break;
1502                 }
1503                 /* CLOCK_MONOTONIC is only used by sch_fq, and this packet
1504                  * scheduler has enough safeguards.
1505                  */
1506                 if (sk_txtime.clockid != CLOCK_MONOTONIC &&
1507                     !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1508                         ret = -EPERM;
1509                         break;
1510                 }
1511 
1512                 ret = sockopt_validate_clockid(sk_txtime.clockid);
1513                 if (ret)
1514                         break;
1515 
1516                 sock_valbool_flag(sk, SOCK_TXTIME, true);
1517                 sk->sk_clockid = sk_txtime.clockid;
1518                 sk->sk_txtime_deadline_mode =
1519                         !!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
1520                 sk->sk_txtime_report_errors =
1521                         !!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
1522                 break;
1523 
1524         case SO_BINDTOIFINDEX:
1525                 ret = sock_bindtoindex_locked(sk, val);
1526                 break;
1527 
1528         case SO_BUF_LOCK:
1529                 if (val & ~SOCK_BUF_LOCK_MASK) {
1530                         ret = -EINVAL;
1531                         break;
1532                 }
1533                 sk->sk_userlocks = val | (sk->sk_userlocks &
1534                                           ~SOCK_BUF_LOCK_MASK);
1535                 break;
1536 
1537         case SO_RESERVE_MEM:
1538         {
1539                 int delta;
1540 
1541                 if (val < 0) {
1542                         ret = -EINVAL;
1543                         break;
1544                 }
1545 
1546                 delta = val - sk->sk_reserved_mem;
1547                 if (delta < 0)
1548                         sock_release_reserved_memory(sk, -delta);
1549                 else
1550                         ret = sock_reserve_memory(sk, delta);
1551                 break;
1552         }
1553 
1554         default:
1555                 ret = -ENOPROTOOPT;
1556                 break;
1557         }
1558         sockopt_release_sock(sk);
1559         return ret;
1560 }
1561 
1562 int sock_setsockopt(struct socket *sock, int level, int optname,
1563                     sockptr_t optval, unsigned int optlen)
1564 {
1565         return sk_setsockopt(sock->sk, level, optname,
1566                              optval, optlen);
1567 }
1568 EXPORT_SYMBOL(sock_setsockopt);
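
/*
 * Illustrative user-space sketch, not part of sock.c: it exercises the
 * SO_TXTIME branch of sk_setsockopt() above.  The kernel insists on
 * optlen == sizeof(struct sock_txtime) and rejects unknown flag bits;
 * with CLOCK_MONOTONIC no CAP_NET_ADMIN is required.  Assumes SO_TXTIME
 * and struct sock_txtime are visible through <sys/socket.h> and
 * <linux/net_tstamp.h>.
 */
#include <sys/socket.h>
#include <linux/net_tstamp.h>
#include <time.h>

static int enable_txtime(int fd)
{
        struct sock_txtime cfg = {
                .clockid = CLOCK_MONOTONIC,
                .flags   = SOF_TXTIME_REPORT_ERRORS,
        };

        /* Any bit outside SOF_TXTIME_FLAGS_MASK would yield -EINVAL. */
        return setsockopt(fd, SOL_SOCKET, SO_TXTIME, &cfg, sizeof(cfg));
}
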
1569 
1570 static const struct cred *sk_get_peer_cred(struct sock *sk)
1571 {
1572         const struct cred *cred;
1573 
1574         spin_lock(&sk->sk_peer_lock);
1575         cred = get_cred(sk->sk_peer_cred);
1576         spin_unlock(&sk->sk_peer_lock);
1577 
1578         return cred;
1579 }
1580 
1581 static void cred_to_ucred(struct pid *pid, const struct cred *cred,
1582                           struct ucred *ucred)
1583 {
1584         ucred->pid = pid_vnr(pid);
1585         ucred->uid = ucred->gid = -1;
1586         if (cred) {
1587                 struct user_namespace *current_ns = current_user_ns();
1588 
1589                 ucred->uid = from_kuid_munged(current_ns, cred->euid);
1590                 ucred->gid = from_kgid_munged(current_ns, cred->egid);
1591         }
1592 }
1593 
1594 static int groups_to_user(sockptr_t dst, const struct group_info *src)
1595 {
1596         struct user_namespace *user_ns = current_user_ns();
1597         int i;
1598 
1599         for (i = 0; i < src->ngroups; i++) {
1600                 gid_t gid = from_kgid_munged(user_ns, src->gid[i]);
1601 
1602                 if (copy_to_sockptr_offset(dst, i * sizeof(gid), &gid, sizeof(gid)))
1603                         return -EFAULT;
1604         }
1605 
1606         return 0;
1607 }
1608 
1609 int sk_getsockopt(struct sock *sk, int level, int optname,
1610                   sockptr_t optval, sockptr_t optlen)
1611 {
1612         struct socket *sock = sk->sk_socket;
1613 
1614         union {
1615                 int val;
1616                 u64 val64;
1617                 unsigned long ulval;
1618                 struct linger ling;
1619                 struct old_timeval32 tm32;
1620                 struct __kernel_old_timeval tm;
1621                 struct  __kernel_sock_timeval stm;
1622                 struct sock_txtime txtime;
1623                 struct so_timestamping timestamping;
1624         } v;
1625 
1626         int lv = sizeof(int);
1627         int len;
1628 
1629         if (copy_from_sockptr(&len, optlen, sizeof(int)))
1630                 return -EFAULT;
1631         if (len < 0)
1632                 return -EINVAL;
1633 
1634         memset(&v, 0, sizeof(v));
1635 
1636         switch (optname) {
1637         case SO_DEBUG:
1638                 v.val = sock_flag(sk, SOCK_DBG);
1639                 break;
1640 
1641         case SO_DONTROUTE:
1642                 v.val = sock_flag(sk, SOCK_LOCALROUTE);
1643                 break;
1644 
1645         case SO_BROADCAST:
1646                 v.val = sock_flag(sk, SOCK_BROADCAST);
1647                 break;
1648 
1649         case SO_SNDBUF:
1650                 v.val = READ_ONCE(sk->sk_sndbuf);
1651                 break;
1652 
1653         case SO_RCVBUF:
1654                 v.val = READ_ONCE(sk->sk_rcvbuf);
1655                 break;
1656 
1657         case SO_REUSEADDR:
1658                 v.val = sk->sk_reuse;
1659                 break;
1660 
1661         case SO_REUSEPORT:
1662                 v.val = sk->sk_reuseport;
1663                 break;
1664 
1665         case SO_KEEPALIVE:
1666                 v.val = sock_flag(sk, SOCK_KEEPOPEN);
1667                 break;
1668 
1669         case SO_TYPE:
1670                 v.val = sk->sk_type;
1671                 break;
1672 
1673         case SO_PROTOCOL:
1674                 v.val = sk->sk_protocol;
1675                 break;
1676 
1677         case SO_DOMAIN:
1678                 v.val = sk->sk_family;
1679                 break;
1680 
1681         case SO_ERROR:
1682                 v.val = -sock_error(sk);
1683                 if (v.val == 0)
1684                         v.val = xchg(&sk->sk_err_soft, 0);
1685                 break;
1686 
1687         case SO_OOBINLINE:
1688                 v.val = sock_flag(sk, SOCK_URGINLINE);
1689                 break;
1690 
1691         case SO_NO_CHECK:
1692                 v.val = sk->sk_no_check_tx;
1693                 break;
1694 
1695         case SO_PRIORITY:
1696                 v.val = READ_ONCE(sk->sk_priority);
1697                 break;
1698 
1699         case SO_LINGER:
1700                 lv              = sizeof(v.ling);
1701                 v.ling.l_onoff  = sock_flag(sk, SOCK_LINGER);
1702                 v.ling.l_linger = READ_ONCE(sk->sk_lingertime) / HZ;
1703                 break;
1704 
1705         case SO_BSDCOMPAT:
1706                 break;
1707 
1708         case SO_TIMESTAMP_OLD:
1709                 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1710                                 !sock_flag(sk, SOCK_TSTAMP_NEW) &&
1711                                 !sock_flag(sk, SOCK_RCVTSTAMPNS);
1712                 break;
1713 
1714         case SO_TIMESTAMPNS_OLD:
1715                 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
1716                 break;
1717 
1718         case SO_TIMESTAMP_NEW:
1719                 v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
1720                 break;
1721 
1722         case SO_TIMESTAMPNS_NEW:
1723                 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
1724                 break;
1725 
1726         case SO_TIMESTAMPING_OLD:
1727         case SO_TIMESTAMPING_NEW:
1728                 lv = sizeof(v.timestamping);
1729                 /* For the later-added case SO_TIMESTAMPING_NEW: Be strict about only
1730                  * returning the flags when they were set through the same option.
1731                  * Don't change the behaviour for the old case SO_TIMESTAMPING_OLD.
1732                  */
1733                 if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
1734                         v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
1735                         v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
1736                 }
1737                 break;
1738 
1739         case SO_RCVTIMEO_OLD:
1740         case SO_RCVTIMEO_NEW:
1741                 lv = sock_get_timeout(READ_ONCE(sk->sk_rcvtimeo), &v,
1742                                       SO_RCVTIMEO_OLD == optname);
1743                 break;
1744 
1745         case SO_SNDTIMEO_OLD:
1746         case SO_SNDTIMEO_NEW:
1747                 lv = sock_get_timeout(READ_ONCE(sk->sk_sndtimeo), &v,
1748                                       SO_SNDTIMEO_OLD == optname);
1749                 break;
1750 
1751         case SO_RCVLOWAT:
1752                 v.val = READ_ONCE(sk->sk_rcvlowat);
1753                 break;
1754 
1755         case SO_SNDLOWAT:
1756                 v.val = 1;
1757                 break;
1758 
1759         case SO_PASSCRED:
1760                 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1761                 break;
1762 
1763         case SO_PASSPIDFD:
1764                 v.val = !!test_bit(SOCK_PASSPIDFD, &sock->flags);
1765                 break;
1766 
1767         case SO_PEERCRED:
1768         {
1769                 struct ucred peercred;
1770                 if (len > sizeof(peercred))
1771                         len = sizeof(peercred);
1772 
1773                 spin_lock(&sk->sk_peer_lock);
1774                 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1775                 spin_unlock(&sk->sk_peer_lock);
1776 
1777                 if (copy_to_sockptr(optval, &peercred, len))
1778                         return -EFAULT;
1779                 goto lenout;
1780         }
1781 
1782         case SO_PEERPIDFD:
1783         {
1784                 struct pid *peer_pid;
1785                 struct file *pidfd_file = NULL;
1786                 int pidfd;
1787 
1788                 if (len > sizeof(pidfd))
1789                         len = sizeof(pidfd);
1790 
1791                 spin_lock(&sk->sk_peer_lock);
1792                 peer_pid = get_pid(sk->sk_peer_pid);
1793                 spin_unlock(&sk->sk_peer_lock);
1794 
1795                 if (!peer_pid)
1796                         return -ENODATA;
1797 
1798                 pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
1799                 put_pid(peer_pid);
1800                 if (pidfd < 0)
1801                         return pidfd;
1802 
1803                 if (copy_to_sockptr(optval, &pidfd, len) ||
1804                     copy_to_sockptr(optlen, &len, sizeof(int))) {
1805                         put_unused_fd(pidfd);
1806                         fput(pidfd_file);
1807 
1808                         return -EFAULT;
1809                 }
1810 
1811                 fd_install(pidfd, pidfd_file);
1812                 return 0;
1813         }
1814 
1815         case SO_PEERGROUPS:
1816         {
1817                 const struct cred *cred;
1818                 int ret, n;
1819 
1820                 cred = sk_get_peer_cred(sk);
1821                 if (!cred)
1822                         return -ENODATA;
1823 
1824                 n = cred->group_info->ngroups;
1825                 if (len < n * sizeof(gid_t)) {
1826                         len = n * sizeof(gid_t);
1827                         put_cred(cred);
1828                         return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE;
1829                 }
1830                 len = n * sizeof(gid_t);
1831 
1832                 ret = groups_to_user(optval, cred->group_info);
1833                 put_cred(cred);
1834                 if (ret)
1835                         return ret;
1836                 goto lenout;
1837         }
1838 
1839         case SO_PEERNAME:
1840         {
1841                 struct sockaddr_storage address;
1842 
1843                 lv = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 2);
1844                 if (lv < 0)
1845                         return -ENOTCONN;
1846                 if (lv < len)
1847                         return -EINVAL;
1848                 if (copy_to_sockptr(optval, &address, len))
1849                         return -EFAULT;
1850                 goto lenout;
1851         }
1852 
1853         /* Dubious BSD thing... Probably nobody even uses it, but
1854          * the UNIX standard wants it for whatever reason... -DaveM
1855          */
1856         case SO_ACCEPTCONN:
1857                 v.val = sk->sk_state == TCP_LISTEN;
1858                 break;
1859 
1860         case SO_PASSSEC:
1861                 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1862                 break;
1863 
1864         case SO_PEERSEC:
1865                 return security_socket_getpeersec_stream(sock,
1866                                                          optval, optlen, len);
1867 
1868         case SO_MARK:
1869                 v.val = READ_ONCE(sk->sk_mark);
1870                 break;
1871 
1872         case SO_RCVMARK:
1873                 v.val = sock_flag(sk, SOCK_RCVMARK);
1874                 break;
1875 
1876         case SO_RXQ_OVFL:
1877                 v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1878                 break;
1879 
1880         case SO_WIFI_STATUS:
1881                 v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1882                 break;
1883 
1884         case SO_PEEK_OFF:
1885                 if (!READ_ONCE(sock->ops)->set_peek_off)
1886                         return -EOPNOTSUPP;
1887 
1888                 v.val = READ_ONCE(sk->sk_peek_off);
1889                 break;
1890         case SO_NOFCS:
1891                 v.val = sock_flag(sk, SOCK_NOFCS);
1892                 break;
1893 
1894         case SO_BINDTODEVICE:
1895                 return sock_getbindtodevice(sk, optval, optlen, len);
1896 
1897         case SO_GET_FILTER:
1898                 len = sk_get_filter(sk, optval, len);
1899                 if (len < 0)
1900                         return len;
1901 
1902                 goto lenout;
1903 
1904         case SO_LOCK_FILTER:
1905                 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1906                 break;
1907 
1908         case SO_BPF_EXTENSIONS:
1909                 v.val = bpf_tell_extensions();
1910                 break;
1911 
1912         case SO_SELECT_ERR_QUEUE:
1913                 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1914                 break;
1915 
1916 #ifdef CONFIG_NET_RX_BUSY_POLL
1917         case SO_BUSY_POLL:
1918                 v.val = READ_ONCE(sk->sk_ll_usec);
1919                 break;
1920         case SO_PREFER_BUSY_POLL:
1921                 v.val = READ_ONCE(sk->sk_prefer_busy_poll);
1922                 break;
1923 #endif
1924 
1925         case SO_MAX_PACING_RATE:
1926                 /* The READ_ONCE() pairs with the WRITE_ONCE() in sk_setsockopt() */
1927                 if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
1928                         lv = sizeof(v.ulval);
1929                         v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
1930                 } else {
1931                         /* 32bit version */
1932                         v.val = min_t(unsigned long, ~0U,
1933                                       READ_ONCE(sk->sk_max_pacing_rate));
1934                 }
1935                 break;
1936 
1937         case SO_INCOMING_CPU:
1938                 v.val = READ_ONCE(sk->sk_incoming_cpu);
1939                 break;
1940 
1941         case SO_MEMINFO:
1942         {
1943                 u32 meminfo[SK_MEMINFO_VARS];
1944 
1945                 sk_get_meminfo(sk, meminfo);
1946 
1947                 len = min_t(unsigned int, len, sizeof(meminfo));
1948                 if (copy_to_sockptr(optval, &meminfo, len))
1949                         return -EFAULT;
1950 
1951                 goto lenout;
1952         }
1953 
1954 #ifdef CONFIG_NET_RX_BUSY_POLL
1955         case SO_INCOMING_NAPI_ID:
1956                 v.val = READ_ONCE(sk->sk_napi_id);
1957 
1958                 /* aggregate non-NAPI IDs down to 0 */
1959                 if (v.val < MIN_NAPI_ID)
1960                         v.val = 0;
1961 
1962                 break;
1963 #endif
1964 
1965         case SO_COOKIE:
1966                 lv = sizeof(u64);
1967                 if (len < lv)
1968                         return -EINVAL;
1969                 v.val64 = sock_gen_cookie(sk);
1970                 break;
1971 
1972         case SO_ZEROCOPY:
1973                 v.val = sock_flag(sk, SOCK_ZEROCOPY);
1974                 break;
1975 
1976         case SO_TXTIME:
1977                 lv = sizeof(v.txtime);
1978                 v.txtime.clockid = sk->sk_clockid;
1979                 v.txtime.flags |= sk->sk_txtime_deadline_mode ?
1980                                   SOF_TXTIME_DEADLINE_MODE : 0;
1981                 v.txtime.flags |= sk->sk_txtime_report_errors ?
1982                                   SOF_TXTIME_REPORT_ERRORS : 0;
1983                 break;
1984 
1985         case SO_BINDTOIFINDEX:
1986                 v.val = READ_ONCE(sk->sk_bound_dev_if);
1987                 break;
1988 
1989         case SO_NETNS_COOKIE:
1990                 lv = sizeof(u64);
1991                 if (len != lv)
1992                         return -EINVAL;
1993                 v.val64 = sock_net(sk)->net_cookie;
1994                 break;
1995 
1996         case SO_BUF_LOCK:
1997                 v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
1998                 break;
1999 
2000         case SO_RESERVE_MEM:
2001                 v.val = READ_ONCE(sk->sk_reserved_mem);
2002                 break;
2003 
2004         case SO_TXREHASH:
2005                 /* Paired with WRITE_ONCE() in sk_setsockopt() */
2006                 v.val = READ_ONCE(sk->sk_txrehash);
2007                 break;
2008 
2009         default:
2010                 /* We implement SO_SNDLOWAT etc. to not be settable
2011                  * (1003.1g 7).
2012                  */
2013                 return -ENOPROTOOPT;
2014         }
2015 
2016         if (len > lv)
2017                 len = lv;
2018         if (copy_to_sockptr(optval, &v, len))
2019                 return -EFAULT;
2020 lenout:
2021         if (copy_to_sockptr(optlen, &len, sizeof(int)))
2022                 return -EFAULT;
2023         return 0;
2024 }
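
/*
 * Illustrative user-space sketch, not part of sock.c: the SO_PEERCRED
 * branch of sk_getsockopt() above fills a struct ucred and clamps the
 * returned length, so reading the peer credentials of a connected
 * AF_UNIX socket looks like this.  _GNU_SOURCE is assumed so that
 * <sys/socket.h> exposes struct ucred.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/socket.h>

static int print_peer_creds(int fd)
{
        struct ucred peer;
        socklen_t len = sizeof(peer);

        if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &peer, &len) < 0)
                return -1;

        /* pid/uid/gid were translated into the caller's namespaces. */
        printf("peer pid=%d uid=%u gid=%u\n", peer.pid, peer.uid, peer.gid);
        return 0;
}
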
2025 
2026 /*
2027  * Initialize an sk_lock.
2028  *
2029  * (We also register the sk_lock with the lock validator.)
2030  */
2031 static inline void sock_lock_init(struct sock *sk)
2032 {
2033         if (sk->sk_kern_sock)
2034                 sock_lock_init_class_and_name(
2035                         sk,
2036                         af_family_kern_slock_key_strings[sk->sk_family],
2037                         af_family_kern_slock_keys + sk->sk_family,
2038                         af_family_kern_key_strings[sk->sk_family],
2039                         af_family_kern_keys + sk->sk_family);
2040         else
2041                 sock_lock_init_class_and_name(
2042                         sk,
2043                         af_family_slock_key_strings[sk->sk_family],
2044                         af_family_slock_keys + sk->sk_family,
2045                         af_family_key_strings[sk->sk_family],
2046                         af_family_keys + sk->sk_family);
2047 }
2048 
2049 /*
2050  * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
2051  * even temporarily, because of RCU lookups. sk_node should also be left as is.
2052  * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
2053  */
2054 static void sock_copy(struct sock *nsk, const struct sock *osk)
2055 {
2056         const struct proto *prot = READ_ONCE(osk->sk_prot);
2057 #ifdef CONFIG_SECURITY_NETWORK
2058         void *sptr = nsk->sk_security;
2059 #endif
2060 
2061         /* If we move sk_tx_queue_mapping out of the private section,
2062          * we must check if sk_tx_queue_clear() is called after
2063          * sock_copy() in sk_clone_lock().
2064          */
2065         BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) <
2066                      offsetof(struct sock, sk_dontcopy_begin) ||
2067                      offsetof(struct sock, sk_tx_queue_mapping) >=
2068                      offsetof(struct sock, sk_dontcopy_end));
2069 
2070         memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
2071 
2072         unsafe_memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
2073                       prot->obj_size - offsetof(struct sock, sk_dontcopy_end),
2074                       /* alloc is larger than struct, see sk_prot_alloc() */);
2075 
2076 #ifdef CONFIG_SECURITY_NETWORK
2077         nsk->sk_security = sptr;
2078         security_sk_clone(osk, nsk);
2079 #endif
2080 }
2081 
2082 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
2083                 int family)
2084 {
2085         struct sock *sk;
2086         struct kmem_cache *slab;
2087 
2088         slab = prot->slab;
2089         if (slab != NULL) {
2090                 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
2091                 if (!sk)
2092                         return sk;
2093                 if (want_init_on_alloc(priority))
2094                         sk_prot_clear_nulls(sk, prot->obj_size);
2095         } else
2096                 sk = kmalloc(prot->obj_size, priority);
2097 
2098         if (sk != NULL) {
2099                 if (security_sk_alloc(sk, family, priority))
2100                         goto out_free;
2101 
2102                 if (!try_module_get(prot->owner))
2103                         goto out_free_sec;
2104         }
2105 
2106         return sk;
2107 
2108 out_free_sec:
2109         security_sk_free(sk);
2110 out_free:
2111         if (slab != NULL)
2112                 kmem_cache_free(slab, sk);
2113         else
2114                 kfree(sk);
2115         return NULL;
2116 }
2117 
2118 static void sk_prot_free(struct proto *prot, struct sock *sk)
2119 {
2120         struct kmem_cache *slab;
2121         struct module *owner;
2122 
2123         owner = prot->owner;
2124         slab = prot->slab;
2125 
2126         cgroup_sk_free(&sk->sk_cgrp_data);
2127         mem_cgroup_sk_free(sk);
2128         security_sk_free(sk);
2129         if (slab != NULL)
2130                 kmem_cache_free(slab, sk);
2131         else
2132                 kfree(sk);
2133         module_put(owner);
2134 }
2135 
2136 /**
2137  *      sk_alloc - All socket objects are allocated here
2138  *      @net: the applicable net namespace
2139  *      @family: protocol family
2140  *      @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
2141  *      @prot: struct proto associated with this new sock instance
2142  *      @kern: is this to be a kernel socket?
2143  */
2144 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
2145                       struct proto *prot, int kern)
2146 {
2147         struct sock *sk;
2148 
2149         sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
2150         if (sk) {
2151                 sk->sk_family = family;
2152                 /*
2153                  * See comment in struct sock definition to understand
2154                  * why we need sk_prot_creator -acme
2155                  */
2156                 sk->sk_prot = sk->sk_prot_creator = prot;
2157                 sk->sk_kern_sock = kern;
2158                 sock_lock_init(sk);
2159                 sk->sk_net_refcnt = kern ? 0 : 1;
2160                 if (likely(sk->sk_net_refcnt)) {
2161                         get_net_track(net, &sk->ns_tracker, priority);
2162                         sock_inuse_add(net, 1);
2163                 } else {
2164                         __netns_tracker_alloc(net, &sk->ns_tracker,
2165                                               false, priority);
2166                 }
2167 
2168                 sock_net_set(sk, net);
2169                 refcount_set(&sk->sk_wmem_alloc, 1);
2170 
2171                 mem_cgroup_sk_alloc(sk);
2172                 cgroup_sk_alloc(&sk->sk_cgrp_data);
2173                 sock_update_classid(&sk->sk_cgrp_data);
2174                 sock_update_netprioidx(&sk->sk_cgrp_data);
2175                 sk_tx_queue_clear(sk);
2176         }
2177 
2178         return sk;
2179 }
2180 EXPORT_SYMBOL(sk_alloc);
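
/*
 * Illustrative kernel-side sketch, not part of sock.c: the usual shape of
 * a protocol family's ->create() handler around sk_alloc().  "my_proto"
 * and PF_MYFAM are hypothetical placeholders; a real protocol passes its
 * own struct proto and family, then finishes basic setup with
 * sock_init_data().
 */
static int my_create(struct net *net, struct socket *sock, int protocol,
                     int kern)
{
        struct sock *sk;

        sk = sk_alloc(net, PF_MYFAM, GFP_KERNEL, &my_proto, kern);
        if (!sk)
                return -ENOMEM;

        sock_init_data(sock, sk);       /* queues, callbacks, default buffers */
        sk->sk_protocol = protocol;
        return 0;
}
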
2181 
2182 /* Sockets having SOCK_RCU_FREE will call this function after one RCU
2183  * grace period. This is the case for UDP sockets and TCP listeners.
2184  */
2185 static void __sk_destruct(struct rcu_head *head)
2186 {
2187         struct sock *sk = container_of(head, struct sock, sk_rcu);
2188         struct sk_filter *filter;
2189 
2190         if (sk->sk_destruct)
2191                 sk->sk_destruct(sk);
2192 
2193         filter = rcu_dereference_check(sk->sk_filter,
2194                                        refcount_read(&sk->sk_wmem_alloc) == 0);
2195         if (filter) {
2196                 sk_filter_uncharge(sk, filter);
2197                 RCU_INIT_POINTER(sk->sk_filter, NULL);
2198         }
2199 
2200         sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
2201 
2202 #ifdef CONFIG_BPF_SYSCALL
2203         bpf_sk_storage_free(sk);
2204 #endif
2205 
2206         if (atomic_read(&sk->sk_omem_alloc))
2207                 pr_debug("%s: optmem leakage (%d bytes) detected\n",
2208                          __func__, atomic_read(&sk->sk_omem_alloc));
2209 
2210         if (sk->sk_frag.page) {
2211                 put_page(sk->sk_frag.page);
2212                 sk->sk_frag.page = NULL;
2213         }
2214 
2215         /* We do not need to acquire sk->sk_peer_lock, we are the last user. */
2216         put_cred(sk->sk_peer_cred);
2217         put_pid(sk->sk_peer_pid);
2218 
2219         if (likely(sk->sk_net_refcnt))
2220                 put_net_track(sock_net(sk), &sk->ns_tracker);
2221         else
2222                 __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);
2223 
2224         sk_prot_free(sk->sk_prot_creator, sk);
2225 }
2226 
2227 void sk_destruct(struct sock *sk)
2228 {
2229         bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
2230 
2231         if (rcu_access_pointer(sk->sk_reuseport_cb)) {
2232                 reuseport_detach_sock(sk);
2233                 use_call_rcu = true;
2234         }
2235 
2236         if (use_call_rcu)
2237                 call_rcu(&sk->sk_rcu, __sk_destruct);
2238         else
2239                 __sk_destruct(&sk->sk_rcu);
2240 }
2241 
2242 static void __sk_free(struct sock *sk)
2243 {
2244         if (likely(sk->sk_net_refcnt))
2245                 sock_inuse_add(sock_net(sk), -1);
2246 
2247         if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
2248                 sock_diag_broadcast_destroy(sk);
2249         else
2250                 sk_destruct(sk);
2251 }
2252 
2253 void sk_free(struct sock *sk)
2254 {
2255         /*
2256          * We subtract one from sk_wmem_alloc and can know if
2257          * some packets are still in some tx queue.
2258          * If not null, sock_wfree() will call __sk_free(sk) later
2259          */
2260         if (refcount_dec_and_test(&sk->sk_wmem_alloc))
2261                 __sk_free(sk);
2262 }
2263 EXPORT_SYMBOL(sk_free);
2264 
2265 static void sk_init_common(struct sock *sk)
2266 {
2267         skb_queue_head_init(&sk->sk_receive_queue);
2268         skb_queue_head_init(&sk->sk_write_queue);
2269         skb_queue_head_init(&sk->sk_error_queue);
2270 
2271         rwlock_init(&sk->sk_callback_lock);
2272         lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
2273                         af_rlock_keys + sk->sk_family,
2274                         af_family_rlock_key_strings[sk->sk_family]);
2275         lockdep_set_class_and_name(&sk->sk_write_queue.lock,
2276                         af_wlock_keys + sk->sk_family,
2277                         af_family_wlock_key_strings[sk->sk_family]);
2278         lockdep_set_class_and_name(&sk->sk_error_queue.lock,
2279                         af_elock_keys + sk->sk_family,
2280                         af_family_elock_key_strings[sk->sk_family]);
2281         if (sk->sk_kern_sock)
2282                 lockdep_set_class_and_name(&sk->sk_callback_lock,
2283                         af_kern_callback_keys + sk->sk_family,
2284                         af_family_kern_clock_key_strings[sk->sk_family]);
2285         else
2286                 lockdep_set_class_and_name(&sk->sk_callback_lock,
2287                         af_callback_keys + sk->sk_family,
2288                         af_family_clock_key_strings[sk->sk_family]);
2289 }
2290 
2291 /**
2292  *      sk_clone_lock - clone a socket, and lock its clone
2293  *      @sk: the socket to clone
2294  *      @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
2295  *
2296  *      Caller must unlock socket even in error path (bh_unlock_sock(newsk))
2297  */
2298 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
2299 {
2300         struct proto *prot = READ_ONCE(sk->sk_prot);
2301         struct sk_filter *filter;
2302         bool is_charged = true;
2303         struct sock *newsk;
2304 
2305         newsk = sk_prot_alloc(prot, priority, sk->sk_family);
2306         if (!newsk)
2307                 goto out;
2308 
2309         sock_copy(newsk, sk);
2310 
2311         newsk->sk_prot_creator = prot;
2312 
2313         /* SANITY */
2314         if (likely(newsk->sk_net_refcnt)) {
2315                 get_net_track(sock_net(newsk), &newsk->ns_tracker, priority);
2316                 sock_inuse_add(sock_net(newsk), 1);
2317         } else {
2318                 /* Kernel sockets are not elevating the struct net refcount.
2319                  * Instead, use a tracker to more easily detect if a layer
2320                  * is not properly dismantling its kernel sockets at netns
2321                  * destroy time.
2322                  */
2323                 __netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker,
2324                                       false, priority);
2325         }
2326         sk_node_init(&newsk->sk_node);
2327         sock_lock_init(newsk);
2328         bh_lock_sock(newsk);
2329         newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;
2330         newsk->sk_backlog.len = 0;
2331 
2332         atomic_set(&newsk->sk_rmem_alloc, 0);
2333 
2334         /* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
2335         refcount_set(&newsk->sk_wmem_alloc, 1);
2336 
2337         atomic_set(&newsk->sk_omem_alloc, 0);
2338         sk_init_common(newsk);
2339 
2340         newsk->sk_dst_cache     = NULL;
2341         newsk->sk_dst_pending_confirm = 0;
2342         newsk->sk_wmem_queued   = 0;
2343         newsk->sk_forward_alloc = 0;
2344         newsk->sk_reserved_mem  = 0;
2345         atomic_set(&newsk->sk_drops, 0);
2346         newsk->sk_send_head     = NULL;
2347         newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
2348         atomic_set(&newsk->sk_zckey, 0);
2349 
2350         sock_reset_flag(newsk, SOCK_DONE);
2351 
2352         /* sk->sk_memcg will be populated at accept() time */
2353         newsk->sk_memcg = NULL;
2354 
2355         cgroup_sk_clone(&newsk->sk_cgrp_data);
2356 
2357         rcu_read_lock();
2358         filter = rcu_dereference(sk->sk_filter);
2359         if (filter != NULL)
2360                 /* though it's an empty new sock, the charging may fail
2361                  * if sysctl_optmem_max was changed between creation of
2362                  * the original socket and cloning.
2363                  */
2364                 is_charged = sk_filter_charge(newsk, filter);
2365         RCU_INIT_POINTER(newsk->sk_filter, filter);
2366         rcu_read_unlock();
2367 
2368         if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
2369                 /* We need to make sure that we don't uncharge the new
2370                  * socket if we couldn't charge it in the first place
2371                  * as otherwise we uncharge the parent's filter.
2372                  */
2373                 if (!is_charged)
2374                         RCU_INIT_POINTER(newsk->sk_filter, NULL);
2375                 sk_free_unlock_clone(newsk);
2376                 newsk = NULL;
2377                 goto out;
2378         }
2379         RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
2380 
2381         if (bpf_sk_storage_clone(sk, newsk)) {
2382                 sk_free_unlock_clone(newsk);
2383                 newsk = NULL;
2384                 goto out;
2385         }
2386 
2387         /* Clear sk_user_data if parent had the pointer tagged
2388          * as not suitable for copying when cloning.
2389          */
2390         if (sk_user_data_is_nocopy(newsk))
2391                 newsk->sk_user_data = NULL;
2392 
2393         newsk->sk_err      = 0;
2394         newsk->sk_err_soft = 0;
2395         newsk->sk_priority = 0;
2396         newsk->sk_incoming_cpu = raw_smp_processor_id();
2397 
2398         /* Before updating sk_refcnt, we must commit prior changes to memory
2399          * (Documentation/RCU/rculist_nulls.rst for details)
2400          */
2401         smp_wmb();
2402         refcount_set(&newsk->sk_refcnt, 2);
2403 
2404         sk_set_socket(newsk, NULL);
2405         sk_tx_queue_clear(newsk);
2406         RCU_INIT_POINTER(newsk->sk_wq, NULL);
2407 
2408         if (newsk->sk_prot->sockets_allocated)
2409                 sk_sockets_allocated_inc(newsk);
2410 
2411         if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
2412                 net_enable_timestamp();
2413 out:
2414         return newsk;
2415 }
2416 EXPORT_SYMBOL_GPL(sk_clone_lock);
2417 
2418 void sk_free_unlock_clone(struct sock *sk)
2419 {
2420         /* It is still a raw copy of the parent, so invalidate
2421          * the destructor and do a plain sk_free() */
2422         sk->sk_destruct = NULL;
2423         bh_unlock_sock(sk);
2424         sk_free(sk);
2425 }
2426 EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
2427 
2428 static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
2429 {
2430         bool is_ipv6 = false;
2431         u32 max_size;
2432 
2433 #if IS_ENABLED(CONFIG_IPV6)
2434         is_ipv6 = (sk->sk_family == AF_INET6 &&
2435                    !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
2436 #endif
2437         /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
2438         max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) :
2439                         READ_ONCE(dst->dev->gso_ipv4_max_size);
2440         if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
2441                 max_size = GSO_LEGACY_MAX_SIZE;
2442 
2443         return max_size - (MAX_TCP_HEADER + 1);
2444 }
2445 
2446 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
2447 {
2448         u32 max_segs = 1;
2449 
2450         sk->sk_route_caps = dst->dev->features;
2451         if (sk_is_tcp(sk))
2452                 sk->sk_route_caps |= NETIF_F_GSO;
2453         if (sk->sk_route_caps & NETIF_F_GSO)
2454                 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
2455         if (unlikely(sk->sk_gso_disabled))
2456                 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
2457         if (sk_can_gso(sk)) {
2458                 if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
2459                         sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
2460                 } else {
2461                         sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
2462                         sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
2463                         /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
2464                         max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
2465                 }
2466         }
2467         sk->sk_gso_max_segs = max_segs;
2468         sk_dst_set(sk, dst);
2469 }
2470 EXPORT_SYMBOL_GPL(sk_setup_caps);
2471 
2472 /*
2473  *      Simple resource managers for sockets.
2474  */
2475 
2476 
2477 /*
2478  * Write buffer destructor automatically called from kfree_skb.
2479  */
2480 void sock_wfree(struct sk_buff *skb)
2481 {
2482         struct sock *sk = skb->sk;
2483         unsigned int len = skb->truesize;
2484         bool free;
2485 
2486         if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
2487                 if (sock_flag(sk, SOCK_RCU_FREE) &&
2488                     sk->sk_write_space == sock_def_write_space) {
2489                         rcu_read_lock();
2490                         free = refcount_sub_and_test(len, &sk->sk_wmem_alloc);
2491                         sock_def_write_space_wfree(sk);
2492                         rcu_read_unlock();
2493                         if (unlikely(free))
2494                                 __sk_free(sk);
2495                         return;
2496                 }
2497 
2498                 /*
2499                  * Keep a reference on sk_wmem_alloc; it will be released
2500                  * after the sk_write_space() call.
2501                  */
2502                 WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
2503                 sk->sk_write_space(sk);
2504                 len = 1;
2505         }
2506         /*
2507          * if sk_wmem_alloc reaches 0, we must finish what sk_free()
2508          * could not do because of in-flight packets
2509          */
2510         if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
2511                 __sk_free(sk);
2512 }
2513 EXPORT_SYMBOL(sock_wfree);
2514 
2515 /* This variant of sock_wfree() is used by TCP,
2516  * since it sets SOCK_USE_WRITE_QUEUE.
2517  */
2518 void __sock_wfree(struct sk_buff *skb)
2519 {
2520         struct sock *sk = skb->sk;
2521 
2522         if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
2523                 __sk_free(sk);
2524 }
2525 
2526 void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
2527 {
2528         skb_orphan(skb);
2529         skb->sk = sk;
2530 #ifdef CONFIG_INET
2531         if (unlikely(!sk_fullsock(sk))) {
2532                 skb->destructor = sock_edemux;
2533                 sock_hold(sk);
2534                 return;
2535         }
2536 #endif
2537         skb->destructor = sock_wfree;
2538         skb_set_hash_from_sk(skb, sk);
2539         /*
2540          * We used to take a refcount on sk, but the following operation
2541          * is enough to guarantee sk_free() won't free this sock until
2542          * all in-flight packets are completed.
2543          */
2544         refcount_add(skb->truesize, &sk->sk_wmem_alloc);
2545 }
2546 EXPORT_SYMBOL(skb_set_owner_w);
2547 
2548 static bool can_skb_orphan_partial(const struct sk_buff *skb)
2549 {
2550         /* Drivers depend on in-order delivery for crypto offload,
2551          * partial orphan breaks out-of-order-OK logic.
2552          */
2553         if (skb_is_decrypted(skb))
2554                 return false;
2555 
2556         return (skb->destructor == sock_wfree ||
2557                 (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
2558 }
2559 
2560 /* This helper is used by netem, as it can hold packets in its
2561  * delay queue. We want to allow the owner socket to send more
2562  * packets, as if they were already TX completed by a typical driver.
2563  * But we also want to keep skb->sk set because some packet schedulers
2564  * rely on it (sch_fq for example).
2565  */
2566 void skb_orphan_partial(struct sk_buff *skb)
2567 {
2568         if (skb_is_tcp_pure_ack(skb))
2569                 return;
2570 
2571         if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
2572                 return;
2573 
2574         skb_orphan(skb);
2575 }
2576 EXPORT_SYMBOL(skb_orphan_partial);
2577 
2578 /*
2579  * Read buffer destructor automatically called from kfree_skb.
2580  */
2581 void sock_rfree(struct sk_buff *skb)
2582 {
2583         struct sock *sk = skb->sk;
2584         unsigned int len = skb->truesize;
2585 
2586         atomic_sub(len, &sk->sk_rmem_alloc);
2587         sk_mem_uncharge(sk, len);
2588 }
2589 EXPORT_SYMBOL(sock_rfree);
2590 
2591 /*
2592  * Buffer destructor for skbs that are not used directly in read or write
2593  * path, e.g. for error handler skbs. Automatically called from kfree_skb.
2594  */
2595 void sock_efree(struct sk_buff *skb)
2596 {
2597         sock_put(skb->sk);
2598 }
2599 EXPORT_SYMBOL(sock_efree);
2600 
2601 /* Buffer destructor for prefetch/receive path where reference count may
2602  * not be held, e.g. for listen sockets.
2603  */
2604 #ifdef CONFIG_INET
2605 void sock_pfree(struct sk_buff *skb)
2606 {
2607         struct sock *sk = skb->sk;
2608 
2609         if (!sk_is_refcounted(sk))
2610                 return;
2611 
2612         if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) {
2613                 inet_reqsk(sk)->rsk_listener = NULL;
2614                 reqsk_free(inet_reqsk(sk));
2615                 return;
2616         }
2617 
2618         sock_gen_put(sk);
2619 }
2620 EXPORT_SYMBOL(sock_pfree);
2621 #endif /* CONFIG_INET */
2622 
2623 kuid_t sock_i_uid(struct sock *sk)
2624 {
2625         kuid_t uid;
2626 
2627         read_lock_bh(&sk->sk_callback_lock);
2628         uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
2629         read_unlock_bh(&sk->sk_callback_lock);
2630         return uid;
2631 }
2632 EXPORT_SYMBOL(sock_i_uid);
2633 
2634 unsigned long __sock_i_ino(struct sock *sk)
2635 {
2636         unsigned long ino;
2637 
2638         read_lock(&sk->sk_callback_lock);
2639         ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
2640         read_unlock(&sk->sk_callback_lock);
2641         return ino;
2642 }
2643 EXPORT_SYMBOL(__sock_i_ino);
2644 
2645 unsigned long sock_i_ino(struct sock *sk)
2646 {
2647         unsigned long ino;
2648 
2649         local_bh_disable();
2650         ino = __sock_i_ino(sk);
2651         local_bh_enable();
2652         return ino;
2653 }
2654 EXPORT_SYMBOL(sock_i_ino);
2655 
2656 /*
2657  * Allocate a skb from the socket's send buffer.
2658  */
2659 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
2660                              gfp_t priority)
2661 {
2662         if (force ||
2663             refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
2664                 struct sk_buff *skb = alloc_skb(size, priority);
2665 
2666                 if (skb) {
2667                         skb_set_owner_w(skb, sk);
2668                         return skb;
2669                 }
2670         }
2671         return NULL;
2672 }
2673 EXPORT_SYMBOL(sock_wmalloc);
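
/*
 * Illustrative kernel-side sketch, not part of sock.c: sock_wmalloc()
 * charges the new skb to sk->sk_sndbuf through skb_set_owner_w(), so a
 * small protocol-internal frame accounted to the socket can be built as
 * below.  force=1 skips the send-buffer check, which some protocols use
 * for control packets that must not be dropped by accounting.
 */
static struct sk_buff *build_ctrl_skb(struct sock *sk, unsigned int len)
{
        struct sk_buff *skb;

        skb = sock_wmalloc(sk, len + MAX_HEADER, 1, GFP_ATOMIC);
        if (!skb)
                return NULL;

        skb_reserve(skb, MAX_HEADER);   /* room for lower-layer headers */
        return skb;
}
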
2674 
2675 static void sock_ofree(struct sk_buff *skb)
2676 {
2677         struct sock *sk = skb->sk;
2678 
2679         atomic_sub(skb->truesize, &sk->sk_omem_alloc);
2680 }
2681 
2682 struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
2683                              gfp_t priority)
2684 {
2685         struct sk_buff *skb;
2686 
2687         /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
2688         if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
2689             READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
2690                 return NULL;
2691 
2692         skb = alloc_skb(size, priority);
2693         if (!skb)
2694                 return NULL;
2695 
2696         atomic_add(skb->truesize, &sk->sk_omem_alloc);
2697         skb->sk = sk;
2698         skb->destructor = sock_ofree;
2699         return skb;
2700 }
2701 
2702 /*
2703  * Allocate a memory block from the socket's option memory buffer.
2704  */
2705 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
2706 {
2707         int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
2708 
2709         if ((unsigned int)size <= optmem_max &&
2710             atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
2711                 void *mem;
2712                 /* First do the add, to avoid the race if kmalloc
2713                  * might sleep.
2714                  */
2715                 atomic_add(size, &sk->sk_omem_alloc);
2716                 mem = kmalloc(size, priority);
2717                 if (mem)
2718                         return mem;
2719                 atomic_sub(size, &sk->sk_omem_alloc);
2720         }
2721         return NULL;
2722 }
2723 EXPORT_SYMBOL(sock_kmalloc);
2724 
2725 /* Free an option memory block. Note that we actually want the inline
2726  * here as this allows gcc to detect the nullify and fold away the
2727  * condition entirely.
2728  */
2729 static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
2730                                   const bool nullify)
2731 {
2732         if (WARN_ON_ONCE(!mem))
2733                 return;
2734         if (nullify)
2735                 kfree_sensitive(mem);
2736         else
2737                 kfree(mem);
2738         atomic_sub(size, &sk->sk_omem_alloc);
2739 }
2740 
2741 void sock_kfree_s(struct sock *sk, void *mem, int size)
2742 {
2743         __sock_kfree_s(sk, mem, size, false);
2744 }
2745 EXPORT_SYMBOL(sock_kfree_s);
2746 
2747 void sock_kzfree_s(struct sock *sk, void *mem, int size)
2748 {
2749         __sock_kfree_s(sk, mem, size, true);
2750 }
2751 EXPORT_SYMBOL(sock_kzfree_s);
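
/*
 * Illustrative kernel-side sketch, not part of sock.c: sock_kmalloc() and
 * the sock_kfree_s()/sock_kzfree_s() helpers must be paired with the same
 * size so that sk_omem_alloc stays balanced; the kzfree variant is meant
 * for option data that should be wiped before the memory is returned.
 */
static int use_per_socket_key(struct sock *sk, const u8 *key, int len)
{
        u8 *copy = sock_kmalloc(sk, len, GFP_KERNEL);

        if (!copy)
                return -ENOBUFS;

        memcpy(copy, key, len);
        /* ... hand the per-socket copy to the consumer ... */
        sock_kzfree_s(sk, copy, len);   /* same size as the allocation */
        return 0;
}
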
2752 
2753 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
2754    I think these locks should be removed for datagram sockets.
2755  */
2756 static long sock_wait_for_wmem(struct sock *sk, long timeo)
2757 {
2758         DEFINE_WAIT(wait);
2759 
2760         sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2761         for (;;) {
2762                 if (!timeo)
2763                         break;
2764                 if (signal_pending(current))
2765                         break;
2766                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2767                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2768                 if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
2769                         break;
2770                 if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2771                         break;
2772                 if (READ_ONCE(sk->sk_err))
2773                         break;
2774                 timeo = schedule_timeout(timeo);
2775         }
2776         finish_wait(sk_sleep(sk), &wait);
2777         return timeo;
2778 }
2779 
2780 
2781 /*
2782  *      Generic send/receive buffer handlers
2783  */
2784 
2785 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
2786                                      unsigned long data_len, int noblock,
2787                                      int *errcode, int max_page_order)
2788 {
2789         struct sk_buff *skb;
2790         long timeo;
2791         int err;
2792 
2793         timeo = sock_sndtimeo(sk, noblock);
2794         for (;;) {
2795                 err = sock_error(sk);
2796                 if (err != 0)
2797                         goto failure;
2798 
2799                 err = -EPIPE;
2800                 if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2801                         goto failure;
2802 
2803                 if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
2804                         break;
2805 
2806                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2807                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2808                 err = -EAGAIN;
2809                 if (!timeo)
2810                         goto failure;
2811                 if (signal_pending(current))
2812                         goto interrupted;
2813                 timeo = sock_wait_for_wmem(sk, timeo);
2814         }
2815         skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
2816                                    errcode, sk->sk_allocation);
2817         if (skb)
2818                 skb_set_owner_w(skb, sk);
2819         return skb;
2820 
2821 interrupted:
2822         err = sock_intr_errno(timeo);
2823 failure:
2824         *errcode = err;
2825         return NULL;
2826 }
2827 EXPORT_SYMBOL(sock_alloc_send_pskb);
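
/*
 * Illustrative kernel-side sketch, not part of sock.c: a datagram
 * sendmsg() implementation typically asks sock_alloc_send_pskb() for
 * linear room for its headers plus a (possibly paged) payload, blocking
 * according to the socket's send timeout unless MSG_DONTWAIT is set.
 */
static struct sk_buff *get_tx_skb(struct sock *sk, size_t hlen, size_t dlen,
                                  int flags, int *err)
{
        return sock_alloc_send_pskb(sk, hlen, dlen, flags & MSG_DONTWAIT,
                                    err, PAGE_ALLOC_COSTLY_ORDER);
}
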
2828 
2829 int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
2830                      struct sockcm_cookie *sockc)
2831 {
2832         u32 tsflags;
2833 
2834         switch (cmsg->cmsg_type) {
2835         case SO_MARK:
2836                 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
2837                     !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2838                         return -EPERM;
2839                 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2840                         return -EINVAL;
2841                 sockc->mark = *(u32 *)CMSG_DATA(cmsg);
2842                 break;
2843         case SO_TIMESTAMPING_OLD:
2844         case SO_TIMESTAMPING_NEW:
2845                 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2846                         return -EINVAL;
2847 
2848                 tsflags = *(u32 *)CMSG_DATA(cmsg);
2849                 if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
2850                         return -EINVAL;
2851 
2852                 sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
2853                 sockc->tsflags |= tsflags;
2854                 break;
2855         case SCM_TXTIME:
2856                 if (!sock_flag(sk, SOCK_TXTIME))
2857                         return -EINVAL;
2858                 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
2859                         return -EINVAL;
2860                 sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
2861                 break;
2862         /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
2863         case SCM_RIGHTS:
2864         case SCM_CREDENTIALS:
2865                 break;
2866         default:
2867                 return -EINVAL;
2868         }
2869         return 0;
2870 }
2871 EXPORT_SYMBOL(__sock_cmsg_send);
2872 
2873 int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
2874                    struct sockcm_cookie *sockc)
2875 {
2876         struct cmsghdr *cmsg;
2877         int ret;
2878 
2879         for_each_cmsghdr(cmsg, msg) {
2880                 if (!CMSG_OK(msg, cmsg))
2881                         return -EINVAL;
2882                 if (cmsg->cmsg_level != SOL_SOCKET)
2883                         continue;
2884                 ret = __sock_cmsg_send(sk, cmsg, sockc);
2885                 if (ret)
2886                         return ret;
2887         }
2888         return 0;
2889 }
2890 EXPORT_SYMBOL(sock_cmsg_send);
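
/*
 * Illustrative user-space sketch, not part of sock.c: __sock_cmsg_send()
 * above accepts a u32 SO_MARK ancillary message (given CAP_NET_RAW or
 * CAP_NET_ADMIN in the socket's user namespace), so a per-packet mark can
 * be attached to a single sendmsg() instead of setting SO_MARK on the
 * whole socket.  Assumes the libc headers expose SO_MARK.
 */
#include <string.h>
#include <sys/socket.h>

static ssize_t send_marked(int fd, const void *buf, size_t len, unsigned int mark)
{
        char cbuf[CMSG_SPACE(sizeof(mark))];
        struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
        struct msghdr msg = {
                .msg_iov = &iov, .msg_iovlen = 1,
                .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
        };
        struct cmsghdr *cmsg;

        memset(cbuf, 0, sizeof(cbuf));
        cmsg = CMSG_FIRSTHDR(&msg);
        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type  = SO_MARK;
        cmsg->cmsg_len   = CMSG_LEN(sizeof(mark));      /* exact length required */
        memcpy(CMSG_DATA(cmsg), &mark, sizeof(mark));

        return sendmsg(fd, &msg, 0);
}
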
2891 
2892 static void sk_enter_memory_pressure(struct sock *sk)
2893 {
2894         if (!sk->sk_prot->enter_memory_pressure)
2895                 return;
2896 
2897         sk->sk_prot->enter_memory_pressure(sk);
2898 }
2899 
2900 static void sk_leave_memory_pressure(struct sock *sk)
2901 {
2902         if (sk->sk_prot->leave_memory_pressure) {
2903                 INDIRECT_CALL_INET_1(sk->sk_prot->leave_memory_pressure,
2904                                      tcp_leave_memory_pressure, sk);
2905         } else {
2906                 unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
2907 
2908                 if (memory_pressure && READ_ONCE(*memory_pressure))
2909                         WRITE_ONCE(*memory_pressure, 0);
2910         }
2911 }
2912 
2913 DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
2914 
2915 /**
2916  * skb_page_frag_refill - check that a page_frag contains enough room
2917  * @sz: minimum size of the fragment we want to get
2918  * @pfrag: pointer to page_frag
2919  * @gfp: priority for memory allocation
2920  *
2921  * Note: While this allocator tries to use high order pages, there is
2922  * no guarantee that allocations succeed. Therefore, @sz MUST be
2923  * less than or equal to PAGE_SIZE.
2924  */
2925 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
2926 {
2927         if (pfrag->page) {
2928                 if (page_ref_count(pfrag->page) == 1) {
2929                         pfrag->offset = 0;
2930                         return true;
2931                 }
2932                 if (pfrag->offset + sz <= pfrag->size)
2933                         return true;
2934                 put_page(pfrag->page);
2935         }
2936 
2937         pfrag->offset = 0;
2938         if (SKB_FRAG_PAGE_ORDER &&
2939             !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
2940                 /* Avoid direct reclaim but allow kswapd to wake */
2941                 pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
2942                                           __GFP_COMP | __GFP_NOWARN |
2943                                           __GFP_NORETRY,
2944                                           SKB_FRAG_PAGE_ORDER);
2945                 if (likely(pfrag->page)) {
2946                         pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
2947                         return true;
2948                 }
2949         }
2950         pfrag->page = alloc_page(gfp);
2951         if (likely(pfrag->page)) {
2952                 pfrag->size = PAGE_SIZE;
2953                 return true;
2954         }
2955         return false;
2956 }
2957 EXPORT_SYMBOL(skb_page_frag_refill);
2958 
2959 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2960 {
2961         if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2962                 return true;
2963 
2964         sk_enter_memory_pressure(sk);
2965         sk_stream_moderate_sndbuf(sk);
2966         return false;
2967 }
2968 EXPORT_SYMBOL(sk_page_frag_refill);
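
/*
 * Illustrative kernel-side sketch, not part of sock.c: the usual pattern
 * around sk_page_frag_refill() is to top up the per-socket/per-task page
 * frag, copy as much as currently fits at pfrag->offset, and advance the
 * offset by the amount consumed (the refill only guarantees a little
 * room, not a full page).
 */
static int append_to_frag(struct sock *sk, const void *data, unsigned int len)
{
        struct page_frag *pfrag = sk_page_frag(sk);
        unsigned int copy;

        if (!sk_page_frag_refill(sk, pfrag))
                return -ENOMEM;

        copy = min_t(unsigned int, len, pfrag->size - pfrag->offset);
        memcpy(page_address(pfrag->page) + pfrag->offset, data, copy);
        pfrag->offset += copy;
        return copy;
}
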
2969 
2970 void __lock_sock(struct sock *sk)
2971         __releases(&sk->sk_lock.slock)
2972         __acquires(&sk->sk_lock.slock)
2973 {
2974         DEFINE_WAIT(wait);
2975 
2976         for (;;) {
2977                 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2978                                         TASK_UNINTERRUPTIBLE);
2979                 spin_unlock_bh(&sk->sk_lock.slock);
2980                 schedule();
2981                 spin_lock_bh(&sk->sk_lock.slock);
2982                 if (!sock_owned_by_user(sk))
2983                         break;
2984         }
2985         finish_wait(&sk->sk_lock.wq, &wait);
2986 }
2987 
2988 void __release_sock(struct sock *sk)
2989         __releases(&sk->sk_lock.slock)
2990         __acquires(&sk->sk_lock.slock)
2991 {
2992         struct sk_buff *skb, *next;
2993 
2994         while ((skb = sk->sk_backlog.head) != NULL) {
2995                 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2996 
2997                 spin_unlock_bh(&sk->sk_lock.slock);
2998 
2999                 do {
3000                         next = skb->next;
3001                         prefetch(next);
3002                         DEBUG_NET_WARN_ON_ONCE(skb_dst_is_noref(skb));
3003                         skb_mark_not_on_list(skb);
3004                         sk_backlog_rcv(sk, skb);
3005 
3006                         cond_resched();
3007 
3008                         skb = next;
3009                 } while (skb != NULL);
3010 
3011                 spin_lock_bh(&sk->sk_lock.slock);
3012         }
3013 
3014         /*
3015          * Doing the zeroing here guarantees we cannot loop forever
3016          * while a wild producer attempts to flood us.
3017          */
3018         sk->sk_backlog.len = 0;
3019 }
3020 
3021 void __sk_flush_backlog(struct sock *sk)
3022 {
3023         spin_lock_bh(&sk->sk_lock.slock);
3024         __release_sock(sk);
3025 
3026         if (sk->sk_prot->release_cb)
3027                 INDIRECT_CALL_INET_1(sk->sk_prot->release_cb,
3028                                      tcp_release_cb, sk);
3029 
3030         spin_unlock_bh(&sk->sk_lock.slock);
3031 }
3032 EXPORT_SYMBOL_GPL(__sk_flush_backlog);
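
/* A minimal sketch, not part of sock.c, of the receive-side pattern that
 * feeds the backlog drained by __release_sock() above (compare tcp_v4_rcv()):
 * process the skb directly if the socket is not owned, otherwise queue it on
 * the backlog. example_do_rcv() and the 'limit' value are hypothetical.
 */
static int example_rcv(struct sock *sk, struct sk_buff *skb, unsigned int limit)
{
        int ret = 0;

        bh_lock_sock(sk);
        if (!sock_owned_by_user(sk)) {
                ret = example_do_rcv(sk, skb);  /* process in softirq context */
        } else if (sk_add_backlog(sk, skb, limit)) {
                ret = -ENOBUFS;                 /* backlog full: drop */
                kfree_skb(skb);
        }
        bh_unlock_sock(sk);
        return ret;
}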
3033 
3034 /**
3035  * sk_wait_data - wait for data to arrive at sk_receive_queue
3036  * @sk:    sock to wait on
3037  * @timeo: for how long
3038  * @skb:   last skb seen on sk_receive_queue
3039  *
3040  * Now socket state including sk->sk_err is changed only under the socket
3041  * lock, hence we may omit checks after joining the wait queue.
3042  * We check the receive queue before schedule() only as an optimization;
3043  * it is very likely that release_sock() added new data.
3044  */
3045 int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
3046 {
3047         DEFINE_WAIT_FUNC(wait, woken_wake_function);
3048         int rc;
3049 
3050         add_wait_queue(sk_sleep(sk), &wait);
3051         sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
3052         rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
3053         sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
3054         remove_wait_queue(sk_sleep(sk), &wait);
3055         return rc;
3056 }
3057 EXPORT_SYMBOL(sk_wait_data);
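
/* A minimal sketch, not part of sock.c, of how a recvmsg implementation
 * typically blocks on sk_wait_data() with the socket lock held (compare the
 * wait loop in tcp_recvmsg()); protocol-specific handling is elided and
 * example_wait_for_data() is hypothetical.
 */
static int example_wait_for_data(struct sock *sk, int flags)
{
        long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
        int err;

        while (skb_queue_empty(&sk->sk_receive_queue)) {
                err = sock_error(sk);
                if (err)
                        return err;
                if (sk->sk_shutdown & RCV_SHUTDOWN)
                        return 0;
                if (!timeo)
                        return -EAGAIN;
                if (signal_pending(current))
                        return sock_intr_errno(timeo);

                sk_wait_data(sk, &timeo, NULL);
        }
        return 1;
}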
3058 
3059 /**
3060  *      __sk_mem_raise_allocated - increase memory_allocated
3061  *      @sk: socket
3062  *      @size: memory size to allocate
3063  *      @amt: pages to allocate
3064  *      @kind: allocation type
3065  *
3066  *      Similar to __sk_mem_schedule(), but does not update sk_forward_alloc.
3067  *
3068  *      Unlike the globally shared limits among the sockets under the same
3069  *      protocol, consuming the budget of a memcg won't have a direct effect
3070  *      on the others. So be optimistic about the memcg's tolerance, and leave
3071  *      it to the callers to decide whether or not to raise allocated, through
3072  *      sk_under_memory_pressure() or its variants.
3073  */
3074 int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
3075 {
3076         struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? sk->sk_memcg : NULL;
3077         struct proto *prot = sk->sk_prot;
3078         bool charged = false;
3079         long allocated;
3080 
3081         sk_memory_allocated_add(sk, amt);
3082         allocated = sk_memory_allocated(sk);
3083 
3084         if (memcg) {
3085                 if (!mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge()))
3086                         goto suppress_allocation;
3087                 charged = true;
3088         }
3089 
3090         /* Under limit. */
3091         if (allocated <= sk_prot_mem_limits(sk, 0)) {
3092                 sk_leave_memory_pressure(sk);
3093                 return 1;
3094         }
3095 
3096         /* Under pressure. */
3097         if (allocated > sk_prot_mem_limits(sk, 1))
3098                 sk_enter_memory_pressure(sk);
3099 
3100         /* Over hard limit. */
3101         if (allocated > sk_prot_mem_limits(sk, 2))
3102                 goto suppress_allocation;
3103 
3104         /* Guarantee minimum buffer size under pressure (either global
3105          * or memcg) to make sure features described in RFC 7323 (TCP
3106          * Extensions for High Performance) work properly.
3107          *
3108          * This rule does NOT stand once the global or memcg hard limit is
3109          * exceeded, or else a DoS attack could be mounted by spawning
3110          * lots of sockets whose usage stays under the minimum buffer size.
3111          */
3112         if (kind == SK_MEM_RECV) {
3113                 if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
3114                         return 1;
3115 
3116         } else { /* SK_MEM_SEND */
3117                 int wmem0 = sk_get_wmem0(sk, prot);
3118 
3119                 if (sk->sk_type == SOCK_STREAM) {
3120                         if (sk->sk_wmem_queued < wmem0)
3121                                 return 1;
3122                 } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
3123                         return 1;
3124                 }
3125         }
3126 
3127         if (sk_has_memory_pressure(sk)) {
3128                 u64 alloc;
3129 
3130                 /* The following 'average' heuristic is within the
3131                  * scope of global accounting, so it only makes
3132                  * sense for global memory pressure.
3133                  */
3134                 if (!sk_under_global_memory_pressure(sk))
3135                         return 1;
3136 
3137                 /* Try to be fair among all the sockets under global
3138                  * pressure by allowing the ones whose usage is below
3139                  * average to raise it.
3140                  */
3141                 alloc = sk_sockets_allocated_read_positive(sk);
3142                 if (sk_prot_mem_limits(sk, 2) > alloc *
3143                     sk_mem_pages(sk->sk_wmem_queued +
3144                                  atomic_read(&sk->sk_rmem_alloc) +
3145                                  sk->sk_forward_alloc))
3146                         return 1;
3147         }
3148 
3149 suppress_allocation:
3150 
3151         if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
3152                 sk_stream_moderate_sndbuf(sk);
3153 
3154                 /* Fail only if socket is _under_ its sndbuf.
3155                  * In this case we cannot block, so we have to fail.
3156                  */
3157                 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
3158                         /* Force charge with __GFP_NOFAIL */
3159                         if (memcg && !charged) {
3160                                 mem_cgroup_charge_skmem(memcg, amt,
3161                                         gfp_memcg_charge() | __GFP_NOFAIL);
3162                         }
3163                         return 1;
3164                 }
3165         }
3166 
3167         if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
3168                 trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
3169 
3170         sk_memory_allocated_sub(sk, amt);
3171 
3172         if (charged)
3173                 mem_cgroup_uncharge_skmem(memcg, amt);
3174 
3175         return 0;
3176 }
3177 
3178 /**
3179  *      __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
3180  *      @sk: socket
3181  *      @size: memory size to allocate
3182  *      @kind: allocation type
3183  *
3184  *      If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
3185  *      rmem allocation. This function assumes that protocols which have
3186  *      memory_pressure use sk_wmem_queued as write buffer accounting.
3187  */
3188 int __sk_mem_schedule(struct sock *sk, int size, int kind)
3189 {
3190         int ret, amt = sk_mem_pages(size);
3191 
3192         sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
3193         ret = __sk_mem_raise_allocated(sk, size, amt, kind);
3194         if (!ret)
3195                 sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT));
3196         return ret;
3197 }
3198 EXPORT_SYMBOL(__sk_mem_schedule);
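
/* A minimal sketch, not part of sock.c, of where this accounting is usually
 * driven from: sk_rmem_schedule() (an inline wrapper in include/net/sock.h
 * that falls back to __sk_mem_schedule() when sk_forward_alloc is too small)
 * charges an incoming skb before it is queued. example_queue_rcv() is
 * hypothetical; compare sock_queue_rcv_skb().
 */
static int example_queue_rcv(struct sock *sk, struct sk_buff *skb)
{
        if (!sk_rmem_schedule(sk, skb, skb->truesize))
                return -ENOBUFS;

        skb_set_owner_r(skb, sk);       /* charge sk_rmem_alloc / fwd alloc */
        skb_queue_tail(&sk->sk_receive_queue, skb);
        sk->sk_data_ready(sk);
        return 0;
}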
3199 
3200 /**
3201  *      __sk_mem_reduce_allocated - reclaim memory_allocated
3202  *      @sk: socket
3203  *      @amount: number of quanta
3204  *
3205  *      Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
3206  */
3207 void __sk_mem_reduce_allocated(struct sock *sk, int amount)
3208 {
3209         sk_memory_allocated_sub(sk, amount);
3210 
3211         if (mem_cgroup_sockets_enabled && sk->sk_memcg)
3212                 mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
3213 
3214         if (sk_under_global_memory_pressure(sk) &&
3215             (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
3216                 sk_leave_memory_pressure(sk);
3217 }
3218 
3219 /**
3220  *      __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
3221  *      @sk: socket
3222  *      @amount: number of bytes (rounded down to a PAGE_SIZE multiple)
3223  */
3224 void __sk_mem_reclaim(struct sock *sk, int amount)
3225 {
3226         amount >>= PAGE_SHIFT;
3227         sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT));
3228         __sk_mem_reduce_allocated(sk, amount);
3229 }
3230 EXPORT_SYMBOL(__sk_mem_reclaim);
3231 
3232 int sk_set_peek_off(struct sock *sk, int val)
3233 {
3234         WRITE_ONCE(sk->sk_peek_off, val);
3235         return 0;
3236 }
3237 EXPORT_SYMBOL_GPL(sk_set_peek_off);
3238 
3239 /*
3240  * Set of default routines for initialising struct proto_ops when
3241  * the protocol does not support a particular function. In certain
3242  * cases where it makes no sense for a protocol to have a "do nothing"
3243  * function, some default processing is provided.
3244  */
3245 
3246 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
3247 {
3248         return -EOPNOTSUPP;
3249 }
3250 EXPORT_SYMBOL(sock_no_bind);
3251 
3252 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
3253                     int len, int flags)
3254 {
3255         return -EOPNOTSUPP;
3256 }
3257 EXPORT_SYMBOL(sock_no_connect);
3258 
3259 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
3260 {
3261         return -EOPNOTSUPP;
3262 }
3263 EXPORT_SYMBOL(sock_no_socketpair);
3264 
3265 int sock_no_accept(struct socket *sock, struct socket *newsock,
3266                    struct proto_accept_arg *arg)
3267 {
3268         return -EOPNOTSUPP;
3269 }
3270 EXPORT_SYMBOL(sock_no_accept);
3271 
3272 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
3273                     int peer)
3274 {
3275         return -EOPNOTSUPP;
3276 }
3277 EXPORT_SYMBOL(sock_no_getname);
3278 
3279 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3280 {
3281         return -EOPNOTSUPP;
3282 }
3283 EXPORT_SYMBOL(sock_no_ioctl);
3284 
3285 int sock_no_listen(struct socket *sock, int backlog)
3286 {
3287         return -EOPNOTSUPP;
3288 }
3289 EXPORT_SYMBOL(sock_no_listen);
3290 
3291 int sock_no_shutdown(struct socket *sock, int how)
3292 {
3293         return -EOPNOTSUPP;
3294 }
3295 EXPORT_SYMBOL(sock_no_shutdown);
3296 
3297 int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
3298 {
3299         return -EOPNOTSUPP;
3300 }
3301 EXPORT_SYMBOL(sock_no_sendmsg);
3302 
3303 int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
3304 {
3305         return -EOPNOTSUPP;
3306 }
3307 EXPORT_SYMBOL(sock_no_sendmsg_locked);
3308 
3309 int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
3310                     int flags)
3311 {
3312         return -EOPNOTSUPP;
3313 }
3314 EXPORT_SYMBOL(sock_no_recvmsg);
3315 
3316 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
3317 {
3318         /* Mirror missing mmap method error code */
3319         return -ENODEV;
3320 }
3321 EXPORT_SYMBOL(sock_no_mmap);
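
/* A minimal sketch, not part of sock.c, of a proto_ops table using the
 * sock_no_*() defaults above for operations the protocol does not support.
 * PF_EXAMPLE and the example_*() handlers are hypothetical.
 */
static const struct proto_ops example_proto_ops = {
        .family         = PF_EXAMPLE,
        .owner          = THIS_MODULE,
        .release        = example_release,
        .bind           = sock_no_bind,
        .connect        = sock_no_connect,
        .socketpair     = sock_no_socketpair,
        .accept         = sock_no_accept,
        .getname        = sock_no_getname,
        .listen         = sock_no_listen,
        .shutdown       = sock_no_shutdown,
        .sendmsg        = example_sendmsg,
        .recvmsg        = example_recvmsg,
        .mmap           = sock_no_mmap,
};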
3322 
3323 /*
3324  * When a file is received (via SCM_RIGHTS, etc), we must bump the
3325  * various sock-based usage counts.
3326  */
3327 void __receive_sock(struct file *file)
3328 {
3329         struct socket *sock;
3330 
3331         sock = sock_from_file(file);
3332         if (sock) {
3333                 sock_update_netprioidx(&sock->sk->sk_cgrp_data);
3334                 sock_update_classid(&sock->sk->sk_cgrp_data);
3335         }
3336 }
3337 
3338 /*
3339  *      Default Socket Callbacks
3340  */
3341 
3342 static void sock_def_wakeup(struct sock *sk)
3343 {
3344         struct socket_wq *wq;
3345 
3346         rcu_read_lock();
3347         wq = rcu_dereference(sk->sk_wq);
3348         if (skwq_has_sleeper(wq))
3349                 wake_up_interruptible_all(&wq->wait);
3350         rcu_read_unlock();
3351 }
3352 
3353 static void sock_def_error_report(struct sock *sk)
3354 {
3355         struct socket_wq *wq;
3356 
3357         rcu_read_lock();
3358         wq = rcu_dereference(sk->sk_wq);
3359         if (skwq_has_sleeper(wq))
3360                 wake_up_interruptible_poll(&wq->wait, EPOLLERR);
3361         sk_wake_async_rcu(sk, SOCK_WAKE_IO, POLL_ERR);
3362         rcu_read_unlock();
3363 }
3364 
3365 void sock_def_readable(struct sock *sk)
3366 {
3367         struct socket_wq *wq;
3368 
3369         trace_sk_data_ready(sk);
3370 
3371         rcu_read_lock();
3372         wq = rcu_dereference(sk->sk_wq);
3373         if (skwq_has_sleeper(wq))
3374                 wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
3375                                                 EPOLLRDNORM | EPOLLRDBAND);
3376         sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
3377         rcu_read_unlock();
3378 }
3379 
3380 static void sock_def_write_space(struct sock *sk)
3381 {
3382         struct socket_wq *wq;
3383 
3384         rcu_read_lock();
3385 
3386         /* Do not wake up a writer until he can make "significant"
3387          * progress.  --DaveM
3388          */
3389         if (sock_writeable(sk)) {
3390                 wq = rcu_dereference(sk->sk_wq);
3391                 if (skwq_has_sleeper(wq))
3392                         wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
3393                                                 EPOLLWRNORM | EPOLLWRBAND);
3394 
3395                 /* Should agree with poll, otherwise some programs break */
3396                 sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
3397         }
3398 
3399         rcu_read_unlock();
3400 }
3401 
3402 /* An optimised version of sock_def_write_space(); it should only be called
3403  * for SOCK_RCU_FREE sockets under an RCU read-side section and after putting
3404  * ->sk_wmem_alloc.
3405  */
3406 static void sock_def_write_space_wfree(struct sock *sk)
3407 {
3408         /* Do not wake up a writer until he can make "significant"
3409          * progress.  --DaveM
3410          */
3411         if (sock_writeable(sk)) {
3412                 struct socket_wq *wq = rcu_dereference(sk->sk_wq);
3413 
3414                 /* rely on refcount_sub from sock_wfree() */
3415                 smp_mb__after_atomic();
3416                 if (wq && waitqueue_active(&wq->wait))
3417                         wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
3418                                                 EPOLLWRNORM | EPOLLWRBAND);
3419 
3420                 /* Should agree with poll, otherwise some programs break */
3421                 sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
3422         }
3423 }
3424 
3425 static void sock_def_destruct(struct sock *sk)
3426 {
3427 }
3428 
3429 void sk_send_sigurg(struct sock *sk)
3430 {
3431         if (sk->sk_socket && sk->sk_socket->file)
3432                 if (send_sigurg(&sk->sk_socket->file->f_owner))
3433                         sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
3434 }
3435 EXPORT_SYMBOL(sk_send_sigurg);
3436 
3437 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
3438                     unsigned long expires)
3439 {
3440         if (!mod_timer(timer, expires))
3441                 sock_hold(sk);
3442 }
3443 EXPORT_SYMBOL(sk_reset_timer);
3444 
3445 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
3446 {
3447         if (del_timer(timer))
3448                 __sock_put(sk);
3449 }
3450 EXPORT_SYMBOL(sk_stop_timer);
3451 
3452 void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
3453 {
3454         if (del_timer_sync(timer))
3455                 __sock_put(sk);
3456 }
3457 EXPORT_SYMBOL(sk_stop_timer_sync);
3458 
3459 void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
3460 {
3461         sk_init_common(sk);
3462         sk->sk_send_head        =       NULL;
3463 
3464         timer_setup(&sk->sk_timer, NULL, 0);
3465 
3466         sk->sk_allocation       =       GFP_KERNEL;
3467         sk->sk_rcvbuf           =       READ_ONCE(sysctl_rmem_default);
3468         sk->sk_sndbuf           =       READ_ONCE(sysctl_wmem_default);
3469         sk->sk_state            =       TCP_CLOSE;
3470         sk->sk_use_task_frag    =       true;
3471         sk_set_socket(sk, sock);
3472 
3473         sock_set_flag(sk, SOCK_ZAPPED);
3474 
3475         if (sock) {
3476                 sk->sk_type     =       sock->type;
3477                 RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
3478                 sock->sk        =       sk;
3479         } else {
3480                 RCU_INIT_POINTER(sk->sk_wq, NULL);
3481         }
3482         sk->sk_uid      =       uid;
3483 
3484         sk->sk_state_change     =       sock_def_wakeup;
3485         sk->sk_data_ready       =       sock_def_readable;
3486         sk->sk_write_space      =       sock_def_write_space;
3487         sk->sk_error_report     =       sock_def_error_report;
3488         sk->sk_destruct         =       sock_def_destruct;
3489 
3490         sk->sk_frag.page        =       NULL;
3491         sk->sk_frag.offset      =       0;
3492         sk->sk_peek_off         =       -1;
3493 
3494         sk->sk_peer_pid         =       NULL;
3495         sk->sk_peer_cred        =       NULL;
3496         spin_lock_init(&sk->sk_peer_lock);
3497 
3498         sk->sk_write_pending    =       0;
3499         sk->sk_rcvlowat         =       1;
3500         sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
3501         sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
3502 
3503         sk->sk_stamp = SK_DEFAULT_STAMP;
3504 #if BITS_PER_LONG==32
3505         seqlock_init(&sk->sk_stamp_seq);
3506 #endif
3507         atomic_set(&sk->sk_zckey, 0);
3508 
3509 #ifdef CONFIG_NET_RX_BUSY_POLL
3510         sk->sk_napi_id          =       0;
3511         sk->sk_ll_usec          =       READ_ONCE(sysctl_net_busy_read);
3512 #endif
3513 
3514         sk->sk_max_pacing_rate = ~0UL;
3515         sk->sk_pacing_rate = ~0UL;
3516         WRITE_ONCE(sk->sk_pacing_shift, 10);
3517         sk->sk_incoming_cpu = -1;
3518 
3519         sk_rx_queue_clear(sk);
3520         /*
3521          * Before updating sk_refcnt, we must commit prior changes to memory
3522          * (Documentation/RCU/rculist_nulls.rst for details)
3523          */
3524         smp_wmb();
3525         refcount_set(&sk->sk_refcnt, 1);
3526         atomic_set(&sk->sk_drops, 0);
3527 }
3528 EXPORT_SYMBOL(sock_init_data_uid);
3529 
3530 void sock_init_data(struct socket *sock, struct sock *sk)
3531 {
3532         kuid_t uid = sock ?
3533                 SOCK_INODE(sock)->i_uid :
3534                 make_kuid(sock_net(sk)->user_ns, 0);
3535 
3536         sock_init_data_uid(sock, sk, uid);
3537 }
3538 EXPORT_SYMBOL(sock_init_data);
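
/* A minimal sketch, not part of sock.c, of the usual caller of
 * sock_init_data(): an address family ->create() handler allocates the sock,
 * initialises the generic state, then overrides the default callbacks it
 * cares about (compare packet_create()). The example_* names and PF_EXAMPLE
 * are hypothetical.
 */
static int example_create(struct net *net, struct socket *sock, int protocol,
                          int kern)
{
        struct sock *sk;

        sk = sk_alloc(net, PF_EXAMPLE, GFP_KERNEL, &example_proto, kern);
        if (!sk)
                return -ENOBUFS;

        sock->ops = &example_proto_ops;
        sock_init_data(sock, sk);

        sk->sk_destruct = example_destruct;     /* replace sock_def_destruct */
        return 0;
}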
3539 
3540 void lock_sock_nested(struct sock *sk, int subclass)
3541 {
3542         /* The sk_lock has mutex_lock() semantics here. */
3543         mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
3544 
3545         might_sleep();
3546         spin_lock_bh(&sk->sk_lock.slock);
3547         if (sock_owned_by_user_nocheck(sk))
3548                 __lock_sock(sk);
3549         sk->sk_lock.owned = 1;
3550         spin_unlock_bh(&sk->sk_lock.slock);
3551 }
3552 EXPORT_SYMBOL(lock_sock_nested);
3553 
3554 void release_sock(struct sock *sk)
3555 {
3556         spin_lock_bh(&sk->sk_lock.slock);
3557         if (sk->sk_backlog.tail)
3558                 __release_sock(sk);
3559 
3560         if (sk->sk_prot->release_cb)
3561                 INDIRECT_CALL_INET_1(sk->sk_prot->release_cb,
3562                                      tcp_release_cb, sk);
3563 
3564         sock_release_ownership(sk);
3565         if (waitqueue_active(&sk->sk_lock.wq))
3566                 wake_up(&sk->sk_lock.wq);
3567         spin_unlock_bh(&sk->sk_lock.slock);
3568 }
3569 EXPORT_SYMBOL(release_sock);
3570 
3571 bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
3572 {
3573         might_sleep();
3574         spin_lock_bh(&sk->sk_lock.slock);
3575 
3576         if (!sock_owned_by_user_nocheck(sk)) {
3577                 /*
3578                  * Fast path return with bottom halves disabled and
3579                  * sock::sk_lock.slock held.
3580                  *
3581                  * The 'mutex' is not contended and holding
3582                  * sock::sk_lock.slock prevents all other lockers to
3583                  * proceed so the corresponding unlock_sock_fast() can
3584                  * avoid the slow path of release_sock() completely and
3585                  * just release slock.
3586                  *
3587                  * From a semantical POV this is equivalent to 'acquiring'
3588                  * the 'mutex', hence the corresponding lockdep
3589                  * mutex_release() has to happen in the fast path of
3590                  * unlock_sock_fast().
3591                  */
3592                 return false;
3593         }
3594 
3595         __lock_sock(sk);
3596         sk->sk_lock.owned = 1;
3597         __acquire(&sk->sk_lock.slock);
3598         spin_unlock_bh(&sk->sk_lock.slock);
3599         return true;
3600 }
3601 EXPORT_SYMBOL(__lock_sock_fast);
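
/* A minimal sketch, not part of sock.c, of the fast-lock pattern built on
 * __lock_sock_fast() above: lock_sock_fast() reports whether the slow path
 * was taken, and unlock_sock_fast() must be handed that value back.
 * example_touch_socket() is hypothetical.
 */
static void example_touch_socket(struct sock *sk)
{
        bool slow = lock_sock_fast(sk);

        /* short, non-sleeping critical section: the fast path keeps
         * sk_lock.slock held with BHs disabled
         */
        WRITE_ONCE(sk->sk_rcvlowat, 1);

        unlock_sock_fast(sk, slow);
}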
3602 
3603 int sock_gettstamp(struct socket *sock, void __user *userstamp,
3604                    bool timeval, bool time32)
3605 {
3606         struct sock *sk = sock->sk;
3607         struct timespec64 ts;
3608 
3609         sock_enable_timestamp(sk, SOCK_TIMESTAMP);
3610         ts = ktime_to_timespec64(sock_read_timestamp(sk));
3611         if (ts.tv_sec == -1)
3612                 return -ENOENT;
3613         if (ts.tv_sec == 0) {
3614                 ktime_t kt = ktime_get_real();
3615                 sock_write_timestamp(sk, kt);
3616                 ts = ktime_to_timespec64(kt);
3617         }
3618 
3619         if (timeval)
3620                 ts.tv_nsec /= 1000;
3621 
3622 #ifdef CONFIG_COMPAT_32BIT_TIME
3623         if (time32)
3624                 return put_old_timespec32(&ts, userstamp);
3625 #endif
3626 #ifdef CONFIG_SPARC64
3627         /* beware of padding in sparc64 timeval */
3628         if (timeval && !in_compat_syscall()) {
3629                 struct __kernel_old_timeval __user tv = {
3630                         .tv_sec = ts.tv_sec,
3631                         .tv_usec = ts.tv_nsec,
3632                 };
3633                 if (copy_to_user(userstamp, &tv, sizeof(tv)))
3634                         return -EFAULT;
3635                 return 0;
3636         }
3637 #endif
3638         return put_timespec64(&ts, userstamp);
3639 }
3640 EXPORT_SYMBOL(sock_gettstamp);
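
/* A minimal userspace sketch, not part of sock.c, of the interface
 * sock_gettstamp() ultimately serves: the SIOCGSTAMP ioctl returns the
 * receive timestamp of the last packet delivered to the socket.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <linux/sockios.h>

static void example_print_last_rx_stamp(int fd)
{
        struct timeval tv;

        if (ioctl(fd, SIOCGSTAMP, &tv) == 0)
                printf("last packet: %lld.%06ld\n",
                       (long long)tv.tv_sec, (long)tv.tv_usec);
}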
3641 
3642 void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
3643 {
3644         if (!sock_flag(sk, flag)) {
3645                 unsigned long previous_flags = sk->sk_flags;
3646 
3647                 sock_set_flag(sk, flag);
3648                 /*
3649                  * we just set one of the two flags which require net
3650                  * time stamping, but time stamping might have been on
3651                  * already because of the other one
3652                  */
3653                 if (sock_needs_netstamp(sk) &&
3654                     !(previous_flags & SK_FLAGS_TIMESTAMP))
3655                         net_enable_timestamp();
3656         }
3657 }
3658 
3659 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
3660                        int level, int type)
3661 {
3662         struct sock_exterr_skb *serr;
3663         struct sk_buff *skb;
3664         int copied, err;
3665 
3666         err = -EAGAIN;
3667         skb = sock_dequeue_err_skb(sk);
3668         if (skb == NULL)
3669                 goto out;
3670 
3671         copied = skb->len;
3672         if (copied > len) {
3673                 msg->msg_flags |= MSG_TRUNC;
3674                 copied = len;
3675         }
3676         err = skb_copy_datagram_msg(skb, 0, msg, copied);
3677         if (err)
3678                 goto out_free_skb;
3679 
3680         sock_recv_timestamp(msg, sk, skb);
3681 
3682         serr = SKB_EXT_ERR(skb);
3683         put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
3684 
3685         msg->msg_flags |= MSG_ERRQUEUE;
3686         err = copied;
3687 
3688 out_free_skb:
3689         kfree_skb(skb);
3690 out:
3691         return err;
3692 }
3693 EXPORT_SYMBOL(sock_recv_errqueue);
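
/* A minimal userspace sketch, not part of sock.c, of the consumer side of
 * sock_recv_errqueue(): draining queued errors (e.g. IP_RECVERR reports or
 * zerocopy completions) with MSG_ERRQUEUE. The cmsg carrying the
 * sock_extended_err is left unparsed here.
 */
#include <sys/socket.h>

static ssize_t example_drain_errqueue(int fd)
{
        char data[256];
        char ctrl[512];
        struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
        struct msghdr msg = {
                .msg_iov        = &iov,
                .msg_iovlen     = 1,
                .msg_control    = ctrl,
                .msg_controllen = sizeof(ctrl),
        };

        return recvmsg(fd, &msg, MSG_ERRQUEUE | MSG_DONTWAIT);
}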
3694 
3695 /*
3696  *      Get a socket option on a socket.
3697  *
3698  *      FIX: POSIX 1003.1g is very ambiguous here. It states that
3699  *      asynchronous errors should be reported by getsockopt. We assume
3700  *      this means if you specify SO_ERROR (otherwise what's the point of it).
3701  */
3702 int sock_common_getsockopt(struct socket *sock, int level, int optname,
3703                            char __user *optval, int __user *optlen)
3704 {
3705         struct sock *sk = sock->sk;
3706 
3707         /* IPV6_ADDRFORM can change sk->sk_prot under us. */
3708         return READ_ONCE(sk->sk_prot)->getsockopt(sk, level, optname, optval, optlen);
3709 }
3710 EXPORT_SYMBOL(sock_common_getsockopt);
3711 
3712 int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
3713                         int flags)
3714 {
3715         struct sock *sk = sock->sk;
3716         int addr_len = 0;
3717         int err;
3718 
3719         err = sk->sk_prot->recvmsg(sk, msg, size, flags, &addr_len);
3720         if (err >= 0)
3721                 msg->msg_namelen = addr_len;
3722         return err;
3723 }
3724 EXPORT_SYMBOL(sock_common_recvmsg);
3725 
3726 /*
3727  *      Set socket options on an inet socket.
3728  */
3729 int sock_common_setsockopt(struct socket *sock, int level, int optname,
3730                            sockptr_t optval, unsigned int optlen)
3731 {
3732         struct sock *sk = sock->sk;
3733 
3734         /* IPV6_ADDRFORM can change sk->sk_prot under us. */
3735         return READ_ONCE(sk->sk_prot)->setsockopt(sk, level, optname, optval, optlen);
3736 }
3737 EXPORT_SYMBOL(sock_common_setsockopt);
3738 
3739 void sk_common_release(struct sock *sk)
3740 {
3741         if (sk->sk_prot->destroy)
3742                 sk->sk_prot->destroy(sk);
3743 
3744         /*
3745          * Observation: when sk_common_release is called, processes have
3746          * no access to the socket, but the network stack still does.
3747          * Step one, detach it from networking:
3748          *
3749          * A. Remove from hash tables.
3750          */
3751 
3752         sk->sk_prot->unhash(sk);
3753 
3754         if (sk->sk_socket)
3755                 sk->sk_socket->sk = NULL;
3756 
3757         /*
3758          * At this point the socket cannot receive new packets, but it is possible
3759          * that some packets are in flight because some CPU ran the receiver and
3760          * did the hash table lookup before we unhashed the socket. They will reach
3761          * the receive queue and will be purged by the socket destructor.
3762          *
3763          * Also, we may still have packets pending on the receive queue and, probably,
3764          * our own packets waiting in device queues. sock_destroy will drain the
3765          * receive queue, but transmitted packets will delay socket destruction
3766          * until the last reference is released.
3767          */
3768 
3769         sock_orphan(sk);
3770 
3771         xfrm_sk_free_policy(sk);
3772 
3773         sock_put(sk);
3774 }
3775 EXPORT_SYMBOL(sk_common_release);
3776 
3777 void sk_get_meminfo(const struct sock *sk, u32 *mem)
3778 {
3779         memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
3780 
3781         mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
3782         mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
3783         mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
3784         mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
3785         mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
3786         mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
3787         mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
3788         mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
3789         mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
3790 }
3791 
3792 #ifdef CONFIG_PROC_FS
3793 static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
3794 
3795 int sock_prot_inuse_get(struct net *net, struct proto *prot)
3796 {
3797         int cpu, idx = prot->inuse_idx;
3798         int res = 0;
3799 
3800         for_each_possible_cpu(cpu)
3801                 res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
3802 
3803         return res >= 0 ? res : 0;
3804 }
3805 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
3806 
3807 int sock_inuse_get(struct net *net)
3808 {
3809         int cpu, res = 0;
3810 
3811         for_each_possible_cpu(cpu)
3812                 res += per_cpu_ptr(net->core.prot_inuse, cpu)->all;
3813 
3814         return res;
3815 }
3816 
3817 EXPORT_SYMBOL_GPL(sock_inuse_get);
3818 
3819 static int __net_init sock_inuse_init_net(struct net *net)
3820 {
3821         net->core.prot_inuse = alloc_percpu(struct prot_inuse);
3822         if (net->core.prot_inuse == NULL)
3823                 return -ENOMEM;
3824         return 0;
3825 }
3826 
3827 static void __net_exit sock_inuse_exit_net(struct net *net)
3828 {
3829         free_percpu(net->core.prot_inuse);
3830 }
3831 
3832 static struct pernet_operations net_inuse_ops = {
3833         .init = sock_inuse_init_net,
3834         .exit = sock_inuse_exit_net,
3835 };
3836 
3837 static __init int net_inuse_init(void)
3838 {
3839         if (register_pernet_subsys(&net_inuse_ops))
3840                 panic("Cannot initialize net inuse counters");
3841 
3842         return 0;
3843 }
3844 
3845 core_initcall(net_inuse_init);
3846 
3847 static int assign_proto_idx(struct proto *prot)
3848 {
3849         prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
3850 
3851         if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
3852                 pr_err("PROTO_INUSE_NR exhausted\n");
3853                 return -ENOSPC;
3854         }
3855 
3856         set_bit(prot->inuse_idx, proto_inuse_idx);
3857         return 0;
3858 }
3859 
3860 static void release_proto_idx(struct proto *prot)
3861 {
3862         if (prot->inuse_idx != PROTO_INUSE_NR - 1)
3863                 clear_bit(prot->inuse_idx, proto_inuse_idx);
3864 }
3865 #else
3866 static inline int assign_proto_idx(struct proto *prot)
3867 {
3868         return 0;
3869 }
3870 
3871 static inline void release_proto_idx(struct proto *prot)
3872 {
3873 }
3874 
3875 #endif
3876 
3877 static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
3878 {
3879         if (!twsk_prot)
3880                 return;
3881         kfree(twsk_prot->twsk_slab_name);
3882         twsk_prot->twsk_slab_name = NULL;
3883         kmem_cache_destroy(twsk_prot->twsk_slab);
3884         twsk_prot->twsk_slab = NULL;
3885 }
3886 
3887 static int tw_prot_init(const struct proto *prot)
3888 {
3889         struct timewait_sock_ops *twsk_prot = prot->twsk_prot;
3890 
3891         if (!twsk_prot)
3892                 return 0;
3893 
3894         twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s",
3895                                               prot->name);
3896         if (!twsk_prot->twsk_slab_name)
3897                 return -ENOMEM;
3898 
3899         twsk_prot->twsk_slab =
3900                 kmem_cache_create(twsk_prot->twsk_slab_name,
3901                                   twsk_prot->twsk_obj_size, 0,
3902                                   SLAB_ACCOUNT | prot->slab_flags,
3903                                   NULL);
3904         if (!twsk_prot->twsk_slab) {
3905                 pr_crit("%s: Can't create timewait sock SLAB cache!\n",
3906                         prot->name);
3907                 return -ENOMEM;
3908         }
3909 
3910         return 0;
3911 }
3912 
3913 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
3914 {
3915         if (!rsk_prot)
3916                 return;
3917         kfree(rsk_prot->slab_name);
3918         rsk_prot->slab_name = NULL;
3919         kmem_cache_destroy(rsk_prot->slab);
3920         rsk_prot->slab = NULL;
3921 }
3922 
3923 static int req_prot_init(const struct proto *prot)
3924 {
3925         struct request_sock_ops *rsk_prot = prot->rsk_prot;
3926 
3927         if (!rsk_prot)
3928                 return 0;
3929 
3930         rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
3931                                         prot->name);
3932         if (!rsk_prot->slab_name)
3933                 return -ENOMEM;
3934 
3935         rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
3936                                            rsk_prot->obj_size, 0,
3937                                            SLAB_ACCOUNT | prot->slab_flags,
3938                                            NULL);
3939 
3940         if (!rsk_prot->slab) {
3941                 pr_crit("%s: Can't create request sock SLAB cache!\n",
3942                         prot->name);
3943                 return -ENOMEM;
3944         }
3945         return 0;
3946 }
3947 
3948 int proto_register(struct proto *prot, int alloc_slab)
3949 {
3950         int ret = -ENOBUFS;
3951 
3952         if (prot->memory_allocated && !prot->sysctl_mem) {
3953                 pr_err("%s: missing sysctl_mem\n", prot->name);
3954                 return -EINVAL;
3955         }
3956         if (prot->memory_allocated && !prot->per_cpu_fw_alloc) {
3957                 pr_err("%s: missing per_cpu_fw_alloc\n", prot->name);
3958                 return -EINVAL;
3959         }
3960         if (alloc_slab) {
3961                 prot->slab = kmem_cache_create_usercopy(prot->name,
3962                                         prot->obj_size, 0,
3963                                         SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
3964                                         prot->slab_flags,
3965                                         prot->useroffset, prot->usersize,
3966                                         NULL);
3967 
3968                 if (prot->slab == NULL) {
3969                         pr_crit("%s: Can't create sock SLAB cache!\n",
3970                                 prot->name);
3971                         goto out;
3972                 }
3973 
3974                 if (req_prot_init(prot))
3975                         goto out_free_request_sock_slab;
3976 
3977                 if (tw_prot_init(prot))
3978                         goto out_free_timewait_sock_slab;
3979         }
3980 
3981         mutex_lock(&proto_list_mutex);
3982         ret = assign_proto_idx(prot);
3983         if (ret) {
3984                 mutex_unlock(&proto_list_mutex);
3985                 goto out_free_timewait_sock_slab;
3986         }
3987         list_add(&prot->node, &proto_list);
3988         mutex_unlock(&proto_list_mutex);
3989         return ret;
3990 
3991 out_free_timewait_sock_slab:
3992         if (alloc_slab)
3993                 tw_prot_cleanup(prot->twsk_prot);
3994 out_free_request_sock_slab:
3995         if (alloc_slab) {
3996                 req_prot_cleanup(prot->rsk_prot);
3997 
3998                 kmem_cache_destroy(prot->slab);
3999                 prot->slab = NULL;
4000         }
4001 out:
4002         return ret;
4003 }
4004 EXPORT_SYMBOL(proto_register);
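
/* A minimal sketch, not part of sock.c, of a protocol registering itself
 * against the interface above. "struct example_sock" and the module
 * init/exit functions are hypothetical; alloc_slab=1 asks proto_register()
 * to create the per-protocol kmem cache.
 */
static struct proto example_proto = {
        .name           = "EXAMPLE",
        .owner          = THIS_MODULE,
        .obj_size       = sizeof(struct example_sock),
};

static int __init example_proto_init(void)
{
        return proto_register(&example_proto, 1);
}

static void __exit example_proto_exit(void)
{
        proto_unregister(&example_proto);
}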
4005 
4006 void proto_unregister(struct proto *prot)
4007 {
4008         mutex_lock(&proto_list_mutex);
4009         release_proto_idx(prot);
4010         list_del(&prot->node);
4011         mutex_unlock(&proto_list_mutex);
4012 
4013         kmem_cache_destroy(prot->slab);
4014         prot->slab = NULL;
4015 
4016         req_prot_cleanup(prot->rsk_prot);
4017         tw_prot_cleanup(prot->twsk_prot);
4018 }
4019 EXPORT_SYMBOL(proto_unregister);
4020 
4021 int sock_load_diag_module(int family, int protocol)
4022 {
4023         if (!protocol) {
4024                 if (!sock_is_registered(family))
4025                         return -ENOENT;
4026 
4027                 return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
4028                                       NETLINK_SOCK_DIAG, family);
4029         }
4030 
4031 #ifdef CONFIG_INET
4032         if (family == AF_INET &&
4033             protocol != IPPROTO_RAW &&
4034             protocol < MAX_INET_PROTOS &&
4035             !rcu_access_pointer(inet_protos[protocol]))
4036                 return -ENOENT;
4037 #endif
4038 
4039         return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
4040                               NETLINK_SOCK_DIAG, family, protocol);
4041 }
4042 EXPORT_SYMBOL(sock_load_diag_module);
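
/* A sketch, not part of sock.c, of the matching module side: a sock_diag
 * handler module advertises an alias so that the request_module() strings
 * built above resolve to it (compare the aliases in net/ipv4/inet_diag.c).
 * The exact alias macro a given diag module uses may differ.
 */
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */);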
4043 
4044 #ifdef CONFIG_PROC_FS
4045 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
4046         __acquires(proto_list_mutex)
4047 {
4048         mutex_lock(&proto_list_mutex);
4049         return seq_list_start_head(&proto_list, *pos);
4050 }
4051 
4052 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4053 {
4054         return seq_list_next(v, &proto_list, pos);
4055 }
4056 
4057 static void proto_seq_stop(struct seq_file *seq, void *v)
4058         __releases(proto_list_mutex)
4059 {
4060         mutex_unlock(&proto_list_mutex);
4061 }
4062 
4063 static char proto_method_implemented(const void *method)
4064 {
4065         return method == NULL ? 'n' : 'y';
4066 }
4067 static long sock_prot_memory_allocated(struct proto *proto)
4068 {
4069         return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
4070 }
4071 
4072 static const char *sock_prot_memory_pressure(struct proto *proto)
4073 {
4074         return proto->memory_pressure != NULL ?
4075         proto_memory_pressure(proto) ? "yes" : "no" : "NI";
4076 }
4077 
4078 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
4079 {
4080 
4081         seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
4082                         "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
4083                    proto->name,
4084                    proto->obj_size,
4085                    sock_prot_inuse_get(seq_file_net(seq), proto),
4086                    sock_prot_memory_allocated(proto),
4087                    sock_prot_memory_pressure(proto),
4088                    proto->max_header,
4089                    proto->slab == NULL ? "no" : "yes",
4090                    module_name(proto->owner),
4091                    proto_method_implemented(proto->close),
4092                    proto_method_implemented(proto->connect),
4093                    proto_method_implemented(proto->disconnect),
4094                    proto_method_implemented(proto->accept),
4095                    proto_method_implemented(proto->ioctl),
4096                    proto_method_implemented(proto->init),
4097                    proto_method_implemented(proto->destroy),
4098                    proto_method_implemented(proto->shutdown),
4099                    proto_method_implemented(proto->setsockopt),
4100                    proto_method_implemented(proto->getsockopt),
4101                    proto_method_implemented(proto->sendmsg),
4102                    proto_method_implemented(proto->recvmsg),
4103                    proto_method_implemented(proto->bind),
4104                    proto_method_implemented(proto->backlog_rcv),
4105                    proto_method_implemented(proto->hash),
4106                    proto_method_implemented(proto->unhash),
4107                    proto_method_implemented(proto->get_port),
4108                    proto_method_implemented(proto->enter_memory_pressure));
4109 }
4110 
4111 static int proto_seq_show(struct seq_file *seq, void *v)
4112 {
4113         if (v == &proto_list)
4114                 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
4115                            "protocol",
4116                            "size",
4117                            "sockets",
4118                            "memory",
4119                            "press",
4120                            "maxhdr",
4121                            "slab",
4122                            "module",
4123                            "cl co di ac io in de sh ss gs se re bi br ha uh gp em\n");
4124         else
4125                 proto_seq_printf(seq, list_entry(v, struct proto, node));
4126         return 0;
4127 }
4128 
4129 static const struct seq_operations proto_seq_ops = {
4130         .start  = proto_seq_start,
4131         .next   = proto_seq_next,
4132         .stop   = proto_seq_stop,
4133         .show   = proto_seq_show,
4134 };
4135 
4136 static __net_init int proto_init_net(struct net *net)
4137 {
4138         if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops,
4139                         sizeof(struct seq_net_private)))
4140                 return -ENOMEM;
4141 
4142         return 0;
4143 }
4144 
4145 static __net_exit void proto_exit_net(struct net *net)
4146 {
4147         remove_proc_entry("protocols", net->proc_net);
4148 }
4149 
4150 
4151 static __net_initdata struct pernet_operations proto_net_ops = {
4152         .init = proto_init_net,
4153         .exit = proto_exit_net,
4154 };
4155 
4156 static int __init proto_init(void)
4157 {
4158         return register_pernet_subsys(&proto_net_ops);
4159 }
4160 
4161 subsys_initcall(proto_init);
4162 
4163 #endif /* PROC_FS */
4164 
4165 #ifdef CONFIG_NET_RX_BUSY_POLL
4166 bool sk_busy_loop_end(void *p, unsigned long start_time)
4167 {
4168         struct sock *sk = p;
4169 
4170         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
4171                 return true;
4172 
4173         if (sk_is_udp(sk) &&
4174             !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
4175                 return true;
4176 
4177         return sk_busy_loop_timeout(sk, start_time);
4178 }
4179 EXPORT_SYMBOL(sk_busy_loop_end);
4180 #endif /* CONFIG_NET_RX_BUSY_POLL */
4181 
4182 int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
4183 {
4184         if (!sk->sk_prot->bind_add)
4185                 return -EOPNOTSUPP;
4186         return sk->sk_prot->bind_add(sk, addr, addr_len);
4187 }
4188 EXPORT_SYMBOL(sock_bind_add);
4189 
4190 /* Copy 'size' bytes from userspace and copy 'size' bytes of result back to userspace */
4191 int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
4192                      void __user *arg, void *karg, size_t size)
4193 {
4194         int ret;
4195 
4196         if (copy_from_user(karg, arg, size))
4197                 return -EFAULT;
4198 
4199         ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, karg);
4200         if (ret)
4201                 return ret;
4202 
4203         if (copy_to_user(arg, karg, size))
4204                 return -EFAULT;
4205 
4206         return 0;
4207 }
4208 EXPORT_SYMBOL(sock_ioctl_inout);
4209 
4210 /* This is the most common ioctl prep function, where the result (4 bytes) is
4211  * copied back to userspace if the ioctl() returns successfully. No input is
4212  * copied from userspace as input argument.
4213  */
4214 static int sock_ioctl_out(struct sock *sk, unsigned int cmd, void __user *arg)
4215 {
4216         int ret, karg = 0;
4217 
4218         ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, &karg);
4219         if (ret)
4220                 return ret;
4221 
4222         return put_user(karg, (int __user *)arg);
4223 }
4224 
4225 /* A wrapper around sock ioctls, which copies the data from userspace
4226  * (depending on the protocol/ioctl), and copies back the result to userspace.
4227  * The main motivation for this function is to pass kernel memory to the
4228  * protocol ioctl callbacks, instead of userspace memory.
4229  */
4230 int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
4231 {
4232         int rc = 1;
4233 
4234         if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET)
4235                 rc = ipmr_sk_ioctl(sk, cmd, arg);
4236         else if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET6)
4237                 rc = ip6mr_sk_ioctl(sk, cmd, arg);
4238         else if (sk_is_phonet(sk))
4239                 rc = phonet_sk_ioctl(sk, cmd, arg);
4240 
4241         /* If the ioctl was processed, return its value */
4242         if (rc <= 0)
4243                 return rc;
4244 
4245         /* Otherwise call the default handler */
4246         return sock_ioctl_out(sk, cmd, arg);
4247 }
4248 EXPORT_SYMBOL(sk_ioctl);
4249 
4250 static int __init sock_struct_check(void)
4251 {
4252         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_drops);
4253         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_peek_off);
4254         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_error_queue);
4255         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_receive_queue);
4256         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_backlog);
4257 
4258         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst);
4259         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_ifindex);
4260         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_cookie);
4261         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvbuf);
4262         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_filter);
4263         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_wq);
4264         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_data_ready);
4265         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvtimeo);
4266         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvlowat);
4267 
4268         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_err);
4269         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_socket);
4270         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_memcg);
4271 
4272         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_lock);
4273         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_reserved_mem);
4274         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_forward_alloc);
4275         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_tsflags);
4276 
4277         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
4278         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
4279         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_sndbuf);
4280         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_queued);
4281         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_alloc);
4282         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tsq_flags);
4283         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_send_head);
4284         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_queue);
4285         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_pending);
4286         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_dst_pending_confirm);
4287         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_status);
4288         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_frag);
4289         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_timer);
4290         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_rate);
4291         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_zckey);
4292         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tskey);
4293 
4294         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_max_pacing_rate);
4295         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndtimeo);
4296         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_priority);
4297         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_mark);
4298         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_dst_cache);
4299         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_route_caps);
4300         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_type);
4301         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_size);
4302         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_allocation);
4303         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_txhash);
4304         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_segs);
4305         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_pacing_shift);
4306         CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_use_task_frag);
4307         return 0;
4308 }
4309 
4310 core_initcall(sock_struct_check);
4311 
