Linux/net/netlink/af_netlink.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NETLINK      Kernel-user communication protocol.
 *
 *              Authors:        Alan Cox <alan@lxorguk.ukuu.org.uk>
 *                              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *                              Patrick McHardy <kaber@trash.net>
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *                               added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 *                               use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 *                               - inc module use count of module that owns
 *                                 the kernel socket in case userspace opens
 *                                 socket of same protocol
 *                               - remove all module support, since netlink is
 *                                 mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>

#include <linux/bpf.h>
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/filter.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/if_arp.h>
#include <linux/rhashtable.h>
#include <asm/cacheflush.h>
#include <linux/hash.h>
#include <linux/net_namespace.h>
#include <linux/nospec.h>
#include <linux/btf_ids.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>
#define CREATE_TRACE_POINTS
#include <trace/events/netlink.h>

#include "af_netlink.h"
#include "genetlink.h"

struct listeners {
        struct rcu_head         rcu;
        unsigned long           masks[];
};

/* state bits */
#define NETLINK_S_CONGESTED             0x0
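/* NETLINK_S_CONGESTED is a bit number in nlk_sk(sk)->state, manipulated
 * with the test_bit()/set_bit() family (see netlink_overrun() and
 * netlink_rcv_wake() below).
 */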

static inline int netlink_is_kernel(struct sock *sk)
{
        return nlk_test_bit(KERNEL_SOCKET, sk);
}

struct netlink_table *nl_table __read_mostly;
EXPORT_SYMBOL_GPL(nl_table);

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS];

static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
        "nlk_cb_mutex-ROUTE",
        "nlk_cb_mutex-1",
        "nlk_cb_mutex-USERSOCK",
        "nlk_cb_mutex-FIREWALL",
        "nlk_cb_mutex-SOCK_DIAG",
        "nlk_cb_mutex-NFLOG",
        "nlk_cb_mutex-XFRM",
        "nlk_cb_mutex-SELINUX",
        "nlk_cb_mutex-ISCSI",
        "nlk_cb_mutex-AUDIT",
        "nlk_cb_mutex-FIB_LOOKUP",
        "nlk_cb_mutex-CONNECTOR",
        "nlk_cb_mutex-NETFILTER",
        "nlk_cb_mutex-IP6_FW",
        "nlk_cb_mutex-DNRTMSG",
        "nlk_cb_mutex-KOBJECT_UEVENT",
        "nlk_cb_mutex-GENERIC",
        "nlk_cb_mutex-17",
        "nlk_cb_mutex-SCSITRANSPORT",
        "nlk_cb_mutex-ECRYPTFS",
        "nlk_cb_mutex-RDMA",
        "nlk_cb_mutex-CRYPTO",
        "nlk_cb_mutex-SMC",
        "nlk_cb_mutex-23",
        "nlk_cb_mutex-24",
        "nlk_cb_mutex-25",
        "nlk_cb_mutex-26",
        "nlk_cb_mutex-27",
        "nlk_cb_mutex-28",
        "nlk_cb_mutex-29",
        "nlk_cb_mutex-30",
        "nlk_cb_mutex-31",
        "nlk_cb_mutex-MAX_LINKS"
};

static int netlink_dump(struct sock *sk, bool lock_taken);

/* nl_table locking explained:
 * Lookup and traversal are protected with an RCU read-side lock. Insertion
 * and removal are protected with a per-bucket lock while using RCU list
 * modification primitives and may run in parallel to RCU-protected lookups.
 * Destruction of the netlink socket may only occur *after* nl_table_lock has
 * been acquired, either during or after the socket has been removed from
 * the list, and after an RCU grace period.
 */
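/* The resulting reader-side pattern is the one used by netlink_lookup()
 * below (a sketch, using names from this file):
 *
 *      rcu_read_lock();
 *      sk = __netlink_lookup(table, portid, net);
 *      if (sk)
 *              sock_hold(sk);
 *      rcu_read_unlock();
 */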
DEFINE_RWLOCK(nl_table_lock);
EXPORT_SYMBOL_GPL(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);

#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock))

static BLOCKING_NOTIFIER_HEAD(netlink_chain);


static const struct rhashtable_params netlink_rhashtable_params;

void do_trace_netlink_extack(const char *msg)
{
        trace_netlink_extack(msg);
}
EXPORT_SYMBOL(do_trace_netlink_extack);

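/* Multicast group numbering is 1-based while the mask is 0-based, so e.g.
 * netlink_group_mask(1) == 0x1 and netlink_group_mask(3) == 0x4; group 0
 * and groups above 32 map to an empty mask.
 */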
static inline u32 netlink_group_mask(u32 group)
{
        if (group > 32)
                return 0;
        return group ? 1 << (group - 1) : 0;
}

static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
                                           gfp_t gfp_mask)
{
        unsigned int len = skb->len;
        struct sk_buff *new;

        new = alloc_skb(len, gfp_mask);
        if (new == NULL)
                return NULL;

        NETLINK_CB(new).portid = NETLINK_CB(skb).portid;
        NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group;
        NETLINK_CB(new).creds = NETLINK_CB(skb).creds;

        skb_put_data(new, skb->data, len);
        return new;
}

static unsigned int netlink_tap_net_id;

struct netlink_tap_net {
        struct list_head netlink_tap_all;
        struct mutex netlink_tap_lock;
};

int netlink_add_tap(struct netlink_tap *nt)
{
        struct net *net = dev_net(nt->dev);
        struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);

        if (unlikely(nt->dev->type != ARPHRD_NETLINK))
                return -EINVAL;

        mutex_lock(&nn->netlink_tap_lock);
        list_add_rcu(&nt->list, &nn->netlink_tap_all);
        mutex_unlock(&nn->netlink_tap_lock);

        __module_get(nt->module);

        return 0;
}
EXPORT_SYMBOL_GPL(netlink_add_tap);

static int __netlink_remove_tap(struct netlink_tap *nt)
{
        struct net *net = dev_net(nt->dev);
        struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);
        bool found = false;
        struct netlink_tap *tmp;

        mutex_lock(&nn->netlink_tap_lock);

        list_for_each_entry(tmp, &nn->netlink_tap_all, list) {
                if (nt == tmp) {
                        list_del_rcu(&nt->list);
                        found = true;
                        goto out;
                }
        }

        pr_warn("__netlink_remove_tap: %p not found\n", nt);
out:
        mutex_unlock(&nn->netlink_tap_lock);

        if (found)
                module_put(nt->module);

        return found ? 0 : -ENODEV;
}

int netlink_remove_tap(struct netlink_tap *nt)
{
        int ret;

        ret = __netlink_remove_tap(nt);
        synchronize_net();

        return ret;
}
EXPORT_SYMBOL_GPL(netlink_remove_tap);
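
/* A hedged sketch of how a monitoring driver such as nlmon registers a tap,
 * assuming a netdev "my_netdev" of type ARPHRD_NETLINK; the field names
 * follow struct netlink_tap in include/linux/netlink.h:
 *
 *      static struct netlink_tap my_tap = {
 *              .dev    = my_netdev,
 *              .module = THIS_MODULE,
 *      };
 *
 *      err = netlink_add_tap(&my_tap);
 *      ...
 *      netlink_remove_tap(&my_tap);
 */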

static __net_init int netlink_tap_init_net(struct net *net)
{
        struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);

        INIT_LIST_HEAD(&nn->netlink_tap_all);
        mutex_init(&nn->netlink_tap_lock);
        return 0;
}

static struct pernet_operations netlink_tap_net_ops = {
        .init = netlink_tap_init_net,
        .id   = &netlink_tap_net_id,
        .size = sizeof(struct netlink_tap_net),
};

static bool netlink_filter_tap(const struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        /* We take the more conservative approach and
         * whitelist socket protocols that may pass.
         */
        switch (sk->sk_protocol) {
        case NETLINK_ROUTE:
        case NETLINK_USERSOCK:
        case NETLINK_SOCK_DIAG:
        case NETLINK_NFLOG:
        case NETLINK_XFRM:
        case NETLINK_FIB_LOOKUP:
        case NETLINK_NETFILTER:
        case NETLINK_GENERIC:
                return true;
        }

        return false;
}

static int __netlink_deliver_tap_skb(struct sk_buff *skb,
                                     struct net_device *dev)
{
        struct sk_buff *nskb;
        struct sock *sk = skb->sk;
        int ret = -ENOMEM;

        if (!net_eq(dev_net(dev), sock_net(sk)))
                return 0;

        dev_hold(dev);

        if (is_vmalloc_addr(skb->head))
                nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
        else
                nskb = skb_clone(skb, GFP_ATOMIC);
        if (nskb) {
                nskb->dev = dev;
                nskb->protocol = htons((u16) sk->sk_protocol);
                nskb->pkt_type = netlink_is_kernel(sk) ?
                                 PACKET_KERNEL : PACKET_USER;
                skb_reset_network_header(nskb);
                ret = dev_queue_xmit(nskb);
                if (unlikely(ret > 0))
                        ret = net_xmit_errno(ret);
        }

        dev_put(dev);
        return ret;
}

static void __netlink_deliver_tap(struct sk_buff *skb, struct netlink_tap_net *nn)
{
        int ret;
        struct netlink_tap *tmp;

        if (!netlink_filter_tap(skb))
                return;

        list_for_each_entry_rcu(tmp, &nn->netlink_tap_all, list) {
                ret = __netlink_deliver_tap_skb(skb, tmp->dev);
                if (unlikely(ret))
                        break;
        }
}

static void netlink_deliver_tap(struct net *net, struct sk_buff *skb)
{
        struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);

        rcu_read_lock();

        if (unlikely(!list_empty(&nn->netlink_tap_all)))
                __netlink_deliver_tap(skb, nn);

        rcu_read_unlock();
}

static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src,
                                       struct sk_buff *skb)
{
        if (!(netlink_is_kernel(dst) && netlink_is_kernel(src)))
                netlink_deliver_tap(sock_net(dst), skb);
}

static void netlink_overrun(struct sock *sk)
{
        if (!nlk_test_bit(RECV_NO_ENOBUFS, sk)) {
                if (!test_and_set_bit(NETLINK_S_CONGESTED,
                                      &nlk_sk(sk)->state)) {
                        WRITE_ONCE(sk->sk_err, ENOBUFS);
                        sk_error_report(sk);
                }
        }
        atomic_inc(&sk->sk_drops);
}

static void netlink_rcv_wake(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (skb_queue_empty_lockless(&sk->sk_receive_queue))
                clear_bit(NETLINK_S_CONGESTED, &nlk->state);
        if (!test_bit(NETLINK_S_CONGESTED, &nlk->state))
                wake_up_interruptible(&nlk->wait);
}

static void netlink_skb_destructor(struct sk_buff *skb)
{
        if (is_vmalloc_addr(skb->head)) {
                if (!skb->cloned ||
                    !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
                        vfree_atomic(skb->head);

                skb->head = NULL;
        }
        if (skb->sk != NULL)
                sock_rfree(skb);
}

static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
        WARN_ON(skb->sk != NULL);
        skb->sk = sk;
        skb->destructor = netlink_skb_destructor;
        atomic_add(skb->truesize, &sk->sk_rmem_alloc);
        sk_mem_charge(sk, skb->truesize);
}

static void netlink_sock_destruct(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->cb_running) {
                if (nlk->cb.done)
                        nlk->cb.done(&nlk->cb);
                module_put(nlk->cb.module);
                kfree_skb(nlk->cb.skb);
        }

        skb_queue_purge(&sk->sk_receive_queue);

        if (!sock_flag(sk, SOCK_DEAD)) {
                printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
                return;
        }

        WARN_ON(atomic_read(&sk->sk_rmem_alloc));
        WARN_ON(refcount_read(&sk->sk_wmem_alloc));
        WARN_ON(nlk_sk(sk)->groups);
}

static void netlink_sock_destruct_work(struct work_struct *work)
{
        struct netlink_sock *nlk = container_of(work, struct netlink_sock,
                                                work);

        sk_free(&nlk->sk);
}

/* Taking this lock without WQ_FLAG_EXCLUSIVE is fine on UP but _very_ bad on
 * SMP: when several writers sleep and a reader wakes them up, all but one
 * immediately hit the write lock and grab all the CPUs. An exclusive sleep
 * solves this, _but_ remember that it adds useless work on UP machines.
 */

void netlink_table_grab(void)
        __acquires(nl_table_lock)
{
        might_sleep();

        write_lock_irq(&nl_table_lock);

        if (atomic_read(&nl_table_users)) {
                DECLARE_WAITQUEUE(wait, current);

                add_wait_queue_exclusive(&nl_table_wait, &wait);
                for (;;) {
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        if (atomic_read(&nl_table_users) == 0)
                                break;
                        write_unlock_irq(&nl_table_lock);
                        schedule();
                        write_lock_irq(&nl_table_lock);
                }

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nl_table_wait, &wait);
        }
}

void netlink_table_ungrab(void)
        __releases(nl_table_lock)
{
        write_unlock_irq(&nl_table_lock);
        wake_up(&nl_table_wait);
}

static inline void
netlink_lock_table(void)
{
        unsigned long flags;

        /* read_lock() synchronizes us to netlink_table_grab */

        read_lock_irqsave(&nl_table_lock, flags);
        atomic_inc(&nl_table_users);
        read_unlock_irqrestore(&nl_table_lock, flags);
}

static inline void
netlink_unlock_table(void)
{
        if (atomic_dec_and_test(&nl_table_users))
                wake_up(&nl_table_wait);
}

struct netlink_compare_arg
{
        possible_net_t pnet;
        u32 portid;
};

/* Doing sizeof directly may yield 4 extra bytes on 64-bit. */
#define netlink_compare_arg_len \
        (offsetof(struct netlink_compare_arg, portid) + sizeof(u32))
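
/* Example: on a typical 64-bit build with CONFIG_NET_NS, possible_net_t
 * holds a pointer, so netlink_compare_arg_len is 8 + 4 = 12 bytes, while
 * sizeof(struct netlink_compare_arg) would be padded up to 16.
 */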

static inline int netlink_compare(struct rhashtable_compare_arg *arg,
                                  const void *ptr)
{
        const struct netlink_compare_arg *x = arg->key;
        const struct netlink_sock *nlk = ptr;

        return nlk->portid != x->portid ||
               !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet));
}

static void netlink_compare_arg_init(struct netlink_compare_arg *arg,
                                     struct net *net, u32 portid)
{
        memset(arg, 0, sizeof(*arg));
        write_pnet(&arg->pnet, net);
        arg->portid = portid;
}

static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
                                     struct net *net)
{
        struct netlink_compare_arg arg;

        netlink_compare_arg_init(&arg, net, portid);
        return rhashtable_lookup_fast(&table->hash, &arg,
                                      netlink_rhashtable_params);
}

static int __netlink_insert(struct netlink_table *table, struct sock *sk)
{
        struct netlink_compare_arg arg;

        netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid);
        return rhashtable_lookup_insert_key(&table->hash, &arg,
                                            &nlk_sk(sk)->node,
                                            netlink_rhashtable_params);
}

static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
{
        struct netlink_table *table = &nl_table[protocol];
        struct sock *sk;

        rcu_read_lock();
        sk = __netlink_lookup(table, portid, net);
        if (sk)
                sock_hold(sk);
        rcu_read_unlock();

        return sk;
}

static const struct proto_ops netlink_ops;

static void
netlink_update_listeners(struct sock *sk)
{
        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
        unsigned long mask;
        unsigned int i;
        struct listeners *listeners;

        listeners = nl_deref_protected(tbl->listeners);
        if (!listeners)
                return;

        for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
                mask = 0;
                sk_for_each_bound(sk, &tbl->mc_list) {
                        if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
                                mask |= nlk_sk(sk)->groups[i];
                }
                listeners->masks[i] = mask;
        }
        /* this function is only called with the netlink table "grabbed", which
         * makes sure updates are visible before bind or setsockopt return. */
}

static int netlink_insert(struct sock *sk, u32 portid)
{
        struct netlink_table *table = &nl_table[sk->sk_protocol];
        int err;

        lock_sock(sk);

        err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY;
        if (nlk_sk(sk)->bound)
                goto err;

        /* portid can be read locklessly from netlink_getname(). */
        WRITE_ONCE(nlk_sk(sk)->portid, portid);

        sock_hold(sk);

        err = __netlink_insert(table, sk);
        if (err) {
                /* If the hashtable backend returns -EBUSY here, it must
                 * not escape to the caller.
                 */
                if (unlikely(err == -EBUSY))
                        err = -EOVERFLOW;
                if (err == -EEXIST)
                        err = -EADDRINUSE;
                sock_put(sk);
                goto err;
        }

        /* We need to ensure that the socket is hashed and visible. */
        smp_wmb();
        /* Paired with lockless reads from netlink_bind(),
         * netlink_connect() and netlink_sendmsg().
         */
        WRITE_ONCE(nlk_sk(sk)->bound, portid);

err:
        release_sock(sk);
        return err;
}

static void netlink_remove(struct sock *sk)
{
        struct netlink_table *table;

        table = &nl_table[sk->sk_protocol];
        if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node,
                                    netlink_rhashtable_params)) {
                WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
                __sock_put(sk);
        }

        netlink_table_grab();
        if (nlk_sk(sk)->subscriptions) {
                __sk_del_bind_node(sk);
                netlink_update_listeners(sk);
        }
        if (sk->sk_protocol == NETLINK_GENERIC)
                atomic_inc(&genl_sk_destructing_cnt);
        netlink_table_ungrab();
}

static struct proto netlink_proto = {
        .name     = "NETLINK",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct netlink_sock),
};

static int __netlink_create(struct net *net, struct socket *sock,
                            int protocol, int kern)
{
        struct sock *sk;
        struct netlink_sock *nlk;

        sock->ops = &netlink_ops;

        sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern);
        if (!sk)
                return -ENOMEM;

        sock_init_data(sock, sk);

        nlk = nlk_sk(sk);
        mutex_init(&nlk->nl_cb_mutex);
        lockdep_set_class_and_name(&nlk->nl_cb_mutex,
                                           nlk_cb_mutex_keys + protocol,
                                           nlk_cb_mutex_key_strings[protocol]);
        init_waitqueue_head(&nlk->wait);

        sk->sk_destruct = netlink_sock_destruct;
        sk->sk_protocol = protocol;
        return 0;
}

static int netlink_create(struct net *net, struct socket *sock, int protocol,
                          int kern)
{
        struct module *module = NULL;
        struct netlink_sock *nlk;
        int (*bind)(struct net *net, int group);
        void (*unbind)(struct net *net, int group);
        void (*release)(struct sock *sock, unsigned long *groups);
        int err = 0;

        sock->state = SS_UNCONNECTED;

        if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
                return -ESOCKTNOSUPPORT;

        if (protocol < 0 || protocol >= MAX_LINKS)
                return -EPROTONOSUPPORT;
        protocol = array_index_nospec(protocol, MAX_LINKS);

        netlink_lock_table();
#ifdef CONFIG_MODULES
        if (!nl_table[protocol].registered) {
                netlink_unlock_table();
                request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
                netlink_lock_table();
        }
#endif
        if (nl_table[protocol].registered &&
            try_module_get(nl_table[protocol].module))
                module = nl_table[protocol].module;
        else
                err = -EPROTONOSUPPORT;
        bind = nl_table[protocol].bind;
        unbind = nl_table[protocol].unbind;
        release = nl_table[protocol].release;
        netlink_unlock_table();

        if (err < 0)
                goto out;

        err = __netlink_create(net, sock, protocol, kern);
        if (err < 0)
                goto out_module;

        sock_prot_inuse_add(net, &netlink_proto, 1);

        nlk = nlk_sk(sock->sk);
        nlk->module = module;
        nlk->netlink_bind = bind;
        nlk->netlink_unbind = unbind;
        nlk->netlink_release = release;
out:
        return err;

out_module:
        module_put(module);
        goto out;
}

static void deferred_put_nlk_sk(struct rcu_head *head)
{
        struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
        struct sock *sk = &nlk->sk;

        kfree(nlk->groups);
        nlk->groups = NULL;

        if (!refcount_dec_and_test(&sk->sk_refcnt))
                return;

        if (nlk->cb_running && nlk->cb.done) {
                INIT_WORK(&nlk->work, netlink_sock_destruct_work);
                schedule_work(&nlk->work);
                return;
        }

        sk_free(sk);
}

static int netlink_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk;

        if (!sk)
                return 0;

        netlink_remove(sk);
        sock_orphan(sk);
        nlk = nlk_sk(sk);

        /*
         * OK. Socket is unlinked, any packets that arrive now
         * will be purged.
         */
        if (nlk->netlink_release)
                nlk->netlink_release(sk, nlk->groups);

        /* We must not acquire netlink_table_lock in any way again before
         * unbinding and notifying genetlink are done, as otherwise it
         * might deadlock.
         */
        if (nlk->netlink_unbind) {
                int i;

                for (i = 0; i < nlk->ngroups; i++)
                        if (test_bit(i, nlk->groups))
                                nlk->netlink_unbind(sock_net(sk), i + 1);
        }
        if (sk->sk_protocol == NETLINK_GENERIC &&
            atomic_dec_return(&genl_sk_destructing_cnt) == 0)
                wake_up(&genl_sk_destructing_waitq);

        sock->sk = NULL;
        wake_up_interruptible_all(&nlk->wait);

        skb_queue_purge(&sk->sk_write_queue);

        if (nlk->portid && nlk->bound) {
                struct netlink_notify n = {
                                                .net = sock_net(sk),
                                                .protocol = sk->sk_protocol,
                                                .portid = nlk->portid,
                                          };
                blocking_notifier_call_chain(&netlink_chain,
                                NETLINK_URELEASE, &n);
        }

        module_put(nlk->module);

        if (netlink_is_kernel(sk)) {
                netlink_table_grab();
                BUG_ON(nl_table[sk->sk_protocol].registered == 0);
                if (--nl_table[sk->sk_protocol].registered == 0) {
                        struct listeners *old;

                        old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
                        RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
                        kfree_rcu(old, rcu);
                        nl_table[sk->sk_protocol].module = NULL;
                        nl_table[sk->sk_protocol].bind = NULL;
                        nl_table[sk->sk_protocol].unbind = NULL;
                        nl_table[sk->sk_protocol].flags = 0;
                        nl_table[sk->sk_protocol].registered = 0;
                }
                netlink_table_ungrab();
        }

        sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);

        /* Because struct net might disappear soon, do not keep a pointer. */
        if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) {
                __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);
                /* Because of deferred_put_nlk_sk and the use of a work
                 * queue, it is possible that the netns will be freed
                 * before this socket.
                 */
                sock_net_set(sk, &init_net);
                __netns_tracker_alloc(&init_net, &sk->ns_tracker,
                                      false, GFP_KERNEL);
        }
        call_rcu(&nlk->rcu, deferred_put_nlk_sk);
        return 0;
}

static int netlink_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct netlink_table *table = &nl_table[sk->sk_protocol];
        s32 portid = task_tgid_vnr(current);
        int err;
        s32 rover = -4096;
        bool ok;

retry:
        cond_resched();
        rcu_read_lock();
        ok = !__netlink_lookup(table, portid, net);
        rcu_read_unlock();
        if (!ok) {
                /* Bind collision, search negative portid values. */
                if (rover == -4096)
                        /* rover will be in range [S32_MIN, -4097] */
                        rover = S32_MIN + get_random_u32_below(-4096 - S32_MIN);
                else if (rover >= -4096)
                        rover = -4097;
                portid = rover--;
                goto retry;
        }

        err = netlink_insert(sk, portid);
        if (err == -EADDRINUSE)
                goto retry;

        /* If 2 threads race to autobind, that is fine.  */
        if (err == -EBUSY)
                err = 0;

        return err;
}

/**
 * __netlink_ns_capable - General netlink message capability test
 * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace.
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message from had
 * the capability @cap when the netlink socket was created and if the sender
 * of the message has the capability @cap in the user namespace @user_ns.
 */
bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
                        struct user_namespace *user_ns, int cap)
{
        return ((nsp->flags & NETLINK_SKB_DST) ||
                file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) &&
                ns_capable(user_ns, cap);
}
EXPORT_SYMBOL(__netlink_ns_capable);

/**
 * netlink_ns_capable - General netlink message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message from had
 * the capability @cap when the netlink socket was created and if the sender
 * of the message has the capability @cap in the user namespace @user_ns.
 */
892  */
893 bool netlink_ns_capable(const struct sk_buff *skb,
894                         struct user_namespace *user_ns, int cap)
895 {
896         return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap);
897 }
898 EXPORT_SYMBOL(netlink_ns_capable);
899 
/**
 * netlink_capable - Netlink global message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message from had
 * the capability @cap when the netlink socket was created and if the sender
 * of the message has the capability @cap in all user namespaces.
 */
bool netlink_capable(const struct sk_buff *skb, int cap)
{
        return netlink_ns_capable(skb, &init_user_ns, cap);
}
EXPORT_SYMBOL(netlink_capable);

/**
 * netlink_net_capable - Netlink network namespace message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message from had
 * the capability @cap when the netlink socket was created and if the sender
 * of the message has the capability @cap over the network namespace of
 * the socket we received the message from.
 */
bool netlink_net_capable(const struct sk_buff *skb, int cap)
{
        return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap);
}
EXPORT_SYMBOL(netlink_net_capable);
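
/* A hedged usage sketch: message handlers typically gate privileged commands
 * on one of these helpers, e.g. as rtnetlink does for non-GET requests:
 *
 *      if (!netlink_net_capable(skb, CAP_NET_ADMIN))
 *              return -EPERM;
 */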

static inline int netlink_allowed(const struct socket *sock, unsigned int flag)
{
        return (nl_table[sock->sk->sk_protocol].flags & flag) ||
                ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
}

static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->subscriptions && !subscriptions)
                __sk_del_bind_node(sk);
        else if (!nlk->subscriptions && subscriptions)
                sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
        nlk->subscriptions = subscriptions;
}

static int netlink_realloc_groups(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int groups;
        unsigned long *new_groups;
        int err = 0;

        netlink_table_grab();

        groups = nl_table[sk->sk_protocol].groups;
        if (!nl_table[sk->sk_protocol].registered) {
                err = -ENOENT;
                goto out_unlock;
        }

        if (nlk->ngroups >= groups)
                goto out_unlock;

        new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
        if (new_groups == NULL) {
                err = -ENOMEM;
                goto out_unlock;
        }
        memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
               NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

        nlk->groups = new_groups;
        nlk->ngroups = groups;
 out_unlock:
        netlink_table_ungrab();
        return err;
}

static void netlink_undo_bind(int group, long unsigned int groups,
                              struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        int undo;

        if (!nlk->netlink_unbind)
                return;

        for (undo = 0; undo < group; undo++)
                if (test_bit(undo, &groups))
                        nlk->netlink_unbind(sock_net(sk), undo + 1);
}

static int netlink_bind(struct socket *sock, struct sockaddr *addr,
                        int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
        int err = 0;
        unsigned long groups;
        bool bound;

        if (addr_len < sizeof(struct sockaddr_nl))
                return -EINVAL;

        if (nladdr->nl_family != AF_NETLINK)
                return -EINVAL;
        groups = nladdr->nl_groups;

        /* Only the superuser is allowed to listen to multicasts */
        if (groups) {
                if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
                        return -EPERM;
                err = netlink_realloc_groups(sk);
                if (err)
                        return err;
        }

        if (nlk->ngroups < BITS_PER_LONG)
                groups &= (1UL << nlk->ngroups) - 1;

        /* Paired with WRITE_ONCE() in netlink_insert() */
        bound = READ_ONCE(nlk->bound);
        if (bound) {
                /* Ensure nlk->portid is up-to-date. */
                smp_rmb();

                if (nladdr->nl_pid != nlk->portid)
                        return -EINVAL;
        }

        if (nlk->netlink_bind && groups) {
                int group;

                /* nl_groups is a u32, so cap the maximum groups we can bind */
                for (group = 0; group < BITS_PER_TYPE(u32); group++) {
                        if (!test_bit(group, &groups))
                                continue;
                        err = nlk->netlink_bind(net, group + 1);
                        if (!err)
                                continue;
                        netlink_undo_bind(group, groups, sk);
                        return err;
                }
        }

        /* No need for barriers here as we return to user-space without
         * using any of the bound attributes.
         */
        netlink_lock_table();
        if (!bound) {
                err = nladdr->nl_pid ?
                        netlink_insert(sk, nladdr->nl_pid) :
                        netlink_autobind(sock);
                if (err) {
                        netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk);
                        goto unlock;
                }
        }

        if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
                goto unlock;
        netlink_unlock_table();

        netlink_table_grab();
        netlink_update_subscriptions(sk, nlk->subscriptions +
                                         hweight32(groups) -
                                         hweight32(nlk->groups[0]));
        nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups;
        netlink_update_listeners(sk);
        netlink_table_ungrab();

        return 0;

unlock:
        netlink_unlock_table();
        return err;
}

static int netlink_connect(struct socket *sock, struct sockaddr *addr,
                           int alen, int flags)
{
        int err = 0;
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

        if (alen < sizeof(addr->sa_family))
                return -EINVAL;

        if (addr->sa_family == AF_UNSPEC) {
                /* paired with READ_ONCE() in netlink_getsockbyportid() */
                WRITE_ONCE(sk->sk_state, NETLINK_UNCONNECTED);
                /* dst_portid and dst_group can be read locklessly */
                WRITE_ONCE(nlk->dst_portid, 0);
                WRITE_ONCE(nlk->dst_group, 0);
                return 0;
        }
        if (addr->sa_family != AF_NETLINK)
                return -EINVAL;

        if (alen < sizeof(struct sockaddr_nl))
                return -EINVAL;

        if ((nladdr->nl_groups || nladdr->nl_pid) &&
            !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
                return -EPERM;

        /* No need for barriers here as we return to user-space without
         * using any of the bound attributes.
         * Paired with WRITE_ONCE() in netlink_insert().
         */
        if (!READ_ONCE(nlk->bound))
                err = netlink_autobind(sock);

        if (err == 0) {
                /* paired with READ_ONCE() in netlink_getsockbyportid() */
                WRITE_ONCE(sk->sk_state, NETLINK_CONNECTED);
                /* dst_portid and dst_group can be read locklessly */
                WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid);
                WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups));
        }

        return err;
}

static int netlink_getname(struct socket *sock, struct sockaddr *addr,
                           int peer)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);

        nladdr->nl_family = AF_NETLINK;
        nladdr->nl_pad = 0;

        if (peer) {
                /* Paired with WRITE_ONCE() in netlink_connect() */
                nladdr->nl_pid = READ_ONCE(nlk->dst_portid);
                nladdr->nl_groups = netlink_group_mask(READ_ONCE(nlk->dst_group));
        } else {
                /* Paired with WRITE_ONCE() in netlink_insert() */
                nladdr->nl_pid = READ_ONCE(nlk->portid);
                netlink_lock_table();
                nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
                netlink_unlock_table();
        }
        return sizeof(*nladdr);
}

static int netlink_ioctl(struct socket *sock, unsigned int cmd,
                         unsigned long arg)
{
        /* Try to hand this ioctl down to the NIC drivers. */
        return -ENOIOCTLCMD;
}

static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
{
        struct sock *sock;
        struct netlink_sock *nlk;

        sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
        if (!sock)
                return ERR_PTR(-ECONNREFUSED);

        /* Don't bother queuing skb if kernel socket has no input function */
        nlk = nlk_sk(sock);
        /* dst_portid and sk_state can be changed in netlink_connect() */
        if (READ_ONCE(sock->sk_state) == NETLINK_CONNECTED &&
            READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) {
                sock_put(sock);
                return ERR_PTR(-ECONNREFUSED);
        }
        return sock;
}

struct sock *netlink_getsockbyfilp(struct file *filp)
{
        struct inode *inode = file_inode(filp);
        struct sock *sock;

        if (!S_ISSOCK(inode->i_mode))
                return ERR_PTR(-ENOTSOCK);

        sock = SOCKET_I(inode)->sk;
        if (sock->sk_family != AF_NETLINK)
                return ERR_PTR(-EINVAL);

        sock_hold(sock);
        return sock;
}

struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
{
        size_t head_size = SKB_HEAD_ALIGN(size);
        struct sk_buff *skb;
        void *data;

        if (head_size <= PAGE_SIZE || broadcast)
                return alloc_skb(size, GFP_KERNEL);

        data = kvmalloc(head_size, GFP_KERNEL);
        if (!data)
                return NULL;

        skb = __build_skb(data, head_size);
        if (!skb)
                kvfree(data);
        else if (is_vmalloc_addr(data))
                skb->destructor = netlink_skb_destructor;

        return skb;
}

/*
 * Attach an skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination; only the
 * error checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
 */
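/* The expected caller pattern is the retry loop used by netlink_unicast()
 * below (a sketch, using names from this file):
 *
 *      err = netlink_attachskb(sk, skb, &timeo, ssk);
 *      if (err == 1)
 *              goto retry;
 *      if (err)
 *              return err;
 *      return netlink_sendskb(sk, skb);
 */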
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
                      long *timeo, struct sock *ssk)
{
        struct netlink_sock *nlk;

        nlk = nlk_sk(sk);

        if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
             test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
                DECLARE_WAITQUEUE(wait, current);
                if (!*timeo) {
                        if (!ssk || netlink_is_kernel(ssk))
                                netlink_overrun(sk);
                        sock_put(sk);
                        kfree_skb(skb);
                        return -EAGAIN;
                }

                __set_current_state(TASK_INTERRUPTIBLE);
                add_wait_queue(&nlk->wait, &wait);

                if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
                     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
                    !sock_flag(sk, SOCK_DEAD))
                        *timeo = schedule_timeout(*timeo);

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nlk->wait, &wait);
                sock_put(sk);

                if (signal_pending(current)) {
                        kfree_skb(skb);
                        return sock_intr_errno(*timeo);
                }
                return 1;
        }
        netlink_skb_set_owner_r(skb, sk);
        return 0;
}

static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
        int len = skb->len;

        netlink_deliver_tap(sock_net(sk), skb);

        skb_queue_tail(&sk->sk_receive_queue, skb);
        sk->sk_data_ready(sk);
        return len;
}

int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
        int len = __netlink_sendskb(sk, skb);

        sock_put(sk);
        return len;
}

void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
        kfree_skb(skb);
        sock_put(sk);
}

static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
{
        int delta;

        WARN_ON(skb->sk != NULL);
        delta = skb->end - skb->tail;
        if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
                return skb;

        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, allocation);
                if (!nskb)
                        return skb;
                consume_skb(skb);
                skb = nskb;
        }

        pskb_expand_head(skb, 0, -delta,
                         (allocation & ~__GFP_DIRECT_RECLAIM) |
                         __GFP_NOWARN | __GFP_NORETRY);
        return skb;
}

static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
                                  struct sock *ssk)
{
        int ret;
        struct netlink_sock *nlk = nlk_sk(sk);

        ret = -ECONNREFUSED;
        if (nlk->netlink_rcv != NULL) {
                ret = skb->len;
                netlink_skb_set_owner_r(skb, sk);
                NETLINK_CB(skb).sk = ssk;
                netlink_deliver_tap_kernel(sk, ssk, skb);
                nlk->netlink_rcv(skb);
                consume_skb(skb);
        } else {
                kfree_skb(skb);
        }
        sock_put(sk);
        return ret;
}

int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
                    u32 portid, int nonblock)
{
        struct sock *sk;
        int err;
        long timeo;

        skb = netlink_trim(skb, gfp_any());

        timeo = sock_sndtimeo(ssk, nonblock);
retry:
        sk = netlink_getsockbyportid(ssk, portid);
        if (IS_ERR(sk)) {
                kfree_skb(skb);
                return PTR_ERR(sk);
        }
        if (netlink_is_kernel(sk))
                return netlink_unicast_kernel(sk, skb, ssk);

        if (sk_filter(sk, skb)) {
                err = skb->len;
                kfree_skb(skb);
                sock_put(sk);
                return err;
        }

        err = netlink_attachskb(sk, skb, &timeo, ssk);
        if (err == 1)
                goto retry;
        if (err)
                return err;

        return netlink_sendskb(sk, skb);
}
EXPORT_SYMBOL(netlink_unicast);

int netlink_has_listeners(struct sock *sk, unsigned int group)
{
        int res = 0;
        struct listeners *listeners;

        BUG_ON(!netlink_is_kernel(sk));

        rcu_read_lock();
        listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);

        if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
                res = test_bit(group - 1, listeners->masks);

        rcu_read_unlock();

        return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);
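
/* A hedged usage sketch: kernel-side notifiers commonly probe for listeners
 * before building an expensive event skb, e.g.:
 *
 *      if (!netlink_has_listeners(knl_sk, MY_MCAST_GRP))
 *              return;
 *
 * "knl_sk" and "MY_MCAST_GRP" are placeholders for a kernel netlink socket
 * and a multicast group number.
 */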

bool netlink_strict_get_check(struct sk_buff *skb)
{
        return nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk);
}
EXPORT_SYMBOL_GPL(netlink_strict_get_check);

static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
            !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
                netlink_skb_set_owner_r(skb, sk);
                __netlink_sendskb(sk, skb);
                return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
        }
        return -1;
}

struct netlink_broadcast_data {
        struct sock *exclude_sk;
        struct net *net;
        u32 portid;
        u32 group;
        int failure;
        int delivery_failure;
        int congested;
        int delivered;
        gfp_t allocation;
        struct sk_buff *skb, *skb2;
        int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
        void *tx_data;
};

static void do_one_broadcast(struct sock *sk,
                                    struct netlink_broadcast_data *p)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        int val;

        if (p->exclude_sk == sk)
                return;

        if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
            !test_bit(p->group - 1, nlk->groups))
                return;

        if (!net_eq(sock_net(sk), p->net)) {
                if (!nlk_test_bit(LISTEN_ALL_NSID, sk))
                        return;

                if (!peernet_has_id(sock_net(sk), p->net))
                        return;

                if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns,
                                     CAP_NET_BROADCAST))
                        return;
        }

        if (p->failure) {
                netlink_overrun(sk);
                return;
        }

        sock_hold(sk);
        if (p->skb2 == NULL) {
                if (skb_shared(p->skb)) {
                        p->skb2 = skb_clone(p->skb, p->allocation);
                } else {
                        p->skb2 = skb_get(p->skb);
                        /*
                         * skb ownership may have been set when
                         * delivered to a previous socket.
                         */
                        skb_orphan(p->skb2);
                }
        }
        if (p->skb2 == NULL) {
                netlink_overrun(sk);
                /* Clone failed. Notify ALL listeners. */
                p->failure = 1;
                if (nlk_test_bit(BROADCAST_SEND_ERROR, sk))
                        p->delivery_failure = 1;
                goto out;
        }

        if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
                kfree_skb(p->skb2);
                p->skb2 = NULL;
                goto out;
        }

        if (sk_filter(sk, p->skb2)) {
                kfree_skb(p->skb2);
                p->skb2 = NULL;
                goto out;
        }
        NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net);
        if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED)
                NETLINK_CB(p->skb2).nsid_is_set = true;
        val = netlink_broadcast_deliver(sk, p->skb2);
        if (val < 0) {
                netlink_overrun(sk);
                if (nlk_test_bit(BROADCAST_SEND_ERROR, sk))
                        p->delivery_failure = 1;
        } else {
                p->congested |= val;
                p->delivered = 1;
                p->skb2 = NULL;
        }
out:
        sock_put(sk);
}

int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
                               u32 portid,
                               u32 group, gfp_t allocation,
                               netlink_filter_fn filter,
                               void *filter_data)
{
        struct net *net = sock_net(ssk);
        struct netlink_broadcast_data info;
        struct sock *sk;

        skb = netlink_trim(skb, allocation);

        info.exclude_sk = ssk;
        info.net = net;
        info.portid = portid;
        info.group = group;
        info.failure = 0;
        info.delivery_failure = 0;
        info.congested = 0;
        info.delivered = 0;
        info.allocation = allocation;
        info.skb = skb;
        info.skb2 = NULL;
        info.tx_filter = filter;
        info.tx_data = filter_data;

        /* While we sleep in clone, do not allow the socket list to change */

        netlink_lock_table();

        sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
                do_one_broadcast(sk, &info);

        consume_skb(skb);

        netlink_unlock_table();

        if (info.delivery_failure) {
                kfree_skb(info.skb2);
                return -ENOBUFS;
        }
        consume_skb(info.skb2);

        if (info.delivered) {
                if (info.congested && gfpflags_allow_blocking(allocation))
                        yield();
                return 0;
        }
        return -ESRCH;
}
EXPORT_SYMBOL(netlink_broadcast_filtered);

int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
                      u32 group, gfp_t allocation)
{
        return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
                                          NULL, NULL);
}
EXPORT_SYMBOL(netlink_broadcast);
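
/* A hedged usage sketch for a kernel-side event source, assuming a kernel
 * socket "knl_sk" created with netlink_kernel_create() and an skb already
 * filled with the notification:
 *
 *      err = netlink_broadcast(knl_sk, skb, 0, MY_MCAST_GRP, GFP_KERNEL);
 *
 * -ESRCH here merely means that no one was listening on the group.
 */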
1568 
1569 struct netlink_set_err_data {
1570         struct sock *exclude_sk;
1571         u32 portid;
1572         u32 group;
1573         int code;
1574 };
1575 
1576 static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
1577 {
1578         struct netlink_sock *nlk = nlk_sk(sk);
1579         int ret = 0;
1580 
1581         if (sk == p->exclude_sk)
1582                 goto out;
1583 
1584         if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
1585                 goto out;
1586 
1587         if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
1588             !test_bit(p->group - 1, nlk->groups))
1589                 goto out;
1590 
1591         if (p->code == ENOBUFS && nlk_test_bit(RECV_NO_ENOBUFS, sk)) {
1592                 ret = 1;
1593                 goto out;
1594         }
1595 
1596         WRITE_ONCE(sk->sk_err, p->code);
1597         sk_error_report(sk);
1598 out:
1599         return ret;
1600 }
1601 
1602 /**
1603  * netlink_set_err - report error to broadcast listeners
1604  * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
1605  * @portid: the PORTID of a process that we want to skip (if any)
1606  * @group: the broadcast group that will notice the error
1607  * @code: error code, must be negative (as usual in kernelspace)
1608  *
1609  * This function returns the number of broadcast listeners that have set the
1610  * NETLINK_NO_ENOBUFS socket option.
1611  */
1612 int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
1613 {
1614         struct netlink_set_err_data info;
1615         unsigned long flags;
1616         struct sock *sk;
1617         int ret = 0;
1618 
1619         info.exclude_sk = ssk;
1620         info.portid = portid;
1621         info.group = group;
1622         /* sk->sk_err wants a positive error value */
1623         info.code = -code;
1624 
1625         read_lock_irqsave(&nl_table_lock, flags);
1626 
1627         sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
1628                 ret += do_one_set_err(sk, &info);
1629 
1630         read_unlock_irqrestore(&nl_table_lock, flags);
1631         return ret;
1632 }
1633 EXPORT_SYMBOL(netlink_set_err);
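
/*
 * Editor's note: a typical call would be
 *
 *      netlink_set_err(nlsk, 0, 1, -ENOBUFS);
 *
 * i.e. raise ENOBUFS on every member of (hypothetical) group 1 without
 * skipping anyone; the return value counts listeners that opted out via
 * NETLINK_NO_ENOBUFS.
 */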
1634 
1635 /* must be called with netlink table grabbed */
1636 static void netlink_update_socket_mc(struct netlink_sock *nlk,
1637                                      unsigned int group,
1638                                      int is_new)
1639 {
1640         int old, new = !!is_new, subscriptions;
1641 
1642         old = test_bit(group - 1, nlk->groups);
1643         subscriptions = nlk->subscriptions - old + new;
1644         __assign_bit(group - 1, nlk->groups, new);
1645         netlink_update_subscriptions(&nlk->sk, subscriptions);
1646         netlink_update_listeners(&nlk->sk);
1647 }
1648 
1649 static int netlink_setsockopt(struct socket *sock, int level, int optname,
1650                               sockptr_t optval, unsigned int optlen)
1651 {
1652         struct sock *sk = sock->sk;
1653         struct netlink_sock *nlk = nlk_sk(sk);
1654         unsigned int val = 0;
1655         int nr = -1;
1656 
1657         if (level != SOL_NETLINK)
1658                 return -ENOPROTOOPT;
1659 
1660         if (optlen >= sizeof(int) &&
1661             copy_from_sockptr(&val, optval, sizeof(val)))
1662                 return -EFAULT;
1663 
1664         switch (optname) {
1665         case NETLINK_PKTINFO:
1666                 nr = NETLINK_F_RECV_PKTINFO;
1667                 break;
1668         case NETLINK_ADD_MEMBERSHIP:
1669         case NETLINK_DROP_MEMBERSHIP: {
1670                 int err;
1671 
1672                 if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
1673                         return -EPERM;
1674                 err = netlink_realloc_groups(sk);
1675                 if (err)
1676                         return err;
1677                 if (!val || val - 1 >= nlk->ngroups)
1678                         return -EINVAL;
1679                 if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) {
1680                         err = nlk->netlink_bind(sock_net(sk), val);
1681                         if (err)
1682                                 return err;
1683                 }
1684                 netlink_table_grab();
1685                 netlink_update_socket_mc(nlk, val,
1686                                          optname == NETLINK_ADD_MEMBERSHIP);
1687                 netlink_table_ungrab();
1688                 if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind)
1689                         nlk->netlink_unbind(sock_net(sk), val);
1690 
1691                 break;
1692         }
1693         case NETLINK_BROADCAST_ERROR:
1694                 nr = NETLINK_F_BROADCAST_SEND_ERROR;
1695                 break;
1696         case NETLINK_NO_ENOBUFS:
1697                 assign_bit(NETLINK_F_RECV_NO_ENOBUFS, &nlk->flags, val);
1698                 if (val) {
1699                         clear_bit(NETLINK_S_CONGESTED, &nlk->state);
1700                         wake_up_interruptible(&nlk->wait);
1701                 }
1702                 break;
1703         case NETLINK_LISTEN_ALL_NSID:
1704                 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
1705                         return -EPERM;
1706                 nr = NETLINK_F_LISTEN_ALL_NSID;
1707                 break;
1708         case NETLINK_CAP_ACK:
1709                 nr = NETLINK_F_CAP_ACK;
1710                 break;
1711         case NETLINK_EXT_ACK:
1712                 nr = NETLINK_F_EXT_ACK;
1713                 break;
1714         case NETLINK_GET_STRICT_CHK:
1715                 nr = NETLINK_F_STRICT_CHK;
1716                 break;
1717         default:
1718                 return -ENOPROTOOPT;
1719         }
1720         if (nr >= 0)
1721                 assign_bit(nr, &nlk->flags, val);
1722         return 0;
1723 }
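
/*
 * Editor's sketch (user space, not part of this file): the membership
 * options above are what a listener uses to join and leave multicast
 * groups, here with RTNLGRP_LINK on NETLINK_ROUTE as a concrete example:
 *
 *      int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *      int grp = RTNLGRP_LINK;
 *
 *      setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
 *                 &grp, sizeof(grp));
 *      ... receive notifications ...
 *      setsockopt(fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP,
 *                 &grp, sizeof(grp));
 */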
1724 
1725 static int netlink_getsockopt(struct socket *sock, int level, int optname,
1726                               char __user *optval, int __user *optlen)
1727 {
1728         struct sock *sk = sock->sk;
1729         struct netlink_sock *nlk = nlk_sk(sk);
1730         unsigned int flag;
1731         int len, val;
1732 
1733         if (level != SOL_NETLINK)
1734                 return -ENOPROTOOPT;
1735 
1736         if (get_user(len, optlen))
1737                 return -EFAULT;
1738         if (len < 0)
1739                 return -EINVAL;
1740 
1741         switch (optname) {
1742         case NETLINK_PKTINFO:
1743                 flag = NETLINK_F_RECV_PKTINFO;
1744                 break;
1745         case NETLINK_BROADCAST_ERROR:
1746                 flag = NETLINK_F_BROADCAST_SEND_ERROR;
1747                 break;
1748         case NETLINK_NO_ENOBUFS:
1749                 flag = NETLINK_F_RECV_NO_ENOBUFS;
1750                 break;
1751         case NETLINK_LIST_MEMBERSHIPS: {
1752                 int pos, idx, shift, err = 0;
1753 
1754                 netlink_lock_table();
1755                 for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {
1756                         if (len - pos < sizeof(u32))
1757                                 break;
1758 
1759                         idx = pos / sizeof(unsigned long);
1760                         shift = (pos % sizeof(unsigned long)) * 8;
1761                         if (put_user((u32)(nlk->groups[idx] >> shift),
1762                                      (u32 __user *)(optval + pos))) {
1763                                 err = -EFAULT;
1764                                 break;
1765                         }
1766                 }
1767                 if (put_user(ALIGN(BITS_TO_BYTES(nlk->ngroups), sizeof(u32)), optlen))
1768                         err = -EFAULT;
1769                 netlink_unlock_table();
1770                 return err;
1771         }
1772         case NETLINK_LISTEN_ALL_NSID:
1773                 flag = NETLINK_F_LISTEN_ALL_NSID;
1774                 break;
1775         case NETLINK_CAP_ACK:
1776                 flag = NETLINK_F_CAP_ACK;
1777                 break;
1778         case NETLINK_EXT_ACK:
1779                 flag = NETLINK_F_EXT_ACK;
1780                 break;
1781         case NETLINK_GET_STRICT_CHK:
1782                 flag = NETLINK_F_STRICT_CHK;
1783                 break;
1784         default:
1785                 return -ENOPROTOOPT;
1786         }
1787 
1788         if (len < sizeof(int))
1789                 return -EINVAL;
1790 
1791         len = sizeof(int);
1792         val = test_bit(flag, &nlk->flags);
1793 
1794         if (put_user(len, optlen) ||
1795             copy_to_user(optval, &val, len))
1796                 return -EFAULT;
1797 
1798         return 0;
1799 }
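
/*
 * Editor's note (user space): NETLINK_LIST_MEMBERSHIPS above permits a
 * size probe, since the required length is stored in optlen even when
 * the buffer is too small:
 *
 *      socklen_t sz = 0;
 *      getsockopt(fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, NULL, &sz);
 *      uint32_t *groups = malloc(sz);
 *      getsockopt(fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, groups, &sz);
 */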
1800 
1801 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
1802 {
1803         struct nl_pktinfo info;
1804 
1805         info.group = NETLINK_CB(skb).dst_group;
1806         put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
1807 }
1808 
1809 static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg,
1810                                          struct sk_buff *skb)
1811 {
1812         if (!NETLINK_CB(skb).nsid_is_set)
1813                 return;
1814 
1815         put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int),
1816                  &NETLINK_CB(skb).nsid);
1817 }
1818 
1819 static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1820 {
1821         struct sock *sk = sock->sk;
1822         struct netlink_sock *nlk = nlk_sk(sk);
1823         DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
1824         u32 dst_portid;
1825         u32 dst_group;
1826         struct sk_buff *skb;
1827         int err;
1828         struct scm_cookie scm;
1829         u32 netlink_skb_flags = 0;
1830 
1831         if (msg->msg_flags & MSG_OOB)
1832                 return -EOPNOTSUPP;
1833 
1834         if (len == 0) {
1835                 pr_warn_once("Zero length message leads to an empty skb\n");
1836                 return -ENODATA;
1837         }
1838 
1839         err = scm_send(sock, msg, &scm, true);
1840         if (err < 0)
1841                 return err;
1842 
1843         if (msg->msg_namelen) {
1844                 err = -EINVAL;
1845                 if (msg->msg_namelen < sizeof(struct sockaddr_nl))
1846                         goto out;
1847                 if (addr->nl_family != AF_NETLINK)
1848                         goto out;
1849                 dst_portid = addr->nl_pid;
1850                 dst_group = ffs(addr->nl_groups);
1851                 err = -EPERM;
1852                 if ((dst_group || dst_portid) &&
1853                     !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
1854                         goto out;
1855                 netlink_skb_flags |= NETLINK_SKB_DST;
1856         } else {
1857                 /* Paired with WRITE_ONCE() in netlink_connect() */
1858                 dst_portid = READ_ONCE(nlk->dst_portid);
1859                 dst_group = READ_ONCE(nlk->dst_group);
1860         }
1861 
1862         /* Paired with WRITE_ONCE() in netlink_insert() */
1863         if (!READ_ONCE(nlk->bound)) {
1864                 err = netlink_autobind(sock);
1865                 if (err)
1866                         goto out;
1867         } else {
1868                 /* Ensure nlk is hashed and visible. */
1869                 smp_rmb();
1870         }
1871 
1872         err = -EMSGSIZE;
1873         if (len > sk->sk_sndbuf - 32)
1874                 goto out;
1875         err = -ENOBUFS;
1876         skb = netlink_alloc_large_skb(len, dst_group);
1877         if (skb == NULL)
1878                 goto out;
1879 
1880         NETLINK_CB(skb).portid  = nlk->portid;
1881         NETLINK_CB(skb).dst_group = dst_group;
1882         NETLINK_CB(skb).creds   = scm.creds;
1883         NETLINK_CB(skb).flags   = netlink_skb_flags;
1884 
1885         err = -EFAULT;
1886         if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
1887                 kfree_skb(skb);
1888                 goto out;
1889         }
1890 
1891         err = security_netlink_send(sk, skb);
1892         if (err) {
1893                 kfree_skb(skb);
1894                 goto out;
1895         }
1896 
1897         if (dst_group) {
1898                 refcount_inc(&skb->users);
1899                 netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
1900         }
1901         err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags & MSG_DONTWAIT);
1902 
1903 out:
1904         scm_destroy(&scm);
1905         return err;
1906 }
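
/*
 * Editor's sketch (user space, with a hypothetical request "req"): the
 * common way into netlink_sendmsg() above. nl_pid 0 addresses the
 * kernel, and setting a bit in nl_groups additionally multicasts the
 * message:
 *
 *      struct sockaddr_nl dst = { .nl_family = AF_NETLINK };
 *      struct nlmsghdr *req;   built elsewhere, nlmsg_len filled in
 *
 *      sendto(fd, req, req->nlmsg_len, 0,
 *             (struct sockaddr *)&dst, sizeof(dst));
 */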
1907 
1908 static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1909                            int flags)
1910 {
1911         struct scm_cookie scm;
1912         struct sock *sk = sock->sk;
1913         struct netlink_sock *nlk = nlk_sk(sk);
1914         size_t copied, max_recvmsg_len;
1915         struct sk_buff *skb, *data_skb;
1916         int err, ret;
1917 
1918         if (flags & MSG_OOB)
1919                 return -EOPNOTSUPP;
1920 
1921         copied = 0;
1922 
1923         skb = skb_recv_datagram(sk, flags, &err);
1924         if (skb == NULL)
1925                 goto out;
1926 
1927         data_skb = skb;
1928 
1929 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES
1930         if (unlikely(skb_shinfo(skb)->frag_list)) {
1931                 /*
1932                  * If this skb has a frag_list, it means that we have to use
1933                  * the frag_list skb's data for compat tasks and the regular
1934                  * skb's data for normal (non-compat) tasks.
1935                  *
1936                  * If we need to send the compat skb, assign it to the
1937                  * 'data_skb' variable so that it will be used below for data
1938                  * copying. We keep 'skb' for everything else, including
1939                  * freeing both later.
1940                  */
1941                 if (flags & MSG_CMSG_COMPAT)
1942                         data_skb = skb_shinfo(skb)->frag_list;
1943         }
1944 #endif
1945 
1946         /* Record the max length of recvmsg() calls for future allocations */
1947         max_recvmsg_len = max(READ_ONCE(nlk->max_recvmsg_len), len);
1948         max_recvmsg_len = min_t(size_t, max_recvmsg_len,
1949                                 SKB_WITH_OVERHEAD(32768));
1950         WRITE_ONCE(nlk->max_recvmsg_len, max_recvmsg_len);
1951 
1952         copied = data_skb->len;
1953         if (len < copied) {
1954                 msg->msg_flags |= MSG_TRUNC;
1955                 copied = len;
1956         }
1957 
1958         err = skb_copy_datagram_msg(data_skb, 0, msg, copied);
1959 
1960         if (msg->msg_name) {
1961                 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
1962                 addr->nl_family = AF_NETLINK;
1963                 addr->nl_pad    = 0;
1964                 addr->nl_pid    = NETLINK_CB(skb).portid;
1965                 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
1966                 msg->msg_namelen = sizeof(*addr);
1967         }
1968 
1969         if (nlk_test_bit(RECV_PKTINFO, sk))
1970                 netlink_cmsg_recv_pktinfo(msg, skb);
1971         if (nlk_test_bit(LISTEN_ALL_NSID, sk))
1972                 netlink_cmsg_listen_all_nsid(sk, msg, skb);
1973 
1974         memset(&scm, 0, sizeof(scm));
1975         scm.creds = *NETLINK_CREDS(skb);
1976         if (flags & MSG_TRUNC)
1977                 copied = data_skb->len;
1978 
1979         skb_free_datagram(sk, skb);
1980 
1981         if (READ_ONCE(nlk->cb_running) &&
1982             atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
1983                 ret = netlink_dump(sk, false);
1984                 if (ret) {
1985                         WRITE_ONCE(sk->sk_err, -ret);
1986                         sk_error_report(sk);
1987                 }
1988         }
1989 
1990         scm_recv(sock, msg, &scm, flags);
1991 out:
1992         netlink_rcv_wake(sk);
1993         return err ? : copied;
1994 }
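
/*
 * Editor's note (user space): because the MSG_TRUNC handling above
 * returns the full skb length, a receiver can size its buffer with a
 * peek before the real read:
 *
 *      ssize_t need = recv(fd, NULL, 0, MSG_PEEK | MSG_TRUNC);
 *
 * then allocate "need" bytes and recv() again without those flags.
 */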
1995 
1996 static void netlink_data_ready(struct sock *sk)
1997 {
1998         BUG();
1999 }
2000 
2001 /*
2002  *      We export these functions to other modules. They provide a
2003  *      complete set of non-blocking kernel support for message
2004  *      queueing.
2005  */
2006 
2007 struct sock *
2008 __netlink_kernel_create(struct net *net, int unit, struct module *module,
2009                         struct netlink_kernel_cfg *cfg)
2010 {
2011         struct socket *sock;
2012         struct sock *sk;
2013         struct netlink_sock *nlk;
2014         struct listeners *listeners = NULL;
2015         unsigned int groups;
2016 
2017         BUG_ON(!nl_table);
2018 
2019         if (unit < 0 || unit >= MAX_LINKS)
2020                 return NULL;
2021 
2022         if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
2023                 return NULL;
2024 
2025         if (__netlink_create(net, sock, unit, 1) < 0)
2026                 goto out_sock_release_nosk;
2027 
2028         sk = sock->sk;
2029 
2030         if (!cfg || cfg->groups < 32)
2031                 groups = 32;
2032         else
2033                 groups = cfg->groups;
2034 
2035         listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2036         if (!listeners)
2037                 goto out_sock_release;
2038 
2039         sk->sk_data_ready = netlink_data_ready;
2040         if (cfg && cfg->input)
2041                 nlk_sk(sk)->netlink_rcv = cfg->input;
2042 
2043         if (netlink_insert(sk, 0))
2044                 goto out_sock_release;
2045 
2046         nlk = nlk_sk(sk);
2047         set_bit(NETLINK_F_KERNEL_SOCKET, &nlk->flags);
2048 
2049         netlink_table_grab();
2050         if (!nl_table[unit].registered) {
2051                 nl_table[unit].groups = groups;
2052                 rcu_assign_pointer(nl_table[unit].listeners, listeners);
2053                 nl_table[unit].module = module;
2054                 if (cfg) {
2055                         nl_table[unit].bind = cfg->bind;
2056                         nl_table[unit].unbind = cfg->unbind;
2057                         nl_table[unit].release = cfg->release;
2058                         nl_table[unit].flags = cfg->flags;
2059                 }
2060                 nl_table[unit].registered = 1;
2061         } else {
2062                 kfree(listeners);
2063                 nl_table[unit].registered++;
2064         }
2065         netlink_table_ungrab();
2066         return sk;
2067 
2068 out_sock_release:
2069         kfree(listeners);
2070         netlink_kernel_release(sk);
2071         return NULL;
2072 
2073 out_sock_release_nosk:
2074         sock_release(sock);
2075         return NULL;
2076 }
2077 EXPORT_SYMBOL(__netlink_kernel_create);
2078 
2079 void
2080 netlink_kernel_release(struct sock *sk)
2081 {
2082         if (sk == NULL || sk->sk_socket == NULL)
2083                 return;
2084 
2085         sock_release(sk->sk_socket);
2086 }
2087 EXPORT_SYMBOL(netlink_kernel_release);
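
/*
 * Editor's sketch: the usual pairing of the two functions above, via
 * the netlink_kernel_create() wrapper (which supplies THIS_MODULE).
 * NETLINK_USERSOCK is only a stand-in unit here; a real family would
 * use its own assigned protocol number.
 */
static struct sock *example_nlsk;

static void example_input(struct sk_buff *skb);     /* sketched further below */

static int __init example_init(void)
{
        struct netlink_kernel_cfg cfg = {
                .input  = example_input,
                .groups = 32,
        };

        example_nlsk = netlink_kernel_create(&init_net, NETLINK_USERSOCK, &cfg);
        return example_nlsk ? 0 : -ENOMEM;
}

static void __exit example_exit(void)
{
        netlink_kernel_release(example_nlsk);
}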
2088 
2089 int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
2090 {
2091         struct listeners *new, *old;
2092         struct netlink_table *tbl = &nl_table[sk->sk_protocol];
2093 
2094         if (groups < 32)
2095                 groups = 32;
2096 
2097         if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
2098                 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
2099                 if (!new)
2100                         return -ENOMEM;
2101                 old = nl_deref_protected(tbl->listeners);
2102                 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
2103                 rcu_assign_pointer(tbl->listeners, new);
2104 
2105                 kfree_rcu(old, rcu);
2106         }
2107         tbl->groups = groups;
2108 
2109         return 0;
2110 }
2111 
2112 /**
2113  * netlink_change_ngroups - change number of multicast groups
2114  *
2115  * This changes the number of multicast groups that are available
2116  * on a certain netlink family. Note that it is not possible to
2117  * change the number of groups to below 32. Also note that it does
2118  * not implicitly call netlink_clear_multicast_users() when the
2119  * number of groups is reduced.
2120  *
2121  * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
2122  * @groups: The new number of groups.
2123  */
2124 int netlink_change_ngroups(struct sock *sk, unsigned int groups)
2125 {
2126         int err;
2127 
2128         netlink_table_grab();
2129         err = __netlink_change_ngroups(sk, groups);
2130         netlink_table_ungrab();
2131 
2132         return err;
2133 }
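
/*
 * Editor's note: generic netlink is the main user; when families bring
 * new multicast groups it grows the table roughly like
 *
 *      netlink_change_ngroups(genl_sock, total_mcgrp_count);
 *
 * where genl_sock and total_mcgrp_count are stand-in names here.
 */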
2134 
2135 void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
2136 {
2137         struct sock *sk;
2138         struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
2139 
2140         sk_for_each_bound(sk, &tbl->mc_list)
2141                 netlink_update_socket_mc(nlk_sk(sk), group, 0);
2142 }
2143 
2144 struct nlmsghdr *
2145 __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
2146 {
2147         struct nlmsghdr *nlh;
2148         int size = nlmsg_msg_size(len);
2149 
2150         nlh = skb_put(skb, NLMSG_ALIGN(size));
2151         nlh->nlmsg_type = type;
2152         nlh->nlmsg_len = size;
2153         nlh->nlmsg_flags = flags;
2154         nlh->nlmsg_pid = portid;
2155         nlh->nlmsg_seq = seq;
2156         if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
2157                 memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
2158         return nlh;
2159 }
2160 EXPORT_SYMBOL(__nlmsg_put);
2161 
2162 static size_t
2163 netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
2164                     const struct netlink_ext_ack *extack)
2165 {
2166         size_t tlvlen;
2167 
2168         if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags))
2169                 return 0;
2170 
2171         tlvlen = 0;
2172         if (extack->_msg)
2173                 tlvlen += nla_total_size(strlen(extack->_msg) + 1);
2174         if (extack->cookie_len)
2175                 tlvlen += nla_total_size(extack->cookie_len);
2176 
2177         /* The following attributes are only reported for errors (not warnings) */
2178         if (!err)
2179                 return tlvlen;
2180 
2181         if (extack->bad_attr)
2182                 tlvlen += nla_total_size(sizeof(u32));
2183         if (extack->policy)
2184                 tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
2185         if (extack->miss_type)
2186                 tlvlen += nla_total_size(sizeof(u32));
2187         if (extack->miss_nest)
2188                 tlvlen += nla_total_size(sizeof(u32));
2189 
2190         return tlvlen;
2191 }
2192 
2193 static void
2194 netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
2195                      const struct nlmsghdr *nlh, int err,
2196                      const struct netlink_ext_ack *extack)
2197 {
2198         if (extack->_msg)
2199                 WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg));
2200         if (extack->cookie_len)
2201                 WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
2202                                 extack->cookie_len, extack->cookie));
2203 
2204         if (!err)
2205                 return;
2206 
2207         if (extack->bad_attr &&
2208             !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
2209                      (u8 *)extack->bad_attr >= in_skb->data + in_skb->len))
2210                 WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
2211                                     (u8 *)extack->bad_attr - (const u8 *)nlh));
2212         if (extack->policy)
2213                 netlink_policy_dump_write_attr(skb, extack->policy,
2214                                                NLMSGERR_ATTR_POLICY);
2215         if (extack->miss_type)
2216                 WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE,
2217                                     extack->miss_type));
2218         if (extack->miss_nest &&
2219             !WARN_ON((u8 *)extack->miss_nest < in_skb->data ||
2220                      (u8 *)extack->miss_nest > in_skb->data + in_skb->len))
2221                 WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST,
2222                                     (u8 *)extack->miss_nest - (const u8 *)nlh));
2223 }
2224 
2225 /*
2226  * This looks a bit ugly.
2227  * It would be better to create a kernel thread.
2228  */
2229 
2230 static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
2231                              struct netlink_callback *cb,
2232                              struct netlink_ext_ack *extack)
2233 {
2234         struct nlmsghdr *nlh;
2235         size_t extack_len;
2236 
2237         nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(nlk->dump_done_errno),
2238                                NLM_F_MULTI | cb->answer_flags);
2239         if (WARN_ON(!nlh))
2240                 return -ENOBUFS;
2241 
2242         nl_dump_check_consistent(cb, nlh);
2243         memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, sizeof(nlk->dump_done_errno));
2244 
2245         extack_len = netlink_ack_tlv_len(nlk, nlk->dump_done_errno, extack);
2246         if (extack_len) {
2247                 nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
2248                 if (skb_tailroom(skb) >= extack_len) {
2249                         netlink_ack_tlv_fill(cb->skb, skb, cb->nlh,
2250                                              nlk->dump_done_errno, extack);
2251                         nlmsg_end(skb, nlh);
2252                 }
2253         }
2254 
2255         return 0;
2256 }
2257 
2258 static int netlink_dump(struct sock *sk, bool lock_taken)
2259 {
2260         struct netlink_sock *nlk = nlk_sk(sk);
2261         struct netlink_ext_ack extack = {};
2262         struct netlink_callback *cb;
2263         struct sk_buff *skb = NULL;
2264         size_t max_recvmsg_len;
2265         struct module *module;
2266         int err = -ENOBUFS;
2267         int alloc_min_size;
2268         int alloc_size;
2269 
2270         if (!lock_taken)
2271                 mutex_lock(&nlk->nl_cb_mutex);
2272         if (!nlk->cb_running) {
2273                 err = -EINVAL;
2274                 goto errout_skb;
2275         }
2276 
2277         if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2278                 goto errout_skb;
2279 
2280         /* NLMSG_GOODSIZE is small to avoid high order allocations being
2281          * required, but it makes sense to _attempt_ a 16K allocation to
2282          * reduce the number of system calls on dump operations, if the
2283          * user ever provided a big enough buffer.
2284          */
2285         cb = &nlk->cb;
2286         alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
2287 
2288         max_recvmsg_len = READ_ONCE(nlk->max_recvmsg_len);
2289         if (alloc_min_size < max_recvmsg_len) {
2290                 alloc_size = max_recvmsg_len;
2291                 skb = alloc_skb(alloc_size,
2292                                 (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) |
2293                                 __GFP_NOWARN | __GFP_NORETRY);
2294         }
2295         if (!skb) {
2296                 alloc_size = alloc_min_size;
2297                 skb = alloc_skb(alloc_size, GFP_KERNEL);
2298         }
2299         if (!skb)
2300                 goto errout_skb;
2301 
2302         /* Trim skb to the allocated size. The user is expected to provide
2303          * a buffer as large as max(min_dump_alloc, 16KiB (max_recvmsg_len,
2304          * capped at netlink_recvmsg())). The dump will pack as many smaller
2305          * messages as fit within the allocated skb. The skb is typically
2306          * allocated with more space than required (possibly near 2x the
2307          * requested size, with the align-to-next-power-of-2 approach).
2308          * Allowing the dump to use the excess space would make it hard for
2309          * a user to size a reasonable static buffer based on the expected
2310          * largest dump of a single netdev; the outcome is a MSG_TRUNC error.
2311          */
2312         skb_reserve(skb, skb_tailroom(skb) - alloc_size);
2313 
2314         /* Make sure malicious BPF programs cannot read uninitialized memory
2315          * from skb->head -> skb->data
2316          */
2317         skb_reset_network_header(skb);
2318         skb_reset_mac_header(skb);
2319 
2320         netlink_skb_set_owner_r(skb, sk);
2321 
2322         if (nlk->dump_done_errno > 0) {
2323                 cb->extack = &extack;
2324 
2325                 nlk->dump_done_errno = cb->dump(skb, cb);
2326 
2327                 /* EMSGSIZE plus something already in the skb means
2328                  * that there's more to dump but the current skb has filled up.
2329                  * If the callback really wants to return EMSGSIZE to user space
2330                  * it needs to do so again, on the next cb->dump() call,
2331                  * without putting data in the skb.
2332                  */
2333                 if (nlk->dump_done_errno == -EMSGSIZE && skb->len)
2334                         nlk->dump_done_errno = skb->len;
2335 
2336                 cb->extack = NULL;
2337         }
2338 
2339         if (nlk->dump_done_errno > 0 ||
2340             skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
2341                 mutex_unlock(&nlk->nl_cb_mutex);
2342 
2343                 if (sk_filter(sk, skb))
2344                         kfree_skb(skb);
2345                 else
2346                         __netlink_sendskb(sk, skb);
2347                 return 0;
2348         }
2349 
2350         if (netlink_dump_done(nlk, skb, cb, &extack))
2351                 goto errout_skb;
2352 
2353 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES
2354         /* frag_list skb's data is used for compat tasks
2355          * and the regular skb's data for normal (non-compat) tasks.
2356          * See netlink_recvmsg().
2357          */
2358         if (unlikely(skb_shinfo(skb)->frag_list)) {
2359                 if (netlink_dump_done(nlk, skb_shinfo(skb)->frag_list, cb, &extack))
2360                         goto errout_skb;
2361         }
2362 #endif
2363 
2364         if (sk_filter(sk, skb))
2365                 kfree_skb(skb);
2366         else
2367                 __netlink_sendskb(sk, skb);
2368 
2369         if (cb->done)
2370                 cb->done(cb);
2371 
2372         WRITE_ONCE(nlk->cb_running, false);
2373         module = cb->module;
2374         skb = cb->skb;
2375         mutex_unlock(&nlk->nl_cb_mutex);
2376         module_put(module);
2377         consume_skb(skb);
2378         return 0;
2379 
2380 errout_skb:
2381         mutex_unlock(&nlk->nl_cb_mutex);
2382         kfree_skb(skb);
2383         return err;
2384 }
2385 
2386 int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
2387                          const struct nlmsghdr *nlh,
2388                          struct netlink_dump_control *control)
2389 {
2390         struct netlink_callback *cb;
2391         struct netlink_sock *nlk;
2392         struct sock *sk;
2393         int ret;
2394 
2395         refcount_inc(&skb->users);
2396 
2397         sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
2398         if (sk == NULL) {
2399                 ret = -ECONNREFUSED;
2400                 goto error_free;
2401         }
2402 
2403         nlk = nlk_sk(sk);
2404         mutex_lock(&nlk->nl_cb_mutex);
2405         /* A dump is in progress... */
2406         if (nlk->cb_running) {
2407                 ret = -EBUSY;
2408                 goto error_unlock;
2409         }
2410         /* take a reference on the module that cb->dump belongs to */
2411         if (!try_module_get(control->module)) {
2412                 ret = -EPROTONOSUPPORT;
2413                 goto error_unlock;
2414         }
2415 
2416         cb = &nlk->cb;
2417         memset(cb, 0, sizeof(*cb));
2418         cb->dump = control->dump;
2419         cb->done = control->done;
2420         cb->nlh = nlh;
2421         cb->data = control->data;
2422         cb->module = control->module;
2423         cb->min_dump_alloc = control->min_dump_alloc;
2424         cb->flags = control->flags;
2425         cb->skb = skb;
2426 
2427         cb->strict_check = nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk);
2428 
2429         if (control->start) {
2430                 cb->extack = control->extack;
2431                 ret = control->start(cb);
2432                 cb->extack = NULL;
2433                 if (ret)
2434                         goto error_put;
2435         }
2436 
2437         WRITE_ONCE(nlk->cb_running, true);
2438         nlk->dump_done_errno = INT_MAX;
2439 
2440         ret = netlink_dump(sk, true);
2441 
2442         sock_put(sk);
2443 
2444         if (ret)
2445                 return ret;
2446 
2447         /* We successfully started a dump; by returning -EINTR we
2448          * signal that no ACK should be sent, even if one was requested.
2449          */
2450         return -EINTR;
2451 
2452 error_put:
2453         module_put(control->module);
2454 error_unlock:
2455         sock_put(sk);
2456         mutex_unlock(&nlk->nl_cb_mutex);
2457 error_free:
2458         kfree_skb(skb);
2459         return ret;
2460 }
2461 EXPORT_SYMBOL(__netlink_dump_start);
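
/*
 * Editor's sketch: how a request handler typically enters the machinery
 * above, through the netlink_dump_start() wrapper. example_dump() is
 * hypothetical; it appends messages to the skb and returns a positive
 * value while more remains, 0 once the dump is complete. example_nlsk
 * is the kernel socket from the earlier sketch.
 */
static int example_dump(struct sk_buff *skb, struct netlink_callback *cb);

static int example_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                        struct netlink_ext_ack *extack)
{
        if (nlh->nlmsg_flags & NLM_F_DUMP) {
                struct netlink_dump_control c = {
                        .dump   = example_dump,
                        .module = THIS_MODULE,
                };

                /* returns -EINTR on success, which suppresses the ACK */
                return netlink_dump_start(example_nlsk, skb, nlh, &c);
        }
        return -EOPNOTSUPP;
}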
2462 
2463 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
2464                  const struct netlink_ext_ack *extack)
2465 {
2466         struct sk_buff *skb;
2467         struct nlmsghdr *rep;
2468         struct nlmsgerr *errmsg;
2469         size_t payload = sizeof(*errmsg);
2470         struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
2471         unsigned int flags = 0;
2472         size_t tlvlen;
2473 
2474         /* Error messages get the original request appended, unless the user
2475          * asks to cap the error message, and get extra error data if
2476          * requested.
2477          */
2478         if (err && !test_bit(NETLINK_F_CAP_ACK, &nlk->flags))
2479                 payload += nlmsg_len(nlh);
2480         else
2481                 flags |= NLM_F_CAPPED;
2482 
2483         tlvlen = netlink_ack_tlv_len(nlk, err, extack);
2484         if (tlvlen)
2485                 flags |= NLM_F_ACK_TLVS;
2486 
2487         skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
2488         if (!skb)
2489                 goto err_skb;
2490 
2491         rep = nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
2492                         NLMSG_ERROR, sizeof(*errmsg), flags);
2493         if (!rep)
2494                 goto err_bad_put;
2495         errmsg = nlmsg_data(rep);
2496         errmsg->error = err;
2497         errmsg->msg = *nlh;
2498 
2499         if (!(flags & NLM_F_CAPPED)) {
2500                 if (!nlmsg_append(skb, nlmsg_len(nlh)))
2501                         goto err_bad_put;
2502 
2503                 memcpy(nlmsg_data(&errmsg->msg), nlmsg_data(nlh),
2504                        nlmsg_len(nlh));
2505         }
2506 
2507         if (tlvlen)
2508                 netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack);
2509 
2510         nlmsg_end(skb, rep);
2511 
2512         nlmsg_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid);
2513 
2514         return;
2515 
2516 err_bad_put:
2517         nlmsg_free(skb);
2518 err_skb:
2519         WRITE_ONCE(NETLINK_CB(in_skb).sk->sk_err, ENOBUFS);
2520         sk_error_report(NETLINK_CB(in_skb).sk);
2521 }
2522 EXPORT_SYMBOL(netlink_ack);
2523 
2524 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
2525                                                    struct nlmsghdr *,
2526                                                    struct netlink_ext_ack *))
2527 {
2528         struct netlink_ext_ack extack;
2529         struct nlmsghdr *nlh;
2530         int err;
2531 
2532         while (skb->len >= nlmsg_total_size(0)) {
2533                 int msglen;
2534 
2535                 memset(&extack, 0, sizeof(extack));
2536                 nlh = nlmsg_hdr(skb);
2537                 err = 0;
2538 
2539                 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
2540                         return 0;
2541 
2542                 /* Only requests are handled by the kernel */
2543                 if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
2544                         goto ack;
2545 
2546                 /* Skip control messages */
2547                 if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
2548                         goto ack;
2549 
2550                 err = cb(skb, nlh, &extack);
2551                 if (err == -EINTR)
2552                         goto skip;
2553 
2554 ack:
2555                 if (nlh->nlmsg_flags & NLM_F_ACK || err)
2556                         netlink_ack(skb, nlh, err, &extack);
2557 
2558 skip:
2559                 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
2560                 if (msglen > skb->len)
2561                         msglen = skb->len;
2562                 skb_pull(skb, msglen);
2563         }
2564 
2565         return 0;
2566 }
2567 EXPORT_SYMBOL(netlink_rcv_skb);
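
/*
 * Editor's sketch: the glue between a kernel socket's ->input handler
 * and the demultiplexer above; example_rcv_msg() is hypothetical and
 * would dispatch on nlh->nlmsg_type (here simply to example_doit()
 * from the earlier sketch).
 */
static int example_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
                           struct netlink_ext_ack *extack)
{
        return example_doit(skb, nlh, extack);
}

static void example_input(struct sk_buff *skb)
{
        netlink_rcv_skb(skb, &example_rcv_msg);
}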
2568 
2569 /**
2570  * nlmsg_notify - send a notification netlink message
2571  * @sk: netlink socket to use
2572  * @skb: notification message
2573  * @portid: destination netlink portid for reports or 0
2574  * @group: destination multicast group or 0
2575  * @report: 1 to report back, 0 to disable
2576  * @flags: allocation flags
2577  */
2578 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
2579                  unsigned int group, int report, gfp_t flags)
2580 {
2581         int err = 0;
2582 
2583         if (group) {
2584                 int exclude_portid = 0;
2585 
2586                 if (report) {
2587                         refcount_inc(&skb->users);
2588                         exclude_portid = portid;
2589                 }
2590 
2591                 /* errors are reported via the destination sk->sk_err, but
2592                  * delivery errors propagate if NETLINK_BROADCAST_ERROR is set */
2593                 err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
2594                 if (err == -ESRCH)
2595                         err = 0;
2596         }
2597 
2598         if (report) {
2599                 int err2;
2600 
2601                 err2 = nlmsg_unicast(sk, skb, portid);
2602                 if (!err)
2603                         err = err2;
2604         }
2605 
2606         return err;
2607 }
2608 EXPORT_SYMBOL(nlmsg_notify);
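
/*
 * Editor's note: rtnetlink's rtnl_notify() is the classic caller,
 * roughly
 *
 *      nlmsg_notify(rtnl, skb, portid, group,
 *                   nlh && (nlh->nlmsg_flags & NLM_F_ECHO), flags);
 *
 * i.e. @report is set exactly when the requester asked for an echo of
 * the result.
 */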
2609 
2610 #ifdef CONFIG_PROC_FS
2611 struct nl_seq_iter {
2612         struct seq_net_private p;
2613         struct rhashtable_iter hti;
2614         int link;
2615 };
2616 
2617 static void netlink_walk_start(struct nl_seq_iter *iter)
2618 {
2619         rhashtable_walk_enter(&nl_table[iter->link].hash, &iter->hti);
2620         rhashtable_walk_start(&iter->hti);
2621 }
2622 
2623 static void netlink_walk_stop(struct nl_seq_iter *iter)
2624 {
2625         rhashtable_walk_stop(&iter->hti);
2626         rhashtable_walk_exit(&iter->hti);
2627 }
2628 
2629 static void *__netlink_seq_next(struct seq_file *seq)
2630 {
2631         struct nl_seq_iter *iter = seq->private;
2632         struct netlink_sock *nlk;
2633 
2634         do {
2635                 for (;;) {
2636                         nlk = rhashtable_walk_next(&iter->hti);
2637 
2638                         if (IS_ERR(nlk)) {
2639                                 if (PTR_ERR(nlk) == -EAGAIN)
2640                                         continue;
2641 
2642                                 return nlk;
2643                         }
2644 
2645                         if (nlk)
2646                                 break;
2647 
2648                         netlink_walk_stop(iter);
2649                         if (++iter->link >= MAX_LINKS)
2650                                 return NULL;
2651 
2652                         netlink_walk_start(iter);
2653                 }
2654         } while (sock_net(&nlk->sk) != seq_file_net(seq));
2655 
2656         return nlk;
2657 }
2658 
2659 static void *netlink_seq_start(struct seq_file *seq, loff_t *posp)
2660         __acquires(RCU)
2661 {
2662         struct nl_seq_iter *iter = seq->private;
2663         void *obj = SEQ_START_TOKEN;
2664         loff_t pos;
2665 
2666         iter->link = 0;
2667 
2668         netlink_walk_start(iter);
2669 
2670         for (pos = *posp; pos && obj && !IS_ERR(obj); pos--)
2671                 obj = __netlink_seq_next(seq);
2672 
2673         return obj;
2674 }
2675 
2676 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2677 {
2678         ++*pos;
2679         return __netlink_seq_next(seq);
2680 }
2681 
2682 static void netlink_native_seq_stop(struct seq_file *seq, void *v)
2683 {
2684         struct nl_seq_iter *iter = seq->private;
2685 
2686         if (iter->link >= MAX_LINKS)
2687                 return;
2688 
2689         netlink_walk_stop(iter);
2690 }
2691 
2692 
2693 static int netlink_native_seq_show(struct seq_file *seq, void *v)
2694 {
2695         if (v == SEQ_START_TOKEN) {
2696                 seq_puts(seq,
2697                          "sk               Eth Pid        Groups   "
2698                          "Rmem     Wmem     Dump  Locks    Drops    Inode\n");
2699         } else {
2700                 struct sock *s = v;
2701                 struct netlink_sock *nlk = nlk_sk(s);
2702 
2703                 seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8u %-8lu\n",
2704                            s,
2705                            s->sk_protocol,
2706                            nlk->portid,
2707                            nlk->groups ? (u32)nlk->groups[0] : 0,
2708                            sk_rmem_alloc_get(s),
2709                            sk_wmem_alloc_get(s),
2710                            READ_ONCE(nlk->cb_running),
2711                            refcount_read(&s->sk_refcnt),
2712                            atomic_read(&s->sk_drops),
2713                            sock_i_ino(s)
2714                         );
2715 
2716         }
2717         return 0;
2718 }
2719 
2720 #ifdef CONFIG_BPF_SYSCALL
2721 struct bpf_iter__netlink {
2722         __bpf_md_ptr(struct bpf_iter_meta *, meta);
2723         __bpf_md_ptr(struct netlink_sock *, sk);
2724 };
2725 
2726 DEFINE_BPF_ITER_FUNC(netlink, struct bpf_iter_meta *meta, struct netlink_sock *sk)
2727 
2728 static int netlink_prog_seq_show(struct bpf_prog *prog,
2729                                   struct bpf_iter_meta *meta,
2730                                   void *v)
2731 {
2732         struct bpf_iter__netlink ctx;
2733 
2734         meta->seq_num--;  /* skip SEQ_START_TOKEN */
2735         ctx.meta = meta;
2736         ctx.sk = nlk_sk((struct sock *)v);
2737         return bpf_iter_run_prog(prog, &ctx);
2738 }
2739 
2740 static int netlink_seq_show(struct seq_file *seq, void *v)
2741 {
2742         struct bpf_iter_meta meta;
2743         struct bpf_prog *prog;
2744 
2745         meta.seq = seq;
2746         prog = bpf_iter_get_info(&meta, false);
2747         if (!prog)
2748                 return netlink_native_seq_show(seq, v);
2749 
2750         if (v != SEQ_START_TOKEN)
2751                 return netlink_prog_seq_show(prog, &meta, v);
2752 
2753         return 0;
2754 }
2755 
2756 static void netlink_seq_stop(struct seq_file *seq, void *v)
2757 {
2758         struct bpf_iter_meta meta;
2759         struct bpf_prog *prog;
2760 
2761         if (!v) {
2762                 meta.seq = seq;
2763                 prog = bpf_iter_get_info(&meta, true);
2764                 if (prog)
2765                         (void)netlink_prog_seq_show(prog, &meta, v);
2766         }
2767 
2768         netlink_native_seq_stop(seq, v);
2769 }
2770 #else
2771 static int netlink_seq_show(struct seq_file *seq, void *v)
2772 {
2773         return netlink_native_seq_show(seq, v);
2774 }
2775 
2776 static void netlink_seq_stop(struct seq_file *seq, void *v)
2777 {
2778         netlink_native_seq_stop(seq, v);
2779 }
2780 #endif
2781 
2782 static const struct seq_operations netlink_seq_ops = {
2783         .start  = netlink_seq_start,
2784         .next   = netlink_seq_next,
2785         .stop   = netlink_seq_stop,
2786         .show   = netlink_seq_show,
2787 };
2788 #endif
2789 
2790 int netlink_register_notifier(struct notifier_block *nb)
2791 {
2792         return blocking_notifier_chain_register(&netlink_chain, nb);
2793 }
2794 EXPORT_SYMBOL(netlink_register_notifier);
2795 
2796 int netlink_unregister_notifier(struct notifier_block *nb)
2797 {
2798         return blocking_notifier_chain_unregister(&netlink_chain, nb);
2799 }
2800 EXPORT_SYMBOL(netlink_unregister_notifier);
2801 
2802 static const struct proto_ops netlink_ops = {
2803         .family =       PF_NETLINK,
2804         .owner =        THIS_MODULE,
2805         .release =      netlink_release,
2806         .bind =         netlink_bind,
2807         .connect =      netlink_connect,
2808         .socketpair =   sock_no_socketpair,
2809         .accept =       sock_no_accept,
2810         .getname =      netlink_getname,
2811         .poll =         datagram_poll,
2812         .ioctl =        netlink_ioctl,
2813         .listen =       sock_no_listen,
2814         .shutdown =     sock_no_shutdown,
2815         .setsockopt =   netlink_setsockopt,
2816         .getsockopt =   netlink_getsockopt,
2817         .sendmsg =      netlink_sendmsg,
2818         .recvmsg =      netlink_recvmsg,
2819         .mmap =         sock_no_mmap,
2820 };
2821 
2822 static const struct net_proto_family netlink_family_ops = {
2823         .family = PF_NETLINK,
2824         .create = netlink_create,
2825         .owner  = THIS_MODULE,  /* for consistency 8) */
2826 };
2827 
2828 static int __net_init netlink_net_init(struct net *net)
2829 {
2830 #ifdef CONFIG_PROC_FS
2831         if (!proc_create_net("netlink", 0, net->proc_net, &netlink_seq_ops,
2832                         sizeof(struct nl_seq_iter)))
2833                 return -ENOMEM;
2834 #endif
2835         return 0;
2836 }
2837 
2838 static void __net_exit netlink_net_exit(struct net *net)
2839 {
2840 #ifdef CONFIG_PROC_FS
2841         remove_proc_entry("netlink", net->proc_net);
2842 #endif
2843 }
2844 
2845 static void __init netlink_add_usersock_entry(void)
2846 {
2847         struct listeners *listeners;
2848         int groups = 32;
2849 
2850         listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2851         if (!listeners)
2852                 panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
2853 
2854         netlink_table_grab();
2855 
2856         nl_table[NETLINK_USERSOCK].groups = groups;
2857         rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
2858         nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
2859         nl_table[NETLINK_USERSOCK].registered = 1;
2860         nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;
2861 
2862         netlink_table_ungrab();
2863 }
2864 
2865 static struct pernet_operations __net_initdata netlink_net_ops = {
2866         .init = netlink_net_init,
2867         .exit = netlink_net_exit,
2868 };
2869 
2870 static inline u32 netlink_hash(const void *data, u32 len, u32 seed)
2871 {
2872         const struct netlink_sock *nlk = data;
2873         struct netlink_compare_arg arg;
2874 
2875         netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid);
2876         return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed);
2877 }
2878 
2879 static const struct rhashtable_params netlink_rhashtable_params = {
2880         .head_offset = offsetof(struct netlink_sock, node),
2881         .key_len = netlink_compare_arg_len,
2882         .obj_hashfn = netlink_hash,
2883         .obj_cmpfn = netlink_compare,
2884         .automatic_shrinking = true,
2885 };
2886 
2887 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
2888 BTF_ID_LIST(btf_netlink_sock_id)
2889 BTF_ID(struct, netlink_sock)
2890 
2891 static const struct bpf_iter_seq_info netlink_seq_info = {
2892         .seq_ops                = &netlink_seq_ops,
2893         .init_seq_private       = bpf_iter_init_seq_net,
2894         .fini_seq_private       = bpf_iter_fini_seq_net,
2895         .seq_priv_size          = sizeof(struct nl_seq_iter),
2896 };
2897 
2898 static struct bpf_iter_reg netlink_reg_info = {
2899         .target                 = "netlink",
2900         .ctx_arg_info_size      = 1,
2901         .ctx_arg_info           = {
2902                 { offsetof(struct bpf_iter__netlink, sk),
2903                   PTR_TO_BTF_ID_OR_NULL },
2904         },
2905         .seq_info               = &netlink_seq_info,
2906 };
2907 
2908 static int __init bpf_iter_register(void)
2909 {
2910         netlink_reg_info.ctx_arg_info[0].btf_id = *btf_netlink_sock_id;
2911         return bpf_iter_reg_target(&netlink_reg_info);
2912 }
2913 #endif
2914 
2915 static int __init netlink_proto_init(void)
2916 {
2917         int i;
2918         int err = proto_register(&netlink_proto, 0);
2919 
2920         if (err != 0)
2921                 goto out;
2922 
2923 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
2924         err = bpf_iter_register();
2925         if (err)
2926                 goto out;
2927 #endif
2928 
2929         BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof_field(struct sk_buff, cb));
2930 
2931         nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
2932         if (!nl_table)
2933                 goto panic;
2934 
2935         for (i = 0; i < MAX_LINKS; i++) {
2936                 if (rhashtable_init(&nl_table[i].hash,
2937                                     &netlink_rhashtable_params) < 0) {
2938                         while (--i > 0)
2939                                 rhashtable_destroy(&nl_table[i].hash);
2940                         kfree(nl_table);
2941                         goto panic;
2942                 }
2943         }
2944 
2945         netlink_add_usersock_entry();
2946 
2947         sock_register(&netlink_family_ops);
2948         register_pernet_subsys(&netlink_net_ops);
2949         register_pernet_subsys(&netlink_tap_net_ops);
2950         /* The netlink device handler may be needed early. */
2951         rtnetlink_init();
2952 out:
2953         return err;
2954 panic:
2955         panic("netlink_init: Cannot allocate nl_table\n");
2956 }
2957 
2958 core_initcall(netlink_proto_init);
2959 
