~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/ipv6/ip6_fib.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-or-later
  2 /*
  3  *      Linux INET6 implementation
  4  *      Forwarding Information Database
  5  *
  6  *      Authors:
  7  *      Pedro Roque             <roque@di.fc.ul.pt>
  8  *
  9  *      Changes:
 10  *      Yuji SEKIYA @USAGI:     Support default route on router node;
 11  *                              remove ip6_null_entry from the top of
 12  *                              routing table.
 13  *      Ville Nuorvala:         Fixed routing subtrees.
 14  */
 15 
 16 #define pr_fmt(fmt) "IPv6: " fmt
 17 
 18 #include <linux/bpf.h>
 19 #include <linux/errno.h>
 20 #include <linux/types.h>
 21 #include <linux/net.h>
 22 #include <linux/route.h>
 23 #include <linux/netdevice.h>
 24 #include <linux/in6.h>
 25 #include <linux/init.h>
 26 #include <linux/list.h>
 27 #include <linux/slab.h>
 28 
 29 #include <net/ip.h>
 30 #include <net/ipv6.h>
 31 #include <net/ndisc.h>
 32 #include <net/addrconf.h>
 33 #include <net/lwtunnel.h>
 34 #include <net/fib_notifier.h>
 35 
 36 #include <net/ip_fib.h>
 37 #include <net/ip6_fib.h>
 38 #include <net/ip6_route.h>
 39 
    /* Slab cache from which node_alloc() obtains struct fib6_node objects. */
 40 static struct kmem_cache *fib6_node_kmem __read_mostly;
 41 
    /*
     * Bundles a tree walker with a per-route callback and the context
     * handed to it.
     * NOTE(review): the code that fills in and consumes this structure is
     * outside this view; the field notes below are read off names and
     * types only and should be confirmed against the users.
     */
 42 struct fib6_cleaner {
 43         struct fib6_walker w;           /* embedded walker */
 44         struct net *net;                /* network namespace */
 45         int (*func)(struct fib6_info *, void *arg); /* per-route callback */
 46         int sernum;                     /* presumably a serial number to apply - confirm */
 47         void *arg;                      /* presumably the opaque argument for func - confirm */
 48         bool skip_notify;               /* presumably suppresses notifications - confirm */
 49 };
 50 
    /*
     * Starting state for a tree walk: FWS_S when CONFIG_IPV6_SUBTREES is
     * set, FWS_L otherwise.  fib6_dump_table() stores it in w->state when
     * a dump has to restart from the root.
     */
 51 #ifdef CONFIG_IPV6_SUBTREES
 52 #define FWS_INIT FWS_S
 53 #else
 54 #define FWS_INIT FWS_L
 55 #endif
 56 
    /*
     * Forward declarations of static helpers that are used before their
     * definitions; the definitions themselves are outside this view.
     */
 57 static struct fib6_info *fib6_find_prefix(struct net *net,
 58                                          struct fib6_table *table,
 59                                          struct fib6_node *fn);
 60 static struct fib6_node *fib6_repair_tree(struct net *net,
 61                                           struct fib6_table *table,
 62                                           struct fib6_node *fn);
 63 static int fib6_walk(struct net *net, struct fib6_walker *w);
 64 static int fib6_walk_continue(struct fib6_walker *w);
 65 
 66 /*
 67  *      A routing update causes an increase of the serial number on the
 68  *      affected subtree. This allows for cached routes to be asynchronously
 69  *      tested when modifications are made to the destination cache as a
 70  *      result of redirects, path MTU changes, etc.
 71  */
 72 
    /*
     * Forward declaration of a timer callback (it takes the timer_list
     * that fired).  Judging by the name it drives FIB garbage collection;
     * the definition is outside this view - confirm.
     */
 73 static void fib6_gc_timer_cb(struct timer_list *t);
 74 
    /*
     * Iterate @w over every walker linked (through its ->lh member) on
     * @net's ipv6.fib6_walkers list.
     */
 75 #define FOR_WALKERS(net, w) \
 76         list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh)
 77 
 78 static void fib6_walker_link(struct net *net, struct fib6_walker *w)
 79 {
 80         write_lock_bh(&net->ipv6.fib6_walker_lock);
 81         list_add(&w->lh, &net->ipv6.fib6_walkers);
 82         write_unlock_bh(&net->ipv6.fib6_walker_lock);
 83 }
 84 
 85 static void fib6_walker_unlink(struct net *net, struct fib6_walker *w)
 86 {
 87         write_lock_bh(&net->ipv6.fib6_walker_lock);
 88         list_del(&w->lh);
 89         write_unlock_bh(&net->ipv6.fib6_walker_lock);
 90 }
 91 
 92 static int fib6_new_sernum(struct net *net)
 93 {
 94         int new, old = atomic_read(&net->ipv6.fib6_sernum);
 95 
 96         do {
 97                 new = old < INT_MAX ? old + 1 : 1;
 98         } while (!atomic_try_cmpxchg(&net->ipv6.fib6_sernum, &old, new));
 99 
100         return new;
101 }
102 
    /*
     * NOTE(review): no user of this constant is visible in this part of
     * the file.  Presumably it is a sentinel meaning "do not touch the
     * node serial number" - confirm against its users.
     */
103 enum {
104         FIB6_NO_SERNUM_CHANGE = 0,
105 };
106 
107 void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
108 {
109         struct fib6_node *fn;
110 
111         fn = rcu_dereference_protected(f6i->fib6_node,
112                         lockdep_is_held(&f6i->fib6_table->tb6_lock));
113         if (fn)
114                 WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net));
115 }
116 
117 /*
118  *      Auxiliary address test functions for the radix tree.
119  *
120  *      These assume a 32bit processor (although it will work on
121  *      64bit processors)
122  */
123 
124 /*
125  *      test bit
126  */
    /*
     * XOR mask that addr_bit_set() applies to a bit index:
     * (0x1F & ~7) == 0x18 when __LITTLE_ENDIAN is defined, 0 otherwise.
     */
127 #if defined(__LITTLE_ENDIAN)
128 # define BITOP_BE32_SWIZZLE     (0x1F & ~7)
129 #else
130 # define BITOP_BE32_SWIZZLE     0
131 #endif
132 
133 static __be32 addr_bit_set(const void *token, int fn_bit)
134 {
135         const __be32 *addr = token;
136         /*
137          * Here,
138          *      1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
139          * is optimized version of
140          *      htonl(1 << ((~fn_bit)&0x1F))
141          * See include/asm-generic/bitops/le.h.
142          */
143         return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) &
144                addr[fn_bit >> 5];
145 }
146 
147 struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
148 {
149         struct fib6_info *f6i;
150         size_t sz = sizeof(*f6i);
151 
152         if (with_fib6_nh)
153                 sz += sizeof(struct fib6_nh);
154 
155         f6i = kzalloc(sz, gfp_flags);
156         if (!f6i)
157                 return NULL;
158 
159         /* fib6_siblings is a union with nh_list, so this initializes both */
160         INIT_LIST_HEAD(&f6i->fib6_siblings);
161         refcount_set(&f6i->fib6_ref, 1);
162 
163         INIT_HLIST_NODE(&f6i->gc_link);
164 
165         return f6i;
166 }
167 
168 void fib6_info_destroy_rcu(struct rcu_head *head)
169 {
170         struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
171 
172         WARN_ON(f6i->fib6_node);
173 
174         if (f6i->nh)
175                 nexthop_put(f6i->nh);
176         else
177                 fib6_nh_release(f6i->fib6_nh);
178 
179         ip_fib_metrics_put(f6i->fib6_metrics);
180         kfree(f6i);
181 }
182 EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
183 
184 static struct fib6_node *node_alloc(struct net *net)
185 {
186         struct fib6_node *fn;
187 
188         fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
189         if (fn)
190                 net->ipv6.rt6_stats->fib_nodes++;
191 
192         return fn;
193 }
194 
    /*
     * Hand @fn straight back to the fib6_node slab cache (no RCU deferral)
     * and decrement the fib_nodes counter in @net's rt6_stats.
     * In this view its only caller is the allocation-failure path of
     * fib6_add_1(), which frees nodes that were never linked into the tree.
     */
195 static void node_free_immediate(struct net *net, struct fib6_node *fn)
196 {
197         kmem_cache_free(fib6_node_kmem, fn);
198         net->ipv6.rt6_stats->fib_nodes--;
199 }
200 
201 static void node_free_rcu(struct rcu_head *head)
202 {
203         struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
204 
205         kmem_cache_free(fib6_node_kmem, fn);
206 }
207 
    /*
     * Queue @fn for freeing through call_rcu() (node_free_rcu() does the
     * actual release) and decrement the fib_nodes counter in @net's
     * rt6_stats right away.
     */
208 static void node_free(struct net *net, struct fib6_node *fn)
209 {
210         call_rcu(&fn->rcu, node_free_rcu);
211         net->ipv6.rt6_stats->fib_nodes--;
212 }
213 
    /*
     * Invalidate the inet peer tree embedded in @table, then free the
     * table itself with kfree().
     */
214 static void fib6_free_table(struct fib6_table *table)
215 {
216         inetpeer_invalidate_tree(&table->tb6_peers);
217         kfree(table);
218 }
219 
220 static void fib6_link_table(struct net *net, struct fib6_table *tb)
221 {
222         unsigned int h;
223 
224         /*
225          * Initialize table lock at a single place to give lockdep a key,
226          * tables aren't visible prior to being linked to the list.
227          */
228         spin_lock_init(&tb->tb6_lock);
229         h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
230 
231         /*
232          * No protection necessary, this is the only list mutatation
233          * operation, tables never disappear once they exist.
234          */
235         hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]);
236 }
237 
238 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
239 
240 static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
241 {
242         struct fib6_table *table;
243 
244         table = kzalloc(sizeof(*table), GFP_ATOMIC);
245         if (table) {
246                 table->tb6_id = id;
247                 rcu_assign_pointer(table->tb6_root.leaf,
248                                    net->ipv6.fib6_null_entry);
249                 table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
250                 inet_peer_base_init(&table->tb6_peers);
251                 INIT_HLIST_HEAD(&table->tb6_gc_hlist);
252         }
253 
254         return table;
255 }
256 
257 struct fib6_table *fib6_new_table(struct net *net, u32 id)
258 {
259         struct fib6_table *tb;
260 
261         if (id == 0)
262                 id = RT6_TABLE_MAIN;
263         tb = fib6_get_table(net, id);
264         if (tb)
265                 return tb;
266 
267         tb = fib6_alloc_table(net, id);
268         if (tb)
269                 fib6_link_table(net, tb);
270 
271         return tb;
272 }
273 EXPORT_SYMBOL_GPL(fib6_new_table);
274 
275 struct fib6_table *fib6_get_table(struct net *net, u32 id)
276 {
277         struct fib6_table *tb;
278         struct hlist_head *head;
279         unsigned int h;
280 
281         if (id == 0)
282                 id = RT6_TABLE_MAIN;
283         h = id & (FIB6_TABLE_HASHSZ - 1);
284         rcu_read_lock();
285         head = &net->ipv6.fib_table_hash[h];
286         hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
287                 if (tb->tb6_id == id) {
288                         rcu_read_unlock();
289                         return tb;
290                 }
291         }
292         rcu_read_unlock();
293 
294         return NULL;
295 }
296 EXPORT_SYMBOL_GPL(fib6_get_table);
297 
    /*
     * CONFIG_IPV6_MULTIPLE_TABLES variant: link @net's main and local
     * tables into the table hash.
     */
298 static void __net_init fib6_tables_init(struct net *net)
299 {
300         fib6_link_table(net, net->ipv6.fib6_main_tbl);
301         fib6_link_table(net, net->ipv6.fib6_local_tbl);
302 }
303 #else
304 
    /*
     * Variant built without CONFIG_IPV6_MULTIPLE_TABLES: nothing is
     * created, the request is simply forwarded to fib6_get_table().
     */
305 struct fib6_table *fib6_new_table(struct net *net, u32 id)
306 {
307         return fib6_get_table(net, id);
308 }
309 
    /*
     * Variant built without CONFIG_IPV6_MULTIPLE_TABLES: @id is ignored
     * and @net's main table is always returned.
     */
310 struct fib6_table *fib6_get_table(struct net *net, u32 id)
311 {
312           return net->ipv6.fib6_main_tbl;
313 }
314 
315 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
316                                    const struct sk_buff *skb,
317                                    int flags, pol_lookup_t lookup)
318 {
319         struct rt6_info *rt;
320 
321         rt = pol_lookup_func(lookup,
322                         net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
323         if (rt->dst.error == -EAGAIN) {
324                 ip6_rt_put_flags(rt, flags);
325                 rt = net->ipv6.ip6_null_entry;
326                 if (!(flags & RT6_LOOKUP_F_DST_NOREF))
327                         dst_hold(&rt->dst);
328         }
329 
330         return &rt->dst;
331 }
332 
    /*
     * Variant built without CONFIG_IPV6_MULTIPLE_TABLES: forward the lookup
     * to fib6_table_lookup() on @net's main table and return its result.
     *
     * called with rcu lock held; no reference taken on fib6_info
     */
334 int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
335                 struct fib6_result *res, int flags)
336 {
337         return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6,
338                                  res, flags);
339 }
340 
    /*
     * Variant built without CONFIG_IPV6_MULTIPLE_TABLES: only @net's main
     * table is linked into the table hash.
     */
341 static void __net_init fib6_tables_init(struct net *net)
342 {
343         fib6_link_table(net, net->ipv6.fib6_main_tbl);
344 }
345 
346 #endif
347 
348 unsigned int fib6_tables_seq_read(struct net *net)
349 {
350         unsigned int h, fib_seq = 0;
351 
352         rcu_read_lock();
353         for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
354                 struct hlist_head *head = &net->ipv6.fib_table_hash[h];
355                 struct fib6_table *tb;
356 
357                 hlist_for_each_entry_rcu(tb, head, tb6_hlist)
358                         fib_seq += tb->fib_seq;
359         }
360         rcu_read_unlock();
361 
362         return fib_seq;
363 }
364 
365 static int call_fib6_entry_notifier(struct notifier_block *nb,
366                                     enum fib_event_type event_type,
367                                     struct fib6_info *rt,
368                                     struct netlink_ext_ack *extack)
369 {
370         struct fib6_entry_notifier_info info = {
371                 .info.extack = extack,
372                 .rt = rt,
373         };
374 
375         return call_fib6_notifier(nb, event_type, &info.info);
376 }
377 
378 static int call_fib6_multipath_entry_notifier(struct notifier_block *nb,
379                                               enum fib_event_type event_type,
380                                               struct fib6_info *rt,
381                                               unsigned int nsiblings,
382                                               struct netlink_ext_ack *extack)
383 {
384         struct fib6_entry_notifier_info info = {
385                 .info.extack = extack,
386                 .rt = rt,
387                 .nsiblings = nsiblings,
388         };
389 
390         return call_fib6_notifier(nb, event_type, &info.info);
391 }
392 
393 int call_fib6_entry_notifiers(struct net *net,
394                               enum fib_event_type event_type,
395                               struct fib6_info *rt,
396                               struct netlink_ext_ack *extack)
397 {
398         struct fib6_entry_notifier_info info = {
399                 .info.extack = extack,
400                 .rt = rt,
401         };
402 
403         rt->fib6_table->fib_seq++;
404         return call_fib6_notifiers(net, event_type, &info.info);
405 }
406 
407 int call_fib6_multipath_entry_notifiers(struct net *net,
408                                         enum fib_event_type event_type,
409                                         struct fib6_info *rt,
410                                         unsigned int nsiblings,
411                                         struct netlink_ext_ack *extack)
412 {
413         struct fib6_entry_notifier_info info = {
414                 .info.extack = extack,
415                 .rt = rt,
416                 .nsiblings = nsiblings,
417         };
418 
419         rt->fib6_table->fib_seq++;
420         return call_fib6_notifiers(net, event_type, &info.info);
421 }
422 
423 int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt)
424 {
425         struct fib6_entry_notifier_info info = {
426                 .rt = rt,
427                 .nsiblings = rt->fib6_nsiblings,
428         };
429 
430         rt->fib6_table->fib_seq++;
431         return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info);
432 }
433 
    /*
     * Context that fib6_tables_dump() hangs off w->args and that
     * fib6_rt_dump() consumes.
     */
434 struct fib6_dump_arg {
435         struct net *net;                /* used to recognise fib6_null_entry */
436         struct notifier_block *nb;      /* notifier block that receives the dump */
437         struct netlink_ext_ack *extack; /* forwarded to the notifier */
438 };
439 
440 static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
441 {
442         enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE;
443         int err;
444 
445         if (!rt || rt == arg->net->ipv6.fib6_null_entry)
446                 return 0;
447 
448         if (rt->fib6_nsiblings)
449                 err = call_fib6_multipath_entry_notifier(arg->nb, fib_event,
450                                                          rt,
451                                                          rt->fib6_nsiblings,
452                                                          arg->extack);
453         else
454                 err = call_fib6_entry_notifier(arg->nb, fib_event, rt,
455                                                arg->extack);
456 
457         return err;
458 }
459 
460 static int fib6_node_dump(struct fib6_walker *w)
461 {
462         int err;
463 
464         err = fib6_rt_dump(w->leaf, w->args);
465         w->leaf = NULL;
466         return err;
467 }
468 
469 static int fib6_table_dump(struct net *net, struct fib6_table *tb,
470                            struct fib6_walker *w)
471 {
472         int err;
473 
474         w->root = &tb->tb6_root;
475         spin_lock_bh(&tb->tb6_lock);
476         err = fib6_walk(net, w);
477         spin_unlock_bh(&tb->tb6_lock);
478         return err;
479 }
480 
481 /* Called with rcu_read_lock() */
482 int fib6_tables_dump(struct net *net, struct notifier_block *nb,
483                      struct netlink_ext_ack *extack)
484 {
485         struct fib6_dump_arg arg;
486         struct fib6_walker *w;
487         unsigned int h;
488         int err = 0;
489 
490         w = kzalloc(sizeof(*w), GFP_ATOMIC);
491         if (!w)
492                 return -ENOMEM;
493 
494         w->func = fib6_node_dump;
495         arg.net = net;
496         arg.nb = nb;
497         arg.extack = extack;
498         w->args = &arg;
499 
500         for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
501                 struct hlist_head *head = &net->ipv6.fib_table_hash[h];
502                 struct fib6_table *tb;
503 
504                 hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
505                         err = fib6_table_dump(net, tb, w);
506                         if (err)
507                                 goto out;
508                 }
509         }
510 
511 out:
512         kfree(w);
513 
514         /* The tree traversal function should never return a positive value. */
515         return err > 0 ? -EINVAL : err;
516 }
517 
    /*
     * Walker callback for netlink route dumps: pass the routes of the
     * current node to rt6_dump_route(), telling it through
     * w->skip_in_node how many routes of this node to skip.
     *
     * Returns 1 to suspend the walk as soon as rt6_dump_route() returns a
     * non-negative value; w->leaf then records the route to resume at and
     * the returned count is added to w->skip_in_node.  Returns 0, with
     * w->leaf cleared, once the loop has run to completion.
     *
     * NOTE(review): for_each_fib6_walker_rt() is defined outside this
     * view; presumably it iterates 'rt' over the routes starting at
     * w->leaf - confirm.  The loop body deliberately reassigns 'rt'.
     */
518 static int fib6_dump_node(struct fib6_walker *w)
519 {
520         int res;
521         struct fib6_info *rt;
522 
523         for_each_fib6_walker_rt(w) {
524                 res = rt6_dump_route(rt, w->args, w->skip_in_node);
525                 if (res >= 0) {
526                         /* Frame is full, suspend walking */
527                         w->leaf = rt;
528 
529                         /* We'll restart from this node, so if some routes were
530                          * already dumped, skip them next time.
531                          */
532                         w->skip_in_node += res;
533 
534                         return 1;
535                 }
536                 w->skip_in_node = 0;
537 
538                 /* Multipath routes are dumped in one route with the
539                  * RTA_MULTIPATH attribute. Jump 'rt' to point to the
540                  * last sibling of this route (no need to dump the
541                  * sibling routes again)
542                  */
543                 if (rt->fib6_nsiblings)
544                         rt = list_last_entry(&rt->fib6_siblings,
545                                              struct fib6_info,
546                                              fib6_siblings);
547         }
548         w->leaf = NULL;
549         return 0;
550 }
551 
    /*
     * Tear down the per-dump state that inet6_dump_fib() keeps in cb->args:
     *   args[2] - pointer to the walker allocated for this dump
     *   args[3] - the cb->done handler that was installed before
     *             inet6_dump_fib() replaced it with fib6_dump_done()
     *   args[4] - set to 1 by fib6_dump_table() while a walk is suspended
     *
     * The walker, if any, is unlinked when args[4] is set and then freed;
     * the original done handler is restored in every case.
     */
552 static void fib6_dump_end(struct netlink_callback *cb)
553 {
554         struct net *net = sock_net(cb->skb->sk);
555         struct fib6_walker *w = (void *)cb->args[2];
556 
557         if (w) {
558                 if (cb->args[4]) {
559                         cb->args[4] = 0;
560                         fib6_walker_unlink(net, w);
561                 }
562                 cb->args[2] = 0;
563                 kfree(w);
564         }
565         cb->done = (void *)cb->args[3];
566         /* NOTE(review): the meaning of the value 3 is not evident from this view - confirm */
567         cb->args[1] = 3;
568 }
568 
569 static int fib6_dump_done(struct netlink_callback *cb)
570 {
571         fib6_dump_end(cb);
572         return cb->done ? cb->done(cb) : 0;
573 }
574 
575 static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
576                            struct netlink_callback *cb)
577 {
578         struct net *net = sock_net(skb->sk);
579         struct fib6_walker *w;
580         int res;
581 
582         w = (void *)cb->args[2];
583         w->root = &table->tb6_root;
584 
585         if (cb->args[4] == 0) {
586                 w->count = 0;
587                 w->skip = 0;
588                 w->skip_in_node = 0;
589 
590                 spin_lock_bh(&table->tb6_lock);
591                 res = fib6_walk(net, w);
592                 spin_unlock_bh(&table->tb6_lock);
593                 if (res > 0) {
594                         cb->args[4] = 1;
595                         cb->args[5] = READ_ONCE(w->root->fn_sernum);
596                 }
597         } else {
598                 int sernum = READ_ONCE(w->root->fn_sernum);
599                 if (cb->args[5] != sernum) {
600                         /* Begin at the root if the tree changed */
601                         cb->args[5] = sernum;
602                         w->state = FWS_INIT;
603                         w->node = w->root;
604                         w->skip = w->count;
605                         w->skip_in_node = 0;
606                 } else
607                         w->skip = 0;
608 
609                 spin_lock_bh(&table->tb6_lock);
610                 res = fib6_walk_continue(w);
611                 spin_unlock_bh(&table->tb6_lock);
612                 if (res <= 0) {
613                         fib6_walker_unlink(net, w);
614                         cb->args[4] = 0;
615                 }
616         }
617 
618         return res;
619 }
620 
621 static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
622 {
623         struct rt6_rtnl_dump_arg arg = {
624                 .filter.dump_exceptions = true,
625                 .filter.dump_routes = true,
626                 .filter.rtnl_held = false,
627         };
628         const struct nlmsghdr *nlh = cb->nlh;
629         struct net *net = sock_net(skb->sk);
630         unsigned int e = 0, s_e;
631         struct hlist_head *head;
632         struct fib6_walker *w;
633         struct fib6_table *tb;
634         unsigned int h, s_h;
635         int err = 0;
636 
637         rcu_read_lock();
638         if (cb->strict_check) {
639                 err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb);
640                 if (err < 0)
641                         goto unlock;
642         } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
643                 struct rtmsg *rtm = nlmsg_data(nlh);
644 
645                 if (rtm->rtm_flags & RTM_F_PREFIX)
646                         arg.filter.flags = RTM_F_PREFIX;
647         }
648 
649         w = (void *)cb->args[2];
650         if (!w) {
651                 /* New dump:
652                  *
653                  * 1. allocate and initialize walker.
654                  */
655                 w = kzalloc(sizeof(*w), GFP_ATOMIC);
656                 if (!w) {
657                         err = -ENOMEM;
658                         goto unlock;
659                 }
660                 w->func = fib6_dump_node;
661                 cb->args[2] = (long)w;
662 
663                 /* 2. hook callback destructor.
664                  */
665                 cb->args[3] = (long)cb->done;
666                 cb->done = fib6_dump_done;
667 
668         }
669 
670         arg.skb = skb;
671         arg.cb = cb;
672         arg.net = net;
673         w->args = &arg;
674 
675         if (arg.filter.table_id) {
676                 tb = fib6_get_table(net, arg.filter.table_id);
677                 if (!tb) {
678                         if (rtnl_msg_family(cb->nlh) != PF_INET6)
679                                 goto unlock;
680 
681                         NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
682                         err = -ENOENT;
683                         goto unlock;
684                 }
685 
686                 if (!cb->args[0]) {
687                         err = fib6_dump_table(tb, skb, cb);
688                         if (!err)
689                                 cb->args[0] = 1;
690                 }
691                 goto unlock;
692         }
693 
694         s_h = cb->args[0];
695         s_e = cb->args[1];
696 
697         for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
698                 e = 0;
699                 head = &net->ipv6.fib_table_hash[h];
700                 hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
701                         if (e < s_e)
702                                 goto next;
703                         err = fib6_dump_table(tb, skb, cb);
704                         if (err != 0)
705                                 goto out;
706 next:
707                         e++;
708                 }
709         }
710 out:
711         cb->args[1] = e;
712         cb->args[0] = h;
713 
714 unlock:
715         rcu_read_unlock();
716         if (err <= 0)
717                 fib6_dump_end(cb);
718         return err;
719 }
720 
721 void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val)
722 {
723         if (!f6i)
724                 return;
725 
726         if (f6i->fib6_metrics == &dst_default_metrics) {
727                 struct dst_metrics *p = kzalloc(sizeof(*p), GFP_ATOMIC);
728 
729                 if (!p)
730                         return;
731 
732                 refcount_set(&p->refcnt, 1);
733                 f6i->fib6_metrics = p;
734         }
735 
736         f6i->fib6_metrics->metrics[metric - 1] = val;
737 }
738 
739 /*
740  *      Routing Table
741  *
742  *      return the appropriate node for a routing tree "add" operation
743  *      by either creating and inserting or by returning an existing
744  *      node.
745  */
746 
747 static struct fib6_node *fib6_add_1(struct net *net,
748                                     struct fib6_table *table,
749                                     struct fib6_node *root,
750                                     struct in6_addr *addr, int plen,
751                                     int offset, int allow_create,
752                                     int replace_required,
753                                     struct netlink_ext_ack *extack)
754 {
755         struct fib6_node *fn, *in, *ln;
756         struct fib6_node *pn = NULL;
757         struct rt6key *key;
758         int     bit;
759         __be32  dir = 0;
760 
761         /* insert node in tree */
762 
763         fn = root;
764 
765         do {
766                 struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
767                                             lockdep_is_held(&table->tb6_lock));
768                 key = (struct rt6key *)((u8 *)leaf + offset);
769 
770                 /*
771                  *      Prefix match
772                  */
773                 if (plen < fn->fn_bit ||
774                     !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) {
775                         if (!allow_create) {
776                                 if (replace_required) {
777                                         NL_SET_ERR_MSG(extack,
778                                                        "Can not replace route - no match found");
779                                         pr_warn("Can't replace route, no match found\n");
780                                         return ERR_PTR(-ENOENT);
781                                 }
782                                 pr_warn("NLM_F_CREATE should be set when creating new route\n");
783                         }
784                         goto insert_above;
785                 }
786 
787                 /*
788                  *      Exact match ?
789                  */
790 
791                 if (plen == fn->fn_bit) {
792                         /* clean up an intermediate node */
793                         if (!(fn->fn_flags & RTN_RTINFO)) {
794                                 RCU_INIT_POINTER(fn->leaf, NULL);
795                                 fib6_info_release(leaf);
796                         /* remove null_entry in the root node */
797                         } else if (fn->fn_flags & RTN_TL_ROOT &&
798                                    rcu_access_pointer(fn->leaf) ==
799                                    net->ipv6.fib6_null_entry) {
800                                 RCU_INIT_POINTER(fn->leaf, NULL);
801                         }
802 
803                         return fn;
804                 }
805 
806                 /*
807                  *      We have more bits to go
808                  */
809 
810                 /* Try to walk down on tree. */
811                 dir = addr_bit_set(addr, fn->fn_bit);
812                 pn = fn;
813                 fn = dir ?
814                      rcu_dereference_protected(fn->right,
815                                         lockdep_is_held(&table->tb6_lock)) :
816                      rcu_dereference_protected(fn->left,
817                                         lockdep_is_held(&table->tb6_lock));
818         } while (fn);
819 
820         if (!allow_create) {
821                 /* We should not create new node because
822                  * NLM_F_REPLACE was specified without NLM_F_CREATE
823                  * I assume it is safe to require NLM_F_CREATE when
824                  * REPLACE flag is used! Later we may want to remove the
825                  * check for replace_required, because according
826                  * to netlink specification, NLM_F_CREATE
827                  * MUST be specified if new route is created.
828                  * That would keep IPv6 consistent with IPv4
829                  */
830                 if (replace_required) {
831                         NL_SET_ERR_MSG(extack,
832                                        "Can not replace route - no match found");
833                         pr_warn("Can't replace route, no match found\n");
834                         return ERR_PTR(-ENOENT);
835                 }
836                 pr_warn("NLM_F_CREATE should be set when creating new route\n");
837         }
838         /*
839          *      We walked to the bottom of tree.
840          *      Create new leaf node without children.
841          */
842 
843         ln = node_alloc(net);
844 
845         if (!ln)
846                 return ERR_PTR(-ENOMEM);
847         ln->fn_bit = plen;
848         RCU_INIT_POINTER(ln->parent, pn);
849 
850         if (dir)
851                 rcu_assign_pointer(pn->right, ln);
852         else
853                 rcu_assign_pointer(pn->left, ln);
854 
855         return ln;
856 
857 
858 insert_above:
859         /*
860          * split since we don't have a common prefix anymore or
861          * we have a less significant route.
862          * we've to insert an intermediate node on the list
863          * this new node will point to the one we need to create
864          * and the current
865          */
866 
867         pn = rcu_dereference_protected(fn->parent,
868                                        lockdep_is_held(&table->tb6_lock));
869 
870         /* find 1st bit in difference between the 2 addrs.
871 
872            See comment in __ipv6_addr_diff: bit may be an invalid value,
873            but if it is >= plen, the value is ignored in any case.
874          */
875 
876         bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr));
877 
878         /*
879          *              (intermediate)[in]
880          *                /        \
881          *      (new leaf node)[ln] (old node)[fn]
882          */
883         if (plen > bit) {
884                 in = node_alloc(net);
885                 ln = node_alloc(net);
886 
887                 if (!in || !ln) {
888                         if (in)
889                                 node_free_immediate(net, in);
890                         if (ln)
891                                 node_free_immediate(net, ln);
892                         return ERR_PTR(-ENOMEM);
893                 }
894 
895                 /*
896                  * new intermediate node.
897                  * RTN_RTINFO will
898                  * be off since that an address that chooses one of
899                  * the branches would not match less specific routes
900                  * in the other branch
901                  */
902 
903                 in->fn_bit = bit;
904 
905                 RCU_INIT_POINTER(in->parent, pn);
906                 in->leaf = fn->leaf;
907                 fib6_info_hold(rcu_dereference_protected(in->leaf,
908                                 lockdep_is_held(&table->tb6_lock)));
909 
910                 /* update parent pointer */
911                 if (dir)
912                         rcu_assign_pointer(pn->right, in);
913                 else
914                         rcu_assign_pointer(pn->left, in);
915 
916                 ln->fn_bit = plen;
917 
918                 RCU_INIT_POINTER(ln->parent, in);
919                 rcu_assign_pointer(fn->parent, in);
920 
921                 if (addr_bit_set(addr, bit)) {
922                         rcu_assign_pointer(in->right, ln);
923                         rcu_assign_pointer(in->left, fn);
924                 } else {
925                         rcu_assign_pointer(in->left, ln);
926                         rcu_assign_pointer(in->right, fn);
927                 }
928         } else { /* plen <= bit */
929 
930                 /*
931                  *              (new leaf node)[ln]
932                  *                /        \
933                  *           (old node)[fn] NULL
934                  */
935 
936                 ln = node_alloc(net);
937 
938                 if (!ln)
939                         return ERR_PTR(-ENOMEM);
940 
941                 ln->fn_bit = plen;
942 
943                 RCU_INIT_POINTER(ln->parent, pn);
944 
945                 if (addr_bit_set(&key->addr, plen))
946                         RCU_INIT_POINTER(ln->right, fn);
947                 else
948                         RCU_INIT_POINTER(ln->left, fn);
949 
950                 rcu_assign_pointer(fn->parent, ln);
951 
952                 if (dir)
953                         rcu_assign_pointer(pn->right, ln);
954                 else
955                         rcu_assign_pointer(pn->left, ln);
956         }
957         return ln;
958 }
959 
960 static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
961                                   const struct fib6_info *match,
962                                   const struct fib6_table *table)
963 {
964         int cpu;
965 
966         if (!fib6_nh->rt6i_pcpu)
967                 return;
968 
969         rcu_read_lock();
970         /* release the reference to this fib entry from
971          * all of its cached pcpu routes
972          */
973         for_each_possible_cpu(cpu) {
974                 struct rt6_info **ppcpu_rt;
975                 struct rt6_info *pcpu_rt;
976 
977                 ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
978 
979                 /* Paired with xchg() in rt6_get_pcpu_route() */
980                 pcpu_rt = READ_ONCE(*ppcpu_rt);
981 
982                 /* only dropping the 'from' reference if the cached route
983                  * is using 'match'. The cached pcpu_rt->from only changes
984                  * from a fib6_info to NULL (ip6_dst_destroy); it can never
985                  * change from one fib6_info reference to another
986                  */
987                 if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) {
988                         struct fib6_info *from;
989 
990                         from = unrcu_pointer(xchg(&pcpu_rt->from, NULL));
991                         fib6_info_release(from);
992                 }
993         }
994         rcu_read_unlock();
995 }
996 
/* Argument bundle handed to fib6_nh_drop_pcpu_from() through its void *. */
struct fib6_nh_pcpu_arg {
	/* fib entry whose per-cpu 'from' references are being dropped */
	struct fib6_info *from;
	/* table forwarded unchanged to __fib6_drop_pcpu_from() */
	const struct fib6_table *table;
};
1001 
1002 static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg)
1003 {
1004         struct fib6_nh_pcpu_arg *arg = _arg;
1005 
1006         __fib6_drop_pcpu_from(nh, arg->from, arg->table);
1007         return 0;
1008 }
1009 
1010 static void fib6_drop_pcpu_from(struct fib6_info *f6i,
1011                                 const struct fib6_table *table)
1012 {
1013         /* Make sure rt6_make_pcpu_route() wont add other percpu routes
1014          * while we are cleaning them here.
1015          */
1016         f6i->fib6_destroying = 1;
1017         mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
1018 
1019         if (f6i->nh) {
1020                 struct fib6_nh_pcpu_arg arg = {
1021                         .from = f6i,
1022                         .table = table
1023                 };
1024 
1025                 nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from,
1026                                          &arg);
1027         } else {
1028                 struct fib6_nh *fib6_nh;
1029 
1030                 fib6_nh = f6i->fib6_nh;
1031                 __fib6_drop_pcpu_from(fib6_nh, f6i, table);
1032         }
1033 }
1034 
1035 static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
1036                           struct net *net)
1037 {
1038         struct fib6_table *table = rt->fib6_table;
1039 
1040         /* Flush all cached dst in exception table */
1041         rt6_flush_exceptions(rt);
1042         fib6_drop_pcpu_from(rt, table);
1043 
1044         if (rt->nh && !list_empty(&rt->nh_list))
1045                 list_del_init(&rt->nh_list);
1046 
1047         if (refcount_read(&rt->fib6_ref) != 1) {
1048                 /* This route is used as dummy address holder in some split
1049                  * nodes. It is not leaked, but it still holds other resources,
1050                  * which must be released in time. So, scan ascendant nodes
1051                  * and replace dummy references to this route with references
1052                  * to still alive ones.
1053                  */
1054                 while (fn) {
1055                         struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
1056                                             lockdep_is_held(&table->tb6_lock));
1057                         struct fib6_info *new_leaf;
1058                         if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
1059                                 new_leaf = fib6_find_prefix(net, table, fn);
1060                                 fib6_info_hold(new_leaf);
1061 
1062                                 rcu_assign_pointer(fn->leaf, new_leaf);
1063                                 fib6_info_release(rt);
1064                         }
1065                         fn = rcu_dereference_protected(fn->parent,
1066                                     lockdep_is_held(&table->tb6_lock));
1067                 }
1068         }
1069 
1070         fib6_clean_expires(rt);
1071         fib6_remove_gc_list(rt);
1072 }
1073 
1074 /*
1075  *      Insert routing information in a node.
 *
 *      Walks the route list hanging off fn->leaf (linked through
 *      fib6_next) to find the link where rt belongs.  Depending on the
 *      netlink flags in info->nlh, rt is then either inserted at that
 *      link or, for NLM_F_REPLACE with a matching entry, put in place of
 *      that entry, which is purged and released.
 *
 *      Returns 0 on success.  Visible failure returns are -EEXIST
 *      (NLM_F_EXCL with an entry of the same metric, or an entry with a
 *      duplicate nexthop), -ENOENT (NLM_F_REPLACE without NLM_F_CREATE
 *      and nothing to replace) and whatever call_fib6_entry_notifiers()
 *      reports.
 *
 *      The rcu_dereference_protected() calls assert that
 *      rt->fib6_table->tb6_lock is held.
1076  */
1077 
1078 static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
1079                             struct nl_info *info,
1080                             struct netlink_ext_ack *extack)
1081 {
1082         struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
1083                                     lockdep_is_held(&rt->fib6_table->tb6_lock));
1084         struct fib6_info *iter = NULL;
1085         struct fib6_info __rcu **ins;
1086         struct fib6_info __rcu **fallback_ins = NULL;
1087         int replace = (info->nlh &&
1088                        (info->nlh->nlmsg_flags & NLM_F_REPLACE));
1089         int add = (!info->nlh ||
1090                    (info->nlh->nlmsg_flags & NLM_F_CREATE));
1091         int found = 0;
1092         bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
1093         bool notify_sibling_rt = false;
1094         u16 nlflags = NLM_F_EXCL;
1095         int err;
1096 
1097         if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND))
1098                 nlflags |= NLM_F_APPEND;
1099 
             /* ins is the link (fn->leaf or an entry's fib6_next) that gets
              * rewritten later to place rt in front of iter.
              */
1100         ins = &fn->leaf;
1101 
1102         for (iter = leaf; iter;
1103              iter = rcu_dereference_protected(iter->fib6_next,
1104                                 lockdep_is_held(&rt->fib6_table->tb6_lock))) {
1105                 /*
1106                  *      Search for duplicates
1107                  */
1108 
1109                 if (iter->fib6_metric == rt->fib6_metric) {
1110                         /*
1111                          *      Same priority level
1112                          */
1113                         if (info->nlh &&
1114                             (info->nlh->nlmsg_flags & NLM_F_EXCL))
1115                                 return -EEXIST;
1116 
1117                         nlflags &= ~NLM_F_EXCL;
1118                         if (replace) {
1119                                 if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
1120                                         found++;
1121                                         break;
1122                                 }
                                     /* Same metric but different ECMP
                                      * eligibility: remember only the first
                                      * such link as a fallback replace target.
                                      */
1123                                 fallback_ins = fallback_ins ?: ins;
1124                                 goto next_iter;
1125                         }
1126 
1127                         if (rt6_duplicate_nexthop(iter, rt)) {
1128                                 if (rt->fib6_nsiblings)
1129                                         rt->fib6_nsiblings = 0;
1130                                 if (!(iter->fib6_flags & RTF_EXPIRES))
1131                                         return -EEXIST;
1132                                 if (!(rt->fib6_flags & RTF_EXPIRES)) {
1133                                         fib6_clean_expires(iter);
1134                                         fib6_remove_gc_list(iter);
1135                                 } else {
1136                                         fib6_set_expires(iter, rt->expires);
1137                                         fib6_add_gc_list(iter);
1138                                 }
1139 
1140                                 if (rt->fib6_pmtu)
1141                                         fib6_metric_set(iter, RTAX_MTU,
1142                                                         rt->fib6_pmtu);
1143                                 return -EEXIST;
1144                         }
1145                         /* If we have the same destination and the same metric,
1146                          * but not the same gateway, then the route we try to
1147                          * add is sibling to this route, increment our counter
1148                          * of siblings, and later we will add our route to the
1149                          * list.
1150                          * Only static routes (which don't have flag
1151                          * RTF_EXPIRES) are used for ECMPv6.
1152                          *
1153                          * To avoid long list, we only had siblings if the
1154                          * route have a gateway.
1155                          */
1156                         if (rt_can_ecmp &&
1157                             rt6_qualify_for_ecmp(iter))
1158                                 rt->fib6_nsiblings++;
1159                 }
1160 
                     /* Stop in front of the first entry with a larger metric;
                      * iter and ins then describe the insertion point.
                      */
1161                 if (iter->fib6_metric > rt->fib6_metric)
1162                         break;
1163 
1164 next_iter:
1165                 ins = &iter->fib6_next;
1166         }
1167 
1168         if (fallback_ins && !found) {
1169                 /* No matching route with same ecmp-able-ness found, replace
1170                  * first matching route
1171                  */
1172                 ins = fallback_ins;
1173                 iter = rcu_dereference_protected(*ins,
1174                                     lockdep_is_held(&rt->fib6_table->tb6_lock));
1175                 found++;
1176         }
1177 
1178         /* Reset round-robin state, if necessary */
1179         if (ins == &fn->leaf)
1180                 fn->rr_ptr = NULL;
1181 
1182         /* Link this route to others same route. */
1183         if (rt->fib6_nsiblings) {
1184                 unsigned int fib6_nsiblings;
1185                 struct fib6_info *sibling, *temp_sibling;
1186 
1187                 /* Find the first route that have the same metric */
1188                 sibling = leaf;
                     /* notify_sibling_rt stays true only when the very first
                      * entry of the list is the sibling rt gets linked to.
                      */
1189                 notify_sibling_rt = true;
1190                 while (sibling) {
1191                         if (sibling->fib6_metric == rt->fib6_metric &&
1192                             rt6_qualify_for_ecmp(sibling)) {
1193                                 list_add_tail(&rt->fib6_siblings,
1194                                               &sibling->fib6_siblings);
1195                                 break;
1196                         }
1197                         sibling = rcu_dereference_protected(sibling->fib6_next,
1198                                     lockdep_is_held(&rt->fib6_table->tb6_lock));
1199                         notify_sibling_rt = false;
1200                 }
1201                 /* For each sibling in the list, increment the counter of
1202                  * siblings. BUG() if counters does not match, list of siblings
1203                  * is broken!
1204                  */
1205                 fib6_nsiblings = 0;
1206                 list_for_each_entry_safe(sibling, temp_sibling,
1207                                          &rt->fib6_siblings, fib6_siblings) {
1208                         sibling->fib6_nsiblings++;
1209                         BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
1210                         fib6_nsiblings++;
1211                 }
1212                 BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
1213                 rt6_multipath_rebalance(temp_sibling);
1214         }
1215 
1216         /*
1217          *      insert node
1218          */
1219         if (!replace) {
1220                 if (!add)
1221                         pr_warn("NLM_F_CREATE should be set when creating new route\n");
1222 
                     /* Also entered by goto from the replace branch below when
                      * no entry matched and creation is allowed.
                      */
1223 add:
1224                 nlflags |= NLM_F_CREATE;
1225 
1226                 /* The route should only be notified if it is the first
1227                  * route in the node or if it is added as a sibling
1228                  * route to the first route in the node.
1229                  */
1230                 if (!info->skip_notify_kernel &&
1231                     (notify_sibling_rt || ins == &fn->leaf)) {
1232                         enum fib_event_type fib_event;
1233 
1234                         if (notify_sibling_rt)
1235                                 fib_event = FIB_EVENT_ENTRY_APPEND;
1236                         else
1237                                 fib_event = FIB_EVENT_ENTRY_REPLACE;
1238                         err = call_fib6_entry_notifiers(info->nl_net,
1239                                                         fib_event, rt,
1240                                                         extack);
1241                         if (err) {
1242                                 struct fib6_info *sibling, *next_sibling;
1243 
1244                                 /* If the route has siblings, then it first
1245                                  * needs to be unlinked from them.
1246                                  */
1247                                 if (!rt->fib6_nsiblings)
1248                                         return err;
1249 
1250                                 list_for_each_entry_safe(sibling, next_sibling,
1251                                                          &rt->fib6_siblings,
1252                                                          fib6_siblings)
1253                                         sibling->fib6_nsiblings--;
1254                                 rt->fib6_nsiblings = 0;
1255                                 list_del_init(&rt->fib6_siblings);
1256                                 rt6_multipath_rebalance(next_sibling);
1257                                 return err;
1258                         }
1259                 }
1260 
                     /* rt is fully set up (successor, reference, owning node)
                      * before it is published through *ins.
                      */
1261                 rcu_assign_pointer(rt->fib6_next, iter);
1262                 fib6_info_hold(rt);
1263                 rcu_assign_pointer(rt->fib6_node, fn);
1264                 rcu_assign_pointer(*ins, rt);
1265                 if (!info->skip_notify)
1266                         inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
1267                 info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
1268 
1269                 if (!(fn->fn_flags & RTN_RTINFO)) {
1270                         info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
1271                         fn->fn_flags |= RTN_RTINFO;
1272                 }
1273 
1274         } else {
1275                 int nsiblings;
1276 
1277                 if (!found) {
1278                         if (add)
1279                                 goto add;
1280                         pr_warn("NLM_F_REPLACE set, but no existing node found!\n");
1281                         return -ENOENT;
1282                 }
1283 
1284                 if (!info->skip_notify_kernel && ins == &fn->leaf) {
1285                         err = call_fib6_entry_notifiers(info->nl_net,
1286                                                         FIB_EVENT_ENTRY_REPLACE,
1287                                                         rt, extack);
1288                         if (err)
1289                                 return err;
1290                 }
1291 
                     /* Put rt in iter's place: rt inherits iter's successor,
                      * then *ins is switched from iter to rt.
                      */
1292                 fib6_info_hold(rt);
1293                 rcu_assign_pointer(rt->fib6_node, fn);
1294                 rt->fib6_next = iter->fib6_next;
1295                 rcu_assign_pointer(*ins, rt);
1296                 if (!info->skip_notify)
1297                         inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
1298                 if (!(fn->fn_flags & RTN_RTINFO)) {
1299                         info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
1300                         fn->fn_flags |= RTN_RTINFO;
1301                 }
                     /* Detach the replaced entry from fn, purge the state it
                      * still holds and release it.  Its sibling count is saved
                      * first because it drives the cleanup loop below.
                      */
1302                 nsiblings = iter->fib6_nsiblings;
1303                 iter->fib6_node = NULL;
1304                 fib6_purge_rt(iter, fn, info->nl_net);
1305                 if (rcu_access_pointer(fn->rr_ptr) == iter)
1306                         fn->rr_ptr = NULL;
1307                 fib6_info_release(iter);
1308 
1309                 if (nsiblings) {
1310                         /* Replacing an ECMP route, remove all siblings */
1311                         ins = &rt->fib6_next;
1312                         iter = rcu_dereference_protected(*ins,
1313                                     lockdep_is_held(&rt->fib6_table->tb6_lock));
1314                         while (iter) {
1315                                 if (iter->fib6_metric > rt->fib6_metric)
1316                                         break;
1317                                 if (rt6_qualify_for_ecmp(iter)) {
1318                                         *ins = iter->fib6_next;
1319                                         iter->fib6_node = NULL;
1320                                         fib6_purge_rt(iter, fn, info->nl_net);
1321                                         if (rcu_access_pointer(fn->rr_ptr) == iter)
1322                                                 fn->rr_ptr = NULL;
1323                                         fib6_info_release(iter);
1324                                         nsiblings--;
1325                                         info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
1326                                 } else {
1327                                         ins = &iter->fib6_next;
1328                                 }
1329                                 iter = rcu_dereference_protected(*ins,
1330                                         lockdep_is_held(&rt->fib6_table->tb6_lock));
1331                         }
                             /* Every sibling counted on the replaced entry
                              * should have been removed by the loop above.
                              */
1332                         WARN_ON(nsiblings != 0);
1333                 }
1334         }
1335 
1336         return 0;
1337 }
1338 
1339 static void fib6_start_gc(struct net *net, struct fib6_info *rt)
1340 {
1341         if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
1342             (rt->fib6_flags & RTF_EXPIRES))
1343                 mod_timer(&net->ipv6.ip6_fib_timer,
1344                           jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1345 }
1346 
1347 void fib6_force_start_gc(struct net *net)
1348 {
1349         if (!timer_pending(&net->ipv6.ip6_fib_timer))
1350                 mod_timer(&net->ipv6.ip6_fib_timer,
1351                           jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1352 }
1353 
1354 static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
1355                                            int sernum)
1356 {
1357         struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
1358                                 lockdep_is_held(&rt->fib6_table->tb6_lock));
1359 
1360         /* paired with smp_rmb() in fib6_get_cookie_safe() */
1361         smp_wmb();
1362         while (fn) {
1363                 WRITE_ONCE(fn->fn_sernum, sernum);
1364                 fn = rcu_dereference_protected(fn->parent,
1365                                 lockdep_is_held(&rt->fib6_table->tb6_lock));
1366         }
1367 }
1368 
/*
 * Obtain a new serial number for @net and write it to rt's node and all
 * of that node's ancestors.
 */
void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt)
{
	int sernum = fib6_new_sernum(net);

	__fib6_update_sernum_upto_root(rt, sernum);
}
1373 
1374 /* allow ipv4 to update sernum via ipv6_stub */
1375 void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i)
1376 {
1377         spin_lock_bh(&f6i->fib6_table->tb6_lock);
1378         fib6_update_sernum_upto_root(net, f6i);
1379         spin_unlock_bh(&f6i->fib6_table->tb6_lock);
1380 }
1381 
1382 /*
1383  *      Add routing information to the routing tree.
1384  *      <destination addr>/<source addr>
1385  *      with source addr info in sub-trees
1386  *      Need to own table->tb6_lock
 *
 *      The destination prefix is located (or created) with fib6_add_1();
 *      with CONFIG_IPV6_SUBTREES and a non-zero source prefix length the
 *      same is done inside the node's subtree, which is created first
 *      when missing.  rt is then attached with fib6_add_rt2node().
 *
 *      Returns 0 on success, otherwise the error from fib6_add_1() or
 *      fib6_add_rt2node(), or -ENOMEM when the subtree root cannot be
 *      allocated.
1387  */
1388 
1389 int fib6_add(struct fib6_node *root, struct fib6_info *rt,
1390              struct nl_info *info, struct netlink_ext_ack *extack)
1391 {
1392         struct fib6_table *table = rt->fib6_table;
1393         struct fib6_node *fn;
1394 #ifdef CONFIG_IPV6_SUBTREES
1395         struct fib6_node *pn = NULL;
1396 #endif
1397         int err = -ENOMEM;
1398         int allow_create = 1;
1399         int replace_required = 0;
1400 
             /* Without a netlink header the defaults stand: creation is
              * allowed and replacement is not required.
              */
1401         if (info->nlh) {
1402                 if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
1403                         allow_create = 0;
1404                 if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
1405                         replace_required = 1;
1406         }
1407         if (!allow_create && !replace_required)
1408                 pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
1409 
1410         fn = fib6_add_1(info->nl_net, table, root,
1411                         &rt->fib6_dst.addr, rt->fib6_dst.plen,
1412                         offsetof(struct fib6_info, fib6_dst), allow_create,
1413                         replace_required, extack);
1414         if (IS_ERR(fn)) {
1415                 err = PTR_ERR(fn);
                     /* fn = NULL makes the failure path skip tree repair */
1416                 fn = NULL;
1417                 goto out;
1418         }
1419 
1420 #ifdef CONFIG_IPV6_SUBTREES
             /* pn keeps the main-tree node; fn is redirected to the subtree
              * node further down when a source prefix is present.
              */
1421         pn = fn;
1422 
1423         if (rt->fib6_src.plen) {
1424                 struct fib6_node *sn;
1425 
1426                 if (!rcu_access_pointer(fn->subtree)) {
1427                         struct fib6_node *sfn;
1428 
1429                         /*
1430                          * Create subtree.
1431                          *
1432                          *              fn[main tree]
1433                          *              |
1434                          *              sfn[subtree root]
1435                          *                 \
1436                          *                  sn[new leaf node]
1437                          */
1438 
1439                         /* Create subtree root node */
1440                         sfn = node_alloc(info->nl_net);
1441                         if (!sfn)
1442                                 goto failure;
1443 
1444                         fib6_info_hold(info->nl_net->ipv6.fib6_null_entry);
1445                         rcu_assign_pointer(sfn->leaf,
1446                                            info->nl_net->ipv6.fib6_null_entry);
1447                         sfn->fn_flags = RTN_ROOT;
1448 
1449                         /* Now add the first leaf node to new subtree */
1450 
1451                         sn = fib6_add_1(info->nl_net, table, sfn,
1452                                         &rt->fib6_src.addr, rt->fib6_src.plen,
1453                                         offsetof(struct fib6_info, fib6_src),
1454                                         allow_create, replace_required, extack);
1455 
1456                         if (IS_ERR(sn)) {
1457                                 /* If it is failed, discard just allocated
1458                                    root, and then (in failure) stale node
1459                                    in main tree.
1460                                  */
1461                                 node_free_immediate(info->nl_net, sfn);
1462                                 err = PTR_ERR(sn);
1463                                 goto failure;
1464                         }
1465 
1466                         /* Now link new subtree to main tree */
1467                         rcu_assign_pointer(sfn->parent, fn);
1468                         rcu_assign_pointer(fn->subtree, sfn);
1469                 } else {
1470                         sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
1471                                         &rt->fib6_src.addr, rt->fib6_src.plen,
1472                                         offsetof(struct fib6_info, fib6_src),
1473                                         allow_create, replace_required, extack);
1474 
1475                         if (IS_ERR(sn)) {
1476                                 err = PTR_ERR(sn);
1477                                 goto failure;
1478                         }
1479                 }
1480 
                     /* fn has no leaf yet: give it one (null_entry for the
                      * top-level root, otherwise rt with an extra reference).
                      */
1481                 if (!rcu_access_pointer(fn->leaf)) {
1482                         if (fn->fn_flags & RTN_TL_ROOT) {
1483                                 /* put back null_entry for root node */
1484                                 rcu_assign_pointer(fn->leaf,
1485                                             info->nl_net->ipv6.fib6_null_entry);
1486                         } else {
1487                                 fib6_info_hold(rt);
1488                                 rcu_assign_pointer(fn->leaf, rt);
1489                         }
1490                 }
1491                 fn = sn;
1492         }
1493 #endif
1494 
1495         err = fib6_add_rt2node(fn, rt, info, extack);
1496         if (!err) {
                     /* On success: link rt into its nexthop's f6i_list, stamp
                      * a new serial number up to the root, and set up expiry
                      * handling for routes flagged RTF_EXPIRES.
                      */
1497                 if (rt->nh)
1498                         list_add(&rt->nh_list, &rt->nh->f6i_list);
1499                 __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
1500 
1501                 if (rt->fib6_flags & RTF_EXPIRES)
1502                         fib6_add_gc_list(rt);
1503 
1504                 fib6_start_gc(info->nl_net, rt);
1505         }
1506 
1507 out:
1508         if (err) {
1509 #ifdef CONFIG_IPV6_SUBTREES
1510                 /*
1511                  * If fib6_add_1 has cleared the old leaf pointer in the
1512                  * super-tree leaf node we have to find a new one for it.
1513                  */
1514                 if (pn != fn) {
1515                         struct fib6_info *pn_leaf =
1516                                 rcu_dereference_protected(pn->leaf,
1517                                     lockdep_is_held(&table->tb6_lock));
                             /* Undo the leaf assignment made above: rt is not
                              * going to stay in the tree.
                              */
1518                         if (pn_leaf == rt) {
1519                                 pn_leaf = NULL;
1520                                 RCU_INIT_POINTER(pn->leaf, NULL);
1521                                 fib6_info_release(rt);
1522                         }
1523                         if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
1524                                 pn_leaf = fib6_find_prefix(info->nl_net, table,
1525                                                            pn);
1526                                 if (!pn_leaf)
1527                                         pn_leaf =
1528                                             info->nl_net->ipv6.fib6_null_entry;
1529                                 fib6_info_hold(pn_leaf);
1530                                 rcu_assign_pointer(pn->leaf, pn_leaf);
1531                         }
1532                 }
1533 #endif
1534                 goto failure;
1535         } else if (fib6_requires_src(rt)) {
1536                 fib6_routes_require_src_inc(info->nl_net);
1537         }
1538         return err;
1539 
1540 failure:
1541         /* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
1542          * 1. fn is an intermediate node and we failed to add the new
1543          * route to it in both subtree creation failure and fib6_add_rt2node()
1544          * failure case.
1545          * 2. fn is the root node in the table and we fail to add the first
1546          * default route to it.
1547          */
1548         if (fn &&
1549             (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
1550              (fn->fn_flags & RTN_TL_ROOT &&
1551               !rcu_access_pointer(fn->leaf))))
1552                 fib6_repair_tree(info->nl_net, table, fn);
1553         return err;
1554 }
1555 
1556 /*
1557  *      Routing tree lookup
1558  *
1559  */
1560 
/*
 * One lookup step for fib6_node_lookup_1().  Steps are passed as an array
 * terminated by an entry whose offset is 0.
 */
struct lookup_args {
	/* byte offset of the struct rt6key to compare inside fib6_info */
	int offset;
	/* address searched for at this step */
	const struct in6_addr *addr;
};
1565 
1566 static struct fib6_node *fib6_node_lookup_1(struct fib6_node *root,
1567                                             struct lookup_args *args)
1568 {
1569         struct fib6_node *fn;
1570         __be32 dir;
1571 
1572         if (unlikely(args->offset == 0))
1573                 return NULL;
1574 
1575         /*
1576          *      Descend on a tree
1577          */
1578 
1579         fn = root;
1580 
1581         for (;;) {
1582                 struct fib6_node *next;
1583 
1584                 dir = addr_bit_set(args->addr, fn->fn_bit);
1585 
1586                 next = dir ? rcu_dereference(fn->right) :
1587                              rcu_dereference(fn->left);
1588 
1589                 if (next) {
1590                         fn = next;
1591                         continue;
1592                 }
1593                 break;
1594         }
1595 
1596         while (fn) {
1597                 struct fib6_node *subtree = FIB6_SUBTREE(fn);
1598 
1599                 if (subtree || fn->fn_flags & RTN_RTINFO) {
1600                         struct fib6_info *leaf = rcu_dereference(fn->leaf);
1601                         struct rt6key *key;
1602 
1603                         if (!leaf)
1604                                 goto backtrack;
1605 
1606                         key = (struct rt6key *) ((u8 *)leaf + args->offset);
1607 
1608                         if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
1609 #ifdef CONFIG_IPV6_SUBTREES
1610                                 if (subtree) {
1611                                         struct fib6_node *sfn;
1612                                         sfn = fib6_node_lookup_1(subtree,
1613                                                                  args + 1);
1614                                         if (!sfn)
1615                                                 goto backtrack;
1616                                         fn = sfn;
1617                                 }
1618 #endif
1619                                 if (fn->fn_flags & RTN_RTINFO)
1620                                         return fn;
1621                         }
1622                 }
1623 backtrack:
1624                 if (fn->fn_flags & RTN_ROOT)
1625                         break;
1626 
1627                 fn = rcu_dereference(fn->parent);
1628         }
1629 
1630         return NULL;
1631 }
1632 
1633 /* called with rcu_read_lock() held
1634  */
1635 struct fib6_node *fib6_node_lookup(struct fib6_node *root,
1636                                    const struct in6_addr *daddr,
1637                                    const struct in6_addr *saddr)
1638 {
1639         struct fib6_node *fn;
1640         struct lookup_args args[] = {
1641                 {
1642                         .offset = offsetof(struct fib6_info, fib6_dst),
1643                         .addr = daddr,
1644                 },
1645 #ifdef CONFIG_IPV6_SUBTREES
1646                 {
1647                         .offset = offsetof(struct fib6_info, fib6_src),
1648                         .addr = saddr,
1649                 },
1650 #endif
1651                 {
1652                         .offset = 0,    /* sentinel */
1653                 }
1654         };
1655 
1656         fn = fib6_node_lookup_1(root, daddr ? args : args + 1);
1657         if (!fn || fn->fn_flags & RTN_TL_ROOT)
1658                 fn = root;
1659 
1660         return fn;
1661 }
1662 
1663 /*
1664  *      Get node with specified destination prefix (and source prefix,
1665  *      if subtrees are used)
1666  *      exact_match == true means we try to find fn with exact match of
1667  *      the passed in prefix addr
1668  *      exact_match == false means we try to find fn with longest prefix
1669  *      match of the passed in prefix addr. This is useful for finding fn
1670  *      for cached route as it will be stored in the exception table under
1671  *      the node with longest prefix length.
1672  */
1673 
1674 
1675 static struct fib6_node *fib6_locate_1(struct fib6_node *root,
1676                                        const struct in6_addr *addr,
1677                                        int plen, int offset,
1678                                        bool exact_match)
1679 {
1680         struct fib6_node *fn, *prev = NULL;
1681 
1682         for (fn = root; fn ; ) {
1683                 struct fib6_info *leaf = rcu_dereference(fn->leaf);
1684                 struct rt6key *key;
1685 
1686                 /* This node is being deleted */
1687                 if (!leaf) {
1688                         if (plen <= fn->fn_bit)
1689                                 goto out;
1690                         else
1691                                 goto next;
1692                 }
1693 
1694                 key = (struct rt6key *)((u8 *)leaf + offset);
1695 
1696                 /*
1697                  *      Prefix match
1698                  */
1699                 if (plen < fn->fn_bit ||
1700                     !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
1701                         goto out;
1702 
1703                 if (plen == fn->fn_bit)
1704                         return fn;
1705 
1706                 if (fn->fn_flags & RTN_RTINFO)
1707                         prev = fn;
1708 
1709 next:
1710                 /*
1711                  *      We have more bits to go
1712                  */
1713                 if (addr_bit_set(addr, fn->fn_bit))
1714                         fn = rcu_dereference(fn->right);
1715                 else
1716                         fn = rcu_dereference(fn->left);
1717         }
1718 out:
1719         if (exact_match)
1720                 return NULL;
1721         else
1722                 return prev;
1723 }
1724 
1725 struct fib6_node *fib6_locate(struct fib6_node *root,
1726                               const struct in6_addr *daddr, int dst_len,
1727                               const struct in6_addr *saddr, int src_len,
1728                               bool exact_match)
1729 {
1730         struct fib6_node *fn;
1731 
1732         fn = fib6_locate_1(root, daddr, dst_len,
1733                            offsetof(struct fib6_info, fib6_dst),
1734                            exact_match);
1735 
1736 #ifdef CONFIG_IPV6_SUBTREES
1737         if (src_len) {
1738                 WARN_ON(saddr == NULL);
1739                 if (fn) {
1740                         struct fib6_node *subtree = FIB6_SUBTREE(fn);
1741 
1742                         if (subtree) {
1743                                 fn = fib6_locate_1(subtree, saddr, src_len,
1744                                            offsetof(struct fib6_info, fib6_src),
1745                                            exact_match);
1746                         }
1747                 }
1748         }
1749 #endif
1750 
1751         if (fn && fn->fn_flags & RTN_RTINFO)
1752                 return fn;
1753 
1754         return NULL;
1755 }
1756 
1757 
1758 /*
1759  *      Deletion
1760  *
1761  */
1762 
1763 static struct fib6_info *fib6_find_prefix(struct net *net,
1764                                          struct fib6_table *table,
1765                                          struct fib6_node *fn)
1766 {
1767         struct fib6_node *child_left, *child_right;
1768 
1769         if (fn->fn_flags & RTN_ROOT)
1770                 return net->ipv6.fib6_null_entry;
1771 
1772         while (fn) {
1773                 child_left = rcu_dereference_protected(fn->left,
1774                                     lockdep_is_held(&table->tb6_lock));
1775                 child_right = rcu_dereference_protected(fn->right,
1776                                     lockdep_is_held(&table->tb6_lock));
1777                 if (child_left)
1778                         return rcu_dereference_protected(child_left->leaf,
1779                                         lockdep_is_held(&table->tb6_lock));
1780                 if (child_right)
1781                         return rcu_dereference_protected(child_right->leaf,
1782                                         lockdep_is_held(&table->tb6_lock));
1783 
1784                 fn = FIB6_SUBTREE(fn);
1785         }
1786         return NULL;
1787 }
1788 
1789 /*
1790  *      Called to trim the tree of intermediate nodes when possible. "fn"
1791  *      is the node we want to try and remove.
1792  *      Need to own table->tb6_lock
      *
      *      For a node flagged RTN_TL_ROOT only the leaf is reset to
      *      fib6_null_entry and fn itself is returned.  In every other case
      *      the return value is pn, the parent of the node handled in the
      *      final loop iteration.
1793  */
1794 
1795 static struct fib6_node *fib6_repair_tree(struct net *net,
1796                                           struct fib6_table *table,
1797                                           struct fib6_node *fn)
1798 {
1799         int children;
1800         int nstate;
1801         struct fib6_node *child;
1802         struct fib6_walker *w;
1803         int iter = 0;
1804 
1805         /* Set fn->leaf to null_entry for root node. */
1806         if (fn->fn_flags & RTN_TL_ROOT) {
1807                 rcu_assign_pointer(fn->leaf, net->ipv6.fib6_null_entry);
1808                 return fn;
1809         }
1810 
             /* Each iteration tries to remove fn; when its parent is left
              * without routes and without a subtree, the loop repeats with
              * the parent as the new fn.
              */
1811         for (;;) {
1812                 struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
1813                                             lockdep_is_held(&table->tb6_lock));
1814                 struct fib6_node *fn_l = rcu_dereference_protected(fn->left,
1815                                             lockdep_is_held(&table->tb6_lock));
1816                 struct fib6_node *pn = rcu_dereference_protected(fn->parent,
1817                                             lockdep_is_held(&table->tb6_lock));
1818                 struct fib6_node *pn_r = rcu_dereference_protected(pn->right,
1819                                             lockdep_is_held(&table->tb6_lock));
1820                 struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
1821                                             lockdep_is_held(&table->tb6_lock));
1822                 struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
1823                                             lockdep_is_held(&table->tb6_lock));
1824                 struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
1825                                             lockdep_is_held(&table->tb6_lock));
1826                 struct fib6_info *new_fn_leaf;
1827 
1828                 pr_debug("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
1829                 iter++;
1830 
1831                 WARN_ON(fn->fn_flags & RTN_RTINFO);
1832                 WARN_ON(fn->fn_flags & RTN_TL_ROOT);
1833                 WARN_ON(fn_leaf);
1834 
                     /* children is a bitmask: 1 = right child present,
                      * 2 = left child present.  child ends up as the left
                      * child if there is one, else the right child, else NULL.
                      */
1835                 children = 0;
1836                 child = NULL;
1837                 if (fn_r) {
1838                         child = fn_r;
1839                         children |= 1;
1840                 }
1841                 if (fn_l) {
1842                         child = fn_l;
1843                         children |= 2;
1844                 }
1845 
                     /* fn has to stay (two children, a subtree attached, or a
                      * subtree root that still has a child): give it a leaf
                      * obtained from fib6_find_prefix(), take a reference on
                      * that leaf, and stop here.
                      */
1846                 if (children == 3 || FIB6_SUBTREE(fn)
1847 #ifdef CONFIG_IPV6_SUBTREES
1848                     /* Subtree root (i.e. fn) may have one child */
1849                     || (children && fn->fn_flags & RTN_ROOT)
1850 #endif
1851                     ) {
1852                         new_fn_leaf = fib6_find_prefix(net, table, fn);
1853 #if RT6_DEBUG >= 2
1854                         if (!new_fn_leaf) {
1855                                 WARN_ON(!new_fn_leaf);
1856                                 new_fn_leaf = net->ipv6.fib6_null_entry;
1857                         }
1858 #endif
1859                         fib6_info_hold(new_fn_leaf);
1860                         rcu_assign_pointer(fn->leaf, new_fn_leaf);
1861                         return pn;
1862                 }
1863 
                     /* fn can be removed: detach it from pn.  If fn was pn's
                      * subtree the subtree pointer is cleared; otherwise pn's
                      * link to fn is replaced by fn's only child (or NULL) and
                      * that child is re-parented to pn.  nstate is the state
                      * given to walkers that are moved up to pn below.
                      */
1864 #ifdef CONFIG_IPV6_SUBTREES
1865                 if (FIB6_SUBTREE(pn) == fn) {
1866                         WARN_ON(!(fn->fn_flags & RTN_ROOT));
1867                         RCU_INIT_POINTER(pn->subtree, NULL);
1868                         nstate = FWS_L;
1869                 } else {
1870                         WARN_ON(fn->fn_flags & RTN_ROOT);
1871 #endif
1872                         if (pn_r == fn)
1873                                 rcu_assign_pointer(pn->right, child);
1874                         else if (pn_l == fn)
1875                                 rcu_assign_pointer(pn->left, child);
1876 #if RT6_DEBUG >= 2
1877                         else
1878                                 WARN_ON(1);
1879 #endif
1880                         if (child)
1881                                 rcu_assign_pointer(child->parent, pn);
1882                         nstate = FWS_R;
1883 #ifdef CONFIG_IPV6_SUBTREES
1884                 }
1885 #endif
1886 
                     /* Re-point every walker that currently sits on fn: to pn
                      * (state nstate) when fn had no child, otherwise to the
                      * child that took fn's place, with the state mapped to
                      * FWS_U or FWS_INIT depending on how far the walker had
                      * progressed on fn.
                      */
1887                 read_lock(&net->ipv6.fib6_walker_lock);
1888                 FOR_WALKERS(net, w) {
1889                         if (!child) {
1890                                 if (w->node == fn) {
1891                                         pr_debug("W %p adjusted by delnode 1, s=%d/%d\n",
1892                                                  w, w->state, nstate);
1893                                         w->node = pn;
1894                                         w->state = nstate;
1895                                 }
1896                         } else {
1897                                 if (w->node == fn) {
1898                                         w->node = child;
1899                                         if (children&2) {
1900                                                 pr_debug("W %p adjusted by delnode 2, s=%d\n",
1901                                                          w, w->state);
1902                                                 w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
1903                                         } else {
1904                                                 pr_debug("W %p adjusted by delnode 2, s=%d\n",
1905                                                          w, w->state);
1906                                                 w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
1907                                         }
1908                                 }
1909                         }
1910                 }
1911                 read_unlock(&net->ipv6.fib6_walker_lock);
1912 
1913                 node_free(net, fn);
                     /* pn still carries routes or a subtree: trimming ends. */
1914                 if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
1915                         return pn;
1916 
                     /* pn is itself a removal candidate now: clear its leaf
                      * pointer, release the leaf it pointed to, and retry one
                      * level up.
                      */
1917                 RCU_INIT_POINTER(pn->leaf, NULL);
1918                 fib6_info_release(pn_leaf);
1919                 fn = pn;
1920         }
1921 }
1922 
/*
 * Unlink the route that *rtp points to from node fn.
 *
 * In order: the route is removed from the node's list and the statistics
 * are updated, the round-robin pointer and multipath sibling counts are
 * fixed up, walkers parked on the route are advanced, the tree is repaired
 * if the node lost its last route, notifications are sent as permitted by
 * info, and finally fib6_info_release() is called on the route.
 *
 * Pointer reads are lockdep-checked against table->tb6_lock.
 */
1923 static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
1924                            struct fib6_info __rcu **rtp, struct nl_info *info)
1925 {
1926         struct fib6_info *leaf, *replace_rt = NULL;
1927         struct fib6_walker *w;
1928         struct fib6_info *rt = rcu_dereference_protected(*rtp,
1929                                     lockdep_is_held(&table->tb6_lock));
1930         struct net *net = info->nl_net;
1931         bool notify_del = false;
1932 
1933         /* If the deleted route is the first in the node and it is not part of
1934          * a multipath route, then we need to replace it with the next route
1935          * in the node, if exists.
1936          */
1937         leaf = rcu_dereference_protected(fn->leaf,
1938                                          lockdep_is_held(&table->tb6_lock));
1939         if (leaf == rt && !rt->fib6_nsiblings) {
1940                 if (rcu_access_pointer(rt->fib6_next))
1941                         replace_rt = rcu_dereference_protected(rt->fib6_next,
1942                                             lockdep_is_held(&table->tb6_lock));
1943                 else
1944                         notify_del = true;
1945         }
1946 
1947         /* Unlink it */
1948         *rtp = rt->fib6_next;
1949         rt->fib6_node = NULL;
1950         net->ipv6.rt6_stats->fib_rt_entries--;
1951         net->ipv6.rt6_stats->fib_discarded_routes++;
1952 
1953         /* Reset round-robin state, if necessary */
1954         if (rcu_access_pointer(fn->rr_ptr) == rt)
1955                 fn->rr_ptr = NULL;
1956 
1957         /* Remove this entry from other siblings */
1958         if (rt->fib6_nsiblings) {
1959                 struct fib6_info *sibling, *next_sibling;
1960 
1961                 /* The route is deleted from a multipath route. If this
1962                  * multipath route is the first route in the node, then we need
1963                  * to emit a delete notification. Otherwise, we need to skip
1964                  * the notification.
1965                  */
1966                 if (rt->fib6_metric == leaf->fib6_metric &&
1967                     rt6_qualify_for_ecmp(leaf))
1968                         notify_del = true;
                     /* Every remaining sibling now has one peer fewer. */
1969                 list_for_each_entry_safe(sibling, next_sibling,
1970                                          &rt->fib6_siblings, fib6_siblings)
1971                         sibling->fib6_nsiblings--;
1972                 rt->fib6_nsiblings = 0;
1973                 list_del_init(&rt->fib6_siblings);
1974                 rt6_multipath_rebalance(next_sibling);
1975         }
1976 
1977         /* Adjust walkers */
             /* A walker in state FWS_C whose current leaf is the deleted
              * route is moved to the next route, or to FWS_U if none is left.
              */
1978         read_lock(&net->ipv6.fib6_walker_lock);
1979         FOR_WALKERS(net, w) {
1980                 if (w->state == FWS_C && w->leaf == rt) {
1981                         pr_debug("walker %p adjusted by delroute\n", w);
1982                         w->leaf = rcu_dereference_protected(rt->fib6_next,
1983                                             lockdep_is_held(&table->tb6_lock));
1984                         if (!w->leaf)
1985                                 w->state = FWS_U;
1986                 }
1987         }
1988         read_unlock(&net->ipv6.fib6_walker_lock);
1989 
1990         /* If it was last route, call fib6_repair_tree() to:
1991          * 1. For root node, put back null_entry as how the table was created.
1992          * 2. For other nodes, expunge its radix tree node.
1993          */
1994         if (!rcu_access_pointer(fn->leaf)) {
1995                 if (!(fn->fn_flags & RTN_TL_ROOT)) {
1996                         fn->fn_flags &= ~RTN_RTINFO;
1997                         net->ipv6.rt6_stats->fib_route_nodes--;
1998                 }
1999                 fn = fib6_repair_tree(net, table, fn);
2000         }
2001 
2002         fib6_purge_rt(rt, fn, net);
2003 
             /* In-kernel notifiers: a DEL event when notify_del is set,
              * otherwise a replace event for the route that moved to the
              * head of the node.  Suppressed by info->skip_notify_kernel.
              */
2004         if (!info->skip_notify_kernel) {
2005                 if (notify_del)
2006                         call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
2007                                                   rt, NULL);
2008                 else if (replace_rt)
2009                         call_fib6_entry_notifiers_replace(net, replace_rt);
2010         }
             /* RTM_DELROUTE notification, unless info->skip_notify is set. */
2011         if (!info->skip_notify)
2012                 inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
2013 
2014         fib6_info_release(rt);
2015 }
2016 
2017 /* Need to own table->tb6_lock */
2018 int fib6_del(struct fib6_info *rt, struct nl_info *info)
2019 {
2020         struct net *net = info->nl_net;
2021         struct fib6_info __rcu **rtp;
2022         struct fib6_info __rcu **rtp_next;
2023         struct fib6_table *table;
2024         struct fib6_node *fn;
2025 
2026         if (rt == net->ipv6.fib6_null_entry)
2027                 return -ENOENT;
2028 
2029         table = rt->fib6_table;
2030         fn = rcu_dereference_protected(rt->fib6_node,
2031                                        lockdep_is_held(&table->tb6_lock));
2032         if (!fn)
2033                 return -ENOENT;
2034 
2035         WARN_ON(!(fn->fn_flags & RTN_RTINFO));
2036 
2037         /*
2038          *      Walk the leaf entries looking for ourself
2039          */
2040 
2041         for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
2042                 struct fib6_info *cur = rcu_dereference_protected(*rtp,
2043                                         lockdep_is_held(&table->tb6_lock));
2044                 if (rt == cur) {
2045                         if (fib6_requires_src(cur))
2046                                 fib6_routes_require_src_dec(info->nl_net);
2047                         fib6_del_route(table, fn, rtp, info);
2048                         return 0;
2049                 }
2050                 rtp_next = &cur->fib6_next;
2051         }
2052         return -ENOENT;
2053 }
2054 
2055 /*
2056  *      Tree traversal function.
2057  *
2058  *      It is certainly not interrupt safe.
2059  *      However, it is reentrant with respect to itself and fib6_add/fib6_del.
2060  *      This means that the tree may be modified while it is being walked,
2061  *      so this function can be used for garbage collection, clone pruning,
2062  *      cleaning the tree when a device goes down, etc.
2063  *
2064  *      It guarantees that every node will be traversed,
2065  *      and that it will be traversed only once.
2066  *
2067  *      Callback function w->func may return:
2068  *      0 -> continue walking.
2069  *      positive value -> walking is suspended (used by tree dumps,
2070  *      and probably by gc, if it will be split to several slices)
2071  *      negative value -> terminate walking.
2072  *
2073  *      The function itself returns:
2074  *      0   -> walk is complete.
2075  *      >0  -> walk is incomplete (i.e. suspended)
2076  *      <0  -> walk is terminated by an error.
2077  *
2078  *      This function is called with tb6_lock held.
2079  */
2080 
2081 static int fib6_walk_continue(struct fib6_walker *w)
2082 {
2083         struct fib6_node *fn, *pn, *left, *right;
2084 
2085         /* w->root should always be table->tb6_root */
2086         WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));
2087 
             /* Each pass handles the walker's current node according to
              * w->state; "continue" re-dispatches on the (possibly updated)
              * node and state.  A NULL w->node ends the walk with 0.
              */
2088         for (;;) {
2089                 fn = w->node;
2090                 if (!fn)
2091                         return 0;
2092 
2093                 switch (w->state) {
2094 #ifdef CONFIG_IPV6_SUBTREES
                     /* FWS_S: enter the node's subtree if it has one (the
                      * state is left unchanged), otherwise go on to FWS_L.
                      */
2095                 case FWS_S:
2096                         if (FIB6_SUBTREE(fn)) {
2097                                 w->node = FIB6_SUBTREE(fn);
2098                                 continue;
2099                         }
2100                         w->state = FWS_L;
2101                         fallthrough;
2102 #endif
                     /* FWS_L: descend into the left child, if any. */
2103                 case FWS_L:
2104                         left = rcu_dereference_protected(fn->left, 1);
2105                         if (left) {
2106                                 w->node = left;
2107                                 w->state = FWS_INIT;
2108                                 continue;
2109                         }
2110                         w->state = FWS_R;
2111                         fallthrough;
                     /* FWS_R: descend into the right child, if any; otherwise
                      * load the node's leaf and handle the node itself.
                      */
2112                 case FWS_R:
2113                         right = rcu_dereference_protected(fn->right, 1);
2114                         if (right) {
2115                                 w->node = right;
2116                                 w->state = FWS_INIT;
2117                                 continue;
2118                         }
2119                         w->state = FWS_C;
2120                         w->leaf = rcu_dereference_protected(fn->leaf, 1);
2121                         fallthrough;
                     /* FWS_C: run the callback on a node that has a leaf and
                      * is flagged RTN_RTINFO.  While w->skip is non-zero the
                      * node is skipped instead and w->skip is decremented.  A
                      * non-zero callback result is returned to the caller as
                      * is.  After a zero result the state stays FWS_C, so the
                      * node is visited again with whatever w->leaf now holds.
                      */
2122                 case FWS_C:
2123                         if (w->leaf && fn->fn_flags & RTN_RTINFO) {
2124                                 int err;
2125 
2126                                 if (w->skip) {
2127                                         w->skip--;
2128                                         goto skip;
2129                                 }
2130 
2131                                 err = w->func(w);
2132                                 if (err)
2133                                         return err;
2134 
2135                                 w->count++;
2136                                 continue;
2137                         }
2138 skip:
2139                         w->state = FWS_U;
2140                         fallthrough;
                     /* FWS_U: move up to the parent.  The next state depends
                      * on which of the parent's links led to fn: subtree ->
                      * FWS_L, left -> FWS_R, right -> FWS_C (with the parent's
                      * leaf loaded).  Reaching w->root ends the walk.
                      */
2141                 case FWS_U:
2142                         if (fn == w->root)
2143                                 return 0;
2144                         pn = rcu_dereference_protected(fn->parent, 1);
2145                         left = rcu_dereference_protected(pn->left, 1);
2146                         right = rcu_dereference_protected(pn->right, 1);
2147                         w->node = pn;
2148 #ifdef CONFIG_IPV6_SUBTREES
2149                         if (FIB6_SUBTREE(pn) == fn) {
2150                                 WARN_ON(!(fn->fn_flags & RTN_ROOT));
2151                                 w->state = FWS_L;
2152                                 continue;
2153                         }
2154 #endif
2155                         if (left == fn) {
2156                                 w->state = FWS_R;
2157                                 continue;
2158                         }
2159                         if (right == fn) {
2160                                 w->state = FWS_C;
2161                                 w->leaf = rcu_dereference_protected(w->node->leaf, 1);
2162                                 continue;
2163                         }
                             /* fn matched none of pn's links: the loop runs
                              * again on pn, still in FWS_U.
                              */
2164 #if RT6_DEBUG >= 2
2165                         WARN_ON(1);
2166 #endif
2167                 }
2168         }
2169 }
2170 
2171 static int fib6_walk(struct net *net, struct fib6_walker *w)
2172 {
2173         int res;
2174 
2175         w->state = FWS_INIT;
2176         w->node = w->root;
2177 
2178         fib6_walker_link(net, w);
2179         res = fib6_walk_continue(w);
2180         if (res <= 0)
2181                 fib6_walker_unlink(net, w);
2182         return res;
2183 }
2184 
/*
 * Walker callback installed by fib6_clean_tree().
 *
 * Stamps the current node with the cleaner's sernum (unless that is
 * FIB6_NO_SERNUM_CHANGE) and, if a per-route function was supplied, runs
 * it on the routes of the node.  A result of -1 deletes the route, -2
 * skips ahead within a multipath route, 0 moves on.  Always returns 0.
 */
2185 static int fib6_clean_node(struct fib6_walker *w)
2186 {
2187         int res;
2188         struct fib6_info *rt;
2189         struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
2190         struct nl_info info = {
2191                 .nl_net = c->net,
2192                 .skip_notify = c->skip_notify,
2193         };
2194 
2195         if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
2196             READ_ONCE(w->node->fn_sernum) != c->sernum)
2197                 WRITE_ONCE(w->node->fn_sernum, c->sernum);
2198 
             /* No per-route function: only the sernum update above was
              * wanted, so mark the node's routes as done.
              */
2199         if (!c->func) {
2200                 WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
2201                 w->leaf = NULL;
2202                 return 0;
2203         }
2204 
2205         for_each_fib6_walker_rt(w) {
2206                 res = c->func(rt, c->arg);
2207                 if (res == -1) {
                             /* Park the walker on rt before deleting it, so
                              * the walker fix-up in fib6_del_route() (which
                              * matches on w->leaf == rt) can move it on.
                              */
2208                         w->leaf = rt;
2209                         res = fib6_del(rt, &info);
2210                         if (res) {
2211 #if RT6_DEBUG >= 2
2212                                 pr_debug("%s: del failed: rt=%p@%p err=%d\n",
2213                                          __func__, rt,
2214                                          rcu_access_pointer(rt->fib6_node),
2215                                          res);
2216 #endif
2217                                 continue;
2218                         }
2219                         return 0;
2220                 } else if (res == -2) {
2221                         if (WARN_ON(!rt->fib6_nsiblings))
2222                                 continue;
                             /* Jump to the last sibling; presumably the
                              * iterator's advance step then lands after the
                              * multipath group - confirm against the
                              * for_each_fib6_walker_rt() definition.
                              */
2223                         rt = list_last_entry(&rt->fib6_siblings,
2224                                              struct fib6_info, fib6_siblings);
2225                         continue;
2226                 }
2227                 WARN_ON(res != 0);
2228         }
2229         w->leaf = rt;
2230         return 0;
2231 }
2232 
2233 /*
2234  *      Convenient frontend to tree walker.
2235  *
2236  *      func is called on each route.
2237  *              It may return -2 -> skip multipath route.
2238  *                            -1 -> delete this route.
2239  *                            0  -> continue walking
2240  */
2241 
2242 static void fib6_clean_tree(struct net *net, struct fib6_node *root,
2243                             int (*func)(struct fib6_info *, void *arg),
2244                             int sernum, void *arg, bool skip_notify)
2245 {
2246         struct fib6_cleaner c;
2247 
2248         c.w.root = root;
2249         c.w.func = fib6_clean_node;
2250         c.w.count = 0;
2251         c.w.skip = 0;
2252         c.w.skip_in_node = 0;
2253         c.func = func;
2254         c.sernum = sernum;
2255         c.arg = arg;
2256         c.net = net;
2257         c.skip_notify = skip_notify;
2258 
2259         fib6_walk(net, &c.w);
2260 }
2261 
2262 static void __fib6_clean_all(struct net *net,
2263                              int (*func)(struct fib6_info *, void *),
2264                              int sernum, void *arg, bool skip_notify)
2265 {
2266         struct fib6_table *table;
2267         struct hlist_head *head;
2268         unsigned int h;
2269 
2270         rcu_read_lock();
2271         for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2272                 head = &net->ipv6.fib_table_hash[h];
2273                 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2274                         spin_lock_bh(&table->tb6_lock);
2275                         fib6_clean_tree(net, &table->tb6_root,
2276                                         func, sernum, arg, skip_notify);
2277                         spin_unlock_bh(&table->tb6_lock);
2278                 }
2279         }
2280         rcu_read_unlock();
2281 }
2282 
2283 void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
2284                     void *arg)
2285 {
2286         __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false);
2287 }
2288 
2289 void fib6_clean_all_skip_notify(struct net *net,
2290                                 int (*func)(struct fib6_info *, void *),
2291                                 void *arg)
2292 {
2293         __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true);
2294 }
2295 
/*
 * Obtain a new serial number from fib6_new_sernum() and walk all tables
 * with it.  No per-route function is passed, so the walk only carries the
 * serial number to the nodes.
 */
static void fib6_flush_trees(struct net *net)
{
	__fib6_clean_all(net, NULL, fib6_new_sernum(net), NULL, false);
}
2302 
2303 /*
2304  *      Garbage collection
2305  */
2306 
2307 static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
2308 {
2309         unsigned long now = jiffies;
2310 
2311         /*
2312          *      check addrconf expiration here.
2313          *      Routes are expired even if they are in use.
2314          */
2315 
2316         if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
2317                 if (time_after(now, rt->expires)) {
2318                         pr_debug("expiring %p\n", rt);
2319                         return -1;
2320                 }
2321                 gc_args->more++;
2322         }
2323 
2324         /*      Also age clones in the exception table.
2325          *      Note, that clones are aged out
2326          *      only if they are not in use now.
2327          */
2328         rt6_age_exceptions(rt, gc_args, now);
2329 
2330         return 0;
2331 }
2332 
2333 static void fib6_gc_table(struct net *net,
2334                           struct fib6_table *tb6,
2335                           struct fib6_gc_args *gc_args)
2336 {
2337         struct fib6_info *rt;
2338         struct hlist_node *n;
2339         struct nl_info info = {
2340                 .nl_net = net,
2341                 .skip_notify = false,
2342         };
2343 
2344         hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link)
2345                 if (fib6_age(rt, gc_args) == -1)
2346                         fib6_del(rt, &info);
2347 }
2348 
2349 static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args)
2350 {
2351         struct fib6_table *table;
2352         struct hlist_head *head;
2353         unsigned int h;
2354 
2355         rcu_read_lock();
2356         for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2357                 head = &net->ipv6.fib_table_hash[h];
2358                 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2359                         spin_lock_bh(&table->tb6_lock);
2360 
2361                         fib6_gc_table(net, table, gc_args);
2362 
2363                         spin_unlock_bh(&table->tb6_lock);
2364                 }
2365         }
2366         rcu_read_unlock();
2367 }
2368 
2369 void fib6_run_gc(unsigned long expires, struct net *net, bool force)
2370 {
2371         struct fib6_gc_args gc_args;
2372         unsigned long now;
2373 
2374         if (force) {
2375                 spin_lock_bh(&net->ipv6.fib6_gc_lock);
2376         } else if (!spin_trylock_bh(&net->ipv6.fib6_gc_lock)) {
2377                 mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
2378                 return;
2379         }
2380         gc_args.timeout = expires ? (int)expires :
2381                           net->ipv6.sysctl.ip6_rt_gc_interval;
2382         gc_args.more = 0;
2383 
2384         fib6_gc_all(net, &gc_args);
2385         now = jiffies;
2386         net->ipv6.ip6_rt_last_gc = now;
2387 
2388         if (gc_args.more)
2389                 mod_timer(&net->ipv6.ip6_fib_timer,
2390                           round_jiffies(now
2391                                         + net->ipv6.sysctl.ip6_rt_gc_interval));
2392         else
2393                 del_timer(&net->ipv6.ip6_fib_timer);
2394         spin_unlock_bh(&net->ipv6.fib6_gc_lock);
2395 }
2396 
2397 static void fib6_gc_timer_cb(struct timer_list *t)
2398 {
2399         struct net *arg = from_timer(arg, t, ipv6.ip6_fib_timer);
2400 
2401         fib6_run_gc(0, arg, true);
2402 }
2403 
2404 static int __net_init fib6_net_init(struct net *net)
2405 {
2406         size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
2407         int err;
2408 
2409         err = fib6_notifier_init(net);
2410         if (err)
2411                 return err;
2412 
2413         /* Default to 3-tuple */
2414         net->ipv6.sysctl.multipath_hash_fields =
2415                 FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK;
2416 
2417         spin_lock_init(&net->ipv6.fib6_gc_lock);
2418         rwlock_init(&net->ipv6.fib6_walker_lock);
2419         INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
2420         timer_setup(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, 0);
2421 
2422         net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
2423         if (!net->ipv6.rt6_stats)
2424                 goto out_notifier;
2425 
2426         /* Avoid false sharing : Use at least a full cache line */
2427         size = max_t(size_t, size, L1_CACHE_BYTES);
2428 
2429         net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL);
2430         if (!net->ipv6.fib_table_hash)
2431                 goto out_rt6_stats;
2432 
2433         net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl),
2434                                           GFP_KERNEL);
2435         if (!net->ipv6.fib6_main_tbl)
2436                 goto out_fib_table_hash;
2437 
2438         net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
2439         rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
2440                            net->ipv6.fib6_null_entry);
2441         net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
2442                 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
2443         inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
2444         INIT_HLIST_HEAD(&net->ipv6.fib6_main_tbl->tb6_gc_hlist);
2445 
2446 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2447         net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
2448                                            GFP_KERNEL);
2449         if (!net->ipv6.fib6_local_tbl)
2450                 goto out_fib6_main_tbl;
2451         net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
2452         rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
2453                            net->ipv6.fib6_null_entry);
2454         net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
2455                 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
2456         inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
2457         INIT_HLIST_HEAD(&net->ipv6.fib6_local_tbl->tb6_gc_hlist);
2458 #endif
2459         fib6_tables_init(net);
2460 
2461         return 0;
2462 
2463 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2464 out_fib6_main_tbl:
2465         kfree(net->ipv6.fib6_main_tbl);
2466 #endif
2467 out_fib_table_hash:
2468         kfree(net->ipv6.fib_table_hash);
2469 out_rt6_stats:
2470         kfree(net->ipv6.rt6_stats);
2471 out_notifier:
2472         fib6_notifier_exit(net);
2473         return -ENOMEM;
2474 }
2475 
2476 static void fib6_net_exit(struct net *net)
2477 {
2478         unsigned int i;
2479 
2480         del_timer_sync(&net->ipv6.ip6_fib_timer);
2481 
2482         for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
2483                 struct hlist_head *head = &net->ipv6.fib_table_hash[i];
2484                 struct hlist_node *tmp;
2485                 struct fib6_table *tb;
2486 
2487                 hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) {
2488                         hlist_del(&tb->tb6_hlist);
2489                         fib6_free_table(tb);
2490                 }
2491         }
2492 
2493         kfree(net->ipv6.fib_table_hash);
2494         kfree(net->ipv6.rt6_stats);
2495         fib6_notifier_exit(net);
2496 }
2497 
2498 static struct pernet_operations fib6_net_ops = {
2499         .init = fib6_net_init,
2500         .exit = fib6_net_exit,
2501 };
2502 
2503 int __init fib6_init(void)
2504 {
2505         int ret = -ENOMEM;
2506 
2507         fib6_node_kmem = KMEM_CACHE(fib6_node,
2508                                     SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT);
2509         if (!fib6_node_kmem)
2510                 goto out;
2511 
2512         ret = register_pernet_subsys(&fib6_net_ops);
2513         if (ret)
2514                 goto out_kmem_cache_create;
2515 
2516         ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
2517                                    inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED |
2518                                    RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
2519         if (ret)
2520                 goto out_unregister_subsys;
2521 
2522         __fib6_flush_trees = fib6_flush_trees;
2523 out:
2524         return ret;
2525 
2526 out_unregister_subsys:
2527         unregister_pernet_subsys(&fib6_net_ops);
2528 out_kmem_cache_create:
2529         kmem_cache_destroy(fib6_node_kmem);
2530         goto out;
2531 }
2532 
2533 void fib6_gc_cleanup(void)
2534 {
2535         unregister_pernet_subsys(&fib6_net_ops);
2536         kmem_cache_destroy(fib6_node_kmem);
2537 }
2538 
2539 #ifdef CONFIG_PROC_FS
2540 static int ipv6_route_native_seq_show(struct seq_file *seq, void *v)
2541 {
2542         struct fib6_info *rt = v;
2543         struct ipv6_route_iter *iter = seq->private;
2544         struct fib6_nh *fib6_nh = rt->fib6_nh;
2545         unsigned int flags = rt->fib6_flags;
2546         const struct net_device *dev;
2547 
2548         if (rt->nh)
2549                 fib6_nh = nexthop_fib6_nh(rt->nh);
2550 
2551         seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
2552 
2553 #ifdef CONFIG_IPV6_SUBTREES
2554         seq_printf(seq, "%pi6 %02x ", &rt->fib6_src.addr, rt->fib6_src.plen);
2555 #else
2556         seq_puts(seq, "00000000000000000000000000000000 00 ");
2557 #endif
2558         if (fib6_nh->fib_nh_gw_family) {
2559                 flags |= RTF_GATEWAY;
2560                 seq_printf(seq, "%pi6", &fib6_nh->fib_nh_gw6);
2561         } else {
2562                 seq_puts(seq, "00000000000000000000000000000000");
2563         }
2564 
2565         dev = fib6_nh->fib_nh_dev;
2566         seq_printf(seq, " %08x %08x %08x %08x %8s\n",
2567                    rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
2568                    flags, dev ? dev->name : "");
2569         iter->w.leaf = NULL;
2570         return 0;
2571 }
2572 
2573 static int ipv6_route_yield(struct fib6_walker *w)
2574 {
2575         struct ipv6_route_iter *iter = w->args;
2576 
2577         if (!iter->skip)
2578                 return 1;
2579 
2580         do {
2581                 iter->w.leaf = rcu_dereference_protected(
2582                                 iter->w.leaf->fib6_next,
2583                                 lockdep_is_held(&iter->tbl->tb6_lock));
2584                 iter->skip--;
2585                 if (!iter->skip && iter->w.leaf)
2586                         return 1;
2587         } while (iter->w.leaf);
2588 
2589         return 0;
2590 }
2591 
2592 static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
2593                                       struct net *net)
2594 {
2595         memset(&iter->w, 0, sizeof(iter->w));
2596         iter->w.func = ipv6_route_yield;
2597         iter->w.root = &iter->tbl->tb6_root;
2598         iter->w.state = FWS_INIT;
2599         iter->w.node = iter->w.root;
2600         iter->w.args = iter;
2601         iter->sernum = READ_ONCE(iter->w.root->fn_sernum);
2602         INIT_LIST_HEAD(&iter->w.lh);
2603         fib6_walker_link(net, &iter->w);
2604 }
2605 
2606 static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
2607                                                     struct net *net)
2608 {
2609         unsigned int h;
2610         struct hlist_node *node;
2611 
2612         if (tbl) {
2613                 h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1;
2614                 node = rcu_dereference(hlist_next_rcu(&tbl->tb6_hlist));
2615         } else {
2616                 h = 0;
2617                 node = NULL;
2618         }
2619 
2620         while (!node && h < FIB6_TABLE_HASHSZ) {
2621                 node = rcu_dereference(
2622                         hlist_first_rcu(&net->ipv6.fib_table_hash[h++]));
2623         }
2624         return hlist_entry_safe(node, struct fib6_table, tb6_hlist);
2625 }
2626 
2627 static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
2628 {
2629         int sernum = READ_ONCE(iter->w.root->fn_sernum);
2630 
2631         if (iter->sernum != sernum) {
2632                 iter->sernum = sernum;
2633                 iter->w.state = FWS_INIT;
2634                 iter->w.node = iter->w.root;
2635                 WARN_ON(iter->w.skip);
2636                 iter->w.skip = iter->w.count;
2637         }
2638 }
2639 
2640 static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2641 {
2642         int r;
2643         struct fib6_info *n;
2644         struct net *net = seq_file_net(seq);
2645         struct ipv6_route_iter *iter = seq->private;
2646 
2647         ++(*pos);
2648         if (!v)
2649                 goto iter_table;
2650 
2651         n = rcu_dereference(((struct fib6_info *)v)->fib6_next);
2652         if (n)
2653                 return n;
2654 
2655 iter_table:
2656         ipv6_route_check_sernum(iter);
2657         spin_lock_bh(&iter->tbl->tb6_lock);
2658         r = fib6_walk_continue(&iter->w);
2659         spin_unlock_bh(&iter->tbl->tb6_lock);
2660         if (r > 0) {
2661                 return iter->w.leaf;
2662         } else if (r < 0) {
2663                 fib6_walker_unlink(net, &iter->w);
2664                 return NULL;
2665         }
2666         fib6_walker_unlink(net, &iter->w);
2667 
2668         iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
2669         if (!iter->tbl)
2670                 return NULL;
2671 
2672         ipv6_route_seq_setup_walk(iter, net);
2673         goto iter_table;
2674 }
2675 
2676 static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
2677         __acquires(RCU)
2678 {
2679         struct net *net = seq_file_net(seq);
2680         struct ipv6_route_iter *iter = seq->private;
2681 
2682         rcu_read_lock();
2683         iter->tbl = ipv6_route_seq_next_table(NULL, net);
2684         iter->skip = *pos;
2685 
2686         if (iter->tbl) {
2687                 loff_t p = 0;
2688 
2689                 ipv6_route_seq_setup_walk(iter, net);
2690                 return ipv6_route_seq_next(seq, NULL, &p);
2691         } else {
2692                 return NULL;
2693         }
2694 }
2695 
2696 static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
2697 {
2698         struct fib6_walker *w = &iter->w;
2699         return w->node && !(w->state == FWS_U && w->node == w->root);
2700 }
2701 
2702 static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
2703         __releases(RCU)
2704 {
2705         struct net *net = seq_file_net(seq);
2706         struct ipv6_route_iter *iter = seq->private;
2707 
2708         if (ipv6_route_iter_active(iter))
2709                 fib6_walker_unlink(net, &iter->w);
2710 
2711         rcu_read_unlock();
2712 }
2713 
2714 #if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
2715 static int ipv6_route_prog_seq_show(struct bpf_prog *prog,
2716                                     struct bpf_iter_meta *meta,
2717                                     void *v)
2718 {
2719         struct bpf_iter__ipv6_route ctx;
2720 
2721         ctx.meta = meta;
2722         ctx.rt = v;
2723         return bpf_iter_run_prog(prog, &ctx);
2724 }
2725 
2726 static int ipv6_route_seq_show(struct seq_file *seq, void *v)
2727 {
2728         struct ipv6_route_iter *iter = seq->private;
2729         struct bpf_iter_meta meta;
2730         struct bpf_prog *prog;
2731         int ret;
2732 
2733         meta.seq = seq;
2734         prog = bpf_iter_get_info(&meta, false);
2735         if (!prog)
2736                 return ipv6_route_native_seq_show(seq, v);
2737 
2738         ret = ipv6_route_prog_seq_show(prog, &meta, v);
2739         iter->w.leaf = NULL;
2740 
2741         return ret;
2742 }
2743 
2744 static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
2745 {
2746         struct bpf_iter_meta meta;
2747         struct bpf_prog *prog;
2748 
2749         if (!v) {
2750                 meta.seq = seq;
2751                 prog = bpf_iter_get_info(&meta, true);
2752                 if (prog)
2753                         (void)ipv6_route_prog_seq_show(prog, &meta, v);
2754         }
2755 
2756         ipv6_route_native_seq_stop(seq, v);
2757 }
2758 #else
/* seq_file ->show handler (build without the BPF iterator): native output only. */
static int ipv6_route_seq_show(struct seq_file *seq, void *v)
{
	int rc = ipv6_route_native_seq_show(seq, v);

	return rc;
}
2763 
/* seq_file ->stop handler (build without the BPF iterator): native stop only. */
static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
{
	ipv6_route_native_seq_stop(seq, v);
}
2768 #endif
2769 
2770 const struct seq_operations ipv6_route_seq_ops = {
2771         .start  = ipv6_route_seq_start,
2772         .next   = ipv6_route_seq_next,
2773         .stop   = ipv6_route_seq_stop,
2774         .show   = ipv6_route_seq_show
2775 };
2776 #endif /* CONFIG_PROC_FS */
2777 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php