~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/ipv6/seg6_local.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-or-later
  2 /*
  3  *  SR-IPv6 implementation
  4  *
  5  *  Authors:
  6  *  David Lebrun <david.lebrun@uclouvain.be>
  7  *  eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
  8  */
  9 
 10 #include <linux/filter.h>
 11 #include <linux/types.h>
 12 #include <linux/skbuff.h>
 13 #include <linux/net.h>
 14 #include <linux/module.h>
 15 #include <net/ip.h>
 16 #include <net/lwtunnel.h>
 17 #include <net/netevent.h>
 18 #include <net/netns/generic.h>
 19 #include <net/ip6_fib.h>
 20 #include <net/route.h>
 21 #include <net/seg6.h>
 22 #include <linux/seg6.h>
 23 #include <linux/seg6_local.h>
 24 #include <net/addrconf.h>
 25 #include <net/ip6_route.h>
 26 #include <net/dst_cache.h>
 27 #include <net/ip_tunnels.h>
 28 #ifdef CONFIG_IPV6_SEG6_HMAC
 29 #include <net/seg6_hmac.h>
 30 #endif
 31 #include <net/seg6_local.h>
 32 #include <linux/etherdevice.h>
 33 #include <linux/bpf.h>
 34 #include <linux/netfilter.h>
 35 
 36 #define SEG6_F_ATTR(i)          BIT(i)
 37 
 38 struct seg6_local_lwt;
 39 
 40 /* callbacks used for customizing the creation and destruction of a behavior */
 41 struct seg6_local_lwtunnel_ops {
 42         int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg,
 43                            struct netlink_ext_ack *extack);
 44         void (*destroy_state)(struct seg6_local_lwt *slwt);
 45 };
 46 
 47 struct seg6_action_desc {
 48         int action;
 49         unsigned long attrs;
 50 
 51         /* The optattrs field is used for specifying all the optional
 52          * attributes supported by a specific behavior.
 53          * It means that if one of these attributes is not provided in the
 54          * netlink message during the behavior creation, no errors will be
 55          * returned to the userspace.
 56          *
 57          * Each attribute can be only of two types (mutually exclusive):
 58          * 1) required or 2) optional.
 59          * Every user MUST obey to this rule! If you set an attribute as
 60          * required the same attribute CANNOT be set as optional and vice
 61          * versa.
 62          */
 63         unsigned long optattrs;
 64 
 65         int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
 66         int static_headroom;
 67 
 68         struct seg6_local_lwtunnel_ops slwt_ops;
 69 };
 70 
 71 struct bpf_lwt_prog {
 72         struct bpf_prog *prog;
 73         char *name;
 74 };
 75 
 76 /* default length values (expressed in bits) for both Locator-Block and
 77  * Locator-Node Function.
 78  *
 79  * Both SEG6_LOCAL_LCBLOCK_DBITS and SEG6_LOCAL_LCNODE_FN_DBITS *must* be:
 80  *    i) greater than 0;
 81  *   ii) evenly divisible by 8. In other terms, the lengths of the
 82  *       Locator-Block and Locator-Node Function must be byte-aligned (we can
 83  *       relax this constraint in the future if really needed).
 84  *
 85  * Moreover, a third condition must hold:
 86  *  iii) SEG6_LOCAL_LCBLOCK_DBITS + SEG6_LOCAL_LCNODE_FN_DBITS <= 128.
 87  *
 88  * The correctness of the SEG6_LOCAL_LCBLOCK_DBITS and
 89  * SEG6_LOCAL_LCNODE_FN_DBITS values is checked during kernel compilation. If
 90  * the compilation stops, check the values of these parameters to see if they
 91  * meet conditions (i), (ii) and (iii).
 92  */
 93 #define SEG6_LOCAL_LCBLOCK_DBITS        32
 94 #define SEG6_LOCAL_LCNODE_FN_DBITS      16
 95 
 96 /* The following next_csid_chk_{cntr,lcblock,lcblock_fn}_bits macros can be
 97  * used directly to check whether the lengths (in bits) of Locator-Block and
 98  * Locator-Node Function are valid according to (i), (ii), (iii).
 99  */
100 #define next_csid_chk_cntr_bits(blen, flen)             \
101         ((blen) + (flen) > 128)
102 
103 #define next_csid_chk_lcblock_bits(blen)                \
104 ({                                                      \
105         typeof(blen) __tmp = blen;                      \
106         (!__tmp || __tmp > 120 || (__tmp & 0x07));      \
107 })
108 
109 #define next_csid_chk_lcnode_fn_bits(flen)              \
110         next_csid_chk_lcblock_bits(flen)
111 
112 /* flag indicating that flavors are set up for a given End* behavior */
113 #define SEG6_F_LOCAL_FLAVORS            SEG6_F_ATTR(SEG6_LOCAL_FLAVORS)
114 
115 #define SEG6_F_LOCAL_FLV_OP(flvname)    BIT(SEG6_LOCAL_FLV_OP_##flvname)
116 #define SEG6_F_LOCAL_FLV_NEXT_CSID      SEG6_F_LOCAL_FLV_OP(NEXT_CSID)
117 #define SEG6_F_LOCAL_FLV_PSP            SEG6_F_LOCAL_FLV_OP(PSP)
118 
119 /* Supported RFC8986 Flavor operations are reported in this bitmask */
120 #define SEG6_LOCAL_FLV8986_SUPP_OPS     SEG6_F_LOCAL_FLV_PSP
121 
122 #define SEG6_LOCAL_END_FLV_SUPP_OPS     (SEG6_F_LOCAL_FLV_NEXT_CSID | \
123                                          SEG6_LOCAL_FLV8986_SUPP_OPS)
124 #define SEG6_LOCAL_END_X_FLV_SUPP_OPS   SEG6_F_LOCAL_FLV_NEXT_CSID
125 
126 struct seg6_flavors_info {
127         /* Flavor operations */
128         __u32 flv_ops;
129 
130         /* Locator-Block length, expressed in bits */
131         __u8 lcblock_bits;
132         /* Locator-Node Function length, expressed in bits*/
133         __u8 lcnode_func_bits;
134 };
135 
136 enum seg6_end_dt_mode {
137         DT_INVALID_MODE = -EINVAL,
138         DT_LEGACY_MODE  = 0,
139         DT_VRF_MODE     = 1,
140 };
141 
142 struct seg6_end_dt_info {
143         enum seg6_end_dt_mode mode;
144 
145         struct net *net;
146         /* VRF device associated to the routing table used by the SRv6
147          * End.DT4/DT6 behavior for routing IPv4/IPv6 packets.
148          */
149         int vrf_ifindex;
150         int vrf_table;
151 
152         /* tunneled packet family (IPv4 or IPv6).
153          * Protocol and header length are inferred from family.
154          */
155         u16 family;
156 };
157 
158 struct pcpu_seg6_local_counters {
159         u64_stats_t packets;
160         u64_stats_t bytes;
161         u64_stats_t errors;
162 
163         struct u64_stats_sync syncp;
164 };
165 
166 /* This struct groups all the SRv6 Behavior counters supported so far.
167  *
168  * put_nla_counters() makes use of this data structure to collect all counter
169  * values after the per-CPU counter evaluation has been performed.
170  * Finally, each counter value (in seg6_local_counters) is stored in the
171  * corresponding netlink attribute and sent to user space.
172  *
173  * NB: we don't want to expose this structure to user space!
174  */
175 struct seg6_local_counters {
176         __u64 packets;
177         __u64 bytes;
178         __u64 errors;
179 };
180 
181 #define seg6_local_alloc_pcpu_counters(__gfp)                           \
182         __netdev_alloc_pcpu_stats(struct pcpu_seg6_local_counters,      \
183                                   ((__gfp) | __GFP_ZERO))
184 
185 #define SEG6_F_LOCAL_COUNTERS   SEG6_F_ATTR(SEG6_LOCAL_COUNTERS)
186 
187 struct seg6_local_lwt {
188         int action;
189         struct ipv6_sr_hdr *srh;
190         int table;
191         struct in_addr nh4;
192         struct in6_addr nh6;
193         int iif;
194         int oif;
195         struct bpf_lwt_prog bpf;
196 #ifdef CONFIG_NET_L3_MASTER_DEV
197         struct seg6_end_dt_info dt_info;
198 #endif
199         struct seg6_flavors_info flv_info;
200 
201         struct pcpu_seg6_local_counters __percpu *pcpu_counters;
202 
203         int headroom;
204         struct seg6_action_desc *desc;
205         /* unlike the required attrs, we have to track the optional attributes
206          * that have been effectively parsed.
207          */
208         unsigned long parsed_optattrs;
209 };
210 
211 static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
212 {
213         return (struct seg6_local_lwt *)lwt->data;
214 }
215 
216 static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
217 {
218         struct ipv6_sr_hdr *srh;
219 
220         srh = seg6_get_srh(skb, IP6_FH_F_SKIP_RH);
221         if (!srh)
222                 return NULL;
223 
224 #ifdef CONFIG_IPV6_SEG6_HMAC
225         if (!seg6_hmac_validate_skb(skb))
226                 return NULL;
227 #endif
228 
229         return srh;
230 }
231 
232 static bool decap_and_validate(struct sk_buff *skb, int proto)
233 {
234         struct ipv6_sr_hdr *srh;
235         unsigned int off = 0;
236 
237         srh = seg6_get_srh(skb, 0);
238         if (srh && srh->segments_left > 0)
239                 return false;
240 
241 #ifdef CONFIG_IPV6_SEG6_HMAC
242         if (srh && !seg6_hmac_validate_skb(skb))
243                 return false;
244 #endif
245 
246         if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
247                 return false;
248 
249         if (!pskb_pull(skb, off))
250                 return false;
251 
252         skb_postpull_rcsum(skb, skb_network_header(skb), off);
253 
254         skb_reset_network_header(skb);
255         skb_reset_transport_header(skb);
256         if (iptunnel_pull_offloads(skb))
257                 return false;
258 
259         return true;
260 }
261 
262 static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
263 {
264         struct in6_addr *addr;
265 
266         srh->segments_left--;
267         addr = srh->segments + srh->segments_left;
268         *daddr = *addr;
269 }
270 
271 static int
272 seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
273                         u32 tbl_id, bool local_delivery)
274 {
275         struct net *net = dev_net(skb->dev);
276         struct ipv6hdr *hdr = ipv6_hdr(skb);
277         int flags = RT6_LOOKUP_F_HAS_SADDR;
278         struct dst_entry *dst = NULL;
279         struct rt6_info *rt;
280         struct flowi6 fl6;
281         int dev_flags = 0;
282 
283         memset(&fl6, 0, sizeof(fl6));
284         fl6.flowi6_iif = skb->dev->ifindex;
285         fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
286         fl6.saddr = hdr->saddr;
287         fl6.flowlabel = ip6_flowinfo(hdr);
288         fl6.flowi6_mark = skb->mark;
289         fl6.flowi6_proto = hdr->nexthdr;
290 
291         if (nhaddr)
292                 fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
293 
294         if (!tbl_id) {
295                 dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
296         } else {
297                 struct fib6_table *table;
298 
299                 table = fib6_get_table(net, tbl_id);
300                 if (!table)
301                         goto out;
302 
303                 rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
304                 dst = &rt->dst;
305         }
306 
307         /* we want to discard traffic destined for local packet processing,
308          * if @local_delivery is set to false.
309          */
310         if (!local_delivery)
311                 dev_flags |= IFF_LOOPBACK;
312 
313         if (dst && (dst->dev->flags & dev_flags) && !dst->error) {
314                 dst_release(dst);
315                 dst = NULL;
316         }
317 
318 out:
319         if (!dst) {
320                 rt = net->ipv6.ip6_blk_hole_entry;
321                 dst = &rt->dst;
322                 dst_hold(dst);
323         }
324 
325         skb_dst_drop(skb);
326         skb_dst_set(skb, dst);
327         return dst->error;
328 }
329 
330 int seg6_lookup_nexthop(struct sk_buff *skb,
331                         struct in6_addr *nhaddr, u32 tbl_id)
332 {
333         return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false);
334 }
335 
336 static __u8 seg6_flv_lcblock_octects(const struct seg6_flavors_info *finfo)
337 {
338         return finfo->lcblock_bits >> 3;
339 }
340 
341 static __u8 seg6_flv_lcnode_func_octects(const struct seg6_flavors_info *finfo)
342 {
343         return finfo->lcnode_func_bits >> 3;
344 }
345 
346 static bool seg6_next_csid_is_arg_zero(const struct in6_addr *addr,
347                                        const struct seg6_flavors_info *finfo)
348 {
349         __u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo);
350         __u8 blk_octects = seg6_flv_lcblock_octects(finfo);
351         __u8 arg_octects;
352         int i;
353 
354         arg_octects = 16 - blk_octects - fnc_octects;
355         for (i = 0; i < arg_octects; ++i) {
356                 if (addr->s6_addr[blk_octects + fnc_octects + i] != 0x00)
357                         return false;
358         }
359 
360         return true;
361 }
362 
363 /* assume that DA.Argument length > 0 */
364 static void seg6_next_csid_advance_arg(struct in6_addr *addr,
365                                        const struct seg6_flavors_info *finfo)
366 {
367         __u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo);
368         __u8 blk_octects = seg6_flv_lcblock_octects(finfo);
369 
370         /* advance DA.Argument */
371         memmove(&addr->s6_addr[blk_octects],
372                 &addr->s6_addr[blk_octects + fnc_octects],
373                 16 - blk_octects - fnc_octects);
374 
375         memset(&addr->s6_addr[16 - fnc_octects], 0x00, fnc_octects);
376 }
377 
378 static int input_action_end_finish(struct sk_buff *skb,
379                                    struct seg6_local_lwt *slwt)
380 {
381         seg6_lookup_nexthop(skb, NULL, 0);
382 
383         return dst_input(skb);
384 }
385 
386 static int input_action_end_core(struct sk_buff *skb,
387                                  struct seg6_local_lwt *slwt)
388 {
389         struct ipv6_sr_hdr *srh;
390 
391         srh = get_and_validate_srh(skb);
392         if (!srh)
393                 goto drop;
394 
395         advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
396 
397         return input_action_end_finish(skb, slwt);
398 
399 drop:
400         kfree_skb(skb);
401         return -EINVAL;
402 }
403 
404 static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
405 {
406         const struct seg6_flavors_info *finfo = &slwt->flv_info;
407         struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
408 
409         if (seg6_next_csid_is_arg_zero(daddr, finfo))
410                 return input_action_end_core(skb, slwt);
411 
412         /* update DA */
413         seg6_next_csid_advance_arg(daddr, finfo);
414 
415         return input_action_end_finish(skb, slwt);
416 }
417 
418 static int input_action_end_x_finish(struct sk_buff *skb,
419                                      struct seg6_local_lwt *slwt)
420 {
421         seg6_lookup_nexthop(skb, &slwt->nh6, 0);
422 
423         return dst_input(skb);
424 }
425 
426 static int input_action_end_x_core(struct sk_buff *skb,
427                                    struct seg6_local_lwt *slwt)
428 {
429         struct ipv6_sr_hdr *srh;
430 
431         srh = get_and_validate_srh(skb);
432         if (!srh)
433                 goto drop;
434 
435         advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
436 
437         return input_action_end_x_finish(skb, slwt);
438 
439 drop:
440         kfree_skb(skb);
441         return -EINVAL;
442 }
443 
444 static int end_x_next_csid_core(struct sk_buff *skb,
445                                 struct seg6_local_lwt *slwt)
446 {
447         const struct seg6_flavors_info *finfo = &slwt->flv_info;
448         struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
449 
450         if (seg6_next_csid_is_arg_zero(daddr, finfo))
451                 return input_action_end_x_core(skb, slwt);
452 
453         /* update DA */
454         seg6_next_csid_advance_arg(daddr, finfo);
455 
456         return input_action_end_x_finish(skb, slwt);
457 }
458 
459 static bool seg6_next_csid_enabled(__u32 fops)
460 {
461         return fops & SEG6_F_LOCAL_FLV_NEXT_CSID;
462 }
463 
464 /* Processing of SRv6 End, End.X, and End.T behaviors can be extended through
465  * the flavors framework. These behaviors must report the subset of (flavor)
466  * operations they currently implement. In this way, if a user specifies a
467  * flavor combination that is not supported by a given End* behavior, the
468  * kernel refuses to instantiate the tunnel reporting the error.
469  */
470 static int seg6_flv_supp_ops_by_action(int action, __u32 *fops)
471 {
472         switch (action) {
473         case SEG6_LOCAL_ACTION_END:
474                 *fops = SEG6_LOCAL_END_FLV_SUPP_OPS;
475                 break;
476         case SEG6_LOCAL_ACTION_END_X:
477                 *fops = SEG6_LOCAL_END_X_FLV_SUPP_OPS;
478                 break;
479         default:
480                 return -EOPNOTSUPP;
481         }
482 
483         return 0;
484 }
485 
486 /* We describe the packet state in relation to the absence/presence of the SRH
487  * and the Segment Left (SL) field.
488  * For our purposes, it is not necessary to record the exact value of the SL
489  * when the SID List consists of two or more segments.
490  */
enum seg6_local_pktinfo {
	/* do NOT reorder: seg6_get_srh_pktinfo() relies on SL_ONE following
	 * SL_ZERO.
	 */
	SEG6_LOCAL_PKTINFO_NOHDR	= 0,	/* no SRH */
	SEG6_LOCAL_PKTINFO_SL_ZERO,		/* SRH with SL = 0 */
	SEG6_LOCAL_PKTINFO_SL_ONE,		/* SRH with SL = 1 */
	SEG6_LOCAL_PKTINFO_SL_MORE,		/* SRH with SL > 1 */
	__SEG6_LOCAL_PKTINFO_MAX,
};
499 
500 #define SEG6_LOCAL_PKTINFO_MAX (__SEG6_LOCAL_PKTINFO_MAX - 1)
501 
502 static enum seg6_local_pktinfo seg6_get_srh_pktinfo(struct ipv6_sr_hdr *srh)
503 {
504         __u8 sgl;
505 
506         if (!srh)
507                 return SEG6_LOCAL_PKTINFO_NOHDR;
508 
509         sgl = srh->segments_left;
510         if (sgl < 2)
511                 return SEG6_LOCAL_PKTINFO_SL_ZERO + sgl;
512 
513         return SEG6_LOCAL_PKTINFO_SL_MORE;
514 }
515 
/* Actions stored in the RFC8986 flavors action table. */
enum seg6_local_flv_action {
	SEG6_LOCAL_FLV_ACT_UNSPEC	= 0,
	SEG6_LOCAL_FLV_ACT_END,
	SEG6_LOCAL_FLV_ACT_PSP,
	SEG6_LOCAL_FLV_ACT_USP,
	SEG6_LOCAL_FLV_ACT_USD,
	__SEG6_LOCAL_FLV_ACT_MAX
};
524 
525 #define SEG6_LOCAL_FLV_ACT_MAX (__SEG6_LOCAL_FLV_ACT_MAX - 1)
526 
527 /* The action table for RFC8986 flavors (see the flv8986_act_tbl below)
528  * contains the actions (i.e. processing operations) to be applied on packets
529  * when flavors are configured for an End* behavior.
530  * By combining the pktinfo data and the flavors mask, the macro
531  * computes the index used to access the elements (actions) stored in the
532  * action table. The index is structured as follows:
533  *
534  *                     index
535  *       _______________/\________________
536  *      /                                 \
537  *      +----------------+----------------+
538  *      |        pf      |      afm       |
539  *      +----------------+----------------+
540  *        ph-1 ... p1 p0   fk-1 ... f1 f0
541  *     MSB                               LSB
542  *
543  * where:
544  *  - 'afm' (adjusted flavor mask) is the mask containing a combination of the
545  *     RFC8986 flavors currently supported. 'afm' corresponds to the @fm
546  *     argument of the macro whose value is right-shifted by 1 bit. By doing so,
547  *     we discard the SEG6_LOCAL_FLV_OP_UNSPEC flag (bit 0 in @fm) which is
548  *     never used here;
549  *  - 'pf' encodes the packet info (pktinfo) regarding the presence/absence of
550  *    the SRH, SL = 0, etc. 'pf' is set with the value of @pf provided as
551  *    argument to the macro.
552  */
553 #define flv8986_act_tbl_idx(pf, fm)                                     \
554         ((((pf) << bits_per(SEG6_LOCAL_FLV8986_SUPP_OPS)) |             \
555           ((fm) & SEG6_LOCAL_FLV8986_SUPP_OPS)) >> SEG6_LOCAL_FLV_OP_PSP)
556 
557 /* We compute the size of the action table by considering the RFC8986 flavors
558  * actually supported by the kernel. In this way, the size is automatically
559  * adjusted when new flavors are supported.
560  */
561 #define FLV8986_ACT_TBL_SIZE                                            \
562         roundup_pow_of_two(flv8986_act_tbl_idx(SEG6_LOCAL_PKTINFO_MAX,  \
563                                                SEG6_LOCAL_FLV8986_SUPP_OPS))
564 
565 /* tbl_cfg(act, pf, fm) macro is used to easily configure the action
566  * table; it accepts 3 arguments:
567  *     i) @act, the suffix from SEG6_LOCAL_FLV_ACT_{act} representing
568  *        the action that should be applied on the packet;
569  *    ii) @pf, the suffix from SEG6_LOCAL_PKTINFO_{pf} reporting the packet
570  *        info about the lack/presence of SRH, SRH with SL = 0, etc;
571  *   iii) @fm, the mask of flavors.
572  */
573 #define tbl_cfg(act, pf, fm)                                            \
574         [flv8986_act_tbl_idx(SEG6_LOCAL_PKTINFO_##pf,                   \
575                              (fm))] = SEG6_LOCAL_FLV_ACT_##act
576 
577 /* shorthand for improving readability */
578 #define F_PSP   SEG6_F_LOCAL_FLV_PSP
579 
580 /* The table contains, for each combination of the pktinfo data and
581  * flavors, the action that should be taken on a packet (e.g.
582  * "standard" Endpoint processing, Penultimate Segment Pop, etc).
583  *
584  * By default, table entries not explicitly configured are initialized with the
585  * SEG6_LOCAL_FLV_ACT_UNSPEC action, which generally has the effect of
586  * discarding the processed packet.
587  */
588 static const u8 flv8986_act_tbl[FLV8986_ACT_TBL_SIZE] = {
589         /* PSP variant for packet where SRH with SL = 1 */
590         tbl_cfg(PSP, SL_ONE, F_PSP),
591         /* End for packet where the SRH with SL > 1*/
592         tbl_cfg(END, SL_MORE, F_PSP),
593 };
594 
595 #undef F_PSP
596 #undef tbl_cfg
597 
598 /* For each flavor defined in RFC8986 (or a combination of them) an action is
599  * performed on the packet. The specific action depends on:
600  *  - info extracted from the packet (i.e. pktinfo data) regarding the
601  *    lack/presence of the SRH, and if the SRH is available, on the value of
602  *    Segment Left field;
603  *  - the mask of flavors configured for the specific SRv6 End* behavior.
604  *
605  * The function combines both the pktinfo and the flavors mask to evaluate the
606  * corresponding action to be taken on the packet.
607  */
608 static enum seg6_local_flv_action
609 seg6_local_flv8986_act_lookup(enum seg6_local_pktinfo pinfo, __u32 flvmask)
610 {
611         unsigned long index;
612 
613         /* check if the provided mask of flavors is supported */
614         if (unlikely(flvmask & ~SEG6_LOCAL_FLV8986_SUPP_OPS))
615                 return SEG6_LOCAL_FLV_ACT_UNSPEC;
616 
617         index = flv8986_act_tbl_idx(pinfo, flvmask);
618         if (unlikely(index >= FLV8986_ACT_TBL_SIZE))
619                 return SEG6_LOCAL_FLV_ACT_UNSPEC;
620 
621         return flv8986_act_tbl[index];
622 }
623 
624 /* skb->data must be aligned with skb->network_header */
625 static bool seg6_pop_srh(struct sk_buff *skb, int srhoff)
626 {
627         struct ipv6_sr_hdr *srh;
628         struct ipv6hdr *iph;
629         __u8 srh_nexthdr;
630         int thoff = -1;
631         int srhlen;
632         int nhlen;
633 
634         if (unlikely(srhoff < sizeof(*iph) ||
635                      !pskb_may_pull(skb, srhoff + sizeof(*srh))))
636                 return false;
637 
638         srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
639         srhlen = ipv6_optlen(srh);
640 
641         /* we are about to mangle the pkt, let's check if we can write on it */
642         if (unlikely(skb_ensure_writable(skb, srhoff + srhlen)))
643                 return false;
644 
645         /* skb_ensure_writable() may change skb pointers; evaluate srh again */
646         srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
647         srh_nexthdr = srh->nexthdr;
648 
649         if (unlikely(!skb_transport_header_was_set(skb)))
650                 goto pull;
651 
652         nhlen = skb_network_header_len(skb);
653         /* we have to deal with the transport header: it could be set before
654          * the SRH, after the SRH, or within it (which is considered wrong,
655          * however).
656          */
657         if (likely(nhlen <= srhoff))
658                 thoff = nhlen;
659         else if (nhlen >= srhoff + srhlen)
660                 /* transport_header is set after the SRH */
661                 thoff = nhlen - srhlen;
662         else
663                 /* transport_header falls inside the SRH; hence, we can't
664                  * restore the transport_header pointer properly after
665                  * SRH removing operation.
666                  */
667                 return false;
668 pull:
669         /* we need to pop the SRH:
670          *  1) first of all, we pull out everything from IPv6 header up to SRH
671          *     (included) evaluating also the rcsum;
672          *  2) we overwrite (and then remove) the SRH by properly moving the
673          *     IPv6 along with any extension header that precedes the SRH;
674          *  3) At the end, we push back the pulled headers (except for SRH,
675          *     obviously).
676          */
677         skb_pull_rcsum(skb, srhoff + srhlen);
678         memmove(skb_network_header(skb) + srhlen, skb_network_header(skb),
679                 srhoff);
680         skb_push(skb, srhoff);
681 
682         skb_reset_network_header(skb);
683         skb_mac_header_rebuild(skb);
684         if (likely(thoff >= 0))
685                 skb_set_transport_header(skb, thoff);
686 
687         iph = ipv6_hdr(skb);
688         if (iph->nexthdr == NEXTHDR_ROUTING) {
689                 iph->nexthdr = srh_nexthdr;
690         } else {
691                 /* we must look for the extension header (EXTH, for short) that
692                  * immediately precedes the SRH we have just removed.
693                  * Then, we update the value of the EXTH nexthdr with the one
694                  * contained in the SRH nexthdr.
695                  */
696                 unsigned int off = sizeof(*iph);
697                 struct ipv6_opt_hdr *hp, _hdr;
698                 __u8 nexthdr = iph->nexthdr;
699 
700                 for (;;) {
701                         if (unlikely(!ipv6_ext_hdr(nexthdr) ||
702                                      nexthdr == NEXTHDR_NONE))
703                                 return false;
704 
705                         hp = skb_header_pointer(skb, off, sizeof(_hdr), &_hdr);
706                         if (unlikely(!hp))
707                                 return false;
708 
709                         if (hp->nexthdr == NEXTHDR_ROUTING) {
710                                 hp->nexthdr = srh_nexthdr;
711                                 break;
712                         }
713 
714                         switch (nexthdr) {
715                         case NEXTHDR_FRAGMENT:
716                                 fallthrough;
717                         case NEXTHDR_AUTH:
718                                 /* we expect SRH before FRAG and AUTH */
719                                 return false;
720                         default:
721                                 off += ipv6_optlen(hp);
722                                 break;
723                         }
724 
725                         nexthdr = hp->nexthdr;
726                 }
727         }
728 
729         iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
730 
731         skb_postpush_rcsum(skb, iph, srhoff);
732 
733         return true;
734 }
735 
736 /* process the packet on the basis of the RFC8986 flavors set for the given
737  * SRv6 End behavior instance.
738  */
739 static int end_flv8986_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
740 {
741         const struct seg6_flavors_info *finfo = &slwt->flv_info;
742         enum seg6_local_flv_action action;
743         enum seg6_local_pktinfo pinfo;
744         struct ipv6_sr_hdr *srh;
745         __u32 flvmask;
746         int srhoff;
747 
748         srh = seg6_get_srh(skb, 0);
749         srhoff = srh ? ((unsigned char *)srh - skb->data) : 0;
750         pinfo = seg6_get_srh_pktinfo(srh);
751 #ifdef CONFIG_IPV6_SEG6_HMAC
752         if (srh && !seg6_hmac_validate_skb(skb))
753                 goto drop;
754 #endif
755         flvmask = finfo->flv_ops;
756         if (unlikely(flvmask & ~SEG6_LOCAL_FLV8986_SUPP_OPS)) {
757                 pr_warn_once("seg6local: invalid RFC8986 flavors\n");
758                 goto drop;
759         }
760 
761         /* retrieve the action triggered by the combination of pktinfo data and
762          * the flavors mask.
763          */
764         action = seg6_local_flv8986_act_lookup(pinfo, flvmask);
765         switch (action) {
766         case SEG6_LOCAL_FLV_ACT_END:
767                 /* process the packet as the "standard" End behavior */
768                 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
769                 break;
770         case SEG6_LOCAL_FLV_ACT_PSP:
771                 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
772 
773                 if (unlikely(!seg6_pop_srh(skb, srhoff)))
774                         goto drop;
775                 break;
776         case SEG6_LOCAL_FLV_ACT_UNSPEC:
777                 fallthrough;
778         default:
779                 /* by default, we drop the packet since we could not find a
780                  * suitable action.
781                  */
782                 goto drop;
783         }
784 
785         return input_action_end_finish(skb, slwt);
786 
787 drop:
788         kfree_skb(skb);
789         return -EINVAL;
790 }
791 
792 /* regular endpoint function */
793 static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
794 {
795         const struct seg6_flavors_info *finfo = &slwt->flv_info;
796         __u32 fops = finfo->flv_ops;
797 
798         if (!fops)
799                 return input_action_end_core(skb, slwt);
800 
801         /* check for the presence of NEXT-C-SID since it applies first */
802         if (seg6_next_csid_enabled(fops))
803                 return end_next_csid_core(skb, slwt);
804 
805         /* the specific processing function to be performed on the packet
806          * depends on the combination of flavors defined in RFC8986 and some
807          * information extracted from the packet, e.g. presence/absence of SRH,
808          * Segment Left = 0, etc.
809          */
810         return end_flv8986_core(skb, slwt);
811 }
812 
813 /* regular endpoint, and forward to specified nexthop */
814 static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
815 {
816         const struct seg6_flavors_info *finfo = &slwt->flv_info;
817         __u32 fops = finfo->flv_ops;
818 
819         /* check for the presence of NEXT-C-SID since it applies first */
820         if (seg6_next_csid_enabled(fops))
821                 return end_x_next_csid_core(skb, slwt);
822 
823         return input_action_end_x_core(skb, slwt);
824 }
825 
826 static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
827 {
828         struct ipv6_sr_hdr *srh;
829 
830         srh = get_and_validate_srh(skb);
831         if (!srh)
832                 goto drop;
833 
834         advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
835 
836         seg6_lookup_nexthop(skb, NULL, slwt->table);
837 
838         return dst_input(skb);
839 
840 drop:
841         kfree_skb(skb);
842         return -EINVAL;
843 }
844 
845 /* decapsulate and forward inner L2 frame on specified interface */
846 static int input_action_end_dx2(struct sk_buff *skb,
847                                 struct seg6_local_lwt *slwt)
848 {
849         struct net *net = dev_net(skb->dev);
850         struct net_device *odev;
851         struct ethhdr *eth;
852 
853         if (!decap_and_validate(skb, IPPROTO_ETHERNET))
854                 goto drop;
855 
856         if (!pskb_may_pull(skb, ETH_HLEN))
857                 goto drop;
858 
859         skb_reset_mac_header(skb);
860         eth = (struct ethhdr *)skb->data;
861 
862         /* To determine the frame's protocol, we assume it is 802.3. This avoids
863          * a call to eth_type_trans(), which is not really relevant for our
864          * use case.
865          */
866         if (!eth_proto_is_802_3(eth->h_proto))
867                 goto drop;
868 
869         odev = dev_get_by_index_rcu(net, slwt->oif);
870         if (!odev)
871                 goto drop;
872 
873         /* As we accept Ethernet frames, make sure the egress device is of
874          * the correct type.
875          */
876         if (odev->type != ARPHRD_ETHER)
877                 goto drop;
878 
879         if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
880                 goto drop;
881 
882         skb_orphan(skb);
883 
884         if (skb_warn_if_lro(skb))
885                 goto drop;
886 
887         skb_forward_csum(skb);
888 
889         if (skb->len - ETH_HLEN > odev->mtu)
890                 goto drop;
891 
892         skb->dev = odev;
893         skb->protocol = eth->h_proto;
894 
895         return dev_queue_xmit(skb);
896 
897 drop:
898         kfree_skb(skb);
899         return -EINVAL;
900 }
901 
902 static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
903                                        struct sk_buff *skb)
904 {
905         struct dst_entry *orig_dst = skb_dst(skb);
906         struct in6_addr *nhaddr = NULL;
907         struct seg6_local_lwt *slwt;
908 
909         slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
910 
911         /* The inner packet is not associated to any local interface,
912          * so we do not call netif_rx().
913          *
914          * If slwt->nh6 is set to ::, then lookup the nexthop for the
915          * inner packet's DA. Otherwise, use the specified nexthop.
916          */
917         if (!ipv6_addr_any(&slwt->nh6))
918                 nhaddr = &slwt->nh6;
919 
920         seg6_lookup_nexthop(skb, nhaddr, 0);
921 
922         return dst_input(skb);
923 }
924 
925 /* decapsulate and forward to specified nexthop */
926 static int input_action_end_dx6(struct sk_buff *skb,
927                                 struct seg6_local_lwt *slwt)
928 {
929         /* this function accepts IPv6 encapsulated packets, with either
930          * an SRH with SL=0, or no SRH.
931          */
932 
933         if (!decap_and_validate(skb, IPPROTO_IPV6))
934                 goto drop;
935 
936         if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
937                 goto drop;
938 
939         skb_set_transport_header(skb, sizeof(struct ipv6hdr));
940         nf_reset_ct(skb);
941 
942         if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
943                 return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
944                                dev_net(skb->dev), NULL, skb, skb->dev,
945                                NULL, input_action_end_dx6_finish);
946 
947         return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
948 drop:
949         kfree_skb(skb);
950         return -EINVAL;
951 }
952 
953 static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
954                                        struct sk_buff *skb)
955 {
956         struct dst_entry *orig_dst = skb_dst(skb);
957         struct seg6_local_lwt *slwt;
958         struct iphdr *iph;
959         __be32 nhaddr;
960         int err;
961 
962         slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
963 
964         iph = ip_hdr(skb);
965 
966         nhaddr = slwt->nh4.s_addr ?: iph->daddr;
967 
968         skb_dst_drop(skb);
969 
970         err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
971         if (err) {
972                 kfree_skb(skb);
973                 return -EINVAL;
974         }
975 
976         return dst_input(skb);
977 }
978 
979 static int input_action_end_dx4(struct sk_buff *skb,
980                                 struct seg6_local_lwt *slwt)
981 {
982         if (!decap_and_validate(skb, IPPROTO_IPIP))
983                 goto drop;
984 
985         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
986                 goto drop;
987 
988         skb->protocol = htons(ETH_P_IP);
989         skb_set_transport_header(skb, sizeof(struct iphdr));
990         nf_reset_ct(skb);
991 
992         if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
993                 return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
994                                dev_net(skb->dev), NULL, skb, skb->dev,
995                                NULL, input_action_end_dx4_finish);
996 
997         return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
998 drop:
999         kfree_skb(skb);
1000         return -EINVAL;
1001 }
1002 
1003 #ifdef CONFIG_NET_L3_MASTER_DEV
1004 static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg)
1005 {
1006         const struct nl_info *nli = &fib6_cfg->fc_nlinfo;
1007 
1008         return nli->nl_net;
1009 }
1010 
1011 static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
1012                                    u16 family, struct netlink_ext_ack *extack)
1013 {
1014         struct seg6_end_dt_info *info = &slwt->dt_info;
1015         int vrf_ifindex;
1016         struct net *net;
1017 
1018         net = fib6_config_get_net(cfg);
1019 
1020         /* note that vrf_table was already set by parse_nla_vrftable() */
1021         vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net,
1022                                                         info->vrf_table);
1023         if (vrf_ifindex < 0) {
1024                 if (vrf_ifindex == -EPERM) {
1025                         NL_SET_ERR_MSG(extack,
1026                                        "Strict mode for VRF is disabled");
1027                 } else if (vrf_ifindex == -ENODEV) {
1028                         NL_SET_ERR_MSG(extack,
1029                                        "Table has no associated VRF device");
1030                 } else {
1031                         pr_debug("seg6local: SRv6 End.DT* creation error=%d\n",
1032                                  vrf_ifindex);
1033                 }
1034 
1035                 return vrf_ifindex;
1036         }
1037 
1038         info->net = net;
1039         info->vrf_ifindex = vrf_ifindex;
1040 
1041         info->family = family;
1042         info->mode = DT_VRF_MODE;
1043 
1044         return 0;
1045 }
1046 
1047 /* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and
1048  * routes the IPv4/IPv6 packet by looking at the configured routing table.
1049  *
1050  * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment
1051  * Routing Header packets) from several interfaces and the outer IPv6
1052  * destination address (DA) is used for retrieving the specific instance of the
1053  * End.DT4/DT6 behavior that should process the packets.
1054  *
1055  * However, the inner IPv4/IPv6 packet is not really bound to any receiving
1056  * interface and thus the End.DT4/DT6 sets the VRF (associated with the
1057  * corresponding routing table) as the *receiving* interface.
1058  * In other words, the End.DT4/DT6 processes a packet as if it has been received
1059  * directly by the VRF (and not by one of its slave devices, if any).
1060  * In this way, the VRF interface is used for routing the IPv4/IPv6 packet in
1061  * according to the routing table configured by the End.DT4/DT6 instance.
1062  *
1063  * This design allows you to get some interesting features like:
1064  *  1) the statistics on rx packets;
1065  *  2) the possibility to install a packet sniffer on the receiving interface
1066  *     (the VRF one) for looking at the incoming packets;
1067  *  3) the possibility to leverage the netfilter prerouting hook for the inner
1068  *     IPv4 packet.
1069  *
1070  * This function returns:
1071  *  - the sk_buff* when the VRF rcv handler has processed the packet correctly;
1072  *  - NULL when the skb is consumed by the VRF rcv handler;
1073  *  - a pointer which encodes a negative error number in case of error.
1074  *    Note that in this case, the function takes care of freeing the skb.
1075  */
1076 static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family,
1077                                       struct net_device *dev)
1078 {
1079         /* based on l3mdev_ip_rcv; we are only interested in the master */
1080         if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev)))
1081                 goto drop;
1082 
1083         if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv))
1084                 goto drop;
1085 
1086         /* the decap packet IPv4/IPv6 does not come with any mac header info.
1087          * We must unset the mac header to allow the VRF device to rebuild it,
1088          * just in case there is a sniffer attached on the device.
1089          */
1090         skb_unset_mac_header(skb);
1091 
1092         skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family);
1093         if (!skb)
1094                 /* the skb buffer was consumed by the handler */
1095                 return NULL;
1096 
1097         /* when a packet is received by a VRF or by one of its slaves, the
1098          * master device reference is set into the skb.
1099          */
1100         if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex))
1101                 goto drop;
1102 
1103         return skb;
1104 
1105 drop:
1106         kfree_skb(skb);
1107         return ERR_PTR(-EINVAL);
1108 }
1109 
1110 static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb,
1111                                              struct seg6_end_dt_info *info)
1112 {
1113         int vrf_ifindex = info->vrf_ifindex;
1114         struct net *net = info->net;
1115 
1116         if (unlikely(vrf_ifindex < 0))
1117                 goto error;
1118 
1119         if (unlikely(!net_eq(dev_net(skb->dev), net)))
1120                 goto error;
1121 
1122         return dev_get_by_index_rcu(net, vrf_ifindex);
1123 
1124 error:
1125         return NULL;
1126 }
1127 
1128 static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
1129                                        struct seg6_local_lwt *slwt, u16 family)
1130 {
1131         struct seg6_end_dt_info *info = &slwt->dt_info;
1132         struct net_device *vrf;
1133         __be16 protocol;
1134         int hdrlen;
1135 
1136         vrf = end_dt_get_vrf_rcu(skb, info);
1137         if (unlikely(!vrf))
1138                 goto drop;
1139 
1140         switch (family) {
1141         case AF_INET:
1142                 protocol = htons(ETH_P_IP);
1143                 hdrlen = sizeof(struct iphdr);
1144                 break;
1145         case AF_INET6:
1146                 protocol = htons(ETH_P_IPV6);
1147                 hdrlen = sizeof(struct ipv6hdr);
1148                 break;
1149         case AF_UNSPEC:
1150                 fallthrough;
1151         default:
1152                 goto drop;
1153         }
1154 
1155         if (unlikely(info->family != AF_UNSPEC && info->family != family)) {
1156                 pr_warn_once("seg6local: SRv6 End.DT* family mismatch");
1157                 goto drop;
1158         }
1159 
1160         skb->protocol = protocol;
1161 
1162         skb_dst_drop(skb);
1163 
1164         skb_set_transport_header(skb, hdrlen);
1165         nf_reset_ct(skb);
1166 
1167         return end_dt_vrf_rcv(skb, family, vrf);
1168 
1169 drop:
1170         kfree_skb(skb);
1171         return ERR_PTR(-EINVAL);
1172 }
1173 
1174 static int input_action_end_dt4(struct sk_buff *skb,
1175                                 struct seg6_local_lwt *slwt)
1176 {
1177         struct iphdr *iph;
1178         int err;
1179 
1180         if (!decap_and_validate(skb, IPPROTO_IPIP))
1181                 goto drop;
1182 
1183         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
1184                 goto drop;
1185 
1186         skb = end_dt_vrf_core(skb, slwt, AF_INET);
1187         if (!skb)
1188                 /* packet has been processed and consumed by the VRF */
1189                 return 0;
1190 
1191         if (IS_ERR(skb))
1192                 return PTR_ERR(skb);
1193 
1194         iph = ip_hdr(skb);
1195 
1196         err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
1197         if (unlikely(err))
1198                 goto drop;
1199 
1200         return dst_input(skb);
1201 
1202 drop:
1203         kfree_skb(skb);
1204         return -EINVAL;
1205 }
1206 
1207 static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg,
1208                               struct netlink_ext_ack *extack)
1209 {
1210         return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack);
1211 }
1212 
1213 static enum
1214 seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt)
1215 {
1216         unsigned long parsed_optattrs = slwt->parsed_optattrs;
1217         bool legacy, vrfmode;
1218 
1219         legacy  = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE));
1220         vrfmode = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE));
1221 
1222         if (!(legacy ^ vrfmode))
1223                 /* both are absent or present: invalid DT6 mode */
1224                 return DT_INVALID_MODE;
1225 
1226         return legacy ? DT_LEGACY_MODE : DT_VRF_MODE;
1227 }
1228 
1229 static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt)
1230 {
1231         struct seg6_end_dt_info *info = &slwt->dt_info;
1232 
1233         return info->mode;
1234 }
1235 
1236 static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg,
1237                               struct netlink_ext_ack *extack)
1238 {
1239         enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt);
1240         struct seg6_end_dt_info *info = &slwt->dt_info;
1241 
1242         switch (mode) {
1243         case DT_LEGACY_MODE:
1244                 info->mode = DT_LEGACY_MODE;
1245                 return 0;
1246         case DT_VRF_MODE:
1247                 return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack);
1248         default:
1249                 NL_SET_ERR_MSG(extack, "table or vrftable must be specified");
1250                 return -EINVAL;
1251         }
1252 }
1253 #endif
1254 
1255 static int input_action_end_dt6(struct sk_buff *skb,
1256                                 struct seg6_local_lwt *slwt)
1257 {
1258         if (!decap_and_validate(skb, IPPROTO_IPV6))
1259                 goto drop;
1260 
1261         if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
1262                 goto drop;
1263 
1264 #ifdef CONFIG_NET_L3_MASTER_DEV
1265         if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE)
1266                 goto legacy_mode;
1267 
1268         /* DT6_VRF_MODE */
1269         skb = end_dt_vrf_core(skb, slwt, AF_INET6);
1270         if (!skb)
1271                 /* packet has been processed and consumed by the VRF */
1272                 return 0;
1273 
1274         if (IS_ERR(skb))
1275                 return PTR_ERR(skb);
1276 
1277         /* note: this time we do not need to specify the table because the VRF
1278          * takes care of selecting the correct table.
1279          */
1280         seg6_lookup_any_nexthop(skb, NULL, 0, true);
1281 
1282         return dst_input(skb);
1283 
1284 legacy_mode:
1285 #endif
1286         skb_set_transport_header(skb, sizeof(struct ipv6hdr));
1287 
1288         seg6_lookup_any_nexthop(skb, NULL, slwt->table, true);
1289 
1290         return dst_input(skb);
1291 
1292 drop:
1293         kfree_skb(skb);
1294         return -EINVAL;
1295 }
1296 
1297 #ifdef CONFIG_NET_L3_MASTER_DEV
1298 static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg,
1299                                struct netlink_ext_ack *extack)
1300 {
1301         return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack);
1302 }
1303 
1304 static int input_action_end_dt46(struct sk_buff *skb,
1305                                  struct seg6_local_lwt *slwt)
1306 {
1307         unsigned int off = 0;
1308         int nexthdr;
1309 
1310         nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL);
1311         if (unlikely(nexthdr < 0))
1312                 goto drop;
1313 
1314         switch (nexthdr) {
1315         case IPPROTO_IPIP:
1316                 return input_action_end_dt4(skb, slwt);
1317         case IPPROTO_IPV6:
1318                 return input_action_end_dt6(skb, slwt);
1319         }
1320 
1321 drop:
1322         kfree_skb(skb);
1323         return -EINVAL;
1324 }
1325 #endif
1326 
1327 /* push an SRH on top of the current one */
1328 static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1329 {
1330         struct ipv6_sr_hdr *srh;
1331         int err = -EINVAL;
1332 
1333         srh = get_and_validate_srh(skb);
1334         if (!srh)
1335                 goto drop;
1336 
1337         err = seg6_do_srh_inline(skb, slwt->srh);
1338         if (err)
1339                 goto drop;
1340 
1341         skb_set_transport_header(skb, sizeof(struct ipv6hdr));
1342 
1343         seg6_lookup_nexthop(skb, NULL, 0);
1344 
1345         return dst_input(skb);
1346 
1347 drop:
1348         kfree_skb(skb);
1349         return err;
1350 }
1351 
1352 /* encapsulate within an outer IPv6 header and a specified SRH */
1353 static int input_action_end_b6_encap(struct sk_buff *skb,
1354                                      struct seg6_local_lwt *slwt)
1355 {
1356         struct ipv6_sr_hdr *srh;
1357         int err = -EINVAL;
1358 
1359         srh = get_and_validate_srh(skb);
1360         if (!srh)
1361                 goto drop;
1362 
1363         advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
1364 
1365         skb_reset_inner_headers(skb);
1366         skb->encapsulation = 1;
1367 
1368         err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
1369         if (err)
1370                 goto drop;
1371 
1372         skb_set_transport_header(skb, sizeof(struct ipv6hdr));
1373 
1374         seg6_lookup_nexthop(skb, NULL, 0);
1375 
1376         return dst_input(skb);
1377 
1378 drop:
1379         kfree_skb(skb);
1380         return err;
1381 }
1382 
1383 DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states) = {
1384         .bh_lock        = INIT_LOCAL_LOCK(bh_lock),
1385 };
1386 
1387 bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
1388 {
1389         struct seg6_bpf_srh_state *srh_state =
1390                 this_cpu_ptr(&seg6_bpf_srh_states);
1391         struct ipv6_sr_hdr *srh = srh_state->srh;
1392 
1393         lockdep_assert_held(&srh_state->bh_lock);
1394         if (unlikely(srh == NULL))
1395                 return false;
1396 
1397         if (unlikely(!srh_state->valid)) {
1398                 if ((srh_state->hdrlen & 7) != 0)
1399                         return false;
1400 
1401                 srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
1402                 if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3, true))
1403                         return false;
1404 
1405                 srh_state->valid = true;
1406         }
1407 
1408         return true;
1409 }
1410 
1411 static int input_action_end_bpf(struct sk_buff *skb,
1412                                 struct seg6_local_lwt *slwt)
1413 {
1414         struct seg6_bpf_srh_state *srh_state;
1415         struct ipv6_sr_hdr *srh;
1416         int ret;
1417 
1418         srh = get_and_validate_srh(skb);
1419         if (!srh) {
1420                 kfree_skb(skb);
1421                 return -EINVAL;
1422         }
1423         advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
1424 
1425         /* The access to the per-CPU buffer srh_state is protected by running
1426          * always in softirq context (with disabled BH). On PREEMPT_RT the
1427          * required locking is provided by the following local_lock_nested_bh()
1428          * statement. It is also accessed by the bpf_lwt_seg6_* helpers via
1429          * bpf_prog_run_save_cb().
1430          */
1431         local_lock_nested_bh(&seg6_bpf_srh_states.bh_lock);
1432         srh_state = this_cpu_ptr(&seg6_bpf_srh_states);
1433         srh_state->srh = srh;
1434         srh_state->hdrlen = srh->hdrlen << 3;
1435         srh_state->valid = true;
1436 
1437         rcu_read_lock();
1438         bpf_compute_data_pointers(skb);
1439         ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
1440         rcu_read_unlock();
1441 
1442         switch (ret) {
1443         case BPF_OK:
1444         case BPF_REDIRECT:
1445                 break;
1446         case BPF_DROP:
1447                 goto drop;
1448         default:
1449                 pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret);
1450                 goto drop;
1451         }
1452 
1453         if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
1454                 goto drop;
1455         local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock);
1456 
1457         if (ret != BPF_REDIRECT)
1458                 seg6_lookup_nexthop(skb, NULL, 0);
1459 
1460         return dst_input(skb);
1461 
1462 drop:
1463         local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock);
1464         kfree_skb(skb);
1465         return -EINVAL;
1466 }
1467 
1468 static struct seg6_action_desc seg6_action_table[] = {
1469         {
1470                 .action         = SEG6_LOCAL_ACTION_END,
1471                 .attrs          = 0,
1472                 .optattrs       = SEG6_F_LOCAL_COUNTERS |
1473                                   SEG6_F_LOCAL_FLAVORS,
1474                 .input          = input_action_end,
1475         },
1476         {
1477                 .action         = SEG6_LOCAL_ACTION_END_X,
1478                 .attrs          = SEG6_F_ATTR(SEG6_LOCAL_NH6),
1479                 .optattrs       = SEG6_F_LOCAL_COUNTERS |
1480                                   SEG6_F_LOCAL_FLAVORS,
1481                 .input          = input_action_end_x,
1482         },
1483         {
1484                 .action         = SEG6_LOCAL_ACTION_END_T,
1485                 .attrs          = SEG6_F_ATTR(SEG6_LOCAL_TABLE),
1486                 .optattrs       = SEG6_F_LOCAL_COUNTERS,
1487                 .input          = input_action_end_t,
1488         },
1489         {
1490                 .action         = SEG6_LOCAL_ACTION_END_DX2,
1491                 .attrs          = SEG6_F_ATTR(SEG6_LOCAL_OIF),
1492                 .optattrs       = SEG6_F_LOCAL_COUNTERS,
1493                 .input          = input_action_end_dx2,
1494         },
1495         {
1496                 .action         = SEG6_LOCAL_ACTION_END_DX6,
1497                 .attrs          = SEG6_F_ATTR(SEG6_LOCAL_NH6),
1498                 .optattrs       = SEG6_F_LOCAL_COUNTERS,
1499                 .input          = input_action_end_dx6,
1500         },
1501         {
1502                 .action         = SEG6_LOCAL_ACTION_END_DX4,
1503                 .attrs          = SEG6_F_ATTR(SEG6_LOCAL_NH4),
1504                 .optattrs       = SEG6_F_LOCAL_COUNTERS,
1505                 .input          = input_action_end_dx4,
1506         },
1507         {
1508                 .action         = SEG6_LOCAL_ACTION_END_DT4,
1509                 .attrs          = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
1510                 .optattrs       = SEG6_F_LOCAL_COUNTERS,
1511 #ifdef CONFIG_NET_L3_MASTER_DEV
1512                 .input          = input_action_end_dt4,
1513                 .slwt_ops       = {
1514                                         .build_state = seg6_end_dt4_build,
1515                                   },
1516 #endif
1517         },
1518         {
1519                 .action         = SEG6_LOCAL_ACTION_END_DT6,
1520 #ifdef CONFIG_NET_L3_MASTER_DEV
1521                 .attrs          = 0,
1522                 .optattrs       = SEG6_F_LOCAL_COUNTERS         |
1523                                   SEG6_F_ATTR(SEG6_LOCAL_TABLE) |
1524                                   SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
1525                 .slwt_ops       = {
1526                                         .build_state = seg6_end_dt6_build,
1527                                   },
1528 #else
1529                 .attrs          = SEG6_F_ATTR(SEG6_LOCAL_TABLE),
1530                 .optattrs       = SEG6_F_LOCAL_COUNTERS,
1531 #endif
1532                 .input          = input_action_end_dt6,
1533         },
1534         {
1535                 .action         = SEG6_LOCAL_ACTION_END_DT46,
1536                 .attrs          = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
1537                 .optattrs       = SEG6_F_LOCAL_COUNTERS,
1538 #ifdef CONFIG_NET_L3_MASTER_DEV
1539                 .input          = input_action_end_dt46,
1540                 .slwt_ops       = {
1541                                         .build_state = seg6_end_dt46_build,
1542                                   },
1543 #endif
1544         },
1545         {
1546                 .action         = SEG6_LOCAL_ACTION_END_B6,
1547                 .attrs          = SEG6_F_ATTR(SEG6_LOCAL_SRH),
1548                 .optattrs       = SEG6_F_LOCAL_COUNTERS,
1549                 .input          = input_action_end_b6,
1550         },
1551         {
1552                 .action         = SEG6_LOCAL_ACTION_END_B6_ENCAP,
1553                 .attrs          = SEG6_F_ATTR(SEG6_LOCAL_SRH),
1554                 .optattrs       = SEG6_F_LOCAL_COUNTERS,
1555                 .input          = input_action_end_b6_encap,
1556                 .static_headroom        = sizeof(struct ipv6hdr),
1557         },
1558         {
1559                 .action         = SEG6_LOCAL_ACTION_END_BPF,
1560                 .attrs          = SEG6_F_ATTR(SEG6_LOCAL_BPF),
1561                 .optattrs       = SEG6_F_LOCAL_COUNTERS,
1562                 .input          = input_action_end_bpf,
1563         },
1564 
1565 };
1566 
1567 static struct seg6_action_desc *__get_action_desc(int action)
1568 {
1569         struct seg6_action_desc *desc;
1570         int i, count;
1571 
1572         count = ARRAY_SIZE(seg6_action_table);
1573         for (i = 0; i < count; i++) {
1574                 desc = &seg6_action_table[i];
1575                 if (desc->action == action)
1576                         return desc;
1577         }
1578 
1579         return NULL;
1580 }
1581 
1582 static bool seg6_lwtunnel_counters_enabled(struct seg6_local_lwt *slwt)
1583 {
1584         return slwt->parsed_optattrs & SEG6_F_LOCAL_COUNTERS;
1585 }
1586 
1587 static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
1588                                        unsigned int len, int err)
1589 {
1590         struct pcpu_seg6_local_counters *pcounters;
1591 
1592         pcounters = this_cpu_ptr(slwt->pcpu_counters);
1593         u64_stats_update_begin(&pcounters->syncp);
1594 
1595         if (likely(!err)) {
1596                 u64_stats_inc(&pcounters->packets);
1597                 u64_stats_add(&pcounters->bytes, len);
1598         } else {
1599                 u64_stats_inc(&pcounters->errors);
1600         }
1601 
1602         u64_stats_update_end(&pcounters->syncp);
1603 }
1604 
1605 static int seg6_local_input_core(struct net *net, struct sock *sk,
1606                                  struct sk_buff *skb)
1607 {
1608         struct dst_entry *orig_dst = skb_dst(skb);
1609         struct seg6_action_desc *desc;
1610         struct seg6_local_lwt *slwt;
1611         unsigned int len = skb->len;
1612         int rc;
1613 
1614         slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
1615         desc = slwt->desc;
1616 
1617         rc = desc->input(skb, slwt);
1618 
1619         if (!seg6_lwtunnel_counters_enabled(slwt))
1620                 return rc;
1621 
1622         seg6_local_update_counters(slwt, len, rc);
1623 
1624         return rc;
1625 }
1626 
1627 static int seg6_local_input(struct sk_buff *skb)
1628 {
1629         if (skb->protocol != htons(ETH_P_IPV6)) {
1630                 kfree_skb(skb);
1631                 return -EINVAL;
1632         }
1633 
1634         if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
1635                 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
1636                                dev_net(skb->dev), NULL, skb, skb->dev, NULL,
1637                                seg6_local_input_core);
1638 
1639         return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
1640 }
1641 
1642 static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
1643         [SEG6_LOCAL_ACTION]     = { .type = NLA_U32 },
1644         [SEG6_LOCAL_SRH]        = { .type = NLA_BINARY },
1645         [SEG6_LOCAL_TABLE]      = { .type = NLA_U32 },
1646         [SEG6_LOCAL_VRFTABLE]   = { .type = NLA_U32 },
1647         [SEG6_LOCAL_NH4]        = { .type = NLA_BINARY,
1648                                     .len = sizeof(struct in_addr) },
1649         [SEG6_LOCAL_NH6]        = { .type = NLA_BINARY,
1650                                     .len = sizeof(struct in6_addr) },
1651         [SEG6_LOCAL_IIF]        = { .type = NLA_U32 },
1652         [SEG6_LOCAL_OIF]        = { .type = NLA_U32 },
1653         [SEG6_LOCAL_BPF]        = { .type = NLA_NESTED },
1654         [SEG6_LOCAL_COUNTERS]   = { .type = NLA_NESTED },
1655         [SEG6_LOCAL_FLAVORS]    = { .type = NLA_NESTED },
1656 };
1657 
1658 static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1659                          struct netlink_ext_ack *extack)
1660 {
1661         struct ipv6_sr_hdr *srh;
1662         int len;
1663 
1664         srh = nla_data(attrs[SEG6_LOCAL_SRH]);
1665         len = nla_len(attrs[SEG6_LOCAL_SRH]);
1666 
1667         /* SRH must contain at least one segment */
1668         if (len < sizeof(*srh) + sizeof(struct in6_addr))
1669                 return -EINVAL;
1670 
1671         if (!seg6_validate_srh(srh, len, false))
1672                 return -EINVAL;
1673 
1674         slwt->srh = kmemdup(srh, len, GFP_KERNEL);
1675         if (!slwt->srh)
1676                 return -ENOMEM;
1677 
1678         slwt->headroom += len;
1679 
1680         return 0;
1681 }
1682 
1683 static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1684 {
1685         struct ipv6_sr_hdr *srh;
1686         struct nlattr *nla;
1687         int len;
1688 
1689         srh = slwt->srh;
1690         len = (srh->hdrlen + 1) << 3;
1691 
1692         nla = nla_reserve(skb, SEG6_LOCAL_SRH, len);
1693         if (!nla)
1694                 return -EMSGSIZE;
1695 
1696         memcpy(nla_data(nla), srh, len);
1697 
1698         return 0;
1699 }
1700 
1701 static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1702 {
1703         int len = (a->srh->hdrlen + 1) << 3;
1704 
1705         if (len != ((b->srh->hdrlen + 1) << 3))
1706                 return 1;
1707 
1708         return memcmp(a->srh, b->srh, len);
1709 }
1710 
1711 static void destroy_attr_srh(struct seg6_local_lwt *slwt)
1712 {
1713         kfree(slwt->srh);
1714 }
1715 
1716 static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1717                            struct netlink_ext_ack *extack)
1718 {
1719         slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
1720 
1721         return 0;
1722 }
1723 
1724 static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1725 {
1726         if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table))
1727                 return -EMSGSIZE;
1728 
1729         return 0;
1730 }
1731 
1732 static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1733 {
1734         if (a->table != b->table)
1735                 return 1;
1736 
1737         return 0;
1738 }
1739 
1740 static struct
1741 seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt)
1742 {
1743 #ifdef CONFIG_NET_L3_MASTER_DEV
1744         return &slwt->dt_info;
1745 #else
1746         return ERR_PTR(-EOPNOTSUPP);
1747 #endif
1748 }
1749 
1750 static int parse_nla_vrftable(struct nlattr **attrs,
1751                               struct seg6_local_lwt *slwt,
1752                               struct netlink_ext_ack *extack)
1753 {
1754         struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
1755 
1756         if (IS_ERR(info))
1757                 return PTR_ERR(info);
1758 
1759         info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]);
1760 
1761         return 0;
1762 }
1763 
1764 static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1765 {
1766         struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
1767 
1768         if (IS_ERR(info))
1769                 return PTR_ERR(info);
1770 
1771         if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table))
1772                 return -EMSGSIZE;
1773 
1774         return 0;
1775 }
1776 
1777 static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1778 {
1779         struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a);
1780         struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b);
1781 
1782         if (info_a->vrf_table != info_b->vrf_table)
1783                 return 1;
1784 
1785         return 0;
1786 }
1787 
1788 static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1789                          struct netlink_ext_ack *extack)
1790 {
1791         memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
1792                sizeof(struct in_addr));
1793 
1794         return 0;
1795 }
1796 
1797 static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1798 {
1799         struct nlattr *nla;
1800 
1801         nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr));
1802         if (!nla)
1803                 return -EMSGSIZE;
1804 
1805         memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr));
1806 
1807         return 0;
1808 }
1809 
1810 static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1811 {
1812         return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
1813 }
1814 
1815 static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1816                          struct netlink_ext_ack *extack)
1817 {
1818         memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
1819                sizeof(struct in6_addr));
1820 
1821         return 0;
1822 }
1823 
1824 static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1825 {
1826         struct nlattr *nla;
1827 
1828         nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr));
1829         if (!nla)
1830                 return -EMSGSIZE;
1831 
1832         memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr));
1833 
1834         return 0;
1835 }
1836 
1837 static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1838 {
1839         return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
1840 }
1841 
1842 static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1843                          struct netlink_ext_ack *extack)
1844 {
1845         slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);
1846 
1847         return 0;
1848 }
1849 
1850 static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1851 {
1852         if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif))
1853                 return -EMSGSIZE;
1854 
1855         return 0;
1856 }
1857 
1858 static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1859 {
1860         if (a->iif != b->iif)
1861                 return 1;
1862 
1863         return 0;
1864 }
1865 
1866 static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1867                          struct netlink_ext_ack *extack)
1868 {
1869         slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);
1870 
1871         return 0;
1872 }
1873 
1874 static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1875 {
1876         if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif))
1877                 return -EMSGSIZE;
1878 
1879         return 0;
1880 }
1881 
1882 static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1883 {
1884         if (a->oif != b->oif)
1885                 return 1;
1886 
1887         return 0;
1888 }
1889 
1890 #define MAX_PROG_NAME 256
1891 static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
1892         [SEG6_LOCAL_BPF_PROG]      = { .type = NLA_U32, },
1893         [SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
1894                                        .len = MAX_PROG_NAME },
1895 };
1896 
1897 static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1898                          struct netlink_ext_ack *extack)
1899 {
1900         struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
1901         struct bpf_prog *p;
1902         int ret;
1903         u32 fd;
1904 
1905         ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_BPF_PROG_MAX,
1906                                           attrs[SEG6_LOCAL_BPF],
1907                                           bpf_prog_policy, NULL);
1908         if (ret < 0)
1909                 return ret;
1910 
1911         if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME])
1912                 return -EINVAL;
1913 
1914         slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL);
1915         if (!slwt->bpf.name)
1916                 return -ENOMEM;
1917 
1918         fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]);
1919         p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
1920         if (IS_ERR(p)) {
1921                 kfree(slwt->bpf.name);
1922                 return PTR_ERR(p);
1923         }
1924 
1925         slwt->bpf.prog = p;
1926         return 0;
1927 }
1928 
1929 static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1930 {
1931         struct nlattr *nest;
1932 
1933         if (!slwt->bpf.prog)
1934                 return 0;
1935 
1936         nest = nla_nest_start_noflag(skb, SEG6_LOCAL_BPF);
1937         if (!nest)
1938                 return -EMSGSIZE;
1939 
1940         if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id))
1941                 return -EMSGSIZE;
1942 
1943         if (slwt->bpf.name &&
1944             nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name))
1945                 return -EMSGSIZE;
1946 
1947         return nla_nest_end(skb, nest);
1948 }
1949 
1950 static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1951 {
1952         if (!a->bpf.name && !b->bpf.name)
1953                 return 0;
1954 
1955         if (!a->bpf.name || !b->bpf.name)
1956                 return 1;
1957 
1958         return strcmp(a->bpf.name, b->bpf.name);
1959 }
1960 
1961 static void destroy_attr_bpf(struct seg6_local_lwt *slwt)
1962 {
1963         kfree(slwt->bpf.name);
1964         if (slwt->bpf.prog)
1965                 bpf_prog_put(slwt->bpf.prog);
1966 }
1967 
1968 static const struct
1969 nla_policy seg6_local_counters_policy[SEG6_LOCAL_CNT_MAX + 1] = {
1970         [SEG6_LOCAL_CNT_PACKETS]        = { .type = NLA_U64 },
1971         [SEG6_LOCAL_CNT_BYTES]          = { .type = NLA_U64 },
1972         [SEG6_LOCAL_CNT_ERRORS]         = { .type = NLA_U64 },
1973 };
1974 
1975 static int parse_nla_counters(struct nlattr **attrs,
1976                               struct seg6_local_lwt *slwt,
1977                               struct netlink_ext_ack *extack)
1978 {
1979         struct pcpu_seg6_local_counters __percpu *pcounters;
1980         struct nlattr *tb[SEG6_LOCAL_CNT_MAX + 1];
1981         int ret;
1982 
1983         ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_CNT_MAX,
1984                                           attrs[SEG6_LOCAL_COUNTERS],
1985                                           seg6_local_counters_policy, NULL);
1986         if (ret < 0)
1987                 return ret;
1988 
1989         /* basic support for SRv6 Behavior counters requires at least:
1990          * packets, bytes and errors.
1991          */
1992         if (!tb[SEG6_LOCAL_CNT_PACKETS] || !tb[SEG6_LOCAL_CNT_BYTES] ||
1993             !tb[SEG6_LOCAL_CNT_ERRORS])
1994                 return -EINVAL;
1995 
1996         /* counters are always zero initialized */
1997         pcounters = seg6_local_alloc_pcpu_counters(GFP_KERNEL);
1998         if (!pcounters)
1999                 return -ENOMEM;
2000 
2001         slwt->pcpu_counters = pcounters;
2002 
2003         return 0;
2004 }
2005 
2006 static int seg6_local_fill_nla_counters(struct sk_buff *skb,
2007                                         struct seg6_local_counters *counters)
2008 {
2009         if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_PACKETS, counters->packets,
2010                               SEG6_LOCAL_CNT_PAD))
2011                 return -EMSGSIZE;
2012 
2013         if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_BYTES, counters->bytes,
2014                               SEG6_LOCAL_CNT_PAD))
2015                 return -EMSGSIZE;
2016 
2017         if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_ERRORS, counters->errors,
2018                               SEG6_LOCAL_CNT_PAD))
2019                 return -EMSGSIZE;
2020 
2021         return 0;
2022 }
2023 
2024 static int put_nla_counters(struct sk_buff *skb, struct seg6_local_lwt *slwt)
2025 {
2026         struct seg6_local_counters counters = { 0, 0, 0 };
2027         struct nlattr *nest;
2028         int rc, i;
2029 
2030         nest = nla_nest_start(skb, SEG6_LOCAL_COUNTERS);
2031         if (!nest)
2032                 return -EMSGSIZE;
2033 
2034         for_each_possible_cpu(i) {
2035                 struct pcpu_seg6_local_counters *pcounters;
2036                 u64 packets, bytes, errors;
2037                 unsigned int start;
2038 
2039                 pcounters = per_cpu_ptr(slwt->pcpu_counters, i);
2040                 do {
2041                         start = u64_stats_fetch_begin(&pcounters->syncp);
2042 
2043                         packets = u64_stats_read(&pcounters->packets);
2044                         bytes = u64_stats_read(&pcounters->bytes);
2045                         errors = u64_stats_read(&pcounters->errors);
2046 
2047                 } while (u64_stats_fetch_retry(&pcounters->syncp, start));
2048 
2049                 counters.packets += packets;
2050                 counters.bytes += bytes;
2051                 counters.errors += errors;
2052         }
2053 
2054         rc = seg6_local_fill_nla_counters(skb, &counters);
2055         if (rc < 0) {
2056                 nla_nest_cancel(skb, nest);
2057                 return rc;
2058         }
2059 
2060         return nla_nest_end(skb, nest);
2061 }
2062 
2063 static int cmp_nla_counters(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
2064 {
2065         /* a and b are equal if both have pcpu_counters set or not */
2066         return (!!((unsigned long)a->pcpu_counters)) ^
2067                 (!!((unsigned long)b->pcpu_counters));
2068 }
2069 
2070 static void destroy_attr_counters(struct seg6_local_lwt *slwt)
2071 {
2072         free_percpu(slwt->pcpu_counters);
2073 }
2074 
2075 static const
2076 struct nla_policy seg6_local_flavors_policy[SEG6_LOCAL_FLV_MAX + 1] = {
2077         [SEG6_LOCAL_FLV_OPERATION]      = { .type = NLA_U32 },
2078         [SEG6_LOCAL_FLV_LCBLOCK_BITS]   = { .type = NLA_U8 },
2079         [SEG6_LOCAL_FLV_LCNODE_FN_BITS] = { .type = NLA_U8 },
2080 };
2081 
2082 /* check whether the lengths of the Locator-Block and Locator-Node Function
2083  * are compatible with the dimension of a C-SID container.
2084  */
2085 static int seg6_chk_next_csid_cfg(__u8 block_len, __u8 func_len)
2086 {
2087         /* Locator-Block and Locator-Node Function cannot exceed 128 bits
2088          * (i.e. C-SID container lenghts).
2089          */
2090         if (next_csid_chk_cntr_bits(block_len, func_len))
2091                 return -EINVAL;
2092 
2093         /* Locator-Block length must be greater than zero and evenly divisible
2094          * by 8. There must be room for a Locator-Node Function, at least.
2095          */
2096         if (next_csid_chk_lcblock_bits(block_len))
2097                 return -EINVAL;
2098 
2099         /* Locator-Node Function length must be greater than zero and evenly
2100          * divisible by 8. There must be room for the Locator-Block.
2101          */
2102         if (next_csid_chk_lcnode_fn_bits(func_len))
2103                 return -EINVAL;
2104 
2105         return 0;
2106 }
2107 
2108 static int seg6_parse_nla_next_csid_cfg(struct nlattr **tb,
2109                                         struct seg6_flavors_info *finfo,
2110                                         struct netlink_ext_ack *extack)
2111 {
2112         __u8 func_len = SEG6_LOCAL_LCNODE_FN_DBITS;
2113         __u8 block_len = SEG6_LOCAL_LCBLOCK_DBITS;
2114         int rc;
2115 
2116         if (tb[SEG6_LOCAL_FLV_LCBLOCK_BITS])
2117                 block_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCBLOCK_BITS]);
2118 
2119         if (tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS])
2120                 func_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS]);
2121 
2122         rc = seg6_chk_next_csid_cfg(block_len, func_len);
2123         if (rc < 0) {
2124                 NL_SET_ERR_MSG(extack,
2125                                "Invalid Locator Block/Node Function lengths");
2126                 return rc;
2127         }
2128 
2129         finfo->lcblock_bits = block_len;
2130         finfo->lcnode_func_bits = func_len;
2131 
2132         return 0;
2133 }
2134 
2135 static int parse_nla_flavors(struct nlattr **attrs, struct seg6_local_lwt *slwt,
2136                              struct netlink_ext_ack *extack)
2137 {
2138         struct seg6_flavors_info *finfo = &slwt->flv_info;
2139         struct nlattr *tb[SEG6_LOCAL_FLV_MAX + 1];
2140         int action = slwt->action;
2141         __u32 fops, supp_fops;
2142         int rc;
2143 
2144         rc = nla_parse_nested_deprecated(tb, SEG6_LOCAL_FLV_MAX,
2145                                          attrs[SEG6_LOCAL_FLAVORS],
2146                                          seg6_local_flavors_policy, NULL);
2147         if (rc < 0)
2148                 return rc;
2149 
2150         /* this attribute MUST always be present since it represents the Flavor
2151          * operation(s) to be carried out.
2152          */
2153         if (!tb[SEG6_LOCAL_FLV_OPERATION])
2154                 return -EINVAL;
2155 
2156         fops = nla_get_u32(tb[SEG6_LOCAL_FLV_OPERATION]);
2157         rc = seg6_flv_supp_ops_by_action(action, &supp_fops);
2158         if (rc < 0 || (fops & ~supp_fops)) {
2159                 NL_SET_ERR_MSG(extack, "Unsupported Flavor operation(s)");
2160                 return -EOPNOTSUPP;
2161         }
2162 
2163         finfo->flv_ops = fops;
2164 
2165         if (seg6_next_csid_enabled(fops)) {
2166                 /* Locator-Block and Locator-Node Function lengths can be
2167                  * provided by the user space. Otherwise, default values are
2168                  * applied.
2169                  */
2170                 rc = seg6_parse_nla_next_csid_cfg(tb, finfo, extack);
2171                 if (rc < 0)
2172                         return rc;
2173         }
2174 
2175         return 0;
2176 }
2177 
2178 static int seg6_fill_nla_next_csid_cfg(struct sk_buff *skb,
2179                                        struct seg6_flavors_info *finfo)
2180 {
2181         if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCBLOCK_BITS, finfo->lcblock_bits))
2182                 return -EMSGSIZE;
2183 
2184         if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCNODE_FN_BITS,
2185                        finfo->lcnode_func_bits))
2186                 return -EMSGSIZE;
2187 
2188         return 0;
2189 }
2190 
2191 static int put_nla_flavors(struct sk_buff *skb, struct seg6_local_lwt *slwt)
2192 {
2193         struct seg6_flavors_info *finfo = &slwt->flv_info;
2194         __u32 fops = finfo->flv_ops;
2195         struct nlattr *nest;
2196         int rc;
2197 
2198         nest = nla_nest_start(skb, SEG6_LOCAL_FLAVORS);
2199         if (!nest)
2200                 return -EMSGSIZE;
2201 
2202         if (nla_put_u32(skb, SEG6_LOCAL_FLV_OPERATION, fops)) {
2203                 rc = -EMSGSIZE;
2204                 goto err;
2205         }
2206 
2207         if (seg6_next_csid_enabled(fops)) {
2208                 rc = seg6_fill_nla_next_csid_cfg(skb, finfo);
2209                 if (rc < 0)
2210                         goto err;
2211         }
2212 
2213         return nla_nest_end(skb, nest);
2214 
2215 err:
2216         nla_nest_cancel(skb, nest);
2217         return rc;
2218 }
2219 
2220 static int seg6_cmp_nla_next_csid_cfg(struct seg6_flavors_info *finfo_a,
2221                                       struct seg6_flavors_info *finfo_b)
2222 {
2223         if (finfo_a->lcblock_bits != finfo_b->lcblock_bits)
2224                 return 1;
2225 
2226         if (finfo_a->lcnode_func_bits != finfo_b->lcnode_func_bits)
2227                 return 1;
2228 
2229         return 0;
2230 }
2231 
2232 static int cmp_nla_flavors(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
2233 {
2234         struct seg6_flavors_info *finfo_a = &a->flv_info;
2235         struct seg6_flavors_info *finfo_b = &b->flv_info;
2236 
2237         if (finfo_a->flv_ops != finfo_b->flv_ops)
2238                 return 1;
2239 
2240         if (seg6_next_csid_enabled(finfo_a->flv_ops)) {
2241                 if (seg6_cmp_nla_next_csid_cfg(finfo_a, finfo_b))
2242                         return 1;
2243         }
2244 
2245         return 0;
2246 }
2247 
2248 static int encap_size_flavors(struct seg6_local_lwt *slwt)
2249 {
2250         struct seg6_flavors_info *finfo = &slwt->flv_info;
2251         int nlsize;
2252 
2253         nlsize = nla_total_size(0) +    /* nest SEG6_LOCAL_FLAVORS */
2254                  nla_total_size(4);     /* SEG6_LOCAL_FLV_OPERATION */
2255 
2256         if (seg6_next_csid_enabled(finfo->flv_ops))
2257                 nlsize += nla_total_size(1) + /* SEG6_LOCAL_FLV_LCBLOCK_BITS */
2258                           nla_total_size(1); /* SEG6_LOCAL_FLV_LCNODE_FN_BITS */
2259 
2260         return nlsize;
2261 }
2262 
/* Set of callbacks handling one seg6local netlink attribute. */
struct seg6_action_param {
	/* read the attribute from the parsed attributes table and store its
	 * content in the tunnel state; a negative value means failure.
	 */
	int (*parse)(struct nlattr **tb, struct seg6_local_lwt *state,
		     struct netlink_ext_ack *ack);

	/* dump the attribute stored in the tunnel state into a netlink
	 * message; a negative value means failure.
	 */
	int (*put)(struct sk_buff *msg, struct seg6_local_lwt *state);

	/* compare the attribute of two tunnel states; 0 means equal */
	int (*cmp)(struct seg6_local_lwt *lhs, struct seg6_local_lwt *rhs);

	/* optional callback: release the resources that the corresponding
	 * parse() has previously acquired.
	 */
	void (*destroy)(struct seg6_local_lwt *state);
};
2275 
2276 static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
2277         [SEG6_LOCAL_SRH]        = { .parse = parse_nla_srh,
2278                                     .put = put_nla_srh,
2279                                     .cmp = cmp_nla_srh,
2280                                     .destroy = destroy_attr_srh },
2281 
2282         [SEG6_LOCAL_TABLE]      = { .parse = parse_nla_table,
2283                                     .put = put_nla_table,
2284                                     .cmp = cmp_nla_table },
2285 
2286         [SEG6_LOCAL_NH4]        = { .parse = parse_nla_nh4,
2287                                     .put = put_nla_nh4,
2288                                     .cmp = cmp_nla_nh4 },
2289 
2290         [SEG6_LOCAL_NH6]        = { .parse = parse_nla_nh6,
2291                                     .put = put_nla_nh6,
2292                                     .cmp = cmp_nla_nh6 },
2293 
2294         [SEG6_LOCAL_IIF]        = { .parse = parse_nla_iif,
2295                                     .put = put_nla_iif,
2296                                     .cmp = cmp_nla_iif },
2297 
2298         [SEG6_LOCAL_OIF]        = { .parse = parse_nla_oif,
2299                                     .put = put_nla_oif,
2300                                     .cmp = cmp_nla_oif },
2301 
2302         [SEG6_LOCAL_BPF]        = { .parse = parse_nla_bpf,
2303                                     .put = put_nla_bpf,
2304                                     .cmp = cmp_nla_bpf,
2305                                     .destroy = destroy_attr_bpf },
2306 
2307         [SEG6_LOCAL_VRFTABLE]   = { .parse = parse_nla_vrftable,
2308                                     .put = put_nla_vrftable,
2309                                     .cmp = cmp_nla_vrftable },
2310 
2311         [SEG6_LOCAL_COUNTERS]   = { .parse = parse_nla_counters,
2312                                     .put = put_nla_counters,
2313                                     .cmp = cmp_nla_counters,
2314                                     .destroy = destroy_attr_counters },
2315 
2316         [SEG6_LOCAL_FLAVORS]    = { .parse = parse_nla_flavors,
2317                                     .put = put_nla_flavors,
2318                                     .cmp = cmp_nla_flavors },
2319 };
2320 
2321 /* call the destroy() callback (if available) for each set attribute in
2322  * @parsed_attrs, starting from the first attribute up to the @max_parsed
2323  * (excluded) attribute.
2324  */
2325 static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed,
2326                             struct seg6_local_lwt *slwt)
2327 {
2328         struct seg6_action_param *param;
2329         int i;
2330 
2331         /* Every required seg6local attribute is identified by an ID which is
2332          * encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask;
2333          *
2334          * We scan the 'parsed_attrs' bitmask, starting from the first attribute
2335          * up to the @max_parsed (excluded) attribute.
2336          * For each set attribute, we retrieve the corresponding destroy()
2337          * callback. If the callback is not available, then we skip to the next
2338          * attribute; otherwise, we call the destroy() callback.
2339          */
2340         for (i = SEG6_LOCAL_SRH; i < max_parsed; ++i) {
2341                 if (!(parsed_attrs & SEG6_F_ATTR(i)))
2342                         continue;
2343 
2344                 param = &seg6_action_params[i];
2345 
2346                 if (param->destroy)
2347                         param->destroy(slwt);
2348         }
2349 }
2350 
2351 /* release all the resources that may have been acquired during parsing
2352  * operations.
2353  */
2354 static void destroy_attrs(struct seg6_local_lwt *slwt)
2355 {
2356         unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs;
2357 
2358         __destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt);
2359 }
2360 
2361 static int parse_nla_optional_attrs(struct nlattr **attrs,
2362                                     struct seg6_local_lwt *slwt,
2363                                     struct netlink_ext_ack *extack)
2364 {
2365         struct seg6_action_desc *desc = slwt->desc;
2366         unsigned long parsed_optattrs = 0;
2367         struct seg6_action_param *param;
2368         int err, i;
2369 
2370         for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; ++i) {
2371                 if (!(desc->optattrs & SEG6_F_ATTR(i)) || !attrs[i])
2372                         continue;
2373 
2374                 /* once here, the i-th attribute is provided by the
2375                  * userspace AND it is identified optional as well.
2376                  */
2377                 param = &seg6_action_params[i];
2378 
2379                 err = param->parse(attrs, slwt, extack);
2380                 if (err < 0)
2381                         goto parse_optattrs_err;
2382 
2383                 /* current attribute has been correctly parsed */
2384                 parsed_optattrs |= SEG6_F_ATTR(i);
2385         }
2386 
2387         /* store in the tunnel state all the optional attributed successfully
2388          * parsed.
2389          */
2390         slwt->parsed_optattrs = parsed_optattrs;
2391 
2392         return 0;
2393 
2394 parse_optattrs_err:
2395         __destroy_attrs(parsed_optattrs, i, slwt);
2396 
2397         return err;
2398 }
2399 
2400 /* call the custom constructor of the behavior during its initialization phase
2401  * and after that all its attributes have been parsed successfully.
2402  */
2403 static int
2404 seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg,
2405                                 struct netlink_ext_ack *extack)
2406 {
2407         struct seg6_action_desc *desc = slwt->desc;
2408         struct seg6_local_lwtunnel_ops *ops;
2409 
2410         ops = &desc->slwt_ops;
2411         if (!ops->build_state)
2412                 return 0;
2413 
2414         return ops->build_state(slwt, cfg, extack);
2415 }
2416 
2417 /* call the custom destructor of the behavior which is invoked before the
2418  * tunnel is going to be destroyed.
2419  */
2420 static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt)
2421 {
2422         struct seg6_action_desc *desc = slwt->desc;
2423         struct seg6_local_lwtunnel_ops *ops;
2424 
2425         ops = &desc->slwt_ops;
2426         if (!ops->destroy_state)
2427                 return;
2428 
2429         ops->destroy_state(slwt);
2430 }
2431 
2432 static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt,
2433                             struct netlink_ext_ack *extack)
2434 {
2435         struct seg6_action_param *param;
2436         struct seg6_action_desc *desc;
2437         unsigned long invalid_attrs;
2438         int i, err;
2439 
2440         desc = __get_action_desc(slwt->action);
2441         if (!desc)
2442                 return -EINVAL;
2443 
2444         if (!desc->input)
2445                 return -EOPNOTSUPP;
2446 
2447         slwt->desc = desc;
2448         slwt->headroom += desc->static_headroom;
2449 
2450         /* Forcing the desc->optattrs *set* and the desc->attrs *set* to be
2451          * disjoined, this allow us to release acquired resources by optional
2452          * attributes and by required attributes independently from each other
2453          * without any interference.
2454          * In other terms, we are sure that we do not release some the acquired
2455          * resources twice.
2456          *
2457          * Note that if an attribute is configured both as required and as
2458          * optional, it means that the user has messed something up in the
2459          * seg6_action_table. Therefore, this check is required for SRv6
2460          * behaviors to work properly.
2461          */
2462         invalid_attrs = desc->attrs & desc->optattrs;
2463         if (invalid_attrs) {
2464                 WARN_ONCE(1,
2465                           "An attribute cannot be both required AND optional");
2466                 return -EINVAL;
2467         }
2468 
2469         /* parse the required attributes */
2470         for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
2471                 if (desc->attrs & SEG6_F_ATTR(i)) {
2472                         if (!attrs[i])
2473                                 return -EINVAL;
2474 
2475                         param = &seg6_action_params[i];
2476 
2477                         err = param->parse(attrs, slwt, extack);
2478                         if (err < 0)
2479                                 goto parse_attrs_err;
2480                 }
2481         }
2482 
2483         /* parse the optional attributes, if any */
2484         err = parse_nla_optional_attrs(attrs, slwt, extack);
2485         if (err < 0)
2486                 goto parse_attrs_err;
2487 
2488         return 0;
2489 
2490 parse_attrs_err:
2491         /* release any resource that may have been acquired during the i-1
2492          * parse() operations.
2493          */
2494         __destroy_attrs(desc->attrs, i, slwt);
2495 
2496         return err;
2497 }
2498 
2499 static int seg6_local_build_state(struct net *net, struct nlattr *nla,
2500                                   unsigned int family, const void *cfg,
2501                                   struct lwtunnel_state **ts,
2502                                   struct netlink_ext_ack *extack)
2503 {
2504         struct nlattr *tb[SEG6_LOCAL_MAX + 1];
2505         struct lwtunnel_state *newts;
2506         struct seg6_local_lwt *slwt;
2507         int err;
2508 
2509         if (family != AF_INET6)
2510                 return -EINVAL;
2511 
2512         err = nla_parse_nested_deprecated(tb, SEG6_LOCAL_MAX, nla,
2513                                           seg6_local_policy, extack);
2514 
2515         if (err < 0)
2516                 return err;
2517 
2518         if (!tb[SEG6_LOCAL_ACTION])
2519                 return -EINVAL;
2520 
2521         newts = lwtunnel_state_alloc(sizeof(*slwt));
2522         if (!newts)
2523                 return -ENOMEM;
2524 
2525         slwt = seg6_local_lwtunnel(newts);
2526         slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);
2527 
2528         err = parse_nla_action(tb, slwt, extack);
2529         if (err < 0)
2530                 goto out_free;
2531 
2532         err = seg6_local_lwtunnel_build_state(slwt, cfg, extack);
2533         if (err < 0)
2534                 goto out_destroy_attrs;
2535 
2536         newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
2537         newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
2538         newts->headroom = slwt->headroom;
2539 
2540         *ts = newts;
2541 
2542         return 0;
2543 
2544 out_destroy_attrs:
2545         destroy_attrs(slwt);
2546 out_free:
2547         kfree(newts);
2548         return err;
2549 }
2550 
/* Tear down a seg6local tunnel state: run the custom destructor of the
 * behavior first, then release the resources held by its attributes.
 */
static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
{
	struct seg6_local_lwt *slwt;

	slwt = seg6_local_lwtunnel(lwt);

	seg6_local_lwtunnel_destroy_state(slwt);
	destroy_attrs(slwt);
}
2561 
2562 static int seg6_local_fill_encap(struct sk_buff *skb,
2563                                  struct lwtunnel_state *lwt)
2564 {
2565         struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
2566         struct seg6_action_param *param;
2567         unsigned long attrs;
2568         int i, err;
2569 
2570         if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
2571                 return -EMSGSIZE;
2572 
2573         attrs = slwt->desc->attrs | slwt->parsed_optattrs;
2574 
2575         for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
2576                 if (attrs & SEG6_F_ATTR(i)) {
2577                         param = &seg6_action_params[i];
2578                         err = param->put(skb, slwt);
2579                         if (err < 0)
2580                                 return err;
2581                 }
2582         }
2583 
2584         return 0;
2585 }
2586 
2587 static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
2588 {
2589         struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
2590         unsigned long attrs;
2591         int nlsize;
2592 
2593         nlsize = nla_total_size(4); /* action */
2594 
2595         attrs = slwt->desc->attrs | slwt->parsed_optattrs;
2596 
2597         if (attrs & SEG6_F_ATTR(SEG6_LOCAL_SRH))
2598                 nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
2599 
2600         if (attrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE))
2601                 nlsize += nla_total_size(4);
2602 
2603         if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH4))
2604                 nlsize += nla_total_size(4);
2605 
2606         if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH6))
2607                 nlsize += nla_total_size(16);
2608 
2609         if (attrs & SEG6_F_ATTR(SEG6_LOCAL_IIF))
2610                 nlsize += nla_total_size(4);
2611 
2612         if (attrs & SEG6_F_ATTR(SEG6_LOCAL_OIF))
2613                 nlsize += nla_total_size(4);
2614 
2615         if (attrs & SEG6_F_ATTR(SEG6_LOCAL_BPF))
2616                 nlsize += nla_total_size(sizeof(struct nlattr)) +
2617                        nla_total_size(MAX_PROG_NAME) +
2618                        nla_total_size(4);
2619 
2620         if (attrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE))
2621                 nlsize += nla_total_size(4);
2622 
2623         if (attrs & SEG6_F_LOCAL_COUNTERS)
2624                 nlsize += nla_total_size(0) + /* nest SEG6_LOCAL_COUNTERS */
2625                           /* SEG6_LOCAL_CNT_PACKETS */
2626                           nla_total_size_64bit(sizeof(__u64)) +
2627                           /* SEG6_LOCAL_CNT_BYTES */
2628                           nla_total_size_64bit(sizeof(__u64)) +
2629                           /* SEG6_LOCAL_CNT_ERRORS */
2630                           nla_total_size_64bit(sizeof(__u64));
2631 
2632         if (attrs & SEG6_F_ATTR(SEG6_LOCAL_FLAVORS))
2633                 nlsize += encap_size_flavors(slwt);
2634 
2635         return nlsize;
2636 }
2637 
2638 static int seg6_local_cmp_encap(struct lwtunnel_state *a,
2639                                 struct lwtunnel_state *b)
2640 {
2641         struct seg6_local_lwt *slwt_a, *slwt_b;
2642         struct seg6_action_param *param;
2643         unsigned long attrs_a, attrs_b;
2644         int i;
2645 
2646         slwt_a = seg6_local_lwtunnel(a);
2647         slwt_b = seg6_local_lwtunnel(b);
2648 
2649         if (slwt_a->action != slwt_b->action)
2650                 return 1;
2651 
2652         attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs;
2653         attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs;
2654 
2655         if (attrs_a != attrs_b)
2656                 return 1;
2657 
2658         for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
2659                 if (attrs_a & SEG6_F_ATTR(i)) {
2660                         param = &seg6_action_params[i];
2661                         if (param->cmp(slwt_a, slwt_b))
2662                                 return 1;
2663                 }
2664         }
2665 
2666         return 0;
2667 }
2668 
2669 static const struct lwtunnel_encap_ops seg6_local_ops = {
2670         .build_state    = seg6_local_build_state,
2671         .destroy_state  = seg6_local_destroy_state,
2672         .input          = seg6_local_input,
2673         .fill_encap     = seg6_local_fill_encap,
2674         .get_encap_size = seg6_local_get_encap_size,
2675         .cmp_encap      = seg6_local_cmp_encap,
2676         .owner          = THIS_MODULE,
2677 };
2678 
2679 int __init seg6_local_init(void)
2680 {
2681         /* If the max total number of defined attributes is reached, then your
2682          * kernel build stops here.
2683          *
2684          * This check is required to avoid arithmetic overflows when processing
2685          * behavior attributes and the maximum number of defined attributes
2686          * exceeds the allowed value.
2687          */
2688         BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long));
2689 
2690         /* Check whether the number of defined flavors exceeds the maximum
2691          * allowed value.
2692          */
2693         BUILD_BUG_ON(SEG6_LOCAL_FLV_OP_MAX + 1 > BITS_PER_TYPE(__u32));
2694 
2695         /* If the default NEXT-C-SID Locator-Block/Node Function lengths (in
2696          * bits) have been changed with invalid values, kernel build stops
2697          * here.
2698          */
2699         BUILD_BUG_ON(next_csid_chk_cntr_bits(SEG6_LOCAL_LCBLOCK_DBITS,
2700                                              SEG6_LOCAL_LCNODE_FN_DBITS));
2701         BUILD_BUG_ON(next_csid_chk_lcblock_bits(SEG6_LOCAL_LCBLOCK_DBITS));
2702         BUILD_BUG_ON(next_csid_chk_lcnode_fn_bits(SEG6_LOCAL_LCNODE_FN_DBITS));
2703 
2704         /* To be memory efficient, we use 'u8' to represent the different
2705          * actions related to RFC8986 flavors. If the kernel build stops here,
2706          * it means that it is not possible to correctly encode these actions
2707          * with the data type chosen for the action table.
2708          */
2709         BUILD_BUG_ON(SEG6_LOCAL_FLV_ACT_MAX > (typeof(flv8986_act_tbl[0]))~0U);
2710 
2711         return lwtunnel_encap_add_ops(&seg6_local_ops,
2712                                       LWTUNNEL_ENCAP_SEG6_LOCAL);
2713 }
2714 
2715 void seg6_local_exit(void)
2716 {
2717         lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);
2718 }
2719 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php