Linux/kernel/bpf/verifier.c

  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  3  * Copyright (c) 2016 Facebook
  4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
  5  */
  6 #include <uapi/linux/btf.h>
  7 #include <linux/bpf-cgroup.h>
  8 #include <linux/kernel.h>
  9 #include <linux/types.h>
 10 #include <linux/slab.h>
 11 #include <linux/bpf.h>
 12 #include <linux/btf.h>
 13 #include <linux/bpf_verifier.h>
 14 #include <linux/filter.h>
 15 #include <net/netlink.h>
 16 #include <linux/file.h>
 17 #include <linux/vmalloc.h>
 18 #include <linux/stringify.h>
 19 #include <linux/bsearch.h>
 20 #include <linux/sort.h>
 21 #include <linux/perf_event.h>
 22 #include <linux/ctype.h>
 23 #include <linux/error-injection.h>
 24 #include <linux/bpf_lsm.h>
 25 #include <linux/btf_ids.h>
 26 #include <linux/poison.h>
 27 #include <linux/module.h>
 28 #include <linux/cpumask.h>
 29 #include <linux/bpf_mem_alloc.h>
 30 #include <net/xdp.h>
 31 
 32 #include "disasm.h"
 33 
 34 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
 35 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
 36         [_id] = & _name ## _verifier_ops,
 37 #define BPF_MAP_TYPE(_id, _ops)
 38 #define BPF_LINK_TYPE(_id, _name)
 39 #include <linux/bpf_types.h>
 40 #undef BPF_PROG_TYPE
 41 #undef BPF_MAP_TYPE
 42 #undef BPF_LINK_TYPE
 43 };
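
/*
 * The table above is populated from the X-macro list in <linux/bpf_types.h>.
 * As a rough illustration (the real entries live in that header), a line like
 *
 *    BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp, struct xdp_md, struct xdp_buff)
 *
 * expands here to
 *
 *    [BPF_PROG_TYPE_XDP] = &xdp_verifier_ops,
 *
 * while the BPF_MAP_TYPE()/BPF_LINK_TYPE() entries are defined to nothing
 * above and are therefore skipped.
 */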
 44 
 45 struct bpf_mem_alloc bpf_global_percpu_ma;
 46 static bool bpf_global_percpu_ma_set;
 47 
 48 /* bpf_check() is a static code analyzer that walks eBPF program
 49  * instruction by instruction and updates register/stack state.
 50  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 51  *
 52  * The first pass is depth-first-search to check that the program is a DAG.
 53  * It rejects the following programs:
 54  * - larger than BPF_MAXINSNS insns
  55  * - a loop is present (detected via a back-edge)
 56  * - unreachable insns exist (shouldn't be a forest. program = one function)
 57  * - out of bounds or malformed jumps
 58  * The second pass is all possible path descent from the 1st insn.
 59  * Since it's analyzing all paths through the program, the length of the
  60  * analysis is limited to 64k insn, which may be hit even if the total number of
  61  * insns is less than 4K but there are too many branches that change stack/regs.
 62  * Number of 'branches to be analyzed' is limited to 1k
 63  *
 64  * On entry to each instruction, each register has a type, and the instruction
 65  * changes the types of the registers depending on instruction semantics.
 66  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 67  * copied to R1.
 68  *
 69  * All registers are 64-bit.
 70  * R0 - return register
 71  * R1-R5 argument passing registers
 72  * R6-R9 callee saved registers
 73  * R10 - frame pointer read-only
 74  *
 75  * At the start of BPF program the register R1 contains a pointer to bpf_context
 76  * and has type PTR_TO_CTX.
 77  *
 78  * Verifier tracks arithmetic operations on pointers in case:
 79  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 80  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 81  * 1st insn copies R10 (which has FRAME_PTR) type into R1
 82  * and 2nd arithmetic instruction is pattern matched to recognize
 83  * that it wants to construct a pointer to some element within stack.
 84  * So after 2nd insn, the register R1 has type PTR_TO_STACK
 85  * (and -20 constant is saved for further stack bounds checking).
 86  * Meaning that this reg is a pointer to stack plus known immediate constant.
 87  *
 88  * Most of the time the registers have SCALAR_VALUE type, which
 89  * means the register has some value, but it's not a valid pointer.
 90  * (like pointer plus pointer becomes SCALAR_VALUE type)
 91  *
  92  * When the verifier sees load or store instructions, the type of the base register
  93  * can be PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK or PTR_TO_SOCKET. These are
  94  * the four pointer types recognized by the check_mem_access() function.
 95  *
 96  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 97  * and the range of [ptr, ptr + map's value_size) is accessible.
 98  *
 99  * registers used to pass values to function calls are checked against
100  * function argument constraints.
101  *
 102  * ARG_PTR_TO_MAP_KEY is one such argument constraint.
 103  * It means that the register type passed to this function must be
 104  * PTR_TO_STACK and it will be used inside the function as
 105  * 'pointer to map element key'.
106  *
107  * For example the argument constraints for bpf_map_lookup_elem():
108  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
109  *   .arg1_type = ARG_CONST_MAP_PTR,
110  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
111  *
 112  * ret_type says that this function returns 'pointer to map elem value or null'.
 113  * The function expects the 1st argument to be a const pointer to 'struct bpf_map'
 114  * and the 2nd argument to be a pointer to the stack, which will be used inside
 115  * the helper function as a pointer to the map element key.
116  *
117  * On the kernel side the helper function looks like:
118  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
119  * {
120  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
121  *    void *key = (void *) (unsigned long) r2;
122  *    void *value;
123  *
124  *    here kernel can access 'key' and 'map' pointers safely, knowing that
125  *    [key, key + map->key_size) bytes are valid and were initialized on
126  *    the stack of eBPF program.
127  * }
128  *
129  * Corresponding eBPF program may look like:
130  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
131  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
132  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
133  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 134  * Here the verifier looks at the prototype of map_lookup_elem() and sees:
 135  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok.
 136  * Now the verifier knows that this map has a key of R1->map_ptr->key_size bytes.
 137  *
 138  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far.
 139  * Now the verifier checks that [R2, R2 + map's key_size) are within stack limits
 140  * and were initialized prior to this call.
141  * If it's ok, then verifier allows this BPF_CALL insn and looks at
142  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
143  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
144  * returns either pointer to map value or NULL.
145  *
146  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
147  * insn, the register holding that pointer in the true branch changes state to
148  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
149  * branch. See check_cond_jmp_op().
150  *
151  * After the call R0 is set to return type of the function and registers R1-R5
152  * are set to NOT_INIT to indicate that they are no longer readable.
153  *
154  * The following reference types represent a potential reference to a kernel
155  * resource which, after first being allocated, must be checked and freed by
156  * the BPF program:
157  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
158  *
159  * When the verifier sees a helper call return a reference type, it allocates a
160  * pointer id for the reference and stores it in the current function state.
161  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
162  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
163  * passes through a NULL-check conditional. For the branch wherein the state is
164  * changed to CONST_IMM, the verifier releases the reference.
165  *
166  * For each helper function that allocates a reference, such as
167  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
168  * bpf_sk_release(). When a reference type passes into the release function,
169  * the verifier also releases the reference. If any unchecked or unreleased
170  * reference remains at the end of the program, the verifier rejects it.
171  */
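
/*
 * For reference, those constraints come from each helper's struct
 * bpf_func_proto. A simplified sketch of what the proto for
 * bpf_map_lookup_elem() roughly looks like (the actual definition lives
 * elsewhere, in kernel/bpf/helpers.c):
 *
 *    const struct bpf_func_proto bpf_map_lookup_elem_proto = {
 *            .func      = bpf_map_lookup_elem,
 *            .gpl_only  = false,
 *            .ret_type  = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *            .arg1_type = ARG_CONST_MAP_PTR,
 *            .arg2_type = ARG_PTR_TO_MAP_KEY,
 *    };
 *
 * The verifier obtains the proto through the program type's get_func_proto()
 * callback and checks each argument register against the listed arg types.
 */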
172 
173 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
174 struct bpf_verifier_stack_elem {
175         /* verifier state is 'st'
176          * before processing instruction 'insn_idx'
177          * and after processing instruction 'prev_insn_idx'
178          */
179         struct bpf_verifier_state st;
180         int insn_idx;
181         int prev_insn_idx;
182         struct bpf_verifier_stack_elem *next;
183         /* length of verifier log at the time this state was pushed on stack */
184         u32 log_pos;
185 };
186 
187 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ    8192
188 #define BPF_COMPLEXITY_LIMIT_STATES     64
189 
190 #define BPF_MAP_KEY_POISON      (1ULL << 63)
191 #define BPF_MAP_KEY_SEEN        (1ULL << 62)
192 
193 #define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  512
194 
195 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
196 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
197 static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
198 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
199 static int ref_set_non_owning(struct bpf_verifier_env *env,
200                               struct bpf_reg_state *reg);
201 static void specialize_kfunc(struct bpf_verifier_env *env,
202                              u32 func_id, u16 offset, unsigned long *addr);
203 static bool is_trusted_reg(const struct bpf_reg_state *reg);
204 
205 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
206 {
207         return aux->map_ptr_state.poison;
208 }
209 
210 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
211 {
212         return aux->map_ptr_state.unpriv;
213 }
214 
215 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
216                               struct bpf_map *map,
217                               bool unpriv, bool poison)
218 {
219         unpriv |= bpf_map_ptr_unpriv(aux);
220         aux->map_ptr_state.unpriv = unpriv;
221         aux->map_ptr_state.poison = poison;
222         aux->map_ptr_state.map_ptr = map;
223 }
224 
225 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
226 {
227         return aux->map_key_state & BPF_MAP_KEY_POISON;
228 }
229 
230 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
231 {
232         return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
233 }
234 
235 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
236 {
237         return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
238 }
239 
240 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
241 {
242         bool poisoned = bpf_map_key_poisoned(aux);
243 
244         aux->map_key_state = state | BPF_MAP_KEY_SEEN |
245                              (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
246 }
247 
248 static bool bpf_helper_call(const struct bpf_insn *insn)
249 {
250         return insn->code == (BPF_JMP | BPF_CALL) &&
251                insn->src_reg == 0;
252 }
253 
254 static bool bpf_pseudo_call(const struct bpf_insn *insn)
255 {
256         return insn->code == (BPF_JMP | BPF_CALL) &&
257                insn->src_reg == BPF_PSEUDO_CALL;
258 }
259 
260 static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
261 {
262         return insn->code == (BPF_JMP | BPF_CALL) &&
263                insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
264 }
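
/*
 * These three predicates tell the BPF_JMP | BPF_CALL flavours apart by the
 * insn's src_reg field. As a rough sketch:
 *
 *    helper call:   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem)
 *                   src_reg == 0, imm == helper function id
 *    subprog call:  src_reg == BPF_PSEUDO_CALL, imm == insn offset of the callee
 *    kfunc call:    src_reg == BPF_PSEUDO_KFUNC_CALL, imm == BTF id of the kernel function
 */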
265 
266 struct bpf_call_arg_meta {
267         struct bpf_map *map_ptr;
268         bool raw_mode;
269         bool pkt_access;
270         u8 release_regno;
271         int regno;
272         int access_size;
273         int mem_size;
274         u64 msize_max_value;
275         int ref_obj_id;
276         int dynptr_id;
277         int map_uid;
278         int func_id;
279         struct btf *btf;
280         u32 btf_id;
281         struct btf *ret_btf;
282         u32 ret_btf_id;
283         u32 subprogno;
284         struct btf_field *kptr_field;
285 };
286 
287 struct bpf_kfunc_call_arg_meta {
288         /* In parameters */
289         struct btf *btf;
290         u32 func_id;
291         u32 kfunc_flags;
292         const struct btf_type *func_proto;
293         const char *func_name;
294         /* Out parameters */
295         u32 ref_obj_id;
296         u8 release_regno;
297         bool r0_rdonly;
298         u32 ret_btf_id;
299         u64 r0_size;
300         u32 subprogno;
301         struct {
302                 u64 value;
303                 bool found;
304         } arg_constant;
305 
306         /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
307          * generally to pass info about user-defined local kptr types to later
308          * verification logic
309          *   bpf_obj_drop/bpf_percpu_obj_drop
310          *     Record the local kptr type to be drop'd
311          *   bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
312          *     Record the local kptr type to be refcount_incr'd and use
313          *     arg_owning_ref to determine whether refcount_acquire should be
314          *     fallible
315          */
316         struct btf *arg_btf;
317         u32 arg_btf_id;
318         bool arg_owning_ref;
319 
320         struct {
321                 struct btf_field *field;
322         } arg_list_head;
323         struct {
324                 struct btf_field *field;
325         } arg_rbtree_root;
326         struct {
327                 enum bpf_dynptr_type type;
328                 u32 id;
329                 u32 ref_obj_id;
330         } initialized_dynptr;
331         struct {
332                 u8 spi;
333                 u8 frameno;
334         } iter;
335         struct {
336                 struct bpf_map *ptr;
337                 int uid;
338         } map;
339         u64 mem_size;
340 };
341 
342 struct btf *btf_vmlinux;
343 
344 static const char *btf_type_name(const struct btf *btf, u32 id)
345 {
346         return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
347 }
348 
349 static DEFINE_MUTEX(bpf_verifier_lock);
350 static DEFINE_MUTEX(bpf_percpu_ma_lock);
351 
352 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
353 {
354         struct bpf_verifier_env *env = private_data;
355         va_list args;
356 
357         if (!bpf_verifier_log_needed(&env->log))
358                 return;
359 
360         va_start(args, fmt);
361         bpf_verifier_vlog(&env->log, fmt, args);
362         va_end(args);
363 }
364 
365 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
366                                    struct bpf_reg_state *reg,
367                                    struct bpf_retval_range range, const char *ctx,
368                                    const char *reg_name)
369 {
370         bool unknown = true;
371 
372         verbose(env, "%s the register %s has", ctx, reg_name);
373         if (reg->smin_value > S64_MIN) {
374                 verbose(env, " smin=%lld", reg->smin_value);
375                 unknown = false;
376         }
377         if (reg->smax_value < S64_MAX) {
378                 verbose(env, " smax=%lld", reg->smax_value);
379                 unknown = false;
380         }
381         if (unknown)
382                 verbose(env, " unknown scalar value");
383         verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval);
384 }
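
/*
 * The resulting log line, assuming a caller passing ctx "At program exit"
 * and reg_name "R0", might read for example:
 *
 *    At program exit the register R0 has smin=-1 smax=5 should have been in [0, 1]
 */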
385 
386 static bool type_may_be_null(u32 type)
387 {
388         return type & PTR_MAYBE_NULL;
389 }
390 
391 static bool reg_not_null(const struct bpf_reg_state *reg)
392 {
393         enum bpf_reg_type type;
394 
395         type = reg->type;
396         if (type_may_be_null(type))
397                 return false;
398 
399         type = base_type(type);
400         return type == PTR_TO_SOCKET ||
401                 type == PTR_TO_TCP_SOCK ||
402                 type == PTR_TO_MAP_VALUE ||
403                 type == PTR_TO_MAP_KEY ||
404                 type == PTR_TO_SOCK_COMMON ||
405                 (type == PTR_TO_BTF_ID && is_trusted_reg(reg)) ||
406                 type == PTR_TO_MEM;
407 }
408 
409 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
410 {
411         struct btf_record *rec = NULL;
412         struct btf_struct_meta *meta;
413 
414         if (reg->type == PTR_TO_MAP_VALUE) {
415                 rec = reg->map_ptr->record;
416         } else if (type_is_ptr_alloc_obj(reg->type)) {
417                 meta = btf_find_struct_meta(reg->btf, reg->btf_id);
418                 if (meta)
419                         rec = meta->record;
420         }
421         return rec;
422 }
423 
424 static bool subprog_is_global(const struct bpf_verifier_env *env, int subprog)
425 {
426         struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
427 
428         return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
429 }
430 
431 static const char *subprog_name(const struct bpf_verifier_env *env, int subprog)
432 {
433         struct bpf_func_info *info;
434 
435         if (!env->prog->aux->func_info)
436                 return "";
437 
438         info = &env->prog->aux->func_info[subprog];
439         return btf_type_name(env->prog->aux->btf, info->type_id);
440 }
441 
442 static void mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
443 {
444         struct bpf_subprog_info *info = subprog_info(env, subprog);
445 
446         info->is_cb = true;
447         info->is_async_cb = true;
448         info->is_exception_cb = true;
449 }
450 
451 static bool subprog_is_exc_cb(struct bpf_verifier_env *env, int subprog)
452 {
453         return subprog_info(env, subprog)->is_exception_cb;
454 }
455 
456 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
457 {
458         return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK);
459 }
460 
461 static bool type_is_rdonly_mem(u32 type)
462 {
463         return type & MEM_RDONLY;
464 }
465 
466 static bool is_acquire_function(enum bpf_func_id func_id,
467                                 const struct bpf_map *map)
468 {
469         enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
470 
471         if (func_id == BPF_FUNC_sk_lookup_tcp ||
472             func_id == BPF_FUNC_sk_lookup_udp ||
473             func_id == BPF_FUNC_skc_lookup_tcp ||
474             func_id == BPF_FUNC_ringbuf_reserve ||
475             func_id == BPF_FUNC_kptr_xchg)
476                 return true;
477 
478         if (func_id == BPF_FUNC_map_lookup_elem &&
479             (map_type == BPF_MAP_TYPE_SOCKMAP ||
480              map_type == BPF_MAP_TYPE_SOCKHASH))
481                 return true;
482 
483         return false;
484 }
485 
486 static bool is_ptr_cast_function(enum bpf_func_id func_id)
487 {
488         return func_id == BPF_FUNC_tcp_sock ||
489                 func_id == BPF_FUNC_sk_fullsock ||
490                 func_id == BPF_FUNC_skc_to_tcp_sock ||
491                 func_id == BPF_FUNC_skc_to_tcp6_sock ||
492                 func_id == BPF_FUNC_skc_to_udp6_sock ||
493                 func_id == BPF_FUNC_skc_to_mptcp_sock ||
494                 func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
495                 func_id == BPF_FUNC_skc_to_tcp_request_sock;
496 }
497 
498 static bool is_dynptr_ref_function(enum bpf_func_id func_id)
499 {
500         return func_id == BPF_FUNC_dynptr_data;
501 }
502 
503 static bool is_sync_callback_calling_kfunc(u32 btf_id);
504 static bool is_async_callback_calling_kfunc(u32 btf_id);
505 static bool is_callback_calling_kfunc(u32 btf_id);
506 static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
507 
508 static bool is_bpf_wq_set_callback_impl_kfunc(u32 btf_id);
509 
510 static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
511 {
512         return func_id == BPF_FUNC_for_each_map_elem ||
513                func_id == BPF_FUNC_find_vma ||
514                func_id == BPF_FUNC_loop ||
515                func_id == BPF_FUNC_user_ringbuf_drain;
516 }
517 
518 static bool is_async_callback_calling_function(enum bpf_func_id func_id)
519 {
520         return func_id == BPF_FUNC_timer_set_callback;
521 }
522 
523 static bool is_callback_calling_function(enum bpf_func_id func_id)
524 {
525         return is_sync_callback_calling_function(func_id) ||
526                is_async_callback_calling_function(func_id);
527 }
528 
529 static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
530 {
531         return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
532                (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
533 }
534 
535 static bool is_async_callback_calling_insn(struct bpf_insn *insn)
536 {
537         return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) ||
538                (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm));
539 }
540 
541 static bool is_may_goto_insn(struct bpf_insn *insn)
542 {
543         return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
544 }
545 
546 static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
547 {
548         return is_may_goto_insn(&env->prog->insnsi[insn_idx]);
549 }
550 
551 static bool is_storage_get_function(enum bpf_func_id func_id)
552 {
553         return func_id == BPF_FUNC_sk_storage_get ||
554                func_id == BPF_FUNC_inode_storage_get ||
555                func_id == BPF_FUNC_task_storage_get ||
556                func_id == BPF_FUNC_cgrp_storage_get;
557 }
558 
559 static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
560                                         const struct bpf_map *map)
561 {
562         int ref_obj_uses = 0;
563 
564         if (is_ptr_cast_function(func_id))
565                 ref_obj_uses++;
566         if (is_acquire_function(func_id, map))
567                 ref_obj_uses++;
568         if (is_dynptr_ref_function(func_id))
569                 ref_obj_uses++;
570 
571         return ref_obj_uses > 1;
572 }
573 
574 static bool is_cmpxchg_insn(const struct bpf_insn *insn)
575 {
576         return BPF_CLASS(insn->code) == BPF_STX &&
577                BPF_MODE(insn->code) == BPF_ATOMIC &&
578                insn->imm == BPF_CMPXCHG;
579 }
580 
581 static int __get_spi(s32 off)
582 {
583         return (-off - 1) / BPF_REG_SIZE;
584 }
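
/*
 * Stack offsets are negative, so the stack slot index (spi) grows as the
 * offset moves further below the frame pointer. With BPF_REG_SIZE == 8:
 *
 *    off = -8   ->  spi = ( 8 - 1) / 8 = 0
 *    off = -16  ->  spi = (16 - 1) / 8 = 1
 *    off = -24  ->  spi = (24 - 1) / 8 = 2
 */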
585 
586 static struct bpf_func_state *func(struct bpf_verifier_env *env,
587                                    const struct bpf_reg_state *reg)
588 {
589         struct bpf_verifier_state *cur = env->cur_state;
590 
591         return cur->frame[reg->frameno];
592 }
593 
594 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
595 {
 596         int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
 597 
 598         /* We need to check that slots between [spi - nr_slots + 1, spi] are
 599          * within [0, allocated_stack).
 600          *
 601          * Please note that the spi grows downwards. For example, a dynptr
 602          * takes the size of two stack slots; the first slot will be at
 603          * spi and the second slot will be at spi - 1.
 604          */
 605         return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
606 }
607 
608 static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
609                                   const char *obj_kind, int nr_slots)
610 {
611         int off, spi;
612 
613         if (!tnum_is_const(reg->var_off)) {
614                 verbose(env, "%s has to be at a constant offset\n", obj_kind);
615                 return -EINVAL;
616         }
617 
618         off = reg->off + reg->var_off.value;
619         if (off % BPF_REG_SIZE) {
620                 verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
621                 return -EINVAL;
622         }
623 
624         spi = __get_spi(off);
625         if (spi + 1 < nr_slots) {
626                 verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
627                 return -EINVAL;
628         }
629 
630         if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots))
631                 return -ERANGE;
632         return spi;
633 }
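
/*
 * Worked example (a sketch): a dynptr occupies BPF_DYNPTR_NR_SLOTS == 2
 * slots. A register pointing at fp-16 (off == -16, var_off == 0) yields
 * spi == 1, so the object uses slots 1 and 0. A register pointing at fp-8
 * yields spi == 0 and is rejected by the "spi + 1 < nr_slots" check above,
 * since there is no room for the second slot below it.
 */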
634 
635 static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
636 {
637         return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
638 }
639 
640 static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
641 {
642         return stack_slot_obj_get_spi(env, reg, "iter", nr_slots);
643 }
644 
645 static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
646 {
647         switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
648         case DYNPTR_TYPE_LOCAL:
649                 return BPF_DYNPTR_TYPE_LOCAL;
650         case DYNPTR_TYPE_RINGBUF:
651                 return BPF_DYNPTR_TYPE_RINGBUF;
652         case DYNPTR_TYPE_SKB:
653                 return BPF_DYNPTR_TYPE_SKB;
654         case DYNPTR_TYPE_XDP:
655                 return BPF_DYNPTR_TYPE_XDP;
656         default:
657                 return BPF_DYNPTR_TYPE_INVALID;
658         }
659 }
660 
661 static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
662 {
663         switch (type) {
664         case BPF_DYNPTR_TYPE_LOCAL:
665                 return DYNPTR_TYPE_LOCAL;
666         case BPF_DYNPTR_TYPE_RINGBUF:
667                 return DYNPTR_TYPE_RINGBUF;
668         case BPF_DYNPTR_TYPE_SKB:
669                 return DYNPTR_TYPE_SKB;
670         case BPF_DYNPTR_TYPE_XDP:
671                 return DYNPTR_TYPE_XDP;
672         default:
673                 return 0;
674         }
675 }
676 
677 static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
678 {
679         return type == BPF_DYNPTR_TYPE_RINGBUF;
680 }
681 
682 static void __mark_dynptr_reg(struct bpf_reg_state *reg,
683                               enum bpf_dynptr_type type,
684                               bool first_slot, int dynptr_id);
685 
686 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
687                                 struct bpf_reg_state *reg);
688 
689 static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
690                                    struct bpf_reg_state *sreg1,
691                                    struct bpf_reg_state *sreg2,
692                                    enum bpf_dynptr_type type)
693 {
694         int id = ++env->id_gen;
695 
696         __mark_dynptr_reg(sreg1, type, true, id);
697         __mark_dynptr_reg(sreg2, type, false, id);
698 }
699 
700 static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
701                                struct bpf_reg_state *reg,
702                                enum bpf_dynptr_type type)
703 {
704         __mark_dynptr_reg(reg, type, true, ++env->id_gen);
705 }
706 
707 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
708                                         struct bpf_func_state *state, int spi);
709 
710 static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
711                                    enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
712 {
713         struct bpf_func_state *state = func(env, reg);
714         enum bpf_dynptr_type type;
715         int spi, i, err;
716 
717         spi = dynptr_get_spi(env, reg);
718         if (spi < 0)
719                 return spi;
720 
721         /* We cannot assume both spi and spi - 1 belong to the same dynptr,
 722          * hence we need to call destroy_if_dynptr_stack_slot twice, once for each,
 723          * to ensure that, in the following example:
724          *      [d1][d1][d2][d2]
725          * spi    3   2   1   0
 726          * marking spi = 2 leads to destruction of both d1 and d2. In
 727          * case they do belong to the same dynptr, the second call won't see slot_type
728          * as STACK_DYNPTR and will simply skip destruction.
729          */
730         err = destroy_if_dynptr_stack_slot(env, state, spi);
731         if (err)
732                 return err;
733         err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
734         if (err)
735                 return err;
736 
737         for (i = 0; i < BPF_REG_SIZE; i++) {
738                 state->stack[spi].slot_type[i] = STACK_DYNPTR;
739                 state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
740         }
741 
742         type = arg_to_dynptr_type(arg_type);
743         if (type == BPF_DYNPTR_TYPE_INVALID)
744                 return -EINVAL;
745 
746         mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
747                                &state->stack[spi - 1].spilled_ptr, type);
748 
749         if (dynptr_type_refcounted(type)) {
750                 /* The id is used to track proper releasing */
751                 int id;
752 
753                 if (clone_ref_obj_id)
754                         id = clone_ref_obj_id;
755                 else
756                         id = acquire_reference_state(env, insn_idx);
757 
758                 if (id < 0)
759                         return id;
760 
761                 state->stack[spi].spilled_ptr.ref_obj_id = id;
762                 state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
763         }
764 
765         state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
766         state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
767 
768         return 0;
769 }
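
/*
 * Sketch of the resulting stack state for a dynptr stored at fp-16
 * (so spi == 1):
 *
 *    stack[1] (fp-16..fp-9): all slot_type bytes = STACK_DYNPTR,
 *                            spilled_ptr marked as the dynptr's first slot
 *    stack[0] (fp-8 ..fp-1): all slot_type bytes = STACK_DYNPTR,
 *                            spilled_ptr marked as the second slot, same id
 *
 * For a refcounted type (ringbuf) both spilled_ptr regs also carry the same
 * ref_obj_id so the underlying reference can be found and released later.
 */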
770 
771 static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi)
772 {
773         int i;
774 
775         for (i = 0; i < BPF_REG_SIZE; i++) {
776                 state->stack[spi].slot_type[i] = STACK_INVALID;
777                 state->stack[spi - 1].slot_type[i] = STACK_INVALID;
778         }
779 
780         __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
781         __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
782 
783         /* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
784          *
785          * While we don't allow reading STACK_INVALID, it is still possible to
786          * do <8 byte writes marking some but not all slots as STACK_MISC. Then,
787          * helpers or insns can do partial read of that part without failing,
788          * but check_stack_range_initialized, check_stack_read_var_off, and
789          * check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
790          * the slot conservatively. Hence we need to prevent those liveness
791          * marking walks.
792          *
793          * This was not a problem before because STACK_INVALID is only set by
794          * default (where the default reg state has its reg->parent as NULL), or
795          * in clean_live_states after REG_LIVE_DONE (at which point
796          * mark_reg_read won't walk reg->parent chain), but not randomly during
797          * verifier state exploration (like we did above). Hence, for our case
798          * parentage chain will still be live (i.e. reg->parent may be
799          * non-NULL), while earlier reg->parent was NULL, so we need
 800          * REG_LIVE_WRITTEN to screen off read marker propagation when it is
 801          * done later on reads or by mark_dynptr_read, which would otherwise
 802          * unnecessarily mark registers in the verifier state.
803          */
804         state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
805         state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
806 }
807 
808 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
809 {
810         struct bpf_func_state *state = func(env, reg);
811         int spi, ref_obj_id, i;
812 
813         spi = dynptr_get_spi(env, reg);
814         if (spi < 0)
815                 return spi;
816 
817         if (!dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
818                 invalidate_dynptr(env, state, spi);
819                 return 0;
820         }
821 
822         ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;
823 
824         /* If the dynptr has a ref_obj_id, then we need to invalidate
825          * two things:
826          *
827          * 1) Any dynptrs with a matching ref_obj_id (clones)
828          * 2) Any slices derived from this dynptr.
829          */
830 
831         /* Invalidate any slices associated with this dynptr */
832         WARN_ON_ONCE(release_reference(env, ref_obj_id));
833 
834         /* Invalidate any dynptr clones */
835         for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) {
836                 if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id)
837                         continue;
838 
839                 /* it should always be the case that if the ref obj id
840                  * matches then the stack slot also belongs to a
841                  * dynptr
842                  */
843                 if (state->stack[i].slot_type[0] != STACK_DYNPTR) {
844                         verbose(env, "verifier internal error: misconfigured ref_obj_id\n");
845                         return -EFAULT;
846                 }
847                 if (state->stack[i].spilled_ptr.dynptr.first_slot)
848                         invalidate_dynptr(env, state, i);
849         }
850 
851         return 0;
852 }
853 
854 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
855                                struct bpf_reg_state *reg);
856 
857 static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
858 {
859         if (!env->allow_ptr_leaks)
860                 __mark_reg_not_init(env, reg);
861         else
862                 __mark_reg_unknown(env, reg);
863 }
864 
865 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
866                                         struct bpf_func_state *state, int spi)
867 {
868         struct bpf_func_state *fstate;
869         struct bpf_reg_state *dreg;
870         int i, dynptr_id;
871 
872         /* We always ensure that STACK_DYNPTR is never set partially,
873          * hence just checking for slot_type[0] is enough. This is
874          * different for STACK_SPILL, where it may be only set for
875          * 1 byte, so code has to use is_spilled_reg.
876          */
877         if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
878                 return 0;
879 
880         /* Reposition spi to first slot */
881         if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
882                 spi = spi + 1;
883 
884         if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
885                 verbose(env, "cannot overwrite referenced dynptr\n");
886                 return -EINVAL;
887         }
888 
889         mark_stack_slot_scratched(env, spi);
890         mark_stack_slot_scratched(env, spi - 1);
891 
892         /* Writing partially to one dynptr stack slot destroys both. */
893         for (i = 0; i < BPF_REG_SIZE; i++) {
894                 state->stack[spi].slot_type[i] = STACK_INVALID;
895                 state->stack[spi - 1].slot_type[i] = STACK_INVALID;
896         }
897 
898         dynptr_id = state->stack[spi].spilled_ptr.id;
899         /* Invalidate any slices associated with this dynptr */
900         bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
901                 /* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
902                 if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
903                         continue;
904                 if (dreg->dynptr_id == dynptr_id)
905                         mark_reg_invalid(env, dreg);
906         }));
907 
908         /* Do not release reference state, we are destroying dynptr on stack,
909          * not using some helper to release it. Just reset register.
910          */
911         __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
912         __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
913 
914         /* Same reason as unmark_stack_slots_dynptr above */
915         state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
916         state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
917 
918         return 0;
919 }
920 
921 static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
922 {
923         int spi;
924 
925         if (reg->type == CONST_PTR_TO_DYNPTR)
926                 return false;
927 
928         spi = dynptr_get_spi(env, reg);
929 
930         /* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
931          * error because this just means the stack state hasn't been updated yet.
932          * We will do check_mem_access to check and update stack bounds later.
933          */
934         if (spi < 0 && spi != -ERANGE)
935                 return false;
936 
937         /* We don't need to check if the stack slots are marked by previous
938          * dynptr initializations because we allow overwriting existing unreferenced
939          * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
940          * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
941          * touching are completely destructed before we reinitialize them for a new
942          * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
943          * instead of delaying it until the end where the user will get "Unreleased
944          * reference" error.
945          */
946         return true;
947 }
948 
949 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
950 {
951         struct bpf_func_state *state = func(env, reg);
952         int i, spi;
953 
954         /* This already represents first slot of initialized bpf_dynptr.
955          *
956          * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
957          * check_func_arg_reg_off's logic, so we don't need to check its
958          * offset and alignment.
959          */
960         if (reg->type == CONST_PTR_TO_DYNPTR)
961                 return true;
962 
963         spi = dynptr_get_spi(env, reg);
964         if (spi < 0)
965                 return false;
966         if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
967                 return false;
968 
969         for (i = 0; i < BPF_REG_SIZE; i++) {
970                 if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
971                     state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
972                         return false;
973         }
974 
975         return true;
976 }
977 
978 static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
979                                     enum bpf_arg_type arg_type)
980 {
981         struct bpf_func_state *state = func(env, reg);
982         enum bpf_dynptr_type dynptr_type;
983         int spi;
984 
985         /* ARG_PTR_TO_DYNPTR takes any type of dynptr */
986         if (arg_type == ARG_PTR_TO_DYNPTR)
987                 return true;
988 
989         dynptr_type = arg_to_dynptr_type(arg_type);
990         if (reg->type == CONST_PTR_TO_DYNPTR) {
991                 return reg->dynptr.type == dynptr_type;
992         } else {
993                 spi = dynptr_get_spi(env, reg);
994                 if (spi < 0)
995                         return false;
996                 return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
997         }
998 }
999 
1000 static void __mark_reg_known_zero(struct bpf_reg_state *reg);
1001 
1002 static bool in_rcu_cs(struct bpf_verifier_env *env);
1003 
1004 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);
1005 
1006 static int mark_stack_slots_iter(struct bpf_verifier_env *env,
1007                                  struct bpf_kfunc_call_arg_meta *meta,
1008                                  struct bpf_reg_state *reg, int insn_idx,
1009                                  struct btf *btf, u32 btf_id, int nr_slots)
1010 {
1011         struct bpf_func_state *state = func(env, reg);
1012         int spi, i, j, id;
1013 
1014         spi = iter_get_spi(env, reg, nr_slots);
1015         if (spi < 0)
1016                 return spi;
1017 
1018         id = acquire_reference_state(env, insn_idx);
1019         if (id < 0)
1020                 return id;
1021 
1022         for (i = 0; i < nr_slots; i++) {
1023                 struct bpf_stack_state *slot = &state->stack[spi - i];
1024                 struct bpf_reg_state *st = &slot->spilled_ptr;
1025 
1026                 __mark_reg_known_zero(st);
1027                 st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
1028                 if (is_kfunc_rcu_protected(meta)) {
1029                         if (in_rcu_cs(env))
1030                                 st->type |= MEM_RCU;
1031                         else
1032                                 st->type |= PTR_UNTRUSTED;
1033                 }
1034                 st->live |= REG_LIVE_WRITTEN;
1035                 st->ref_obj_id = i == 0 ? id : 0;
1036                 st->iter.btf = btf;
1037                 st->iter.btf_id = btf_id;
1038                 st->iter.state = BPF_ITER_STATE_ACTIVE;
1039                 st->iter.depth = 0;
1040 
1041                 for (j = 0; j < BPF_REG_SIZE; j++)
1042                         slot->slot_type[j] = STACK_ITER;
1043 
1044                 mark_stack_slot_scratched(env, spi - i);
1045         }
1046 
1047         return 0;
1048 }
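
/*
 * Sketch of the result: for an iterator such as bpf_iter_num (an 8-byte
 * opaque struct, so nr_slots == 1 here; larger iterators span several slots)
 * each covered slot has all eight slot_type bytes set to STACK_ITER and its
 * spilled_ptr typed as PTR_TO_STACK with the iterator's btf/btf_id recorded;
 * only the first slot carries the acquired ref_obj_id.
 */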
1049 
1050 static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
1051                                    struct bpf_reg_state *reg, int nr_slots)
1052 {
1053         struct bpf_func_state *state = func(env, reg);
1054         int spi, i, j;
1055 
1056         spi = iter_get_spi(env, reg, nr_slots);
1057         if (spi < 0)
1058                 return spi;
1059 
1060         for (i = 0; i < nr_slots; i++) {
1061                 struct bpf_stack_state *slot = &state->stack[spi - i];
1062                 struct bpf_reg_state *st = &slot->spilled_ptr;
1063 
1064                 if (i == 0)
1065                         WARN_ON_ONCE(release_reference(env, st->ref_obj_id));
1066 
1067                 __mark_reg_not_init(env, st);
1068 
1069                 /* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
1070                 st->live |= REG_LIVE_WRITTEN;
1071 
1072                 for (j = 0; j < BPF_REG_SIZE; j++)
1073                         slot->slot_type[j] = STACK_INVALID;
1074 
1075                 mark_stack_slot_scratched(env, spi - i);
1076         }
1077 
1078         return 0;
1079 }
1080 
1081 static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
1082                                      struct bpf_reg_state *reg, int nr_slots)
1083 {
1084         struct bpf_func_state *state = func(env, reg);
1085         int spi, i, j;
1086 
1087         /* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1088          * will do check_mem_access to check and update stack bounds later, so
1089          * return true for that case.
1090          */
1091         spi = iter_get_spi(env, reg, nr_slots);
1092         if (spi == -ERANGE)
1093                 return true;
1094         if (spi < 0)
1095                 return false;
1096 
1097         for (i = 0; i < nr_slots; i++) {
1098                 struct bpf_stack_state *slot = &state->stack[spi - i];
1099 
1100                 for (j = 0; j < BPF_REG_SIZE; j++)
1101                         if (slot->slot_type[j] == STACK_ITER)
1102                                 return false;
1103         }
1104 
1105         return true;
1106 }
1107 
1108 static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1109                                    struct btf *btf, u32 btf_id, int nr_slots)
1110 {
1111         struct bpf_func_state *state = func(env, reg);
1112         int spi, i, j;
1113 
1114         spi = iter_get_spi(env, reg, nr_slots);
1115         if (spi < 0)
1116                 return -EINVAL;
1117 
1118         for (i = 0; i < nr_slots; i++) {
1119                 struct bpf_stack_state *slot = &state->stack[spi - i];
1120                 struct bpf_reg_state *st = &slot->spilled_ptr;
1121 
1122                 if (st->type & PTR_UNTRUSTED)
1123                         return -EPROTO;
1124                 /* only main (first) slot has ref_obj_id set */
1125                 if (i == 0 && !st->ref_obj_id)
1126                         return -EINVAL;
1127                 if (i != 0 && st->ref_obj_id)
1128                         return -EINVAL;
1129                 if (st->iter.btf != btf || st->iter.btf_id != btf_id)
1130                         return -EINVAL;
1131 
1132                 for (j = 0; j < BPF_REG_SIZE; j++)
1133                         if (slot->slot_type[j] != STACK_ITER)
1134                                 return -EINVAL;
1135         }
1136 
1137         return 0;
1138 }
1139 
1140 /* Check if given stack slot is "special":
1141  *   - spilled register state (STACK_SPILL);
1142  *   - dynptr state (STACK_DYNPTR);
1143  *   - iter state (STACK_ITER).
1144  */
1145 static bool is_stack_slot_special(const struct bpf_stack_state *stack)
1146 {
1147         enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
1148 
1149         switch (type) {
1150         case STACK_SPILL:
1151         case STACK_DYNPTR:
1152         case STACK_ITER:
1153                 return true;
1154         case STACK_INVALID:
1155         case STACK_MISC:
1156         case STACK_ZERO:
1157                 return false;
1158         default:
1159                 WARN_ONCE(1, "unknown stack slot type %d\n", type);
1160                 return true;
1161         }
1162 }
1163 
1164 /* The reg state of a pointer or a bounded scalar was saved when
1165  * it was spilled to the stack.
1166  */
1167 static bool is_spilled_reg(const struct bpf_stack_state *stack)
1168 {
1169         return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
1170 }
1171 
1172 static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
1173 {
1174         return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL &&
1175                stack->spilled_ptr.type == SCALAR_VALUE;
1176 }
1177 
1178 static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
1179 {
1180         return stack->slot_type[0] == STACK_SPILL &&
1181                stack->spilled_ptr.type == SCALAR_VALUE;
1182 }
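
/*
 * Rough illustration of how spills mark a slot (slot_type indexes 0..7 shown
 * left to right):
 *
 *    8-byte spill:  [ SPILL SPILL SPILL SPILL SPILL SPILL SPILL SPILL ]
 *    4-byte spill:  [  ...   ...   ...   ...  SPILL SPILL SPILL SPILL ]
 *
 * A narrower spill always marks slot_type[BPF_REG_SIZE - 1] but not
 * slot_type[0], which is why is_spilled_reg() checks the last byte while
 * is_spilled_scalar_reg64() checks the first and so matches only full
 * 64-bit scalar spills.
 */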
1183 
1184 /* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which
1185  * case they are equivalent, or it's STACK_ZERO, in which case we preserve
1186  * more precise STACK_ZERO.
1187  * Note, in uprivileged mode leaving STACK_INVALID is wrong, so we take
1188  * env->allow_ptr_leaks into account and force STACK_MISC, if necessary.
1189  */
1190 static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
1191 {
1192         if (*stype == STACK_ZERO)
1193                 return;
1194         if (env->allow_ptr_leaks && *stype == STACK_INVALID)
1195                 return;
1196         *stype = STACK_MISC;
1197 }
1198 
1199 static void scrub_spilled_slot(u8 *stype)
1200 {
1201         if (*stype != STACK_INVALID)
1202                 *stype = STACK_MISC;
1203 }
1204 
1205 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
1206  * small to hold src. This is different from krealloc since we don't want to preserve
1207  * the contents of dst.
1208  *
1209  * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
1210  * not be allocated.
1211  */
1212 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
1213 {
1214         size_t alloc_bytes;
1215         void *orig = dst;
1216         size_t bytes;
1217 
1218         if (ZERO_OR_NULL_PTR(src))
1219                 goto out;
1220 
1221         if (unlikely(check_mul_overflow(n, size, &bytes)))
1222                 return NULL;
1223 
1224         alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
1225         dst = krealloc(orig, alloc_bytes, flags);
1226         if (!dst) {
1227                 kfree(orig);
1228                 return NULL;
1229         }
1230 
1231         memcpy(dst, src, bytes);
1232 out:
1233         return dst ? dst : ZERO_SIZE_PTR;
1234 }
1235 
1236 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
1237  * small to hold new_n items. new items are zeroed out if the array grows.
1238  *
1239  * Contrary to krealloc_array, does not free arr if new_n is zero.
1240  */
1241 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1242 {
1243         size_t alloc_size;
1244         void *new_arr;
1245 
1246         if (!new_n || old_n == new_n)
1247                 goto out;
1248 
1249         alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
1250         new_arr = krealloc(arr, alloc_size, GFP_KERNEL);
1251         if (!new_arr) {
1252                 kfree(arr);
1253                 return NULL;
1254         }
1255         arr = new_arr;
1256 
1257         if (new_n > old_n)
1258                 memset(arr + old_n * size, 0, (new_n - old_n) * size);
1259 
1260 out:
1261         return arr ? arr : ZERO_SIZE_PTR;
1262 }
1263 
1264 static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1265 {
1266         dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1267                                sizeof(struct bpf_reference_state), GFP_KERNEL);
1268         if (!dst->refs)
1269                 return -ENOMEM;
1270 
1271         dst->acquired_refs = src->acquired_refs;
1272         return 0;
1273 }
1274 
1275 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1276 {
1277         size_t n = src->allocated_stack / BPF_REG_SIZE;
1278 
1279         dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1280                                 GFP_KERNEL);
1281         if (!dst->stack)
1282                 return -ENOMEM;
1283 
1284         dst->allocated_stack = src->allocated_stack;
1285         return 0;
1286 }
1287 
1288 static int resize_reference_state(struct bpf_func_state *state, size_t n)
1289 {
1290         state->refs = realloc_array(state->refs, state->acquired_refs, n,
1291                                     sizeof(struct bpf_reference_state));
1292         if (!state->refs)
1293                 return -ENOMEM;
1294 
1295         state->acquired_refs = n;
1296         return 0;
1297 }
1298 
1299 /* Possibly update state->allocated_stack to be at least size bytes. Also
1300  * possibly update the function's high-water mark in its bpf_subprog_info.
1301  */
1302 static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size)
1303 {
1304         size_t old_n = state->allocated_stack / BPF_REG_SIZE, n;
1305 
1306         /* The stack size is always a multiple of BPF_REG_SIZE. */
1307         size = round_up(size, BPF_REG_SIZE);
1308         n = size / BPF_REG_SIZE;
1309 
1310         if (old_n >= n)
1311                 return 0;
1312 
1313         state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1314         if (!state->stack)
1315                 return -ENOMEM;
1316 
1317         state->allocated_stack = size;
1318 
1319         /* update known max for given subprogram */
1320         if (env->subprog_info[state->subprogno].stack_depth < size)
1321                 env->subprog_info[state->subprogno].stack_depth = size;
1322 
1323         return 0;
1324 }
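
/*
 * Example: a request for size == 20 is rounded up to 24 (three 8-byte
 * slots). If allocated_stack was 16, the stack array grows from two to
 * three bpf_stack_state entries and the subprogram's recorded stack_depth
 * is raised to at least 24.
 */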
1325 
1326 /* Acquire a pointer id from the env and update the state->refs to include
1327  * this new pointer reference.
1328  * On success, returns a valid pointer id to associate with the register.
1329  * On failure, returns a negative errno.
1330  */
1331 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1332 {
1333         struct bpf_func_state *state = cur_func(env);
1334         int new_ofs = state->acquired_refs;
1335         int id, err;
1336 
1337         err = resize_reference_state(state, state->acquired_refs + 1);
1338         if (err)
1339                 return err;
1340         id = ++env->id_gen;
1341         state->refs[new_ofs].id = id;
1342         state->refs[new_ofs].insn_idx = insn_idx;
1343         state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0;
1344 
1345         return id;
1346 }
1347 
1348 /* release function corresponding to acquire_reference_state(). Idempotent. */
1349 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
1350 {
1351         int i, last_idx;
1352 
1353         last_idx = state->acquired_refs - 1;
1354         for (i = 0; i < state->acquired_refs; i++) {
1355                 if (state->refs[i].id == ptr_id) {
1356                         /* Cannot release caller references in callbacks */
1357                         if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
1358                                 return -EINVAL;
1359                         if (last_idx && i != last_idx)
1360                                 memcpy(&state->refs[i], &state->refs[last_idx],
1361                                        sizeof(*state->refs));
1362                         memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1363                         state->acquired_refs--;
1364                         return 0;
1365                 }
1366         }
1367         return -EINVAL;
1368 }
1369 
1370 static void free_func_state(struct bpf_func_state *state)
1371 {
1372         if (!state)
1373                 return;
1374         kfree(state->refs);
1375         kfree(state->stack);
1376         kfree(state);
1377 }
1378 
1379 static void clear_jmp_history(struct bpf_verifier_state *state)
1380 {
1381         kfree(state->jmp_history);
1382         state->jmp_history = NULL;
1383         state->jmp_history_cnt = 0;
1384 }
1385 
1386 static void free_verifier_state(struct bpf_verifier_state *state,
1387                                 bool free_self)
1388 {
1389         int i;
1390 
1391         for (i = 0; i <= state->curframe; i++) {
1392                 free_func_state(state->frame[i]);
1393                 state->frame[i] = NULL;
1394         }
1395         clear_jmp_history(state);
1396         if (free_self)
1397                 kfree(state);
1398 }
1399 
1400 /* copy verifier state from src to dst growing dst stack space
1401  * when necessary to accommodate larger src stack
1402  */
1403 static int copy_func_state(struct bpf_func_state *dst,
1404                            const struct bpf_func_state *src)
1405 {
1406         int err;
1407 
1408         memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
1409         err = copy_reference_state(dst, src);
1410         if (err)
1411                 return err;
1412         return copy_stack_state(dst, src);
1413 }
1414 
1415 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1416                                const struct bpf_verifier_state *src)
1417 {
1418         struct bpf_func_state *dst;
1419         int i, err;
1420 
1421         dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
1422                                           src->jmp_history_cnt, sizeof(*dst_state->jmp_history),
1423                                           GFP_USER);
1424         if (!dst_state->jmp_history)
1425                 return -ENOMEM;
1426         dst_state->jmp_history_cnt = src->jmp_history_cnt;
1427 
1428         /* if dst has more stack frames than src, free them; this is also
1429          * necessary in case of exceptional exits using bpf_throw.
1430          */
1431         for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1432                 free_func_state(dst_state->frame[i]);
1433                 dst_state->frame[i] = NULL;
1434         }
1435         dst_state->speculative = src->speculative;
1436         dst_state->active_rcu_lock = src->active_rcu_lock;
1437         dst_state->active_preempt_lock = src->active_preempt_lock;
1438         dst_state->in_sleepable = src->in_sleepable;
1439         dst_state->curframe = src->curframe;
1440         dst_state->active_lock.ptr = src->active_lock.ptr;
1441         dst_state->active_lock.id = src->active_lock.id;
1442         dst_state->branches = src->branches;
1443         dst_state->parent = src->parent;
1444         dst_state->first_insn_idx = src->first_insn_idx;
1445         dst_state->last_insn_idx = src->last_insn_idx;
1446         dst_state->dfs_depth = src->dfs_depth;
1447         dst_state->callback_unroll_depth = src->callback_unroll_depth;
1448         dst_state->used_as_loop_entry = src->used_as_loop_entry;
1449         dst_state->may_goto_depth = src->may_goto_depth;
1450         for (i = 0; i <= src->curframe; i++) {
1451                 dst = dst_state->frame[i];
1452                 if (!dst) {
1453                         dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1454                         if (!dst)
1455                                 return -ENOMEM;
1456                         dst_state->frame[i] = dst;
1457                 }
1458                 err = copy_func_state(dst, src->frame[i]);
1459                 if (err)
1460                         return err;
1461         }
1462         return 0;
1463 }
1464 
1465 static u32 state_htab_size(struct bpf_verifier_env *env)
1466 {
1467         return env->prog->len;
1468 }
1469 
1470 static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx)
1471 {
1472         struct bpf_verifier_state *cur = env->cur_state;
1473         struct bpf_func_state *state = cur->frame[cur->curframe];
1474 
1475         return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
1476 }
1477 
1478 static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
1479 {
1480         int fr;
1481 
1482         if (a->curframe != b->curframe)
1483                 return false;
1484 
1485         for (fr = a->curframe; fr >= 0; fr--)
1486                 if (a->frame[fr]->callsite != b->frame[fr]->callsite)
1487                         return false;
1488 
1489         return true;
1490 }
1491 
1492 /* Open coded iterators allow back-edges in the state graph in order to
1493  * check unbounded loops that use iterators.
1494  *
1495  * In is_state_visited() it is necessary to know if explored states are
1496  * part of some loops in order to decide whether non-exact states
1497  * comparison could be used:
1498  * - non-exact states comparison establishes a sub-state relation and uses
1499  *   read and precision marks to do so; these marks are propagated from
1500  *   child states and thus are not guaranteed to be final in a loop;
1501  * - exact states comparison just checks if current and explored states
1502  *   are identical (and thus form a back-edge).
1503  *
1504  * Paper "A New Algorithm for Identifying Loops in Decompilation"
1505  * by Tao Wei, Jian Mao, Wei Zou and Yu Chen [1] presents a convenient
1506  * algorithm for loop structure detection and gives an overview of
1507  * relevant terminology. It also has helpful illustrations.
1508  *
1509  * [1] https://api.semanticscholar.org/CorpusID:15784067
1510  *
1511  * We use a similar algorithm, but because loop nesting structure is
1512  * irrelevant for the verifier ours is significantly simpler and resembles
1513  * the strongly connected components algorithm from Sedgewick's textbook.
1514  *
1515  * Define the topmost loop entry as the first node of the loop traversed in a
1516  * depth first search starting from the initial state. The goal of the loop
1517  * tracking algorithm is to associate topmost loop entries with states
1518  * derived from these entries.
1519  *
1520  * At each step of the DFS state traversal the algorithm needs to identify
1521  * the following situations:
1522  *
1523  *          initial                     initial                   initial
1524  *            |                           |                         |
1525  *            V                           V                         V
1526  *           ...                         ...           .---------> hdr
1527  *            |                           |            |            |
1528  *            V                           V            |            V
1529  *           cur                     .-> succ          |    .------...
1530  *            |                      |    |            |    |       |
1531  *            V                      |    V            |    V       V
1532  *           succ                    '-- cur           |   ...     ...
1533  *                                                     |    |       |
1534  *                                                     |    V       V
1535  *                                                     |   succ <- cur
1536  *                                                     |    |
1537  *                                                     |    V
1538  *                                                     |   ...
1539  *                                                     |    |
1540  *                                                     '----'
1541  *
1542  *  (A) successor state of cur   (B) successor state of cur or its entry
1543  *      not yet traversed            are in current DFS path, thus cur and succ
1544  *                                   are members of the same outermost loop
1545  *
1546  *                      initial                  initial
1547  *                        |                        |
1548  *                        V                        V
1549  *                       ...                      ...
1550  *                        |                        |
1551  *                        V                        V
1552  *                .------...               .------...
1553  *                |       |                |       |
1554  *                V       V                V       V
1555  *           .-> hdr     ...              ...     ...
1556  *           |    |       |                |       |
1557  *           |    V       V                V       V
1558  *           |   succ <- cur              succ <- cur
1559  *           |    |                        |
1560  *           |    V                        V
1561  *           |   ...                      ...
1562  *           |    |                        |
1563  *           '----'                       exit
1564  *
1565  * (C) successor state of cur is a part of some loop but this loop
1566  *     does not include cur or successor state is not in a loop at all.
1567  *
1568  * The algorithm can be described by the following Python code:
1569  *
1570  *     traversed = set()   # Set of traversed nodes
1571  *     entries = {}        # Mapping from node to loop entry
1572  *     depths = {}         # Depth level assigned to graph node
1573  *     path = set()        # Current DFS path
1574  *
1575  *     # Find outermost loop entry known for n
1576  *     def get_loop_entry(n):
1577  *         h = entries.get(n, None)
1578  *         while h in entries and entries[h] != h:
1579  *             h = entries[h]
1580  *         return h
1581  *
1582  *     # Update n's loop entry if h's outermost entry comes
1583  *     # before n's outermost entry in current DFS path.
1584  *     def update_loop_entry(n, h):
1585  *         n1 = get_loop_entry(n) or n
1586  *         h1 = get_loop_entry(h) or h
1587  *         if h1 in path and depths[h1] <= depths[n1]:
1588  *             entries[n] = h1
1589  *
1590  *     def dfs(n, depth):
1591  *         traversed.add(n)
1592  *         path.add(n)
1593  *         depths[n] = depth
1594  *         for succ in G.successors(n):
1595  *             if succ not in traversed:
1596  *                 # Case A: explore succ and update cur's loop entry
1597  *                 #         only if succ's entry is in current DFS path.
1598  *                 dfs(succ, depth + 1)
1599  *                 h = get_loop_entry(succ)
1600  *                 update_loop_entry(n, h)
1601  *             else:
1602  *                 # Case B or C depending on `h1 in path` check in update_loop_entry().
1603  *                 update_loop_entry(n, succ)
1604  *         path.remove(n)
1605  *
1606  * To adapt this algorithm for use with the verifier:
1607  * - use st->branches == 0 as a signal that the DFS of succ has finished
1608  *   and cur's loop entry has to be updated (case A); handle this in
1609  *   update_branch_counts();
1610  * - use st->branches > 0 as a signal that st is in the current DFS path;
1611  * - handle cases B and C in is_state_visited();
1612  * - update topmost loop entry for intermediate states in get_loop_entry().
1613  */
1614 static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_state *st)
1615 {
1616         struct bpf_verifier_state *topmost = st->loop_entry, *old;
1617 
1618         while (topmost && topmost->loop_entry && topmost != topmost->loop_entry)
1619                 topmost = topmost->loop_entry;
1620         /* Update loop entries for intermediate states to avoid this
1621          * traversal in future get_loop_entry() calls.
1622          */
1623         while (st && st->loop_entry != topmost) {
1624                 old = st->loop_entry;
1625                 st->loop_entry = topmost;
1626                 st = old;
1627         }
1628         return topmost;
1629 }
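
get_loop_entry() both resolves the topmost loop entry and flattens the loop_entry chain so
that later lookups are cheap, much like path compression in a union-find structure. A
standalone sketch of the same idea on a plain parent array (hypothetical names; not
verifier code):

#include <stdio.h>

#define N 6

/* parent[i] == i means "i is its own topmost entry". */
static int parent[N] = { 0, 0, 1, 2, 3, 4 };

/* Find the topmost entry of n and compress the chain on the way back,
 * mirroring how get_loop_entry() rewrites st->loop_entry for
 * intermediate states.
 */
static int find_topmost(int n)
{
	int top = n;

	while (parent[top] != top)
		top = parent[top];
	while (parent[n] != top) {
		int next = parent[n];

		parent[n] = top;
		n = next;
	}
	return top;
}

int main(void)
{
	printf("topmost(5) = %d\n", find_topmost(5));		/* 0 */
	printf("parent[5] after compression = %d\n", parent[5]);	/* 0 */
	return 0;
}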
1630 
1631 static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
1632 {
1633         struct bpf_verifier_state *cur1, *hdr1;
1634 
1635         cur1 = get_loop_entry(cur) ?: cur;
1636         hdr1 = get_loop_entry(hdr) ?: hdr;
1637         /* The hdr1->branches check decides between cases B and C in
1638          * the comment for get_loop_entry(). If hdr1->branches == 0 then
1639          * hdr's topmost loop entry is not in the current DFS path,
1640          * hence 'cur' and 'hdr' are not in the same loop and there is
1641          * no need to update cur->loop_entry.
1642          */
1643         if (hdr1->branches && hdr1->dfs_depth <= cur1->dfs_depth) {
1644                 cur->loop_entry = hdr;
1645                 hdr->used_as_loop_entry = true;
1646         }
1647 }
1648 
1649 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1650 {
1651         while (st) {
1652                 u32 br = --st->branches;
1653 
1654                 /* br == 0 signals that DFS exploration for 'st' is finished,
1655                  * thus it is necessary to update parent's loop entry if it
1656                  * turned out that st is a part of some loop.
1657                  * This is a part of 'case A' in get_loop_entry() comment.
1658                  */
1659                 if (br == 0 && st->parent && st->loop_entry)
1660                         update_loop_entry(st->parent, st->loop_entry);
1661 
1662                 /* WARN_ON(br > 1) technically makes sense here,
1663                  * but see comment in push_stack(), hence:
1664                  */
1665                 WARN_ONCE((int)br < 0,
1666                           "BUG update_branch_counts:branches_to_explore=%d\n",
1667                           br);
1668                 if (br)
1669                         break;
1670                 st = st->parent;
1671         }
1672 }
1673 
1674 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
1675                      int *insn_idx, bool pop_log)
1676 {
1677         struct bpf_verifier_state *cur = env->cur_state;
1678         struct bpf_verifier_stack_elem *elem, *head = env->head;
1679         int err;
1680 
1681         if (env->head == NULL)
1682                 return -ENOENT;
1683 
1684         if (cur) {
1685                 err = copy_verifier_state(cur, &head->st);
1686                 if (err)
1687                         return err;
1688         }
1689         if (pop_log)
1690                 bpf_vlog_reset(&env->log, head->log_pos);
1691         if (insn_idx)
1692                 *insn_idx = head->insn_idx;
1693         if (prev_insn_idx)
1694                 *prev_insn_idx = head->prev_insn_idx;
1695         elem = head->next;
1696         free_verifier_state(&head->st, false);
1697         kfree(head);
1698         env->head = elem;
1699         env->stack_size--;
1700         return 0;
1701 }
1702 
1703 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
1704                                              int insn_idx, int prev_insn_idx,
1705                                              bool speculative)
1706 {
1707         struct bpf_verifier_state *cur = env->cur_state;
1708         struct bpf_verifier_stack_elem *elem;
1709         int err;
1710 
1711         elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1712         if (!elem)
1713                 goto err;
1714 
1715         elem->insn_idx = insn_idx;
1716         elem->prev_insn_idx = prev_insn_idx;
1717         elem->next = env->head;
1718         elem->log_pos = env->log.end_pos;
1719         env->head = elem;
1720         env->stack_size++;
1721         err = copy_verifier_state(&elem->st, cur);
1722         if (err)
1723                 goto err;
1724         elem->st.speculative |= speculative;
1725         if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1726                 verbose(env, "The sequence of %d jumps is too complex.\n",
1727                         env->stack_size);
1728                 goto err;
1729         }
1730         if (elem->st.parent) {
1731                 ++elem->st.parent->branches;
1732                 /* WARN_ON(branches > 2) technically makes sense here,
1733                  * but
1734                  * 1. speculative states will bump 'branches' for non-branch
1735                  * instructions
1736                  * 2. is_state_visited() heuristics may decide not to create
1737                  * a new state for a sequence of branches and all such current
1738                  * and cloned states will be pointing to a single parent state
1739                  * which might have large 'branches' count.
1740                  */
1741         }
1742         return &elem->st;
1743 err:
1744         free_verifier_state(env->cur_state, true);
1745         env->cur_state = NULL;
1746         /* pop all elements and return */
1747         while (!pop_stack(env, NULL, NULL, false));
1748         return NULL;
1749 }
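
push_stack()/pop_stack() above implement the explicit DFS worklist: every branch still to
be explored is snapshotted onto a singly linked stack, and each push bumps the parent
state's 'branches' counter that update_branch_counts() later decrements. A heavily reduced
userspace sketch of just the push/pop discipline (hypothetical element type; state copying
and branch counting omitted):

#include <stdlib.h>
#include <stdio.h>

/* Reduced model: one pending branch = the insn index to resume at. */
struct stack_elem {
	int insn_idx;
	struct stack_elem *next;
};

static struct stack_elem *head;
static int stack_size;

static int push(int insn_idx)
{
	struct stack_elem *elem = calloc(1, sizeof(*elem));

	if (!elem)
		return -1;
	elem->insn_idx = insn_idx;
	elem->next = head;
	head = elem;
	stack_size++;
	return 0;
}

static int pop(int *insn_idx)
{
	struct stack_elem *elem = head;

	if (!elem)
		return -1;	/* nothing left to explore */
	*insn_idx = elem->insn_idx;
	head = elem->next;
	free(elem);
	stack_size--;
	return 0;
}

int main(void)
{
	int idx;

	push(10);	/* e.g. fall-through branch */
	push(20);	/* e.g. taken branch, explored first */
	while (!pop(&idx))
		printf("explore from insn %d\n", idx);	/* 20, then 10 */
	return 0;
}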
1750 
1751 #define CALLER_SAVED_REGS 6
1752 static const int caller_saved[CALLER_SAVED_REGS] = {
1753         BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1754 };
1755 
1756 /* This helper doesn't clear reg->id */
1757 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1758 {
1759         reg->var_off = tnum_const(imm);
1760         reg->smin_value = (s64)imm;
1761         reg->smax_value = (s64)imm;
1762         reg->umin_value = imm;
1763         reg->umax_value = imm;
1764 
1765         reg->s32_min_value = (s32)imm;
1766         reg->s32_max_value = (s32)imm;
1767         reg->u32_min_value = (u32)imm;
1768         reg->u32_max_value = (u32)imm;
1769 }
1770 
1771 /* Mark the unknown part of a register (variable offset or scalar value) as
1772  * known to have the value @imm.
1773  */
1774 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1775 {
1776         /* Clear off and union(map_ptr, range) */
1777         memset(((u8 *)reg) + sizeof(reg->type), 0,
1778                offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1779         reg->id = 0;
1780         reg->ref_obj_id = 0;
1781         ___mark_reg_known(reg, imm);
1782 }
1783 
1784 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1785 {
1786         reg->var_off = tnum_const_subreg(reg->var_off, imm);
1787         reg->s32_min_value = (s32)imm;
1788         reg->s32_max_value = (s32)imm;
1789         reg->u32_min_value = (u32)imm;
1790         reg->u32_max_value = (u32)imm;
1791 }
1792 
1793 /* Mark the 'variable offset' part of a register as zero.  This should be
1794  * used only on registers holding a pointer type.
1795  */
1796 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1797 {
1798         __mark_reg_known(reg, 0);
1799 }
1800 
1801 static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1802 {
1803         __mark_reg_known(reg, 0);
1804         reg->type = SCALAR_VALUE;
1805         /* all scalars are assumed imprecise initially (unless unprivileged,
1806          * in which case everything is forced to be precise)
1807          */
1808         reg->precise = !env->bpf_capable;
1809 }
1810 
1811 static void mark_reg_known_zero(struct bpf_verifier_env *env,
1812                                 struct bpf_reg_state *regs, u32 regno)
1813 {
1814         if (WARN_ON(regno >= MAX_BPF_REG)) {
1815                 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1816                 /* Something bad happened, let's kill all regs */
1817                 for (regno = 0; regno < MAX_BPF_REG; regno++)
1818                         __mark_reg_not_init(env, regs + regno);
1819                 return;
1820         }
1821         __mark_reg_known_zero(regs + regno);
1822 }
1823 
1824 static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
1825                               bool first_slot, int dynptr_id)
1826 {
1827         /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
1828          * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
1829          * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
1830          */
1831         __mark_reg_known_zero(reg);
1832         reg->type = CONST_PTR_TO_DYNPTR;
1833         /* Give each dynptr a unique id to uniquely associate slices to it. */
1834         reg->id = dynptr_id;
1835         reg->dynptr.type = type;
1836         reg->dynptr.first_slot = first_slot;
1837 }
1838 
1839 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1840 {
1841         if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
1842                 const struct bpf_map *map = reg->map_ptr;
1843 
1844                 if (map->inner_map_meta) {
1845                         reg->type = CONST_PTR_TO_MAP;
1846                         reg->map_ptr = map->inner_map_meta;
1847                         /* transfer reg's id, which is unique for every map_lookup_elem,
1848                          * as the UID of the inner map.
1849                          */
1850                         if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER))
1851                                 reg->map_uid = reg->id;
1852                         if (btf_record_has_field(map->inner_map_meta->record, BPF_WORKQUEUE))
1853                                 reg->map_uid = reg->id;
1854                 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1855                         reg->type = PTR_TO_XDP_SOCK;
1856                 } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1857                            map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1858                         reg->type = PTR_TO_SOCKET;
1859                 } else {
1860                         reg->type = PTR_TO_MAP_VALUE;
1861                 }
1862                 return;
1863         }
1864 
1865         reg->type &= ~PTR_MAYBE_NULL;
1866 }
1867 
1868 static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
1869                                 struct btf_field_graph_root *ds_head)
1870 {
1871         __mark_reg_known_zero(&regs[regno]);
1872         regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
1873         regs[regno].btf = ds_head->btf;
1874         regs[regno].btf_id = ds_head->value_btf_id;
1875         regs[regno].off = ds_head->node_offset;
1876 }
1877 
1878 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1879 {
1880         return type_is_pkt_pointer(reg->type);
1881 }
1882 
1883 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1884 {
1885         return reg_is_pkt_pointer(reg) ||
1886                reg->type == PTR_TO_PACKET_END;
1887 }
1888 
1889 static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
1890 {
1891         return base_type(reg->type) == PTR_TO_MEM &&
1892                 (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
1893 }
1894 
1895 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1896 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1897                                     enum bpf_reg_type which)
1898 {
1899         /* The register can already have a range from prior markings.
1900          * This is fine as long as it hasn't been advanced from its
1901          * origin.
1902          */
1903         return reg->type == which &&
1904                reg->id == 0 &&
1905                reg->off == 0 &&
1906                tnum_equals_const(reg->var_off, 0);
1907 }
1908 
1909 /* Reset the min/max bounds of a register */
1910 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1911 {
1912         reg->smin_value = S64_MIN;
1913         reg->smax_value = S64_MAX;
1914         reg->umin_value = 0;
1915         reg->umax_value = U64_MAX;
1916 
1917         reg->s32_min_value = S32_MIN;
1918         reg->s32_max_value = S32_MAX;
1919         reg->u32_min_value = 0;
1920         reg->u32_max_value = U32_MAX;
1921 }
1922 
1923 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1924 {
1925         reg->smin_value = S64_MIN;
1926         reg->smax_value = S64_MAX;
1927         reg->umin_value = 0;
1928         reg->umax_value = U64_MAX;
1929 }
1930 
1931 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1932 {
1933         reg->s32_min_value = S32_MIN;
1934         reg->s32_max_value = S32_MAX;
1935         reg->u32_min_value = 0;
1936         reg->u32_max_value = U32_MAX;
1937 }
1938 
1939 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1940 {
1941         struct tnum var32_off = tnum_subreg(reg->var_off);
1942 
1943         /* min signed is max(sign bit) | min(other bits) */
1944         reg->s32_min_value = max_t(s32, reg->s32_min_value,
1945                         var32_off.value | (var32_off.mask & S32_MIN));
1946         /* max signed is min(sign bit) | max(other bits) */
1947         reg->s32_max_value = min_t(s32, reg->s32_max_value,
1948                         var32_off.value | (var32_off.mask & S32_MAX));
1949         reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1950         reg->u32_max_value = min(reg->u32_max_value,
1951                                  (u32)(var32_off.value | var32_off.mask));
1952 }
1953 
1954 static void __update_reg64_bounds(struct bpf_reg_state *reg)
1955 {
1956         /* min signed is max(sign bit) | min(other bits) */
1957         reg->smin_value = max_t(s64, reg->smin_value,
1958                                 reg->var_off.value | (reg->var_off.mask & S64_MIN));
1959         /* max signed is min(sign bit) | max(other bits) */
1960         reg->smax_value = min_t(s64, reg->smax_value,
1961                                 reg->var_off.value | (reg->var_off.mask & S64_MAX));
1962         reg->umin_value = max(reg->umin_value, reg->var_off.value);
1963         reg->umax_value = min(reg->umax_value,
1964                               reg->var_off.value | reg->var_off.mask);
1965 }
1966 
1967 static void __update_reg_bounds(struct bpf_reg_state *reg)
1968 {
1969         __update_reg32_bounds(reg);
1970         __update_reg64_bounds(reg);
1971 }
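
To see how __update_reg64_bounds() derives candidate bounds from a tnum, here is a
standalone arithmetic sketch (userspace only, values chosen for illustration): with
value = 0x0 and mask = 0x8000000000000003 (sign bit and the two lowest bits unknown),
the "min signed" candidate sets the unknown sign bit and clears the other unknown bits,
the "max signed" candidate does the opposite, and the unsigned candidates set all unknown
bits to 0 or 1; the function then intersects these candidates with the existing bounds
via max_t/min_t.

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
	/* tnum: known bits live in 'value', unknown bits are set in 'mask'.
	 * Here the sign bit and the two lowest bits are unknown, everything
	 * else is known to be zero.
	 */
	uint64_t value = 0x0, mask = 0x8000000000000003ULL;

	/* min signed candidate: unknown sign bit as 1, other unknown bits as 0 */
	int64_t smin = (int64_t)(value | (mask & (uint64_t)INT64_MIN));
	/* max signed candidate: unknown sign bit as 0, other unknown bits as 1 */
	int64_t smax = (int64_t)(value | (mask & (uint64_t)INT64_MAX));
	/* unsigned candidates: all unknown bits 0 (min) or 1 (max) */
	uint64_t umin = value;
	uint64_t umax = value | mask;

	printf("smin=%" PRId64 " smax=%" PRId64 "\n", smin, smax);	/* S64_MIN, 3 */
	printf("umin=%" PRIu64 " umax=%#" PRIx64 "\n", umin, umax);	/* 0, 0x8000000000000003 */
	return 0;
}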
1972 
1973 /* Uses signed min/max values to inform unsigned, and vice-versa */
1974 static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1975 {
1976         /* If upper 32 bits of u64/s64 range don't change, we can use lower 32
1977          * bits to improve our u32/s32 boundaries.
1978          *
1979          * E.g., the case where we have upper 32 bits as zero ([10, 20] in
1980          * u64) is pretty trivial, it's obvious that in u32 we'll also have
1981          * [10, 20] range. But this property holds for any 64-bit range as
1982          * long as upper 32 bits in that entire range of values stay the same.
1983          *
1984          * E.g., u64 range [0x10000000A, 0x10000000F] ([4294967306, 4294967311]
1985          * in decimal) has the same upper 32 bits throughout all the values in
1986          * that range. As such, lower 32 bits form a valid [0xA, 0xF] ([10, 15])
1987          * range.
1988          *
1989          * Note also, that [0xA, 0xF] is a valid range both in u32 and in s32,
1990          * following the rules outlined below about u64/s64 correspondence
1991          * (which equally applies to u32 vs s32 correspondence). In general it
1992          * depends on actual hexadecimal values of 32-bit range. They can form
1993          * only valid u32, or only valid s32 ranges in some cases.
1994          *
1995          * So we use all these insights to derive bounds for subregisters here.
1996          */
1997         if ((reg->umin_value >> 32) == (reg->umax_value >> 32)) {
1998                 /* u64 to u32 casting preserves validity of low 32 bits as
1999                  * a range, if upper 32 bits are the same
2000                  */
2001                 reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->umin_value);
2002                 reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->umax_value);
2003 
2004                 if ((s32)reg->umin_value <= (s32)reg->umax_value) {
2005                         reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
2006                         reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
2007                 }
2008         }
2009         if ((reg->smin_value >> 32) == (reg->smax_value >> 32)) {
2010                 /* low 32 bits should form a proper u32 range */
2011                 if ((u32)reg->smin_value <= (u32)reg->smax_value) {
2012                         reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->smin_value);
2013                         reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->smax_value);
2014                 }
2015                 /* low 32 bits should form a proper s32 range */
2016                 if ((s32)reg->smin_value <= (s32)reg->smax_value) {
2017                         reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
2018                         reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
2019                 }
2020         }
2021         /* Special case where upper bits form a small sequence of two
2022          * sequential numbers (in 32-bit unsigned space, so 0xffffffff to
2023          * 0x00000000 is also valid), while lower bits form a proper s32 range
2024          * going from negative numbers to positive numbers. E.g., let's say we
2025          * have s64 range [-1, 1] ([0xffffffffffffffff, 0x0000000000000001]).
2026          * Possible s64 values are {-1, 0, 1} ({0xffffffffffffffff,
2027          * 0x0000000000000000, 0x0000000000000001}). Ignoring upper 32 bits,
2028          * we still get a valid s32 range [-1, 1] ([0xffffffff, 0x00000001]).
2029          * Note that it doesn't have to be 0xffffffff going to 0x00000000 in
2030          * upper 32 bits. As a random example, s64 range
2031          * [0xfffffff0fffffff0; 0xfffffff100000010], forms a valid s32 range
2032          * [-16, 16] ([0xfffffff0; 0x00000010]) in its 32 bit subregister.
2033          */
2034         if ((u32)(reg->umin_value >> 32) + 1 == (u32)(reg->umax_value >> 32) &&
2035             (s32)reg->umin_value < 0 && (s32)reg->umax_value >= 0) {
2036                 reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
2037                 reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
2038         }
2039         if ((u32)(reg->smin_value >> 32) + 1 == (u32)(reg->smax_value >> 32) &&
2040             (s32)reg->smin_value < 0 && (s32)reg->smax_value >= 0) {
2041                 reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
2042                 reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
2043         }
2044         /* if u32 range forms a valid s32 range (due to matching sign bit),
2045          * try to learn from that
2046          */
2047         if ((s32)reg->u32_min_value <= (s32)reg->u32_max_value) {
2048                 reg->s32_min_value = max_t(s32, reg->s32_min_value, reg->u32_min_value);
2049                 reg->s32_max_value = min_t(s32, reg->s32_max_value, reg->u32_max_value);
2050         }
2051         /* If we cannot cross the sign boundary, then signed and unsigned bounds
2052          * are the same, so combine.  This works even in the negative case, e.g.
2053          * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2054          */
2055         if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) {
2056                 reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value);
2057                 reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value);
2058         }
2059 }
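
A standalone check of the "two sequential upper halves" special case above (userspace
arithmetic, example values taken from the comment): for the s64 range [-1, 1] the upper
32 bits step from 0xffffffff to 0x00000000 while the low 32 bits cross from a negative to
a non-negative s32, so the s32 bounds may be tightened to [-1, 1].

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int64_t smin = -1, smax = 1;	/* s64 range [-1, 1] */

	/* Condition from __reg32_deduce_bounds(): the upper halves are two
	 * consecutive values (0xffffffff wrapping to 0 counts) and the low
	 * halves cross from negative to non-negative in s32.
	 */
	if ((uint32_t)((uint64_t)smin >> 32) + 1 == (uint32_t)((uint64_t)smax >> 32) &&
	    (int32_t)smin < 0 && (int32_t)smax >= 0)
		printf("s32 bounds tighten to [%d, %d]\n",
		       (int32_t)smin, (int32_t)smax);	/* prints [-1, 1] */
	return 0;
}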
2060 
2061 static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
2062 {
2063         /* If u64 range forms a valid s64 range (due to matching sign bit),
2064          * try to learn from that. Let's do a bit of ASCII art to see when
2065          * this is happening. Let's take u64 range first:
2066          *
2067          * 0             0x7fffffffffffffff 0x8000000000000000        U64_MAX
2068          * |-------------------------------|--------------------------------|
2069          *
2070          * Valid u64 range is formed when umin and umax are anywhere in the
2071          * range [0, U64_MAX], and umin <= umax. u64 case is simple and
2072          * straightforward. Let's see how s64 range maps onto the same range
2073          * of values, annotated below the line for comparison:
2074          *
2075          * 0             0x7fffffffffffffff 0x8000000000000000        U64_MAX
2076          * |-------------------------------|--------------------------------|
2077          * 0                        S64_MAX S64_MIN                        -1
2078          *
2079          * So s64 values basically start in the middle and they are logically
2080          * contiguous to the right of it, wrapping around from -1 to 0, and
2081          * then finishing as S64_MAX (0x7fffffffffffffff) right before
2082          * S64_MIN. We can try drawing the continuity of u64 vs s64 values
2083          * more visually as mapped to sign-agnostic range of hex values.
2084          *
2085          *  u64 start                                               u64 end
2086          *  _______________________________________________________________
2087          * /                                                               \
2088          * 0             0x7fffffffffffffff 0x8000000000000000        U64_MAX
2089          * |-------------------------------|--------------------------------|
2090          * 0                        S64_MAX S64_MIN                        -1
2091          *                                / \
2092          * >------------------------------   ------------------------------->
2093          * s64 continues...        s64 end   s64 start          s64 "midpoint"
2094          *
2095          * What this means is that, in general, we can't always derive
2096          * something new about u64 from any random s64 range, and vice versa.
2097          *
2098          * But we can do that in two particular cases. One is when the entire
2099          * u64/s64 range is *entirely* contained within the left half of the above
2100          * diagram, the other when it is *entirely* contained in the right half. I.e.:
2101          *
2102          * |-------------------------------|--------------------------------|
2103          *     ^                   ^            ^                 ^
2104          *     A                   B            C                 D
2105          *
2106          * [A, B] and [C, D] are contained entirely in their respective halves
2107          * and form valid contiguous ranges as both u64 and s64 values. [A, B]
2108          * will be non-negative both as u64 and s64 (and in fact it will be
2109          * identical ranges no matter the signedness). [C, D] treated as s64
2110          * will be a range of negative values, while in u64 it will be
2111          * non-negative range of values larger than 0x8000000000000000.
2112          *
2113          * Now, any other range here can't be represented in both u64 and s64
2114          * simultaneously. E.g., [A, C], [A, D], [B, C], [B, D] are valid
2115          * contiguous u64 ranges, but they are discontinuous in s64. [B, C]
2116          * in s64 would be properly presented as [S64_MIN, C] and [B, S64_MAX],
2117          * for example. Similarly, valid s64 range [D, A] (going from negative
2118          * to positive values), would be two separate [D, U64_MAX] and [0, A]
2119          * ranges as u64. Currently reg_state can't represent two segments per
2120          * numeric domain, so in such situations we can only derive maximal
2121          * possible range ([0, U64_MAX] for u64, and [S64_MIN, S64_MAX] for s64).
2122          *
2123          * So we use these facts to derive umin/umax from smin/smax and vice
2124          * versa only if they stay within the same "half". This is equivalent
2125          * to checking the sign bit: the lower half will have the sign bit as zero,
2126          * the upper half will have it set to one. Below in the code we simplify this
2127          * by just casting umin/umax as smin/smax and checking if they form a valid
2128          * range, and vice versa. Those are equivalent checks.
2129          */
2130         if ((s64)reg->umin_value <= (s64)reg->umax_value) {
2131                 reg->smin_value = max_t(s64, reg->smin_value, reg->umin_value);
2132                 reg->smax_value = min_t(s64, reg->smax_value, reg->umax_value);
2133         }
2134         /* If we cannot cross the sign boundary, then signed and unsigned bounds
2135          * are the same, so combine.  This works even in the negative case, e.g.
2136          * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2137          */
2138         if ((u64)reg->smin_value <= (u64)reg->smax_value) {
2139                 reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value);
2140                 reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value);
2141         }
2142 }
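
A worked instance of the "same half" rule used above (standalone userspace arithmetic,
not kernel code): if casting umin/umax to s64 still yields umin <= umax, the whole u64
range sits on one side of the sign boundary, so the signed bounds can be tightened from it.

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

static int64_t max_s64(int64_t a, int64_t b) { return a > b ? a : b; }
static int64_t min_s64(int64_t a, int64_t b) { return a < b ? a : b; }

int main(void)
{
	/* u64 range entirely in the upper half: both values have the sign
	 * bit set, so as s64 they form the valid range [-16, -1].
	 */
	uint64_t umin = 0xfffffffffffffff0ULL, umax = 0xffffffffffffffffULL;
	int64_t smin = INT64_MIN, smax = INT64_MAX;	/* previously unknown */

	if ((int64_t)umin <= (int64_t)umax) {
		smin = max_s64(smin, (int64_t)umin);
		smax = min_s64(smax, (int64_t)umax);
	}
	/* prints smin=-16 smax=-1 */
	printf("smin=%" PRId64 " smax=%" PRId64 "\n", smin, smax);
	return 0;
}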
2143 
2144 static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg)
2145 {
2146         /* Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit
2147          * values on both sides of the 64-bit range in the hope of getting a
2148          * tighter range. E.g., if r1 is [0x1'00000000, 0x3'80000000] and we
2149          * learn from a 32-bit signed > 0 operation that its s32 bounds are now
2150          * [1; 0x7fffffff], we can substitute 1 as the low 32 bits of the _low_
2151          * 64-bit bound (0x100000000 -> 0x100000001) and 0x7fffffff as the low
2152          * 32 bits of the _high_ 64-bit bound (0x380000000 -> 0x37fffffff) and
2153          * arrive at better overall bounds for r1 as [0x1'00000001; 0x3'7fffffff].
2154          * We just need to make sure that derived bounds we are intersecting
2155          * with are well-formed ranges in respective s64 or u64 domain, just
2156          * like we do with similar kinds of 32-to-64 or 64-to-32 adjustments.
2157          */
2158         __u64 new_umin, new_umax;
2159         __s64 new_smin, new_smax;
2160 
2161         /* u32 -> u64 tightening, it's always well-formed */
2162         new_umin = (reg->umin_value & ~0xffffffffULL) | reg->u32_min_value;
2163         new_umax = (reg->umax_value & ~0xffffffffULL) | reg->u32_max_value;
2164         reg->umin_value = max_t(u64, reg->umin_value, new_umin);
2165         reg->umax_value = min_t(u64, reg->umax_value, new_umax);
2166         /* u32 -> s64 tightening, u32 range embedded into s64 preserves range validity */
2167         new_smin = (reg->smin_value & ~0xffffffffULL) | reg->u32_min_value;
2168         new_smax = (reg->smax_value & ~0xffffffffULL) | reg->u32_max_value;
2169         reg->smin_value = max_t(s64, reg->smin_value, new_smin);
2170         reg->smax_value = min_t(s64, reg->smax_value, new_smax);
2171 
2172         /* if s32 can be treated as valid u32 range, we can use it as well */
2173         if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) {
2174                 /* s32 -> u64 tightening */
2175                 new_umin = (reg->umin_value & ~0xffffffffULL) | (u32)reg->s32_min_value;
2176                 new_umax = (reg->umax_value & ~0xffffffffULL) | (u32)reg->s32_max_value;
2177                 reg->umin_value = max_t(u64, reg->umin_value, new_umin);
2178                 reg->umax_value = min_t(u64, reg->umax_value, new_umax);
2179                 /* s32 -> s64 tightening */
2180                 new_smin = (reg->smin_value & ~0xffffffffULL) | (u32)reg->s32_min_value;
2181                 new_smax = (reg->smax_value & ~0xffffffffULL) | (u32)reg->s32_max_value;
2182                 reg->smin_value = max_t(s64, reg->smin_value, new_smin);
2183                 reg->smax_value = min_t(s64, reg->smax_value, new_smax);
2184         }
2185 }
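
A standalone recomputation of the example in the comment above (userspace arithmetic only):
r1 in [0x1'00000000, 0x3'80000000] with freshly learned 32-bit bounds [1, 0x7fffffff]
tightens to [0x1'00000001, 0x3'7fffffff] once the low 32 bits of each 64-bit bound are
replaced with the 32-bit bounds.

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
	uint64_t umin = 0x100000000ULL, umax = 0x380000000ULL;
	uint32_t u32_min = 1, u32_max = 0x7fffffff;	/* from a 32-bit s > 0 check */

	/* Splice the tighter 32-bit bounds into the low halves of the 64-bit
	 * bounds, as done in __reg_deduce_mixed_bounds(), then keep the result
	 * only if it actually narrows the range.
	 */
	uint64_t new_umin = (umin & ~0xffffffffULL) | u32_min;
	uint64_t new_umax = (umax & ~0xffffffffULL) | u32_max;

	if (new_umin > umin)
		umin = new_umin;
	if (new_umax < umax)
		umax = new_umax;
	/* prints umin=0x100000001 umax=0x37fffffff */
	printf("umin=%#" PRIx64 " umax=%#" PRIx64 "\n", umin, umax);
	return 0;
}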
2186 
2187 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
2188 {
2189         __reg32_deduce_bounds(reg);
2190         __reg64_deduce_bounds(reg);
2191         __reg_deduce_mixed_bounds(reg);
2192 }
2193 
2194 /* Attempts to improve var_off based on unsigned min/max information */
2195 static void __reg_bound_offset(struct bpf_reg_state *reg)
2196 {
2197         struct tnum var64_off = tnum_intersect(reg->var_off,
2198                                                tnum_range(reg->umin_value,
2199                                                           reg->umax_value));
2200         struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off),
2201                                                tnum_range(reg->u32_min_value,
2202                                                           reg->u32_max_value));
2203 
2204         reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
2205 }
2206 
2207 static void reg_bounds_sync(struct bpf_reg_state *reg)
2208 {
2209         /* We might have learned new bounds from the var_off. */
2210         __update_reg_bounds(reg);
2211         /* We might have learned something about the sign bit. */
2212         __reg_deduce_bounds(reg);
2213         __reg_deduce_bounds(reg);
2214         /* We might have learned some bits from the bounds. */
2215         __reg_bound_offset(reg);
2216         /* Intersecting with the old var_off might have improved our bounds
2217          * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
2218          * then new var_off is (0; 0x7f...fc) which improves our umax.
2219          */
2220         __update_reg_bounds(reg);
2221 }
2222 
2223 static int reg_bounds_sanity_check(struct bpf_verifier_env *env,
2224                                    struct bpf_reg_state *reg, const char *ctx)
2225 {
2226         const char *msg;
2227 
2228         if (reg->umin_value > reg->umax_value ||
2229             reg->smin_value > reg->smax_value ||
2230             reg->u32_min_value > reg->u32_max_value ||
2231             reg->s32_min_value > reg->s32_max_value) {
2232                 msg = "range bounds violation";
2233                 goto out;
2234         }
2235 
2236         if (tnum_is_const(reg->var_off)) {
2237                 u64 uval = reg->var_off.value;
2238                 s64 sval = (s64)uval;
2239 
2240                 if (reg->umin_value != uval || reg->umax_value != uval ||
2241                     reg->smin_value != sval || reg->smax_value != sval) {
2242                         msg = "const tnum out of sync with range bounds";
2243                         goto out;
2244                 }
2245         }
2246 
2247         if (tnum_subreg_is_const(reg->var_off)) {
2248                 u32 uval32 = tnum_subreg(reg->var_off).value;
2249                 s32 sval32 = (s32)uval32;
2250 
2251                 if (reg->u32_min_value != uval32 || reg->u32_max_value != uval32 ||
2252                     reg->s32_min_value != sval32 || reg->s32_max_value != sval32) {
2253                         msg = "const subreg tnum out of sync with range bounds";
2254                         goto out;
2255                 }
2256         }
2257 
2258         return 0;
2259 out:
2260         verbose(env, "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] "
2261                 "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)\n",
2262                 ctx, msg, reg->umin_value, reg->umax_value,
2263                 reg->smin_value, reg->smax_value,
2264                 reg->u32_min_value, reg->u32_max_value,
2265                 reg->s32_min_value, reg->s32_max_value,
2266                 reg->var_off.value, reg->var_off.mask);
2267         if (env->test_reg_invariants)
2268                 return -EFAULT;
2269         __mark_reg_unbounded(reg);
2270         return 0;
2271 }
2272 
2273 static bool __reg32_bound_s64(s32 a)
2274 {
2275         return a >= 0 && a <= S32_MAX;
2276 }
2277 
2278 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
2279 {
2280         reg->umin_value = reg->u32_min_value;
2281         reg->umax_value = reg->u32_max_value;
2282 
2283         /* Attempt to pull 32-bit signed bounds into 64-bit bounds, but they must
2284          * be non-negative; otherwise set worst-case bounds and refine them later
2285          * from the tnum.
2286          */
2287         if (__reg32_bound_s64(reg->s32_min_value) &&
2288             __reg32_bound_s64(reg->s32_max_value)) {
2289                 reg->smin_value = reg->s32_min_value;
2290                 reg->smax_value = reg->s32_max_value;
2291         } else {
2292                 reg->smin_value = 0;
2293                 reg->smax_value = U32_MAX;
2294         }
2295 }
2296 
2297 /* Mark a register as having a completely unknown (scalar) value. */
2298 static void __mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
2299 {
2300         /*
2301          * Clear type, off, and union(map_ptr, range) and
2302          * padding between 'type' and union
2303          */
2304         memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
2305         reg->type = SCALAR_VALUE;
2306         reg->id = 0;
2307         reg->ref_obj_id = 0;
2308         reg->var_off = tnum_unknown;
2309         reg->frameno = 0;
2310         reg->precise = false;
2311         __mark_reg_unbounded(reg);
2312 }
2313 
2314 /* Mark a register as having a completely unknown (scalar) value,
2315  * initialize .precise as true when not bpf capable.
2316  */
2317 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2318                                struct bpf_reg_state *reg)
2319 {
2320         __mark_reg_unknown_imprecise(reg);
2321         reg->precise = !env->bpf_capable;
2322 }
2323 
2324 static void mark_reg_unknown(struct bpf_verifier_env *env,
2325                              struct bpf_reg_state *regs, u32 regno)
2326 {
2327         if (WARN_ON(regno >= MAX_BPF_REG)) {
2328                 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
2329                 /* Something bad happened, let's kill all regs except FP */
2330                 for (regno = 0; regno < BPF_REG_FP; regno++)
2331                         __mark_reg_not_init(env, regs + regno);
2332                 return;
2333         }
2334         __mark_reg_unknown(env, regs + regno);
2335 }
2336 
2337 static int __mark_reg_s32_range(struct bpf_verifier_env *env,
2338                                 struct bpf_reg_state *regs,
2339                                 u32 regno,
2340                                 s32 s32_min,
2341                                 s32 s32_max)
2342 {
2343         struct bpf_reg_state *reg = regs + regno;
2344 
2345         reg->s32_min_value = max_t(s32, reg->s32_min_value, s32_min);
2346         reg->s32_max_value = min_t(s32, reg->s32_max_value, s32_max);
2347 
2348         reg->smin_value = max_t(s64, reg->smin_value, s32_min);
2349         reg->smax_value = min_t(s64, reg->smax_value, s32_max);
2350 
2351         reg_bounds_sync(reg);
2352 
2353         return reg_bounds_sanity_check(env, reg, "s32_range");
2354 }
2355 
2356 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
2357                                 struct bpf_reg_state *reg)
2358 {
2359         __mark_reg_unknown(env, reg);
2360         reg->type = NOT_INIT;
2361 }
2362 
2363 static void mark_reg_not_init(struct bpf_verifier_env *env,
2364                               struct bpf_reg_state *regs, u32 regno)
2365 {
2366         if (WARN_ON(regno >= MAX_BPF_REG)) {
2367                 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
2368                 /* Something bad happened, let's kill all regs except FP */
2369                 for (regno = 0; regno < BPF_REG_FP; regno++)
2370                         __mark_reg_not_init(env, regs + regno);
2371                 return;
2372         }
2373         __mark_reg_not_init(env, regs + regno);
2374 }
2375 
2376 static void mark_btf_ld_reg(struct bpf_verifier_env *env,
2377                             struct bpf_reg_state *regs, u32 regno,
2378                             enum bpf_reg_type reg_type,
2379                             struct btf *btf, u32 btf_id,
2380                             enum bpf_type_flag flag)
2381 {
2382         if (reg_type == SCALAR_VALUE) {
2383                 mark_reg_unknown(env, regs, regno);
2384                 return;
2385         }
2386         mark_reg_known_zero(env, regs, regno);
2387         regs[regno].type = PTR_TO_BTF_ID | flag;
2388         regs[regno].btf = btf;
2389         regs[regno].btf_id = btf_id;
2390         if (type_may_be_null(flag))
2391                 regs[regno].id = ++env->id_gen;
2392 }
2393 
2394 #define DEF_NOT_SUBREG  (0)
2395 static void init_reg_state(struct bpf_verifier_env *env,
2396                            struct bpf_func_state *state)
2397 {
2398         struct bpf_reg_state *regs = state->regs;
2399         int i;
2400 
2401         for (i = 0; i < MAX_BPF_REG; i++) {
2402                 mark_reg_not_init(env, regs, i);
2403                 regs[i].live = REG_LIVE_NONE;
2404                 regs[i].parent = NULL;
2405                 regs[i].subreg_def = DEF_NOT_SUBREG;
2406         }
2407 
2408         /* frame pointer */
2409         regs[BPF_REG_FP].type = PTR_TO_STACK;
2410         mark_reg_known_zero(env, regs, BPF_REG_FP);
2411         regs[BPF_REG_FP].frameno = state->frameno;
2412 }
2413 
2414 static struct bpf_retval_range retval_range(s32 minval, s32 maxval)
2415 {
2416         return (struct bpf_retval_range){ minval, maxval };
2417 }
2418 
2419 #define BPF_MAIN_FUNC (-1)
2420 static void init_func_state(struct bpf_verifier_env *env,
2421                             struct bpf_func_state *state,
2422                             int callsite, int frameno, int subprogno)
2423 {
2424         state->callsite = callsite;
2425         state->frameno = frameno;
2426         state->subprogno = subprogno;
2427         state->callback_ret_range = retval_range(0, 0);
2428         init_reg_state(env, state);
2429         mark_verifier_state_scratched(env);
2430 }
2431 
2432 /* Similar to push_stack(), but for async callbacks */
2433 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
2434                                                 int insn_idx, int prev_insn_idx,
2435                                                 int subprog, bool is_sleepable)
2436 {
2437         struct bpf_verifier_stack_elem *elem;
2438         struct bpf_func_state *frame;
2439 
2440         elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
2441         if (!elem)
2442                 goto err;
2443 
2444         elem->insn_idx = insn_idx;
2445         elem->prev_insn_idx = prev_insn_idx;
2446         elem->next = env->head;
2447         elem->log_pos = env->log.end_pos;
2448         env->head = elem;
2449         env->stack_size++;
2450         if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2451                 verbose(env,
2452                         "The sequence of %d jumps is too complex for async cb.\n",
2453                         env->stack_size);
2454                 goto err;
2455         }
2456         /* Unlike push_stack(), do not copy_verifier_state().
2457          * The caller state doesn't matter.
2458          * This is an async callback. It starts with a fresh stack.
2459          * Initialize it similarly to do_check_common().
2460          */
2461         elem->st.branches = 1;
2462         elem->st.in_sleepable = is_sleepable;
2463         frame = kzalloc(sizeof(*frame), GFP_KERNEL);
2464         if (!frame)
2465                 goto err;
2466         init_func_state(env, frame,
2467                         BPF_MAIN_FUNC /* callsite */,
2468                         0 /* frameno within this callchain */,
2469                         subprog /* subprog number within this prog */);
2470         elem->st.frame[0] = frame;
2471         return &elem->st;
2472 err:
2473         free_verifier_state(env->cur_state, true);
2474         env->cur_state = NULL;
2475         /* pop all elements and return */
2476         while (!pop_stack(env, NULL, NULL, false));
2477         return NULL;
2478 }
2479 
2480 
2481 enum reg_arg_type {
2482         SRC_OP,         /* register is used as source operand */
2483         DST_OP,         /* register is used as destination operand */
2484         DST_OP_NO_MARK  /* same as above, check only, don't mark */
2485 };
2486 
2487 static int cmp_subprogs(const void *a, const void *b)
2488 {
2489         return ((struct bpf_subprog_info *)a)->start -
2490                ((struct bpf_subprog_info *)b)->start;
2491 }
2492 
2493 static int find_subprog(struct bpf_verifier_env *env, int off)
2494 {
2495         struct bpf_subprog_info *p;
2496 
2497         p = bsearch(&off, env->subprog_info, env->subprog_cnt,
2498                     sizeof(env->subprog_info[0]), cmp_subprogs);
2499         if (!p)
2500                 return -ENOENT;
2501         return p - env->subprog_info;
2502 
2503 }
2504 
2505 static int add_subprog(struct bpf_verifier_env *env, int off)
2506 {
2507         int insn_cnt = env->prog->len;
2508         int ret;
2509 
2510         if (off >= insn_cnt || off < 0) {
2511                 verbose(env, "call to invalid destination\n");
2512                 return -EINVAL;
2513         }
2514         ret = find_subprog(env, off);
2515         if (ret >= 0)
2516                 return ret;
2517         if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
2518                 verbose(env, "too many subprograms\n");
2519                 return -E2BIG;
2520         }
2521         /* determine subprog starts. The end is one before the next starts */
2522         env->subprog_info[env->subprog_cnt++].start = off;
2523         sort(env->subprog_info, env->subprog_cnt,
2524              sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
2525         return env->subprog_cnt - 1;
2526 }
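
Subprogram starts are kept sorted so find_subprog() can binary-search them, and
add_subprog() appends a new start and re-sorts. A userspace sketch of the same lookup
pattern using the C library's qsort()/bsearch() as stand-ins for the kernel's
sort()/bsearch() (simplified, hypothetical data):

#include <stdio.h>
#include <stdlib.h>

struct subprog_info { int start; };

static int cmp_subprogs(const void *a, const void *b)
{
	return ((const struct subprog_info *)a)->start -
	       ((const struct subprog_info *)b)->start;
}

int main(void)
{
	struct subprog_info subprogs[] = { { 0 }, { 40 }, { 12 } };
	int cnt = 3;
	struct subprog_info key = { .start = 12 }, *p;

	/* keep starts sorted, then binary-search by start offset */
	qsort(subprogs, cnt, sizeof(subprogs[0]), cmp_subprogs);
	p = bsearch(&key, subprogs, cnt, sizeof(subprogs[0]), cmp_subprogs);
	if (p)
		printf("subprog index %ld starts at insn %d\n",
		       (long)(p - subprogs), p->start);	/* index 1, insn 12 */
	return 0;
}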
2527 
2528 static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
2529 {
2530         struct bpf_prog_aux *aux = env->prog->aux;
2531         struct btf *btf = aux->btf;
2532         const struct btf_type *t;
2533         u32 main_btf_id, id;
2534         const char *name;
2535         int ret, i;
2536 
2537         /* Non-zero func_info_cnt implies valid btf */
2538         if (!aux->func_info_cnt)
2539                 return 0;
2540         main_btf_id = aux->func_info[0].type_id;
2541 
2542         t = btf_type_by_id(btf, main_btf_id);
2543         if (!t) {
2544                 verbose(env, "invalid btf id for main subprog in func_info\n");
2545                 return -EINVAL;
2546         }
2547 
2548         name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:");
2549         if (IS_ERR(name)) {
2550                 ret = PTR_ERR(name);
2551                 /* If there is no tag present, there is no exception callback */
2552                 if (ret == -ENOENT)
2553                         ret = 0;
2554                 else if (ret == -EEXIST)
2555                         verbose(env, "multiple exception callback tags for main subprog\n");
2556                 return ret;
2557         }
2558 
2559         ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC);
2560         if (ret < 0) {
2561                 verbose(env, "exception callback '%s' could not be found in BTF\n", name);
2562                 return ret;
2563         }
2564         id = ret;
2565         t = btf_type_by_id(btf, id);
2566         if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
2567                 verbose(env, "exception callback '%s' must have global linkage\n", name);
2568                 return -EINVAL;
2569         }
2570         ret = 0;
2571         for (i = 0; i < aux->func_info_cnt; i++) {
2572                 if (aux->func_info[i].type_id != id)
2573                         continue;
2574                 ret = aux->func_info[i].insn_off;
2575                 /* Further func_info and subprog checks will also happen
2576                  * later, so assume this is the right insn_off for now.
2577                  */
2578                 if (!ret) {
2579                         verbose(env, "invalid exception callback insn_off in func_info: 0\n");
2580                         ret = -EINVAL;
2581                 }
2582         }
2583         if (!ret) {
2584                 verbose(env, "exception callback type id not found in func_info\n");
2585                 ret = -EINVAL;
2586         }
2587         return ret;
2588 }
2589 
2590 #define MAX_KFUNC_DESCS 256
2591 #define MAX_KFUNC_BTFS  256
2592 
2593 struct bpf_kfunc_desc {
2594         struct btf_func_model func_model;
2595         u32 func_id;
2596         s32 imm;
2597         u16 offset;
2598         unsigned long addr;
2599 };
2600 
2601 struct bpf_kfunc_btf {
2602         struct btf *btf;
2603         struct module *module;
2604         u16 offset;
2605 };
2606 
2607 struct bpf_kfunc_desc_tab {
2608         /* Sorted by func_id (BTF ID) and offset (fd_array offset) during
2609          * verification. JITs do lookups by bpf_insn, where func_id may not be
2610          * available, therefore at the end of verification do_misc_fixups()
2611          * sorts this by imm and offset.
2612          */
2613         struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
2614         u32 nr_descs;
2615 };
2616 
2617 struct bpf_kfunc_btf_tab {
2618         struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
2619         u32 nr_descs;
2620 };
2621 
2622 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
2623 {
2624         const struct bpf_kfunc_desc *d0 = a;
2625         const struct bpf_kfunc_desc *d1 = b;
2626 
2627         /* func_id is not greater than BTF_MAX_TYPE */
2628         return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
2629 }
2630 
2631 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
2632 {
2633         const struct bpf_kfunc_btf *d0 = a;
2634         const struct bpf_kfunc_btf *d1 = b;
2635 
2636         return d0->offset - d1->offset;
2637 }
2638 
2639 static const struct bpf_kfunc_desc *
2640 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
2641 {
2642         struct bpf_kfunc_desc desc = {
2643                 .func_id = func_id,
2644                 .offset = offset,
2645         };
2646         struct bpf_kfunc_desc_tab *tab;
2647 
2648         tab = prog->aux->kfunc_tab;
2649         return bsearch(&desc, tab->descs, tab->nr_descs,
2650                        sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
2651 }
2652 
2653 int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
2654                        u16 btf_fd_idx, u8 **func_addr)
2655 {
2656         const struct bpf_kfunc_desc *desc;
2657 
2658         desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
2659         if (!desc)
2660                 return -EFAULT;
2661 
2662         *func_addr = (u8 *)desc->addr;
2663         return 0;
2664 }
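
/*
 * Editorial sketch, not part of verifier.c: a JIT that reports
 * bpf_jit_supports_far_kfunc_call() still sees the kfunc BTF ID in
 * insn->imm after verification and resolves the call target itself,
 * roughly as below. emit_call() stands in for an arch-specific helper and
 * is an assumption, not a real API.
 *
 *   u8 *addr;
 *
 *   if (bpf_get_kfunc_addr(prog, insn->imm, insn->off, &addr))
 *           return -EINVAL;    // desc was added by add_kfunc_call()
 *   emit_call(&image, addr);   // hypothetical: emit a direct call to addr
 */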
2665 
2666 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
2667                                          s16 offset)
2668 {
2669         struct bpf_kfunc_btf kf_btf = { .offset = offset };
2670         struct bpf_kfunc_btf_tab *tab;
2671         struct bpf_kfunc_btf *b;
2672         struct module *mod;
2673         struct btf *btf;
2674         int btf_fd;
2675 
2676         tab = env->prog->aux->kfunc_btf_tab;
2677         b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
2678                     sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
2679         if (!b) {
2680                 if (tab->nr_descs == MAX_KFUNC_BTFS) {
2681                         verbose(env, "too many different module BTFs\n");
2682                         return ERR_PTR(-E2BIG);
2683                 }
2684 
2685                 if (bpfptr_is_null(env->fd_array)) {
2686                         verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
2687                         return ERR_PTR(-EPROTO);
2688                 }
2689 
2690                 if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
2691                                             offset * sizeof(btf_fd),
2692                                             sizeof(btf_fd)))
2693                         return ERR_PTR(-EFAULT);
2694 
2695                 btf = btf_get_by_fd(btf_fd);
2696                 if (IS_ERR(btf)) {
2697                         verbose(env, "invalid module BTF fd specified\n");
2698                         return btf;
2699                 }
2700 
2701                 if (!btf_is_module(btf)) {
2702                         verbose(env, "BTF fd for kfunc is not a module BTF\n");
2703                         btf_put(btf);
2704                         return ERR_PTR(-EINVAL);
2705                 }
2706 
2707                 mod = btf_try_get_module(btf);
2708                 if (!mod) {
2709                         btf_put(btf);
2710                         return ERR_PTR(-ENXIO);
2711                 }
2712 
2713                 b = &tab->descs[tab->nr_descs++];
2714                 b->btf = btf;
2715                 b->module = mod;
2716                 b->offset = offset;
2717 
2718                 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2719                      kfunc_btf_cmp_by_off, NULL);
2720         }
2721         return b->btf;
2722 }
2723 
2724 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
2725 {
2726         if (!tab)
2727                 return;
2728 
2729         while (tab->nr_descs--) {
2730                 module_put(tab->descs[tab->nr_descs].module);
2731                 btf_put(tab->descs[tab->nr_descs].btf);
2732         }
2733         kfree(tab);
2734 }
2735 
2736 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
2737 {
2738         if (offset) {
2739                 if (offset < 0) {
2740                         /* In the future, this could be allowed, interpreting the
2741                          * offset as a u16, to raise the fd_array index limit.
2742                          */
2743                         verbose(env, "negative offset disallowed for kernel module function call\n");
2744                         return ERR_PTR(-EINVAL);
2745                 }
2746 
2747                 return __find_kfunc_desc_btf(env, offset);
2748         }
2749         return btf_vmlinux ?: ERR_PTR(-ENOENT);
2750 }
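
/*
 * Editorial sketch, not part of verifier.c: a kfunc call instruction is a
 * BPF_CALL with src_reg == BPF_PSEUDO_KFUNC_CALL, the target's BTF ID in
 * insn->imm and, for module kfuncs, an fd_array index in insn->off (off 0
 * selects vmlinux BTF, as handled above). A minimal sketch, assuming the
 * module BTF fd was placed at fd_array[1] at BPF_PROG_LOAD time:
 *
 *   struct bpf_insn call = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0,
 *                                       BPF_PSEUDO_KFUNC_CALL,
 *                                       1,              // off: fd_array index
 *                                       kfunc_btf_id);  // imm: kfunc BTF ID
 */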
2751 
2752 static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
2753 {
2754         const struct btf_type *func, *func_proto;
2755         struct bpf_kfunc_btf_tab *btf_tab;
2756         struct bpf_kfunc_desc_tab *tab;
2757         struct bpf_prog_aux *prog_aux;
2758         struct bpf_kfunc_desc *desc;
2759         const char *func_name;
2760         struct btf *desc_btf;
2761         unsigned long call_imm;
2762         unsigned long addr;
2763         int err;
2764 
2765         prog_aux = env->prog->aux;
2766         tab = prog_aux->kfunc_tab;
2767         btf_tab = prog_aux->kfunc_btf_tab;
2768         if (!tab) {
2769                 if (!btf_vmlinux) {
2770                         verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
2771                         return -ENOTSUPP;
2772                 }
2773 
2774                 if (!env->prog->jit_requested) {
2775                         verbose(env, "JIT is required for calling kernel function\n");
2776                         return -ENOTSUPP;
2777                 }
2778 
2779                 if (!bpf_jit_supports_kfunc_call()) {
2780                         verbose(env, "JIT does not support calling kernel function\n");
2781                         return -ENOTSUPP;
2782                 }
2783 
2784                 if (!env->prog->gpl_compatible) {
2785                         verbose(env, "cannot call kernel function from non-GPL compatible program\n");
2786                         return -EINVAL;
2787                 }
2788 
2789                 tab = kzalloc(sizeof(*tab), GFP_KERNEL);
2790                 if (!tab)
2791                         return -ENOMEM;
2792                 prog_aux->kfunc_tab = tab;
2793         }
2794 
2795         /* func_id == 0 is always invalid, but instead of returning an error, be
2796          * conservative and wait until the code elimination pass before returning
2797          * an error, so that invalid calls that get pruned out may appear in BPF
2798          * programs loaded from userspace.  It is also required that offset be
2799          * untouched for such calls.
2800          */
2801         if (!func_id && !offset)
2802                 return 0;
2803 
2804         if (!btf_tab && offset) {
2805                 btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
2806                 if (!btf_tab)
2807                         return -ENOMEM;
2808                 prog_aux->kfunc_btf_tab = btf_tab;
2809         }
2810 
2811         desc_btf = find_kfunc_desc_btf(env, offset);
2812         if (IS_ERR(desc_btf)) {
2813                 verbose(env, "failed to find BTF for kernel function\n");
2814                 return PTR_ERR(desc_btf);
2815         }
2816 
2817         if (find_kfunc_desc(env->prog, func_id, offset))
2818                 return 0;
2819 
2820         if (tab->nr_descs == MAX_KFUNC_DESCS) {
2821                 verbose(env, "too many different kernel function calls\n");
2822                 return -E2BIG;
2823         }
2824 
2825         func = btf_type_by_id(desc_btf, func_id);
2826         if (!func || !btf_type_is_func(func)) {
2827                 verbose(env, "kernel btf_id %u is not a function\n",
2828                         func_id);
2829                 return -EINVAL;
2830         }
2831         func_proto = btf_type_by_id(desc_btf, func->type);
2832         if (!func_proto || !btf_type_is_func_proto(func_proto)) {
2833                 verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
2834                         func_id);
2835                 return -EINVAL;
2836         }
2837 
2838         func_name = btf_name_by_offset(desc_btf, func->name_off);
2839         addr = kallsyms_lookup_name(func_name);
2840         if (!addr) {
2841                 verbose(env, "cannot find address for kernel function %s\n",
2842                         func_name);
2843                 return -EINVAL;
2844         }
2845         specialize_kfunc(env, func_id, offset, &addr);
2846 
2847         if (bpf_jit_supports_far_kfunc_call()) {
2848                 call_imm = func_id;
2849         } else {
2850                 call_imm = BPF_CALL_IMM(addr);
2851                 /* Check whether the relative offset overflows desc->imm */
2852                 if ((unsigned long)(s32)call_imm != call_imm) {
2853                         verbose(env, "address of kernel function %s is out of range\n",
2854                                 func_name);
2855                         return -EINVAL;
2856                 }
2857         }
2858 
2859         if (bpf_dev_bound_kfunc_id(func_id)) {
2860                 err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
2861                 if (err)
2862                         return err;
2863         }
2864 
2865         desc = &tab->descs[tab->nr_descs++];
2866         desc->func_id = func_id;
2867         desc->imm = call_imm;
2868         desc->offset = offset;
2869         desc->addr = addr;
2870         err = btf_distill_func_proto(&env->log, desc_btf,
2871                                      func_proto, func_name,
2872                                      &desc->func_model);
2873         if (!err)
2874                 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2875                      kfunc_desc_cmp_by_id_off, NULL);
2876         return err;
2877 }
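
/*
 * Editorial note, not part of verifier.c: after add_kfunc_call() above,
 * desc->imm holds either the raw BTF ID (JITs with far kfunc call support,
 * which resolve the address via bpf_get_kfunc_addr()) or BPF_CALL_IMM(addr),
 * i.e. the s32 distance from __bpf_call_base. A conventional JIT can then
 * reconstruct the call target roughly as:
 *
 *   u64 addr = (u64)__bpf_call_base + insn->imm;
 */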
2878 
2879 static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
2880 {
2881         const struct bpf_kfunc_desc *d0 = a;
2882         const struct bpf_kfunc_desc *d1 = b;
2883 
2884         if (d0->imm != d1->imm)
2885                 return d0->imm < d1->imm ? -1 : 1;
2886         if (d0->offset != d1->offset)
2887                 return d0->offset < d1->offset ? -1 : 1;
2888         return 0;
2889 }
2890 
2891 static void sort_kfunc_descs_by_imm_off(struct bpf_prog *prog)
2892 {
2893         struct bpf_kfunc_desc_tab *tab;
2894 
2895         tab = prog->aux->kfunc_tab;
2896         if (!tab)
2897                 return;
2898 
2899         sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2900              kfunc_desc_cmp_by_imm_off, NULL);
2901 }
2902 
2903 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
2904 {
2905         return !!prog->aux->kfunc_tab;
2906 }
2907 
2908 const struct btf_func_model *
2909 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
2910                          const struct bpf_insn *insn)
2911 {
2912         const struct bpf_kfunc_desc desc = {
2913                 .imm = insn->imm,
2914                 .offset = insn->off,
2915         };
2916         const struct bpf_kfunc_desc *res;
2917         struct bpf_kfunc_desc_tab *tab;
2918 
2919         tab = prog->aux->kfunc_tab;
2920         res = bsearch(&desc, tab->descs, tab->nr_descs,
2921                       sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
2922 
2923         return res ? &res->func_model : NULL;
2924 }
2925 
2926 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
2927 {
2928         struct bpf_subprog_info *subprog = env->subprog_info;
2929         int i, ret, insn_cnt = env->prog->len, ex_cb_insn;
2930         struct bpf_insn *insn = env->prog->insnsi;
2931 
2932         /* Add entry function. */
2933         ret = add_subprog(env, 0);
2934         if (ret)
2935                 return ret;
2936 
2937         for (i = 0; i < insn_cnt; i++, insn++) {
2938                 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
2939                     !bpf_pseudo_kfunc_call(insn))
2940                         continue;
2941 
2942                 if (!env->bpf_capable) {
2943                         verbose(env, "loading/calling other bpf or kernel functions is allowed only for CAP_BPF and CAP_SYS_ADMIN\n");
2944                         return -EPERM;
2945                 }
2946 
2947                 if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
2948                         ret = add_subprog(env, i + insn->imm + 1);
2949                 else
2950                         ret = add_kfunc_call(env, insn->imm, insn->off);
2951 
2952                 if (ret < 0)
2953                         return ret;
2954         }
2955 
2956         ret = bpf_find_exception_callback_insn_off(env);
2957         if (ret < 0)
2958                 return ret;
2959         ex_cb_insn = ret;
2960 
2961         /* If ex_cb_insn > 0, this means that the main program has a subprog
2962          * marked using BTF decl tag to serve as the exception callback.
2963          */
2964         if (ex_cb_insn) {
2965                 ret = add_subprog(env, ex_cb_insn);
2966                 if (ret < 0)
2967                         return ret;
2968                 for (i = 1; i < env->subprog_cnt; i++) {
2969                         if (env->subprog_info[i].start != ex_cb_insn)
2970                                 continue;
2971                         env->exception_callback_subprog = i;
2972                         mark_subprog_exc_cb(env, i);
2973                         break;
2974                 }
2975         }
2976 
2977         /* Add a fake 'exit' subprog which could simplify subprog iteration
2978          * logic. 'subprog_cnt' should not be increased.
2979          */
2980         subprog[env->subprog_cnt].start = insn_cnt;
2981 
2982         if (env->log.level & BPF_LOG_LEVEL2)
2983                 for (i = 0; i < env->subprog_cnt; i++)
2984                         verbose(env, "func#%d @%d\n", i, subprog[i].start);
2985 
2986         return 0;
2987 }
2988 
2989 static int check_subprogs(struct bpf_verifier_env *env)
2990 {
2991         int i, subprog_start, subprog_end, off, cur_subprog = 0;
2992         struct bpf_subprog_info *subprog = env->subprog_info;
2993         struct bpf_insn *insn = env->prog->insnsi;
2994         int insn_cnt = env->prog->len;
2995 
2996         /* now check that all jumps are within the same subprog */
2997         subprog_start = subprog[cur_subprog].start;
2998         subprog_end = subprog[cur_subprog + 1].start;
2999         for (i = 0; i < insn_cnt; i++) {
3000                 u8 code = insn[i].code;
3001 
3002                 if (code == (BPF_JMP | BPF_CALL) &&
3003                     insn[i].src_reg == 0 &&
3004                     insn[i].imm == BPF_FUNC_tail_call) {
3005                         subprog[cur_subprog].has_tail_call = true;
3006                         subprog[cur_subprog].tail_call_reachable = true;
3007                 }
3008                 if (BPF_CLASS(code) == BPF_LD &&
3009                     (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
3010                         subprog[cur_subprog].has_ld_abs = true;
3011                 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
3012                         goto next;
3013                 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
3014                         goto next;
3015                 if (code == (BPF_JMP32 | BPF_JA))
3016                         off = i + insn[i].imm + 1;
3017                 else
3018                         off = i + insn[i].off + 1;
3019                 if (off < subprog_start || off >= subprog_end) {
3020                         verbose(env, "jump out of range from insn %d to %d\n", i, off);
3021                         return -EINVAL;
3022                 }
3023 next:
3024                 if (i == subprog_end - 1) {
3025                         /* to avoid fall-through from one subprog into another,
3026                          * the last insn of the subprog should be either an exit,
3027                          * an unconditional jump back, or a bpf_throw call
3028                          */
3029                         if (code != (BPF_JMP | BPF_EXIT) &&
3030                             code != (BPF_JMP32 | BPF_JA) &&
3031                             code != (BPF_JMP | BPF_JA)) {
3032                                 verbose(env, "last insn is not an exit or jmp\n");
3033                                 return -EINVAL;
3034                         }
3035                         subprog_start = subprog_end;
3036                         cur_subprog++;
3037                         if (cur_subprog < env->subprog_cnt)
3038                                 subprog_end = subprog[cur_subprog + 1].start;
3039                 }
3040         }
3041         return 0;
3042 }
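
/*
 * Editorial illustration, not part of verifier.c: the layout check above
 * accepts, for example,
 *
 *   0: r1 = 1
 *   1: call pc+1     // bpf-to-bpf call, targets insn 3 (1 + imm + 1)
 *   2: exit          // last insn of subprog 0
 *   3: r0 = r1       // subprog 1 starts here
 *   4: exit          // last insn of subprog 1
 *
 * while it rejects a jump whose target falls outside the jumping insn's
 * [subprog_start, subprog_end) range, and a subprog whose last insn is
 * neither an exit nor an unconditional jump, so that control cannot fall
 * through from insn 2 into subprog 1.
 */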
3043 
3044 /* Parentage chain of this register (or stack slot) should take care of all
3045  * issues like callee-saved registers, stack slot allocation time, etc.
3046  */
3047 static int mark_reg_read(struct bpf_verifier_env *env,
3048                          const struct bpf_reg_state *state,
3049                          struct bpf_reg_state *parent, u8 flag)
3050 {
3051         bool writes = parent == state->parent; /* Observe write marks */
3052         int cnt = 0;
3053 
3054         while (parent) {
3055                 /* if read wasn't screened by an earlier write ... */
3056                 if (writes && state->live & REG_LIVE_WRITTEN)
3057                         break;
3058                 if (parent->live & REG_LIVE_DONE) {
3059                         verbose(env, "verifier BUG type %s var_off %lld off %d\n",
3060                                 reg_type_str(env, parent->type),
3061                                 parent->var_off.value, parent->off);
3062                         return -EFAULT;
3063                 }
3064                 /* The first condition is more likely to be true than the
3065                  * second, so check it first.
3066                  */
3067                 if ((parent->live & REG_LIVE_READ) == flag ||
3068                     parent->live & REG_LIVE_READ64)
3069                         /* The parentage chain never changes and
3070                          * this parent was already marked as LIVE_READ.
3071                          * There is no need to keep walking the chain again and
3072                          * keep re-marking all parents as LIVE_READ.
3073                          * This case happens when the same register is read
3074                          * multiple times without writes into it in-between.
3075                          * Also, if parent has the stronger REG_LIVE_READ64 set,
3076                          * then no need to set the weak REG_LIVE_READ32.
3077                          */
3078                         break;
3079                 /* ... then we depend on parent's value */
3080                 parent->live |= flag;
3081                 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
3082                 if (flag == REG_LIVE_READ64)
3083                         parent->live &= ~REG_LIVE_READ32;
3084                 state = parent;
3085                 parent = state->parent;
3086                 writes = true;
3087                 cnt++;
3088         }
3089 
3090         if (env->longest_mark_read_walk < cnt)
3091                 env->longest_mark_read_walk = cnt;
3092         return 0;
3093 }
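
/*
 * Editorial illustration, not part of verifier.c: if a checkpointed parent
 * state computed r6 and the current state then executes
 *
 *   r7 = r6
 *
 * the read walks r6's parentage chain and ORs REG_LIVE_READ64 into the
 * parent's r6, so state pruning knows the parent must preserve r6's value.
 * The walk stops once it reaches a state whose r6 carries REG_LIVE_WRITTEN
 * (that newer write screens the read from older parents) or a parent that
 * already has an equal or stronger read mark.
 */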
3094 
3095 static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
3096 {
3097         struct bpf_func_state *state = func(env, reg);
3098         int spi, ret;
3099 
3100         /* For CONST_PTR_TO_DYNPTR, it must have already been done by
3101          * check_reg_arg in check_helper_call and mark_btf_func_reg_size in
3102          * check_kfunc_call.
3103          */
3104         if (reg->type == CONST_PTR_TO_DYNPTR)
3105                 return 0;
3106         spi = dynptr_get_spi(env, reg);
3107         if (spi < 0)
3108                 return spi;
3109         /* Caller ensures dynptr is valid and initialized, which means spi is in
3110          * bounds and spi is the first dynptr slot. Simply mark stack slot as
3111          * read.
3112          */
3113         ret = mark_reg_read(env, &state->stack[spi].spilled_ptr,
3114                             state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64);
3115         if (ret)
3116                 return ret;
3117         return mark_reg_read(env, &state->stack[spi - 1].spilled_ptr,
3118                              state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64);
3119 }
3120 
3121 static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
3122                           int spi, int nr_slots)
3123 {
3124         struct bpf_func_state *state = func(env, reg);
3125         int err, i;
3126 
3127         for (i = 0; i < nr_slots; i++) {
3128                 struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr;
3129 
3130                 err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64);
3131                 if (err)
3132                         return err;
3133 
3134                 mark_stack_slot_scratched(env, spi - i);
3135         }
3136 
3137         return 0;
3138 }
3139 
3140 /* This function is supposed to be used by the following 32-bit optimization
3141  * code only. It returns TRUE if the source or destination register operates
3142  * on 64 bits; otherwise it returns FALSE.
3143  */
3144 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
3145                      u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
3146 {
3147         u8 code, class, op;
3148 
3149         code = insn->code;
3150         class = BPF_CLASS(code);
3151         op = BPF_OP(code);
3152         if (class == BPF_JMP) {
3153                 /* BPF_EXIT for "main" will reach here. Return TRUE
3154                  * conservatively.
3155                  */
3156                 if (op == BPF_EXIT)
3157                         return true;
3158                 if (op == BPF_CALL) {
3159                         /* BPF to BPF call will reach here because of marking
3160                          * caller saved clobbers with DST_OP_NO_MARK, for which we
3161                          * don't care about the register def because they are
3162                          * marked as NOT_INIT already anyway.
3163                          */
3164                         if (insn->src_reg == BPF_PSEUDO_CALL)
3165                                 return false;
3166                         /* Helper call will reach here because of arg type
3167                          * check, conservatively return TRUE.
3168                          */
3169                         if (t == SRC_OP)
3170                                 return true;
3171 
3172                         return false;
3173                 }
3174         }
3175 
3176         if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32))
3177                 return false;
3178 
3179         if (class == BPF_ALU64 || class == BPF_JMP ||
3180             (class == BPF_ALU && op == BPF_END && insn->imm == 64))
3181                 return true;
3182 
3183         if (class == BPF_ALU || class == BPF_JMP32)
3184                 return false;
3185 
3186         if (class == BPF_LDX) {
3187                 if (t != SRC_OP)
3188                         return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX;
3189                 /* LDX source must be ptr. */
3190                 return true;
3191         }
3192 
3193         if (class == BPF_STX) {
3194                 /* BPF_STX (including atomic variants) has multiple source
3195                  * operands, one of which is a ptr. Check whether the caller is
3196                  * asking about it.
3197                  */
3198                 if (t == SRC_OP && reg->type != SCALAR_VALUE)
3199                         return true;
3200                 return BPF_SIZE(code) == BPF_DW;
3201         }
3202 
3203         if (class == BPF_LD) {
3204                 u8 mode = BPF_MODE(code);
3205 
3206                 /* LD_IMM64 */
3207                 if (mode == BPF_IMM)
3208                         return true;
3209 
3210                 /* Both LD_IND and LD_ABS return 32-bit data. */
3211                 if (t != SRC_OP)
3212                         return false;
3213 
3214                 /* Implicit ctx ptr. */
3215                 if (regno == BPF_REG_6)
3216                         return true;
3217 
3218                 /* Explicit source could be any width. */
3219                 return true;
3220         }
3221 
3222         if (class == BPF_ST)
3223                 /* The only source register for BPF_ST is a ptr. */
3224                 return true;
3225 
3226         /* Conservatively return true at default. */
3227         return true;
3228 }
3229 
3230 /* Return the regno defined by the insn, or -1. */
3231 static int insn_def_regno(const struct bpf_insn *insn)
3232 {
3233         switch (BPF_CLASS(insn->code)) {
3234         case BPF_JMP:
3235         case BPF_JMP32:
3236         case BPF_ST:
3237                 return -1;
3238         case BPF_STX:
3239                 if ((BPF_MODE(insn->code) == BPF_ATOMIC ||
3240                      BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) &&
3241                     (insn->imm & BPF_FETCH)) {
3242                         if (insn->imm == BPF_CMPXCHG)
3243                                 return BPF_REG_0;
3244                         else
3245                                 return insn->src_reg;
3246                 } else {
3247                         return -1;
3248                 }
3249         default:
3250                 return insn->dst_reg;
3251         }
3252 }
3253 
3254 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
3255 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
3256 {
3257         int dst_reg = insn_def_regno(insn);
3258 
3259         if (dst_reg == -1)
3260                 return false;
3261 
3262         return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
3263 }
3264 
3265 static void mark_insn_zext(struct bpf_verifier_env *env,
3266                            struct bpf_reg_state *reg)
3267 {
3268         s32 def_idx = reg->subreg_def;
3269 
3270         if (def_idx == DEF_NOT_SUBREG)
3271                 return;
3272 
3273         env->insn_aux_data[def_idx - 1].zext_dst = true;
3274         /* The dst will be zero extended, so won't be sub-register anymore. */
3275         reg->subreg_def = DEF_NOT_SUBREG;
3276 }
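
/*
 * Editorial illustration, not part of verifier.c: for a sequence like
 *
 *   w2 = w1          // 32-bit ALU, defines only the low 32 bits of r2
 *   ...
 *   r3 = r2          // later 64-bit use of r2
 *
 * the 64-bit read reaches __check_reg_arg() below with rw64 == true, and
 * mark_insn_zext() flags the defining "w2 = w1" insn via zext_dst, so a
 * later verifier pass can insert an explicit zero-extension for JITs that
 * do not implicitly zero the upper 32 bits on sub-register writes.
 */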
3277 
3278 static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
3279                            enum reg_arg_type t)
3280 {
3281         struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
3282         struct bpf_reg_state *reg;
3283         bool rw64;
3284 
3285         if (regno >= MAX_BPF_REG) {
3286                 verbose(env, "R%d is invalid\n", regno);
3287                 return -EINVAL;
3288         }
3289 
3290         mark_reg_scratched(env, regno);
3291 
3292         reg = &regs[regno];
3293         rw64 = is_reg64(env, insn, regno, reg, t);
3294         if (t == SRC_OP) {
3295                 /* check whether register used as source operand can be read */
3296                 if (reg->type == NOT_INIT) {
3297                         verbose(env, "R%d !read_ok\n", regno);
3298                         return -EACCES;
3299                 }
3300                 /* We don't need to worry about FP liveness because it's read-only */
3301                 if (regno == BPF_REG_FP)
3302                         return 0;
3303 
3304                 if (rw64)
3305                         mark_insn_zext(env, reg);
3306 
3307                 return mark_reg_read(env, reg, reg->parent,
3308                                      rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
3309         } else {
3310                 /* check whether register used as dest operand can be written to */
3311                 if (regno == BPF_REG_FP) {
3312                         verbose(env, "frame pointer is read only\n");
3313                         return -EACCES;
3314                 }
3315                 reg->live |= REG_LIVE_WRITTEN;
3316                 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
3317                 if (t == DST_OP)
3318                         mark_reg_unknown(env, regs, regno);
3319         }
3320         return 0;
3321 }
3322 
3323 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
3324                          enum reg_arg_type t)
3325 {
3326         struct bpf_verifier_state *vstate = env->cur_state;
3327         struct bpf_func_state *state = vstate->frame[vstate->curframe];
3328 
3329         return __check_reg_arg(env, state->regs, regno, t);
3330 }
3331 
3332 static int insn_stack_access_flags(int frameno, int spi)
3333 {
3334         return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno;
3335 }
3336 
3337 static int insn_stack_access_spi(int insn_flags)
3338 {
3339         return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK;
3340 }
3341 
3342 static int insn_stack_access_frameno(int insn_flags)
3343 {
3344         return insn_flags & INSN_F_FRAMENO_MASK;
3345 }
3346 
3347 static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
3348 {
3349         env->insn_aux_data[idx].jmp_point = true;
3350 }
3351 
3352 static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
3353 {
3354         return env->insn_aux_data[insn_idx].jmp_point;
3355 }
3356 
3357 /* for any branch, call, exit record the history of jmps in the given state */
3358 static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
3359                             int insn_flags)
3360 {
3361         u32 cnt = cur->jmp_history_cnt;
3362         struct bpf_jmp_history_entry *p;
3363         size_t alloc_size;
3364 
3365         /* combine instruction flags if we already recorded this instruction */
3366         if (env->cur_hist_ent) {
3367                 /* atomic instructions push insn_flags twice, for READ and
3368                  * WRITE sides, but they should agree on stack slot
3369                  */
3370                 WARN_ONCE((env->cur_hist_ent->flags & insn_flags) &&
3371                           (env->cur_hist_ent->flags & insn_flags) != insn_flags,
3372                           "verifier insn history bug: insn_idx %d cur flags %x new flags %x\n",
3373                           env->insn_idx, env->cur_hist_ent->flags, insn_flags);
3374                 env->cur_hist_ent->flags |= insn_flags;
3375                 return 0;
3376         }
3377 
3378         cnt++;
3379         alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
3380         p = krealloc(cur->jmp_history, alloc_size, GFP_USER);
3381         if (!p)
3382                 return -ENOMEM;
3383         cur->jmp_history = p;
3384 
3385         p = &cur->jmp_history[cnt - 1];
3386         p->idx = env->insn_idx;
3387         p->prev_idx = env->prev_insn_idx;
3388         p->flags = insn_flags;
3389         cur->jmp_history_cnt = cnt;
3390         env->cur_hist_ent = p;
3391 
3392         return 0;
3393 }
3394 
3395 static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st,
3396                                                         u32 hist_end, int insn_idx)
3397 {
3398         if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx)
3399                 return &st->jmp_history[hist_end - 1];
3400         return NULL;
3401 }
3402 
3403 /* Backtrack one insn at a time. If idx is not at the top of recorded
3404  * history then previous instruction came from straight line execution.
3405  * Return -ENOENT if we exhausted all instructions within given state.
3406  *
3407  * It's legal to have a bit of looping with the same starting and ending
3408  * insn index within the same state, e.g.: 3->4->5->3, so just because current
3409  * instruction index is the same as state's first_idx doesn't mean we are
3410  * done. If there is still some jump history left, we should keep going. We
3411  * need to take into account that we might have a jump history between given
3412  * state's parent and itself, due to checkpointing. In this case, we'll have
3413  * history entry recording a jump from last instruction of parent state and
3414  * first instruction of given state.
3415  */
3416 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
3417                              u32 *history)
3418 {
3419         u32 cnt = *history;
3420 
3421         if (i == st->first_insn_idx) {
3422                 if (cnt == 0)
3423                         return -ENOENT;
3424                 if (cnt == 1 && st->jmp_history[0].idx == i)
3425                         return -ENOENT;
3426         }
3427 
3428         if (cnt && st->jmp_history[cnt - 1].idx == i) {
3429                 i = st->jmp_history[cnt - 1].prev_idx;
3430                 (*history)--;
3431         } else {
3432                 i--;
3433         }
3434         return i;
3435 }
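
/*
 * Editorial illustration, not part of verifier.c: with first_insn_idx == 3
 * and a single jmp_history entry {idx = 5, prev_idx = 3}, backtracking from
 * insn 6 steps 6 -> 5 by plain decrement (straight-line code), then follows
 * the recorded jump 5 -> 3 and consumes the entry; once insn 3 has been
 * processed, the next call returns -ENOENT because both the history and the
 * state's instruction range are exhausted.
 */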
3436 
3437 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
3438 {
3439         const struct btf_type *func;
3440         struct btf *desc_btf;
3441 
3442         if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
3443                 return NULL;
3444 
3445         desc_btf = find_kfunc_desc_btf(data, insn->off);
3446         if (IS_ERR(desc_btf))
3447                 return "<error>";
3448 
3449         func = btf_type_by_id(desc_btf, insn->imm);
3450         return btf_name_by_offset(desc_btf, func->name_off);
3451 }
3452 
3453 static inline void bt_init(struct backtrack_state *bt, u32 frame)
3454 {
3455         bt->frame = frame;
3456 }
3457 
3458 static inline void bt_reset(struct backtrack_state *bt)
3459 {
3460         struct bpf_verifier_env *env = bt->env;
3461 
3462         memset(bt, 0, sizeof(*bt));
3463         bt->env = env;
3464 }
3465 
3466 static inline u32 bt_empty(struct backtrack_state *bt)
3467 {
3468         u64 mask = 0;
3469         int i;
3470 
3471         for (i = 0; i <= bt->frame; i++)
3472                 mask |= bt->reg_masks[i] | bt->stack_masks[i];
3473 
3474         return mask == 0;
3475 }
3476 
3477 static inline int bt_subprog_enter(struct backtrack_state *bt)
3478 {
3479         if (bt->frame == MAX_CALL_FRAMES - 1) {
3480                 verbose(bt->env, "BUG subprog enter from frame %d\n", bt->frame);
3481                 WARN_ONCE(1, "verifier backtracking bug");
3482                 return -EFAULT;
3483         }
3484         bt->frame++;
3485         return 0;
3486 }
3487 
3488 static inline int bt_subprog_exit(struct backtrack_state *bt)
3489 {
3490         if (bt->frame == 0) {
3491                 verbose(bt->env, "BUG subprog exit from frame 0\n");
3492                 WARN_ONCE(1, "verifier backtracking bug");
3493                 return -EFAULT;
3494         }
3495         bt->frame--;
3496         return 0;
3497 }
3498 
3499 static inline void bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
3500 {
3501         bt->reg_masks[frame] |= 1 << reg;
3502 }
3503 
3504 static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
3505 {
3506         bt->reg_masks[frame] &= ~(1 << reg);
3507 }
3508 
3509 static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
3510 {
3511         bt_set_frame_reg(bt, bt->frame, reg);
3512 }
3513 
3514 static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
3515 {
3516         bt_clear_frame_reg(bt, bt->frame, reg);
3517 }
3518 
3519 static inline void bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
3520 {
3521         bt->stack_masks[frame] |= 1ull << slot;
3522 }
3523 
3524 static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
3525 {
3526         bt->stack_masks[frame] &= ~(1ull << slot);
3527 }
3528 
3529 static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
3530 {
3531         return bt->reg_masks[frame];
3532 }
3533 
3534 static inline u32 bt_reg_mask(struct backtrack_state *bt)
3535 {
3536         return bt->reg_masks[bt->frame];
3537 }
3538 
3539 static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
3540 {
3541         return bt->stack_masks[frame];
3542 }
3543 
3544 static inline u64 bt_stack_mask(struct backtrack_state *bt)
3545 {
3546         return bt->stack_masks[bt->frame];
3547 }
3548 
3549 static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
3550 {
3551         return bt->reg_masks[bt->frame] & (1 << reg);
3552 }
3553 
3554 static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot)
3555 {
3556         return bt->stack_masks[frame] & (1ull << slot);
3557 }
3558 
3559 /* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
3560 static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
3561 {
3562         DECLARE_BITMAP(mask, 64);
3563         bool first = true;
3564         int i, n;
3565 
3566         buf[0] = '\0';
3567 
3568         bitmap_from_u64(mask, reg_mask);
3569         for_each_set_bit(i, mask, 32) {
3570                 n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i);
3571                 first = false;
3572                 buf += n;
3573                 buf_sz -= n;
3574                 if (buf_sz < 0)
3575                         break;
3576         }
3577 }
3578 /* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
3579 static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
3580 {
3581         DECLARE_BITMAP(mask, 64);
3582         bool first = true;
3583         int i, n;
3584 
3585         buf[0] = '\0';
3586 
3587         bitmap_from_u64(mask, stack_mask);
3588         for_each_set_bit(i, mask, 64) {
3589                 n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8);
3590                 first = false;
3591                 buf += n;
3592                 buf_sz -= n;
3593                 if (buf_sz < 0)
3594                         break;
3595         }
3596 }
3597 
3598 static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);
3599 
3600 /* For given verifier state backtrack_insn() is called from the last insn to
3601  * the first insn. Its purpose is to compute a bitmask of registers and
3602  * stack slots that need precision in the parent verifier state.
3603  *
3604  * @idx is an index of the instruction we are currently processing;
3605  * @subseq_idx is an index of the subsequent instruction that:
3606  *   - *would be* executed next, if jump history is viewed in forward order;
3607  *   - *was* processed previously during backtracking.
3608  */
3609 static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
3610                           struct bpf_jmp_history_entry *hist, struct backtrack_state *bt)
3611 {
3612         const struct bpf_insn_cbs cbs = {
3613                 .cb_call        = disasm_kfunc_name,
3614                 .cb_print       = verbose,
3615                 .private_data   = env,
3616         };
3617         struct bpf_insn *insn = env->prog->insnsi + idx;
3618         u8 class = BPF_CLASS(insn->code);
3619         u8 opcode = BPF_OP(insn->code);
3620         u8 mode = BPF_MODE(insn->code);
3621         u32 dreg = insn->dst_reg;
3622         u32 sreg = insn->src_reg;
3623         u32 spi, i, fr;
3624 
3625         if (insn->code == 0)
3626                 return 0;
3627         if (env->log.level & BPF_LOG_LEVEL2) {
3628                 fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
3629                 verbose(env, "mark_precise: frame%d: regs=%s ",
3630                         bt->frame, env->tmp_str_buf);
3631                 fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
3632                 verbose(env, "stack=%s before ", env->tmp_str_buf);
3633                 verbose(env, "%d: ", idx);
3634                 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
3635         }
3636 
3637         if (class == BPF_ALU || class == BPF_ALU64) {
3638                 if (!bt_is_reg_set(bt, dreg))
3639                         return 0;
3640                 if (opcode == BPF_END || opcode == BPF_NEG) {
3641                         /* sreg is reserved and unused
3642                          * dreg still needs precision before this insn
3643                          */
3644                         return 0;
3645                 } else if (opcode == BPF_MOV) {
3646                         if (BPF_SRC(insn->code) == BPF_X) {
3647                                 /* dreg = sreg or dreg = (s8, s16, s32)sreg
3648                                  * dreg needs precision after this insn
3649                                  * sreg needs precision before this insn
3650                                  */
3651                                 bt_clear_reg(bt, dreg);
3652                                 if (sreg != BPF_REG_FP)
3653                                         bt_set_reg(bt, sreg);
3654                         } else {
3655                                 /* dreg = K
3656                                  * dreg needs precision after this insn.
3657                                  * Corresponding register is already marked
3658                                  * as precise=true in this verifier state.
3659                                  * No further markings in parent are necessary
3660                                  */
3661                                 bt_clear_reg(bt, dreg);
3662                         }
3663                 } else {
3664                         if (BPF_SRC(insn->code) == BPF_X) {
3665                                 /* dreg += sreg
3666                                  * both dreg and sreg need precision
3667                                  * before this insn
3668                                  */
3669                                 if (sreg != BPF_REG_FP)
3670                                         bt_set_reg(bt, sreg);
3671                         } /* else dreg += K
3672                            * dreg still needs precision before this insn
3673                            */
3674                 }
3675         } else if (class == BPF_LDX) {
3676                 if (!bt_is_reg_set(bt, dreg))
3677                         return 0;
3678                 bt_clear_reg(bt, dreg);
3679 
3680                 /* scalars can only be spilled into stack w/o losing precision.
3681                  * Load from any other memory can be zero extended.
3682                  * The desire to keep that precision is already indicated
3683                  * by 'precise' mark in corresponding register of this state.
3684                  * No further tracking necessary.
3685                  */
3686                 if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
3687                         return 0;
3688                 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
3689                  * That [fp - off] slot contains a scalar that needs to be
3690                  * tracked with precision
3691                  */
3692                 spi = insn_stack_access_spi(hist->flags);
3693                 fr = insn_stack_access_frameno(hist->flags);
3694                 bt_set_frame_slot(bt, fr, spi);
3695         } else if (class == BPF_STX || class == BPF_ST) {
3696                 if (bt_is_reg_set(bt, dreg))
3697                         /* stx & st shouldn't be using _scalar_ dst_reg
3698                          * to access memory. It means backtracking
3699                          * encountered a case of pointer subtraction.
3700                          */
3701                         return -ENOTSUPP;
3702                 /* scalars can only be spilled into stack */
3703                 if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
3704                         return 0;
3705                 spi = insn_stack_access_spi(hist->flags);
3706                 fr = insn_stack_access_frameno(hist->flags);
3707                 if (!bt_is_frame_slot_set(bt, fr, spi))
3708                         return 0;
3709                 bt_clear_frame_slot(bt, fr, spi);
3710                 if (class == BPF_STX)
3711                         bt_set_reg(bt, sreg);
3712         } else if (class == BPF_JMP || class == BPF_JMP32) {
3713                 if (bpf_pseudo_call(insn)) {
3714                         int subprog_insn_idx, subprog;
3715 
3716                         subprog_insn_idx = idx + insn->imm + 1;
3717                         subprog = find_subprog(env, subprog_insn_idx);
3718                         if (subprog < 0)
3719                                 return -EFAULT;
3720 
3721                         if (subprog_is_global(env, subprog)) {
3722                                 /* check that jump history doesn't have any
3723                                  * extra instructions from subprog; the next
3724                                  * instruction after call to global subprog
3725                                  * should be literally next instruction in
3726                                  * caller program
3727                                  */
3728                                 WARN_ONCE(idx + 1 != subseq_idx, "verifier backtracking bug");
3729                                 /* r1-r5 are invalidated after subprog call,
3730                                  * so for global func call it shouldn't be set
3731                                  * anymore
3732                                  */
3733                                 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
3734                                         verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
3735                                         WARN_ONCE(1, "verifier backtracking bug");
3736                                         return -EFAULT;
3737                                 }
3738                                 /* global subprog always sets R0 */
3739                                 bt_clear_reg(bt, BPF_REG_0);
3740                                 return 0;
3741                         } else {
3742                                 /* static subprog call instruction, which
3743                                  * means that we are exiting current subprog,
3744                                  * so only r1-r5 could be still requested as
3745                                  * precise, r0 and r6-r10 or any stack slot in
3746                                  * the current frame should be zero by now
3747                                  */
3748                                 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
3749                                         verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
3750                                         WARN_ONCE(1, "verifier backtracking bug");
3751                                         return -EFAULT;
3752                                 }
3753                                 /* we are now tracking register spills correctly,
3754                                  * so any instance of leftover slots is a bug
3755                                  */
3756                                 if (bt_stack_mask(bt) != 0) {
3757                                         verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt));
3758                                         WARN_ONCE(1, "verifier backtracking bug (subprog leftover stack slots)");
3759                                         return -EFAULT;
3760                                 }
3761                                 /* propagate r1-r5 to the caller */
3762                                 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
3763                                         if (bt_is_reg_set(bt, i)) {
3764                                                 bt_clear_reg(bt, i);
3765                                                 bt_set_frame_reg(bt, bt->frame - 1, i);
3766                                         }
3767                                 }
3768                                 if (bt_subprog_exit(bt))
3769                                         return -EFAULT;
3770                                 return 0;
3771                         }
3772                 } else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
3773                         /* exit from callback subprog to callback-calling helper or
3774                          * kfunc call. Use idx/subseq_idx check to discern it from
3775                          * straight line code backtracking.
3776                          * Unlike the subprog call handling above, we shouldn't
3777                          * propagate precision of r1-r5 (if any requested), as they are
3778                          * not actually arguments passed directly to callback subprogs
3779                          */
3780                         if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
3781                                 verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
3782                                 WARN_ONCE(1, "verifier backtracking bug");
3783                                 return -EFAULT;
3784                         }
3785                         if (bt_stack_mask(bt) != 0) {
3786                                 verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt));
3787                                 WARN_ONCE(1, "verifier backtracking bug (callback leftover stack slots)");
3788                                 return -EFAULT;
3789                         }
3790                         /* clear r1-r5 in callback subprog's mask */
3791                         for (i = BPF_REG_1; i <= BPF_REG_5; i++)
3792                                 bt_clear_reg(bt, i);
3793                         if (bt_subprog_exit(bt))
3794                                 return -EFAULT;
3795                         return 0;
3796                 } else if (opcode == BPF_CALL) {
3797                         /* kfunc with imm==0 is invalid and fixup_kfunc_call will
3798                          * catch this error later. Make backtracking conservative
3799                          * with ENOTSUPP.
3800                          */
3801                         if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
3802                                 return -ENOTSUPP;
3803                         /* regular helper call sets R0 */
3804                         bt_clear_reg(bt, BPF_REG_0);
3805                         if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
3806                                 /* if backtracking was looking for registers R1-R5
3807                                  * they should have been found already.
3808                                  */
3809                                 verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
3810                                 WARN_ONCE(1, "verifier backtracking bug");
3811                                 return -EFAULT;
3812                         }
3813                 } else if (opcode == BPF_EXIT) {
3814                         bool r0_precise;
3815 
3816                         /* Backtracking to a nested function call, 'idx' is a part of
3817                          * the inner frame and 'subseq_idx' is a part of the outer frame.
3818                          * In case of a regular function call, instructions giving
3819                          * precision to registers R1-R5 should have been found already.
3820                          * In case of a callback, it is ok to have R1-R5 marked for
3821                          * backtracking, as these registers are set by the function
3822                          * invoking callback.
3823                          */
3824                         if (subseq_idx >= 0 && calls_callback(env, subseq_idx))
3825                                 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
3826                                         bt_clear_reg(bt, i);
3827                         if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
3828                                 verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
3829                                 WARN_ONCE(1, "verifier backtracking bug");
3830                                 return -EFAULT;
3831                         }
3832 
3833                         /* BPF_EXIT in subprog or callback always returns
3834                          * right after the call instruction, so by checking
3835                          * whether the instruction at subseq_idx-1 is subprog
3836                          * call or not we can distinguish actual exit from
3837                          * *subprog* from exit from *callback*. In the former
3838                          * case, we need to propagate r0 precision, if
3839                          * necessary. In the latter we never do that.
3840                          */
3841                         r0_precise = subseq_idx - 1 >= 0 &&
3842                                      bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) &&
3843                                      bt_is_reg_set(bt, BPF_REG_0);
3844 
3845                         bt_clear_reg(bt, BPF_REG_0);
3846                         if (bt_subprog_enter(bt))
3847                                 return -EFAULT;
3848 
3849                         if (r0_precise)
3850                                 bt_set_reg(bt, BPF_REG_0);
3851                         /* r6-r9 and stack slots will stay set in caller frame
3852                          * bitmasks until we return back from callee(s)
3853                          */
3854                         return 0;
3855                 } else if (BPF_SRC(insn->code) == BPF_X) {
3856                         if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg))
3857                                 return 0;
3858                         /* dreg <cond> sreg
3859                          * Both dreg and sreg need precision before
3860                          * this insn. If only sreg was marked precise
3861                          * before it would be equally necessary to
3862                          * propagate it to dreg.
3863                          */
3864                         bt_set_reg(bt, dreg);
3865                         bt_set_reg(bt, sreg);
3866                          /* else dreg <cond> K
3867                           * Only dreg still needs precision before
3868                           * this insn, so for the K-based conditional
3869                           * there is nothing new to be marked.
3870                           */
3871                 }
3872         } else if (class == BPF_LD) {
3873                 if (!bt_is_reg_set(bt, dreg))
3874                         return 0;
3875                 bt_clear_reg(bt, dreg);
3876                 /* It's ld_imm64 or ld_abs or ld_ind.
3877                  * For ld_imm64 no further tracking of precision
3878                  * into parent is necessary
3879                  */
3880                 if (mode == BPF_IND || mode == BPF_ABS)
3881                         /* to be analyzed */
3882                         return -ENOTSUPP;
3883         }
3884         return 0;
3885 }
3886 
3887 /* the scalar precision tracking algorithm:
3888  * . at the start all registers have precise=false.
3889  * . scalar ranges are tracked as normal through alu and jmp insns.
3890  * . once precise value of the scalar register is used in:
3891  *   .  ptr + scalar alu
3892  *   . if (scalar cond K|scalar)
3893  *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
3894  *   backtrack through the verifier states and mark all registers and
3895  *   stack slots with spilled constants that these scalar registers
3896  *   should be precise.
3897  * . during state pruning two registers (or spilled stack slots)
3898  *   are equivalent if both are not precise.
3899  *
3900  * Note the verifier cannot simply walk register parentage chain,
3901  * since many different registers and stack slots could have been
3902  * used to compute single precise scalar.
3903  *
3904  * The approach of starting with precise=true for all registers and then
3905  * backtrack to mark a register as not precise when the verifier detects
3906  * that the program doesn't care about the specific value (e.g., when a helper
3907  * takes a register as an ARG_ANYTHING parameter) is not safe.
3908  *
3909  * It's ok to walk single parentage chain of the verifier states.
3910  * It's possible that this backtracking will go all the way till 1st insn.
3911  * All other branches will be explored for needing precision later.
3912  *
3913  * The backtracking needs to deal with cases like:
3914  *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
3915  * r9 -= r8
3916  * r5 = r9
3917  * if r5 > 0x79f goto pc+7
3918  *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
3919  * r5 += 1
3920  * ...
3921  * call bpf_perf_event_output#25
3922  *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
3923  *
3924  * and this case:
3925  * r6 = 1
3926  * call foo // uses callee's r6 inside to compute r0
3927  * r0 += r6
3928  * if r0 == 0 goto
3929  *
3930  * To track the above, reg_mask/stack_mask need to be independent for each frame.
3931  *
3932  * Also, if the parent's curframe > frame where backtracking started,
3933  * the verifier needs to mark registers in both frames, otherwise callees
3934  * may incorrectly prune callers. This is similar to
3935  * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
3936  *
3937  * For now, backtracking falls back to conservative marking.
3938  */
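/* A hypothetical illustration of the backtracking described above; the
 * instruction indexes and values below are made up for this sketch:
 *
 *   0: r8 = 100                  // constant, not yet precise
 *   1: *(u64 *)(r10 - 8) = r8    // spill into stack slot fp-8
 *   2: r2 = *(u64 *)(r10 - 8)    // fill back into r2
 *   3: if r2 > 10 goto pc+4      // scalar used in a conditional
 *
 * Backtracking starts at insn 3 and requests r2; the fill at insn 2
 * transfers the request to stack slot fp-8, the spill at insn 1 transfers
 * it to r8, and the constant load at insn 0 finally satisfies it. In any
 * checkpointed parent states crossed along the way, r2, the fp-8 slot and
 * r8 are marked precise.
 */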
3939 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
3940                                      struct bpf_verifier_state *st)
3941 {
3942         struct bpf_func_state *func;
3943         struct bpf_reg_state *reg;
3944         int i, j;
3945 
3946         if (env->log.level & BPF_LOG_LEVEL2) {
3947                 verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n",
3948                         st->curframe);
3949         }
3950 
3951         /* big hammer: mark all scalars precise in this path.
3952          * pop_stack may still get !precise scalars.
3953          * We also skip current state and go straight to first parent state,
3954          * because precision markings in current non-checkpointed state are
3955          * not needed. See why in the comment in __mark_chain_precision below.
3956          */
3957         for (st = st->parent; st; st = st->parent) {
3958                 for (i = 0; i <= st->curframe; i++) {
3959                         func = st->frame[i];
3960                         for (j = 0; j < BPF_REG_FP; j++) {
3961                                 reg = &func->regs[j];
3962                                 if (reg->type != SCALAR_VALUE || reg->precise)
3963                                         continue;
3964                                 reg->precise = true;
3965                                 if (env->log.level & BPF_LOG_LEVEL2) {
3966                                         verbose(env, "force_precise: frame%d: forcing r%d to be precise\n",
3967                                                 i, j);
3968                                 }
3969                         }
3970                         for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
3971                                 if (!is_spilled_reg(&func->stack[j]))
3972                                         continue;
3973                                 reg = &func->stack[j].spilled_ptr;
3974                                 if (reg->type != SCALAR_VALUE || reg->precise)
3975                                         continue;
3976                                 reg->precise = true;
3977                                 if (env->log.level & BPF_LOG_LEVEL2) {
3978                                         verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n",
3979                                                 i, -(j + 1) * 8);
3980                                 }
3981                         }
3982                 }
3983         }
3984 }
3985 
3986 static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
3987 {
3988         struct bpf_func_state *func;
3989         struct bpf_reg_state *reg;
3990         int i, j;
3991 
3992         for (i = 0; i <= st->curframe; i++) {
3993                 func = st->frame[i];
3994                 for (j = 0; j < BPF_REG_FP; j++) {
3995                         reg = &func->regs[j];
3996                         if (reg->type != SCALAR_VALUE)
3997                                 continue;
3998                         reg->precise = false;
3999                 }
4000                 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
4001                         if (!is_spilled_reg(&func->stack[j]))
4002                                 continue;
4003                         reg = &func->stack[j].spilled_ptr;
4004                         if (reg->type != SCALAR_VALUE)
4005                                 continue;
4006                         reg->precise = false;
4007                 }
4008         }
4009 }
4010 
4011 static bool idset_contains(struct bpf_idset *s, u32 id)
4012 {
4013         u32 i;
4014 
4015         for (i = 0; i < s->count; ++i)
4016                 if (s->ids[i] == (id & ~BPF_ADD_CONST))
4017                         return true;
4018 
4019         return false;
4020 }
4021 
4022 static int idset_push(struct bpf_idset *s, u32 id)
4023 {
4024         if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids)))
4025                 return -EFAULT;
4026         s->ids[s->count++] = id & ~BPF_ADD_CONST;
4027         return 0;
4028 }
4029 
4030 static void idset_reset(struct bpf_idset *s)
4031 {
4032         s->count = 0;
4033 }
4034 
4035 /* Collect a set of IDs for all registers currently marked as precise in env->bt.
4036  * Mark all registers with these IDs as precise.
4037  */
4038 static int mark_precise_scalar_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
4039 {
4040         struct bpf_idset *precise_ids = &env->idset_scratch;
4041         struct backtrack_state *bt = &env->bt;
4042         struct bpf_func_state *func;
4043         struct bpf_reg_state *reg;
4044         DECLARE_BITMAP(mask, 64);
4045         int i, fr;
4046 
4047         idset_reset(precise_ids);
4048 
4049         for (fr = bt->frame; fr >= 0; fr--) {
4050                 func = st->frame[fr];
4051 
4052                 bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
4053                 for_each_set_bit(i, mask, 32) {
4054                         reg = &func->regs[i];
4055                         if (!reg->id || reg->type != SCALAR_VALUE)
4056                                 continue;
4057                         if (idset_push(precise_ids, reg->id))
4058                                 return -EFAULT;
4059                 }
4060 
4061                 bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
4062                 for_each_set_bit(i, mask, 64) {
4063                         if (i >= func->allocated_stack / BPF_REG_SIZE)
4064                                 break;
4065                         if (!is_spilled_scalar_reg(&func->stack[i]))
4066                                 continue;
4067                         reg = &func->stack[i].spilled_ptr;
4068                         if (!reg->id)
4069                                 continue;
4070                         if (idset_push(precise_ids, reg->id))
4071                                 return -EFAULT;
4072                 }
4073         }
4074 
4075         for (fr = 0; fr <= st->curframe; ++fr) {
4076                 func = st->frame[fr];
4077 
4078                 for (i = BPF_REG_0; i < BPF_REG_10; ++i) {
4079                         reg = &func->regs[i];
4080                         if (!reg->id)
4081                                 continue;
4082                         if (!idset_contains(precise_ids, reg->id))
4083                                 continue;
4084                         bt_set_frame_reg(bt, fr, i);
4085                 }
4086                 for (i = 0; i < func->allocated_stack / BPF_REG_SIZE; ++i) {
4087                         if (!is_spilled_scalar_reg(&func->stack[i]))
4088                                 continue;
4089                         reg = &func->stack[i].spilled_ptr;
4090                         if (!reg->id)
4091                                 continue;
4092                         if (!idset_contains(precise_ids, reg->id))
4093                                 continue;
4094                         bt_set_frame_slot(bt, fr, i);
4095                 }
4096         }
4097 
4098         return 0;
4099 }
4100 
4101 /*
4102  * __mark_chain_precision() backtracks BPF program instruction sequence and
4103  * chain of verifier states making sure that register *regno* (if regno >= 0)
4104  * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
4105  * SCALARS, as well as any other registers and slots that contribute to
4106  * a tracked state of given registers/stack slots, depending on specific BPF
4107  * assembly instructions (see backtrack_insns() for exact instruction handling
4108  * logic). This backtracking relies on recorded jmp_history and is able to
4109  * traverse entire chain of parent states. This process ends only when all the
4110  * necessary registers/slots and their transitive dependencies are marked as
4111  * precise.
4112  *
4113  * One important and subtle aspect is that precise marks *do not matter* in
4114  * the currently verified state (current state). It is important to understand
4115  * why this is the case.
4116  *
4117  * First, note that current state is the state that is not yet "checkpointed",
4118  * i.e., it is not yet put into env->explored_states, and it has no children
4119  * states as well. It's ephemeral, and can end up either a) being discarded if
4120  * compatible explored state is found at some point or BPF_EXIT instruction is
4121  * reached or b) checkpointed and put into env->explored_states, branching out
4122  * into one or more children states.
4123  *
4124  * In the former case, precise markings in current state are completely
4125  * ignored by state comparison code (see regsafe() for details). Only
4126  * checkpointed ("old") state precise markings are important, and if old
4127  * state's register/slot is precise, regsafe() assumes current state's
4128  * register/slot as precise and checks value ranges exactly and precisely. If
4129  * states turn out to be compatible, current state's necessary precise
4130  * markings and any required parent states' precise markings are enforced
4131  * after the fact with propagate_precision() logic. But it's
4132  * important to realize that in this case, even after marking current state
4133  * registers/slots as precise, we immediately discard current state. So what
4134  * actually matters is any of the precise markings propagated into current
4135  * state's parent states, which are always checkpointed (due to b) case above).
4136  * As such, for scenario a) it doesn't matter if current state has precise
4137  * markings set or not.
4138  *
4139  * Now, for the scenario b), checkpointing and forking into child(ren)
4140  * state(s). Note that before current state gets to checkpointing step, any
4141  * processed instruction always assumes precise SCALAR register/slot
4142  * knowledge: if precise value or range is useful to prune jump branch, BPF
4143  * verifier takes this opportunity enthusiastically. Similarly, when
4144  * register's value is used to calculate offset or memory address, exact
4145  * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
4146  * what we mentioned above about state comparison ignoring precise markings,
4147  * the BPF verifier ignores and also assumes precise
4148  * markings *at will* during the instruction verification process. But as the verifier
4149  * assumes precision, it also propagates any precision dependencies across
4150  * parent states, which are not yet finalized, so can be further restricted
4151  * based on new knowledge gained from restrictions enforced by their children
4152  * states. This is so that once those parent states are finalized, i.e., when
4153  * they have no more active children state, state comparison logic in
4154  * is_state_visited() would enforce strict and precise SCALAR ranges, if
4155  * required for correctness.
4156  *
4157  * To build a bit more intuition, note also that once a state is checkpointed,
4158  * the path we took to get to that state is not important. This is crucial
4159  * property for state pruning. When state is checkpointed and finalized at
4160  * some instruction index, it can be correctly and safely used to "short
4161  * circuit" any *compatible* state that reaches exactly the same instruction
4162  * index. I.e., if we jumped to that instruction from a completely different
4163  * code path than original finalized state was derived from, it doesn't
4164  * matter, current state can be discarded because from that instruction
4165  * forward having a compatible state will ensure we will safely reach the
4166  * exit. States describe preconditions for further exploration, but completely
4167  * forget the history of how we got here.
4168  *
4169  * This also means that even if we needed precise SCALAR range to get to
4170  * finalized state, but from that point forward *that same* SCALAR register is
4171  * never used in a precise context (i.e., its precise value is not needed for
4172  * correctness), it's correct and safe to mark such register as "imprecise"
4173  * (i.e., precise marking set to false). This is what we rely on when we do
4174  * not set precise marking in current state. If no child state requires
4175  * precision for any given SCALAR register, it's safe to dictate that it can
4176  * be imprecise. If any child state does require this register to be precise,
4177  * we'll mark it precise later retroactively during precise markings
4178  * propagation from child state to parent states.
4179  *
4180  * Skipping precise marking setting in current state is a mild version of
4181  * relying on the above observation. But we can utilize this property even
4182  * more aggressively by proactively forgetting any precise marking in the
4183  * current state (which we inherited from the parent state), right before we
4184  * checkpoint it and branch off into new child state. This is done by
4185  * mark_all_scalars_imprecise() to hopefully get more permissive and generic
4186  * finalized states which help in short circuiting more future states.
4187  */
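/* A rough, hypothetical picture of the state chain walked below (the state
 * names are illustrative only):
 *
 *   st0 (checkpointed) -> st1 (checkpointed) -> cur (not checkpointed)
 *
 * __mark_chain_precision() first replays cur's recorded instruction range
 * backwards via jmp_history without setting any precise flags in cur itself,
 * then follows ->parent to st1 and st0, replaying each state's instruction
 * range and setting precise flags only in those checkpointed ancestors,
 * until the backtrack masks become empty.
 */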
4188 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
4189 {
4190         struct backtrack_state *bt = &env->bt;
4191         struct bpf_verifier_state *st = env->cur_state;
4192         int first_idx = st->first_insn_idx;
4193         int last_idx = env->insn_idx;
4194         int subseq_idx = -1;
4195         struct bpf_func_state *func;
4196         struct bpf_reg_state *reg;
4197         bool skip_first = true;
4198         int i, fr, err;
4199 
4200         if (!env->bpf_capable)
4201                 return 0;
4202 
4203         /* set frame number from which we are starting to backtrack */
4204         bt_init(bt, env->cur_state->curframe);
4205 
4206         /* Do sanity checks against current state of register and/or stack
4207          * slot, but don't set precise flag in current state, as precision
4208          * tracking in the current state is unnecessary.
4209          */
4210         func = st->frame[bt->frame];
4211         if (regno >= 0) {
4212                 reg = &func->regs[regno];
4213                 if (reg->type != SCALAR_VALUE) {
4214                         WARN_ONCE(1, "backtracking misuse");
4215                         return -EFAULT;
4216                 }
4217                 bt_set_reg(bt, regno);
4218         }
4219 
4220         if (bt_empty(bt))
4221                 return 0;
4222 
4223         for (;;) {
4224                 DECLARE_BITMAP(mask, 64);
4225                 u32 history = st->jmp_history_cnt;
4226                 struct bpf_jmp_history_entry *hist;
4227 
4228                 if (env->log.level & BPF_LOG_LEVEL2) {
4229                         verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d\n",
4230                                 bt->frame, last_idx, first_idx, subseq_idx);
4231                 }
4232 
4233                 /* If some register with scalar ID is marked as precise,
4234                  * make sure that all registers sharing this ID are also precise.
4235                  * This is needed to estimate effect of find_equal_scalars().
4236                  * Do this at the last instruction of each state;
4237                  * bpf_reg_state::id fields are valid for these instructions.
4238                  *
4239                  * This allows tracking precision in situations like the one below:
4240                  *
4241                  *     r2 = unknown value
4242                  *     ...
4243                  *   --- state #0 ---
4244                  *     ...
4245                  *     r1 = r2                 // r1 and r2 now share the same ID
4246                  *     ...
4247                  *   --- state #1 {r1.id = A, r2.id = A} ---
4248                  *     ...
4249                  *     if (r2 > 10) goto exit; // find_equal_scalars() assigns range to r1
4250                  *     ...
4251                  *   --- state #2 {r1.id = A, r2.id = A} ---
4252                  *     r3 = r10
4253                  *     r3 += r1                // need to mark both r1 and r2
4254                  */
4255                 if (mark_precise_scalar_ids(env, st))
4256                         return -EFAULT;
4257 
4258                 if (last_idx < 0) {
4259                         /* we are at the entry into subprog, which
4260                          * is expected for global funcs, but only if
4261                          * requested precise registers are R1-R5
4262                          * (which are global func's input arguments)
4263                          */
4264                         if (st->curframe == 0 &&
4265                             st->frame[0]->subprogno > 0 &&
4266                             st->frame[0]->callsite == BPF_MAIN_FUNC &&
4267                             bt_stack_mask(bt) == 0 &&
4268                             (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) {
4269                                 bitmap_from_u64(mask, bt_reg_mask(bt));
4270                                 for_each_set_bit(i, mask, 32) {
4271                                         reg = &st->frame[0]->regs[i];
4272                                         bt_clear_reg(bt, i);
4273                                         if (reg->type == SCALAR_VALUE)
4274                                                 reg->precise = true;
4275                                 }
4276                                 return 0;
4277                         }
4278 
4279                         verbose(env, "BUG backtracking func entry subprog %d reg_mask %x stack_mask %llx\n",
4280                                 st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
4281                         WARN_ONCE(1, "verifier backtracking bug");
4282                         return -EFAULT;
4283                 }
4284 
4285                 for (i = last_idx;;) {
4286                         if (skip_first) {
4287                                 err = 0;
4288                                 skip_first = false;
4289                         } else {
4290                                 hist = get_jmp_hist_entry(st, history, i);
4291                                 err = backtrack_insn(env, i, subseq_idx, hist, bt);
4292                         }
4293                         if (err == -ENOTSUPP) {
4294                                 mark_all_scalars_precise(env, env->cur_state);
4295                                 bt_reset(bt);
4296                                 return 0;
4297                         } else if (err) {
4298                                 return err;
4299                         }
4300                         if (bt_empty(bt))
4301                                 /* Found assignment(s) into tracked register in this state.
4302                                  * Since this state is already marked, just return.
4303                                  * Nothing to be tracked further in the parent state.
4304                                  */
4305                                 return 0;
4306                         subseq_idx = i;
4307                         i = get_prev_insn_idx(st, i, &history);
4308                         if (i == -ENOENT)
4309                                 break;
4310                         if (i >= env->prog->len) {
4311                                 /* This can happen if backtracking reached insn 0
4312                                  * and there are still reg_mask or stack_mask
4313                                  * to backtrack.
4314                                  * It means the backtracking missed the spot where
4315                                  * particular register was initialized with a constant.
4316                                  */
4317                                 verbose(env, "BUG backtracking idx %d\n", i);
4318                                 WARN_ONCE(1, "verifier backtracking bug");
4319                                 return -EFAULT;
4320                         }
4321                 }
4322                 st = st->parent;
4323                 if (!st)
4324                         break;
4325 
4326                 for (fr = bt->frame; fr >= 0; fr--) {
4327                         func = st->frame[fr];
4328                         bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
4329                         for_each_set_bit(i, mask, 32) {
4330                                 reg = &func->regs[i];
4331                                 if (reg->type != SCALAR_VALUE) {
4332                                         bt_clear_frame_reg(bt, fr, i);
4333                                         continue;
4334                                 }
4335                                 if (reg->precise)
4336                                         bt_clear_frame_reg(bt, fr, i);
4337                                 else
4338                                         reg->precise = true;
4339                         }
4340 
4341                         bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
4342                         for_each_set_bit(i, mask, 64) {
4343                                 if (i >= func->allocated_stack / BPF_REG_SIZE) {
4344                                         verbose(env, "BUG backtracking (stack slot %d, total slots %d)\n",
4345                                                 i, func->allocated_stack / BPF_REG_SIZE);
4346                                         WARN_ONCE(1, "verifier backtracking bug (stack slot out of bounds)");
4347                                         return -EFAULT;
4348                                 }
4349 
4350                                 if (!is_spilled_scalar_reg(&func->stack[i])) {
4351                                         bt_clear_frame_slot(bt, fr, i);
4352                                         continue;
4353                                 }
4354                                 reg = &func->stack[i].spilled_ptr;
4355                                 if (reg->precise)
4356                                         bt_clear_frame_slot(bt, fr, i);
4357                                 else
4358                                         reg->precise = true;
4359                         }
4360                         if (env->log.level & BPF_LOG_LEVEL2) {
4361                                 fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
4362                                              bt_frame_reg_mask(bt, fr));
4363                                 verbose(env, "mark_precise: frame%d: parent state regs=%s ",
4364                                         fr, env->tmp_str_buf);
4365                                 fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
4366                                                bt_frame_stack_mask(bt, fr));
4367                                 verbose(env, "stack=%s: ", env->tmp_str_buf);
4368                                 print_verifier_state(env, func, true);
4369                         }
4370                 }
4371 
4372                 if (bt_empty(bt))
4373                         return 0;
4374 
4375                 subseq_idx = first_idx;
4376                 last_idx = st->last_insn_idx;
4377                 first_idx = st->first_insn_idx;
4378         }
4379 
4380         /* if we still have requested precise regs or slots, we missed
4381          * something (e.g., stack access through non-r10 register), so
4382          * fall back to marking all precise
4383          */
4384         if (!bt_empty(bt)) {
4385                 mark_all_scalars_precise(env, env->cur_state);
4386                 bt_reset(bt);
4387         }
4388 
4389         return 0;
4390 }
4391 
4392 int mark_chain_precision(struct bpf_verifier_env *env, int regno)
4393 {
4394         return __mark_chain_precision(env, regno);
4395 }
4396 
4397 /* mark_chain_precision_batch() assumes that env->bt is set in the caller to
4398  * desired reg and stack masks across all relevant frames
4399  */
4400 static int mark_chain_precision_batch(struct bpf_verifier_env *env)
4401 {
4402         return __mark_chain_precision(env, -1);
4403 }
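/* A minimal usage sketch for mark_chain_precision_batch() above (a
 * hypothetical caller, not taken from this file): a caller that needs r1 and
 * stack slot fp-8 of frame 0 to become precise would populate env->bt first
 * and then invoke the batch helper, roughly:
 *
 *   bt_set_frame_reg(&env->bt, 0, BPF_REG_1);
 *   bt_set_frame_slot(&env->bt, 0, 0);      // slot 0 corresponds to fp-8
 *   err = mark_chain_precision_batch(env);
 */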
4404 
4405 static bool is_spillable_regtype(enum bpf_reg_type type)
4406 {
4407         switch (base_type(type)) {
4408         case PTR_TO_MAP_VALUE:
4409         case PTR_TO_STACK:
4410         case PTR_TO_CTX:
4411         case PTR_TO_PACKET:
4412         case PTR_TO_PACKET_META:
4413         case PTR_TO_PACKET_END:
4414         case PTR_TO_FLOW_KEYS:
4415         case CONST_PTR_TO_MAP:
4416         case PTR_TO_SOCKET:
4417         case PTR_TO_SOCK_COMMON:
4418         case PTR_TO_TCP_SOCK:
4419         case PTR_TO_XDP_SOCK:
4420         case PTR_TO_BTF_ID:
4421         case PTR_TO_BUF:
4422         case PTR_TO_MEM:
4423         case PTR_TO_FUNC:
4424         case PTR_TO_MAP_KEY:
4425         case PTR_TO_ARENA:
4426                 return true;
4427         default:
4428                 return false;
4429         }
4430 }
4431 
4432 /* Does this register contain a constant zero? */
4433 static bool register_is_null(struct bpf_reg_state *reg)
4434 {
4435         return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
4436 }
4437 
4438 /* check if register is a constant scalar value */
4439 static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32)
4440 {
4441         return reg->type == SCALAR_VALUE &&
4442                tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off);
4443 }
4444 
4445 /* assuming is_reg_const() is true, return constant value of a register */
4446 static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32)
4447 {
4448         return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value;
4449 }
4450 
4451 static bool __is_pointer_value(bool allow_ptr_leaks,
4452                                const struct bpf_reg_state *reg)
4453 {
4454         if (allow_ptr_leaks)
4455                 return false;
4456 
4457         return reg->type != SCALAR_VALUE;
4458 }
4459 
4460 static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
4461                                         struct bpf_reg_state *src_reg)
4462 {
4463         if (src_reg->type != SCALAR_VALUE)
4464                 return;
4465 
4466         if (src_reg->id & BPF_ADD_CONST) {
4467                 /*
4468                  * The verifier is processing an rX = rY insn and
4469                  * rY->id already carries the special linked-register marker.
4470                  * Clear it, since multiple rX += const are not supported.
4471                  */
4472                 src_reg->id = 0;
4473                 src_reg->off = 0;
4474         }
4475 
4476         if (!src_reg->id && !tnum_is_const(src_reg->var_off))
4477                 /* Ensure that src_reg has a valid ID that will be copied to
4478                  * dst_reg and then will be used by find_equal_scalars() to
4479                  * propagate min/max range.
4480                  */
4481                 src_reg->id = ++env->id_gen;
4482 }
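/* A hypothetical example for assign_scalar_id_before_mov() above: when the
 * verifier sees "r1 = r2" and r2 is a non-constant scalar with no id yet,
 * r2 is given a fresh id here; the subsequent register copy duplicates that
 * id into r1, so a later "if r2 > 10 goto ..." can narrow r1's range as well
 * via find_equal_scalars().
 */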
4483 
4484 /* Copy src state preserving dst->parent and dst->live fields */
4485 static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
4486 {
4487         struct bpf_reg_state *parent = dst->parent;
4488         enum bpf_reg_liveness live = dst->live;
4489 
4490         *dst = *src;
4491         dst->parent = parent;
4492         dst->live = live;
4493 }
4494 
4495 static void save_register_state(struct bpf_verifier_env *env,
4496                                 struct bpf_func_state *state,
4497                                 int spi, struct bpf_reg_state *reg,
4498                                 int size)
4499 {
4500         int i;
4501 
4502         copy_register_state(&state->stack[spi].spilled_ptr, reg);
4503         if (size == BPF_REG_SIZE)
4504                 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
4505 
4506         for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
4507                 state->stack[spi].slot_type[i - 1] = STACK_SPILL;
4508 
4509         /* size < 8 bytes spill */
4510         for (; i; i--)
4511                 mark_stack_slot_misc(env, &state->stack[spi].slot_type[i - 1]);
4512 }
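/* A sketch of the slot_type layout produced by save_register_state() above
 * for an assumed 4-byte spill into stack slot spi:
 *
 *   slot_type[7..4] = STACK_SPILL   // bytes actually covered by the spill
 *   slot_type[3..0] -> mark_stack_slot_misc()
 *
 * Only a full BPF_REG_SIZE spill additionally sets REG_LIVE_WRITTEN on the
 * slot's spilled_ptr.
 */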
4513 
4514 static bool is_bpf_st_mem(struct bpf_insn *insn)
4515 {
4516         return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
4517 }
4518 
4519 static int get_reg_width(struct bpf_reg_state *reg)
4520 {
4521         return fls64(reg->umax_value);
4522 }
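/* get_reg_width() above returns the number of bits needed to represent the
 * register's maximum unsigned value; for example (values chosen purely for
 * illustration): umax_value == 0xff -> 8, umax_value == 0x100 -> 9, and
 * umax_value == 0 -> 0 (a known-zero register needs no bits).
 */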
4523 
4524 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers;
4525  * stack boundary and alignment are checked in check_mem_access()
4526  */
4527 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
4528                                        /* stack frame we're writing to */
4529                                        struct bpf_func_state *state,
4530                                        int off, int size, int value_regno,
4531                                        int insn_idx)
4532 {
4533         struct bpf_func_state *cur; /* state of the current function */
4534         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
4535         struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4536         struct bpf_reg_state *reg = NULL;
4537         int insn_flags = insn_stack_access_flags(state->frameno, spi);
4538 
4539         /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
4540          * so it's aligned access and [off, off + size) are within stack limits
4541          */
4542         if (!env->allow_ptr_leaks &&
4543             is_spilled_reg(&state->stack[spi]) &&
4544             size != BPF_REG_SIZE) {
4545                 verbose(env, "attempt to corrupt spilled pointer on stack\n");
4546                 return -EACCES;
4547         }
4548 
4549         cur = env->cur_state->frame[env->cur_state->curframe];
4550         if (value_regno >= 0)
4551                 reg = &cur->regs[value_regno];
4552         if (!env->bypass_spec_v4) {
4553                 bool sanitize = reg && is_spillable_regtype(reg->type);
4554 
4555                 for (i = 0; i < size; i++) {
4556                         u8 type = state->stack[spi].slot_type[i];
4557 
4558                         if (type != STACK_MISC && type != STACK_ZERO) {
4559                                 sanitize = true;
4560                                 break;
4561                         }
4562                 }
4563 
4564                 if (sanitize)
4565                         env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
4566         }
4567 
4568         err = destroy_if_dynptr_stack_slot(env, state, spi);
4569         if (err)
4570                 return err;
4571 
4572         mark_stack_slot_scratched(env, spi);
4573         if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
4574                 bool reg_value_fits;
4575 
4576                 reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size;
4577                 /* Make sure that reg had an ID to build a relation on spill. */
4578                 if (reg_value_fits)
4579                         assign_scalar_id_before_mov(env, reg);
4580                 save_register_state(env, state, spi, reg, size);
4581                 /* Break the relation on a narrowing spill. */
4582                 if (!reg_value_fits)
4583                         state->stack[spi].spilled_ptr.id = 0;
4584         } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
4585                    env->bpf_capable) {
4586                 struct bpf_reg_state *tmp_reg = &env->fake_reg[0];
4587 
4588                 memset(tmp_reg, 0, sizeof(*tmp_reg));
4589                 __mark_reg_known(tmp_reg, insn->imm);
4590                 tmp_reg->type = SCALAR_VALUE;
4591                 save_register_state(env, state, spi, tmp_reg, size);
4592         } else if (reg && is_spillable_regtype(reg->type)) {
4593                 /* register containing pointer is being spilled into stack */
4594                 if (size != BPF_REG_SIZE) {
4595                         verbose_linfo(env, insn_idx, "; ");
4596                         verbose(env, "invalid size of register spill\n");
4597                         return -EACCES;
4598                 }
4599                 if (state != cur && reg->type == PTR_TO_STACK) {
4600                         verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
4601                         return -EINVAL;
4602                 }
4603                 save_register_state(env, state, spi, reg, size);
4604         } else {
4605                 u8 type = STACK_MISC;
4606 
4607                 /* regular write of data into stack destroys any spilled ptr */
4608                 state->stack[spi].spilled_ptr.type = NOT_INIT;
4609                 /* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
4610                 if (is_stack_slot_special(&state->stack[spi]))
4611                         for (i = 0; i < BPF_REG_SIZE; i++)
4612                                 scrub_spilled_slot(&state->stack[spi].slot_type[i]);
4613 
4614                 /* only mark the slot as written if all 8 bytes were written,
4615                  * otherwise read propagation may incorrectly stop too soon
4616                  * when stack slots are partially written.
4617                  * This heuristic means that read propagation will be
4618                  * conservative, since it will add reg_live_read marks
4619                  * to stack slots all the way to the first state when a program
4620                  * writes+reads less than 8 bytes
4621                  */
4622                 if (size == BPF_REG_SIZE)
4623                         state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
4624 
4625                 /* when we zero initialize stack slots mark them as such */
4626                 if ((reg && register_is_null(reg)) ||
4627                     (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
4628                         /* STACK_ZERO case happened because register spill
4629                          * wasn't properly aligned at the stack slot boundary,
4630                          * so it's not a register spill anymore; force
4631                          * originating register to be precise to make
4632                          * STACK_ZERO correct for subsequent states
4633                          */
4634                         err = mark_chain_precision(env, value_regno);
4635                         if (err)
4636                                 return err;
4637                         type = STACK_ZERO;
4638                 }
4639 
4640                 /* Mark slots affected by this stack write. */
4641                 for (i = 0; i < size; i++)
4642                         state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
4643                 insn_flags = 0; /* not a register spill */
4644         }
4645 
4646         if (insn_flags)
4647                 return push_jmp_history(env, env->cur_state, insn_flags);
4648         return 0;
4649 }
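/* Two hypothetical writes and the branches they take in
 * check_stack_write_fixed_off() above (offsets chosen for illustration,
 * assuming env->bpf_capable):
 *
 *   *(u64 *)(r10 - 8) = r1   // r1 is a scalar, off % 8 == 0: tracked as a
 *                            // register spill via save_register_state()
 *   *(u8 *)(r10 - 3) = 0     // misaligned 1-byte store of zero: falls
 *                            // through to the generic branch and marks that
 *                            // single byte STACK_ZERO
 */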
4650 
4651 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
4652  * known to contain a variable offset.
4653  * This function checks whether the write is permitted and conservatively
4654  * tracks the effects of the write, considering that each stack slot in the
4655  * dynamic range is potentially written to.
4656  *
4657  * 'off' includes 'ptr_regno->off'.
4658  * 'value_regno' can be -1, meaning that an unknown value is being written to
4659  * the stack.
4660  *
4661  * Spilled pointers in range are not marked as written because we don't know
4662  * what's going to be actually written. This means that read propagation for
4663  * future reads cannot be terminated by this write.
4664  *
4665  * For privileged programs, uninitialized stack slots are considered
4666  * initialized by this write (even though we don't know exactly what offsets
4667  * are going to be written to). The idea is that we don't want the verifier to
4668  * reject future reads that access slots written to through variable offsets.
4669  */
4670 static int check_stack_write_var_off(struct bpf_verifier_env *env,
4671                                      /* func where register points to */
4672                                      struct bpf_func_state *state,
4673                                      int ptr_regno, int off, int size,
4674                                      int value_regno, int insn_idx)
4675 {
4676         struct bpf_func_state *cur; /* state of the current function */
4677         int min_off, max_off;
4678         int i, err;
4679         struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
4680         struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4681         bool writing_zero = false;
4682         /* set if the fact that we're writing a zero is used to let any
4683          * stack slots remain STACK_ZERO
4684          */
4685         bool zero_used = false;
4686 
4687         cur = env->cur_state->frame[env->cur_state->curframe];
4688         ptr_reg = &cur->regs[ptr_regno];
4689         min_off = ptr_reg->smin_value + off;
4690         max_off = ptr_reg->smax_value + off + size;
4691         if (value_regno >= 0)
4692                 value_reg = &cur->regs[value_regno];
4693         if ((value_reg && register_is_null(value_reg)) ||
4694             (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
4695                 writing_zero = true;
4696 
4697         for (i = min_off; i < max_off; i++) {
4698                 int spi;
4699 
4700                 spi = __get_spi(i);
4701                 err = destroy_if_dynptr_stack_slot(env, state, spi);
4702                 if (err)
4703                         return err;
4704         }
4705 
4706         /* Variable offset writes destroy any spilled pointers in range. */
4707         for (i = min_off; i < max_off; i++) {
4708                 u8 new_type, *stype;
4709                 int slot, spi;
4710 
4711                 slot = -i - 1;
4712                 spi = slot / BPF_REG_SIZE;
4713                 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4714                 mark_stack_slot_scratched(env, spi);
4715 
4716                 if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
4717                         /* Reject the write if the range we may write to has not
4718                          * been initialized beforehand. If we didn't reject
4719                          * here, the ptr status would be erased below (even
4720                          * though not all slots are actually overwritten),
4721                          * possibly opening the door to leaks.
4722                          *
4723                          * We do, however, catch the STACK_INVALID case below, and
4724                          * only allow reading possibly uninitialized memory
4725                          * later for CAP_PERFMON, as the write may not happen to
4726                          * that slot.
4727                          */
4728                         verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
4729                                 insn_idx, i);
4730                         return -EINVAL;
4731                 }
4732 
4733                 /* If writing_zero and the spi slot contains a spill of value 0,
4734                  * maintain the spill type.
4735                  */
4736                 if (writing_zero && *stype == STACK_SPILL &&
4737                     is_spilled_scalar_reg(&state->stack[spi])) {
4738                         struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr;
4739 
4740                         if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) {
4741                                 zero_used = true;
4742                                 continue;
4743                         }
4744                 }
4745 
4746                 /* Erase all other spilled pointers. */
4747                 state->stack[spi].spilled_ptr.type = NOT_INIT;
4748 
4749                 /* Update the slot type. */
4750                 new_type = STACK_MISC;
4751                 if (writing_zero && *stype == STACK_ZERO) {
4752                         new_type = STACK_ZERO;
4753                         zero_used = true;
4754                 }
4755                 /* If the slot is STACK_INVALID, we check whether it's OK to
4756                  * pretend that it will be initialized by this write. The slot
4757                  * might not actually be written to, and so if we mark it as
4758                  * initialized future reads might leak uninitialized memory.
4759                  * For privileged programs, we will accept such reads to slots
4760                  * that may or may not be written because, if we were to reject
4761                  * them, the error would be too confusing.
4762                  */
4763                 if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
4764                         verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
4765                                         insn_idx, i);
4766                         return -EINVAL;
4767                 }
4768                 *stype = new_type;
4769         }
4770         if (zero_used) {
4771                 /* backtracking doesn't work for STACK_ZERO yet. */
4772                 err = mark_chain_precision(env, value_regno);
4773                 if (err)
4774                         return err;
4775         }
4776         return 0;
4777 }
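/* A hypothetical variable-offset write handled by check_stack_write_var_off()
 * above (register values chosen for illustration):
 *
 *   r3 = r10
 *   r3 += r2                 // r2 is a scalar known to be in [-16, -8]
 *   *(u64 *)(r3 + 0) = 0
 *
 * Any byte in fp[-16, 0) may be written, so every slot byte in that range is
 * conservatively updated (kept as STACK_ZERO where permitted, otherwise set
 * to STACK_MISC) and any spilled pointer state in the range is erased.
 */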
4778 
4779 /* When register 'dst_regno' is assigned some values from stack[min_off,
4780  * max_off), we set the register's type according to the types of the
4781  * respective stack slots. If all the stack values are known to be zeros, then
4782  * so is the destination reg. Otherwise, the register is considered to be
4783  * SCALAR. This function does not deal with register filling; the caller must
4784  * ensure that all spilled registers in the stack range have been marked as
4785  * read.
4786  */
4787 static void mark_reg_stack_read(struct bpf_verifier_env *env,
4788                                 /* func where src register points to */
4789                                 struct bpf_func_state *ptr_state,
4790                                 int min_off, int max_off, int dst_regno)
4791 {
4792         struct bpf_verifier_state *vstate = env->cur_state;
4793         struct bpf_func_state *state = vstate->frame[vstate->curframe];
4794         int i, slot, spi;
4795         u8 *stype;
4796         int zeros = 0;
4797 
4798         for (i = min_off; i < max_off; i++) {
4799                 slot = -i - 1;
4800                 spi = slot / BPF_REG_SIZE;
4801                 mark_stack_slot_scratched(env, spi);
4802                 stype = ptr_state->stack[spi].slot_type;
4803                 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
4804                         break;
4805                 zeros++;
4806         }
4807         if (zeros == max_off - min_off) {
4808                 /* Any access_size read into register is zero extended,
4809                  * so the whole register == const_zero.
4810                  */
4811                 __mark_reg_const_zero(env, &state->regs[dst_regno]);
4812         } else {
4813                 /* have read misc data from the stack */
4814                 mark_reg_unknown(env, state->regs, dst_regno);
4815         }
4816         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
4817 }
4818 
4819 /* Read the stack at 'off' and put the results into the register indicated by
4820  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
4821  * spilled reg.
4822  *
4823  * 'dst_regno' can be -1, meaning that the read value is not going to a
4824  * register.
4825  *
4826  * The access is assumed to be within the current stack bounds.
4827  */
4828 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
4829                                       /* func where src register points to */
4830                                       struct bpf_func_state *reg_state,
4831                                       int off, int size, int dst_regno)
4832 {
4833         struct bpf_verifier_state *vstate = env->cur_state;
4834         struct bpf_func_state *state = vstate->frame[vstate->curframe];
4835         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
4836         struct bpf_reg_state *reg;
4837         u8 *stype, type;
4838         int insn_flags = insn_stack_access_flags(reg_state->frameno, spi);
4839 
4840         stype = reg_state->stack[spi].slot_type;
4841         reg = &reg_state->stack[spi].spilled_ptr;
4842 
4843         mark_stack_slot_scratched(env, spi);
4844 
4845         if (is_spilled_reg(&reg_state->stack[spi])) {
4846                 u8 spill_size = 1;
4847 
4848                 for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
4849                         spill_size++;
4850 
4851                 if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
4852                         if (reg->type != SCALAR_VALUE) {
4853                                 verbose_linfo(env, env->insn_idx, "; ");
4854                                 verbose(env, "invalid size of register fill\n");
4855                                 return -EACCES;
4856                         }
4857 
4858                         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
4859                         if (dst_regno < 0)
4860                                 return 0;
4861 
4862                         if (size <= spill_size &&
4863                             bpf_stack_narrow_access_ok(off, size, spill_size)) {
4864                                 /* The earlier check_reg_arg() has decided the
4865                                  * subreg_def for this insn.  Save it first.
4866                                  */
4867                                 s32 subreg_def = state->regs[dst_regno].subreg_def;
4868 
4869                                 copy_register_state(&state->regs[dst_regno], reg);
4870                                 state->regs[dst_regno].subreg_def = subreg_def;
4871 
4872                                 /* Break the relation on a narrowing fill.
4873                                  * coerce_reg_to_size will adjust the boundaries.
4874                                  */
4875                                 if (get_reg_width(reg) > size * BITS_PER_BYTE)
4876                                         state->regs[dst_regno].id = 0;
4877                         } else {
4878                                 int spill_cnt = 0, zero_cnt = 0;
4879 
4880                                 for (i = 0; i < size; i++) {
4881                                         type = stype[(slot - i) % BPF_REG_SIZE];
4882                                         if (type == STACK_SPILL) {
4883                                                 spill_cnt++;
4884                                                 continue;
4885                                         }
4886                                         if (type == STACK_MISC)
4887                                                 continue;
4888                                         if (type == STACK_ZERO) {
4889                                                 zero_cnt++;
4890                                                 continue;
4891                                         }
4892                                         if (type == STACK_INVALID && env->allow_uninit_stack)
4893                                                 continue;
4894                                         verbose(env, "invalid read from stack off %d+%d size %d\n",
4895                                                 off, i, size);
4896                                         return -EACCES;
4897                                 }
4898 
4899                                 if (spill_cnt == size &&
4900                                     tnum_is_const(reg->var_off) && reg->var_off.value == 0) {
4901                                         __mark_reg_const_zero(env, &state->regs[dst_regno]);
4902                                         /* this IS register fill, so keep insn_flags */
4903                                 } else if (zero_cnt == size) {
4904                                         /* similarly to mark_reg_stack_read(), preserve zeroes */
4905                                         __mark_reg_const_zero(env, &state->regs[dst_regno]);
4906                                         insn_flags = 0; /* not restoring original register state */
4907                                 } else {
4908                                         mark_reg_unknown(env, state->regs, dst_regno);
4909                                         insn_flags = 0; /* not restoring original register state */
4910                                 }
4911                         }
4912                         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
4913                 } else if (dst_regno >= 0) {
4914                         /* restore register state from stack */
4915                         copy_register_state(&state->regs[dst_regno], reg);
4916                         /* mark reg as written since spilled pointer state likely
4917                          * has its liveness marks cleared by is_state_visited()
4918                          * which resets stack/reg liveness for state transitions
4919                          */
4920                         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
4921                 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
4922                         /* If dst_regno==-1, the caller is asking us whether
4923                          * it is acceptable to use this value as a SCALAR_VALUE
4924                          * (e.g. for XADD).
4925                          * We must not allow unprivileged callers to do that
4926                          * with spilled pointers.
4927                          */
4928                         verbose(env, "leaking pointer from stack off %d\n",
4929                                 off);
4930                         return -EACCES;
4931                 }
4932                 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
4933         } else {
4934                 for (i = 0; i < size; i++) {
4935                         type = stype[(slot - i) % BPF_REG_SIZE];
4936                         if (type == STACK_MISC)
4937                                 continue;
4938                         if (type == STACK_ZERO)
4939                                 continue;
4940                         if (type == STACK_INVALID && env->allow_uninit_stack)
4941                                 continue;
4942                         verbose(env, "invalid read from stack off %d+%d size %d\n",
4943                                 off, i, size);
4944                         return -EACCES;
4945                 }
4946                 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
4947                 if (dst_regno >= 0)
4948                         mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
4949                 insn_flags = 0; /* we are not restoring spilled register */
4950         }
4951         if (insn_flags)
4952                 return push_jmp_history(env, env->cur_state, insn_flags);
4953         return 0;
4954 }
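/* A hypothetical narrowing fill handled by check_stack_read_fixed_off()
 * above: with a full 8-byte scalar spilled at fp-8, the read
 * "r1 = *(u32 *)(r10 - 8)" may copy the spilled state into r1 (subject to
 * bpf_stack_narrow_access_ok()), but r1's id is cleared when the spilled
 * value does not fit in 32 bits, since the narrow read breaks the
 * equal-scalars relation.
 */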
4955 
4956 enum bpf_access_src {
4957         ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
4958         ACCESS_HELPER = 2,  /* the access is performed by a helper */
4959 };
4960 
4961 static int check_stack_range_initialized(struct bpf_verifier_env *env,
4962                                          int regno, int off, int access_size,
4963                                          bool zero_size_allowed,
4964                                          enum bpf_access_src type,
4965                                          struct bpf_call_arg_meta *meta);
4966 
4967 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
4968 {
4969         return cur_regs(env) + regno;
4970 }
4971 
4972 /* Read the stack at 'ptr_regno + off' and put the result into the register
4973  * 'dst_regno'.
4974  * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
4975  * but not its variable offset.
4976  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
4977  *
4978  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
4979  * filling registers (i.e. reads of spilled register cannot be detected when
4980  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
4981  * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
4982  * offset; for a fixed offset check_stack_read_fixed_off should be used
4983  * instead.
4984  */
4985 static int check_stack_read_var_off(struct bpf_verifier_env *env,
4986                                     int ptr_regno, int off, int size, int dst_regno)
4987 {
4988         /* The state of the source register. */
4989         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4990         struct bpf_func_state *ptr_state = func(env, reg);
4991         int err;
4992         int min_off, max_off;
4993 
4994         /* Note that we pass a NULL meta, so raw access will not be permitted.
4995          */
4996         err = check_stack_range_initialized(env, ptr_regno, off, size,
4997                                             false, ACCESS_DIRECT, NULL);
4998         if (err)
4999                 return err;
5000 
5001         min_off = reg->smin_value + off;
5002         max_off = reg->smax_value + off;
5003         mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
5004         return 0;
5005 }
5006 
5007 /* check_stack_read dispatches to check_stack_read_fixed_off or
5008  * check_stack_read_var_off.
5009  *
5010  * The caller must ensure that the offset falls within the allocated stack
5011  * bounds.
5012  *
5013  * 'dst_regno' is a register which will receive the value from the stack. It
5014  * can be -1, meaning that the read value is not going to a register.
5015  */
5016 static int check_stack_read(struct bpf_verifier_env *env,
5017                             int ptr_regno, int off, int size,
5018                             int dst_regno)
5019 {
5020         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
5021         struct bpf_func_state *state = func(env, reg);
5022         int err;
5023         /* Some accesses are only permitted with a static offset. */
5024         bool var_off = !tnum_is_const(reg->var_off);
5025 
5026         /* The offset is required to be static when reads don't go to a
5027          * register, in order to not leak pointers (see
5028          * check_stack_read_fixed_off).
5029          */
5030         if (dst_regno < 0 && var_off) {
5031                 char tn_buf[48];
5032 
5033                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5034                 verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
5035                         tn_buf, off, size);
5036                 return -EACCES;
5037         }
5038         /* Variable offset is prohibited for unprivileged mode for simplicity
5039          * since it requires corresponding support in Spectre masking for stack
5040          * ALU. See also retrieve_ptr_limit(). The check in
5041          * check_stack_access_for_ptr_arithmetic() called by
5042          * adjust_ptr_min_max_vals() prevents users from creating stack pointers
5043          * with variable offsets, therefore no check is required here. Further,
5044          * just checking it here would be insufficient as speculative stack
5045          * writes could still lead to unsafe speculative behaviour.
5046          */
5047         if (!var_off) {
5048                 off += reg->var_off.value;
5049                 err = check_stack_read_fixed_off(env, state, off, size,
5050                                                  dst_regno);
5051         } else {
5052                 /* Variable offset stack reads need more conservative handling
5053                  * than fixed offset ones. Note that dst_regno >= 0 on this
5054                  * branch.
5055                  */
5056                 err = check_stack_read_var_off(env, ptr_regno, off, size,
5057                                                dst_regno);
5058         }
5059         return err;
5060 }
5061 
5062 
5063 /* check_stack_write dispatches to check_stack_write_fixed_off or
5064  * check_stack_write_var_off.
5065  *
5066  * 'ptr_regno' is the register used as a pointer into the stack.
5067  * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
5068  * 'value_regno' is the register whose value we're writing to the stack. It can
5069  * be -1, meaning that we're not writing from a register.
5070  *
5071  * The caller must ensure that the offset falls within the maximum stack size.
5072  */
5073 static int check_stack_write(struct bpf_verifier_env *env,
5074                              int ptr_regno, int off, int size,
5075                              int value_regno, int insn_idx)
5076 {
5077         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
5078         struct bpf_func_state *state = func(env, reg);
5079         int err;
5080 
5081         if (tnum_is_const(reg->var_off)) {
5082                 off += reg->var_off.value;
5083                 err = check_stack_write_fixed_off(env, state, off, size,
5084                                                   value_regno, insn_idx);
5085         } else {
5086                 /* Variable offset stack reads need more conservative handling
5087                 /* Variable offset stack writes need more conservative handling
5088                  */
5089                 err = check_stack_write_var_off(env, state,
5090                                                 ptr_regno, off, size,
5091                                                 value_regno, insn_idx);
5092         }
5093         return err;
5094 }
5095 
5096 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
5097                                  int off, int size, enum bpf_access_type type)
5098 {
5099         struct bpf_reg_state *regs = cur_regs(env);
5100         struct bpf_map *map = regs[regno].map_ptr;
5101         u32 cap = bpf_map_flags_to_cap(map);
5102 
5103         if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
5104                 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
5105                         map->value_size, off, size);
5106                 return -EACCES;
5107         }
5108 
5109         if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
5110                 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
5111                         map->value_size, off, size);
5112                 return -EACCES;
5113         }
5114 
5115         return 0;
5116 }
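
/* Illustrative BPF-side sketch (map and names made up): a map created with
 * BPF_F_RDONLY_PROG has no BPF_MAP_CAN_WRITE capability from the program's
 * point of view, so a store into its value trips the check above:
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *		__uint(map_flags, BPF_F_RDONLY_PROG);
 *	} ro_map SEC(".maps");
 *
 *	__u32 k = 0;
 *	__u64 *v = bpf_map_lookup_elem(&ro_map, &k);
 *	if (v)
 *		*v = 1;		// "write into map forbidden ..."
 */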
5117 
5118 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
5119 static int __check_mem_access(struct bpf_verifier_env *env, int regno,
5120                               int off, int size, u32 mem_size,
5121                               bool zero_size_allowed)
5122 {
5123         bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
5124         struct bpf_reg_state *reg;
5125 
5126         if (off >= 0 && size_ok && (u64)off + size <= mem_size)
5127                 return 0;
5128 
5129         reg = &cur_regs(env)[regno];
5130         switch (reg->type) {
5131         case PTR_TO_MAP_KEY:
5132                 verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
5133                         mem_size, off, size);
5134                 break;
5135         case PTR_TO_MAP_VALUE:
5136                 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
5137                         mem_size, off, size);
5138                 break;
5139         case PTR_TO_PACKET:
5140         case PTR_TO_PACKET_META:
5141         case PTR_TO_PACKET_END:
5142                 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
5143                         off, size, regno, reg->id, off, mem_size);
5144                 break;
5145         case PTR_TO_MEM:
5146         default:
5147                 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
5148                         mem_size, off, size);
5149         }
5150 
5151         return -EACCES;
5152 }
5153 
5154 /* check read/write into a memory region with possible variable offset */
5155 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
5156                                    int off, int size, u32 mem_size,
5157                                    bool zero_size_allowed)
5158 {
5159         struct bpf_verifier_state *vstate = env->cur_state;
5160         struct bpf_func_state *state = vstate->frame[vstate->curframe];
5161         struct bpf_reg_state *reg = &state->regs[regno];
5162         int err;
5163 
5164         /* We may have adjusted the register pointing to memory region, so we
5165          * need to try adding each of min_value and max_value to off
5166          * to make sure our theoretical access will be safe.
5167          *
5168          * The minimum value is only important with signed
5169          * comparisons where we can't assume the floor of a
5170          * value is 0.  If we are using signed variables for our
5171          * indexes, we need to make sure that whatever we use
5172          * will have a set floor within our range.
5173          */
5174         if (reg->smin_value < 0 &&
5175             (reg->smin_value == S64_MIN ||
5176              (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
5177               reg->smin_value + off < 0)) {
5178                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5179                         regno);
5180                 return -EACCES;
5181         }
5182         err = __check_mem_access(env, regno, reg->smin_value + off, size,
5183                                  mem_size, zero_size_allowed);
5184         if (err) {
5185                 verbose(env, "R%d min value is outside of the allowed memory range\n",
5186                         regno);
5187                 return err;
5188         }
5189 
5190         /* If we haven't set a max value then we need to bail since we can't be
5191          * sure we won't do bad things.
5192          * If reg->umax_value + off could overflow, treat that as unbounded too.
5193          */
5194         if (reg->umax_value >= BPF_MAX_VAR_OFF) {
5195                 verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
5196                         regno);
5197                 return -EACCES;
5198         }
5199         err = __check_mem_access(env, regno, reg->umax_value + off, size,
5200                                  mem_size, zero_size_allowed);
5201         if (err) {
5202                 verbose(env, "R%d max value is outside of the allowed memory range\n",
5203                         regno);
5204                 return err;
5205         }
5206 
5207         return 0;
5208 }
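
/* Illustrative BPF-side sketch: both the minimum and the maximum possible
 * offset have to stay inside the region, so an explicit bounds check is
 * needed before indexing with an unknown scalar (value_size >= 64 assumed
 * here, names made up):
 *
 *	char *value = bpf_map_lookup_elem(&some_map, &key);
 *	__u32 idx = bpf_get_prandom_u32();
 *
 *	if (!value)
 *		return 0;
 *	if (idx > 63)		// without this, umax_value is unbounded
 *		return 0;
 *	return value[idx];
 */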
5209 
5210 static int __check_ptr_off_reg(struct bpf_verifier_env *env,
5211                                const struct bpf_reg_state *reg, int regno,
5212                                bool fixed_off_ok)
5213 {
5214         /* Access to this pointer-typed register or passing it to a helper
5215          * is only allowed in its original, unmodified form.
5216          */
5217 
5218         if (reg->off < 0) {
5219                 verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
5220                         reg_type_str(env, reg->type), regno, reg->off);
5221                 return -EACCES;
5222         }
5223 
5224         if (!fixed_off_ok && reg->off) {
5225                 verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
5226                         reg_type_str(env, reg->type), regno, reg->off);
5227                 return -EACCES;
5228         }
5229 
5230         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
5231                 char tn_buf[48];
5232 
5233                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5234                 verbose(env, "variable %s access var_off=%s disallowed\n",
5235                         reg_type_str(env, reg->type), tn_buf);
5236                 return -EACCES;
5237         }
5238 
5239         return 0;
5240 }
5241 
5242 static int check_ptr_off_reg(struct bpf_verifier_env *env,
5243                              const struct bpf_reg_state *reg, int regno)
5244 {
5245         return __check_ptr_off_reg(env, reg, regno, false);
5246 }
5247 
5248 static int map_kptr_match_type(struct bpf_verifier_env *env,
5249                                struct btf_field *kptr_field,
5250                                struct bpf_reg_state *reg, u32 regno)
5251 {
5252         const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
5253         int perm_flags;
5254         const char *reg_name = "";
5255 
5256         if (btf_is_kernel(reg->btf)) {
5257                 perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
5258 
5259                 /* Only unreferenced case accepts untrusted pointers */
5260                 if (kptr_field->type == BPF_KPTR_UNREF)
5261                         perm_flags |= PTR_UNTRUSTED;
5262         } else {
5263                 perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
5264                 if (kptr_field->type == BPF_KPTR_PERCPU)
5265                         perm_flags |= MEM_PERCPU;
5266         }
5267 
5268         if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
5269                 goto bad_type;
5270 
5271         /* We need to verify reg->type and reg->btf, before accessing reg->btf */
5272         reg_name = btf_type_name(reg->btf, reg->btf_id);
5273 
5274         /* For ref_ptr case, release function check should ensure we get one
5275          * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
5276          * normal store of unreferenced kptr, we must ensure var_off is zero.
5277          * Since ref_ptr cannot be accessed directly by BPF insns, checks for
5278          * reg->off and reg->ref_obj_id are not needed here.
5279          */
5280         if (__check_ptr_off_reg(env, reg, regno, true))
5281                 return -EACCES;
5282 
5283         /* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
5284          * we also need to take into account the reg->off.
5285          *
5286          * We want to support cases like:
5287          *
5288          * struct foo {
5289          *         struct bar br;
5290          *         struct baz bz;
5291          * };
5292          *
5293          * struct foo *v;
5294          * v = func();        // PTR_TO_BTF_ID
5295          * val->foo = v;      // reg->off is zero, btf and btf_id match type
5296          * val->bar = &v->br; // reg->off is still zero, but we need to retry with
5297          *                    // first member type of struct after comparison fails
5298          * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
5299          *                    // to match type
5300          *
5301          * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
5302          * is zero. We must also ensure that btf_struct_ids_match does not walk
5303          * the struct to match type against first member of struct, i.e. reject
5304          * second case from above. Hence, when type is BPF_KPTR_REF, we set
5305          * strict mode to true for type match.
5306          */
5307         if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5308                                   kptr_field->kptr.btf, kptr_field->kptr.btf_id,
5309                                   kptr_field->type != BPF_KPTR_UNREF))
5310                 goto bad_type;
5311         return 0;
5312 bad_type:
5313         verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
5314                 reg_type_str(env, reg->type), reg_name);
5315         verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
5316         if (kptr_field->type == BPF_KPTR_UNREF)
5317                 verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
5318                         targ_name);
5319         else
5320                 verbose(env, "\n");
5321         return -EINVAL;
5322 }
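
/* Illustrative map-value layout this function matches against (BPF-side C,
 * names made up).  The BTF type of the stored pointer must match the
 * declared kptr type; storing, say, a struct task_struct pointer into a
 * field declared as struct bpf_cpumask __kptr takes the bad_type path
 * above:
 *
 *	struct map_value {
 *		struct task_struct __kptr *task;		// referenced
 *		struct task_struct __kptr_untrusted *peek;	// unreferenced
 *	};
 */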
5323 
5324 static bool in_sleepable(struct bpf_verifier_env *env)
5325 {
5326         return env->prog->sleepable ||
5327                (env->cur_state && env->cur_state->in_sleepable);
5328 }
5329 
5330 /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
5331  * can dereference RCU protected pointers and the result is PTR_TRUSTED.
5332  */
5333 static bool in_rcu_cs(struct bpf_verifier_env *env)
5334 {
5335         return env->cur_state->active_rcu_lock ||
5336                env->cur_state->active_lock.ptr ||
5337                !in_sleepable(env);
5338 }
5339 
5340 /* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
5341 BTF_SET_START(rcu_protected_types)
5342 BTF_ID(struct, prog_test_ref_kfunc)
5343 #ifdef CONFIG_CGROUPS
5344 BTF_ID(struct, cgroup)
5345 #endif
5346 #ifdef CONFIG_BPF_JIT
5347 BTF_ID(struct, bpf_cpumask)
5348 #endif
5349 BTF_ID(struct, task_struct)
5350 BTF_ID(struct, bpf_crypto_ctx)
5351 BTF_SET_END(rcu_protected_types)
5352 
5353 static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
5354 {
5355         if (!btf_is_kernel(btf))
5356                 return true;
5357         return btf_id_set_contains(&rcu_protected_types, btf_id);
5358 }
5359 
5360 static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field)
5361 {
5362         struct btf_struct_meta *meta;
5363 
5364         if (btf_is_kernel(kptr_field->kptr.btf))
5365                 return NULL;
5366 
5367         meta = btf_find_struct_meta(kptr_field->kptr.btf,
5368                                     kptr_field->kptr.btf_id);
5369 
5370         return meta ? meta->record : NULL;
5371 }
5372 
5373 static bool rcu_safe_kptr(const struct btf_field *field)
5374 {
5375         const struct btf_field_kptr *kptr = &field->kptr;
5376 
5377         return field->type == BPF_KPTR_PERCPU ||
5378                (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id));
5379 }
5380 
5381 static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
5382 {
5383         struct btf_record *rec;
5384         u32 ret;
5385 
5386         ret = PTR_MAYBE_NULL;
5387         if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
5388                 ret |= MEM_RCU;
5389                 if (kptr_field->type == BPF_KPTR_PERCPU)
5390                         ret |= MEM_PERCPU;
5391                 else if (!btf_is_kernel(kptr_field->kptr.btf))
5392                         ret |= MEM_ALLOC;
5393 
5394                 rec = kptr_pointee_btf_record(kptr_field);
5395                 if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE))
5396                         ret |= NON_OWN_REF;
5397         } else {
5398                 ret |= PTR_UNTRUSTED;
5399         }
5400 
5401         return ret;
5402 }
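
/* Illustrative BPF-side sketch (assumes a map value 'v' with a
 * struct task_struct __kptr field named 'task'): loading an RCU-safe kptr
 * inside an RCU read-side critical section (or in a non-sleepable program)
 * yields MEM_RCU | PTR_MAYBE_NULL; the same load outside of it would only
 * be PTR_UNTRUSTED:
 *
 *	bpf_rcu_read_lock();
 *	struct task_struct *t = v->task;
 *	if (t)
 *		bpf_printk("pid %d", t->pid);
 *	bpf_rcu_read_unlock();
 */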
5403 
5404 static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
5405                                  int value_regno, int insn_idx,
5406                                  struct btf_field *kptr_field)
5407 {
5408         struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
5409         int class = BPF_CLASS(insn->code);
5410         struct bpf_reg_state *val_reg;
5411 
5412         /* Things we already checked for in check_map_access and caller:
5413          *  - Reject cases where variable offset may touch kptr
5414          *  - size of access (must be BPF_DW)
5415          *  - tnum_is_const(reg->var_off)
5416          *  - kptr_field->offset == off + reg->var_off.value
5417          */
5418         /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
5419         if (BPF_MODE(insn->code) != BPF_MEM) {
5420                 verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
5421                 return -EACCES;
5422         }
5423 
5424         /* We only allow loading referenced kptr, since it will be marked as
5425          * untrusted, similar to unreferenced kptr.
5426          */
5427         if (class != BPF_LDX &&
5428             (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) {
5429                 verbose(env, "store to referenced kptr disallowed\n");
5430                 return -EACCES;
5431         }
5432 
5433         if (class == BPF_LDX) {
5434                 val_reg = reg_state(env, value_regno);
5435                 /* We can simply mark the value_regno receiving the pointer
5436                  * value from map as PTR_TO_BTF_ID, with the correct type.
5437                  */
5438                 mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
5439                                 kptr_field->kptr.btf_id, btf_ld_kptr_type(env, kptr_field));
5440         } else if (class == BPF_STX) {
5441                 val_reg = reg_state(env, value_regno);
5442                 if (!register_is_null(val_reg) &&
5443                     map_kptr_match_type(env, kptr_field, val_reg, value_regno))
5444                         return -EACCES;
5445         } else if (class == BPF_ST) {
5446                 if (insn->imm) {
5447                         verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
5448                                 kptr_field->offset);
5449                         return -EACCES;
5450                 }
5451         } else {
5452                 verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
5453                 return -EACCES;
5454         }
5455         return 0;
5456 }
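
/* Illustrative BPF-side sketch (same hypothetical map value 'v' as above):
 * kptr fields may only be read directly, as full BPF_DW loads; a referenced
 * kptr is updated through bpf_kptr_xchg(), which transfers ownership of the
 * reference:
 *
 *	struct task_struct *cur, *old;
 *
 *	cur = bpf_task_acquire(bpf_get_current_task_btf());
 *	if (!cur)
 *		return 0;
 *	old = bpf_kptr_xchg(&v->task, cur);
 *	if (old)
 *		bpf_task_release(old);
 */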
5457 
5458 /* check read/write into a map element with possible variable offset */
5459 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
5460                             int off, int size, bool zero_size_allowed,
5461                             enum bpf_access_src src)
5462 {
5463         struct bpf_verifier_state *vstate = env->cur_state;
5464         struct bpf_func_state *state = vstate->frame[vstate->curframe];
5465         struct bpf_reg_state *reg = &state->regs[regno];
5466         struct bpf_map *map = reg->map_ptr;
5467         struct btf_record *rec;
5468         int err, i;
5469 
5470         err = check_mem_region_access(env, regno, off, size, map->value_size,
5471                                       zero_size_allowed);
5472         if (err)
5473                 return err;
5474 
5475         if (IS_ERR_OR_NULL(map->record))
5476                 return 0;
5477         rec = map->record;
5478         for (i = 0; i < rec->cnt; i++) {
5479                 struct btf_field *field = &rec->fields[i];
5480                 u32 p = field->offset;
5481 
5482                 /* If any part of a field can be touched by load/store, reject
5483                  * this program. To check that [x1, x2) overlaps with [y1, y2),
5484                  * it is sufficient to check x1 < y2 && y1 < x2.
5485                  */
5486                 if (reg->smin_value + off < p + field->size &&
5487                     p < reg->umax_value + off + size) {
5488                         switch (field->type) {
5489                         case BPF_KPTR_UNREF:
5490                         case BPF_KPTR_REF:
5491                         case BPF_KPTR_PERCPU:
5492                                 if (src != ACCESS_DIRECT) {
5493                                         verbose(env, "kptr cannot be accessed indirectly by helper\n");
5494                                         return -EACCES;
5495                                 }
5496                                 if (!tnum_is_const(reg->var_off)) {
5497                                         verbose(env, "kptr access cannot have variable offset\n");
5498                                         return -EACCES;
5499                                 }
5500                                 if (p != off + reg->var_off.value) {
5501                                         verbose(env, "kptr access misaligned expected=%u off=%llu\n",
5502                                                 p, off + reg->var_off.value);
5503                                         return -EACCES;
5504                                 }
5505                                 if (size != bpf_size_to_bytes(BPF_DW)) {
5506                                         verbose(env, "kptr access size must be BPF_DW\n");
5507                                         return -EACCES;
5508                                 }
5509                                 break;
5510                         default:
5511                                 verbose(env, "%s cannot be accessed directly by load/store\n",
5512                                         btf_field_type_name(field->type));
5513                                 return -EACCES;
5514                         }
5515                 }
5516         }
5517         return 0;
5518 }
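
/* Worked example for the overlap check above (illustrative numbers): a
 * special field at p = 16 with field->size = 4 occupies [16, 20).  An
 * access with reg->smin_value + off = 12 and reg->umax_value + off + size
 * = 18 covers [12, 18); since 12 < 20 && 16 < 18, the access may touch the
 * field and is rejected (or further validated in the kptr cases).
 */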
5519 
5520 #define MAX_PACKET_OFF 0xffff
5521 
5522 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
5523                                        const struct bpf_call_arg_meta *meta,
5524                                        enum bpf_access_type t)
5525 {
5526         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
5527 
5528         switch (prog_type) {
5529         /* Program types only with direct read access go here! */
5530         case BPF_PROG_TYPE_LWT_IN:
5531         case BPF_PROG_TYPE_LWT_OUT:
5532         case BPF_PROG_TYPE_LWT_SEG6LOCAL:
5533         case BPF_PROG_TYPE_SK_REUSEPORT:
5534         case BPF_PROG_TYPE_FLOW_DISSECTOR:
5535         case BPF_PROG_TYPE_CGROUP_SKB:
5536                 if (t == BPF_WRITE)
5537                         return false;
5538                 fallthrough;
5539 
5540         /* Program types with direct read + write access go here! */
5541         case BPF_PROG_TYPE_SCHED_CLS:
5542         case BPF_PROG_TYPE_SCHED_ACT:
5543         case BPF_PROG_TYPE_XDP:
5544         case BPF_PROG_TYPE_LWT_XMIT:
5545         case BPF_PROG_TYPE_SK_SKB:
5546         case BPF_PROG_TYPE_SK_MSG:
5547                 if (meta)
5548                         return meta->pkt_access;
5549 
5550                 env->seen_direct_write = true;
5551                 return true;
5552 
5553         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
5554                 if (t == BPF_WRITE)
5555                         env->seen_direct_write = true;
5556 
5557                 return true;
5558 
5559         default:
5560                 return false;
5561         }
5562 }
5563 
5564 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
5565                                int size, bool zero_size_allowed)
5566 {
5567         struct bpf_reg_state *regs = cur_regs(env);
5568         struct bpf_reg_state *reg = &regs[regno];
5569         int err;
5570 
5571         /* We may have added a variable offset to the packet pointer; but any
5572          * reg->range we have comes after that.  We are only checking the fixed
5573          * offset.
5574          */
5575 
5576         /* We don't allow negative numbers, because we aren't tracking enough
5577          * detail to prove they're safe.
5578          */
5579         if (reg->smin_value < 0) {
5580                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5581                         regno);
5582                 return -EACCES;
5583         }
5584 
5585         err = reg->range < 0 ? -EINVAL :
5586               __check_mem_access(env, regno, off, size, reg->range,
5587                                  zero_size_allowed);
5588         if (err) {
5589                 verbose(env, "R%d offset is outside of the packet\n", regno);
5590                 return err;
5591         }
5592 
5593         /* __check_mem_access has made sure "off + size - 1" is within u16.
5594          * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
5595          * otherwise find_good_pkt_pointers would have refused to set the range
5596          * info and __check_mem_access would have rejected this pkt access.
5597          * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
5598          */
5599         env->prog->aux->max_pkt_offset =
5600                 max_t(u32, env->prog->aux->max_pkt_offset,
5601                       off + reg->umax_value + size - 1);
5602 
5603         return err;
5604 }
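
/* Illustrative XDP-side sketch: reg->range for a packet pointer is only
 * established by comparing against data_end (see find_good_pkt_pointers()),
 * after which fixed-offset accesses inside that range pass the
 * __check_mem_access() call above:
 *
 *	void *data     = (void *)(long)ctx->data;
 *	void *data_end = (void *)(long)ctx->data_end;
 *	struct ethhdr *eth = data;
 *
 *	if (data + sizeof(*eth) > data_end)	// gives 'eth' a known range
 *		return XDP_PASS;
 *	return eth->h_proto == bpf_htons(ETH_P_IP) ? XDP_PASS : XDP_DROP;
 */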
5605 
5606 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
5607 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
5608                             enum bpf_access_type t, enum bpf_reg_type *reg_type,
5609                             struct btf **btf, u32 *btf_id, bool *is_retval, bool is_ldsx)
5610 {
5611         struct bpf_insn_access_aux info = {
5612                 .reg_type = *reg_type,
5613                 .log = &env->log,
5614                 .is_retval = false,
5615                 .is_ldsx = is_ldsx,
5616         };
5617 
5618         if (env->ops->is_valid_access &&
5619             env->ops->is_valid_access(off, size, t, env->prog, &info)) {
5620                 /* A non zero info.ctx_field_size indicates that this field is a
5621                  * candidate for later verifier transformation to load the whole
5622                  * field and then apply a mask when accessed with a narrower
5623                  * access than actual ctx access size. A zero info.ctx_field_size
5624                  * will only allow for whole field access and rejects any other
5625                  * type of narrower access.
5626                  */
5627                 *reg_type = info.reg_type;
5628                 *is_retval = info.is_retval;
5629 
5630                 if (base_type(*reg_type) == PTR_TO_BTF_ID) {
5631                         *btf = info.btf;
5632                         *btf_id = info.btf_id;
5633                 } else {
5634                         env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
5635                 }
5636                 /* remember the offset of last byte accessed in ctx */
5637                 if (env->prog->aux->max_ctx_offset < off + size)
5638                         env->prog->aux->max_ctx_offset = off + size;
5639                 return 0;
5640         }
5641 
5642         verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
5643         return -EACCES;
5644 }
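
/* Illustrative sketch of the narrow-access handling described above: if a
 * prog type's is_valid_access() callback reports ctx_field_size = 4 for a
 * field, a 1- or 2-byte read of that field may still be accepted and is
 * later rewritten into a full 4-byte load plus shift/mask, e.g. in a
 * BPF_PROG_TYPE_SCHED_CLS program (hypothetical access):
 *
 *	__u8 mark_lsb = *(__u8 *)&skb->mark;
 */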
5645 
5646 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
5647                                   int size)
5648 {
5649         if (size < 0 || off < 0 ||
5650             (u64)off + size > sizeof(struct bpf_flow_keys)) {
5651                 verbose(env, "invalid access to flow keys off=%d size=%d\n",
5652                         off, size);
5653                 return -EACCES;
5654         }
5655         return 0;
5656 }
5657 
5658 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
5659                              u32 regno, int off, int size,
5660                              enum bpf_access_type t)
5661 {
5662         struct bpf_reg_state *regs = cur_regs(env);
5663         struct bpf_reg_state *reg = &regs[regno];
5664         struct bpf_insn_access_aux info = {};
5665         bool valid;
5666 
5667         if (reg->smin_value < 0) {
5668                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5669                         regno);
5670                 return -EACCES;
5671         }
5672 
5673         switch (reg->type) {
5674         case PTR_TO_SOCK_COMMON:
5675                 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
5676                 break;
5677         case PTR_TO_SOCKET:
5678                 valid = bpf_sock_is_valid_access(off, size, t, &info);
5679                 break;
5680         case PTR_TO_TCP_SOCK:
5681                 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
5682                 break;
5683         case PTR_TO_XDP_SOCK:
5684                 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
5685                 break;
5686         default:
5687                 valid = false;
5688         }
5689 
5690 
5691         if (valid) {
5692                 env->insn_aux_data[insn_idx].ctx_field_size =
5693                         info.ctx_field_size;
5694                 return 0;
5695         }
5696 
5697         verbose(env, "R%d invalid %s access off=%d size=%d\n",
5698                 regno, reg_type_str(env, reg->type), off, size);
5699 
5700         return -EACCES;
5701 }
5702 
5703 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
5704 {
5705         return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
5706 }
5707 
5708 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
5709 {
5710         const struct bpf_reg_state *reg = reg_state(env, regno);
5711 
5712         return reg->type == PTR_TO_CTX;
5713 }
5714 
5715 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
5716 {
5717         const struct bpf_reg_state *reg = reg_state(env, regno);
5718 
5719         return type_is_sk_pointer(reg->type);
5720 }
5721 
5722 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
5723 {
5724         const struct bpf_reg_state *reg = reg_state(env, regno);
5725 
5726         return type_is_pkt_pointer(reg->type);
5727 }
5728 
5729 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
5730 {
5731         const struct bpf_reg_state *reg = reg_state(env, regno);
5732 
5733         /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
5734         return reg->type == PTR_TO_FLOW_KEYS;
5735 }
5736 
5737 static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
5738 {
5739         const struct bpf_reg_state *reg = reg_state(env, regno);
5740 
5741         return reg->type == PTR_TO_ARENA;
5742 }
5743 
5744 static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
5745 #ifdef CONFIG_NET
5746         [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
5747         [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
5748         [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
5749 #endif
5750         [CONST_PTR_TO_MAP] = btf_bpf_map_id,
5751 };
5752 
5753 static bool is_trusted_reg(const struct bpf_reg_state *reg)
5754 {
5755         /* A referenced register is always trusted. */
5756         if (reg->ref_obj_id)
5757                 return true;
5758 
5759         /* Types listed in the reg2btf_ids are always trusted */
5760         if (reg2btf_ids[base_type(reg->type)] &&
5761             !bpf_type_has_unsafe_modifiers(reg->type))
5762                 return true;
5763 
5764         /* If a register is not referenced, it is trusted if it has the
5765          * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
5766          * other type modifiers may be safe, but we elect to take an opt-in
5767          * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
5768          * not.
5769          *
5770          * Eventually, we should make PTR_TRUSTED the single source of truth
5771          * for whether a register is trusted.
5772          */
5773         return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
5774                !bpf_type_has_unsafe_modifiers(reg->type);
5775 }
5776 
5777 static bool is_rcu_reg(const struct bpf_reg_state *reg)
5778 {
5779         return reg->type & MEM_RCU;
5780 }
5781 
5782 static void clear_trusted_flags(enum bpf_type_flag *flag)
5783 {
5784         *flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
5785 }
5786 
5787 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
5788                                    const struct bpf_reg_state *reg,
5789                                    int off, int size, bool strict)
5790 {
5791         struct tnum reg_off;
5792         int ip_align;
5793 
5794         /* Byte size accesses are always allowed. */
5795         if (!strict || size == 1)
5796                 return 0;
5797 
5798         /* For platforms that do not have a Kconfig enabling
5799          * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
5800          * NET_IP_ALIGN is universally set to '2'.  And on platforms
5801          * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
5802          * to this code only in strict mode where we want to emulate
5803          * the NET_IP_ALIGN==2 checking.  Therefore use an
5804          * unconditional IP align value of '2'.
5805          */
5806         ip_align = 2;
5807 
5808         reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
5809         if (!tnum_is_aligned(reg_off, size)) {
5810                 char tn_buf[48];
5811 
5812                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5813                 verbose(env,
5814                         "misaligned packet access off %d+%s+%d+%d size %d\n",
5815                         ip_align, tn_buf, reg->off, off, size);
5816                 return -EACCES;
5817         }
5818 
5819         return 0;
5820 }
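
/* Worked example (illustrative): in strict mode, a 4-byte load at packet
 * offset 14 (right after the Ethernet header) with reg->off = 0 and a
 * constant var_off gives reg_off = 2 + 0 + 14 = 16, which is 4-byte
 * aligned, so the access is accepted.  Without accounting for
 * NET_IP_ALIGN == 2, the naive sum of 14 would have looked misaligned.
 */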
5821 
5822 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
5823                                        const struct bpf_reg_state *reg,
5824                                        const char *pointer_desc,
5825                                        int off, int size, bool strict)
5826 {
5827         struct tnum reg_off;
5828 
5829         /* Byte size accesses are always allowed. */
5830         if (!strict || size == 1)
5831                 return 0;
5832 
5833         reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
5834         if (!tnum_is_aligned(reg_off, size)) {
5835                 char tn_buf[48];
5836 
5837                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5838                 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
5839                         pointer_desc, tn_buf, reg->off, off, size);
5840                 return -EACCES;
5841         }
5842 
5843         return 0;
5844 }
5845 
5846 static int check_ptr_alignment(struct bpf_verifier_env *env,
5847                                const struct bpf_reg_state *reg, int off,
5848                                int size, bool strict_alignment_once)
5849 {
5850         bool strict = env->strict_alignment || strict_alignment_once;
5851         const char *pointer_desc = "";
5852 
5853         switch (reg->type) {
5854         case PTR_TO_PACKET:
5855         case PTR_TO_PACKET_META:
5856                 /* Special case, because of NET_IP_ALIGN. Given metadata sits
5857                  * right in front, treat it the very same way.
5858                  */
5859                 return check_pkt_ptr_alignment(env, reg, off, size, strict);
5860         case PTR_TO_FLOW_KEYS:
5861                 pointer_desc = "flow keys ";
5862                 break;
5863         case PTR_TO_MAP_KEY:
5864                 pointer_desc = "key ";
5865                 break;
5866         case PTR_TO_MAP_VALUE:
5867                 pointer_desc = "value ";
5868                 break;
5869         case PTR_TO_CTX:
5870                 pointer_desc = "context ";
5871                 break;
5872         case PTR_TO_STACK:
5873                 pointer_desc = "stack ";
5874                 /* The stack spill tracking logic in check_stack_write_fixed_off()
5875                  * and check_stack_read_fixed_off() relies on stack accesses being
5876                  * aligned.
5877                  */
5878                 strict = true;
5879                 break;
5880         case PTR_TO_SOCKET:
5881                 pointer_desc = "sock ";
5882                 break;
5883         case PTR_TO_SOCK_COMMON:
5884                 pointer_desc = "sock_common ";
5885                 break;
5886         case PTR_TO_TCP_SOCK:
5887                 pointer_desc = "tcp_sock ";
5888                 break;
5889         case PTR_TO_XDP_SOCK:
5890                 pointer_desc = "xdp_sock ";
5891                 break;
5892         case PTR_TO_ARENA:
5893                 return 0;
5894         default:
5895                 break;
5896         }
5897         return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
5898                                            strict);
5899 }
5900 
5901 static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
5902 {
5903         if (env->prog->jit_requested)
5904                 return round_up(stack_depth, 16);
5905 
5906         /* round up to 32-bytes, since this is granularity
5907          * of interpreter stack size
5908          */
5909         return round_up(max_t(u32, stack_depth, 1), 32);
5910 }
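
/* Worked example (illustrative): a subprog with stack_depth = 40 is
 * accounted as 48 bytes when JITed (round_up(40, 16)) and as 64 bytes for
 * the interpreter (round_up(max(40, 1), 32)).
 */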
5911 
5912 /* starting from main bpf function walk all instructions of the function
5913  * and recursively walk all callees that the given function can call.
5914  * Ignore jump and exit insns.
5915  * Since recursion is prevented by check_cfg() this algorithm
5916  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
5917  */
5918 static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
5919 {
5920         struct bpf_subprog_info *subprog = env->subprog_info;
5921         struct bpf_insn *insn = env->prog->insnsi;
5922         int depth = 0, frame = 0, i, subprog_end;
5923         bool tail_call_reachable = false;
5924         int ret_insn[MAX_CALL_FRAMES];
5925         int ret_prog[MAX_CALL_FRAMES];
5926         int j;
5927 
5928         i = subprog[idx].start;
5929 process_func:
5930         /* protect against potential stack overflow that might happen when
5931          * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
5932          * depth for such case down to 256 so that the worst case scenario
5933          * would result in 8k stack size (32 which is tailcall limit * 256 =
5934          * 8k).
5935          *
5936          * To get the idea what might happen, see an example:
5937          * func1 -> sub rsp, 128
5938          *  subfunc1 -> sub rsp, 256
5939          *  tailcall1 -> add rsp, 256
5940          *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
5941          *   subfunc2 -> sub rsp, 64
5942          *   subfunc22 -> sub rsp, 128
5943          *   tailcall2 -> add rsp, 128
5944          *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
5945          *
5946          * tailcall will unwind the current stack frame but it will not get rid
5947          * of caller's stack as shown on the example above.
5948          */
5949         if (idx && subprog[idx].has_tail_call && depth >= 256) {
5950                 verbose(env,
5951                         "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
5952                         depth);
5953                 return -EACCES;
5954         }
5955         depth += round_up_stack_depth(env, subprog[idx].stack_depth);
5956         if (depth > MAX_BPF_STACK) {
5957                 verbose(env, "combined stack size of %d calls is %d. Too large\n",
5958                         frame + 1, depth);
5959                 return -EACCES;
5960         }
5961 continue_func:
5962         subprog_end = subprog[idx + 1].start;
5963         for (; i < subprog_end; i++) {
5964                 int next_insn, sidx;
5965 
5966                 if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) {
5967                         bool err = false;
5968 
5969                         if (!is_bpf_throw_kfunc(insn + i))
5970                                 continue;
5971                         if (subprog[idx].is_cb)
5972                                 err = true;
5973                         for (int c = 0; c < frame && !err; c++) {
5974                                 if (subprog[ret_prog[c]].is_cb) {
5975                                         err = true;
5976                                         break;
5977                                 }
5978                         }
5979                         if (!err)
5980                                 continue;
5981                         verbose(env,
5982                                 "bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n",
5983                                 i, idx);
5984                         return -EINVAL;
5985                 }
5986 
5987                 if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
5988                         continue;
5989                 /* remember insn and function to return to */
5990                 ret_insn[frame] = i + 1;
5991                 ret_prog[frame] = idx;
5992 
5993                 /* find the callee */
5994                 next_insn = i + insn[i].imm + 1;
5995                 sidx = find_subprog(env, next_insn);
5996                 if (sidx < 0) {
5997                         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
5998                                   next_insn);
5999                         return -EFAULT;
6000                 }
6001                 if (subprog[sidx].is_async_cb) {
6002                         if (subprog[sidx].has_tail_call) {
6003                                 verbose(env, "verifier bug. subprog has tail_call and async cb\n");
6004                                 return -EFAULT;
6005                         }
6006                         /* async callbacks don't increase bpf prog stack size unless called directly */
6007                         if (!bpf_pseudo_call(insn + i))
6008                                 continue;
6009                         if (subprog[sidx].is_exception_cb) {
6010                                 verbose(env, "insn %d cannot call exception cb directly\n", i);
6011                                 return -EINVAL;
6012                         }
6013                 }
6014                 i = next_insn;
6015                 idx = sidx;
6016 
6017                 if (subprog[idx].has_tail_call)
6018                         tail_call_reachable = true;
6019 
6020                 frame++;
6021                 if (frame >= MAX_CALL_FRAMES) {
6022                         verbose(env, "the call stack of %d frames is too deep !\n",
6023                                 frame);
6024                         return -E2BIG;
6025                 }
6026                 goto process_func;
6027         }
6028         /* if tail call got detected across bpf2bpf calls then mark each of the
6029          * currently present subprog frames as tail call reachable subprogs;
6030          * this info will be utilized by JIT so that we will be preserving the
6031          * tail call counter throughout bpf2bpf calls combined with tailcalls
6032          */
6033         if (tail_call_reachable)
6034                 for (j = 0; j < frame; j++) {
6035                         if (subprog[ret_prog[j]].is_exception_cb) {
6036                                 verbose(env, "cannot tail call within exception cb\n");
6037                                 return -EINVAL;
6038                         }
6039                         subprog[ret_prog[j]].tail_call_reachable = true;
6040                 }
6041         if (subprog[0].tail_call_reachable)
6042                 env->prog->aux->tail_call_reachable = true;
6043 
6044         /* end of for() loop means the last insn of the 'subprog'
6045          * was reached. Doesn't matter whether it was JA or EXIT
6046          */
6047         if (frame == 0)
6048                 return 0;
6049         depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
6050         frame--;
6051         i = ret_insn[frame];
6052         idx = ret_prog[frame];
6053         goto continue_func;
6054 }
6055 
6056 static int check_max_stack_depth(struct bpf_verifier_env *env)
6057 {
6058         struct bpf_subprog_info *si = env->subprog_info;
6059         int ret;
6060 
6061         for (int i = 0; i < env->subprog_cnt; i++) {
6062                 if (!i || si[i].is_async_cb) {
6063                         ret = check_max_stack_depth_subprog(env, i);
6064                         if (ret < 0)
6065                                 return ret;
6066                 }
6067                 continue;
6068         }
6069         return 0;
6070 }
6071 
6072 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
6073 static int get_callee_stack_depth(struct bpf_verifier_env *env,
6074                                   const struct bpf_insn *insn, int idx)
6075 {
6076         int start = idx + insn->imm + 1, subprog;
6077 
6078         subprog = find_subprog(env, start);
6079         if (subprog < 0) {
6080                 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
6081                           start);
6082                 return -EFAULT;
6083         }
6084         return env->subprog_info[subprog].stack_depth;
6085 }
6086 #endif
6087 
6088 static int __check_buffer_access(struct bpf_verifier_env *env,
6089                                  const char *buf_info,
6090                                  const struct bpf_reg_state *reg,
6091                                  int regno, int off, int size)
6092 {
6093         if (off < 0) {
6094                 verbose(env,
6095                         "R%d invalid %s buffer access: off=%d, size=%d\n",
6096                         regno, buf_info, off, size);
6097                 return -EACCES;
6098         }
6099         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
6100                 char tn_buf[48];
6101 
6102                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6103                 verbose(env,
6104                         "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
6105                         regno, off, tn_buf);
6106                 return -EACCES;
6107         }
6108 
6109         return 0;
6110 }
6111 
6112 static int check_tp_buffer_access(struct bpf_verifier_env *env,
6113                                   const struct bpf_reg_state *reg,
6114                                   int regno, int off, int size)
6115 {
6116         int err;
6117 
6118         err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
6119         if (err)
6120                 return err;
6121 
6122         if (off + size > env->prog->aux->max_tp_access)
6123                 env->prog->aux->max_tp_access = off + size;
6124 
6125         return 0;
6126 }
6127 
6128 static int check_buffer_access(struct bpf_verifier_env *env,
6129                                const struct bpf_reg_state *reg,
6130                                int regno, int off, int size,
6131                                bool zero_size_allowed,
6132                                u32 *max_access)
6133 {
6134         const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
6135         int err;
6136 
6137         err = __check_buffer_access(env, buf_info, reg, regno, off, size);
6138         if (err)
6139                 return err;
6140 
6141         if (off + size > *max_access)
6142                 *max_access = off + size;
6143 
6144         return 0;
6145 }
6146 
6147 /* BPF architecture zero-extends alu32 ops into 64-bit registers */
6148 static void zext_32_to_64(struct bpf_reg_state *reg)
6149 {
6150         reg->var_off = tnum_subreg(reg->var_off);
6151         __reg_assign_32_into_64(reg);
6152 }
6153 
6154 /* truncate register to smaller size (in bytes)
6155  * must be called with size < BPF_REG_SIZE
6156  */
6157 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
6158 {
6159         u64 mask;
6160 
6161         /* clear high bits in bit representation */
6162         reg->var_off = tnum_cast(reg->var_off, size);
6163 
6164         /* fix arithmetic bounds */
6165         mask = ((u64)1 << (size * 8)) - 1;
6166         if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
6167                 reg->umin_value &= mask;
6168                 reg->umax_value &= mask;
6169         } else {
6170                 reg->umin_value = 0;
6171                 reg->umax_value = mask;
6172         }
6173         reg->smin_value = reg->umin_value;
6174         reg->smax_value = reg->umax_value;
6175 
6176         /* If size is smaller than the 32-bit subregister, the 32-bit register
6177          * value is also truncated, so push the 64-bit bounds into the 32-bit
6178          * bounds. The bounds above were already truncated to < 32 bits.
6179          */
6180         if (size < 4)
6181                 __mark_reg32_unbounded(reg);
6182 
6183         reg_bounds_sync(reg);
6184 }
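
/* Worked example (illustrative): a register with [umin, umax] = [0, 300]
 * coerced to size = 1 uses mask = 0xff; the bits above the mask differ
 * (0x000 vs 0x100), so the bounds collapse to the full byte range
 * [0, 255].  For [256, 300] the high bits agree (both 0x100), so the new
 * bounds are simply [0, 44].
 */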
6185 
6186 static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
6187 {
6188         if (size == 1) {
6189                 reg->smin_value = reg->s32_min_value = S8_MIN;
6190                 reg->smax_value = reg->s32_max_value = S8_MAX;
6191         } else if (size == 2) {
6192                 reg->smin_value = reg->s32_min_value = S16_MIN;
6193                 reg->smax_value = reg->s32_max_value = S16_MAX;
6194         } else {
6195                 /* size == 4 */
6196                 reg->smin_value = reg->s32_min_value = S32_MIN;
6197                 reg->smax_value = reg->s32_max_value = S32_MAX;
6198         }
6199         reg->umin_value = reg->u32_min_value = 0;
6200         reg->umax_value = U64_MAX;
6201         reg->u32_max_value = U32_MAX;
6202         reg->var_off = tnum_unknown;
6203 }
6204 
6205 static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
6206 {
6207         s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval;
6208         u64 top_smax_value, top_smin_value;
6209         u64 num_bits = size * 8;
6210 
6211         if (tnum_is_const(reg->var_off)) {
6212                 u64_cval = reg->var_off.value;
6213                 if (size == 1)
6214                         reg->var_off = tnum_const((s8)u64_cval);
6215                 else if (size == 2)
6216                         reg->var_off = tnum_const((s16)u64_cval);
6217                 else
6218                         /* size == 4 */
6219                         reg->var_off = tnum_const((s32)u64_cval);
6220 
6221                 u64_cval = reg->var_off.value;
6222                 reg->smax_value = reg->smin_value = u64_cval;
6223                 reg->umax_value = reg->umin_value = u64_cval;
6224                 reg->s32_max_value = reg->s32_min_value = u64_cval;
6225                 reg->u32_max_value = reg->u32_min_value = u64_cval;
6226                 return;
6227         }
6228 
6229         top_smax_value = ((u64)reg->smax_value >> num_bits) << num_bits;
6230         top_smin_value = ((u64)reg->smin_value >> num_bits) << num_bits;
6231 
6232         if (top_smax_value != top_smin_value)
6233                 goto out;
6234 
6235         /* find the s64_min and s64_max after sign extension */
6236         if (size == 1) {
6237                 init_s64_max = (s8)reg->smax_value;
6238                 init_s64_min = (s8)reg->smin_value;
6239         } else if (size == 2) {
6240                 init_s64_max = (s16)reg->smax_value;
6241                 init_s64_min = (s16)reg->smin_value;
6242         } else {
6243                 init_s64_max = (s32)reg->smax_value;
6244                 init_s64_min = (s32)reg->smin_value;
6245         }
6246 
6247         s64_max = max(init_s64_max, init_s64_min);
6248         s64_min = min(init_s64_max, init_s64_min);
6249 
6250         /* both of s64_max/s64_min positive or negative */
6251         if ((s64_max >= 0) == (s64_min >= 0)) {
6252                 reg->smin_value = reg->s32_min_value = s64_min;
6253                 reg->smax_value = reg->s32_max_value = s64_max;
6254                 reg->umin_value = reg->u32_min_value = s64_min;
6255                 reg->umax_value = reg->u32_max_value = s64_max;
6256                 reg->var_off = tnum_range(s64_min, s64_max);
6257                 return;
6258         }
6259 
6260 out:
6261         set_sext64_default_val(reg, size);
6262 }
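
/* Worked example (illustrative): a constant register holding 0xfe coerced
 * with size = 1 becomes the constant -2 (0xfffffffffffffffe).  For a
 * non-constant range such as [0x70, 0x90], the sign-extended endpoints are
 * 112 and -112, which straddle the sign bit, so the conservative defaults
 * from set_sext64_default_val() are used instead.
 */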
6263 
6264 static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
6265 {
6266         if (size == 1) {
6267                 reg->s32_min_value = S8_MIN;
6268                 reg->s32_max_value = S8_MAX;
6269         } else {
6270                 /* size == 2 */
6271                 reg->s32_min_value = S16_MIN;
6272                 reg->s32_max_value = S16_MAX;
6273         }
6274         reg->u32_min_value = 0;
6275         reg->u32_max_value = U32_MAX;
6276         reg->var_off = tnum_subreg(tnum_unknown);
6277 }
6278 
6279 static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
6280 {
6281         s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val;
6282         u32 top_smax_value, top_smin_value;
6283         u32 num_bits = size * 8;
6284 
6285         if (tnum_is_const(reg->var_off)) {
6286                 u32_val = reg->var_off.value;
6287                 if (size == 1)
6288                         reg->var_off = tnum_const((s8)u32_val);
6289                 else
6290                         reg->var_off = tnum_const((s16)u32_val);
6291 
6292                 u32_val = reg->var_off.value;
6293                 reg->s32_min_value = reg->s32_max_value = u32_val;
6294                 reg->u32_min_value = reg->u32_max_value = u32_val;
6295                 return;
6296         }
6297 
6298         top_smax_value = ((u32)reg->s32_max_value >> num_bits) << num_bits;
6299         top_smin_value = ((u32)reg->s32_min_value >> num_bits) << num_bits;
6300 
6301         if (top_smax_value != top_smin_value)
6302                 goto out;
6303 
6304         /* find the s32_min and s32_max after sign extension */
6305         if (size == 1) {
6306                 init_s32_max = (s8)reg->s32_max_value;
6307                 init_s32_min = (s8)reg->s32_min_value;
6308         } else {
6309                 /* size == 2 */
6310                 init_s32_max = (s16)reg->s32_max_value;
6311                 init_s32_min = (s16)reg->s32_min_value;
6312         }
6313         s32_max = max(init_s32_max, init_s32_min);
6314         s32_min = min(init_s32_max, init_s32_min);
6315 
6316         if ((s32_min >= 0) == (s32_max >= 0)) {
6317                 reg->s32_min_value = s32_min;
6318                 reg->s32_max_value = s32_max;
6319                 reg->u32_min_value = (u32)s32_min;
6320                 reg->u32_max_value = (u32)s32_max;
6321                 reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
6322                 return;
6323         }
6324 
6325 out:
6326         set_sext32_default_val(reg, size);
6327 }
6328 
6329 static bool bpf_map_is_rdonly(const struct bpf_map *map)
6330 {
6331         /* A map is considered read-only if the following conditions are true:
6332          *
6333          * 1) The BPF program side cannot change any of the map content. The
6334          *    BPF_F_RDONLY_PROG flag is set at map creation time and stays in
6335          *    effect throughout the map's lifetime.
6336          * 2) The map value(s) have been initialized from user space by a
6337          *    loader and then "frozen", such that no new map update/delete
6338          *    operations from syscall side are possible for the rest of
6339          *    the map's lifetime from that point onwards.
6340          * 3) Any parallel/pending map update/delete operations from syscall
6341          *    side have been completed. Only after that point, it's safe to
6342          *    assume that map value(s) are immutable.
6343          */
6344         return (map->map_flags & BPF_F_RDONLY_PROG) &&
6345                READ_ONCE(map->frozen) &&
6346                !bpf_map_write_active(map);
6347 }
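
/* Illustrative user-space sketch (libbpf): a BPF_F_RDONLY_PROG map that
 * user space freezes satisfies all three conditions, which lets the
 * verifier constant-fold loads from it via bpf_map_direct_read() below:
 *
 *	bpf_map__set_map_flags(map, BPF_F_RDONLY_PROG);
 *	// ... populate the values ...
 *	bpf_map__freeze(map);
 *
 * This is also what effectively happens for .rodata (const) global data,
 * which libbpf freezes on load.
 */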
6348 
6349 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
6350                                bool is_ldsx)
6351 {
6352         void *ptr;
6353         u64 addr;
6354         int err;
6355 
6356         err = map->ops->map_direct_value_addr(map, &addr, off);
6357         if (err)
6358                 return err;
6359         ptr = (void *)(long)addr + off;
6360 
6361         switch (size) {
6362         case sizeof(u8):
6363                 *val = is_ldsx ? (s64)*(s8 *)ptr : (u64)*(u8 *)ptr;
6364                 break;
6365         case sizeof(u16):
6366                 *val = is_ldsx ? (s64)*(s16 *)ptr : (u64)*(u16 *)ptr;
6367                 break;
6368         case sizeof(u32):
6369                 *val = is_ldsx ? (s64)*(s32 *)ptr : (u64)*(u32 *)ptr;
6370                 break;
6371         case sizeof(u64):
6372                 *val = *(u64 *)ptr;
6373                 break;
6374         default:
6375                 return -EINVAL;
6376         }
6377         return 0;
6378 }
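
One place this direct read shows up for BPF authors is libbpf's handling of 'const volatile' globals: they land in the frozen, BPF_F_RDONLY_PROG .rodata array map, so loads from them satisfy bpf_map_is_rdonly() and are resolved here into known scalars, letting the verifier prune branches that depend on them. A hedged BPF-side sketch (hypothetical program; any attach point works):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

const volatile __u32 debug_enabled = 0; /* set by the loader, then .rodata is frozen */

SEC("tracepoint/syscalls/sys_enter_getpid")
int probe(void *ctx)
{
        if (debug_enabled)      /* known scalar at verification time; branch may be pruned */
                bpf_printk("getpid");
        return 0;
}

char LICENSE[] SEC("license") = "GPL";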
6379 
6380 #define BTF_TYPE_SAFE_RCU(__type)  __PASTE(__type, __safe_rcu)
6381 #define BTF_TYPE_SAFE_RCU_OR_NULL(__type)  __PASTE(__type, __safe_rcu_or_null)
6382 #define BTF_TYPE_SAFE_TRUSTED(__type)  __PASTE(__type, __safe_trusted)
6383 #define BTF_TYPE_SAFE_TRUSTED_OR_NULL(__type)  __PASTE(__type, __safe_trusted_or_null)
6384 
6385 /*
6386  * Allow-list a few fields as RCU trusted or fully trusted.
6387  * This logic doesn't allow mixed tagging and will be removed once GCC supports
6388  * btf_type_tag.
6389  */
6390 
6391 /* RCU trusted: these fields are trusted in RCU CS and never NULL */
6392 BTF_TYPE_SAFE_RCU(struct task_struct) {
6393         const cpumask_t *cpus_ptr;
6394         struct css_set __rcu *cgroups;
6395         struct task_struct __rcu *real_parent;
6396         struct task_struct *group_leader;
6397 };
6398 
6399 BTF_TYPE_SAFE_RCU(struct cgroup) {
6400         /* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
6401         struct kernfs_node *kn;
6402 };
6403 
6404 BTF_TYPE_SAFE_RCU(struct css_set) {
6405         struct cgroup *dfl_cgrp;
6406 };
6407 
6408 /* RCU trusted: these fields are trusted in RCU CS and can be NULL */
6409 BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
6410         struct file __rcu *exe_file;
6411 };
6412 
6413 /* skb->sk, req->sk are not RCU protected, but we mark them as such
6414  * because bpf prog accessible sockets are SOCK_RCU_FREE.
6415  */
6416 BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
6417         struct sock *sk;
6418 };
6419 
6420 BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
6421         struct sock *sk;
6422 };
6423 
6424 /* full trusted: these fields are trusted even outside of RCU CS and never NULL */
6425 BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
6426         struct seq_file *seq;
6427 };
6428 
6429 BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
6430         struct bpf_iter_meta *meta;
6431         struct task_struct *task;
6432 };
6433 
6434 BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
6435         struct file *file;
6436 };
6437 
6438 BTF_TYPE_SAFE_TRUSTED(struct file) {
6439         struct inode *f_inode;
6440 };
6441 
6442 BTF_TYPE_SAFE_TRUSTED(struct dentry) {
6443         /* no negative dentry-s in places where bpf can see it */
6444         struct inode *d_inode;
6445 };
6446 
6447 BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
6448         struct sock *sk;
6449 };
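
A hedged sketch of what these allow-lists mean for a program (assumes a CO-RE build with a generated vmlinux.h; the attach point is only an example): skb->sk is listed in BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff), so the loaded pointer is marked MEM_RCU | PTR_MAYBE_NULL and must be NULL-checked before it can be walked further.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("fentry/kfree_skb_reason")
int BPF_PROG(trace_kfree_skb, struct sk_buff *skb)
{
        struct sock *sk = skb->sk;      /* MEM_RCU | PTR_MAYBE_NULL per the allow-list */

        if (!sk)                        /* mandatory NULL check */
                return 0;
        bpf_printk("family=%d", sk->__sk_common.skc_family);
        return 0;
}

char LICENSE[] SEC("license") = "GPL";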
6450 
6451 static bool type_is_rcu(struct bpf_verifier_env *env,
6452                         struct bpf_reg_state *reg,
6453                         const char *field_name, u32 btf_id)
6454 {
6455         BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
6456         BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
6457         BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
6458 
6459         return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
6460 }
6461 
6462 static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
6463                                 struct bpf_reg_state *reg,
6464                                 const char *field_name, u32 btf_id)
6465 {
6466         BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
6467         BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
6468         BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));
6469 
6470         return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
6471 }
6472 
6473 static bool type_is_trusted(struct bpf_verifier_env *env,
6474                             struct bpf_reg_state *reg,
6475                             const char *field_name, u32 btf_id)
6476 {
6477         BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
6478         BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
6479         BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
6480         BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
6481         BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
6482 
6483         return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
6484 }
6485 
6486 static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
6487                                     struct bpf_reg_state *reg,
6488                                     const char *field_name, u32 btf_id)
6489 {
6490         BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
6491 
6492         return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
6493                                           "__safe_trusted_or_null");
6494 }
6495 
6496 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
6497                                    struct bpf_reg_state *regs,
6498                                    int regno, int off, int size,
6499                                    enum bpf_access_type atype,
6500                                    int value_regno)
6501 {
6502         struct bpf_reg_state *reg = regs + regno;
6503         const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
6504         const char *tname = btf_name_by_offset(reg->btf, t->name_off);
6505         const char *field_name = NULL;
6506         enum bpf_type_flag flag = 0;
6507         u32 btf_id = 0;
6508         int ret;
6509 
6510         if (!env->allow_ptr_leaks) {
6511                 verbose(env,
6512                         "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
6513                         tname);
6514                 return -EPERM;
6515         }
6516         if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
6517                 verbose(env,
6518                         "Cannot access kernel 'struct %s' from non-GPL compatible program\n",
6519                         tname);
6520                 return -EINVAL;
6521         }
6522         if (off < 0) {
6523                 verbose(env,
6524                         "R%d is ptr_%s invalid negative access: off=%d\n",
6525                         regno, tname, off);
6526                 return -EACCES;
6527         }
6528         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
6529                 char tn_buf[48];
6530 
6531                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6532                 verbose(env,
6533                         "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
6534                         regno, tname, off, tn_buf);
6535                 return -EACCES;
6536         }
6537 
6538         if (reg->type & MEM_USER) {
6539                 verbose(env,
6540                         "R%d is ptr_%s access user memory: off=%d\n",
6541                         regno, tname, off);
6542                 return -EACCES;
6543         }
6544 
6545         if (reg->type & MEM_PERCPU) {
6546                 verbose(env,
6547                         "R%d is ptr_%s access percpu memory: off=%d\n",
6548                         regno, tname, off);
6549                 return -EACCES;
6550         }
6551 
6552         if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
6553                 if (!btf_is_kernel(reg->btf)) {
6554                         verbose(env, "verifier internal error: reg->btf must be kernel btf\n");
6555                         return -EFAULT;
6556                 }
6557                 ret = env->ops->btf_struct_access(&env->log, reg, off, size);
6558         } else {
6559                 /* Writes are permitted with default btf_struct_access for
6560                  * program allocated objects (which always have ref_obj_id > 0),
6561                  * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
6562                  */
6563                 if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) {
6564                         verbose(env, "only read is supported\n");
6565                         return -EACCES;
6566                 }
6567 
6568                 if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
6569                     !(reg->type & MEM_RCU) && !reg->ref_obj_id) {
6570                         verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
6571                         return -EFAULT;
6572                 }
6573 
6574                 ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name);
6575         }
6576 
6577         if (ret < 0)
6578                 return ret;
6579 
6580         if (ret != PTR_TO_BTF_ID) {
6581                 /* just mark; */
6582 
6583         } else if (type_flag(reg->type) & PTR_UNTRUSTED) {
6584                 /* If this is an untrusted pointer, all pointers formed by walking it
6585                  * also inherit the untrusted flag.
6586                  */
6587                 flag = PTR_UNTRUSTED;
6588 
6589         } else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
6590                 /* By default any pointer obtained from walking a trusted pointer is no
6591                  * longer trusted, unless the field being accessed has explicitly been
6592                  * marked as inheriting its parent's state of trust (either full or RCU).
6593                  * For example:
6594                  * 'cgroups' pointer is untrusted if task->cgroups dereference
6595                  * happened in a sleepable program outside of bpf_rcu_read_lock()
6596                  * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
6597                  * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
6598                  *
6599                  * A regular RCU-protected pointer with __rcu tag can also be deemed
6600                  * trusted if we are in an RCU CS. Such pointer can be NULL.
6601                  */
6602                 if (type_is_trusted(env, reg, field_name, btf_id)) {
6603                         flag |= PTR_TRUSTED;
6604                 } else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) {
6605                         flag |= PTR_TRUSTED | PTR_MAYBE_NULL;
6606                 } else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
6607                         if (type_is_rcu(env, reg, field_name, btf_id)) {
6608                                 /* ignore __rcu tag and mark it MEM_RCU */
6609                                 flag |= MEM_RCU;
6610                         } else if (flag & MEM_RCU ||
6611                                    type_is_rcu_or_null(env, reg, field_name, btf_id)) {
6612                                 /* __rcu tagged pointers can be NULL */
6613                                 flag |= MEM_RCU | PTR_MAYBE_NULL;
6614 
6615                                 /* We always trust them */
6616                                 if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
6617                                     flag & PTR_UNTRUSTED)
6618                                         flag &= ~PTR_UNTRUSTED;
6619                         } else if (flag & (MEM_PERCPU | MEM_USER)) {
6620                                 /* keep as-is */
6621                         } else {
6622                                 /* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
6623                                 clear_trusted_flags(&flag);
6624                         }
6625                 } else {
6626                         /*
6627                          * If we are not in an RCU CS, or the MEM_RCU pointer can be
6628                          * NULL, aggressively mark the pointer as untrusted; otherwise
6629                          * such pointers would be plain PTR_TO_BTF_ID without flags
6630                          * and would be allowed to be passed into helpers for
6631                          * compat reasons.
6632                          */
6633                         flag = PTR_UNTRUSTED;
6634                 }
6635         } else {
6636                 /* Old compat. Deprecated */
6637                 clear_trusted_flags(&flag);
6638         }
6639 
6640         if (atype == BPF_READ && value_regno >= 0)
6641                 mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
6642 
6643         return 0;
6644 }
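
To illustrate the sleepable-program case described in the comments above, here is a hedged sketch (hypothetical hook, assumes CONFIG_BPF_LSM, a generated vmlinux.h and the bpf_rcu_read_lock()/bpf_rcu_read_unlock() kfuncs): inside the explicit RCU critical section the walk of task->real_parent keeps MEM_RCU trust because the field is allow-listed in BTF_TYPE_SAFE_RCU(struct task_struct); after bpf_rcu_read_unlock() the same walk would be downgraded to PTR_UNTRUSTED.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;

SEC("lsm.s/file_open")          /* sleepable LSM program */
int BPF_PROG(on_file_open, struct file *file)
{
        struct task_struct *task = bpf_get_current_task_btf();
        int ppid;

        bpf_rcu_read_lock();
        ppid = task->real_parent->tgid; /* MEM_RCU inside the RCU CS */
        bpf_rcu_read_unlock();

        bpf_printk("file opened, parent tgid=%d", ppid);
        return 0;
}

char LICENSE[] SEC("license") = "GPL";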
6645 
6646 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
6647                                    struct bpf_reg_state *regs,
6648                                    int regno, int off, int size,
6649                                    enum bpf_access_type atype,
6650                                    int value_regno)
6651 {
6652         struct bpf_reg_state *reg = regs + regno;
6653         struct bpf_map *map = reg->map_ptr;
6654         struct bpf_reg_state map_reg;
6655         enum bpf_type_flag flag = 0;
6656         const struct btf_type *t;
6657         const char *tname;
6658         u32 btf_id;
6659         int ret;
6660 
6661         if (!btf_vmlinux) {
6662                 verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
6663                 return -ENOTSUPP;
6664         }
6665 
6666         if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
6667                 verbose(env, "map_ptr access not supported for map type %d\n",
6668                         map->map_type);
6669                 return -ENOTSUPP;
6670         }
6671 
6672         t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
6673         tname = btf_name_by_offset(btf_vmlinux, t->name_off);
6674 
6675         if (!env->allow_ptr_leaks) {
6676                 verbose(env,
6677                         "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
6678                         tname);
6679                 return -EPERM;
6680         }
6681 
6682         if (off < 0) {
6683                 verbose(env, "R%d is %s invalid negative access: off=%d\n",
6684                         regno, tname, off);
6685                 return -EACCES;
6686         }
6687 
6688         if (atype != BPF_READ) {
6689                 verbose(env, "only read from %s is supported\n", tname);
6690                 return -EACCES;
6691         }
6692 
6693         /* Simulate access to a PTR_TO_BTF_ID */
6694         memset(&map_reg, 0, sizeof(map_reg));
6695         mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0);
6696         ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
6697         if (ret < 0)
6698                 return ret;
6699 
6700         if (value_regno >= 0)
6701                 mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
6702 
6703         return 0;
6704 }
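
Reading fields of the kernel's struct bpf_map through a map pointer is the use case this function serves; a hedged sketch modeled on the selftests' map_ptr pattern (assumes vmlinux.h, CONFIG_DEBUG_INFO_BTF and a CAP_PERFMON/CAP_SYS_ADMIN loader; map and program names are hypothetical):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 16);
        __type(key, __u32);
        __type(value, __u64);
} my_map SEC(".maps");

SEC("tracepoint/syscalls/sys_enter_getpid")
int read_map_meta(void *ctx)
{
        /* &my_map is CONST_PTR_TO_MAP; the read below goes through
         * check_ptr_to_map_access() -> btf_struct_access().
         */
        struct bpf_map *map = (struct bpf_map *)&my_map;

        bpf_printk("max_entries=%u", map->max_entries);
        return 0;
}

char LICENSE[] SEC("license") = "GPL";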
6705 
6706 /* Check that the stack access at the given offset is within bounds. The
6707  * maximum valid offset is -1.
6708  *
6709  * The minimum valid offset is -MAX_BPF_STACK for writes, and
6710  * -state->allocated_stack for reads.
6711  */
6712 static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
6713                                           s64 off,
6714                                           struct bpf_func_state *state,
6715                                           enum bpf_access_type t)
6716 {
6717         int min_valid_off;
6718 
6719         if (t == BPF_WRITE || env->allow_uninit_stack)
6720                 min_valid_off = -MAX_BPF_STACK;
6721         else
6722                 min_valid_off = -state->allocated_stack;
6723 
6724         if (off < min_valid_off || off > -1)
6725                 return -EACCES;
6726         return 0;
6727 }
6728 
6729 /* Check that the stack access at 'regno + off' falls within the maximum stack
6730  * bounds.
6731  *
6732  * 'off' includes `regno->offset`, but not its dynamic part (if any).
6733  */
6734 static int check_stack_access_within_bounds(
6735                 struct bpf_verifier_env *env,
6736                 int regno, int off, int access_size,
6737                 enum bpf_access_src src, enum bpf_access_type type)
6738 {
6739         struct bpf_reg_state *regs = cur_regs(env);
6740         struct bpf_reg_state *reg = regs + regno;
6741         struct bpf_func_state *state = func(env, reg);
6742         s64 min_off, max_off;
6743         int err;
6744         char *err_extra;
6745 
6746         if (src == ACCESS_HELPER)
6747                 /* We don't know if helpers are reading or writing (or both). */
6748                 err_extra = " indirect access to";
6749         else if (type == BPF_READ)
6750                 err_extra = " read from";
6751         else
6752                 err_extra = " write to";
6753 
6754         if (tnum_is_const(reg->var_off)) {
6755                 min_off = (s64)reg->var_off.value + off;
6756                 max_off = min_off + access_size;
6757         } else {
6758                 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
6759                     reg->smin_value <= -BPF_MAX_VAR_OFF) {
6760                         verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
6761                                 err_extra, regno);
6762                         return -EACCES;
6763                 }
6764                 min_off = reg->smin_value + off;
6765                 max_off = reg->smax_value + off + access_size;
6766         }
6767 
6768         err = check_stack_slot_within_bounds(env, min_off, state, type);
6769         if (!err && max_off > 0)
6770                 err = -EINVAL; /* out of stack access into non-negative offsets */
6771         if (!err && access_size < 0)
6772                 /* access_size should not be negative (or overflow an int); other checks
6773                  * along the way should have prevented such an access.
6774                  */
6775                 err = -EFAULT; /* invalid negative access size; integer overflow? */
6776 
6777         if (err) {
6778                 if (tnum_is_const(reg->var_off)) {
6779                         verbose(env, "invalid%s stack R%d off=%d size=%d\n",
6780                                 err_extra, regno, off, access_size);
6781                 } else {
6782                         char tn_buf[48];
6783 
6784                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6785                         verbose(env, "invalid variable-offset%s stack R%d var_off=%s off=%d size=%d\n",
6786                                 err_extra, regno, tn_buf, off, access_size);
6787                 }
6788                 return err;
6789         }
6790 
6791         /* Note that there is no stack access with offset zero, so the needed stack
6792          * size is -min_off, not -min_off+1.
6793          */
6794         return grow_stack_state(env, state, -min_off /* size */);
6795 }
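
In BPF C terms, these bounds mean that every direct stack access must land in the window [r10 - MAX_BPF_STACK, r10 - 1], i.e. at most 512 bytes below the frame pointer and never at or above it. A hedged sketch (hypothetical program):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("socket")
int stack_demo(struct __sk_buff *skb)
{
        char buf[256];          /* fp-256 .. fp-1: within bounds */
        /* char big[600]; */    /* a frame this large would exceed MAX_BPF_STACK */

        __builtin_memset(buf, 0, sizeof(buf));
        return buf[0];
}

char LICENSE[] SEC("license") = "GPL";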
6796 
6797 static bool get_func_retval_range(struct bpf_prog *prog,
6798                                   struct bpf_retval_range *range)
6799 {
6800         if (prog->type == BPF_PROG_TYPE_LSM &&
6801                 prog->expected_attach_type == BPF_LSM_MAC &&
6802                 !bpf_lsm_get_retval_range(prog, range)) {
6803                 return true;
6804         }
6805         return false;
6806 }
6807 
6808 /* check whether memory at (regno + off) is accessible for t = (read | write)
6809  * if t==write, value_regno is a register whose value is stored into memory
6810  * if t==read, value_regno is a register which will receive the value from memory
6811  * if t==write && value_regno==-1, some unknown value is stored into memory
6812  * if t==read && value_regno==-1, don't care what we read from memory
6813  */
6814 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
6815                             int off, int bpf_size, enum bpf_access_type t,
6816                             int value_regno, bool strict_alignment_once, bool is_ldsx)
6817 {
6818         struct bpf_reg_state *regs = cur_regs(env);
6819         struct bpf_reg_state *reg = regs + regno;
6820         int size, err = 0;
6821 
6822         size = bpf_size_to_bytes(bpf_size);
6823         if (size < 0)
6824                 return size;
6825 
6826         /* alignment checks will add in reg->off themselves */
6827         err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
6828         if (err)
6829                 return err;
6830 
6831         /* for access checks, reg->off is just part of off */
6832         off += reg->off;
6833 
6834         if (reg->type == PTR_TO_MAP_KEY) {
6835                 if (t == BPF_WRITE) {
6836                         verbose(env, "write to change key R%d not allowed\n", regno);
6837                         return -EACCES;
6838                 }
6839 
6840                 err = check_mem_region_access(env, regno, off, size,
6841                                               reg->map_ptr->key_size, false);
6842                 if (err)
6843                         return err;
6844                 if (value_regno >= 0)
6845                         mark_reg_unknown(env, regs, value_regno);
6846         } else if (reg->type == PTR_TO_MAP_VALUE) {
6847                 struct btf_field *kptr_field = NULL;
6848 
6849                 if (t == BPF_WRITE && value_regno >= 0 &&
6850                     is_pointer_value(env, value_regno)) {
6851                         verbose(env, "R%d leaks addr into map\n", value_regno);
6852                         return -EACCES;
6853                 }
6854                 err = check_map_access_type(env, regno, off, size, t);
6855                 if (err)
6856                         return err;
6857                 err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
6858                 if (err)
6859                         return err;
6860                 if (tnum_is_const(reg->var_off))
6861                         kptr_field = btf_record_find(reg->map_ptr->record,
6862                                                      off + reg->var_off.value, BPF_KPTR);
6863                 if (kptr_field) {
6864                         err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
6865                 } else if (t == BPF_READ && value_regno >= 0) {
6866                         struct bpf_map *map = reg->map_ptr;
6867 
6868                         /* if map is read-only, track its contents as scalars */
6869                         if (tnum_is_const(reg->var_off) &&
6870                             bpf_map_is_rdonly(map) &&
6871                             map->ops->map_direct_value_addr) {
6872                                 int map_off = off + reg->var_off.value;
6873                                 u64 val = 0;
6874 
6875                                 err = bpf_map_direct_read(map, map_off, size,
6876                                                           &val, is_ldsx);
6877                                 if (err)
6878                                         return err;
6879 
6880                                 regs[value_regno].type = SCALAR_VALUE;
6881                                 __mark_reg_known(&regs[value_regno], val);
6882                         } else {
6883                                 mark_reg_unknown(env, regs, value_regno);
6884                         }
6885                 }
6886         } else if (base_type(reg->type) == PTR_TO_MEM) {
6887                 bool rdonly_mem = type_is_rdonly_mem(reg->type);
6888 
6889                 if (type_may_be_null(reg->type)) {
6890                         verbose(env, "R%d invalid mem access '%s'\n", regno,
6891                                 reg_type_str(env, reg->type));
6892                         return -EACCES;
6893                 }
6894 
6895                 if (t == BPF_WRITE && rdonly_mem) {
6896                         verbose(env, "R%d cannot write into %s\n",
6897                                 regno, reg_type_str(env, reg->type));
6898                         return -EACCES;
6899                 }
6900 
6901                 if (t == BPF_WRITE && value_regno >= 0 &&
6902                     is_pointer_value(env, value_regno)) {
6903                         verbose(env, "R%d leaks addr into mem\n", value_regno);
6904                         return -EACCES;
6905                 }
6906 
6907                 err = check_mem_region_access(env, regno, off, size,
6908                                               reg->mem_size, false);
6909                 if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
6910                         mark_reg_unknown(env, regs, value_regno);
6911         } else if (reg->type == PTR_TO_CTX) {
6912                 bool is_retval = false;
6913                 struct bpf_retval_range range;
6914                 enum bpf_reg_type reg_type = SCALAR_VALUE;
6915                 struct btf *btf = NULL;
6916                 u32 btf_id = 0;
6917 
6918                 if (t == BPF_WRITE && value_regno >= 0 &&
6919                     is_pointer_value(env, value_regno)) {
6920                         verbose(env, "R%d leaks addr into ctx\n", value_regno);
6921                         return -EACCES;
6922                 }
6923 
6924                 err = check_ptr_off_reg(env, reg, regno);
6925                 if (err < 0)
6926                         return err;
6927 
6928                 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
6929                                        &btf_id, &is_retval, is_ldsx);
6930                 if (err)
6931                         verbose_linfo(env, insn_idx, "; ");
6932                 if (!err && t == BPF_READ && value_regno >= 0) {
6933                         /* ctx access returns either a scalar, or a
6934                          * PTR_TO_PACKET[_META,_END]. In the latter
6935                          * case, we know the offset is zero.
6936                          */
6937                         if (reg_type == SCALAR_VALUE) {
6938                                 if (is_retval && get_func_retval_range(env->prog, &range)) {
6939                                         err = __mark_reg_s32_range(env, regs, value_regno,
6940                                                                    range.minval, range.maxval);
6941                                         if (err)
6942                                                 return err;
6943                                 } else {
6944                                         mark_reg_unknown(env, regs, value_regno);
6945                                 }
6946                         } else {
6947                                 mark_reg_known_zero(env, regs,
6948                                                     value_regno);
6949                                 if (type_may_be_null(reg_type))
6950                                         regs[value_regno].id = ++env->id_gen;
6951                                 /* A load of ctx field could have different
6952                                  * actual load size with the one encoded in the
6953                                  * insn. When the dst is PTR, it is for sure not
6954                                  * a sub-register.
6955                                  */
6956                                 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
6957                                 if (base_type(reg_type) == PTR_TO_BTF_ID) {
6958                                         regs[value_regno].btf = btf;
6959                                         regs[value_regno].btf_id = btf_id;
6960                                 }
6961                         }
6962                         regs[value_regno].type = reg_type;
6963                 }
6964 
6965         } else if (reg->type == PTR_TO_STACK) {
6966                 /* Basic bounds checks. */
6967                 err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
6968                 if (err)
6969                         return err;
6970 
6971                 if (t == BPF_READ)
6972                         err = check_stack_read(env, regno, off, size,
6973                                                value_regno);
6974                 else
6975                         err = check_stack_write(env, regno, off, size,
6976                                                 value_regno, insn_idx);
6977         } else if (reg_is_pkt_pointer(reg)) {
6978                 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
6979                         verbose(env, "cannot write into packet\n");
6980                         return -EACCES;
6981                 }
6982                 if (t == BPF_WRITE && value_regno >= 0 &&
6983                     is_pointer_value(env, value_regno)) {
6984                         verbose(env, "R%d leaks addr into packet\n",
6985                                 value_regno);
6986                         return -EACCES;
6987                 }
6988                 err = check_packet_access(env, regno, off, size, false);
6989                 if (!err && t == BPF_READ && value_regno >= 0)
6990                         mark_reg_unknown(env, regs, value_regno);
6991         } else if (reg->type == PTR_TO_FLOW_KEYS) {
6992                 if (t == BPF_WRITE && value_regno >= 0 &&
6993                     is_pointer_value(env, value_regno)) {
6994                         verbose(env, "R%d leaks addr into flow keys\n",
6995                                 value_regno);
6996                         return -EACCES;
6997                 }
6998 
6999                 err = check_flow_keys_access(env, off, size);
7000                 if (!err && t == BPF_READ && value_regno >= 0)
7001                         mark_reg_unknown(env, regs, value_regno);
7002         } else if (type_is_sk_pointer(reg->type)) {
7003                 if (t == BPF_WRITE) {
7004                         verbose(env, "R%d cannot write into %s\n",
7005                                 regno, reg_type_str(env, reg->type));
7006                         return -EACCES;
7007                 }
7008                 err = check_sock_access(env, insn_idx, regno, off, size, t);
7009                 if (!err && value_regno >= 0)
7010                         mark_reg_unknown(env, regs, value_regno);
7011         } else if (reg->type == PTR_TO_TP_BUFFER) {
7012                 err = check_tp_buffer_access(env, reg, regno, off, size);
7013                 if (!err && t == BPF_READ && value_regno >= 0)
7014                         mark_reg_unknown(env, regs, value_regno);
7015         } else if (base_type(reg->type) == PTR_TO_BTF_ID &&
7016                    !type_may_be_null(reg->type)) {
7017                 err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
7018                                               value_regno);
7019         } else if (reg->type == CONST_PTR_TO_MAP) {
7020                 err = check_ptr_to_map_access(env, regs, regno, off, size, t,
7021                                               value_regno);
7022         } else if (base_type(reg->type) == PTR_TO_BUF) {
7023                 bool rdonly_mem = type_is_rdonly_mem(reg->type);
7024                 u32 *max_access;
7025 
7026                 if (rdonly_mem) {
7027                         if (t == BPF_WRITE) {
7028                                 verbose(env, "R%d cannot write into %s\n",
7029                                         regno, reg_type_str(env, reg->type));
7030                                 return -EACCES;
7031                         }
7032                         max_access = &env->prog->aux->max_rdonly_access;
7033                 } else {
7034                         max_access = &env->prog->aux->max_rdwr_access;
7035                 }
7036 
7037                 err = check_buffer_access(env, reg, regno, off, size, false,
7038                                           max_access);
7039 
7040                 if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
7041                         mark_reg_unknown(env, regs, value_regno);
7042         } else if (reg->type == PTR_TO_ARENA) {
7043                 if (t == BPF_READ && value_regno >= 0)
7044                         mark_reg_unknown(env, regs, value_regno);
7045         } else {
7046                 verbose(env, "R%d invalid mem access '%s'\n", regno,
7047                         reg_type_str(env, reg->type));
7048                 return -EACCES;
7049         }
7050 
7051         if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
7052             regs[value_regno].type == SCALAR_VALUE) {
7053                 if (!is_ldsx)
7054                         /* b/h/w load zero-extends, mark upper bits as known 0 */
7055                         coerce_reg_to_size(&regs[value_regno], size);
7056                 else
7057                         coerce_reg_to_size_sx(&regs[value_regno], size);
7058         }
7059         return err;
7060 }
7061 
7062 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
7063                              bool allow_trust_mismatch);
7064 
7065 static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
7066 {
7067         int load_reg;
7068         int err;
7069 
7070         switch (insn->imm) {
7071         case BPF_ADD:
7072         case BPF_ADD | BPF_FETCH:
7073         case BPF_AND:
7074         case BPF_AND | BPF_FETCH:
7075         case BPF_OR:
7076         case BPF_OR | BPF_FETCH:
7077         case BPF_XOR:
7078         case BPF_XOR | BPF_FETCH:
7079         case BPF_XCHG:
7080         case BPF_CMPXCHG:
7081                 break;
7082         default:
7083                 verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
7084                 return -EINVAL;
7085         }
7086 
7087         if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
7088                 verbose(env, "invalid atomic operand size\n");
7089                 return -EINVAL;
7090         }
7091 
7092         /* check src1 operand */
7093         err = check_reg_arg(env, insn->src_reg, SRC_OP);
7094         if (err)
7095                 return err;
7096 
7097         /* check src2 operand */
7098         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7099         if (err)
7100                 return err;
7101 
7102         if (insn->imm == BPF_CMPXCHG) {
7103                 /* Check comparison of R0 with memory location */
7104                 const u32 aux_reg = BPF_REG_0;
7105 
7106                 err = check_reg_arg(env, aux_reg, SRC_OP);
7107                 if (err)
7108                         return err;
7109 
7110                 if (is_pointer_value(env, aux_reg)) {
7111                         verbose(env, "R%d leaks addr into mem\n", aux_reg);
7112                         return -EACCES;
7113                 }
7114         }
7115 
7116         if (is_pointer_value(env, insn->src_reg)) {
7117                 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
7118                 return -EACCES;
7119         }
7120 
7121         if (is_ctx_reg(env, insn->dst_reg) ||
7122             is_pkt_reg(env, insn->dst_reg) ||
7123             is_flow_key_reg(env, insn->dst_reg) ||
7124             is_sk_reg(env, insn->dst_reg) ||
7125             (is_arena_reg(env, insn->dst_reg) && !bpf_jit_supports_insn(insn, true))) {
7126                 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
7127                         insn->dst_reg,
7128                         reg_type_str(env, reg_state(env, insn->dst_reg)->type));
7129                 return -EACCES;
7130         }
7131 
7132         if (insn->imm & BPF_FETCH) {
7133                 if (insn->imm == BPF_CMPXCHG)
7134                         load_reg = BPF_REG_0;
7135                 else
7136                         load_reg = insn->src_reg;
7137 
7138                 /* check and record load of old value */
7139                 err = check_reg_arg(env, load_reg, DST_OP);
7140                 if (err)
7141                         return err;
7142         } else {
7143                 /* This instruction accesses a memory location but doesn't
7144                  * actually load it into a register.
7145                  */
7146                 load_reg = -1;
7147         }
7148 
7149         /* Check whether we can read the memory, with a second call for the
7150          * fetch case to simulate the register fill.
7151          */
7152         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
7153                                BPF_SIZE(insn->code), BPF_READ, -1, true, false);
7154         if (!err && load_reg >= 0)
7155                 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
7156                                        BPF_SIZE(insn->code), BPF_READ, load_reg,
7157                                        true, false);
7158         if (err)
7159                 return err;
7160 
7161         if (is_arena_reg(env, insn->dst_reg)) {
7162                 err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
7163                 if (err)
7164                         return err;
7165         }
7166         /* Check whether we can write into the same memory. */
7167         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
7168                                BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
7169         if (err)
7170                 return err;
7171         return 0;
7172 }
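
These opcodes correspond to what clang emits for the __sync_* builtins when targeting BPF; a hedged sketch (assumes clang -target bpf -mcpu=v3 or later so the BPF_ATOMIC fetch/cmpxchg encodings are available; map and program names are hypothetical):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 1);
        __type(key, __u32);
        __type(value, __u64);
} counters SEC(".maps");

SEC("tracepoint/syscalls/sys_enter_getpid")
int count(void *ctx)
{
        __u32 key = 0;
        __u64 *val = bpf_map_lookup_elem(&counters, &key);

        if (!val)
                return 0;
        __sync_fetch_and_add(val, 1);                   /* BPF_ADD (BPF_FETCH if the result is used) */
        __sync_val_compare_and_swap(val, 1000, 0);      /* BPF_CMPXCHG; the old value lands in R0 */
        return 0;
}

char LICENSE[] SEC("license") = "GPL";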
7173 
7174 /* When register 'regno' is used to read the stack (either directly or through
7175  * a helper function) make sure that it's within stack boundary and, depending
7176  * on the access type and privileges, that all elements of the stack are
7177  * initialized.
7178  *
7179  * 'off' includes 'regno->off', but not its dynamic part (if any).
7180  *
7181  * All registers that have been spilled on the stack in the slots within the
7182  * read offsets are marked as read.
7183  */
7184 static int check_stack_range_initialized(
7185                 struct bpf_verifier_env *env, int regno, int off,
7186                 int access_size, bool zero_size_allowed,
7187                 enum bpf_access_src type, struct bpf_call_arg_meta *meta)
7188 {
7189         struct bpf_reg_state *reg = reg_state(env, regno);
7190         struct bpf_func_state *state = func(env, reg);
7191         int err, min_off, max_off, i, j, slot, spi;
7192         char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
7193         enum bpf_access_type bounds_check_type;
7194         /* Some accesses can write anything into the stack, others are
7195          * read-only.
7196          */
7197         bool clobber = false;
7198 
7199         if (access_size == 0 && !zero_size_allowed) {
7200                 verbose(env, "invalid zero-sized read\n");
7201                 return -EACCES;
7202         }
7203 
7204         if (type == ACCESS_HELPER) {
7205                 /* The bounds checks for writes are more permissive than for
7206                  * reads. However, if raw_mode is not set, we'll do extra
7207                  * checks below.
7208                  */
7209                 bounds_check_type = BPF_WRITE;
7210                 clobber = true;
7211         } else {
7212                 bounds_check_type = BPF_READ;
7213         }
7214         err = check_stack_access_within_bounds(env, regno, off, access_size,
7215                                                type, bounds_check_type);
7216         if (err)
7217                 return err;
7218 
7219 
7220         if (tnum_is_const(reg->var_off)) {
7221                 min_off = max_off = reg->var_off.value + off;
7222         } else {
7223                 /* Variable offset is prohibited for unprivileged mode for
7224                  * simplicity since it requires corresponding support in
7225                  * Spectre masking for stack ALU.
7226                  * See also retrieve_ptr_limit().
7227                  */
7228                 if (!env->bypass_spec_v1) {
7229                         char tn_buf[48];
7230 
7231                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7232                         verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
7233                                 regno, err_extra, tn_buf);
7234                         return -EACCES;
7235                 }
7236                 /* Only an initialized buffer on the stack is allowed to be accessed
7237                  * with a variable offset. With an uninitialized buffer it's hard to
7238                  * guarantee that the whole memory is marked as initialized on
7239                  * helper return, since the specific bounds are unknown, which may
7240                  * cause uninitialized stack data to leak.
7241                  */
7242                 if (meta && meta->raw_mode)
7243                         meta = NULL;
7244 
7245                 min_off = reg->smin_value + off;
7246                 max_off = reg->smax_value + off;
7247         }
7248 
7249         if (meta && meta->raw_mode) {
7250                 /* Ensure we won't be overwriting dynptrs when simulating byte
7251                  * by byte access in check_helper_call using meta.access_size.
7252                  * This would be a problem if we have a helper in the future
7253                  * which takes:
7254                  *
7255                  *      helper(uninit_mem, len, dynptr)
7256                  *
7257                  * Now, uninit_mem may overlap with the dynptr pointer. Hence, it
7258                  * may end up writing to the dynptr itself when touching memory from
7259                  * arg 1. This can be relaxed on a case by case basis for known
7260                  * safe cases, but reject by default due to the possibility of
7261                  * aliasing.
7262                  */
7263                 for (i = min_off; i < max_off + access_size; i++) {
7264                         int stack_off = -i - 1;
7265 
7266                         spi = __get_spi(i);
7267                         /* raw_mode may write past allocated_stack */
7268                         if (state->allocated_stack <= stack_off)
7269                                 continue;
7270                         if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
7271                                 verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
7272                                 return -EACCES;
7273                         }
7274                 }
7275                 meta->access_size = access_size;
7276                 meta->regno = regno;
7277                 return 0;
7278         }
7279 
7280         for (i = min_off; i < max_off + access_size; i++) {
7281                 u8 *stype;
7282 
7283                 slot = -i - 1;
7284                 spi = slot / BPF_REG_SIZE;
7285                 if (state->allocated_stack <= slot) {
7286                         verbose(env, "verifier bug: allocated_stack too small");
7287                         return -EFAULT;
7288                 }
7289 
7290                 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
7291                 if (*stype == STACK_MISC)
7292                         goto mark;
7293                 if ((*stype == STACK_ZERO) ||
7294                     (*stype == STACK_INVALID && env->allow_uninit_stack)) {
7295                         if (clobber) {
7296                                 /* helper can write anything into the stack */
7297                                 *stype = STACK_MISC;
7298                         }
7299                         goto mark;
7300                 }
7301 
7302                 if (is_spilled_reg(&state->stack[spi]) &&
7303                     (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
7304                      env->allow_ptr_leaks)) {
7305                         if (clobber) {
7306                                 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
7307                                 for (j = 0; j < BPF_REG_SIZE; j++)
7308                                         scrub_spilled_slot(&state->stack[spi].slot_type[j]);
7309                         }
7310                         goto mark;
7311                 }
7312 
7313                 if (tnum_is_const(reg->var_off)) {
7314                         verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
7315                                 err_extra, regno, min_off, i - min_off, access_size);
7316                 } else {
7317                         char tn_buf[48];
7318 
7319                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7320                         verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
7321                                 err_extra, regno, tn_buf, i - min_off, access_size);
7322                 }
7323                 return -EACCES;
7324 mark:
7325                 /* reading any byte out of 8-byte 'spill_slot' will cause
7326                  * the whole slot to be marked as 'read'
7327                  */
7328                 mark_reg_read(env, &state->stack[spi].spilled_ptr,
7329                               state->stack[spi].spilled_ptr.parent,
7330                               REG_LIVE_READ64);
7331                 /* We do not set REG_LIVE_WRITTEN for the stack slot, as we cannot
7332                  * be sure whether the stack slot is written to or not. Hence,
7333                  * we must still conservatively propagate reads upwards even if
7334                  * the helper may write to the entire memory range.
7335                  */
7336         }
7337         return 0;
7338 }
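
The practical upshot for program authors: a helper argument that is read from the stack must point at fully initialized bytes, while raw_mode (ARG_PTR_TO_UNINIT_MEM) destinations may start out uninitialized because the helper overwrites them. A hedged sketch (hypothetical map and attach point):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(max_entries, 128);
        __type(key, __u32);
        __type(value, __u64);
} vals SEC(".maps");

SEC("kprobe/do_nanosleep")
int demo(void *ctx)
{
        char comm[16];          /* may start uninitialized: raw_mode destination */
        __u32 key = 0;          /* must be initialized: the helper reads it from the stack */

        bpf_get_current_comm(comm, sizeof(comm));
        bpf_map_lookup_elem(&vals, &key);
        return 0;
}

char LICENSE[] SEC("license") = "GPL";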
7339 
7340 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
7341                                    int access_size, bool zero_size_allowed,
7342                                    struct bpf_call_arg_meta *meta)
7343 {
7344         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7345         u32 *max_access;
7346 
7347         switch (base_type(reg->type)) {
7348         case PTR_TO_PACKET:
7349         case PTR_TO_PACKET_META:
7350                 return check_packet_access(env, regno, reg->off, access_size,
7351                                            zero_size_allowed);
7352         case PTR_TO_MAP_KEY:
7353                 if (meta && meta->raw_mode) {
7354                         verbose(env, "R%d cannot write into %s\n", regno,
7355                                 reg_type_str(env, reg->type));
7356                         return -EACCES;
7357                 }
7358                 return check_mem_region_access(env, regno, reg->off, access_size,
7359                                                reg->map_ptr->key_size, false);
7360         case PTR_TO_MAP_VALUE:
7361                 if (check_map_access_type(env, regno, reg->off, access_size,
7362                                           meta && meta->raw_mode ? BPF_WRITE :
7363                                           BPF_READ))
7364                         return -EACCES;
7365                 return check_map_access(env, regno, reg->off, access_size,
7366                                         zero_size_allowed, ACCESS_HELPER);
7367         case PTR_TO_MEM:
7368                 if (type_is_rdonly_mem(reg->type)) {
7369                         if (meta && meta->raw_mode) {
7370                                 verbose(env, "R%d cannot write into %s\n", regno,
7371                                         reg_type_str(env, reg->type));
7372                                 return -EACCES;
7373                         }
7374                 }
7375                 return check_mem_region_access(env, regno, reg->off,
7376                                                access_size, reg->mem_size,
7377                                                zero_size_allowed);
7378         case PTR_TO_BUF:
7379                 if (type_is_rdonly_mem(reg->type)) {
7380                         if (meta && meta->raw_mode) {
7381                                 verbose(env, "R%d cannot write into %s\n", regno,
7382                                         reg_type_str(env, reg->type));
7383                                 return -EACCES;
7384                         }
7385 
7386                         max_access = &env->prog->aux->max_rdonly_access;
7387                 } else {
7388                         max_access = &env->prog->aux->max_rdwr_access;
7389                 }
7390                 return check_buffer_access(env, reg, regno, reg->off,
7391                                            access_size, zero_size_allowed,
7392                                            max_access);
7393         case PTR_TO_STACK:
7394                 return check_stack_range_initialized(
7395                                 env,
7396                                 regno, reg->off, access_size,
7397                                 zero_size_allowed, ACCESS_HELPER, meta);
7398         case PTR_TO_BTF_ID:
7399                 return check_ptr_to_btf_access(env, regs, regno, reg->off,
7400                                                access_size, BPF_READ, -1);
7401         case PTR_TO_CTX:
7402                 /* In case the function doesn't know how to access the context
7403                  * (because we are in a program of type SYSCALL, for example), we
7404                  * cannot statically check its size.
7405                  * Dynamically check it now.
7406                  */
7407                 if (!env->ops->convert_ctx_access) {
7408                         enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ;
7409                         int offset = access_size - 1;
7410 
7411                         /* Allow zero-byte read from PTR_TO_CTX */
7412                         if (access_size == 0)
7413                                 return zero_size_allowed ? 0 : -EACCES;
7414 
7415                         return check_mem_access(env, env->insn_idx, regno, offset, BPF_B,
7416                                                 atype, -1, false, false);
7417                 }
7418 
7419                 fallthrough;
7420         default: /* scalar_value or invalid ptr */
7421                 /* Allow zero-byte read from NULL, regardless of pointer type */
7422                 if (zero_size_allowed && access_size == 0 &&
7423                     register_is_null(reg))
7424                         return 0;
7425 
7426                 verbose(env, "R%d type=%s ", regno,
7427                         reg_type_str(env, reg->type));
7428                 verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
7429                 return -EACCES;
7430         }
7431 }
7432 
7433 /* verify arguments to helpers or kfuncs consisting of a pointer and an access
7434  * size.
7435  *
7436  * @regno is the register containing the access size. regno-1 is the register
7437  * containing the pointer.
7438  */
7439 static int check_mem_size_reg(struct bpf_verifier_env *env,
7440                               struct bpf_reg_state *reg, u32 regno,
7441                               bool zero_size_allowed,
7442                               struct bpf_call_arg_meta *meta)
7443 {
7444         int err;
7445 
7446         /* This is used to refine r0 return value bounds for helpers
7447          * that enforce this value as an upper bound on return values.
7448          * See do_refine_retval_range() for helpers that can refine
7449          * the return value. The C type of the helper argument is u32, so we pull
7450          * the register bound from umax_value; however, if it is negative the
7451          * verifier errors out. Only upper bounds can be learned because retval
7452          * is an int type and negative retvals are allowed.
7453          */
7454         meta->msize_max_value = reg->umax_value;
7455 
7456         /* The register is SCALAR_VALUE; the access check
7457          * happens using its boundaries.
7458          */
7459         if (!tnum_is_const(reg->var_off))
7460                 /* For unprivileged variable accesses, disable raw
7461                  * mode so that the program is required to
7462                  * initialize all the memory that the helper could
7463                  * just partially fill up.
7464                  */
7465                 meta = NULL;
7466 
7467         if (reg->smin_value < 0) {
7468                 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
7469                         regno);
7470                 return -EACCES;
7471         }
7472 
7473         if (reg->umin_value == 0 && !zero_size_allowed) {
7474                 verbose(env, "R%d invalid zero-sized read: u64=[%lld,%lld]\n",
7475                         regno, reg->umin_value, reg->umax_value);
7476                 return -EACCES;
7477         }
7478 
7479         if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
7480                 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
7481                         regno);
7482                 return -EACCES;
7483         }
7484         err = check_helper_mem_access(env, regno - 1,
7485                                       reg->umax_value,
7486                                       zero_size_allowed, meta);
7487         if (!err)
7488                 err = mark_chain_precision(env, regno);
7489         return err;
7490 }
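
This is the check behind the familiar "var &= const" idiom: a variable length must be provably bounded (with umax_value no larger than the target buffer) before it can be passed as a helper's size argument. A hedged sketch (hypothetical attach point; bpf_get_current_pid_tgid() merely stands in for some unbounded value):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("kprobe/do_nanosleep")
int demo(void *ctx)
{
        char buf[256];
        __u64 len = bpf_get_current_pid_tgid();  /* unbounded as far as the verifier knows */

        len &= 0xff;    /* now umax_value <= 255 <= sizeof(buf), so the call below is accepted */
        bpf_probe_read_kernel(buf, len, (void *)bpf_get_current_task());
        return 0;
}

char LICENSE[] SEC("license") = "GPL";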
7491 
7492 static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
7493                          u32 regno, u32 mem_size)
7494 {
7495         bool may_be_null = type_may_be_null(reg->type);
7496         struct bpf_reg_state saved_reg;
7497         struct bpf_call_arg_meta meta;
7498         int err;
7499 
7500         if (register_is_null(reg))
7501                 return 0;
7502 
7503         memset(&meta, 0, sizeof(meta));
7504         /* Assuming that the register contains a value check if the memory
7505          * access is safe. Temporarily save and restore the register's state as
7506          * the conversion shouldn't be visible to a caller.
7507          */
7508         if (may_be_null) {
7509                 saved_reg = *reg;
7510                 mark_ptr_not_null_reg(reg);
7511         }
7512 
7513         err = check_helper_mem_access(env, regno, mem_size, true, &meta);
7514         /* Check access for BPF_WRITE */
7515         meta.raw_mode = true;
7516         err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
7517 
7518         if (may_be_null)
7519                 *reg = saved_reg;
7520 
7521         return err;
7522 }
7523 
7524 static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
7525                                     u32 regno)
7526 {
7527         struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
7528         bool may_be_null = type_may_be_null(mem_reg->type);
7529         struct bpf_reg_state saved_reg;
7530         struct bpf_call_arg_meta meta;
7531         int err;
7532 
7533         WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
7534 
7535         memset(&meta, 0, sizeof(meta));
7536 
7537         if (may_be_null) {
7538                 saved_reg = *mem_reg;
7539                 mark_ptr_not_null_reg(mem_reg);
7540         }
7541 
7542         err = check_mem_size_reg(env, reg, regno, true, &meta);
7543         /* Check access for BPF_WRITE */
7544         meta.raw_mode = true;
7545         err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
7546 
7547         if (may_be_null)
7548                 *mem_reg = saved_reg;
7549         return err;
7550 }
7551 
7552 /* Implementation details:
7553  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
7554  * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
7555  * Two bpf_map_lookups (even with the same key) will have different reg->id.
7556  * Two separate bpf_obj_new will also have different reg->id.
7557  * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
7558  * clears reg->id after value_or_null->value transition, since the verifier only
7559  * cares about the range of access to valid map value pointer and doesn't care
7560  * about actual address of the map element.
7561  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
7562  * reg->id > 0 after value_or_null->value transition. By doing so
7563  * two bpf_map_lookups will be considered two different pointers that
7564  * point to different bpf_spin_locks. Likewise for pointers to allocated objects
7565  * returned from bpf_obj_new.
7566  * The verifier allows taking only one bpf_spin_lock at a time to avoid
7567  * deadlocks.
7568  * Since only one bpf_spin_lock is allowed the checks are simpler than
7569  * reg_is_refcounted() logic. The verifier needs to remember only
7570  * one spin_lock instead of array of acquired_refs.
7571  * cur_state->active_lock remembers which map value element or allocated
7572  * object got locked and clears it after bpf_spin_unlock.
7573  */
7574 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
7575                              bool is_lock)
7576 {
7577         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7578         struct bpf_verifier_state *cur = env->cur_state;
7579         bool is_const = tnum_is_const(reg->var_off);
7580         u64 val = reg->var_off.value;
7581         struct bpf_map *map = NULL;
7582         struct btf *btf = NULL;
7583         struct btf_record *rec;
7584 
7585         if (!is_const) {
7586                 verbose(env,
7587                         "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
7588                         regno);
7589                 return -EINVAL;
7590         }
7591         if (reg->type == PTR_TO_MAP_VALUE) {
7592                 map = reg->map_ptr;
7593                 if (!map->btf) {
7594                         verbose(env,
7595                                 "map '%s' has to have BTF in order to use bpf_spin_lock\n",
7596                                 map->name);
7597                         return -EINVAL;
7598                 }
7599         } else {
7600                 btf = reg->btf;
7601         }
7602 
7603         rec = reg_btf_record(reg);
7604         if (!btf_record_has_field(rec, BPF_SPIN_LOCK)) {
7605                 verbose(env, "%s '%s' has no valid bpf_spin_lock\n", map ? "map" : "local",
7606                         map ? map->name : "kptr");
7607                 return -EINVAL;
7608         }
7609         if (rec->spin_lock_off != val + reg->off) {
7610                 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n",
7611                         val + reg->off, rec->spin_lock_off);
7612                 return -EINVAL;
7613         }
7614         if (is_lock) {
7615                 if (cur->active_lock.ptr) {
7616                         verbose(env,
7617                                 "Locking two bpf_spin_locks is not allowed\n");
7618                         return -EINVAL;
7619                 }
7620                 if (map)
7621                         cur->active_lock.ptr = map;
7622                 else
7623                         cur->active_lock.ptr = btf;
7624                 cur->active_lock.id = reg->id;
7625         } else {
7626                 void *ptr;
7627 
7628                 if (map)
7629                         ptr = map;
7630                 else
7631                         ptr = btf;
7632 
7633                 if (!cur->active_lock.ptr) {
7634                         verbose(env, "bpf_spin_unlock without taking a lock\n");
7635                         return -EINVAL;
7636                 }
7637                 if (cur->active_lock.ptr != ptr ||
7638                     cur->active_lock.id != reg->id) {
7639                         verbose(env, "bpf_spin_unlock of different lock\n");
7640                         return -EINVAL;
7641                 }
7642 
7643                 invalidate_non_owning_refs(env);
7644 
7645                 cur->active_lock.ptr = NULL;
7646                 cur->active_lock.id = 0;
7647         }
7648         return 0;
7649 }
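
/* A minimal usage sketch of the locking rules enforced above; the map, the
 * value layout and the key are hypothetical:
 *
 *     struct val {
 *             struct bpf_spin_lock lock;
 *             int data;
 *     };
 *
 *     struct val *v = bpf_map_lookup_elem(&my_map, &key);
 *     if (v) {
 *             bpf_spin_lock(&v->lock);
 *             v->data++;
 *             bpf_spin_unlock(&v->lock);
 *     }
 *
 * Taking a second bpf_spin_lock before releasing the first, or unlocking a
 * lock obtained from a different lookup, fails the checks above.
 */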
7650 
7651 static int process_timer_func(struct bpf_verifier_env *env, int regno,
7652                               struct bpf_call_arg_meta *meta)
7653 {
7654         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7655         bool is_const = tnum_is_const(reg->var_off);
7656         struct bpf_map *map = reg->map_ptr;
7657         u64 val = reg->var_off.value;
7658 
7659         if (!is_const) {
7660                 verbose(env,
7661                         "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
7662                         regno);
7663                 return -EINVAL;
7664         }
7665         if (!map->btf) {
7666                 verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
7667                         map->name);
7668                 return -EINVAL;
7669         }
7670         if (!btf_record_has_field(map->record, BPF_TIMER)) {
7671                 verbose(env, "map '%s' has no valid bpf_timer\n", map->name);
7672                 return -EINVAL;
7673         }
7674         if (map->record->timer_off != val + reg->off) {
7675                 verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
7676                         val + reg->off, map->record->timer_off);
7677                 return -EINVAL;
7678         }
7679         if (meta->map_ptr) {
7680                 verbose(env, "verifier bug. Two map pointers in a timer helper\n");
7681                 return -EFAULT;
7682         }
7683         meta->map_uid = reg->map_uid;
7684         meta->map_ptr = map;
7685         return 0;
7686 }
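
/* A minimal usage sketch of what process_timer_func() validates; the map,
 * value layout and key are hypothetical. The bpf_timer must sit at its
 * BTF-described offset in the map value, and the same map must be passed to
 * bpf_timer_init():
 *
 *     struct elem {
 *             struct bpf_timer t;
 *     };
 *
 *     struct elem *e = bpf_map_lookup_elem(&timer_map, &key);
 *     if (e)
 *             bpf_timer_init(&e->t, &timer_map, CLOCK_MONOTONIC);
 */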
7687 
7688 static int process_wq_func(struct bpf_verifier_env *env, int regno,
7689                            struct bpf_kfunc_call_arg_meta *meta)
7690 {
7691         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7692         struct bpf_map *map = reg->map_ptr;
7693         u64 val = reg->var_off.value;
7694 
7695         if (map->record->wq_off != val + reg->off) {
7696                 verbose(env, "off %lld doesn't point to 'struct bpf_wq' that is at %d\n",
7697                         val + reg->off, map->record->wq_off);
7698                 return -EINVAL;
7699         }
7700         meta->map.uid = reg->map_uid;
7701         meta->map.ptr = map;
7702         return 0;
7703 }
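
/* An analogous sketch for bpf_wq (names hypothetical): the struct bpf_wq has
 * to be located at the recorded wq_off inside the map value, e.g.:
 *
 *     struct elem {
 *             struct bpf_wq w;
 *     };
 *
 *     struct elem *e = bpf_map_lookup_elem(&wq_map, &key);
 *     if (e)
 *             bpf_wq_init(&e->w, &wq_map, 0);
 */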
7704 
7705 static int process_kptr_func(struct bpf_verifier_env *env, int regno,
7706                              struct bpf_call_arg_meta *meta)
7707 {
7708         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7709         struct bpf_map *map_ptr = reg->map_ptr;
7710         struct btf_field *kptr_field;
7711         u32 kptr_off;
7712 
7713         if (!tnum_is_const(reg->var_off)) {
7714                 verbose(env,
7715                         "R%d doesn't have constant offset. kptr has to be at the constant offset\n",
7716                         regno);
7717                 return -EINVAL;
7718         }
7719         if (!map_ptr->btf) {
7720                 verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
7721                         map_ptr->name);
7722                 return -EINVAL;
7723         }
7724         if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) {
7725                 verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
7726                 return -EINVAL;
7727         }
7728 
7729         meta->map_ptr = map_ptr;
7730         kptr_off = reg->off + reg->var_off.value;
7731         kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR);
7732         if (!kptr_field) {
7733                 verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
7734                 return -EACCES;
7735         }
7736         if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
7737                 verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
7738                 return -EACCES;
7739         }
7740         meta->kptr_field = kptr_field;
7741         return 0;
7742 }
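
/* A minimal sketch of the referenced-kptr exchange this argument type is used
 * for; 'struct foo', the map value layout and the acquiring kfunc providing
 * 'new' are hypothetical:
 *
 *     struct map_val {
 *             struct foo __kptr *ptr;
 *     };
 *
 *     struct foo *old = bpf_kptr_xchg(&v->ptr, new);  // 'new' moves into the map
 *     if (old)
 *             release(old);                           // program now owns 'old'
 */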
7743 
7744 /* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
7745  * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
7746  *
7747  * In both cases we deal with the first 8 bytes, but need to mark the next 8
7748  * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
7749  * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
7750  *
7751  * Mutability of bpf_dynptr is at two levels, one is at the level of struct
7752  * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
7753  * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
7754  * mutate the view of the dynptr and also possibly destroy it. In the latter
7755  * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
7756  * memory that dynptr points to.
7757  *
7758  * The verifier will keep track of both levels of mutation (bpf_dynptr's in
7759  * reg->type and the memory's in reg->dynptr.type), but there is no support for
7760  * readonly dynptr view yet, hence only the first case is tracked and checked.
7761  *
7762  * This is consistent with how C applies the const modifier to a struct object,
7763  * where the pointer itself inside bpf_dynptr becomes const but not what it
7764  * points to.
7765  *
7766  * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
7767  * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
7768  */
7769 static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
7770                                enum bpf_arg_type arg_type, int clone_ref_obj_id)
7771 {
7772         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7773         int err;
7774 
7775         if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) {
7776                 verbose(env,
7777                         "arg#%d expected pointer to stack or const struct bpf_dynptr\n",
7778                         regno);
7779                 return -EINVAL;
7780         }
7781 
7782         /* MEM_UNINIT and MEM_RDONLY are mutually exclusive when applied to an
7783          * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*).
7784          */
7785         if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
7786                 verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
7787                 return -EFAULT;
7788         }
7789 
7790         /*  MEM_UNINIT - Points to memory that is an appropriate candidate for
7791          *               constructing a mutable bpf_dynptr object.
7792          *
7793          *               Currently, this is only possible with PTR_TO_STACK
7794          *               pointing to a region of at least 16 bytes which doesn't
7795          *               contain an existing bpf_dynptr.
7796          *
7797          *  MEM_RDONLY - Points to an initialized bpf_dynptr that will not be
7798          *               mutated or destroyed. However, the memory it points to
7799          *               may be mutated.
7800          *
7801          *  None       - Points to an initialized dynptr that can be mutated and
7802          *               destroyed, including mutation of the memory it points
7803          *               to.
7804          */
7805         if (arg_type & MEM_UNINIT) {
7806                 int i;
7807 
7808                 if (!is_dynptr_reg_valid_uninit(env, reg)) {
7809                         verbose(env, "Dynptr has to be an uninitialized dynptr\n");
7810                         return -EINVAL;
7811                 }
7812 
7813                 /* we write one BPF_DW (8 bytes) at a time */
7814                 for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
7815                         err = check_mem_access(env, insn_idx, regno,
7816                                                i, BPF_DW, BPF_WRITE, -1, false, false);
7817                         if (err)
7818                                 return err;
7819                 }
7820 
7821                 err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, clone_ref_obj_id);
7822         } else /* MEM_RDONLY and None case from above */ {
7823                 /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
7824                 if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
7825                         verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
7826                         return -EINVAL;
7827                 }
7828 
7829                 if (!is_dynptr_reg_valid_init(env, reg)) {
7830                         verbose(env,
7831                                 "Expected an initialized dynptr as arg #%d\n",
7832                                 regno);
7833                         return -EINVAL;
7834                 }
7835 
7836                 /* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
7837                 if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
7838                         verbose(env,
7839                                 "Expected a dynptr of type %s as arg #%d\n",
7840                                 dynptr_type_str(arg_to_dynptr_type(arg_type)), regno);
7841                         return -EINVAL;
7842                 }
7843 
7844                 err = mark_dynptr_read(env, reg);
7845         }
7846         return err;
7847 }
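
/* For example, two UAPI helper prototypes that follow the convention described
 * in the comment above process_dynptr_func():
 *
 *     // only reads through the dynptr: the dynptr argument is const and the
 *     // corresponding arg_type carries MEM_RDONLY
 *     long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src,
 *                          u32 offset, u64 flags);
 *
 *     // constructs a dynptr in caller-provided memory: the argument is
 *     // non-const and the arg_type carries MEM_UNINIT
 *     long bpf_dynptr_from_mem(void *data, u32 size, u64 flags,
 *                              struct bpf_dynptr *ptr);
 */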
7848 
7849 static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi)
7850 {
7851         struct bpf_func_state *state = func(env, reg);
7852 
7853         return state->stack[spi].spilled_ptr.ref_obj_id;
7854 }
7855 
7856 static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7857 {
7858         return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
7859 }
7860 
7861 static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7862 {
7863         return meta->kfunc_flags & KF_ITER_NEW;
7864 }
7865 
7866 static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7867 {
7868         return meta->kfunc_flags & KF_ITER_NEXT;
7869 }
7870 
7871 static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7872 {
7873         return meta->kfunc_flags & KF_ITER_DESTROY;
7874 }
7875 
7876 static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg)
7877 {
7878         /* btf_check_iter_kfuncs() guarantees that first argument of any iter
7879          * kfunc is iter state pointer
7880          */
7881         return arg == 0 && is_iter_kfunc(meta);
7882 }
7883 
7884 static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
7885                             struct bpf_kfunc_call_arg_meta *meta)
7886 {
7887         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7888         const struct btf_type *t;
7889         const struct btf_param *arg;
7890         int spi, err, i, nr_slots;
7891         u32 btf_id;
7892 
7893         /* btf_check_iter_kfuncs() ensures we don't need to validate anything here */
7894         arg = &btf_params(meta->func_proto)[0];
7895         t = btf_type_skip_modifiers(meta->btf, arg->type, NULL);        /* PTR */
7896         t = btf_type_skip_modifiers(meta->btf, t->type, &btf_id);       /* STRUCT */
7897         nr_slots = t->size / BPF_REG_SIZE;
7898 
7899         if (is_iter_new_kfunc(meta)) {
7900                 /* bpf_iter_<type>_new() expects pointer to uninit iter state */
7901                 if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
7902                         verbose(env, "expected uninitialized iter_%s as arg #%d\n",
7903                                 iter_type_str(meta->btf, btf_id), regno);
7904                         return -EINVAL;
7905                 }
7906 
7907                 for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
7908                         err = check_mem_access(env, insn_idx, regno,
7909                                                i, BPF_DW, BPF_WRITE, -1, false, false);
7910                         if (err)
7911                                 return err;
7912                 }
7913 
7914                 err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots);
7915                 if (err)
7916                         return err;
7917         } else {
7918                 /* iter_next() or iter_destroy() expect an initialized iter state */
7919                 err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
7920                 switch (err) {
7921                 case 0:
7922                         break;
7923                 case -EINVAL:
7924                         verbose(env, "expected an initialized iter_%s as arg #%d\n",
7925                                 iter_type_str(meta->btf, btf_id), regno);
7926                         return err;
7927                 case -EPROTO:
7928                         verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
7929                         return err;
7930                 default:
7931                         return err;
7932                 }
7933 
7934                 spi = iter_get_spi(env, reg, nr_slots);
7935                 if (spi < 0)
7936                         return spi;
7937 
7938                 err = mark_iter_read(env, reg, spi, nr_slots);
7939                 if (err)
7940                         return err;
7941 
7942                 /* remember meta->iter info for process_iter_next_call() */
7943                 meta->iter.spi = spi;
7944                 meta->iter.frameno = reg->frameno;
7945                 meta->ref_obj_id = iter_ref_obj_id(env, reg, spi);
7946 
7947                 if (is_iter_destroy_kfunc(meta)) {
7948                         err = unmark_stack_slots_iter(env, reg, nr_slots);
7949                         if (err)
7950                                 return err;
7951                 }
7952         }
7953 
7954         return 0;
7955 }
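
/* A sketch of the iterator lifecycle these checks enforce, using the numbers
 * iterator as an example:
 *
 *     struct bpf_iter_num it;               // uninitialized stack slots
 *     bpf_iter_num_new(&it, 0, 10);         // KF_ITER_NEW wants uninit slots
 *     while (bpf_iter_num_next(&it)) {      // KF_ITER_NEXT wants init slots
 *             ...
 *     }
 *     bpf_iter_num_destroy(&it);            // KF_ITER_DESTROY releases them
 */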
7956 
7957 /* Look for a previous loop entry at insn_idx: nearest parent state
7958  * stopped at insn_idx with callsites matching those in cur->frame.
7959  */
7960 static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
7961                                                   struct bpf_verifier_state *cur,
7962                                                   int insn_idx)
7963 {
7964         struct bpf_verifier_state_list *sl;
7965         struct bpf_verifier_state *st;
7966 
7967         /* Explored states are pushed in stack order, most recent states come first */
7968         sl = *explored_state(env, insn_idx);
7969         for (; sl; sl = sl->next) {
7970                 /* If st->branches != 0, the state is part of the current DFS verification path,
7971                  * hence cur and st form a loop.
7972                  */
7973                 st = &sl->state;
7974                 if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
7975                     st->dfs_depth < cur->dfs_depth)
7976                         return st;
7977         }
7978 
7979         return NULL;
7980 }
7981 
7982 static void reset_idmap_scratch(struct bpf_verifier_env *env);
7983 static bool regs_exact(const struct bpf_reg_state *rold,
7984                        const struct bpf_reg_state *rcur,
7985                        struct bpf_idmap *idmap);
7986 
7987 static void maybe_widen_reg(struct bpf_verifier_env *env,
7988                             struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
7989                             struct bpf_idmap *idmap)
7990 {
7991         if (rold->type != SCALAR_VALUE)
7992                 return;
7993         if (rold->type != rcur->type)
7994                 return;
7995         if (rold->precise || rcur->precise || regs_exact(rold, rcur, idmap))
7996                 return;
7997         __mark_reg_unknown(env, rcur);
7998 }
7999 
8000 static int widen_imprecise_scalars(struct bpf_verifier_env *env,
8001                                    struct bpf_verifier_state *old,
8002                                    struct bpf_verifier_state *cur)
8003 {
8004         struct bpf_func_state *fold, *fcur;
8005         int i, fr;
8006 
8007         reset_idmap_scratch(env);
8008         for (fr = old->curframe; fr >= 0; fr--) {
8009                 fold = old->frame[fr];
8010                 fcur = cur->frame[fr];
8011 
8012                 for (i = 0; i < MAX_BPF_REG; i++)
8013                         maybe_widen_reg(env,
8014                                         &fold->regs[i],
8015                                         &fcur->regs[i],
8016                                         &env->idmap_scratch);
8017 
8018                 for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) {
8019                         if (!is_spilled_reg(&fold->stack[i]) ||
8020                             !is_spilled_reg(&fcur->stack[i]))
8021                                 continue;
8022 
8023                         maybe_widen_reg(env,
8024                                         &fold->stack[i].spilled_ptr,
8025                                         &fcur->stack[i].spilled_ptr,
8026                                         &env->idmap_scratch);
8027                 }
8028         }
8029         return 0;
8030 }
8031 
8032 static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st,
8033                                                  struct bpf_kfunc_call_arg_meta *meta)
8034 {
8035         int iter_frameno = meta->iter.frameno;
8036         int iter_spi = meta->iter.spi;
8037 
8038         return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
8039 }
8040 
8041 /* process_iter_next_call() is called when verifier gets to iterator's next
8042  * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
8043  * to it as just "iter_next()" in comments below.
8044  *
8045  * BPF verifier relies on a crucial contract for any iter_next()
8046  * implementation: it should *eventually* return NULL, and once that happens
8047  * it should keep returning NULL. That is, once iterator exhausts elements to
8048  * iterate, it should never reset or spuriously return new elements.
8049  *
8050  * With the assumption of such contract, process_iter_next_call() simulates
8051  * a fork in the verifier state to validate loop logic correctness and safety
8052  * without having to simulate infinite amount of iterations.
8053  *
8054  * In current state, we first assume that iter_next() returned NULL and
8055  * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
8056  * conditions we should not form an infinite loop and should eventually reach
8057  * exit.
8058  *
8059  * Besides that, we also fork current state and enqueue it for later
8060  * verification. In a forked state we keep iterator state as ACTIVE
8061  * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
8062  * also bump iteration depth to prevent erroneous infinite loop detection
8063  * later on (see iter_active_depths_differ() comment for details). In this
8064  * state we assume that we'll eventually loop back to another iter_next()
8065  * call (it could be at exactly the same location or at some other instruction,
8066  * it doesn't matter, we don't make any unnecessary assumptions about this,
8067  * everything revolves around iterator state in a stack slot, not which
8068  * instruction is calling iter_next()). When that happens, we either will come
8069  * to iter_next() with equivalent state and can conclude that next iteration
8070  * will proceed in exactly the same way as we just verified, so it's safe to
8071  * assume that loop converges. If not, we'll go on another iteration
8072  * simulation with a different input state, until all possible starting states
8073  * are validated or we reach maximum number of instructions limit.
8074  *
8075  * This way, we will either exhaustively discover all possible input states
8076  * that iterator loop can start with and eventually will converge, or we'll
8077  * effectively regress into bounded loop simulation logic and either reach
8078  * maximum number of instructions if loop is not provably convergent, or there
8079  * is some statically known limit on number of iterations (e.g., if there is
8080  * an explicit `if n > 100 then break;` statement somewhere in the loop).
8081  *
8082  * Iteration convergence logic in is_state_visited() relies on exact
8083  * states comparison, which ignores read and precision marks.
8084  * This is necessary because read and precision marks are not finalized
8085  * while in the loop. Exact comparison might preclude convergence for
8086  * simple programs like below:
8087  *
8088  *     i = 0;
8089  *     while(iter_next(&it))
8090  *       i++;
8091  *
8092  * At each iteration step i++ would produce a new distinct state and
8093  * eventually instruction processing limit would be reached.
8094  *
8095  * To avoid such behavior, speculatively forget (widen) the range of
8096  * imprecise scalar registers if those registers were not precise at the
8097  * end of the previous iteration and do not match exactly.
8098  *
8099  * This is a conservative heuristic that allows verifying a wide range of programs;
8100  * however, it precludes verification of programs that conjure an
8101  * imprecise value on the first loop iteration and use it as precise on the second.
8102  * For example, the following safe program would fail to verify:
8103  *
8104  *     struct bpf_num_iter it;
8105  *     int arr[10];
8106  *     int i = 0, a = 0;
8107  *     bpf_iter_num_new(&it, 0, 10);
8108  *     while (bpf_iter_num_next(&it)) {
8109  *       if (a == 0) {
8110  *         a = 1;
8111  *         i = 7; // Because i changed, the verifier would forget
8112  *                // its range on the second loop entry.
8113  *       } else {
8114  *         arr[i] = 42; // This would fail to verify.
8115  *       }
8116  *     }
8117  *     bpf_iter_num_destroy(&it);
8118  */
8119 static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
8120                                   struct bpf_kfunc_call_arg_meta *meta)
8121 {
8122         struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
8123         struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
8124         struct bpf_reg_state *cur_iter, *queued_iter;
8125 
8126         BTF_TYPE_EMIT(struct bpf_iter);
8127 
8128         cur_iter = get_iter_from_state(cur_st, meta);
8129 
8130         if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
8131             cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
8132                 verbose(env, "verifier internal error: unexpected iterator state %d (%s)\n",
8133                         cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
8134                 return -EFAULT;
8135         }
8136 
8137         if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
8138                 /* Because an iter_next() call is a checkpoint, is_state_visited()
8139                  * should guarantee a parent state with the same call sites and insn_idx.
8140                  */
8141                 if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
8142                     !same_callsites(cur_st->parent, cur_st)) {
8143                         verbose(env, "bug: bad parent state for iter next call\n");
8144                         return -EFAULT;
8145                 }
8146                 /* Note cur_st->parent in the call below, it is necessary to skip
8147                  * checkpoint created for cur_st by is_state_visited()
8148                  * right at this instruction.
8149                  */
8150                 prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
8151                 /* branch out active iter state */
8152                 queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
8153                 if (!queued_st)
8154                         return -ENOMEM;
8155 
8156                 queued_iter = get_iter_from_state(queued_st, meta);
8157                 queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
8158                 queued_iter->iter.depth++;
8159                 if (prev_st)
8160                         widen_imprecise_scalars(env, prev_st, queued_st);
8161 
8162                 queued_fr = queued_st->frame[queued_st->curframe];
8163                 mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
8164         }
8165 
8166         /* switch to DRAINED state, but keep the depth unchanged: mark current
8167          * iter state as drained and assume iter_next() returned NULL */
8168         cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
8169         __mark_reg_const_zero(env, &cur_fr->regs[BPF_REG_0]);
8170 
8171         return 0;
8172 }
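
/* For contrast with the failing example in the comment above, a sketch of a
 * loop that does converge under the widening heuristic:
 *
 *     struct bpf_iter_num it;
 *     int *v, sum = 0;
 *     bpf_iter_num_new(&it, 0, 10);
 *     while ((v = bpf_iter_num_next(&it)))
 *             sum += *v;
 *     bpf_iter_num_destroy(&it);
 *
 * 'sum' stays imprecise, so it is widened to an unknown scalar on the next
 * loop entry, the states become equivalent and verification terminates.
 */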
8173 
8174 static bool arg_type_is_mem_size(enum bpf_arg_type type)
8175 {
8176         return type == ARG_CONST_SIZE ||
8177                type == ARG_CONST_SIZE_OR_ZERO;
8178 }
8179 
8180 static bool arg_type_is_raw_mem(enum bpf_arg_type type)
8181 {
8182         return base_type(type) == ARG_PTR_TO_MEM &&
8183                type & MEM_UNINIT;
8184 }
8185 
8186 static bool arg_type_is_release(enum bpf_arg_type type)
8187 {
8188         return type & OBJ_RELEASE;
8189 }
8190 
8191 static bool arg_type_is_dynptr(enum bpf_arg_type type)
8192 {
8193         return base_type(type) == ARG_PTR_TO_DYNPTR;
8194 }
8195 
8196 static int resolve_map_arg_type(struct bpf_verifier_env *env,
8197                                  const struct bpf_call_arg_meta *meta,
8198                                  enum bpf_arg_type *arg_type)
8199 {
8200         if (!meta->map_ptr) {
8201                 /* kernel subsystem misconfigured verifier */
8202                 verbose(env, "invalid map_ptr to access map->type\n");
8203                 return -EACCES;
8204         }
8205 
8206         switch (meta->map_ptr->map_type) {
8207         case BPF_MAP_TYPE_SOCKMAP:
8208         case BPF_MAP_TYPE_SOCKHASH:
8209                 if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
8210                         *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
8211                 } else {
8212                         verbose(env, "invalid arg_type for sockmap/sockhash\n");
8213                         return -EINVAL;
8214                 }
8215                 break;
8216         case BPF_MAP_TYPE_BLOOM_FILTER:
8217                 if (meta->func_id == BPF_FUNC_map_peek_elem)
8218                         *arg_type = ARG_PTR_TO_MAP_VALUE;
8219                 break;
8220         default:
8221                 break;
8222         }
8223         return 0;
8224 }
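
/* A sketch of why the sockmap/sockhash rewrite above exists: from program
 * types that may update these maps, the "value" handed to the update helper
 * is a socket pointer rather than plain memory (map, key and 'sk' are
 * hypothetical):
 *
 *     bpf_map_update_elem(&sock_map, &key, sk, BPF_NOEXIST);
 *
 * so the argument has to be checked as ARG_PTR_TO_BTF_ID_SOCK_COMMON instead
 * of ARG_PTR_TO_MAP_VALUE.
 */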
8225 
8226 struct bpf_reg_types {
8227         const enum bpf_reg_type types[10];
8228         u32 *btf_id;
8229 };
8230 
8231 static const struct bpf_reg_types sock_types = {
8232         .types = {
8233                 PTR_TO_SOCK_COMMON,
8234                 PTR_TO_SOCKET,
8235                 PTR_TO_TCP_SOCK,
8236                 PTR_TO_XDP_SOCK,
8237         },
8238 };
8239 
8240 #ifdef CONFIG_NET
8241 static const struct bpf_reg_types btf_id_sock_common_types = {
8242         .types = {
8243                 PTR_TO_SOCK_COMMON,
8244                 PTR_TO_SOCKET,
8245                 PTR_TO_TCP_SOCK,
8246                 PTR_TO_XDP_SOCK,
8247                 PTR_TO_BTF_ID,
8248                 PTR_TO_BTF_ID | PTR_TRUSTED,
8249         },
8250         .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
8251 };
8252 #endif
8253 
8254 static const struct bpf_reg_types mem_types = {
8255         .types = {
8256                 PTR_TO_STACK,
8257                 PTR_TO_PACKET,
8258                 PTR_TO_PACKET_META,
8259                 PTR_TO_MAP_KEY,
8260                 PTR_TO_MAP_VALUE,
8261                 PTR_TO_MEM,
8262                 PTR_TO_MEM | MEM_RINGBUF,
8263                 PTR_TO_BUF,
8264                 PTR_TO_BTF_ID | PTR_TRUSTED,
8265         },
8266 };
8267 
8268 static const struct bpf_reg_types spin_lock_types = {
8269         .types = {
8270                 PTR_TO_MAP_VALUE,
8271                 PTR_TO_BTF_ID | MEM_ALLOC,
8272         }
8273 };
8274 
8275 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
8276 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
8277 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
8278 static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
8279 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
8280 static const struct bpf_reg_types btf_ptr_types = {
8281         .types = {
8282                 PTR_TO_BTF_ID,
8283                 PTR_TO_BTF_ID | PTR_TRUSTED,
8284                 PTR_TO_BTF_ID | MEM_RCU,
8285         },
8286 };
8287 static const struct bpf_reg_types percpu_btf_ptr_types = {
8288         .types = {
8289                 PTR_TO_BTF_ID | MEM_PERCPU,
8290                 PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU,
8291                 PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
8292         }
8293 };
8294 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
8295 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
8296 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
8297 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
8298 static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
8299 static const struct bpf_reg_types dynptr_types = {
8300         .types = {
8301                 PTR_TO_STACK,
8302                 CONST_PTR_TO_DYNPTR,
8303         }
8304 };
8305 
8306 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
8307         [ARG_PTR_TO_MAP_KEY]            = &mem_types,
8308         [ARG_PTR_TO_MAP_VALUE]          = &mem_types,
8309         [ARG_CONST_SIZE]                = &scalar_types,
8310         [ARG_CONST_SIZE_OR_ZERO]        = &scalar_types,
8311         [ARG_CONST_ALLOC_SIZE_OR_ZERO]  = &scalar_types,
8312         [ARG_CONST_MAP_PTR]             = &const_map_ptr_types,
8313         [ARG_PTR_TO_CTX]                = &context_types,
8314         [ARG_PTR_TO_SOCK_COMMON]        = &sock_types,
8315 #ifdef CONFIG_NET
8316         [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
8317 #endif
8318         [ARG_PTR_TO_SOCKET]             = &fullsock_types,
8319         [ARG_PTR_TO_BTF_ID]             = &btf_ptr_types,
8320         [ARG_PTR_TO_SPIN_LOCK]          = &spin_lock_types,
8321         [ARG_PTR_TO_MEM]                = &mem_types,
8322         [ARG_PTR_TO_RINGBUF_MEM]        = &ringbuf_mem_types,
8323         [ARG_PTR_TO_PERCPU_BTF_ID]      = &percpu_btf_ptr_types,
8324         [ARG_PTR_TO_FUNC]               = &func_ptr_types,
8325         [ARG_PTR_TO_STACK]              = &stack_ptr_types,
8326         [ARG_PTR_TO_CONST_STR]          = &const_str_ptr_types,
8327         [ARG_PTR_TO_TIMER]              = &timer_types,
8328         [ARG_PTR_TO_KPTR]               = &kptr_types,
8329         [ARG_PTR_TO_DYNPTR]             = &dynptr_types,
8330 };
8331 
8332 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
8333                           enum bpf_arg_type arg_type,
8334                           const u32 *arg_btf_id,
8335                           struct bpf_call_arg_meta *meta)
8336 {
8337         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8338         enum bpf_reg_type expected, type = reg->type;
8339         const struct bpf_reg_types *compatible;
8340         int i, j;
8341 
8342         compatible = compatible_reg_types[base_type(arg_type)];
8343         if (!compatible) {
8344                 verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
8345                 return -EFAULT;
8346         }
8347 
8348         /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
8349          * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
8350          *
8351          * Same for MAYBE_NULL:
8352          *
8353          * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
8354  * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + MAYBE_NULL
8355          *
8356          * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
8357          *
8358          * Therefore we fold these flags depending on the arg_type before comparison.
8359          */
8360         if (arg_type & MEM_RDONLY)
8361                 type &= ~MEM_RDONLY;
8362         if (arg_type & PTR_MAYBE_NULL)
8363                 type &= ~PTR_MAYBE_NULL;
8364         if (base_type(arg_type) == ARG_PTR_TO_MEM)
8365                 type &= ~DYNPTR_TYPE_FLAG_MASK;
8366 
8367         if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) {
8368                 type &= ~MEM_ALLOC;
8369                 type &= ~MEM_PERCPU;
8370         }
8371 
8372         for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
8373                 expected = compatible->types[i];
8374                 if (expected == NOT_INIT)
8375                         break;
8376 
8377                 if (type == expected)
8378                         goto found;
8379         }
8380 
8381         verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
8382         for (j = 0; j + 1 < i; j++)
8383                 verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
8384         verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
8385         return -EACCES;
8386 
8387 found:
8388         if (base_type(reg->type) != PTR_TO_BTF_ID)
8389                 return 0;
8390 
8391         if (compatible == &mem_types) {
8392                 if (!(arg_type & MEM_RDONLY)) {
8393                         verbose(env,
8394                                 "%s() may write into memory pointed by R%d type=%s\n",
8395                                 func_id_name(meta->func_id),
8396                                 regno, reg_type_str(env, reg->type));
8397                         return -EACCES;
8398                 }
8399                 return 0;
8400         }
8401 
8402         switch ((int)reg->type) {
8403         case PTR_TO_BTF_ID:
8404         case PTR_TO_BTF_ID | PTR_TRUSTED:
8405         case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL:
8406         case PTR_TO_BTF_ID | MEM_RCU:
8407         case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
8408         case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
8409         {
8410                 /* For bpf_sk_release, it needs to match against first member
8411                  * 'struct sock_common', hence make an exception for it. This
8412                  * allows bpf_sk_release to work for multiple socket types.
8413                  */
8414                 bool strict_type_match = arg_type_is_release(arg_type) &&
8415                                          meta->func_id != BPF_FUNC_sk_release;
8416 
8417                 if (type_may_be_null(reg->type) &&
8418                     (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) {
8419                         verbose(env, "Possibly NULL pointer passed to helper arg%d\n", regno);
8420                         return -EACCES;
8421                 }
8422 
8423                 if (!arg_btf_id) {
8424                         if (!compatible->btf_id) {
8425                                 verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
8426                                 return -EFAULT;
8427                         }
8428                         arg_btf_id = compatible->btf_id;
8429                 }
8430 
8431                 if (meta->func_id == BPF_FUNC_kptr_xchg) {
8432                         if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
8433                                 return -EACCES;
8434                 } else {
8435                         if (arg_btf_id == BPF_PTR_POISON) {
8436                                 verbose(env, "verifier internal error:");
8437                                 verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n",
8438                                         regno);
8439                                 return -EACCES;
8440                         }
8441 
8442                         if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
8443                                                   btf_vmlinux, *arg_btf_id,
8444                                                   strict_type_match)) {
8445                                 verbose(env, "R%d is of type %s but %s is expected\n",
8446                                         regno, btf_type_name(reg->btf, reg->btf_id),
8447                                         btf_type_name(btf_vmlinux, *arg_btf_id));
8448                                 return -EACCES;
8449                         }
8450                 }
8451                 break;
8452         }
8453         case PTR_TO_BTF_ID | MEM_ALLOC:
8454         case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
8455                 if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
8456                     meta->func_id != BPF_FUNC_kptr_xchg) {
8457                         verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
8458                         return -EFAULT;
8459                 }
8460                 if (meta->func_id == BPF_FUNC_kptr_xchg) {
8461                         if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
8462                                 return -EACCES;
8463                 }
8464                 break;
8465         case PTR_TO_BTF_ID | MEM_PERCPU:
8466         case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU:
8467         case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
8468                 /* Handled by helper specific checks */
8469                 break;
8470         default:
8471                 verbose(env, "verifier internal error: invalid PTR_TO_BTF_ID register for type match\n");
8472                 return -EFAULT;
8473         }
8474         return 0;
8475 }
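
/* A short worked example of the folding above, assuming a helper whose data
 * argument is declared ARG_PTR_TO_MEM | MEM_RDONLY (bpf_ringbuf_output() is
 * one such helper) and a register of type PTR_TO_MEM | MEM_RDONLY:
 *
 *     type = PTR_TO_MEM | MEM_RDONLY;
 *     if (arg_type & MEM_RDONLY)      // true for ARG_PTR_TO_MEM | MEM_RDONLY
 *             type &= ~MEM_RDONLY;    // type == PTR_TO_MEM, matches mem_types
 *
 * The same register passed to a helper taking plain ARG_PTR_TO_MEM keeps
 * MEM_RDONLY, matches no entry in mem_types and is rejected.
 */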
8476 
8477 static struct btf_field *
8478 reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
8479 {
8480         struct btf_field *field;
8481         struct btf_record *rec;
8482 
8483         rec = reg_btf_record(reg);
8484         if (!rec)
8485                 return NULL;
8486 
8487         field = btf_record_find(rec, off, fields);
8488         if (!field)
8489                 return NULL;
8490 
8491         return field;
8492 }
8493 
8494 static int check_func_arg_reg_off(struct bpf_verifier_env *env,
8495                                   const struct bpf_reg_state *reg, int regno,
8496                                   enum bpf_arg_type arg_type)
8497 {
8498         u32 type = reg->type;
8499 
8500         /* When referenced register is passed to release function, its fixed
8501          * offset must be 0.
8502          *
8503          * We will check that an arg_type_is_release() reg has a ref_obj_id
8504          * when storing meta->release_regno.
8505          */
8506         if (arg_type_is_release(arg_type)) {
8507                 /* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
8508                  * may not directly point to the object being released, but to
8509          * a dynptr pointing to such an object, which might be at some offset
8510          * on the stack. In that case, we simply fall back to the
8511                  * default handling.
8512                  */
8513                 if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
8514                         return 0;
8515 
8516                 /* The __check_ptr_off_reg() check for the offset below will catch this
8517                  * because fixed_off_ok is false, but checking here allows us
8518                  * to give the user a better error message.
8519                  */
8520                 if (reg->off) {
8521                         verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
8522                                 regno);
8523                         return -EINVAL;
8524                 }
8525                 return __check_ptr_off_reg(env, reg, regno, false);
8526         }
8527 
8528         switch (type) {
8529         /* Pointer types where both fixed and variable offset is explicitly allowed: */
8530         case PTR_TO_STACK:
8531         case PTR_TO_PACKET:
8532         case PTR_TO_PACKET_META:
8533         case PTR_TO_MAP_KEY:
8534         case PTR_TO_MAP_VALUE:
8535         case PTR_TO_MEM:
8536         case PTR_TO_MEM | MEM_RDONLY:
8537         case PTR_TO_MEM | MEM_RINGBUF:
8538         case PTR_TO_BUF:
8539         case PTR_TO_BUF | MEM_RDONLY:
8540         case PTR_TO_ARENA:
8541         case SCALAR_VALUE:
8542                 return 0;
8543         /* All the rest must be rejected, except PTR_TO_BTF_ID which allows
8544          * fixed offset.
8545          */
8546         case PTR_TO_BTF_ID:
8547         case PTR_TO_BTF_ID | MEM_ALLOC:
8548         case PTR_TO_BTF_ID | PTR_TRUSTED:
8549         case PTR_TO_BTF_ID | MEM_RCU:
8550         case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
8551         case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
8552                 /* When referenced PTR_TO_BTF_ID is passed to release function,
8553                  * its fixed offset must be 0. In the other cases, fixed offset
8554                  * can be non-zero. This was already checked above. So pass
8555                  * fixed_off_ok as true to allow fixed offset for all other
8556                  * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
8557                  * still need to do checks instead of returning.
8558                  */
8559                 return __check_ptr_off_reg(env, reg, regno, true);
8560         default:
8561                 return __check_ptr_off_reg(env, reg, regno, false);
8562         }
8563 }
8564 
8565 static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
8566                                                 const struct bpf_func_proto *fn,
8567                                                 struct bpf_reg_state *regs)
8568 {
8569         struct bpf_reg_state *state = NULL;
8570         int i;
8571 
8572         for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
8573                 if (arg_type_is_dynptr(fn->arg_type[i])) {
8574                         if (state) {
8575                                 verbose(env, "verifier internal error: multiple dynptr args\n");
8576                                 return NULL;
8577                         }
8578                         state = &regs[BPF_REG_1 + i];
8579                 }
8580 
8581         if (!state)
8582                 verbose(env, "verifier internal error: no dynptr arg found\n");
8583 
8584         return state;
8585 }
8586 
8587 static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
8588 {
8589         struct bpf_func_state *state = func(env, reg);
8590         int spi;
8591 
8592         if (reg->type == CONST_PTR_TO_DYNPTR)
8593                 return reg->id;
8594         spi = dynptr_get_spi(env, reg);
8595         if (spi < 0)
8596                 return spi;
8597         return state->stack[spi].spilled_ptr.id;
8598 }
8599 
8600 static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
8601 {
8602         struct bpf_func_state *state = func(env, reg);
8603         int spi;
8604 
8605         if (reg->type == CONST_PTR_TO_DYNPTR)
8606                 return reg->ref_obj_id;
8607         spi = dynptr_get_spi(env, reg);
8608         if (spi < 0)
8609                 return spi;
8610         return state->stack[spi].spilled_ptr.ref_obj_id;
8611 }
8612 
8613 static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
8614                                             struct bpf_reg_state *reg)
8615 {
8616         struct bpf_func_state *state = func(env, reg);
8617         int spi;
8618 
8619         if (reg->type == CONST_PTR_TO_DYNPTR)
8620                 return reg->dynptr.type;
8621 
8622         spi = __get_spi(reg->off);
8623         if (spi < 0) {
8624                 verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
8625                 return BPF_DYNPTR_TYPE_INVALID;
8626         }
8627 
8628         return state->stack[spi].spilled_ptr.dynptr.type;
8629 }
8630 
8631 static int check_reg_const_str(struct bpf_verifier_env *env,
8632                                struct bpf_reg_state *reg, u32 regno)
8633 {
8634         struct bpf_map *map = reg->map_ptr;
8635         int err;
8636         int map_off;
8637         u64 map_addr;
8638         char *str_ptr;
8639 
8640         if (reg->type != PTR_TO_MAP_VALUE)
8641                 return -EINVAL;
8642 
8643         if (!bpf_map_is_rdonly(map)) {
8644                 verbose(env, "R%d does not point to a readonly map\n", regno);
8645                 return -EACCES;
8646         }
8647 
8648         if (!tnum_is_const(reg->var_off)) {
8649                 verbose(env, "R%d is not a constant address\n", regno);
8650                 return -EACCES;
8651         }
8652 
8653         if (!map->ops->map_direct_value_addr) {
8654                 verbose(env, "no direct value access support for this map type\n");
8655                 return -EACCES;
8656         }
8657 
8658         err = check_map_access(env, regno, reg->off,
8659                                map->value_size - reg->off, false,
8660                                ACCESS_HELPER);
8661         if (err)
8662                 return err;
8663 
8664         map_off = reg->off + reg->var_off.value;
8665         err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
8666         if (err) {
8667                 verbose(env, "direct value access on string failed\n");
8668                 return err;
8669         }
8670 
8671         str_ptr = (char *)(long)(map_addr);
8672         if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
8673                 verbose(env, "string is not zero-terminated\n");
8674                 return -EINVAL;
8675         }
8676         return 0;
8677 }
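
/* A minimal sketch of where such an argument typically comes from: a constant
 * string that the compiler places in the program's read-only data, which
 * libbpf loads as a frozen read-only array map ('buf' and 'args' are
 * hypothetical):
 *
 *     static const char fmt[] = "pid=%d\n";
 *     ...
 *     bpf_snprintf(buf, sizeof(buf), fmt, args, sizeof(args));
 *
 * The checks above require the backing map to be read-only, the offset to be
 * constant and the string to be NUL-terminated within the map value.
 */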
8678 
8679 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
8680                           struct bpf_call_arg_meta *meta,
8681                           const struct bpf_func_proto *fn,
8682                           int insn_idx)
8683 {
8684         u32 regno = BPF_REG_1 + arg;
8685         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8686         enum bpf_arg_type arg_type = fn->arg_type[arg];
8687         enum bpf_reg_type type = reg->type;
8688         u32 *arg_btf_id = NULL;
8689         int err = 0;
8690 
8691         if (arg_type == ARG_DONTCARE)
8692                 return 0;
8693 
8694         err = check_reg_arg(env, regno, SRC_OP);
8695         if (err)
8696                 return err;
8697 
8698         if (arg_type == ARG_ANYTHING) {
8699                 if (is_pointer_value(env, regno)) {
8700                         verbose(env, "R%d leaks addr into helper function\n",
8701                                 regno);
8702                         return -EACCES;
8703                 }
8704                 return 0;
8705         }
8706 
8707         if (type_is_pkt_pointer(type) &&
8708             !may_access_direct_pkt_data(env, meta, BPF_READ)) {
8709                 verbose(env, "helper access to the packet is not allowed\n");
8710                 return -EACCES;
8711         }
8712 
8713         if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
8714                 err = resolve_map_arg_type(env, meta, &arg_type);
8715                 if (err)
8716                         return err;
8717         }
8718 
8719         if (register_is_null(reg) && type_may_be_null(arg_type))
8720                 /* A NULL register has a SCALAR_VALUE type, so skip
8721                  * type checking.
8722                  */
8723                 goto skip_type_check;
8724 
8725         /* arg_btf_id and arg_size are in a union. */
8726         if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
8727             base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
8728                 arg_btf_id = fn->arg_btf_id[arg];
8729 
8730         err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
8731         if (err)
8732                 return err;
8733 
8734         err = check_func_arg_reg_off(env, reg, regno, arg_type);
8735         if (err)
8736                 return err;
8737 
8738 skip_type_check:
8739         if (arg_type_is_release(arg_type)) {
8740                 if (arg_type_is_dynptr(arg_type)) {
8741                         struct bpf_func_state *state = func(env, reg);
8742                         int spi;
8743 
8744                         /* Only dynptr created on stack can be released, thus
8745                          * the get_spi and stack state checks for spilled_ptr
8746                          * should only be done before process_dynptr_func for
8747                          * PTR_TO_STACK.
8748                          */
8749                         if (reg->type == PTR_TO_STACK) {
8750                                 spi = dynptr_get_spi(env, reg);
8751                                 if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) {
8752                                         verbose(env, "arg %d is an unacquired reference\n", regno);
8753                                         return -EINVAL;
8754                                 }
8755                         } else {
8756                                 verbose(env, "cannot release unowned const bpf_dynptr\n");
8757                                 return -EINVAL;
8758                         }
8759                 } else if (!reg->ref_obj_id && !register_is_null(reg)) {
8760                         verbose(env, "R%d must be referenced when passed to release function\n",
8761                                 regno);
8762                         return -EINVAL;
8763                 }
8764                 if (meta->release_regno) {
8765                         verbose(env, "verifier internal error: more than one release argument\n");
8766                         return -EFAULT;
8767                 }
8768                 meta->release_regno = regno;
8769         }
8770 
8771         if (reg->ref_obj_id) {
8772                 if (meta->ref_obj_id) {
8773                         verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
8774                                 regno, reg->ref_obj_id,
8775                                 meta->ref_obj_id);
8776                         return -EFAULT;
8777                 }
8778                 meta->ref_obj_id = reg->ref_obj_id;
8779         }
8780 
8781         switch (base_type(arg_type)) {
8782         case ARG_CONST_MAP_PTR:
8783                 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
8784                 if (meta->map_ptr) {
8785                         /* Use map_uid (which is unique id of inner map) to reject:
8786                          * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
8787                          * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
8788                          * if (inner_map1 && inner_map2) {
8789                          *     timer = bpf_map_lookup_elem(inner_map1);
8790                          *     if (timer)
8791                          *         // mismatch would have been allowed
8792                          *         bpf_timer_init(timer, inner_map2);
8793                          * }
8794                          *
8795                          * Comparing map_ptr is enough to distinguish normal and outer maps.
8796                          */
8797                         if (meta->map_ptr != reg->map_ptr ||
8798                             meta->map_uid != reg->map_uid) {
8799                                 verbose(env,
8800                                         "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
8801                                         meta->map_uid, reg->map_uid);
8802                                 return -EINVAL;
8803                         }
8804                 }
8805                 meta->map_ptr = reg->map_ptr;
8806                 meta->map_uid = reg->map_uid;
8807                 break;
8808         case ARG_PTR_TO_MAP_KEY:
8809                 /* bpf_map_xxx(..., map_ptr, ..., key) call:
8810                  * check that [key, key + map->key_size) are within
8811                  * stack limits and initialized
8812                  */
8813                 if (!meta->map_ptr) {
8814                         /* in function declaration map_ptr must come before
8815                          * map_key, so that it's verified and known before
8816                          * we have to check map_key here. Otherwise it means
8817                          * that kernel subsystem misconfigured verifier
8818                          */
8819                         verbose(env, "invalid map_ptr to access map->key\n");
8820                         return -EACCES;
8821                 }
8822                 err = check_helper_mem_access(env, regno,
8823                                               meta->map_ptr->key_size, false,
8824                                               NULL);
8825                 break;
8826         case ARG_PTR_TO_MAP_VALUE:
8827                 if (type_may_be_null(arg_type) && register_is_null(reg))
8828                         return 0;
8829 
8830                 /* bpf_map_xxx(..., map_ptr, ..., value) call:
8831                  * check [value, value + map->value_size) validity
8832                  */
8833                 if (!meta->map_ptr) {
8834                         /* kernel subsystem misconfigured verifier */
8835                         verbose(env, "invalid map_ptr to access map->value\n");
8836                         return -EACCES;
8837                 }
8838                 meta->raw_mode = arg_type & MEM_UNINIT;
8839                 err = check_helper_mem_access(env, regno,
8840                                               meta->map_ptr->value_size, false,
8841                                               meta);
8842                 break;
8843         case ARG_PTR_TO_PERCPU_BTF_ID:
8844                 if (!reg->btf_id) {
8845                         verbose(env, "Helper has invalid btf_id in R%d\n", regno);
8846                         return -EACCES;
8847                 }
8848                 meta->ret_btf = reg->btf;
8849                 meta->ret_btf_id = reg->btf_id;
8850                 break;
8851         case ARG_PTR_TO_SPIN_LOCK:
8852                 if (in_rbtree_lock_required_cb(env)) {
8853                         verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
8854                         return -EACCES;
8855                 }
8856                 if (meta->func_id == BPF_FUNC_spin_lock) {
8857                         err = process_spin_lock(env, regno, true);
8858                         if (err)
8859                                 return err;
8860                 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
8861                         err = process_spin_lock(env, regno, false);
8862                         if (err)
8863                                 return err;
8864                 } else {
8865                         verbose(env, "verifier internal error\n");
8866                         return -EFAULT;
8867                 }
8868                 break;
8869         case ARG_PTR_TO_TIMER:
8870                 err = process_timer_func(env, regno, meta);
8871                 if (err)
8872                         return err;
8873                 break;
8874         case ARG_PTR_TO_FUNC:
8875                 meta->subprogno = reg->subprogno;
8876                 break;
8877         case ARG_PTR_TO_MEM:
8878                 /* The access to this pointer is only checked when we hit the
8879                  * next is_mem_size argument below.
8880                  */
8881                 meta->raw_mode = arg_type & MEM_UNINIT;
8882                 if (arg_type & MEM_FIXED_SIZE) {
8883                         err = check_helper_mem_access(env, regno, fn->arg_size[arg], false, meta);
8884                         if (err)
8885                                 return err;
8886                         if (arg_type & MEM_ALIGNED)
8887                                 err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true);
8888                 }
8889                 break;
8890         case ARG_CONST_SIZE:
8891                 err = check_mem_size_reg(env, reg, regno, false, meta);
8892                 break;
8893         case ARG_CONST_SIZE_OR_ZERO:
8894                 err = check_mem_size_reg(env, reg, regno, true, meta);
8895                 break;
8896         case ARG_PTR_TO_DYNPTR:
8897                 err = process_dynptr_func(env, regno, insn_idx, arg_type, 0);
8898                 if (err)
8899                         return err;
8900                 break;
8901         case ARG_CONST_ALLOC_SIZE_OR_ZERO:
8902                 if (!tnum_is_const(reg->var_off)) {
8903                         verbose(env, "R%d is not a known constant\n",
8904                                 regno);
8905                         return -EACCES;
8906                 }
8907                 meta->mem_size = reg->var_off.value;
8908                 err = mark_chain_precision(env, regno);
8909                 if (err)
8910                         return err;
8911                 break;
8912         case ARG_PTR_TO_CONST_STR:
8913         {
8914                 err = check_reg_const_str(env, reg, regno);
8915                 if (err)
8916                         return err;
8917                 break;
8918         }
8919         case ARG_PTR_TO_KPTR:
8920                 err = process_kptr_func(env, regno, meta);
8921                 if (err)
8922                         return err;
8923                 break;
8924         }
8925 
8926         return err;
8927 }
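
/* A minimal BPF-side sketch (not part of verifier.c) of what the
 * ARG_CONST_MAP_PTR / ARG_PTR_TO_MAP_KEY handling above enforces: the key
 * passed to bpf_map_lookup_elem() must be an initialized region of at least
 * map->key_size bytes, here a stack variable. Assumes a libbpf-style program
 * with <linux/bpf.h> and <bpf/bpf_helpers.h>; map and function names are
 * hypothetical.
 *
 *        struct {
 *                __uint(type, BPF_MAP_TYPE_HASH);
 *                __uint(max_entries, 128);
 *                __type(key, __u32);
 *                __type(value, __u64);
 *        } counts SEC(".maps");
 *
 *        SEC("tp/syscalls/sys_enter_write")
 *        int count_writes(void *ctx)
 *        {
 *                __u32 key = bpf_get_current_pid_tgid() >> 32;  // initialized, key_size bytes
 *                __u64 one = 1, *val;
 *
 *                val = bpf_map_lookup_elem(&counts, &key);
 *                if (val)
 *                        __sync_fetch_and_add(val, 1);
 *                else
 *                        bpf_map_update_elem(&counts, &key, &one, BPF_ANY);
 *                return 0;
 *        }
 */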
8928 
8929 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
8930 {
8931         enum bpf_attach_type eatype = env->prog->expected_attach_type;
8932         enum bpf_prog_type type = resolve_prog_type(env->prog);
8933 
8934         if (func_id != BPF_FUNC_map_update_elem &&
8935             func_id != BPF_FUNC_map_delete_elem)
8936                 return false;
8937 
8938         /* It's not possible to get access to a locked struct sock in these
8939          * contexts, so updating is safe.
8940          */
8941         switch (type) {
8942         case BPF_PROG_TYPE_TRACING:
8943                 if (eatype == BPF_TRACE_ITER)
8944                         return true;
8945                 break;
8946         case BPF_PROG_TYPE_SOCK_OPS:
8947                 /* map_update allowed only via dedicated helpers with event type checks */
8948                 if (func_id == BPF_FUNC_map_delete_elem)
8949                         return true;
8950                 break;
8951         case BPF_PROG_TYPE_SOCKET_FILTER:
8952         case BPF_PROG_TYPE_SCHED_CLS:
8953         case BPF_PROG_TYPE_SCHED_ACT:
8954         case BPF_PROG_TYPE_XDP:
8955         case BPF_PROG_TYPE_SK_REUSEPORT:
8956         case BPF_PROG_TYPE_FLOW_DISSECTOR:
8957         case BPF_PROG_TYPE_SK_LOOKUP:
8958                 return true;
8959         default:
8960                 break;
8961         }
8962 
8963         verbose(env, "cannot update sockmap in this context\n");
8964         return false;
8965 }
8966 
8967 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
8968 {
8969         return env->prog->jit_requested &&
8970                bpf_jit_supports_subprog_tailcalls();
8971 }
8972 
8973 static int check_map_func_compatibility(struct bpf_verifier_env *env,
8974                                         struct bpf_map *map, int func_id)
8975 {
8976         if (!map)
8977                 return 0;
8978 
8979         /* We need a two way check, first is from map perspective ... */
8980         switch (map->map_type) {
8981         case BPF_MAP_TYPE_PROG_ARRAY:
8982                 if (func_id != BPF_FUNC_tail_call)
8983                         goto error;
8984                 break;
8985         case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
8986                 if (func_id != BPF_FUNC_perf_event_read &&
8987                     func_id != BPF_FUNC_perf_event_output &&
8988                     func_id != BPF_FUNC_skb_output &&
8989                     func_id != BPF_FUNC_perf_event_read_value &&
8990                     func_id != BPF_FUNC_xdp_output)
8991                         goto error;
8992                 break;
8993         case BPF_MAP_TYPE_RINGBUF:
8994                 if (func_id != BPF_FUNC_ringbuf_output &&
8995                     func_id != BPF_FUNC_ringbuf_reserve &&
8996                     func_id != BPF_FUNC_ringbuf_query &&
8997                     func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
8998                     func_id != BPF_FUNC_ringbuf_submit_dynptr &&
8999                     func_id != BPF_FUNC_ringbuf_discard_dynptr)
9000                         goto error;
9001                 break;
9002         case BPF_MAP_TYPE_USER_RINGBUF:
9003                 if (func_id != BPF_FUNC_user_ringbuf_drain)
9004                         goto error;
9005                 break;
9006         case BPF_MAP_TYPE_STACK_TRACE:
9007                 if (func_id != BPF_FUNC_get_stackid)
9008                         goto error;
9009                 break;
9010         case BPF_MAP_TYPE_CGROUP_ARRAY:
9011                 if (func_id != BPF_FUNC_skb_under_cgroup &&
9012                     func_id != BPF_FUNC_current_task_under_cgroup)
9013                         goto error;
9014                 break;
9015         case BPF_MAP_TYPE_CGROUP_STORAGE:
9016         case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
9017                 if (func_id != BPF_FUNC_get_local_storage)
9018                         goto error;
9019                 break;
9020         case BPF_MAP_TYPE_DEVMAP:
9021         case BPF_MAP_TYPE_DEVMAP_HASH:
9022                 if (func_id != BPF_FUNC_redirect_map &&
9023                     func_id != BPF_FUNC_map_lookup_elem)
9024                         goto error;
9025                 break;
9026         /* Restrict bpf side of cpumap and xskmap, open when use-cases
9027          * appear.
9028          */
9029         case BPF_MAP_TYPE_CPUMAP:
9030                 if (func_id != BPF_FUNC_redirect_map)
9031                         goto error;
9032                 break;
9033         case BPF_MAP_TYPE_XSKMAP:
9034                 if (func_id != BPF_FUNC_redirect_map &&
9035                     func_id != BPF_FUNC_map_lookup_elem)
9036                         goto error;
9037                 break;
9038         case BPF_MAP_TYPE_ARRAY_OF_MAPS:
9039         case BPF_MAP_TYPE_HASH_OF_MAPS:
9040                 if (func_id != BPF_FUNC_map_lookup_elem)
9041                         goto error;
9042                 break;
9043         case BPF_MAP_TYPE_SOCKMAP:
9044                 if (func_id != BPF_FUNC_sk_redirect_map &&
9045                     func_id != BPF_FUNC_sock_map_update &&
9046                     func_id != BPF_FUNC_msg_redirect_map &&
9047                     func_id != BPF_FUNC_sk_select_reuseport &&
9048                     func_id != BPF_FUNC_map_lookup_elem &&
9049                     !may_update_sockmap(env, func_id))
9050                         goto error;
9051                 break;
9052         case BPF_MAP_TYPE_SOCKHASH:
9053                 if (func_id != BPF_FUNC_sk_redirect_hash &&
9054                     func_id != BPF_FUNC_sock_hash_update &&
9055                     func_id != BPF_FUNC_msg_redirect_hash &&
9056                     func_id != BPF_FUNC_sk_select_reuseport &&
9057                     func_id != BPF_FUNC_map_lookup_elem &&
9058                     !may_update_sockmap(env, func_id))
9059                         goto error;
9060                 break;
9061         case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
9062                 if (func_id != BPF_FUNC_sk_select_reuseport)
9063                         goto error;
9064                 break;
9065         case BPF_MAP_TYPE_QUEUE:
9066         case BPF_MAP_TYPE_STACK:
9067                 if (func_id != BPF_FUNC_map_peek_elem &&
9068                     func_id != BPF_FUNC_map_pop_elem &&
9069                     func_id != BPF_FUNC_map_push_elem)
9070                         goto error;
9071                 break;
9072         case BPF_MAP_TYPE_SK_STORAGE:
9073                 if (func_id != BPF_FUNC_sk_storage_get &&
9074                     func_id != BPF_FUNC_sk_storage_delete &&
9075                     func_id != BPF_FUNC_kptr_xchg)
9076                         goto error;
9077                 break;
9078         case BPF_MAP_TYPE_INODE_STORAGE:
9079                 if (func_id != BPF_FUNC_inode_storage_get &&
9080                     func_id != BPF_FUNC_inode_storage_delete &&
9081                     func_id != BPF_FUNC_kptr_xchg)
9082                         goto error;
9083                 break;
9084         case BPF_MAP_TYPE_TASK_STORAGE:
9085                 if (func_id != BPF_FUNC_task_storage_get &&
9086                     func_id != BPF_FUNC_task_storage_delete &&
9087                     func_id != BPF_FUNC_kptr_xchg)
9088                         goto error;
9089                 break;
9090         case BPF_MAP_TYPE_CGRP_STORAGE:
9091                 if (func_id != BPF_FUNC_cgrp_storage_get &&
9092                     func_id != BPF_FUNC_cgrp_storage_delete &&
9093                     func_id != BPF_FUNC_kptr_xchg)
9094                         goto error;
9095                 break;
9096         case BPF_MAP_TYPE_BLOOM_FILTER:
9097                 if (func_id != BPF_FUNC_map_peek_elem &&
9098                     func_id != BPF_FUNC_map_push_elem)
9099                         goto error;
9100                 break;
9101         default:
9102                 break;
9103         }
9104 
9105         /* ... and second from the function itself. */
9106         switch (func_id) {
9107         case BPF_FUNC_tail_call:
9108                 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
9109                         goto error;
9110                 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
9111                         verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
9112                         return -EINVAL;
9113                 }
9114                 break;
9115         case BPF_FUNC_perf_event_read:
9116         case BPF_FUNC_perf_event_output:
9117         case BPF_FUNC_perf_event_read_value:
9118         case BPF_FUNC_skb_output:
9119         case BPF_FUNC_xdp_output:
9120                 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
9121                         goto error;
9122                 break;
9123         case BPF_FUNC_ringbuf_output:
9124         case BPF_FUNC_ringbuf_reserve:
9125         case BPF_FUNC_ringbuf_query:
9126         case BPF_FUNC_ringbuf_reserve_dynptr:
9127         case BPF_FUNC_ringbuf_submit_dynptr:
9128         case BPF_FUNC_ringbuf_discard_dynptr:
9129                 if (map->map_type != BPF_MAP_TYPE_RINGBUF)
9130                         goto error;
9131                 break;
9132         case BPF_FUNC_user_ringbuf_drain:
9133                 if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
9134                         goto error;
9135                 break;
9136         case BPF_FUNC_get_stackid:
9137                 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
9138                         goto error;
9139                 break;
9140         case BPF_FUNC_current_task_under_cgroup:
9141         case BPF_FUNC_skb_under_cgroup:
9142                 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
9143                         goto error;
9144                 break;
9145         case BPF_FUNC_redirect_map:
9146                 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
9147                     map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
9148                     map->map_type != BPF_MAP_TYPE_CPUMAP &&
9149                     map->map_type != BPF_MAP_TYPE_XSKMAP)
9150                         goto error;
9151                 break;
9152         case BPF_FUNC_sk_redirect_map:
9153         case BPF_FUNC_msg_redirect_map:
9154         case BPF_FUNC_sock_map_update:
9155                 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
9156                         goto error;
9157                 break;
9158         case BPF_FUNC_sk_redirect_hash:
9159         case BPF_FUNC_msg_redirect_hash:
9160         case BPF_FUNC_sock_hash_update:
9161                 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
9162                         goto error;
9163                 break;
9164         case BPF_FUNC_get_local_storage:
9165                 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
9166                     map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
9167                         goto error;
9168                 break;
9169         case BPF_FUNC_sk_select_reuseport:
9170                 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
9171                     map->map_type != BPF_MAP_TYPE_SOCKMAP &&
9172                     map->map_type != BPF_MAP_TYPE_SOCKHASH)
9173                         goto error;
9174                 break;
9175         case BPF_FUNC_map_pop_elem:
9176                 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
9177                     map->map_type != BPF_MAP_TYPE_STACK)
9178                         goto error;
9179                 break;
9180         case BPF_FUNC_map_peek_elem:
9181         case BPF_FUNC_map_push_elem:
9182                 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
9183                     map->map_type != BPF_MAP_TYPE_STACK &&
9184                     map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
9185                         goto error;
9186                 break;
9187         case BPF_FUNC_map_lookup_percpu_elem:
9188                 if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
9189                     map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
9190                     map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
9191                         goto error;
9192                 break;
9193         case BPF_FUNC_sk_storage_get:
9194         case BPF_FUNC_sk_storage_delete:
9195                 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
9196                         goto error;
9197                 break;
9198         case BPF_FUNC_inode_storage_get:
9199         case BPF_FUNC_inode_storage_delete:
9200                 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
9201                         goto error;
9202                 break;
9203         case BPF_FUNC_task_storage_get:
9204         case BPF_FUNC_task_storage_delete:
9205                 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
9206                         goto error;
9207                 break;
9208         case BPF_FUNC_cgrp_storage_get:
9209         case BPF_FUNC_cgrp_storage_delete:
9210                 if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
9211                         goto error;
9212                 break;
9213         default:
9214                 break;
9215         }
9216 
9217         return 0;
9218 error:
9219         verbose(env, "cannot pass map_type %d into func %s#%d\n",
9220                 map->map_type, func_id_name(func_id), func_id);
9221         return -EINVAL;
9222 }
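
/* A minimal sketch (not part of verifier.c) of a map/helper pairing that
 * passes both directions of the check above: BPF_MAP_TYPE_DEVMAP used with
 * bpf_redirect_map() from XDP. Calling e.g. bpf_map_push_elem() on the same
 * map would instead hit the "cannot pass map_type ... into func ..." error.
 * Map and function names are hypothetical; assumes <linux/bpf.h> and
 * <bpf/bpf_helpers.h>.
 *
 *        struct {
 *                __uint(type, BPF_MAP_TYPE_DEVMAP);
 *                __uint(max_entries, 64);
 *                __type(key, __u32);
 *                __type(value, __u32);
 *        } tx_ports SEC(".maps");
 *
 *        SEC("xdp")
 *        int xdp_redirect_port(struct xdp_md *ctx)
 *        {
 *                __u32 slot = 0;
 *
 *                // low bits of flags select the action returned on lookup miss
 *                return bpf_redirect_map(&tx_ports, slot, XDP_PASS);
 *        }
 */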
9223 
9224 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
9225 {
9226         int count = 0;
9227 
9228         if (arg_type_is_raw_mem(fn->arg1_type))
9229                 count++;
9230         if (arg_type_is_raw_mem(fn->arg2_type))
9231                 count++;
9232         if (arg_type_is_raw_mem(fn->arg3_type))
9233                 count++;
9234         if (arg_type_is_raw_mem(fn->arg4_type))
9235                 count++;
9236         if (arg_type_is_raw_mem(fn->arg5_type))
9237                 count++;
9238 
9239         /* We only support one arg being in raw mode at the moment,
9240          * which is sufficient for the helper functions we have
9241          * right now.
9242          */
9243         return count <= 1;
9244 }
9245 
9246 static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
9247 {
9248         bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
9249         bool has_size = fn->arg_size[arg] != 0;
9250         bool is_next_size = false;
9251 
9252         if (arg + 1 < ARRAY_SIZE(fn->arg_type))
9253                 is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
9254 
9255         if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
9256                 return is_next_size;
9257 
9258         return has_size == is_next_size || is_next_size == is_fixed;
9259 }
9260 
9261 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
9262 {
9263         /* bpf_xxx(..., buf, len) call will access 'len'
9264          * bytes from memory 'buf'. Both arg types need
9265          * to be paired, so make sure there's no buggy
9266          * helper function specification.
9267          */
9268         if (arg_type_is_mem_size(fn->arg1_type) ||
9269             check_args_pair_invalid(fn, 0) ||
9270             check_args_pair_invalid(fn, 1) ||
9271             check_args_pair_invalid(fn, 2) ||
9272             check_args_pair_invalid(fn, 3) ||
9273             check_args_pair_invalid(fn, 4))
9274                 return false;
9275 
9276         return true;
9277 }
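
/* A short sketch (not part of verifier.c) of the buf/len pairing this check
 * protects: in bpf_get_current_comm(buf, size), arg1 is raw (uninitialized)
 * memory and arg2 is its size, so the verifier proves that 'size' bytes of
 * stack really back 'buf'. Snippet assumes a tracing program with
 * <bpf/bpf_helpers.h>.
 *
 *        char comm[16];
 *
 *        // arg pair: ARG_PTR_TO_UNINIT_MEM + ARG_CONST_SIZE
 *        bpf_get_current_comm(comm, sizeof(comm));
 */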
9278 
9279 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
9280 {
9281         int i;
9282 
9283         for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
9284                 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
9285                         return !!fn->arg_btf_id[i];
9286                 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
9287                         return fn->arg_btf_id[i] == BPF_PTR_POISON;
9288                 if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
9289                     /* arg_btf_id and arg_size are in a union. */
9290                     (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
9291                      !(fn->arg_type[i] & MEM_FIXED_SIZE)))
9292                         return false;
9293         }
9294 
9295         return true;
9296 }
9297 
9298 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
9299 {
9300         return check_raw_mode_ok(fn) &&
9301                check_arg_pair_ok(fn) &&
9302                check_btf_id_ok(fn) ? 0 : -EINVAL;
9303 }
9304 
9305 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
9306  * are now invalid, so turn them into unknown SCALAR_VALUE.
9307  *
9308  * This also applies to dynptr slices belonging to skb and xdp dynptrs,
9309  * since these slices point to packet data.
9310  */
9311 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
9312 {
9313         struct bpf_func_state *state;
9314         struct bpf_reg_state *reg;
9315 
9316         bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
9317                 if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
9318                         mark_reg_invalid(env, reg);
9319         }));
9320 }
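
/* A minimal tc-side sketch (not part of verifier.c) of why packet pointers are
 * invalidated: after a helper that may reallocate packet data, the old
 * data/data_end values become unknown scalars and must be re-read and
 * re-checked before the next packet access. Names are hypothetical; assumes
 * <linux/bpf.h> and <bpf/bpf_helpers.h>.
 *
 *        SEC("tc")
 *        int handle(struct __sk_buff *skb)
 *        {
 *                void *data = (void *)(long)skb->data;
 *                void *data_end = (void *)(long)skb->data_end;
 *
 *                if (data + 14 > data_end)       // bounds check for Ethernet header
 *                        return 0;
 *
 *                bpf_skb_pull_data(skb, 0);      // may move packet data
 *
 *                // old 'data'/'data_end' are unknown scalars here: reload and re-check
 *                data = (void *)(long)skb->data;
 *                data_end = (void *)(long)skb->data_end;
 *                if (data + 14 > data_end)
 *                        return 0;
 *                return 0;
 *        }
 */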
9321 
9322 enum {
9323         AT_PKT_END = -1,
9324         BEYOND_PKT_END = -2,
9325 };
9326 
9327 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
9328 {
9329         struct bpf_func_state *state = vstate->frame[vstate->curframe];
9330         struct bpf_reg_state *reg = &state->regs[regn];
9331 
9332         if (reg->type != PTR_TO_PACKET)
9333                 /* PTR_TO_PACKET_META is not supported yet */
9334                 return;
9335 
9336         /* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
9337          * How far beyond pkt_end it goes is unknown.
9338          * if (!range_open) it's the case of pkt >= pkt_end
9339          * if (range_open) it's the case of pkt > pkt_end
9340          * hence this pointer is at least 1 byte bigger than pkt_end
9341          */
9342         if (range_open)
9343                 reg->range = BEYOND_PKT_END;
9344         else
9345                 reg->range = AT_PKT_END;
9346 }
9347 
9348 /* The pointer with the specified id has released its reference to kernel
9349  * resources. Identify all copies of the same pointer and clear the reference.
9350  */
9351 static int release_reference(struct bpf_verifier_env *env,
9352                              int ref_obj_id)
9353 {
9354         struct bpf_func_state *state;
9355         struct bpf_reg_state *reg;
9356         int err;
9357 
9358         err = release_reference_state(cur_func(env), ref_obj_id);
9359         if (err)
9360                 return err;
9361 
9362         bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
9363                 if (reg->ref_obj_id == ref_obj_id)
9364                         mark_reg_invalid(env, reg);
9365         }));
9366 
9367         return 0;
9368 }
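
/* A minimal sketch (not part of verifier.c), assuming a hypothetical RINGBUF
 * map 'rb' and struct 'event', of what release_reference() models: once the
 * acquired pointer is released, every copy carrying the same ref_obj_id is
 * invalidated.
 *
 *        struct event { __u64 ts; };
 *
 *        struct {
 *                __uint(type, BPF_MAP_TYPE_RINGBUF);
 *                __uint(max_entries, 4096);
 *        } rb SEC(".maps");
 *
 *        SEC("tp/syscalls/sys_enter_write")
 *        int log_write(void *ctx)
 *        {
 *                struct event *e, *copy;
 *
 *                e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);    // acquires a reference
 *                if (!e)
 *                        return 0;
 *                copy = e;                                       // same ref_obj_id
 *                e->ts = bpf_ktime_get_ns();
 *                bpf_ringbuf_submit(e, 0);                       // releases the reference
 *                // 'e' and 'copy' are both invalid now; copy->ts would be rejected
 *                return 0;
 *        }
 */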
9369 
9370 static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
9371 {
9372         struct bpf_func_state *unused;
9373         struct bpf_reg_state *reg;
9374 
9375         bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
9376                 if (type_is_non_owning_ref(reg->type))
9377                         mark_reg_invalid(env, reg);
9378         }));
9379 }
9380 
9381 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
9382                                     struct bpf_reg_state *regs)
9383 {
9384         int i;
9385 
9386         /* after the call registers r0 - r5 were scratched */
9387         for (i = 0; i < CALLER_SAVED_REGS; i++) {
9388                 mark_reg_not_init(env, regs, caller_saved[i]);
9389                 __check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
9390         }
9391 }
9392 
9393 typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
9394                                    struct bpf_func_state *caller,
9395                                    struct bpf_func_state *callee,
9396                                    int insn_idx);
9397 
9398 static int set_callee_state(struct bpf_verifier_env *env,
9399                             struct bpf_func_state *caller,
9400                             struct bpf_func_state *callee, int insn_idx);
9401 
9402 static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite,
9403                             set_callee_state_fn set_callee_state_cb,
9404                             struct bpf_verifier_state *state)
9405 {
9406         struct bpf_func_state *caller, *callee;
9407         int err;
9408 
9409         if (state->curframe + 1 >= MAX_CALL_FRAMES) {
9410                 verbose(env, "the call stack of %d frames is too deep\n",
9411                         state->curframe + 2);
9412                 return -E2BIG;
9413         }
9414 
9415         if (state->frame[state->curframe + 1]) {
9416                 verbose(env, "verifier bug. Frame %d already allocated\n",
9417                         state->curframe + 1);
9418                 return -EFAULT;
9419         }
9420 
9421         caller = state->frame[state->curframe];
9422         callee = kzalloc(sizeof(*callee), GFP_KERNEL);
9423         if (!callee)
9424                 return -ENOMEM;
9425         state->frame[state->curframe + 1] = callee;
9426 
9427         /* callee cannot access r0, r6 - r9 for reading and has to write
9428          * into its own stack before reading from it.
9429          * callee can read/write into caller's stack
9430          */
9431         init_func_state(env, callee,
9432                         /* remember the callsite, it will be used by bpf_exit */
9433                         callsite,
9434                         state->curframe + 1 /* frameno within this callchain */,
9435                         subprog /* subprog number within this prog */);
9436         /* Transfer references to the callee */
9437         err = copy_reference_state(callee, caller);
9438         err = err ?: set_callee_state_cb(env, caller, callee, callsite);
9439         if (err)
9440                 goto err_out;
9441 
9442         /* only increment it after check_reg_arg() finished */
9443         state->curframe++;
9444 
9445         return 0;
9446 
9447 err_out:
9448         free_func_state(callee);
9449         state->frame[state->curframe + 1] = NULL;
9450         return err;
9451 }
9452 
9453 static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
9454                                     const struct btf *btf,
9455                                     struct bpf_reg_state *regs)
9456 {
9457         struct bpf_subprog_info *sub = subprog_info(env, subprog);
9458         struct bpf_verifier_log *log = &env->log;
9459         u32 i;
9460         int ret;
9461 
9462         ret = btf_prepare_func_args(env, subprog);
9463         if (ret)
9464                 return ret;
9465 
9466         /* check that BTF function arguments match actual types that the
9467          * verifier sees.
9468          */
9469         for (i = 0; i < sub->arg_cnt; i++) {
9470                 u32 regno = i + 1;
9471                 struct bpf_reg_state *reg = &regs[regno];
9472                 struct bpf_subprog_arg_info *arg = &sub->args[i];
9473 
9474                 if (arg->arg_type == ARG_ANYTHING) {
9475                         if (reg->type != SCALAR_VALUE) {
9476                                 bpf_log(log, "R%d is not a scalar\n", regno);
9477                                 return -EINVAL;
9478                         }
9479                 } else if (arg->arg_type == ARG_PTR_TO_CTX) {
9480                         ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE);
9481                         if (ret < 0)
9482                                 return ret;
9483                         /* If function expects ctx type in BTF check that caller
9484                          * is passing PTR_TO_CTX.
9485                          */
9486                         if (reg->type != PTR_TO_CTX) {
9487                                 bpf_log(log, "arg#%d expects pointer to ctx\n", i);
9488                                 return -EINVAL;
9489                         }
9490                 } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
9491                         ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE);
9492                         if (ret < 0)
9493                                 return ret;
9494                         if (check_mem_reg(env, reg, regno, arg->mem_size))
9495                                 return -EINVAL;
9496                         if (!(arg->arg_type & PTR_MAYBE_NULL) && (reg->type & PTR_MAYBE_NULL)) {
9497                                 bpf_log(log, "arg#%d is expected to be non-NULL\n", i);
9498                                 return -EINVAL;
9499                         }
9500                 } else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
9501                         /*
9502                          * Can pass any value and the kernel won't crash, but
9503                          * only PTR_TO_ARENA or SCALAR make sense. Everything
9504                          * else is a bug in the bpf program. Point it out to
9505                          * the user at the verification time instead of
9506                          * run-time debug nightmare.
9507                          */
9508                         if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) {
9509                                 bpf_log(log, "R%d is not a pointer to arena or scalar.\n", regno);
9510                                 return -EINVAL;
9511                         }
9512                 } else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) {
9513                         ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_DYNPTR);
9514                         if (ret)
9515                                 return ret;
9516 
9517                         ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0);
9518                         if (ret)
9519                                 return ret;
9520                 } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
9521                         struct bpf_call_arg_meta meta;
9522                         int err;
9523 
9524                         if (register_is_null(reg) && type_may_be_null(arg->arg_type))
9525                                 continue;
9526 
9527                         memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */
9528                         err = check_reg_type(env, regno, arg->arg_type, &arg->btf_id, &meta);
9529                         err = err ?: check_func_arg_reg_off(env, reg, regno, arg->arg_type);
9530                         if (err)
9531                                 return err;
9532                 } else {
9533                         bpf_log(log, "verifier bug: unrecognized arg#%d type %d\n",
9534                                 i, arg->arg_type);
9535                         return -EFAULT;
9536                 }
9537         }
9538 
9539         return 0;
9540 }
9541 
9542 /* Compare BTF of a function call with given bpf_reg_state.
9543  * Returns:
9544  * EFAULT - there is a verifier bug. Abort verification.
9545  * EINVAL - there is a type mismatch or BTF is not available.
9546  * 0 - BTF matches with what bpf_reg_state expects.
9547  * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
9548  */
9549 static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
9550                                   struct bpf_reg_state *regs)
9551 {
9552         struct bpf_prog *prog = env->prog;
9553         struct btf *btf = prog->aux->btf;
9554         u32 btf_id;
9555         int err;
9556 
9557         if (!prog->aux->func_info)
9558                 return -EINVAL;
9559 
9560         btf_id = prog->aux->func_info[subprog].type_id;
9561         if (!btf_id)
9562                 return -EFAULT;
9563 
9564         if (prog->aux->func_info_aux[subprog].unreliable)
9565                 return -EINVAL;
9566 
9567         err = btf_check_func_arg_match(env, subprog, btf, regs);
9568         /* Compiler optimizations can remove arguments from static functions
9569          * or mismatched type can be passed into a global function.
9570          * In such cases mark the function as unreliable from BTF point of view.
9571          */
9572         if (err)
9573                 prog->aux->func_info_aux[subprog].unreliable = true;
9574         return err;
9575 }
9576 
9577 static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9578                               int insn_idx, int subprog,
9579                               set_callee_state_fn set_callee_state_cb)
9580 {
9581         struct bpf_verifier_state *state = env->cur_state, *callback_state;
9582         struct bpf_func_state *caller, *callee;
9583         int err;
9584 
9585         caller = state->frame[state->curframe];
9586         err = btf_check_subprog_call(env, subprog, caller->regs);
9587         if (err == -EFAULT)
9588                 return err;
9589 
9590         /* set_callee_state is used for direct subprog calls, but we are
9591          * interested in validating only BPF helpers that can call subprogs as
9592          * callbacks
9593          */
9594         env->subprog_info[subprog].is_cb = true;
9595         if (bpf_pseudo_kfunc_call(insn) &&
9596             !is_callback_calling_kfunc(insn->imm)) {
9597                 verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
9598                         func_id_name(insn->imm), insn->imm);
9599                 return -EFAULT;
9600         } else if (!bpf_pseudo_kfunc_call(insn) &&
9601                    !is_callback_calling_function(insn->imm)) { /* helper */
9602                 verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
9603                         func_id_name(insn->imm), insn->imm);
9604                 return -EFAULT;
9605         }
9606 
9607         if (is_async_callback_calling_insn(insn)) {
9608                 struct bpf_verifier_state *async_cb;
9609 
9610                 /* there is no real recursion here. timer and workqueue callbacks are async */
9611                 env->subprog_info[subprog].is_async_cb = true;
9612                 async_cb = push_async_cb(env, env->subprog_info[subprog].start,
9613                                          insn_idx, subprog,
9614                                          is_bpf_wq_set_callback_impl_kfunc(insn->imm));
9615                 if (!async_cb)
9616                         return -EFAULT;
9617                 callee = async_cb->frame[0];
9618                 callee->async_entry_cnt = caller->async_entry_cnt + 1;
9619 
9620                 /* Convert bpf_timer_set_callback() args into timer callback args */
9621                 err = set_callee_state_cb(env, caller, callee, insn_idx);
9622                 if (err)
9623                         return err;
9624 
9625                 return 0;
9626         }
9627 
9628         /* for callback functions enqueue entry to callback and
9629          * proceed with next instruction within current frame.
9630          */
9631         callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false);
9632         if (!callback_state)
9633                 return -ENOMEM;
9634 
9635         err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb,
9636                                callback_state);
9637         if (err)
9638                 return err;
9639 
9640         callback_state->callback_unroll_depth++;
9641         callback_state->frame[callback_state->curframe - 1]->callback_depth++;
9642         caller->callback_depth = 0;
9643         return 0;
9644 }
9645 
9646 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9647                            int *insn_idx)
9648 {
9649         struct bpf_verifier_state *state = env->cur_state;
9650         struct bpf_func_state *caller;
9651         int err, subprog, target_insn;
9652 
9653         target_insn = *insn_idx + insn->imm + 1;
9654         subprog = find_subprog(env, target_insn);
9655         if (subprog < 0) {
9656                 verbose(env, "verifier bug. No program starts at insn %d\n", target_insn);
9657                 return -EFAULT;
9658         }
9659 
9660         caller = state->frame[state->curframe];
9661         err = btf_check_subprog_call(env, subprog, caller->regs);
9662         if (err == -EFAULT)
9663                 return err;
9664         if (subprog_is_global(env, subprog)) {
9665                 const char *sub_name = subprog_name(env, subprog);
9666 
9667                 /* Only global subprogs cannot be called with a lock held. */
9668                 if (env->cur_state->active_lock.ptr) {
9669                         verbose(env, "global function calls are not allowed while holding a lock,\n"
9670                                      "use static function instead\n");
9671                         return -EINVAL;
9672                 }
9673 
9674                 /* Only global subprogs cannot be called with preemption disabled. */
9675                 if (env->cur_state->active_preempt_lock) {
9676                         verbose(env, "global function calls are not allowed with preemption disabled,\n"
9677                                      "use static function instead\n");
9678                         return -EINVAL;
9679                 }
9680 
9681                 if (err) {
9682                         verbose(env, "Caller passes invalid args into func#%d ('%s')\n",
9683                                 subprog, sub_name);
9684                         return err;
9685                 }
9686 
9687                 verbose(env, "Func#%d ('%s') is global and assumed valid.\n",
9688                         subprog, sub_name);
9689                 /* mark global subprog for verifying after main prog */
9690                 subprog_aux(env, subprog)->called = true;
9691                 clear_caller_saved_regs(env, caller->regs);
9692 
9693                 /* All global functions return a 64-bit SCALAR_VALUE */
9694                 mark_reg_unknown(env, caller->regs, BPF_REG_0);
9695                 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
9696 
9697                 /* continue with next insn after call */
9698                 return 0;
9699         }
9700 
9701         /* for regular function entry setup new frame and continue
9702          * from that frame.
9703          */
9704         err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state);
9705         if (err)
9706                 return err;
9707 
9708         clear_caller_saved_regs(env, caller->regs);
9709 
9710         /* and go analyze first insn of the callee */
9711         *insn_idx = env->subprog_info[subprog].start - 1;
9712 
9713         if (env->log.level & BPF_LOG_LEVEL) {
9714                 verbose(env, "caller:\n");
9715                 print_verifier_state(env, caller, true);
9716                 verbose(env, "callee:\n");
9717                 print_verifier_state(env, state->frame[state->curframe], true);
9718         }
9719 
9720         return 0;
9721 }
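
/* A minimal sketch (not part of verifier.c) of the two subprogram flavours
 * handled above, with hypothetical names. The static callee gets a new frame
 * and is verified in the caller's context; the global callee is verified once
 * on its own and its return value is treated as an unknown 64-bit scalar at
 * the call site. Assumes <bpf/bpf_helpers.h> for __noinline and SEC().
 *
 *        static __noinline int adjust(int v)
 *        {
 *                return v + 1;
 *        }
 *
 *        __noinline int scale(int v)
 *        {
 *                return v * 2;
 *        }
 *
 *        SEC("tc")
 *        int entry(struct __sk_buff *skb)
 *        {
 *                return adjust(scale(skb->len));
 *        }
 */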
9722 
9723 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
9724                                    struct bpf_func_state *caller,
9725                                    struct bpf_func_state *callee)
9726 {
9727         /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
9728          *      void *callback_ctx, u64 flags);
9729          * callback_fn(struct bpf_map *map, void *key, void *value,
9730          *      void *callback_ctx);
9731          */
9732         callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9733 
9734         callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9735         __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9736         callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9737 
9738         callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9739         __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9740         callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9741 
9742         /* pointer to stack or null */
9743         callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
9744 
9745         /* unused */
9746         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9747         return 0;
9748 }
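
/* A minimal sketch (not part of verifier.c) of the calling convention wired up
 * above, with hypothetical names: bpf_for_each_map_elem() hands the callback
 * (map, key, value, callback_ctx), and the callback returns 0 to continue or
 * 1 to stop. Assumes <linux/bpf.h> and <bpf/bpf_helpers.h>.
 *
 *        struct {
 *                __uint(type, BPF_MAP_TYPE_ARRAY);
 *                __uint(max_entries, 16);
 *                __type(key, __u32);
 *                __type(value, __u64);
 *        } counters SEC(".maps");
 *
 *        struct cb_ctx { __u64 sum; };
 *
 *        static long sum_cb(struct bpf_map *map, __u32 *key, __u64 *val,
 *                           struct cb_ctx *ctx)
 *        {
 *                ctx->sum += *val;
 *                return 0;               // 0 = continue, 1 = stop
 *        }
 *
 *        SEC("tc")
 *        int sum_counters(struct __sk_buff *skb)
 *        {
 *                struct cb_ctx ctx = { .sum = 0 };
 *
 *                bpf_for_each_map_elem(&counters, sum_cb, &ctx, 0);
 *                return 0;
 *        }
 */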
9749 
9750 static int set_callee_state(struct bpf_verifier_env *env,
9751                             struct bpf_func_state *caller,
9752                             struct bpf_func_state *callee, int insn_idx)
9753 {
9754         int i;
9755 
9756         /* copy r1 - r5 args that callee can access.  The copy includes parent
9757          * pointers, which connects us up to the liveness chain
9758          */
9759         for (i = BPF_REG_1; i <= BPF_REG_5; i++)
9760                 callee->regs[i] = caller->regs[i];
9761         return 0;
9762 }
9763 
9764 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
9765                                        struct bpf_func_state *caller,
9766                                        struct bpf_func_state *callee,
9767                                        int insn_idx)
9768 {
9769         struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
9770         struct bpf_map *map;
9771         int err;
9772 
9773         /* valid map_ptr and poison value does not matter */
9774         map = insn_aux->map_ptr_state.map_ptr;
9775         if (!map->ops->map_set_for_each_callback_args ||
9776             !map->ops->map_for_each_callback) {
9777                 verbose(env, "callback function not allowed for map\n");
9778                 return -ENOTSUPP;
9779         }
9780 
9781         err = map->ops->map_set_for_each_callback_args(env, caller, callee);
9782         if (err)
9783                 return err;
9784 
9785         callee->in_callback_fn = true;
9786         callee->callback_ret_range = retval_range(0, 1);
9787         return 0;
9788 }
9789 
9790 static int set_loop_callback_state(struct bpf_verifier_env *env,
9791                                    struct bpf_func_state *caller,
9792                                    struct bpf_func_state *callee,
9793                                    int insn_idx)
9794 {
9795         /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
9796          *          u64 flags);
9797          * callback_fn(u32 index, void *callback_ctx);
9798          */
9799         callee->regs[BPF_REG_1].type = SCALAR_VALUE;
9800         callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9801 
9802         /* unused */
9803         __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9804         __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9805         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9806 
9807         callee->in_callback_fn = true;
9808         callee->callback_ret_range = retval_range(0, 1);
9809         return 0;
9810 }
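
/* A minimal sketch (not part of verifier.c) of the bpf_loop() convention set
 * up above, with hypothetical names: the callback receives (index, ctx) and
 * its return value is constrained to {0, 1} by callback_ret_range.
 *
 *        struct loop_ctx { __u64 sum; };
 *
 *        static long add_idx(__u32 index, struct loop_ctx *ctx)
 *        {
 *                ctx->sum += index;
 *                return 0;               // 0 = next iteration, 1 = break
 *        }
 *
 *        SEC("tc")
 *        int run_loop(struct __sk_buff *skb)
 *        {
 *                struct loop_ctx ctx = { .sum = 0 };
 *
 *                bpf_loop(16, add_idx, &ctx, 0);
 *                return 0;
 *        }
 */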
9811 
9812 static int set_timer_callback_state(struct bpf_verifier_env *env,
9813                                     struct bpf_func_state *caller,
9814                                     struct bpf_func_state *callee,
9815                                     int insn_idx)
9816 {
9817         struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
9818 
9819         /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
9820          * callback_fn(struct bpf_map *map, void *key, void *value);
9821          */
9822         callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9823         __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
9824         callee->regs[BPF_REG_1].map_ptr = map_ptr;
9825 
9826         callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9827         __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9828         callee->regs[BPF_REG_2].map_ptr = map_ptr;
9829 
9830         callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9831         __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9832         callee->regs[BPF_REG_3].map_ptr = map_ptr;
9833 
9834         /* unused */
9835         __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9836         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9837         callee->in_async_callback_fn = true;
9838         callee->callback_ret_range = retval_range(0, 1);
9839         return 0;
9840 }
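
/* A minimal sketch (not part of verifier.c), with hypothetical names, of the
 * timer callback convention modelled above: the callback receives (map, key,
 * value) for the map value that embeds the struct bpf_timer. Assumes
 * <linux/bpf.h>, <bpf/bpf_helpers.h>, and CLOCK_MONOTONIC from vmlinux.h or
 * <time.h>.
 *
 *        struct map_val { struct bpf_timer t; };
 *
 *        struct {
 *                __uint(type, BPF_MAP_TYPE_ARRAY);
 *                __uint(max_entries, 1);
 *                __type(key, int);
 *                __type(value, struct map_val);
 *        } timers SEC(".maps");
 *
 *        static int timer_cb(void *map, int *key, struct map_val *val)
 *        {
 *                return 0;
 *        }
 *
 *        SEC("tc")
 *        int arm_timer(struct __sk_buff *skb)
 *        {
 *                int key = 0;
 *                struct map_val *val = bpf_map_lookup_elem(&timers, &key);
 *
 *                if (!val)
 *                        return 0;
 *                bpf_timer_init(&val->t, &timers, CLOCK_MONOTONIC);
 *                bpf_timer_set_callback(&val->t, timer_cb);
 *                bpf_timer_start(&val->t, 0, 0);
 *                return 0;
 *        }
 */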
9841 
9842 static int set_find_vma_callback_state(struct bpf_verifier_env *env,
9843                                        struct bpf_func_state *caller,
9844                                        struct bpf_func_state *callee,
9845                                        int insn_idx)
9846 {
9847         /* bpf_find_vma(struct task_struct *task, u64 addr,
9848          *               void *callback_fn, void *callback_ctx, u64 flags)
9849          * (callback_fn)(struct task_struct *task,
9850          *               struct vm_area_struct *vma, void *callback_ctx);
9851          */
9852         callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9853 
9854         callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
9855         __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9856         callee->regs[BPF_REG_2].btf =  btf_vmlinux;
9857         callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
9858 
9859         /* pointer to stack or null */
9860         callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
9861 
9862         /* unused */
9863         __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9864         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9865         callee->in_callback_fn = true;
9866         callee->callback_ret_range = retval_range(0, 1);
9867         return 0;
9868 }
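
/* A minimal sketch (not part of verifier.c) of the bpf_find_vma() convention
 * set up above, with hypothetical names; assumes vmlinux.h (for task_struct /
 * vm_area_struct) plus <bpf/bpf_helpers.h>, attached as a kprobe.
 *
 *        static long vma_cb(struct task_struct *task, struct vm_area_struct *vma,
 *                           void *ctx)
 *        {
 *                __u64 *vm_start = ctx;
 *
 *                *vm_start = vma->vm_start;
 *                return 0;
 *        }
 *
 *        SEC("kprobe/do_nanosleep")
 *        int probe(void *ctx)
 *        {
 *                struct task_struct *task = bpf_get_current_task_btf();
 *                __u64 vm_start = 0;
 *
 *                bpf_find_vma(task, 0, vma_cb, &vm_start, 0);
 *                return 0;
 *        }
 */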
9869 
9870 static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
9871                                            struct bpf_func_state *caller,
9872                                            struct bpf_func_state *callee,
9873                                            int insn_idx)
9874 {
9875         /* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn,
9876          *                        void *callback_ctx, u64 flags);
9877          * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
9878          */
9879         __mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
9880         mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
9881         callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9882 
9883         /* unused */
9884         __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9885         __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9886         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9887 
9888         callee->in_callback_fn = true;
9889         callee->callback_ret_range = retval_range(0, 1);
9890         return 0;
9891 }
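
/* A minimal sketch (not part of verifier.c) of the bpf_user_ringbuf_drain()
 * convention set up above, with hypothetical names: the callback receives a
 * read-only dynptr for each sample plus callback_ctx, and returns 0 to keep
 * draining or 1 to stop. Assumes a program type where the helper is available
 * and <bpf/bpf_helpers.h>.
 *
 *        struct {
 *                __uint(type, BPF_MAP_TYPE_USER_RINGBUF);
 *                __uint(max_entries, 4096);
 *        } user_rb SEC(".maps");
 *
 *        static long drain_cb(struct bpf_dynptr *dynptr, void *ctx)
 *        {
 *                __u64 sample;
 *
 *                if (bpf_dynptr_read(&sample, sizeof(sample), dynptr, 0, 0))
 *                        return 1;       // malformed sample, stop draining
 *                return 0;               // keep draining
 *        }
 *
 *        SEC("tp/syscalls/sys_enter_getpgid")
 *        int drain(void *ctx)
 *        {
 *                bpf_user_ringbuf_drain(&user_rb, drain_cb, NULL, 0);
 *                return 0;
 *        }
 */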
9892 
9893 static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
9894                                          struct bpf_func_state *caller,
9895                                          struct bpf_func_state *callee,
9896                                          int insn_idx)
9897 {
9898         /* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
9899          *                     bool (*less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
9900          *
9901          * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
9902          * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
9903          * by this point, so look at 'root'
9904          */
9905         struct btf_field *field;
9906 
9907         field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off,
9908                                       BPF_RB_ROOT);
9909         if (!field || !field->graph_root.value_btf_id)
9910                 return -EFAULT;
9911 
9912         mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
9913         ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
9914         mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
9915         ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
9916 
9917         __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9918         __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9919         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9920         callee->in_callback_fn = true;
9921         callee->callback_ret_range = retval_range(0, 1);
9922         return 0;
9923 }
9924 
9925 static bool is_rbtree_lock_required_kfunc(u32 btf_id);
9926 
9927 /* Are we currently verifying the callback for a rbtree helper that must
9928  * be called with lock held? If so, no need to complain about unreleased
9929  * lock
9930  */
9931 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
9932 {
9933         struct bpf_verifier_state *state = env->cur_state;
9934         struct bpf_insn *insn = env->prog->insnsi;
9935         struct bpf_func_state *callee;
9936         int kfunc_btf_id;
9937 
9938         if (!state->curframe)
9939                 return false;
9940 
9941         callee = state->frame[state->curframe];
9942 
9943         if (!callee->in_callback_fn)
9944                 return false;
9945 
9946         kfunc_btf_id = insn[callee->callsite].imm;
9947         return is_rbtree_lock_required_kfunc(kfunc_btf_id);
9948 }
9949 
9950 static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg,
9951                                 bool return_32bit)
9952 {
9953         if (return_32bit)
9954                 return range.minval <= reg->s32_min_value && reg->s32_max_value <= range.maxval;
9955         else
9956                 return range.minval <= reg->smin_value && reg->smax_value <= range.maxval;
9957 }
9958 
9959 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
9960 {
9961         struct bpf_verifier_state *state = env->cur_state, *prev_st;
9962         struct bpf_func_state *caller, *callee;
9963         struct bpf_reg_state *r0;
9964         bool in_callback_fn;
9965         int err;
9966 
9967         callee = state->frame[state->curframe];
9968         r0 = &callee->regs[BPF_REG_0];
9969         if (r0->type == PTR_TO_STACK) {
9970                 /* technically it's ok to return caller's stack pointer
9971                  * (or caller's caller's pointer) back to the caller,
9972                  * since these pointers are valid. Only current stack
9973                  * pointer will be invalid as soon as function exits,
9974                  * but let's be conservative
9975                  */
9976                 verbose(env, "cannot return stack pointer to the caller\n");
9977                 return -EINVAL;
9978         }
9979 
9980         caller = state->frame[state->curframe - 1];
9981         if (callee->in_callback_fn) {
9982                 if (r0->type != SCALAR_VALUE) {
9983                         verbose(env, "R0 not a scalar value\n");
9984                         return -EACCES;
9985                 }
9986 
9987                 /* we are going to rely on register's precise value */
9988                 err = mark_reg_read(env, r0, r0->parent, REG_LIVE_READ64);
9989                 err = err ?: mark_chain_precision(env, BPF_REG_0);
9990                 if (err)
9991                         return err;
9992 
9993                 /* enforce R0 return value range, and bpf_callback_t returns 64bit */
9994                 if (!retval_range_within(callee->callback_ret_range, r0, false)) {
9995                         verbose_invalid_scalar(env, r0, callee->callback_ret_range,
9996                                                "At callback return", "R0");
9997                         return -EINVAL;
9998                 }
9999                 if (!calls_callback(env, callee->callsite)) {
10000                         verbose(env, "BUG: in callback at %d, callsite %d !calls_callback\n",
10001                                 *insn_idx, callee->callsite);
10002                         return -EFAULT;
10003                 }
10004         } else {
10005                 /* return to the caller whatever r0 had in the callee */
10006                 caller->regs[BPF_REG_0] = *r0;
10007         }
10008 
10009         /* callback_fn frame should have released its own additions to parent's
10010          * reference state at this point, or check_reference_leak would
10011          * complain, hence it must be the same as the caller. There is no need
10012          * to copy it back.
10013          */
10014         if (!callee->in_callback_fn) {
10015                 /* Transfer references to the caller */
10016                 err = copy_reference_state(caller, callee);
10017                 if (err)
10018                         return err;
10019         }
10020 
10021         /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite,
10022          * there function call logic would reschedule callback visit. If iteration
10023          * converges is_state_visited() would prune that visit eventually.
10024          */
10025         in_callback_fn = callee->in_callback_fn;
10026         if (in_callback_fn)
10027                 *insn_idx = callee->callsite;
10028         else
10029                 *insn_idx = callee->callsite + 1;
10030 
10031         if (env->log.level & BPF_LOG_LEVEL) {
10032                 verbose(env, "returning from callee:\n");
10033                 print_verifier_state(env, callee, true);
10034                 verbose(env, "to caller at %d:\n", *insn_idx);
10035                 print_verifier_state(env, caller, true);
10036         }
10037         /* clear everything in the callee. In case of exceptional exits using
10038          * bpf_throw, this will be done by copy_verifier_state for extra frames. */
10039         free_func_state(callee);
10040         state->frame[state->curframe--] = NULL;
10041 
10042         /* for callbacks widen imprecise scalars to make programs like below verify:
10043          *
10044          *   struct ctx { int i; }
10045          *   void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
10046          *   ...
10047          *   struct ctx = { .i = 0; }
10048          *   bpf_loop(100, cb, &ctx, 0);
10049          *
10050          * This is similar to what is done in process_iter_next_call() for open
10051          * coded iterators.
10052          */
10053         prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL;
10054         if (prev_st) {
10055                 err = widen_imprecise_scalars(env, prev_st, state);
10056                 if (err)
10057                         return err;
10058         }
10059         return 0;
10060 }
10061 
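/* A sketch of what the callback return-value check above rejects (the callback
 * and context names are illustrative, not taken from this file): for bpf_loop()
 * the callback_ret_range installed by set_loop_callback_state() only admits
 * 0 ("continue") and 1 ("break"), so a callback such as
 *
 *   static long cb(u64 idx, void *ctx)
 *   {
 *           return 2;
 *   }
 *   ...
 *   bpf_loop(10, cb, NULL, 0);
 *
 * fails verification with an "At callback return" range error, while returning
 * 0 or 1 verifies.
 */
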
10062 static int do_refine_retval_range(struct bpf_verifier_env *env,
10063                                   struct bpf_reg_state *regs, int ret_type,
10064                                   int func_id,
10065                                   struct bpf_call_arg_meta *meta)
10066 {
10067         struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
10068 
10069         if (ret_type != RET_INTEGER)
10070                 return 0;
10071 
10072         switch (func_id) {
10073         case BPF_FUNC_get_stack:
10074         case BPF_FUNC_get_task_stack:
10075         case BPF_FUNC_probe_read_str:
10076         case BPF_FUNC_probe_read_kernel_str:
10077         case BPF_FUNC_probe_read_user_str:
10078                 ret_reg->smax_value = meta->msize_max_value;
10079                 ret_reg->s32_max_value = meta->msize_max_value;
10080                 ret_reg->smin_value = -MAX_ERRNO;
10081                 ret_reg->s32_min_value = -MAX_ERRNO;
10082                 reg_bounds_sync(ret_reg);
10083                 break;
10084         case BPF_FUNC_get_smp_processor_id:
10085                 ret_reg->umax_value = nr_cpu_ids - 1;
10086                 ret_reg->u32_max_value = nr_cpu_ids - 1;
10087                 ret_reg->smax_value = nr_cpu_ids - 1;
10088                 ret_reg->s32_max_value = nr_cpu_ids - 1;
10089                 ret_reg->umin_value = 0;
10090                 ret_reg->u32_min_value = 0;
10091                 ret_reg->smin_value = 0;
10092                 ret_reg->s32_min_value = 0;
10093                 reg_bounds_sync(ret_reg);
10094                 break;
10095         }
10096 
10097         return reg_bounds_sanity_check(env, ret_reg, "retval");
10098 }
10099 
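/* Why the refinement above is useful, sketched with made-up array names: once
 * R0 of bpf_get_smp_processor_id() is known to be below nr_cpu_ids, a program
 * can use it directly as an index into an array that is at least nr_cpu_ids
 * entries long and still pass the bounds checks, e.g.
 *
 *   u64 per_cpu_hits[MAX_CPUS];    (MAX_CPUS >= nr_cpu_ids on this kernel)
 *   ...
 *   per_cpu_hits[bpf_get_smp_processor_id()]++;
 *
 * Likewise, the get_stack/probe_read_str-style helpers have their positive
 * return value capped at the size argument (msize_max_value) and their
 * negative return value bounded by -MAX_ERRNO.
 */
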
10100 static int
10101 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
10102                 int func_id, int insn_idx)
10103 {
10104         struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
10105         struct bpf_map *map = meta->map_ptr;
10106 
10107         if (func_id != BPF_FUNC_tail_call &&
10108             func_id != BPF_FUNC_map_lookup_elem &&
10109             func_id != BPF_FUNC_map_update_elem &&
10110             func_id != BPF_FUNC_map_delete_elem &&
10111             func_id != BPF_FUNC_map_push_elem &&
10112             func_id != BPF_FUNC_map_pop_elem &&
10113             func_id != BPF_FUNC_map_peek_elem &&
10114             func_id != BPF_FUNC_for_each_map_elem &&
10115             func_id != BPF_FUNC_redirect_map &&
10116             func_id != BPF_FUNC_map_lookup_percpu_elem)
10117                 return 0;
10118 
10119         if (map == NULL) {
10120                 verbose(env, "kernel subsystem misconfigured verifier\n");
10121                 return -EINVAL;
10122         }
10123 
10124         /* In case of a read-only map, some additional restrictions
10125          * need to be applied in order to prevent the program from
10126          * altering the state of the map.
10127          */
10128         if ((map->map_flags & BPF_F_RDONLY_PROG) &&
10129             (func_id == BPF_FUNC_map_delete_elem ||
10130              func_id == BPF_FUNC_map_update_elem ||
10131              func_id == BPF_FUNC_map_push_elem ||
10132              func_id == BPF_FUNC_map_pop_elem)) {
10133                 verbose(env, "write into map forbidden\n");
10134                 return -EACCES;
10135         }
10136 
10137         if (!aux->map_ptr_state.map_ptr)
10138                 bpf_map_ptr_store(aux, meta->map_ptr,
10139                                   !meta->map_ptr->bypass_spec_v1, false);
10140         else if (aux->map_ptr_state.map_ptr != meta->map_ptr)
10141                 bpf_map_ptr_store(aux, meta->map_ptr,
10142                                   !meta->map_ptr->bypass_spec_v1, true);
10143         return 0;
10144 }
10145 
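/* Illustration of the BPF_F_RDONLY_PROG restriction enforced above (the map
 * name is made up): a map created with BPF_F_RDONLY_PROG may be read but not
 * written from the program side.
 *
 *   struct {
 *           __uint(type, BPF_MAP_TYPE_ARRAY);
 *           __uint(max_entries, 1);
 *           __type(key, u32);
 *           __type(value, u64);
 *           __uint(map_flags, BPF_F_RDONLY_PROG);
 *   } rd_only SEC(".maps");
 *
 *   u32 key = 0;
 *   u64 one = 1;
 *   bpf_map_lookup_elem(&rd_only, &key);             <- allowed
 *   bpf_map_update_elem(&rd_only, &key, &one, 0);    <- "write into map forbidden"
 */
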
10146 static int
10147 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
10148                 int func_id, int insn_idx)
10149 {
10150         struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
10151         struct bpf_reg_state *regs = cur_regs(env), *reg;
10152         struct bpf_map *map = meta->map_ptr;
10153         u64 val, max;
10154         int err;
10155 
10156         if (func_id != BPF_FUNC_tail_call)
10157                 return 0;
10158         if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
10159                 verbose(env, "kernel subsystem misconfigured verifier\n");
10160                 return -EINVAL;
10161         }
10162 
10163         reg = &regs[BPF_REG_3];
10164         val = reg->var_off.value;
10165         max = map->max_entries;
10166 
10167         if (!(is_reg_const(reg, false) && val < max)) {
10168                 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
10169                 return 0;
10170         }
10171 
10172         err = mark_chain_precision(env, BPF_REG_3);
10173         if (err)
10174                 return err;
10175         if (bpf_map_key_unseen(aux))
10176                 bpf_map_key_store(aux, val);
10177         else if (!bpf_map_key_poisoned(aux) &&
10178                   bpf_map_key_immediate(aux) != val)
10179                 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
10180         return 0;
10181 }
10182 
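/* What record_func_key() enables, sketched with an illustrative prog array
 * name: if the index in R3 is a verifier-known constant below max_entries,
 * the key recorded here lets the tail call later be patched into a direct
 * jump; a non-constant index (or conflicting constants at the same callsite)
 * poisons the key and keeps the generic, bounds-checked path.
 *
 *   bpf_tail_call(ctx, &jmp_table, 3);      constant index -> key 3 recorded
 *   bpf_tail_call(ctx, &jmp_table, idx);    unknown index  -> BPF_MAP_KEY_POISON
 */
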
10183 static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
10184 {
10185         struct bpf_func_state *state = cur_func(env);
10186         bool refs_lingering = false;
10187         int i;
10188 
10189         if (!exception_exit && state->frameno && !state->in_callback_fn)
10190                 return 0;
10191 
10192         for (i = 0; i < state->acquired_refs; i++) {
10193                 if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
10194                         continue;
10195                 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
10196                         state->refs[i].id, state->refs[i].insn_idx);
10197                 refs_lingering = true;
10198         }
10199         return refs_lingering ? -EINVAL : 0;
10200 }
10201 
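/* The kind of program check_reference_leak() is meant to catch, with the
 * socket-tuple setup elided and names illustrative: a reference acquired via
 * bpf_sk_lookup_tcp() must be released on every path before the program exits
 * or tail-calls, otherwise the verifier prints
 * "Unreleased reference id=... alloc_insn=..." and rejects the program.
 *
 *   struct bpf_sock *sk;
 *
 *   sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 *   if (!sk)
 *           return 0;
 *   ...
 *   return 0;      missing bpf_sk_release(sk) -> unreleased reference
 */
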
10202 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
10203                                    struct bpf_reg_state *regs)
10204 {
10205         struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
10206         struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
10207         struct bpf_map *fmt_map = fmt_reg->map_ptr;
10208         struct bpf_bprintf_data data = {};
10209         int err, fmt_map_off, num_args;
10210         u64 fmt_addr;
10211         char *fmt;
10212 
10213         /* data must be an array of u64 */
10214         if (data_len_reg->var_off.value % 8)
10215                 return -EINVAL;
10216         num_args = data_len_reg->var_off.value / 8;
10217 
10218         /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
10219          * and map_direct_value_addr is set.
10220          */
10221         fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
10222         err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
10223                                                   fmt_map_off);
10224         if (err) {
10225                 verbose(env, "verifier bug\n");
10226                 return -EFAULT;
10227         }
10228         fmt = (char *)(long)fmt_addr + fmt_map_off;
10229 
10230         /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, so we
10231          * can focus on validating the format specifiers.
10232          */
10233         err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
10234         if (err < 0)
10235                 verbose(env, "Invalid format string\n");
10236 
10237         return err;
10238 }
10239 
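/* For reference, a bpf_snprintf() call shaped to satisfy the checks above
 * (buffer and argument names are illustrative): the format string must be a
 * constant living in read-only map data (a plain string literal placed in
 * .rodata by the compiler and libbpf is enough), and the arguments are passed
 * as an array of u64 together with its size in bytes, which therefore must be
 * a multiple of 8.
 *
 *   char out[64];
 *   u64 args[] = { cpu, pid };
 *
 *   bpf_snprintf(out, sizeof(out), "cpu %u pid %d", args, sizeof(args));
 */
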
10240 static int check_get_func_ip(struct bpf_verifier_env *env)
10241 {
10242         enum bpf_prog_type type = resolve_prog_type(env->prog);
10243         int func_id = BPF_FUNC_get_func_ip;
10244 
10245         if (type == BPF_PROG_TYPE_TRACING) {
10246                 if (!bpf_prog_has_trampoline(env->prog)) {
10247                         verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
10248                                 func_id_name(func_id), func_id);
10249                         return -ENOTSUPP;
10250                 }
10251                 return 0;
10252         } else if (type == BPF_PROG_TYPE_KPROBE) {
10253                 return 0;
10254         }
10255 
10256         verbose(env, "func %s#%d not supported for program type %d\n",
10257                 func_id_name(func_id), func_id, type);
10258         return -ENOTSUPP;
10259 }
10260 
10261 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
10262 {
10263         return &env->insn_aux_data[env->insn_idx];
10264 }
10265 
10266 static bool loop_flag_is_zero(struct bpf_verifier_env *env)
10267 {
10268         struct bpf_reg_state *regs = cur_regs(env);
10269         struct bpf_reg_state *reg = &regs[BPF_REG_4];
10270         bool reg_is_null = register_is_null(reg);
10271 
10272         if (reg_is_null)
10273                 mark_chain_precision(env, BPF_REG_4);
10274 
10275         return reg_is_null;
10276 }
10277 
10278 static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
10279 {
10280         struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
10281 
10282         if (!state->initialized) {
10283                 state->initialized = 1;
10284                 state->fit_for_inline = loop_flag_is_zero(env);
10285                 state->callback_subprogno = subprogno;
10286                 return;
10287         }
10288 
10289         if (!state->fit_for_inline)
10290                 return;
10291 
10292         state->fit_for_inline = (loop_flag_is_zero(env) &&
10293                                  state->callback_subprogno == subprogno);
10294 }
10295 
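/* The state tracked above feeds the bpf_loop() inlining decision made later
 * during fixup: a callsite remains fit_for_inline only if the flags argument
 * is a known zero each time the callsite is verified and the callback
 * subprogram never changes. Sketch (callback name illustrative):
 *
 *   bpf_loop(nr, cb, &ctx, 0);        stays eligible (flags known 0, same cb)
 *   bpf_loop(nr, cb, &ctx, flags);    not eligible unless flags is a known 0
 */
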
10296 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
10297                              int *insn_idx_p)
10298 {
10299         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
10300         bool returns_cpu_specific_alloc_ptr = false;
10301         const struct bpf_func_proto *fn = NULL;
10302         enum bpf_return_type ret_type;
10303         enum bpf_type_flag ret_flag;
10304         struct bpf_reg_state *regs;
10305         struct bpf_call_arg_meta meta;
10306         int insn_idx = *insn_idx_p;
10307         bool changes_data;
10308         int i, err, func_id;
10309 
10310         /* find function prototype */
10311         func_id = insn->imm;
10312         if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
10313                 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
10314                         func_id);
10315                 return -EINVAL;
10316         }
10317 
10318         if (env->ops->get_func_proto)
10319                 fn = env->ops->get_func_proto(func_id, env->prog);
10320         if (!fn) {
10321                 verbose(env, "program of this type cannot use helper %s#%d\n",
10322                         func_id_name(func_id), func_id);
10323                 return -EINVAL;
10324         }
10325 
10326         /* eBPF programs must be GPL compatible to use GPL-ed functions */
10327         if (!env->prog->gpl_compatible && fn->gpl_only) {
10328                 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
10329                 return -EINVAL;
10330         }
10331 
10332         if (fn->allowed && !fn->allowed(env->prog)) {
10333                 verbose(env, "helper call is not allowed in probe\n");
10334                 return -EINVAL;
10335         }
10336 
10337         if (!in_sleepable(env) && fn->might_sleep) {
10338                 verbose(env, "helper call might sleep in a non-sleepable prog\n");
10339                 return -EINVAL;
10340         }
10341 
10342         /* With LD_ABS/IND some JITs save/restore skb from r1. */
10343         changes_data = bpf_helper_changes_pkt_data(fn->func);
10344         if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
10345                 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
10346                         func_id_name(func_id), func_id);
10347                 return -EINVAL;
10348         }
10349 
10350         memset(&meta, 0, sizeof(meta));
10351         meta.pkt_access = fn->pkt_access;
10352 
10353         err = check_func_proto(fn, func_id);
10354         if (err) {
10355                 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
10356                         func_id_name(func_id), func_id);
10357                 return err;
10358         }
10359 
10360         if (env->cur_state->active_rcu_lock) {
10361                 if (fn->might_sleep) {
10362                         verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
10363                                 func_id_name(func_id), func_id);
10364                         return -EINVAL;
10365                 }
10366 
10367                 if (in_sleepable(env) && is_storage_get_function(func_id))
10368                         env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
10369         }
10370 
10371         if (env->cur_state->active_preempt_lock) {
10372                 if (fn->might_sleep) {
10373                         verbose(env, "sleepable helper %s#%d in non-preemptible region\n",
10374                                 func_id_name(func_id), func_id);
10375                         return -EINVAL;
10376                 }
10377 
10378                 if (in_sleepable(env) && is_storage_get_function(func_id))
10379                         env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
10380         }
10381 
10382         meta.func_id = func_id;
10383         /* check args */
10384         for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
10385                 err = check_func_arg(env, i, &meta, fn, insn_idx);
10386                 if (err)
10387                         return err;
10388         }
10389 
10390         err = record_func_map(env, &meta, func_id, insn_idx);
10391         if (err)
10392                 return err;
10393 
10394         err = record_func_key(env, &meta, func_id, insn_idx);
10395         if (err)
10396                 return err;
10397 
10398         /* Mark slots with STACK_MISC in case of raw mode; the stack offset
10399          * is inferred from register state.
10400          */
10401         for (i = 0; i < meta.access_size; i++) {
10402                 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
10403                                        BPF_WRITE, -1, false, false);
10404                 if (err)
10405                         return err;
10406         }
10407 
10408         regs = cur_regs(env);
10409 
10410         if (meta.release_regno) {
10411                 err = -EINVAL;
10412                 /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
10413                  * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
10414                  * is safe to do directly.
10415                  */
10416                 if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
10417                         if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
10418                                 verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
10419                                 return -EFAULT;
10420                         }
10421                         err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
10422                 } else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) {
10423                         u32 ref_obj_id = meta.ref_obj_id;
10424                         bool in_rcu = in_rcu_cs(env);
10425                         struct bpf_func_state *state;
10426                         struct bpf_reg_state *reg;
10427 
10428                         err = release_reference_state(cur_func(env), ref_obj_id);
10429                         if (!err) {
10430                                 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
10431                                         if (reg->ref_obj_id == ref_obj_id) {
10432                                                 if (in_rcu && (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
10433                                                         reg->ref_obj_id = 0;
10434                                                         reg->type &= ~MEM_ALLOC;
10435                                                         reg->type |= MEM_RCU;
10436                                                 } else {
10437                                                         mark_reg_invalid(env, reg);
10438                                                 }
10439                                         }
10440                                 }));
10441                         }
10442                 } else if (meta.ref_obj_id) {
10443                         err = release_reference(env, meta.ref_obj_id);
10444                 } else if (register_is_null(&regs[meta.release_regno])) {
10445                         /* meta.ref_obj_id can only be 0 if the register that is meant to
10446                          * be released is NULL; that register is always > R0.
10447                          */
10448                         err = 0;
10449                 }
10450                 if (err) {
10451                         verbose(env, "func %s#%d reference has not been acquired before\n",
10452                                 func_id_name(func_id), func_id);
10453                         return err;
10454                 }
10455         }
10456 
10457         switch (func_id) {
10458         case BPF_FUNC_tail_call:
10459                 err = check_reference_leak(env, false);
10460                 if (err) {
10461                         verbose(env, "tail_call would lead to reference leak\n");
10462                         return err;
10463                 }
10464                 break;
10465         case BPF_FUNC_get_local_storage:
10466                 /* check that the flags argument in get_local_storage(map, flags) is 0;
10467                  * this is required because get_local_storage() can't return an error.
10468                  */
10469                 if (!register_is_null(&regs[BPF_REG_2])) {
10470                         verbose(env, "get_local_storage() doesn't support non-zero flags\n");
10471                         return -EINVAL;
10472                 }
10473                 break;
10474         case BPF_FUNC_for_each_map_elem:
10475                 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10476                                          set_map_elem_callback_state);
10477                 break;
10478         case BPF_FUNC_timer_set_callback:
10479                 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10480                                          set_timer_callback_state);
10481                 break;
10482         case BPF_FUNC_find_vma:
10483                 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10484                                          set_find_vma_callback_state);
10485                 break;
10486         case BPF_FUNC_snprintf:
10487                 err = check_bpf_snprintf_call(env, regs);
10488                 break;
10489         case BPF_FUNC_loop:
10490                 update_loop_inline_state(env, meta.subprogno);
10491                 /* Verifier relies on R1 value to determine if bpf_loop() iteration
10492                  * is finished, thus mark it precise.
10493                  */
10494                 err = mark_chain_precision(env, BPF_REG_1);
10495                 if (err)
10496                         return err;
10497                 if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) {
10498                         err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10499                                                  set_loop_callback_state);
10500                 } else {
10501                         cur_func(env)->callback_depth = 0;
10502                         if (env->log.level & BPF_LOG_LEVEL2)
10503                                 verbose(env, "frame%d bpf_loop iteration limit reached\n",
10504                                         env->cur_state->curframe);
10505                 }
10506                 break;
10507         case BPF_FUNC_dynptr_from_mem:
10508                 if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
10509                         verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
10510                                 reg_type_str(env, regs[BPF_REG_1].type));
10511                         return -EACCES;
10512                 }
10513                 break;
10514         case BPF_FUNC_set_retval:
10515                 if (prog_type == BPF_PROG_TYPE_LSM &&
10516                     env->prog->expected_attach_type == BPF_LSM_CGROUP) {
10517                         if (!env->prog->aux->attach_func_proto->type) {
10518                                 /* Make sure programs that attach to void
10519                                  * hooks don't try to modify return value.
10520                                  */
10521                                 verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
10522                                 return -EINVAL;
10523                         }
10524                 }
10525                 break;
10526         case BPF_FUNC_dynptr_data:
10527         {
10528                 struct bpf_reg_state *reg;
10529                 int id, ref_obj_id;
10530 
10531                 reg = get_dynptr_arg_reg(env, fn, regs);
10532                 if (!reg)
10533                         return -EFAULT;
10534 
10535 
10536                 if (meta.dynptr_id) {
10537                         verbose(env, "verifier internal error: meta.dynptr_id already set\n");
10538                         return -EFAULT;
10539                 }
10540                 if (meta.ref_obj_id) {
10541                         verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
10542                         return -EFAULT;
10543                 }
10544 
10545                 id = dynptr_id(env, reg);
10546                 if (id < 0) {
10547                         verbose(env, "verifier internal error: failed to obtain dynptr id\n");
10548                         return id;
10549                 }
10550 
10551                 ref_obj_id = dynptr_ref_obj_id(env, reg);
10552                 if (ref_obj_id < 0) {
10553                         verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
10554                         return ref_obj_id;
10555                 }
10556 
10557                 meta.dynptr_id = id;
10558                 meta.ref_obj_id = ref_obj_id;
10559 
10560                 break;
10561         }
10562         case BPF_FUNC_dynptr_write:
10563         {
10564                 enum bpf_dynptr_type dynptr_type;
10565                 struct bpf_reg_state *reg;
10566 
10567                 reg = get_dynptr_arg_reg(env, fn, regs);
10568                 if (!reg)
10569                         return -EFAULT;
10570 
10571                 dynptr_type = dynptr_get_type(env, reg);
10572                 if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
10573                         return -EFAULT;
10574 
10575                 if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
10576                         /* this will trigger clear_all_pkt_pointers(), which will
10577                          * invalidate all dynptr slices associated with the skb
10578                          */
10579                         changes_data = true;
10580 
10581                 break;
10582         }
10583         case BPF_FUNC_per_cpu_ptr:
10584         case BPF_FUNC_this_cpu_ptr:
10585         {
10586                 struct bpf_reg_state *reg = &regs[BPF_REG_1];
10587                 const struct btf_type *type;
10588 
10589                 if (reg->type & MEM_RCU) {
10590                         type = btf_type_by_id(reg->btf, reg->btf_id);
10591                         if (!type || !btf_type_is_struct(type)) {
10592                                 verbose(env, "Helper has invalid btf/btf_id in R1\n");
10593                                 return -EFAULT;
10594                         }
10595                         returns_cpu_specific_alloc_ptr = true;
10596                         env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true;
10597                 }
10598                 break;
10599         }
10600         case BPF_FUNC_user_ringbuf_drain:
10601                 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10602                                          set_user_ringbuf_callback_state);
10603                 break;
10604         }
10605 
10606         if (err)
10607                 return err;
10608 
10609         /* reset caller saved regs */
10610         for (i = 0; i < CALLER_SAVED_REGS; i++) {
10611                 mark_reg_not_init(env, regs, caller_saved[i]);
10612                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
10613         }
10614 
10615         /* helper call returns 64-bit value. */
10616         regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
10617 
10618         /* update return register (already marked as written above) */
10619         ret_type = fn->ret_type;
10620         ret_flag = type_flag(ret_type);
10621 
10622         switch (base_type(ret_type)) {
10623         case RET_INTEGER:
10624                 /* sets type to SCALAR_VALUE */
10625                 mark_reg_unknown(env, regs, BPF_REG_0);
10626                 break;
10627         case RET_VOID:
10628                 regs[BPF_REG_0].type = NOT_INIT;
10629                 break;
10630         case RET_PTR_TO_MAP_VALUE:
10631                 /* No offset, variable or fixed, has been applied yet */
10632                 mark_reg_known_zero(env, regs, BPF_REG_0);
10633                 /* remember map_ptr, so that check_map_access()
10634                  * can check 'value_size' boundary of memory access
10635                  * to map element returned from bpf_map_lookup_elem()
10636                  */
10637                 if (meta.map_ptr == NULL) {
10638                         verbose(env,
10639                                 "kernel subsystem misconfigured verifier\n");
10640                         return -EINVAL;
10641                 }
10642                 regs[BPF_REG_0].map_ptr = meta.map_ptr;
10643                 regs[BPF_REG_0].map_uid = meta.map_uid;
10644                 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
10645                 if (!type_may_be_null(ret_type) &&
10646                     btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
10647                         regs[BPF_REG_0].id = ++env->id_gen;
10648                 }
10649                 break;
10650         case RET_PTR_TO_SOCKET:
10651                 mark_reg_known_zero(env, regs, BPF_REG_0);
10652                 regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
10653                 break;
10654         case RET_PTR_TO_SOCK_COMMON:
10655                 mark_reg_known_zero(env, regs, BPF_REG_0);
10656                 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
10657                 break;
10658         case RET_PTR_TO_TCP_SOCK:
10659                 mark_reg_known_zero(env, regs, BPF_REG_0);
10660                 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
10661                 break;
10662         case RET_PTR_TO_MEM:
10663                 mark_reg_known_zero(env, regs, BPF_REG_0);
10664                 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
10665                 regs[BPF_REG_0].mem_size = meta.mem_size;
10666                 break;
10667         case RET_PTR_TO_MEM_OR_BTF_ID:
10668         {
10669                 const struct btf_type *t;
10670 
10671                 mark_reg_known_zero(env, regs, BPF_REG_0);
10672                 t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
10673                 if (!btf_type_is_struct(t)) {
10674                         u32 tsize;
10675                         const struct btf_type *ret;
10676                         const char *tname;
10677 
10678                         /* resolve the type size of ksym. */
10679                         ret = btf_resolve_size(meta.ret_btf, t, &tsize);
10680                         if (IS_ERR(ret)) {
10681                                 tname = btf_name_by_offset(meta.ret_btf, t->name_off);
10682                                 verbose(env, "unable to resolve the size of type '%s': %ld\n",
10683                                         tname, PTR_ERR(ret));
10684                                 return -EINVAL;
10685                         }
10686                         regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
10687                         regs[BPF_REG_0].mem_size = tsize;
10688                 } else {
10689                         if (returns_cpu_specific_alloc_ptr) {
10690                                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU;
10691                         } else {
10692                                 /* MEM_RDONLY may be carried over from ret_flag, but it
10693                                  * doesn't apply to PTR_TO_BTF_ID. Drop it, otherwise
10694                                  * it will confuse the PTR_TO_BTF_ID check in
10695                                  * check_mem_access().
10696                                  */
10697                                 ret_flag &= ~MEM_RDONLY;
10698                                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
10699                         }
10700 
10701                         regs[BPF_REG_0].btf = meta.ret_btf;
10702                         regs[BPF_REG_0].btf_id = meta.ret_btf_id;
10703                 }
10704                 break;
10705         }
10706         case RET_PTR_TO_BTF_ID:
10707         {
10708                 struct btf *ret_btf;
10709                 int ret_btf_id;
10710 
10711                 mark_reg_known_zero(env, regs, BPF_REG_0);
10712                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
10713                 if (func_id == BPF_FUNC_kptr_xchg) {
10714                         ret_btf = meta.kptr_field->kptr.btf;
10715                         ret_btf_id = meta.kptr_field->kptr.btf_id;
10716                         if (!btf_is_kernel(ret_btf)) {
10717                                 regs[BPF_REG_0].type |= MEM_ALLOC;
10718                                 if (meta.kptr_field->type == BPF_KPTR_PERCPU)
10719                                         regs[BPF_REG_0].type |= MEM_PERCPU;
10720                         }
10721                 } else {
10722                         if (fn->ret_btf_id == BPF_PTR_POISON) {
10723                                 verbose(env, "verifier internal error:");
10724                                 verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n",
10725                                         func_id_name(func_id));
10726                                 return -EINVAL;
10727                         }
10728                         ret_btf = btf_vmlinux;
10729                         ret_btf_id = *fn->ret_btf_id;
10730                 }
10731                 if (ret_btf_id == 0) {
10732                         verbose(env, "invalid return type %u of func %s#%d\n",
10733                                 base_type(ret_type), func_id_name(func_id),
10734                                 func_id);
10735                         return -EINVAL;
10736                 }
10737                 regs[BPF_REG_0].btf = ret_btf;
10738                 regs[BPF_REG_0].btf_id = ret_btf_id;
10739                 break;
10740         }
10741         default:
10742                 verbose(env, "unknown return type %u of func %s#%d\n",
10743                         base_type(ret_type), func_id_name(func_id), func_id);
10744                 return -EINVAL;
10745         }
10746 
10747         if (type_may_be_null(regs[BPF_REG_0].type))
10748                 regs[BPF_REG_0].id = ++env->id_gen;
10749 
10750         if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
10751                 verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
10752                         func_id_name(func_id), func_id);
10753                 return -EFAULT;
10754         }
10755 
10756         if (is_dynptr_ref_function(func_id))
10757                 regs[BPF_REG_0].dynptr_id = meta.dynptr_id;
10758 
10759         if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
10760                 /* For release_reference() */
10761                 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
10762         } else if (is_acquire_function(func_id, meta.map_ptr)) {
10763                 int id = acquire_reference_state(env, insn_idx);
10764 
10765                 if (id < 0)
10766                         return id;
10767                 /* For mark_ptr_or_null_reg() */
10768                 regs[BPF_REG_0].id = id;
10769                 /* For release_reference() */
10770                 regs[BPF_REG_0].ref_obj_id = id;
10771         }
10772 
10773         err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta);
10774         if (err)
10775                 return err;
10776 
10777         err = check_map_func_compatibility(env, meta.map_ptr, func_id);
10778         if (err)
10779                 return err;
10780 
10781         if ((func_id == BPF_FUNC_get_stack ||
10782              func_id == BPF_FUNC_get_task_stack) &&
10783             !env->prog->has_callchain_buf) {
10784                 const char *err_str;
10785 
10786 #ifdef CONFIG_PERF_EVENTS
10787                 err = get_callchain_buffers(sysctl_perf_event_max_stack);
10788                 err_str = "cannot get callchain buffer for func %s#%d\n";
10789 #else
10790                 err = -ENOTSUPP;
10791                 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
10792 #endif
10793                 if (err) {
10794                         verbose(env, err_str, func_id_name(func_id), func_id);
10795                         return err;
10796                 }
10797 
10798                 env->prog->has_callchain_buf = true;
10799         }
10800 
10801         if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
10802                 env->prog->call_get_stack = true;
10803 
10804         if (func_id == BPF_FUNC_get_func_ip) {
10805                 if (check_get_func_ip(env))
10806                         return -ENOTSUPP;
10807                 env->prog->call_get_func_ip = true;
10808         }
10809 
10810         if (changes_data)
10811                 clear_all_pkt_pointers(env);
10812         return 0;
10813 }
10814 
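/* Tying the acquire/release bookkeeping in check_helper_call() to a familiar
 * example (ring buffer map and struct names are illustrative):
 * bpf_ringbuf_reserve() is an acquiring helper, so R0 receives a fresh
 * ref_obj_id, and every path must hand the object back to
 * bpf_ringbuf_submit() or bpf_ringbuf_discard(), which release it through the
 * meta.release_regno handling above.
 *
 *   struct event *e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
 *   if (!e)
 *           return 0;
 *   e->pid = bpf_get_current_pid_tgid() >> 32;
 *   bpf_ringbuf_submit(e, 0);
 */
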
10815 /* mark_btf_func_reg_size() is used when the reg size is determined by
10816  * the size of the BTF func_proto's return value or argument.
10817  */
10818 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
10819                                    size_t reg_size)
10820 {
10821         struct bpf_reg_state *reg = &cur_regs(env)[regno];
10822 
10823         if (regno == BPF_REG_0) {
10824                 /* Function return value */
10825                 reg->live |= REG_LIVE_WRITTEN;
10826                 reg->subreg_def = reg_size == sizeof(u64) ?
10827                         DEF_NOT_SUBREG : env->insn_idx + 1;
10828         } else {
10829                 /* Function argument */
10830                 if (reg_size == sizeof(u64)) {
10831                         mark_insn_zext(env, reg);
10832                         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
10833                 } else {
10834                         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
10835                 }
10836         }
10837 }
10838 
10839 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
10840 {
10841         return meta->kfunc_flags & KF_ACQUIRE;
10842 }
10843 
10844 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
10845 {
10846         return meta->kfunc_flags & KF_RELEASE;
10847 }
10848 
10849 static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
10850 {
10851         return (meta->kfunc_flags & KF_TRUSTED_ARGS) || is_kfunc_release(meta);
10852 }
10853 
10854 static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
10855 {
10856         return meta->kfunc_flags & KF_SLEEPABLE;
10857 }
10858 
10859 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
10860 {
10861         return meta->kfunc_flags & KF_DESTRUCTIVE;
10862 }
10863 
10864 static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
10865 {
10866         return meta->kfunc_flags & KF_RCU;
10867 }
10868 
10869 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
10870 {
10871         return meta->kfunc_flags & KF_RCU_PROTECTED;
10872 }
10873 
10874 static bool is_kfunc_arg_mem_size(const struct btf *btf,
10875                                   const struct btf_param *arg,
10876                                   const struct bpf_reg_state *reg)
10877 {
10878         const struct btf_type *t;
10879 
10880         t = btf_type_skip_modifiers(btf, arg->type, NULL);
10881         if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10882                 return false;
10883 
10884         return btf_param_match_suffix(btf, arg, "__sz");
10885 }
10886 
10887 static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
10888                                         const struct btf_param *arg,
10889                                         const struct bpf_reg_state *reg)
10890 {
10891         const struct btf_type *t;
10892 
10893         t = btf_type_skip_modifiers(btf, arg->type, NULL);
10894         if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10895                 return false;
10896 
10897         return btf_param_match_suffix(btf, arg, "__szk");
10898 }
10899 
10900 static bool is_kfunc_arg_optional(const struct btf *btf, const struct btf_param *arg)
10901 {
10902         return btf_param_match_suffix(btf, arg, "__opt");
10903 }
10904 
10905 static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
10906 {
10907         return btf_param_match_suffix(btf, arg, "__k");
10908 }
10909 
10910 static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
10911 {
10912         return btf_param_match_suffix(btf, arg, "__ign");
10913 }
10914 
10915 static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg)
10916 {
10917         return btf_param_match_suffix(btf, arg, "__map");
10918 }
10919 
10920 static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
10921 {
10922         return btf_param_match_suffix(btf, arg, "__alloc");
10923 }
10924 
10925 static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
10926 {
10927         return btf_param_match_suffix(btf, arg, "__uninit");
10928 }
10929 
10930 static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
10931 {
10932         return btf_param_match_suffix(btf, arg, "__refcounted_kptr");
10933 }
10934 
10935 static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg)
10936 {
10937         return btf_param_match_suffix(btf, arg, "__nullable");
10938 }
10939 
10940 static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg)
10941 {
10942         return btf_param_match_suffix(btf, arg, "__str");
10943 }
10944 
10945 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
10946                                           const struct btf_param *arg,
10947                                           const char *name)
10948 {
10949         int len, target_len = strlen(name);
10950         const char *param_name;
10951 
10952         param_name = btf_name_by_offset(btf, arg->name_off);
10953         if (str_is_empty(param_name))
10954                 return false;
10955         len = strlen(param_name);
10956         if (len != target_len)
10957                 return false;
10958         if (strcmp(param_name, name))
10959                 return false;
10960 
10961         return true;
10962 }
10963 
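/* The __sz/__szk/__opt/__k/__ign/__str/... helpers above all work by matching
 * naming suffixes in the kfunc's BTF parameter names. A purely hypothetical
 * declaration showing the convention (this kfunc does not exist):
 *
 *   int bpf_example_fill(void *buf, u32 buf__sz, u64 flags__k, void *scratch__ign);
 *
 * Here buf__sz is the byte size of the memory argument that precedes it,
 * flags__k must be a verifier-known constant, and scratch__ign is skipped by
 * the argument checks entirely.
 */
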
10964 enum {
10965         KF_ARG_DYNPTR_ID,
10966         KF_ARG_LIST_HEAD_ID,
10967         KF_ARG_LIST_NODE_ID,
10968         KF_ARG_RB_ROOT_ID,
10969         KF_ARG_RB_NODE_ID,
10970         KF_ARG_WORKQUEUE_ID,
10971 };
10972 
10973 BTF_ID_LIST(kf_arg_btf_ids)
10974 BTF_ID(struct, bpf_dynptr)
10975 BTF_ID(struct, bpf_list_head)
10976 BTF_ID(struct, bpf_list_node)
10977 BTF_ID(struct, bpf_rb_root)
10978 BTF_ID(struct, bpf_rb_node)
10979 BTF_ID(struct, bpf_wq)
10980 
10981 static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
10982                                     const struct btf_param *arg, int type)
10983 {
10984         const struct btf_type *t;
10985         u32 res_id;
10986 
10987         t = btf_type_skip_modifiers(btf, arg->type, NULL);
10988         if (!t)
10989                 return false;
10990         if (!btf_type_is_ptr(t))
10991                 return false;
10992         t = btf_type_skip_modifiers(btf, t->type, &res_id);
10993         if (!t)
10994                 return false;
10995         return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
10996 }
10997 
10998 static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
10999 {
11000         return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
11001 }
11002 
11003 static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
11004 {
11005         return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
11006 }
11007 
11008 static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
11009 {
11010         return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
11011 }
11012 
11013 static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
11014 {
11015         return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
11016 }
11017 
11018 static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
11019 {
11020         return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
11021 }
11022 
11023 static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg)
11024 {
11025         return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID);
11026 }
11027 
11028 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
11029                                   const struct btf_param *arg)
11030 {
11031         const struct btf_type *t;
11032 
11033         t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
11034         if (!t)
11035                 return false;
11036 
11037         return true;
11038 }
11039 
11040 /* Returns true if the struct is composed of scalars; up to 4 levels of nesting are allowed */
11041 static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
11042                                         const struct btf *btf,
11043                                         const struct btf_type *t, int rec)
11044 {
11045         const struct btf_type *member_type;
11046         const struct btf_member *member;
11047         u32 i;
11048 
11049         if (!btf_type_is_struct(t))
11050                 return false;
11051 
11052         for_each_member(i, t, member) {
11053                 const struct btf_array *array;
11054 
11055                 member_type = btf_type_skip_modifiers(btf, member->type, NULL);
11056                 if (btf_type_is_struct(member_type)) {
11057                         if (rec >= 3) {
11058                                 verbose(env, "max struct nesting depth exceeded\n");
11059                                 return false;
11060                         }
11061                         if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
11062                                 return false;
11063                         continue;
11064                 }
11065                 if (btf_type_is_array(member_type)) {
11066                         array = btf_array(member_type);
11067                         if (!array->nelems)
11068                                 return false;
11069                         member_type = btf_type_skip_modifiers(btf, array->type, NULL);
11070                         if (!btf_type_is_scalar(member_type))
11071                                 return false;
11072                         continue;
11073                 }
11074                 if (!btf_type_is_scalar(member_type))
11075                         return false;
11076         }
11077         return true;
11078 }
11079 
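/* Example of what the check above accepts and rejects (struct names are made
 * up): a type is usable as plain kfunc memory only if, down to the nesting
 * limit enforced above, every member is a scalar or a fixed-size array of
 * scalars.
 *
 *   struct ok  { u32 a; u64 b[4]; struct { u16 c; } inner; };    accepted
 *   struct bad { u32 a; void *p; };                              rejected
 */
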
11080 enum kfunc_ptr_arg_type {
11081         KF_ARG_PTR_TO_CTX,
11082         KF_ARG_PTR_TO_ALLOC_BTF_ID,    /* Allocated object */
11083         KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */
11084         KF_ARG_PTR_TO_DYNPTR,
11085         KF_ARG_PTR_TO_ITER,
11086         KF_ARG_PTR_TO_LIST_HEAD,
11087         KF_ARG_PTR_TO_LIST_NODE,
11088         KF_ARG_PTR_TO_BTF_ID,          /* Also covers reg2btf_ids conversions */
11089         KF_ARG_PTR_TO_MEM,
11090         KF_ARG_PTR_TO_MEM_SIZE,        /* Size derived from next argument, skip it */
11091         KF_ARG_PTR_TO_CALLBACK,
11092         KF_ARG_PTR_TO_RB_ROOT,
11093         KF_ARG_PTR_TO_RB_NODE,
11094         KF_ARG_PTR_TO_NULL,
11095         KF_ARG_PTR_TO_CONST_STR,
11096         KF_ARG_PTR_TO_MAP,
11097         KF_ARG_PTR_TO_WORKQUEUE,
11098 };
11099 
11100 enum special_kfunc_type {
11101         KF_bpf_obj_new_impl,
11102         KF_bpf_obj_drop_impl,
11103         KF_bpf_refcount_acquire_impl,
11104         KF_bpf_list_push_front_impl,
11105         KF_bpf_list_push_back_impl,
11106         KF_bpf_list_pop_front,
11107         KF_bpf_list_pop_back,
11108         KF_bpf_cast_to_kern_ctx,
11109         KF_bpf_rdonly_cast,
11110         KF_bpf_rcu_read_lock,
11111         KF_bpf_rcu_read_unlock,
11112         KF_bpf_rbtree_remove,
11113         KF_bpf_rbtree_add_impl,
11114         KF_bpf_rbtree_first,
11115         KF_bpf_dynptr_from_skb,
11116         KF_bpf_dynptr_from_xdp,
11117         KF_bpf_dynptr_slice,
11118         KF_bpf_dynptr_slice_rdwr,
11119         KF_bpf_dynptr_clone,
11120         KF_bpf_percpu_obj_new_impl,
11121         KF_bpf_percpu_obj_drop_impl,
11122         KF_bpf_throw,
11123         KF_bpf_wq_set_callback_impl,
11124         KF_bpf_preempt_disable,
11125         KF_bpf_preempt_enable,
11126         KF_bpf_iter_css_task_new,
11127         KF_bpf_session_cookie,
11128 };
11129 
11130 BTF_SET_START(special_kfunc_set)
11131 BTF_ID(func, bpf_obj_new_impl)
11132 BTF_ID(func, bpf_obj_drop_impl)
11133 BTF_ID(func, bpf_refcount_acquire_impl)
11134 BTF_ID(func, bpf_list_push_front_impl)
11135 BTF_ID(func, bpf_list_push_back_impl)
11136 BTF_ID(func, bpf_list_pop_front)
11137 BTF_ID(func, bpf_list_pop_back)
11138 BTF_ID(func, bpf_cast_to_kern_ctx)
11139 BTF_ID(func, bpf_rdonly_cast)
11140 BTF_ID(func, bpf_rbtree_remove)
11141 BTF_ID(func, bpf_rbtree_add_impl)
11142 BTF_ID(func, bpf_rbtree_first)
11143 BTF_ID(func, bpf_dynptr_from_skb)
11144 BTF_ID(func, bpf_dynptr_from_xdp)
11145 BTF_ID(func, bpf_dynptr_slice)
11146 BTF_ID(func, bpf_dynptr_slice_rdwr)
11147 BTF_ID(func, bpf_dynptr_clone)
11148 BTF_ID(func, bpf_percpu_obj_new_impl)
11149 BTF_ID(func, bpf_percpu_obj_drop_impl)
11150 BTF_ID(func, bpf_throw)
11151 BTF_ID(func, bpf_wq_set_callback_impl)
11152 #ifdef CONFIG_CGROUPS
11153 BTF_ID(func, bpf_iter_css_task_new)
11154 #endif
11155 BTF_SET_END(special_kfunc_set)
11156 
11157 BTF_ID_LIST(special_kfunc_list)
11158 BTF_ID(func, bpf_obj_new_impl)
11159 BTF_ID(func, bpf_obj_drop_impl)
11160 BTF_ID(func, bpf_refcount_acquire_impl)
11161 BTF_ID(func, bpf_list_push_front_impl)
11162 BTF_ID(func, bpf_list_push_back_impl)
11163 BTF_ID(func, bpf_list_pop_front)
11164 BTF_ID(func, bpf_list_pop_back)
11165 BTF_ID(func, bpf_cast_to_kern_ctx)
11166 BTF_ID(func, bpf_rdonly_cast)
11167 BTF_ID(func, bpf_rcu_read_lock)
11168 BTF_ID(func, bpf_rcu_read_unlock)
11169 BTF_ID(func, bpf_rbtree_remove)
11170 BTF_ID(func, bpf_rbtree_add_impl)
11171 BTF_ID(func, bpf_rbtree_first)
11172 BTF_ID(func, bpf_dynptr_from_skb)
11173 BTF_ID(func, bpf_dynptr_from_xdp)
11174 BTF_ID(func, bpf_dynptr_slice)
11175 BTF_ID(func, bpf_dynptr_slice_rdwr)
11176 BTF_ID(func, bpf_dynptr_clone)
11177 BTF_ID(func, bpf_percpu_obj_new_impl)
11178 BTF_ID(func, bpf_percpu_obj_drop_impl)
11179 BTF_ID(func, bpf_throw)
11180 BTF_ID(func, bpf_wq_set_callback_impl)
11181 BTF_ID(func, bpf_preempt_disable)
11182 BTF_ID(func, bpf_preempt_enable)
11183 #ifdef CONFIG_CGROUPS
11184 BTF_ID(func, bpf_iter_css_task_new)
11185 #else
11186 BTF_ID_UNUSED
11187 #endif
11188 #ifdef CONFIG_BPF_EVENTS
11189 BTF_ID(func, bpf_session_cookie)
11190 #else
11191 BTF_ID_UNUSED
11192 #endif
11193 
11194 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
11195 {
11196         if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
11197             meta->arg_owning_ref) {
11198                 return false;
11199         }
11200 
11201         return meta->kfunc_flags & KF_RET_NULL;
11202 }
11203 
11204 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
11205 {
11206         return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
11207 }
11208 
11209 static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
11210 {
11211         return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
11212 }
11213 
11214 static bool is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta *meta)
11215 {
11216         return meta->func_id == special_kfunc_list[KF_bpf_preempt_disable];
11217 }
11218 
11219 static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
11220 {
11221         return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
11222 }
11223 
11224 static enum kfunc_ptr_arg_type
11225 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
11226                        struct bpf_kfunc_call_arg_meta *meta,
11227                        const struct btf_type *t, const struct btf_type *ref_t,
11228                        const char *ref_tname, const struct btf_param *args,
11229                        int argno, int nargs)
11230 {
11231         u32 regno = argno + 1;
11232         struct bpf_reg_state *regs = cur_regs(env);
11233         struct bpf_reg_state *reg = &regs[regno];
11234         bool arg_mem_size = false;
11235 
11236         if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
11237                 return KF_ARG_PTR_TO_CTX;
11238 
11239         /* In this function, we verify the kfunc's BTF according to the argument
11240          * type, leaving the rest of the verification with respect to the register
11241          * type to our caller. When a set of conditions holds in the BTF type of the
11242          * arguments, we resolve it to a known kfunc_ptr_arg_type.
11243          */
11244         if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
11245                 return KF_ARG_PTR_TO_CTX;
11246 
11247         if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg))
11248                 return KF_ARG_PTR_TO_NULL;
11249 
11250         if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
11251                 return KF_ARG_PTR_TO_ALLOC_BTF_ID;
11252 
11253         if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[argno]))
11254                 return KF_ARG_PTR_TO_REFCOUNTED_KPTR;
11255 
11256         if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
11257                 return KF_ARG_PTR_TO_DYNPTR;
11258 
11259         if (is_kfunc_arg_iter(meta, argno))
11260                 return KF_ARG_PTR_TO_ITER;
11261 
11262         if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
11263                 return KF_ARG_PTR_TO_LIST_HEAD;
11264 
11265         if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
11266                 return KF_ARG_PTR_TO_LIST_NODE;
11267 
11268         if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
11269                 return KF_ARG_PTR_TO_RB_ROOT;
11270 
11271         if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
11272                 return KF_ARG_PTR_TO_RB_NODE;
11273 
11274         if (is_kfunc_arg_const_str(meta->btf, &args[argno]))
11275                 return KF_ARG_PTR_TO_CONST_STR;
11276 
11277         if (is_kfunc_arg_map(meta->btf, &args[argno]))
11278                 return KF_ARG_PTR_TO_MAP;
11279 
11280         if (is_kfunc_arg_wq(meta->btf, &args[argno]))
11281                 return KF_ARG_PTR_TO_WORKQUEUE;
11282 
11283         if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
11284                 if (!btf_type_is_struct(ref_t)) {
11285                         verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
11286                                 meta->func_name, argno, btf_type_str(ref_t), ref_tname);
11287                         return -EINVAL;
11288                 }
11289                 return KF_ARG_PTR_TO_BTF_ID;
11290         }
11291 
11292         if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
11293                 return KF_ARG_PTR_TO_CALLBACK;
11294 
11295         if (argno + 1 < nargs &&
11296             (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
11297              is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
11298                 arg_mem_size = true;
11299 
11300         /* This is the catch-all argument type for the register types supported by
11301          * check_helper_mem_access. However, we only allow it when the argument is a
11302          * pointer to a scalar, or to a struct composed (recursively) of scalars. When
11303          * arg_mem_size is true, the pointer can also be void *.
11304          */
11305         if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
11306             (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
11307                 verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
11308                         argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
11309                 return -EINVAL;
11310         }
11311         return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
11312 }
11313 
11314 static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
11315                                         struct bpf_reg_state *reg,
11316                                         const struct btf_type *ref_t,
11317                                         const char *ref_tname, u32 ref_id,
11318                                         struct bpf_kfunc_call_arg_meta *meta,
11319                                         int argno)
11320 {
11321         const struct btf_type *reg_ref_t;
11322         bool strict_type_match = false;
11323         const struct btf *reg_btf;
11324         const char *reg_ref_tname;
11325         bool taking_projection;
11326         bool struct_same;
11327         u32 reg_ref_id;
11328 
11329         if (base_type(reg->type) == PTR_TO_BTF_ID) {
11330                 reg_btf = reg->btf;
11331                 reg_ref_id = reg->btf_id;
11332         } else {
11333                 reg_btf = btf_vmlinux;
11334                 reg_ref_id = *reg2btf_ids[base_type(reg->type)];
11335         }
11336 
11337         /* Enforce strict type matching for calls to kfuncs that are acquiring
11338          * or releasing a reference, or are no-cast aliases. We do _not_
11339          * enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default,
11340          * as we want to enable BPF programs to pass types that are bitwise
11341          * equivalent without forcing them to explicitly cast with something
11342          * like bpf_cast_to_kern_ctx().
11343          *
11344          * For example, say we had a type like the following:
11345          *
11346          * struct bpf_cpumask {
11347          *      cpumask_t cpumask;
11348          *      refcount_t usage;
11349          * };
11350          *
11351          * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
11352          * to a struct cpumask, so it would be safe to pass a struct
11353          * bpf_cpumask * to a kfunc expecting a struct cpumask *.
11354          *
11355          * The philosophy here is similar to how we allow scalars of different
11356          * types to be passed to kfuncs as long as the size is the same. The
11357          * only difference here is that we're simply allowing
11358          * btf_struct_ids_match() to walk the struct at the 0th offset, and
11359          * resolve types.
11360          */
11361         if (is_kfunc_acquire(meta) ||
11362             (is_kfunc_release(meta) && reg->ref_obj_id) ||
11363             btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
11364                 strict_type_match = true;
11365 
11366         WARN_ON_ONCE(is_kfunc_release(meta) &&
11367                      (reg->off || !tnum_is_const(reg->var_off) ||
11368                       reg->var_off.value));
11369 
11370         reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
11371         reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
11372         struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match);
11373         /* If the kfunc accepts a projection type (i.e. __sk_buff), it cannot
11374          * actually use it -- it must cast to the underlying type. So we allow
11375          * the caller to pass in the underlying type.
11376          */
11377         taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname);
11378         if (!taking_projection && !struct_same) {
11379                 verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
11380                         meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
11381                         btf_type_str(reg_ref_t), reg_ref_tname);
11382                 return -EINVAL;
11383         }
11384         return 0;
11385 }
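
/* A BPF-side sketch of the non-strict matching described above (the
 * bpf_cpumask_*() kfuncs exist in kernel/bpf/cpumask.c; the surrounding
 * program context is assumed): struct bpf_cpumask embeds a cpumask_t at
 * offset 0, so a trusted struct bpf_cpumask * may be passed where a kfunc
 * expects struct cpumask *:
 *
 *	struct bpf_cpumask *mask;
 *
 *	mask = bpf_cpumask_create();
 *	if (!mask)
 *		return 0;
 *	bpf_cpumask_set_cpu(0, mask);
 *	if (bpf_cpumask_test_cpu(0, (const struct cpumask *)mask))
 *		...
 *	bpf_cpumask_release(mask);
 *
 * The C cast only satisfies the compiler; the verifier accepts the argument
 * because btf_struct_ids_match() walks the struct at offset 0, not because
 * of the cast.
 */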
11386 
11387 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
11388 {
11389         struct bpf_verifier_state *state = env->cur_state;
11390         struct btf_record *rec = reg_btf_record(reg);
11391 
11392         if (!state->active_lock.ptr) {
11393                 verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
11394                 return -EFAULT;
11395         }
11396 
11397         if (type_flag(reg->type) & NON_OWN_REF) {
11398                 verbose(env, "verifier internal error: NON_OWN_REF already set\n");
11399                 return -EFAULT;
11400         }
11401 
11402         reg->type |= NON_OWN_REF;
11403         if (rec->refcount_off >= 0)
11404                 reg->type |= MEM_RCU;
11405 
11406         return 0;
11407 }
11408 
11409 static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
11410 {
11411         struct bpf_func_state *state, *unused;
11412         struct bpf_reg_state *reg;
11413         int i;
11414 
11415         state = cur_func(env);
11416 
11417         if (!ref_obj_id) {
11418                 verbose(env, "verifier internal error: ref_obj_id is zero for "
11419                              "owning -> non-owning conversion\n");
11420                 return -EFAULT;
11421         }
11422 
11423         for (i = 0; i < state->acquired_refs; i++) {
11424                 if (state->refs[i].id != ref_obj_id)
11425                         continue;
11426 
11427                 /* Clear ref_obj_id here so release_reference doesn't clobber
11428                  * the whole reg
11429                  */
11430                 bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
11431                         if (reg->ref_obj_id == ref_obj_id) {
11432                                 reg->ref_obj_id = 0;
11433                                 ref_set_non_owning(env, reg);
11434                         }
11435                 }));
11436                 return 0;
11437         }
11438 
11439         verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
11440         return -EFAULT;
11441 }
11442 
11443 /* Implementation details:
11444  *
11445  * Each register points to some region of memory, which we define as an
11446  * allocation. Each allocation may embed a bpf_spin_lock which protects any
11447  * special BPF objects (bpf_list_head, bpf_rb_root, etc.) that are part of
11448  * the same allocation. The lock and the data it protects are colocated in
11449  * the same memory region.
11450  *
11451  * Hence, every time a register holds a pointer value pointing to such an
11452  * allocation, the verifier preserves a unique reg->id for it.
11453  *
11454  * The verifier remembers the lock 'ptr' and the lock 'id' whenever
11455  * bpf_spin_lock is called.
11456  *
11457  * To enable this, lock state in the verifier captures two values:
11458  *      active_lock.ptr = Register's type specific pointer
11459  *      active_lock.id  = A unique ID for each register pointer value
11460  *
11461  * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
11462  * supported register types.
11463  *
11464  * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
11465  * allocated objects is the reg->btf pointer.
11466  *
11467  * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
11468  * can establish the provenance of the map value statically for each distinct
11469  * lookup into such maps. They always contain a single map value, so assigning
11470  * unique IDs to each pseudo load would pessimize the algorithm and reject valid programs.
11471  *
11472  * Global variables, for instance, use array maps with max_entries = 1, hence
11473  * their active_lock.ptr becomes the map_ptr and id = 0 (since they all point
11474  * into the same map value as max_entries is 1, as described above).
11475  *
11476  * In case of inner map lookups, the inner map pointer has the same map_ptr as
11477  * the outer map pointer (in verifier context), but each lookup into an inner
11478  * map assigns a fresh reg->id to the lookup, so while lookups into distinct
11479  * inner maps from the same outer map share the same map_ptr as active_lock.ptr,
11480  * they will get a different reg->id assigned to each lookup, hence a different
11481  * active_lock.id.
11482  *
11483  * In case of allocated objects, active_lock.ptr is the reg->btf, and the
11484  * reg->id is a unique ID preserved after the NULL pointer check on the pointer
11485  * returned from bpf_obj_new. Each allocation receives a new reg->id.
11486  */
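
/* A minimal BPF-side sketch of the rule enforced below, assuming the
 * selftests' bpf_experimental.h wrappers (bpf_obj_new(), bpf_list_push_front(),
 * __contains()) and an 'array_map'/'zero' defined elsewhere: the bpf_spin_lock
 * and the bpf_list_head it protects live in the same map value, and the lock
 * must be held around list operations on that head:
 *
 *	struct elem {
 *		struct bpf_list_node node;
 *		int val;
 *	};
 *
 *	struct map_value {
 *		struct bpf_spin_lock lock;
 *		struct bpf_list_head head __contains(elem, node);
 *	};
 *
 *	struct map_value *v = bpf_map_lookup_elem(&array_map, &zero);
 *	struct elem *e;
 *
 *	if (!v)
 *		return 0;
 *	e = bpf_obj_new(typeof(*e));
 *	if (!e)
 *		return 0;
 *	bpf_spin_lock(&v->lock);
 *	bpf_list_push_front(&v->head, &e->node);
 *	bpf_spin_unlock(&v->lock);
 *
 * check_reg_allocation_locked() rejects the push if the held lock's
 * active_lock.ptr/id does not match the map value containing 'head'.
 */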
11487 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
11488 {
11489         void *ptr;
11490         u32 id;
11491 
11492         switch ((int)reg->type) {
11493         case PTR_TO_MAP_VALUE:
11494                 ptr = reg->map_ptr;
11495                 break;
11496         case PTR_TO_BTF_ID | MEM_ALLOC:
11497                 ptr = reg->btf;
11498                 break;
11499         default:
11500                 verbose(env, "verifier internal error: unknown reg type for lock check\n");
11501                 return -EFAULT;
11502         }
11503         id = reg->id;
11504 
11505         if (!env->cur_state->active_lock.ptr)
11506                 return -EINVAL;
11507         if (env->cur_state->active_lock.ptr != ptr ||
11508             env->cur_state->active_lock.id != id) {
11509                 verbose(env, "held lock and object are not in the same allocation\n");
11510                 return -EINVAL;
11511         }
11512         return 0;
11513 }
11514 
11515 static bool is_bpf_list_api_kfunc(u32 btf_id)
11516 {
11517         return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
11518                btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
11519                btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
11520                btf_id == special_kfunc_list[KF_bpf_list_pop_back];
11521 }
11522 
11523 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
11524 {
11525         return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
11526                btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11527                btf_id == special_kfunc_list[KF_bpf_rbtree_first];
11528 }
11529 
11530 static bool is_bpf_graph_api_kfunc(u32 btf_id)
11531 {
11532         return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id) ||
11533                btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
11534 }
11535 
11536 static bool is_sync_callback_calling_kfunc(u32 btf_id)
11537 {
11538         return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
11539 }
11540 
11541 static bool is_async_callback_calling_kfunc(u32 btf_id)
11542 {
11543         return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl];
11544 }
11545 
11546 static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
11547 {
11548         return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
11549                insn->imm == special_kfunc_list[KF_bpf_throw];
11550 }
11551 
11552 static bool is_bpf_wq_set_callback_impl_kfunc(u32 btf_id)
11553 {
11554         return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl];
11555 }
11556 
11557 static bool is_callback_calling_kfunc(u32 btf_id)
11558 {
11559         return is_sync_callback_calling_kfunc(btf_id) ||
11560                is_async_callback_calling_kfunc(btf_id);
11561 }
11562 
11563 static bool is_rbtree_lock_required_kfunc(u32 btf_id)
11564 {
11565         return is_bpf_rbtree_api_kfunc(btf_id);
11566 }
11567 
11568 static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
11569                                           enum btf_field_type head_field_type,
11570                                           u32 kfunc_btf_id)
11571 {
11572         bool ret;
11573 
11574         switch (head_field_type) {
11575         case BPF_LIST_HEAD:
11576                 ret = is_bpf_list_api_kfunc(kfunc_btf_id);
11577                 break;
11578         case BPF_RB_ROOT:
11579                 ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
11580                 break;
11581         default:
11582                 verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
11583                         btf_field_type_name(head_field_type));
11584                 return false;
11585         }
11586 
11587         if (!ret)
11588                 verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
11589                         btf_field_type_name(head_field_type));
11590         return ret;
11591 }
11592 
11593 static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
11594                                           enum btf_field_type node_field_type,
11595                                           u32 kfunc_btf_id)
11596 {
11597         bool ret;
11598 
11599         switch (node_field_type) {
11600         case BPF_LIST_NODE:
11601                 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
11602                        kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]);
11603                 break;
11604         case BPF_RB_NODE:
11605                 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11606                        kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
11607                 break;
11608         default:
11609                 verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
11610                         btf_field_type_name(node_field_type));
11611                 return false;
11612         }
11613 
11614         if (!ret)
11615                 verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
11616                         btf_field_type_name(node_field_type));
11617         return ret;
11618 }
11619 
11620 static int
11621 __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
11622                                    struct bpf_reg_state *reg, u32 regno,
11623                                    struct bpf_kfunc_call_arg_meta *meta,
11624                                    enum btf_field_type head_field_type,
11625                                    struct btf_field **head_field)
11626 {
11627         const char *head_type_name;
11628         struct btf_field *field;
11629         struct btf_record *rec;
11630         u32 head_off;
11631 
11632         if (meta->btf != btf_vmlinux) {
11633                 verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
11634                 return -EFAULT;
11635         }
11636 
11637         if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
11638                 return -EFAULT;
11639 
11640         head_type_name = btf_field_type_name(head_field_type);
11641         if (!tnum_is_const(reg->var_off)) {
11642                 verbose(env,
11643                         "R%d doesn't have constant offset. %s has to be at the constant offset\n",
11644                         regno, head_type_name);
11645                 return -EINVAL;
11646         }
11647 
11648         rec = reg_btf_record(reg);
11649         head_off = reg->off + reg->var_off.value;
11650         field = btf_record_find(rec, head_off, head_field_type);
11651         if (!field) {
11652                 verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
11653                 return -EINVAL;
11654         }
11655 
11656         /* All functions require the graph root to be protected by a bpf_spin_lock */
11657         if (check_reg_allocation_locked(env, reg)) {
11658                 verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
11659                         rec->spin_lock_off, head_type_name);
11660                 return -EINVAL;
11661         }
11662 
11663         if (*head_field) {
11664                 verbose(env, "verifier internal error: repeating %s arg\n", head_type_name);
11665                 return -EFAULT;
11666         }
11667         *head_field = field;
11668         return 0;
11669 }
11670 
11671 static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
11672                                            struct bpf_reg_state *reg, u32 regno,
11673                                            struct bpf_kfunc_call_arg_meta *meta)
11674 {
11675         return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD,
11676                                                           &meta->arg_list_head.field);
11677 }
11678 
11679 static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
11680                                              struct bpf_reg_state *reg, u32 regno,
11681                                              struct bpf_kfunc_call_arg_meta *meta)
11682 {
11683         return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT,
11684                                                           &meta->arg_rbtree_root.field);
11685 }
11686 
11687 static int
11688 __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
11689                                    struct bpf_reg_state *reg, u32 regno,
11690                                    struct bpf_kfunc_call_arg_meta *meta,
11691                                    enum btf_field_type head_field_type,
11692                                    enum btf_field_type node_field_type,
11693                                    struct btf_field **node_field)
11694 {
11695         const char *node_type_name;
11696         const struct btf_type *et, *t;
11697         struct btf_field *field;
11698         u32 node_off;
11699 
11700         if (meta->btf != btf_vmlinux) {
11701                 verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
11702                 return -EFAULT;
11703         }
11704 
11705         if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
11706                 return -EFAULT;
11707 
11708         node_type_name = btf_field_type_name(node_field_type);
11709         if (!tnum_is_const(reg->var_off)) {
11710                 verbose(env,
11711                         "R%d doesn't have constant offset. %s has to be at the constant offset\n",
11712                         regno, node_type_name);
11713                 return -EINVAL;
11714         }
11715 
11716         node_off = reg->off + reg->var_off.value;
11717         field = reg_find_field_offset(reg, node_off, node_field_type);
11718         if (!field) {
11719                 verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
11720                 return -EINVAL;
11721         }
11722 
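	/* From here on use the graph root's btf_field (saved when the head
	 * argument was processed): only it carries the graph_root metadata
	 * (expected value type and node offset) checked below.
	 */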
11723         field = *node_field;
11724 
11725         et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
11726         t = btf_type_by_id(reg->btf, reg->btf_id);
11727         if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
11728                                   field->graph_root.value_btf_id, true)) {
11729                 verbose(env, "operation on %s expects arg#1 %s at offset=%d "
11730                         "in struct %s, but arg is at offset=%d in struct %s\n",
11731                         btf_field_type_name(head_field_type),
11732                         btf_field_type_name(node_field_type),
11733                         field->graph_root.node_offset,
11734                         btf_name_by_offset(field->graph_root.btf, et->name_off),
11735                         node_off, btf_name_by_offset(reg->btf, t->name_off));
11736                 return -EINVAL;
11737         }
11738         meta->arg_btf = reg->btf;
11739         meta->arg_btf_id = reg->btf_id;
11740 
11741         if (node_off != field->graph_root.node_offset) {
11742                 verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
11743                         node_off, btf_field_type_name(node_field_type),
11744                         field->graph_root.node_offset,
11745                         btf_name_by_offset(field->graph_root.btf, et->name_off));
11746                 return -EINVAL;
11747         }
11748 
11749         return 0;
11750 }
11751 
11752 static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
11753                                            struct bpf_reg_state *reg, u32 regno,
11754                                            struct bpf_kfunc_call_arg_meta *meta)
11755 {
11756         return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
11757                                                   BPF_LIST_HEAD, BPF_LIST_NODE,
11758                                                   &meta->arg_list_head.field);
11759 }
11760 
11761 static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
11762                                              struct bpf_reg_state *reg, u32 regno,
11763                                              struct bpf_kfunc_call_arg_meta *meta)
11764 {
11765         return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
11766                                                   BPF_RB_ROOT, BPF_RB_NODE,
11767                                                   &meta->arg_rbtree_root.field);
11768 }
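
/* Illustrative sketch of the node checks above (BPF program side, following
 * the selftests' bpf_experimental.h conventions for __contains(), bpf_obj_new()
 * and bpf_rbtree_add(); the 'private()' globals are a selftest idiom and are
 * assumed here): the node argument must be the bpf_rb_node field named in the
 * root's __contains() tag, at exactly that field's offset:
 *
 *	struct node_data {
 *		long key;
 *		struct bpf_rb_node node;
 *	};
 *
 *	private(A) struct bpf_spin_lock glock;
 *	private(A) struct bpf_rb_root groot __contains(node_data, node);
 *
 *	static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
 *	{
 *		return container_of(a, struct node_data, node)->key <
 *		       container_of(b, struct node_data, node)->key;
 *	}
 *
 *	struct node_data *n = bpf_obj_new(typeof(*n));
 *
 *	if (!n)
 *		return 0;
 *	bpf_spin_lock(&glock);
 *	bpf_rbtree_add(&groot, &n->node, less);
 *	bpf_spin_unlock(&glock);
 *
 * Passing a pointer into 'n' at any offset other than that of 'node' fails the
 * node_off comparison with the "arg#1 offset=..., but expected ..." error.
 */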
11769 
11770 /*
11771  * The css_task iter allowlist is needed to avoid deadlocking on css_set_lock.
11772  * LSM hooks and iters (both sleepable and non-sleepable) are safe.
11773  * Any sleepable prog is also safe, since bpf_check_attach_target() enforces
11774  * that it can only be attached to specific hook points.
11775  */
11776 static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
11777 {
11778         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
11779 
11780         switch (prog_type) {
11781         case BPF_PROG_TYPE_LSM:
11782                 return true;
11783         case BPF_PROG_TYPE_TRACING:
11784                 if (env->prog->expected_attach_type == BPF_TRACE_ITER)
11785                         return true;
11786                 fallthrough;
11787         default:
11788                 return in_sleepable(env);
11789         }
11790 }
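
/* Illustrative sketch of a program that passes the allowlist above (selftest
 * conventions; 'target_cgid' and 'nr_tasks' are assumed globals, and the
 * attach point is just one allowed example): the css_task open-coded iterator
 * can be used from an LSM program, for instance:
 *
 *	SEC("lsm/file_mprotect")
 *	int BPF_PROG(count_cgroup_tasks)
 *	{
 *		struct cgroup *cgrp = bpf_cgroup_from_id(target_cgid);
 *		struct bpf_iter_css_task it;
 *		struct task_struct *task;
 *
 *		if (!cgrp)
 *			return 0;
 *		bpf_iter_css_task_new(&it, &cgrp->self, CSS_TASK_ITER_PROCS);
 *		while ((task = bpf_iter_css_task_next(&it)))
 *			nr_tasks++;
 *		bpf_iter_css_task_destroy(&it);
 *		bpf_cgroup_release(cgrp);
 *		return 0;
 *	}
 */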
11791 
11792 static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
11793                             int insn_idx)
11794 {
11795         const char *func_name = meta->func_name, *ref_tname;
11796         const struct btf *btf = meta->btf;
11797         const struct btf_param *args;
11798         struct btf_record *rec;
11799         u32 i, nargs;
11800         int ret;
11801 
11802         args = (const struct btf_param *)(meta->func_proto + 1);
11803         nargs = btf_type_vlen(meta->func_proto);
11804         if (nargs > MAX_BPF_FUNC_REG_ARGS) {
11805                 verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
11806                         MAX_BPF_FUNC_REG_ARGS);
11807                 return -EINVAL;
11808         }
11809 
11810         /* Check that BTF function arguments match actual types that the
11811          * verifier sees.
11812          */
11813         for (i = 0; i < nargs; i++) {
11814                 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[i + 1];
11815                 const struct btf_type *t, *ref_t, *resolve_ret;
11816                 enum bpf_arg_type arg_type = ARG_DONTCARE;
11817                 u32 regno = i + 1, ref_id, type_size;
11818                 bool is_ret_buf_sz = false;
11819                 int kf_arg_type;
11820 
11821                 t = btf_type_skip_modifiers(btf, args[i].type, NULL);
11822 
11823                 if (is_kfunc_arg_ignore(btf, &args[i]))
11824                         continue;
11825 
11826                 if (btf_type_is_scalar(t)) {
11827                         if (reg->type != SCALAR_VALUE) {
11828                                 verbose(env, "R%d is not a scalar\n", regno);
11829                                 return -EINVAL;
11830                         }
11831 
11832                         if (is_kfunc_arg_constant(meta->btf, &args[i])) {
11833                                 if (meta->arg_constant.found) {
11834                                         verbose(env, "verifier internal error: only one constant argument permitted\n");
11835                                         return -EFAULT;
11836                                 }
11837                                 if (!tnum_is_const(reg->var_off)) {
11838                                         verbose(env, "R%d must be a known constant\n", regno);
11839                                         return -EINVAL;
11840                                 }
11841                                 ret = mark_chain_precision(env, regno);
11842                                 if (ret < 0)
11843                                         return ret;
11844                                 meta->arg_constant.found = true;
11845                                 meta->arg_constant.value = reg->var_off.value;
11846                         } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
11847                                 meta->r0_rdonly = true;
11848                                 is_ret_buf_sz = true;
11849                         } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
11850                                 is_ret_buf_sz = true;
11851                         }
11852 
11853                         if (is_ret_buf_sz) {
11854                                 if (meta->r0_size) {
11855                                         verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc\n");
11856                                         return -EINVAL;
11857                                 }
11858 
11859                                 if (!tnum_is_const(reg->var_off)) {
11860                                         verbose(env, "R%d is not a const\n", regno);
11861                                         return -EINVAL;
11862                                 }
11863 
11864                                 meta->r0_size = reg->var_off.value;
11865                                 ret = mark_chain_precision(env, regno);
11866                                 if (ret)
11867                                         return ret;
11868                         }
11869                         continue;
11870                 }
11871 
11872                 if (!btf_type_is_ptr(t)) {
11873                         verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
11874                         return -EINVAL;
11875                 }
11876 
11877                 if ((is_kfunc_trusted_args(meta) || is_kfunc_rcu(meta)) &&
11878                     (register_is_null(reg) || type_may_be_null(reg->type)) &&
11879                         !is_kfunc_arg_nullable(meta->btf, &args[i])) {
11880                         verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
11881                         return -EACCES;
11882                 }
11883 
11884                 if (reg->ref_obj_id) {
11885                         if (is_kfunc_release(meta) && meta->ref_obj_id) {
11886                                 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
11887                                         regno, reg->ref_obj_id,
11888                                         meta->ref_obj_id);
11889                                 return -EFAULT;
11890                         }
11891                         meta->ref_obj_id = reg->ref_obj_id;
11892                         if (is_kfunc_release(meta))
11893                                 meta->release_regno = regno;
11894                 }
11895 
11896                 ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
11897                 ref_tname = btf_name_by_offset(btf, ref_t->name_off);
11898 
11899                 kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs);
11900                 if (kf_arg_type < 0)
11901                         return kf_arg_type;
11902 
11903                 switch (kf_arg_type) {
11904                 case KF_ARG_PTR_TO_NULL:
11905                         continue;
11906                 case KF_ARG_PTR_TO_MAP:
11907                         if (!reg->map_ptr) {
11908                                 verbose(env, "pointer in R%d isn't map pointer\n", regno);
11909                                 return -EINVAL;
11910                         }
11911                         if (meta->map.ptr && reg->map_ptr->record->wq_off >= 0) {
11912                                 /* Use map_uid (which is unique id of inner map) to reject:
11913                                  * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
11914                                  * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
11915                                  * if (inner_map1 && inner_map2) {
11916                                  *     wq = bpf_map_lookup_elem(inner_map1);
11917                                  *     if (wq)
11918                                  *         // mismatch would have been allowed
11919                                  *         bpf_wq_init(wq, inner_map2);
11920                                  * }
11921                                  *
11922                                  * Comparing map_ptr is enough to distinguish normal and outer maps.
11923                                  */
11924                                 if (meta->map.ptr != reg->map_ptr ||
11925                                     meta->map.uid != reg->map_uid) {
11926                                         verbose(env,
11927                                                 "workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
11928                                                 meta->map.uid, reg->map_uid);
11929                                         return -EINVAL;
11930                                 }
11931                         }
11932                         meta->map.ptr = reg->map_ptr;
11933                         meta->map.uid = reg->map_uid;
11934                         fallthrough;
11935                 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
11936                 case KF_ARG_PTR_TO_BTF_ID:
11937                         if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
11938                                 break;
11939 
11940                         if (!is_trusted_reg(reg)) {
11941                                 if (!is_kfunc_rcu(meta)) {
11942                                         verbose(env, "R%d must be referenced or trusted\n", regno);
11943                                         return -EINVAL;
11944                                 }
11945                                 if (!is_rcu_reg(reg)) {
11946                                         verbose(env, "R%d must be a rcu pointer\n", regno);
11947                                         return -EINVAL;
11948                                 }
11949                         }
11950                         fallthrough;
11951                 case KF_ARG_PTR_TO_CTX:
11952                 case KF_ARG_PTR_TO_DYNPTR:
11953                 case KF_ARG_PTR_TO_ITER:
11954                 case KF_ARG_PTR_TO_LIST_HEAD:
11955                 case KF_ARG_PTR_TO_LIST_NODE:
11956                 case KF_ARG_PTR_TO_RB_ROOT:
11957                 case KF_ARG_PTR_TO_RB_NODE:
11958                 case KF_ARG_PTR_TO_MEM:
11959                 case KF_ARG_PTR_TO_MEM_SIZE:
11960                 case KF_ARG_PTR_TO_CALLBACK:
11961                 case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
11962                 case KF_ARG_PTR_TO_CONST_STR:
11963                 case KF_ARG_PTR_TO_WORKQUEUE:
11964                         break;
11965                 default:
11966                         WARN_ON_ONCE(1);
11967                         return -EFAULT;
11968                 }
11969 
11970                 if (is_kfunc_release(meta) && reg->ref_obj_id)
11971                         arg_type |= OBJ_RELEASE;
11972                 ret = check_func_arg_reg_off(env, reg, regno, arg_type);
11973                 if (ret < 0)
11974                         return ret;
11975 
11976                 switch (kf_arg_type) {
11977                 case KF_ARG_PTR_TO_CTX:
11978                         if (reg->type != PTR_TO_CTX) {
11979                                 verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t));
11980                                 return -EINVAL;
11981                         }
11982 
11983                         if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
11984                                 ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
11985                                 if (ret < 0)
11986                                         return -EINVAL;
11987                                 meta->ret_btf_id  = ret;
11988                         }
11989                         break;
11990                 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
11991                         if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
11992                                 if (meta->func_id != special_kfunc_list[KF_bpf_obj_drop_impl]) {
11993                                         verbose(env, "arg#%d expected for bpf_obj_drop_impl()\n", i);
11994                                         return -EINVAL;
11995                                 }
11996                         } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
11997                                 if (meta->func_id != special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
11998                                         verbose(env, "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i);
11999                                         return -EINVAL;
12000                                 }
12001                         } else {
12002                                 verbose(env, "arg#%d expected pointer to allocated object\n", i);
12003                                 return -EINVAL;
12004                         }
12005                         if (!reg->ref_obj_id) {
12006                                 verbose(env, "allocated object must be referenced\n");
12007                                 return -EINVAL;
12008                         }
12009                         if (meta->btf == btf_vmlinux) {
12010                                 meta->arg_btf = reg->btf;
12011                                 meta->arg_btf_id = reg->btf_id;
12012                         }
12013                         break;
12014                 case KF_ARG_PTR_TO_DYNPTR:
12015                 {
12016                         enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
12017                         int clone_ref_obj_id = 0;
12018 
12019                         if (reg->type == CONST_PTR_TO_DYNPTR)
12020                                 dynptr_arg_type |= MEM_RDONLY;
12021 
12022                         if (is_kfunc_arg_uninit(btf, &args[i]))
12023                                 dynptr_arg_type |= MEM_UNINIT;
12024 
12025                         if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
12026                                 dynptr_arg_type |= DYNPTR_TYPE_SKB;
12027                         } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
12028                                 dynptr_arg_type |= DYNPTR_TYPE_XDP;
12029                         } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
12030                                    (dynptr_arg_type & MEM_UNINIT)) {
12031                                 enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;
12032 
12033                                 if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
12034                                         verbose(env, "verifier internal error: no dynptr type for parent of clone\n");
12035                                         return -EFAULT;
12036                                 }
12037 
12038                                 dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type);
12039                                 clone_ref_obj_id = meta->initialized_dynptr.ref_obj_id;
12040                                 if (dynptr_type_refcounted(parent_type) && !clone_ref_obj_id) {
12041                                         verbose(env, "verifier internal error: missing ref obj id for parent of clone\n");
12042                                         return -EFAULT;
12043                                 }
12044                         }
12045 
12046                         ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type, clone_ref_obj_id);
12047                         if (ret < 0)
12048                                 return ret;
12049 
12050                         if (!(dynptr_arg_type & MEM_UNINIT)) {
12051                                 int id = dynptr_id(env, reg);
12052 
12053                                 if (id < 0) {
12054                                         verbose(env, "verifier internal error: failed to obtain dynptr id\n");
12055                                         return id;
12056                                 }
12057                                 meta->initialized_dynptr.id = id;
12058                                 meta->initialized_dynptr.type = dynptr_get_type(env, reg);
12059                                 meta->initialized_dynptr.ref_obj_id = dynptr_ref_obj_id(env, reg);
12060                         }
12061 
12062                         break;
12063                 }
12064                 case KF_ARG_PTR_TO_ITER:
12065                         if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) {
12066                                 if (!check_css_task_iter_allowlist(env)) {
12067                                         verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n");
12068                                         return -EINVAL;
12069                                 }
12070                         }
12071                         ret = process_iter_arg(env, regno, insn_idx, meta);
12072                         if (ret < 0)
12073                                 return ret;
12074                         break;
12075                 case KF_ARG_PTR_TO_LIST_HEAD:
12076                         if (reg->type != PTR_TO_MAP_VALUE &&
12077                             reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12078                                 verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
12079                                 return -EINVAL;
12080                         }
12081                         if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
12082                                 verbose(env, "allocated object must be referenced\n");
12083                                 return -EINVAL;
12084                         }
12085                         ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
12086                         if (ret < 0)
12087                                 return ret;
12088                         break;
12089                 case KF_ARG_PTR_TO_RB_ROOT:
12090                         if (reg->type != PTR_TO_MAP_VALUE &&
12091                             reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12092                                 verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
12093                                 return -EINVAL;
12094                         }
12095                         if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
12096                                 verbose(env, "allocated object must be referenced\n");
12097                                 return -EINVAL;
12098                         }
12099                         ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
12100                         if (ret < 0)
12101                                 return ret;
12102                         break;
12103                 case KF_ARG_PTR_TO_LIST_NODE:
12104                         if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12105                                 verbose(env, "arg#%d expected pointer to allocated object\n", i);
12106                                 return -EINVAL;
12107                         }
12108                         if (!reg->ref_obj_id) {
12109                                 verbose(env, "allocated object must be referenced\n");
12110                                 return -EINVAL;
12111                         }
12112                         ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
12113                         if (ret < 0)
12114                                 return ret;
12115                         break;
12116                 case KF_ARG_PTR_TO_RB_NODE:
12117                         if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
12118                                 if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
12119                                         verbose(env, "rbtree_remove node input must be non-owning ref\n");
12120                                         return -EINVAL;
12121                                 }
12122                                 if (in_rbtree_lock_required_cb(env)) {
12123                                         verbose(env, "rbtree_remove not allowed in rbtree cb\n");
12124                                         return -EINVAL;
12125                                 }
12126                         } else {
12127                                 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12128                                         verbose(env, "arg#%d expected pointer to allocated object\n", i);
12129                                         return -EINVAL;
12130                                 }
12131                                 if (!reg->ref_obj_id) {
12132                                         verbose(env, "allocated object must be referenced\n");
12133                                         return -EINVAL;
12134                                 }
12135                         }
12136 
12137                         ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
12138                         if (ret < 0)
12139                                 return ret;
12140                         break;
12141                 case KF_ARG_PTR_TO_MAP:
12142                         /* If argument has '__map' suffix expect 'struct bpf_map *' */
12143                         ref_id = *reg2btf_ids[CONST_PTR_TO_MAP];
12144                         ref_t = btf_type_by_id(btf_vmlinux, ref_id);
12145                         ref_tname = btf_name_by_offset(btf, ref_t->name_off);
12146                         fallthrough;
12147                 case KF_ARG_PTR_TO_BTF_ID:
12148                         /* Only base_type is checked, further checks are done here */
12149                         if ((base_type(reg->type) != PTR_TO_BTF_ID ||
12150                              (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
12151                             !reg2btf_ids[base_type(reg->type)]) {
12152                                 verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
12153                                 verbose(env, "expected %s or socket\n",
12154                                         reg_type_str(env, base_type(reg->type) |
12155                                                           (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
12156                                 return -EINVAL;
12157                         }
12158                         ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i);
12159                         if (ret < 0)
12160                                 return ret;
12161                         break;
12162                 case KF_ARG_PTR_TO_MEM:
12163                         resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
12164                         if (IS_ERR(resolve_ret)) {
12165                                 verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
12166                                         i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret));
12167                                 return -EINVAL;
12168                         }
12169                         ret = check_mem_reg(env, reg, regno, type_size);
12170                         if (ret < 0)
12171                                 return ret;
12172                         break;
12173                 case KF_ARG_PTR_TO_MEM_SIZE:
12174                 {
12175                         struct bpf_reg_state *buff_reg = &regs[regno];
12176                         const struct btf_param *buff_arg = &args[i];
12177                         struct bpf_reg_state *size_reg = &regs[regno + 1];
12178                         const struct btf_param *size_arg = &args[i + 1];
12179 
12180                         if (!register_is_null(buff_reg) || !is_kfunc_arg_optional(meta->btf, buff_arg)) {
12181                                 ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
12182                                 if (ret < 0) {
12183                                         verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
12184                                         return ret;
12185                                 }
12186                         }
12187 
12188                         if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
12189                                 if (meta->arg_constant.found) {
12190                                         verbose(env, "verifier internal error: only one constant argument permitted\n");
12191                                         return -EFAULT;
12192                                 }
12193                                 if (!tnum_is_const(size_reg->var_off)) {
12194                                         verbose(env, "R%d must be a known constant\n", regno + 1);
12195                                         return -EINVAL;
12196                                 }
12197                                 meta->arg_constant.found = true;
12198                                 meta->arg_constant.value = size_reg->var_off.value;
12199                         }
12200 
12201                         /* Skip next '__sz' or '__szk' argument */
12202                         i++;
12203                         break;
12204                 }
12205                 case KF_ARG_PTR_TO_CALLBACK:
12206                         if (reg->type != PTR_TO_FUNC) {
12207                                 verbose(env, "arg%d expected pointer to func\n", i);
12208                                 return -EINVAL;
12209                         }
12210                         meta->subprogno = reg->subprogno;
12211                         break;
12212                 case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
12213                         if (!type_is_ptr_alloc_obj(reg->type)) {
12214                                 verbose(env, "arg#%d is neither owning nor non-owning ref\n", i);
12215                                 return -EINVAL;
12216                         }
12217                         if (!type_is_non_owning_ref(reg->type))
12218                                 meta->arg_owning_ref = true;
12219 
12220                         rec = reg_btf_record(reg);
12221                         if (!rec) {
12222                                 verbose(env, "verifier internal error: Couldn't find btf_record\n");
12223                                 return -EFAULT;
12224                         }
12225 
12226                         if (rec->refcount_off < 0) {
12227                                 verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i);
12228                                 return -EINVAL;
12229                         }
12230 
12231                         meta->arg_btf = reg->btf;
12232                         meta->arg_btf_id = reg->btf_id;
12233                         break;
12234                 case KF_ARG_PTR_TO_CONST_STR:
12235                         if (reg->type != PTR_TO_MAP_VALUE) {
12236                                 verbose(env, "arg#%d doesn't point to a const string\n", i);
12237                                 return -EINVAL;
12238                         }
12239                         ret = check_reg_const_str(env, reg, regno);
12240                         if (ret)
12241                                 return ret;
12242                         break;
12243                 case KF_ARG_PTR_TO_WORKQUEUE:
12244                         if (reg->type != PTR_TO_MAP_VALUE) {
12245                                 verbose(env, "arg#%d doesn't point to a map value\n", i);
12246                                 return -EINVAL;
12247                         }
12248                         ret = process_wq_func(env, regno, meta);
12249                         if (ret < 0)
12250                                 return ret;
12251                         break;
12252                 }
12253         }
12254 
12255         if (is_kfunc_release(meta) && !meta->release_regno) {
12256                 verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
12257                         func_name);
12258                 return -EINVAL;
12259         }
12260 
12261         return 0;
12262 }
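
/* A short BPF-side sketch of the acquire/release pairing tracked above via
 * ref_obj_id and meta->release_regno (bpf_task_acquire()/bpf_task_release()
 * are existing task kfuncs; 'task' is assumed to be a trusted task pointer
 * available to the program):
 *
 *	struct task_struct *t;
 *
 *	t = bpf_task_acquire(task);
 *	if (!t)
 *		return 0;
 *	...
 *	bpf_task_release(t);
 *
 * Passing an unreferenced pointer to bpf_task_release() leaves
 * meta->release_regno unset and trips the "release kernel function ...
 * expects refcounted PTR_TO_BTF_ID" error above.
 */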
12263 
12264 static int fetch_kfunc_meta(struct bpf_verifier_env *env,
12265                             struct bpf_insn *insn,
12266                             struct bpf_kfunc_call_arg_meta *meta,
12267                             const char **kfunc_name)
12268 {
12269         const struct btf_type *func, *func_proto;
12270         u32 func_id, *kfunc_flags;
12271         const char *func_name;
12272         struct btf *desc_btf;
12273 
12274         if (kfunc_name)
12275                 *kfunc_name = NULL;
12276 
12277         if (!insn->imm)
12278                 return -EINVAL;
12279 
12280         desc_btf = find_kfunc_desc_btf(env, insn->off);
12281         if (IS_ERR(desc_btf))
12282                 return PTR_ERR(desc_btf);
12283 
12284         func_id = insn->imm;
12285         func = btf_type_by_id(desc_btf, func_id);
12286         func_name = btf_name_by_offset(desc_btf, func->name_off);
12287         if (kfunc_name)
12288                 *kfunc_name = func_name;
12289         func_proto = btf_type_by_id(desc_btf, func->type);
12290 
12291         kfunc_flags = btf_kfunc_id_set_contains(desc_btf, func_id, env->prog);
12292         if (!kfunc_flags) {
12293                 return -EACCES;
12294         }
12295 
12296         memset(meta, 0, sizeof(*meta));
12297         meta->btf = desc_btf;
12298         meta->func_id = func_id;
12299         meta->kfunc_flags = *kfunc_flags;
12300         meta->func_proto = func_proto;
12301         meta->func_name = func_name;
12302 
12303         return 0;
12304 }
12305 
12306 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
12307 
12308 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
12309                             int *insn_idx_p)
12310 {
12311         bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable;
12312         u32 i, nargs, ptr_type_id, release_ref_obj_id;
12313         struct bpf_reg_state *regs = cur_regs(env);
12314         const char *func_name, *ptr_type_name;
12315         const struct btf_type *t, *ptr_type;
12316         struct bpf_kfunc_call_arg_meta meta;
12317         struct bpf_insn_aux_data *insn_aux;
12318         int err, insn_idx = *insn_idx_p;
12319         const struct btf_param *args;
12320         const struct btf_type *ret_t;
12321         struct btf *desc_btf;
12322 
12323         /* skip for now, but return error when we find this in fixup_kfunc_call */
12324         if (!insn->imm)
12325                 return 0;
12326 
12327         err = fetch_kfunc_meta(env, insn, &meta, &func_name);
12328         if (err == -EACCES && func_name)
12329                 verbose(env, "calling kernel function %s is not allowed\n", func_name);
12330         if (err)
12331                 return err;
12332         desc_btf = meta.btf;
12333         insn_aux = &env->insn_aux_data[insn_idx];
12334 
12335         insn_aux->is_iter_next = is_iter_next_kfunc(&meta);
12336 
12337         if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
12338                 verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
12339                 return -EACCES;
12340         }
12341 
12342         sleepable = is_kfunc_sleepable(&meta);
12343         if (sleepable && !in_sleepable(env)) {
12344                 verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
12345                 return -EACCES;
12346         }
12347 
12348         /* Check the arguments */
12349         err = check_kfunc_args(env, &meta, insn_idx);
12350         if (err < 0)
12351                 return err;
12352 
12353         if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
12354                 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
12355                                          set_rbtree_add_callback_state);
12356                 if (err) {
12357                         verbose(env, "kfunc %s#%d failed callback verification\n",
12358                                 func_name, meta.func_id);
12359                         return err;
12360                 }
12361         }
12362 
12363         if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) {
12364                 meta.r0_size = sizeof(u64);
12365                 meta.r0_rdonly = false;
12366         }
12367 
12368         if (is_bpf_wq_set_callback_impl_kfunc(meta.func_id)) {
12369                 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
12370                                          set_timer_callback_state);
12371                 if (err) {
12372                         verbose(env, "kfunc %s#%d failed callback verification\n",
12373                                 func_name, meta.func_id);
12374                         return err;
12375                 }
12376         }
12377 
12378         rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
12379         rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
12380 
12381         preempt_disable = is_kfunc_bpf_preempt_disable(&meta);
12382         preempt_enable = is_kfunc_bpf_preempt_enable(&meta);
12383 
12384         if (env->cur_state->active_rcu_lock) {
12385                 struct bpf_func_state *state;
12386                 struct bpf_reg_state *reg;
12387                 u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
12388 
12389                 if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
12390                         verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
12391                         return -EACCES;
12392                 }
12393 
12394                 if (rcu_lock) {
12395                         verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
12396                         return -EINVAL;
12397                 } else if (rcu_unlock) {
12398                         bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({
12399                                 if (reg->type & MEM_RCU) {
12400                                         reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
12401                                         reg->type |= PTR_UNTRUSTED;
12402                                 }
12403                         }));
12404                         env->cur_state->active_rcu_lock = false;
12405                 } else if (sleepable) {
12406                         verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
12407                         return -EACCES;
12408                 }
12409         } else if (rcu_lock) {
12410                 env->cur_state->active_rcu_lock = true;
12411         } else if (rcu_unlock) {
12412                 verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
12413                 return -EINVAL;
12414         }
12415 
12416         if (env->cur_state->active_preempt_lock) {
12417                 if (preempt_disable) {
12418                         env->cur_state->active_preempt_lock++;
12419                 } else if (preempt_enable) {
12420                         env->cur_state->active_preempt_lock--;
12421                 } else if (sleepable) {
12422                         verbose(env, "kernel func %s is sleepable within non-preemptible region\n", func_name);
12423                         return -EACCES;
12424                 }
12425         } else if (preempt_disable) {
12426                 env->cur_state->active_preempt_lock++;
12427         } else if (preempt_enable) {
12428                 verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name);
12429                 return -EINVAL;
12430         }
12431 
12432         /* In case of a release function, we got the register number of the
12433          * refcounted PTR_TO_BTF_ID in bpf_kfunc_call_arg_meta, so do the release now.
12434          */
12435         if (meta.release_regno) {
12436                 err = release_reference(env, regs[meta.release_regno].ref_obj_id);
12437                 if (err) {
12438                         verbose(env, "kfunc %s#%d reference has not been acquired before\n",
12439                                 func_name, meta.func_id);
12440                         return err;
12441                 }
12442         }
12443 
12444         if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
12445             meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
12446             meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
12447                 release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
12448                 insn_aux->insert_off = regs[BPF_REG_2].off;
12449                 insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
12450                 err = ref_convert_owning_non_owning(env, release_ref_obj_id);
12451                 if (err) {
12452                         verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
12453                                 func_name, meta.func_id);
12454                         return err;
12455                 }
12456 
12457                 err = release_reference(env, release_ref_obj_id);
12458                 if (err) {
12459                         verbose(env, "kfunc %s#%d reference has not been acquired before\n",
12460                                 func_name, meta.func_id);
12461                         return err;
12462                 }
12463         }
12464 
12465         if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
12466                 if (!bpf_jit_supports_exceptions()) {
12467                         verbose(env, "JIT does not support calling kfunc %s#%d\n",
12468                                 func_name, meta.func_id);
12469                         return -ENOTSUPP;
12470                 }
12471                 env->seen_exception = true;
12472 
12473                 /* In the case of the default callback, the cookie value passed
12474                  * to bpf_throw becomes the return value of the program.
12475                  */
12476                 if (!env->exception_callback_subprog) {
12477                         err = check_return_code(env, BPF_REG_1, "R1");
12478                         if (err < 0)
12479                                 return err;
12480                 }
12481         }
12482 
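              /* The call clobbers all caller-saved registers (R0-R5); mark
               * them uninitialized before reconstructing R0 from the kfunc's
               * BTF return type below.
               */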
12483         for (i = 0; i < CALLER_SAVED_REGS; i++)
12484                 mark_reg_not_init(env, regs, caller_saved[i]);
12485 
12486         /* Check return type */
12487         t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
12488 
12489         if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
12490                 /* Only exception is bpf_obj_new_impl */
12491                 if (meta.btf != btf_vmlinux ||
12492                     (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
12493                      meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] &&
12494                      meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
12495                         verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
12496                         return -EINVAL;
12497                 }
12498         }
12499 
12500         if (btf_type_is_scalar(t)) {
12501                 mark_reg_unknown(env, regs, BPF_REG_0);
12502                 mark_btf_func_reg_size(env, BPF_REG_0, t->size);
12503         } else if (btf_type_is_ptr(t)) {
12504                 ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
12505 
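                      /* Illustrative sketch (not from this file) of a caller
                       * of the special kfuncs handled below, using a
                       * hypothetical local type 'struct node' and the
                       * bpf_obj_new() convenience macro (e.g. from the
                       * selftests' bpf_experimental.h):
                       *
                       *     struct node { long key; };
                       *     struct node *n = bpf_obj_new(typeof(*n));
                       *
                       * For bpf_obj_new_impl(), R0 below becomes
                       * PTR_TO_BTF_ID | MEM_ALLOC referring to the program's
                       * own BTF type.
                       */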
12506                 if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
12507                         if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
12508                             meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
12509                                 struct btf_struct_meta *struct_meta;
12510                                 struct btf *ret_btf;
12511                                 u32 ret_btf_id;
12512 
12513                                 if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
12514                                         return -ENOMEM;
12515 
12516                                 if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
12517                                         verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
12518                                         return -EINVAL;
12519                                 }
12520 
12521                                 ret_btf = env->prog->aux->btf;
12522                                 ret_btf_id = meta.arg_constant.value;
12523 
12524                                 /* This may be NULL if the user did not supply BTF for the program */
12525                                 if (!ret_btf) {
12526                                         verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
12527                                         return -EINVAL;
12528                                 }
12529 
12530                                 ret_t = btf_type_by_id(ret_btf, ret_btf_id);
12531                                 if (!ret_t || !__btf_type_is_struct(ret_t)) {
12532                                         verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
12533                                         return -EINVAL;
12534                                 }
12535 
12536                                 if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
12537                                         if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
12538                                                 verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
12539                                                         ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
12540                                                 return -EINVAL;
12541                                         }
12542 
12543                                         if (!bpf_global_percpu_ma_set) {
12544                                                 mutex_lock(&bpf_percpu_ma_lock);
12545                                                 if (!bpf_global_percpu_ma_set) {
12546                                                         /* Charge memory allocated with bpf_global_percpu_ma to
12547                                                          * root memcg. The obj_cgroup for root memcg is NULL.
12548                                                          */
12549                                                         err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL);
12550                                                         if (!err)
12551                                                                 bpf_global_percpu_ma_set = true;
12552                                                 }
12553                                                 mutex_unlock(&bpf_percpu_ma_lock);
12554                                                 if (err)
12555                                                         return err;
12556                                         }
12557 
12558                                         mutex_lock(&bpf_percpu_ma_lock);
12559                                         err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
12560                                         mutex_unlock(&bpf_percpu_ma_lock);
12561                                         if (err)
12562                                                 return err;
12563                                 }
12564 
12565                                 struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
12566                                 if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
12567                                         if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
12568                                                 verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
12569                                                 return -EINVAL;
12570                                         }
12571 
12572                                         if (struct_meta) {
12573                                                 verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
12574                                                 return -EINVAL;
12575                                         }
12576                                 }
12577 
12578                                 mark_reg_known_zero(env, regs, BPF_REG_0);
12579                                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
12580                                 regs[BPF_REG_0].btf = ret_btf;
12581                                 regs[BPF_REG_0].btf_id = ret_btf_id;
12582                                 if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
12583                                         regs[BPF_REG_0].type |= MEM_PERCPU;
12584 
12585                                 insn_aux->obj_new_size = ret_t->size;
12586                                 insn_aux->kptr_struct_meta = struct_meta;
12587                         } else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
12588                                 mark_reg_known_zero(env, regs, BPF_REG_0);
12589                                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
12590                                 regs[BPF_REG_0].btf = meta.arg_btf;
12591                                 regs[BPF_REG_0].btf_id = meta.arg_btf_id;
12592 
12593                                 insn_aux->kptr_struct_meta =
12594                                         btf_find_struct_meta(meta.arg_btf,
12595                                                              meta.arg_btf_id);
12596                         } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
12597                                    meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
12598                                 struct btf_field *field = meta.arg_list_head.field;
12599 
12600                                 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
12601                         } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
12602                                    meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
12603                                 struct btf_field *field = meta.arg_rbtree_root.field;
12604 
12605                                 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
12606                         } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
12607                                 mark_reg_known_zero(env, regs, BPF_REG_0);
12608                                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
12609                                 regs[BPF_REG_0].btf = desc_btf;
12610                                 regs[BPF_REG_0].btf_id = meta.ret_btf_id;
12611                         } else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
12612                                 ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value);
12613                                 if (!ret_t || !btf_type_is_struct(ret_t)) {
12614                                         verbose(env,
12615                                                 "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
12616                                         return -EINVAL;
12617                                 }
12618 
12619                                 mark_reg_known_zero(env, regs, BPF_REG_0);
12620                                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
12621                                 regs[BPF_REG_0].btf = desc_btf;
12622                                 regs[BPF_REG_0].btf_id = meta.arg_constant.value;
12623                         } else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
12624                                    meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
12625                                 enum bpf_type_flag type_flag = get_dynptr_type_flag(meta.initialized_dynptr.type);
12626 
12627                                 mark_reg_known_zero(env, regs, BPF_REG_0);
12628 
12629                                 if (!meta.arg_constant.found) {
12630                                         verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
12631                                         return -EFAULT;
12632                                 }
12633 
12634                                 regs[BPF_REG_0].mem_size = meta.arg_constant.value;
12635 
12636                                 /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
12637                                 regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
12638 
12639                                 if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
12640                                         regs[BPF_REG_0].type |= MEM_RDONLY;
12641                                 } else {
12642                                         /* this will set env->seen_direct_write to true */
12643                                         if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
12644                                                 verbose(env, "the prog does not allow writes to packet data\n");
12645                                                 return -EINVAL;
12646                                         }
12647                                 }
12648 
12649                                 if (!meta.initialized_dynptr.id) {
12650                                         verbose(env, "verifier internal error: no dynptr id\n");
12651                                         return -EFAULT;
12652                                 }
12653                                 regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
12654 
12655                                 /* we don't need to set BPF_REG_0's ref obj id
12656                                  * because packet slices are not refcounted (see
12657                                  * dynptr_type_refcounted)
12658                                  */
12659                         } else {
12660                                 verbose(env, "kernel function %s unhandled dynamic return type\n",
12661                                         meta.func_name);
12662                                 return -EFAULT;
12663                         }
12664                 } else if (btf_type_is_void(ptr_type)) {
12665                         /* kfunc returning 'void *' is equivalent to returning scalar */
12666                         mark_reg_unknown(env, regs, BPF_REG_0);
12667                 } else if (!__btf_type_is_struct(ptr_type)) {
12668                         if (!meta.r0_size) {
12669                                 __u32 sz;
12670 
12671                                 if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
12672                                         meta.r0_size = sz;
12673                                         meta.r0_rdonly = true;
12674                                 }
12675                         }
12676                         if (!meta.r0_size) {
12677                                 ptr_type_name = btf_name_by_offset(desc_btf,
12678                                                                    ptr_type->name_off);
12679                                 verbose(env,
12680                                         "kernel function %s returns pointer type %s %s is not supported\n",
12681                                         func_name,
12682                                         btf_type_str(ptr_type),
12683                                         ptr_type_name);
12684                                 return -EINVAL;
12685                         }
12686 
12687                         mark_reg_known_zero(env, regs, BPF_REG_0);
12688                         regs[BPF_REG_0].type = PTR_TO_MEM;
12689                         regs[BPF_REG_0].mem_size = meta.r0_size;
12690 
12691                         if (meta.r0_rdonly)
12692                                 regs[BPF_REG_0].type |= MEM_RDONLY;
12693 
12694                         /* Ensures we don't access the memory after a release_reference() */
12695                         if (meta.ref_obj_id)
12696                                 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
12697                 } else {
12698                         mark_reg_known_zero(env, regs, BPF_REG_0);
12699                         regs[BPF_REG_0].btf = desc_btf;
12700                         regs[BPF_REG_0].type = PTR_TO_BTF_ID;
12701                         regs[BPF_REG_0].btf_id = ptr_type_id;
12702 
12703                         if (is_iter_next_kfunc(&meta)) {
12704                                 struct bpf_reg_state *cur_iter;
12705 
12706                                 cur_iter = get_iter_from_state(env->cur_state, &meta);
12707 
12708                                 if (cur_iter->type & MEM_RCU) /* KF_RCU_PROTECTED */
12709                                         regs[BPF_REG_0].type |= MEM_RCU;
12710                                 else
12711                                         regs[BPF_REG_0].type |= PTR_TRUSTED;
12712                         }
12713                 }
12714 
12715                 if (is_kfunc_ret_null(&meta)) {
12716                         regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
12717                         /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
12718                         regs[BPF_REG_0].id = ++env->id_gen;
12719                 }
12720                 mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
12721                 if (is_kfunc_acquire(&meta)) {
12722                         int id = acquire_reference_state(env, insn_idx);
12723 
12724                         if (id < 0)
12725                                 return id;
12726                         if (is_kfunc_ret_null(&meta))
12727                                 regs[BPF_REG_0].id = id;
12728                         regs[BPF_REG_0].ref_obj_id = id;
12729                 } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
12730                         ref_set_non_owning(env, &regs[BPF_REG_0]);
12731                 }
12732 
12733                 if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
12734                         regs[BPF_REG_0].id = ++env->id_gen;
12735         } else if (btf_type_is_void(t)) {
12736                 if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
12737                         if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
12738                             meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
12739                                 insn_aux->kptr_struct_meta =
12740                                         btf_find_struct_meta(meta.arg_btf,
12741                                                              meta.arg_btf_id);
12742                         }
12743                 }
12744         }
12745 
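              /* Record how each argument register is read, based on the
               * kfunc's BTF signature: pointer args as full 64-bit values,
               * scalar args with their BTF size.  This feeds liveness
               * tracking and zero-extension insertion.
               */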
12746         nargs = btf_type_vlen(meta.func_proto);
12747         args = (const struct btf_param *)(meta.func_proto + 1);
12748         for (i = 0; i < nargs; i++) {
12749                 u32 regno = i + 1;
12750 
12751                 t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
12752                 if (btf_type_is_ptr(t))
12753                         mark_btf_func_reg_size(env, regno, sizeof(void *));
12754                 else
12755                         /* scalar. ensured by btf_check_kfunc_arg_match() */
12756                         mark_btf_func_reg_size(env, regno, t->size);
12757         }
12758 
12759         if (is_iter_next_kfunc(&meta)) {
12760                 err = process_iter_next_call(env, insn_idx, &meta);
12761                 if (err)
12762                         return err;
12763         }
12764 
12765         return 0;
12766 }
12767 
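      /* Check that the fixed and variable parts of 'reg', when used as a
       * pointer offset of the given type, stay within +/-BPF_MAX_VAR_OFF so
       * that later bounds arithmetic cannot overflow.
       */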
12768 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
12769                                   const struct bpf_reg_state *reg,
12770                                   enum bpf_reg_type type)
12771 {
12772         bool known = tnum_is_const(reg->var_off);
12773         s64 val = reg->var_off.value;
12774         s64 smin = reg->smin_value;
12775 
12776         if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
12777                 verbose(env, "math between %s pointer and %lld is not allowed\n",
12778                         reg_type_str(env, type), val);
12779                 return false;
12780         }
12781 
12782         if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
12783                 verbose(env, "%s pointer offset %d is not allowed\n",
12784                         reg_type_str(env, type), reg->off);
12785                 return false;
12786         }
12787 
12788         if (smin == S64_MIN) {
12789                 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
12790                         reg_type_str(env, type));
12791                 return false;
12792         }
12793 
12794         if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
12795                 verbose(env, "value %lld makes %s pointer be out of bounds\n",
12796                         smin, reg_type_str(env, type));
12797                 return false;
12798         }
12799 
12800         return true;
12801 }
12802 
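      /* Internal error codes for the Spectre v1 pointer-ALU sanitation
       * helpers below; sanitize_err() maps them to verifier log messages
       * and -EACCES.
       */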
12803 enum {
12804         REASON_BOUNDS   = -1,
12805         REASON_TYPE     = -2,
12806         REASON_PATHS    = -3,
12807         REASON_LIMIT    = -4,
12808         REASON_STACK    = -5,
12809 };
12810 
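      /* Compute the runtime masking limit for speculative pointer
       * arithmetic: the number of bytes the pointer may move in the masked
       * direction.  Only PTR_TO_STACK and PTR_TO_MAP_VALUE are supported;
       * other types fail with REASON_TYPE, and a limit that does not fit
       * the object fails with REASON_LIMIT.
       */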
12811 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
12812                               u32 *alu_limit, bool mask_to_left)
12813 {
12814         u32 max = 0, ptr_limit = 0;
12815 
12816         switch (ptr_reg->type) {
12817         case PTR_TO_STACK:
12818                 /* Offset 0 is out-of-bounds, but acceptable start for the
12819                  * left direction, see BPF_REG_FP. Also, unknown scalar
12820                  * offset where we would need to deal with min/max bounds is
12821                  * currently prohibited for unprivileged.
12822                  */
12823                 max = MAX_BPF_STACK + mask_to_left;
12824                 ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
12825                 break;
12826         case PTR_TO_MAP_VALUE:
12827                 max = ptr_reg->map_ptr->value_size;
12828                 ptr_limit = (mask_to_left ?
12829                              ptr_reg->smin_value :
12830                              ptr_reg->umax_value) + ptr_reg->off;
12831                 break;
12832         default:
12833                 return REASON_TYPE;
12834         }
12835 
12836         if (ptr_limit >= max)
12837                 return REASON_LIMIT;
12838         *alu_limit = ptr_limit;
12839         return 0;
12840 }
12841 
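      /* Sanitation is unnecessary for privileged programs (bypass_spec_v1)
       * and when the offset is an immediate (BPF_K) rather than a register.
       */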
12842 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
12843                                     const struct bpf_insn *insn)
12844 {
12845         return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
12846 }
12847 
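      /* Record the masking state and limit in the instruction's aux data;
       * if a previously explored path recorded different values for the
       * same insn, fail with REASON_PATHS.
       */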
12848 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
12849                                        u32 alu_state, u32 alu_limit)
12850 {
12851         /* If we arrived here from different branches with different
12852          * state or limits to sanitize, then this won't work.
12853          */
12854         if (aux->alu_state &&
12855             (aux->alu_state != alu_state ||
12856              aux->alu_limit != alu_limit))
12857                 return REASON_PATHS;
12858 
12859         /* Corresponding fixup done in do_misc_fixups(). */
12860         aux->alu_state = alu_state;
12861         aux->alu_limit = alu_limit;
12862         return 0;
12863 }
12864 
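      /* Scalar counterpart of sanitize_ptr_alu(): mark the insn as doing
       * non-pointer ALU so that another path performing pointer arithmetic
       * with the same instruction is detected as a conflicting state.
       */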
12865 static int sanitize_val_alu(struct bpf_verifier_env *env,
12866                             struct bpf_insn *insn)
12867 {
12868         struct bpf_insn_aux_data *aux = cur_aux(env);
12869 
12870         if (can_skip_alu_sanitation(env, insn))
12871                 return 0;
12872 
12873         return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
12874 }
12875 
12876 static bool sanitize_needed(u8 opcode)
12877 {
12878         return opcode == BPF_ADD || opcode == BPF_SUB;
12879 }
12880 
12881 struct bpf_sanitize_info {
12882         struct bpf_insn_aux_data aux;
12883         bool mask_to_left;
12884 };
12885 
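      /* Push a speculative verification state that continues at next_idx
       * (e.g. the mispredicted direction of a branch); the instruction's
       * destination (and source) registers are marked as unknown scalars on
       * that path.
       */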
12886 static struct bpf_verifier_state *
12887 sanitize_speculative_path(struct bpf_verifier_env *env,
12888                           const struct bpf_insn *insn,
12889                           u32 next_idx, u32 curr_idx)
12890 {
12891         struct bpf_verifier_state *branch;
12892         struct bpf_reg_state *regs;
12893 
12894         branch = push_stack(env, next_idx, curr_idx, true);
12895         if (branch && insn) {
12896                 regs = branch->frame[branch->curframe]->regs;
12897                 if (BPF_SRC(insn->code) == BPF_K) {
12898                         mark_reg_unknown(env, regs, insn->dst_reg);
12899                 } else if (BPF_SRC(insn->code) == BPF_X) {
12900                         mark_reg_unknown(env, regs, insn->dst_reg);
12901                         mark_reg_unknown(env, regs, insn->src_reg);
12902                 }
12903         }
12904         return branch;
12905 }
12906 
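      /* Two-phase Spectre v1 sanitation for 'ptr += scalar' and
       * 'ptr -= scalar'.  With commit_window == false, compute and stash the
       * masking state/limit in 'info' and push a speculative path simulating
       * the truncated, potentially out-of-bounds result.  With
       * commit_window == true (called after the bounds update), narrow the
       * recorded limit by the observed pointer movement and commit it to the
       * insn aux data for do_misc_fixups().
       */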
12907 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
12908                             struct bpf_insn *insn,
12909                             const struct bpf_reg_state *ptr_reg,
12910                             const struct bpf_reg_state *off_reg,
12911                             struct bpf_reg_state *dst_reg,
12912                             struct bpf_sanitize_info *info,
12913                             const bool commit_window)
12914 {
12915         struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
12916         struct bpf_verifier_state *vstate = env->cur_state;
12917         bool off_is_imm = tnum_is_const(off_reg->var_off);
12918         bool off_is_neg = off_reg->smin_value < 0;
12919         bool ptr_is_dst_reg = ptr_reg == dst_reg;
12920         u8 opcode = BPF_OP(insn->code);
12921         u32 alu_state, alu_limit;
12922         struct bpf_reg_state tmp;
12923         bool ret;
12924         int err;
12925 
12926         if (can_skip_alu_sanitation(env, insn))
12927                 return 0;
12928 
12929         /* We already marked aux for masking from non-speculative
12930          * paths, thus we got here in the first place. We only care
12931          * to explore bad access from here.
12932          */
12933         if (vstate->speculative)
12934                 goto do_sim;
12935 
12936         if (!commit_window) {
12937                 if (!tnum_is_const(off_reg->var_off) &&
12938                     (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
12939                         return REASON_BOUNDS;
12940 
12941                 info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
12942                                      (opcode == BPF_SUB && !off_is_neg);
12943         }
12944 
12945         err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
12946         if (err < 0)
12947                 return err;
12948 
12949         if (commit_window) {
12950                 /* In commit phase we narrow the masking window based on
12951                  * the observed pointer move after the simulated operation.
12952                  */
12953                 alu_state = info->aux.alu_state;
12954                 alu_limit = abs(info->aux.alu_limit - alu_limit);
12955         } else {
12956                 alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
12957                 alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
12958                 alu_state |= ptr_is_dst_reg ?
12959                              BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
12960 
12961                 /* Limit pruning on unknown scalars to enable deep search for
12962                  * potential masking differences from other program paths.
12963                  */
12964                 if (!off_is_imm)
12965                         env->explore_alu_limits = true;
12966         }
12967 
12968         err = update_alu_sanitation_state(aux, alu_state, alu_limit);
12969         if (err < 0)
12970                 return err;
12971 do_sim:
12972         /* If we're in commit phase, we're done here given we already
12973          * pushed the truncated dst_reg into the speculative verification
12974          * stack.
12975          *
12976          * Also, when register is a known constant, we rewrite register-based
12977          * operation to immediate-based, and thus do not need masking (and as
12978          * a consequence, do not need to simulate the zero-truncation either).
12979          */
12980         if (commit_window || off_is_imm)
12981                 return 0;
12982 
12983         /* Simulate and find potential out-of-bounds access under
12984          * speculative execution from truncation as a result of
12985          * masking when off was not within expected range. If off
12986          * sits in dst, then we temporarily need to move ptr there
12987          * to simulate dst (== 0) +/-= ptr. Needed, for example,
12988          * for cases where we use K-based arithmetic in one direction
12989          * and truncated reg-based in the other in order to explore
12990          * bad access.
12991          */
12992         if (!ptr_is_dst_reg) {
12993                 tmp = *dst_reg;
12994                 copy_register_state(dst_reg, ptr_reg);
12995         }
12996         ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
12997                                         env->insn_idx);
12998         if (!ptr_is_dst_reg && ret)
12999                 *dst_reg = tmp;
13000         return !ret ? REASON_STACK : 0;
13001 }
13002 
13003 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
13004 {
13005         struct bpf_verifier_state *vstate = env->cur_state;
13006 
13007         /* If we simulate paths under speculation, we don't update the
13008          * insn as 'seen' such that when we verify unreachable paths in
13009          * the non-speculative domain, sanitize_dead_code() can still
13010          * rewrite/sanitize them.
13011          */
13012         if (!vstate->speculative)
13013                 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
13014 }
13015 
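      /* Translate a REASON_* code from the sanitation helpers into a
       * verifier log message; always returns -EACCES.
       */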
13016 static int sanitize_err(struct bpf_verifier_env *env,
13017                         const struct bpf_insn *insn, int reason,
13018                         const struct bpf_reg_state *off_reg,
13019                         const struct bpf_reg_state *dst_reg)
13020 {
13021         static const char *err = "pointer arithmetic with it prohibited for !root";
13022         const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
13023         u32 dst = insn->dst_reg, src = insn->src_reg;
13024 
13025         switch (reason) {
13026         case REASON_BOUNDS:
13027                 verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
13028                         off_reg == dst_reg ? dst : src, err);
13029                 break;
13030         case REASON_TYPE:
13031                 verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
13032                         off_reg == dst_reg ? src : dst, err);
13033                 break;
13034         case REASON_PATHS:
13035                 verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
13036                         dst, op, err);
13037                 break;
13038         case REASON_LIMIT:
13039                 verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
13040                         dst, op, err);
13041                 break;
13042         case REASON_STACK:
13043                 verbose(env, "R%d could not be pushed for speculative verification, %s\n",
13044                         dst, err);
13045                 break;
13046         default:
13047                 verbose(env, "verifier internal error: unknown reason (%d)\n",
13048                         reason);
13049                 break;
13050         }
13051 
13052         return -EACCES;
13053 }
13054 
13055 /* check that stack access falls within stack limits and that 'reg' doesn't
13056  * have a variable offset.
13057  *
13058  * Variable offset is prohibited for unprivileged mode for simplicity since it
13059  * requires corresponding support in Spectre masking for stack ALU.  See also
13060  * retrieve_ptr_limit().
13061  *
13062  *
13063  * 'off' includes 'reg->off'.
13064  */
13065 static int check_stack_access_for_ptr_arithmetic(
13066                                 struct bpf_verifier_env *env,
13067                                 int regno,
13068                                 const struct bpf_reg_state *reg,
13069                                 int off)
13070 {
13071         if (!tnum_is_const(reg->var_off)) {
13072                 char tn_buf[48];
13073 
13074                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
13075                 verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
13076                         regno, tn_buf, off);
13077                 return -EACCES;
13078         }
13079 
13080         if (off >= 0 || off < -MAX_BPF_STACK) {
13081                 verbose(env, "R%d stack pointer arithmetic goes out of range, "
13082                         "prohibited for !root; off=%d\n", regno, off);
13083                 return -EACCES;
13084         }
13085 
13086         return 0;
13087 }
13088 
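      /* After pointer arithmetic, ensure (for !bypass_spec_v1) that a
       * resulting PTR_TO_STACK or PTR_TO_MAP_VALUE still points within its
       * object, since only in-bounds offsets can be sanitized later.
       */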
13089 static int sanitize_check_bounds(struct bpf_verifier_env *env,
13090                                  const struct bpf_insn *insn,
13091                                  const struct bpf_reg_state *dst_reg)
13092 {
13093         u32 dst = insn->dst_reg;
13094 
13095         /* For unprivileged we require that resulting offset must be in bounds
13096          * in order to be able to sanitize access later on.
13097          */
13098         if (env->bypass_spec_v1)
13099                 return 0;
13100 
13101         switch (dst_reg->type) {
13102         case PTR_TO_STACK:
13103                 if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
13104                                         dst_reg->off + dst_reg->var_off.value))
13105                         return -EACCES;
13106                 break;
13107         case PTR_TO_MAP_VALUE:
13108                 if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
13109                         verbose(env, "R%d pointer arithmetic of map value goes out of range, "
13110                                 "prohibited for !root\n", dst);
13111                         return -EACCES;
13112                 }
13113                 break;
13114         default:
13115                 break;
13116         }
13117 
13118         return 0;
13119 }
13120 
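      /* Illustrative sketch (not from this file) of BPF C that exercises the
       * pointer/scalar arithmetic handled below, with hypothetical
       * map/variable names:
       *
       *     char *p = bpf_map_lookup_elem(&my_map, &key);
       *     if (!p)
       *             return 0;
       *     p += idx;           // PTR_TO_MAP_VALUE += unknown scalar
       *     return *p;          // access itself checked via check_map_access()
       */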
13121 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
13122  * Caller should also handle BPF_MOV case separately.
13123  * If we return -EACCES, caller may want to try again treating pointer as a
13124  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
13125  */
13126 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
13127                                    struct bpf_insn *insn,
13128                                    const struct bpf_reg_state *ptr_reg,
13129                                    const struct bpf_reg_state *off_reg)
13130 {
13131         struct bpf_verifier_state *vstate = env->cur_state;
13132         struct bpf_func_state *state = vstate->frame[vstate->curframe];
13133         struct bpf_reg_state *regs = state->regs, *dst_reg;
13134         bool known = tnum_is_const(off_reg->var_off);
13135         s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
13136             smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
13137         u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
13138             umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
13139         struct bpf_sanitize_info info = {};
13140         u8 opcode = BPF_OP(insn->code);
13141         u32 dst = insn->dst_reg;
13142         int ret;
13143 
13144         dst_reg = &regs[dst];
13145 
13146         if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
13147             smin_val > smax_val || umin_val > umax_val) {
13148                 /* Taint dst register if offset had invalid bounds derived from
13149                  * e.g. dead branches.
13150                  */
13151                 __mark_reg_unknown(env, dst_reg);
13152                 return 0;
13153         }
13154 
13155         if (BPF_CLASS(insn->code) != BPF_ALU64) {
13156                 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
13157                 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
13158                         __mark_reg_unknown(env, dst_reg);
13159                         return 0;
13160                 }
13161 
13162                 verbose(env,
13163                         "R%d 32-bit pointer arithmetic prohibited\n",
13164                         dst);
13165                 return -EACCES;
13166         }
13167 
13168         if (ptr_reg->type & PTR_MAYBE_NULL) {
13169                 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
13170                         dst, reg_type_str(env, ptr_reg->type));
13171                 return -EACCES;
13172         }
13173 
13174         switch (base_type(ptr_reg->type)) {
13175         case PTR_TO_CTX:
13176         case PTR_TO_MAP_VALUE:
13177         case PTR_TO_MAP_KEY:
13178         case PTR_TO_STACK:
13179         case PTR_TO_PACKET_META:
13180         case PTR_TO_PACKET:
13181         case PTR_TO_TP_BUFFER:
13182         case PTR_TO_BTF_ID:
13183         case PTR_TO_MEM:
13184         case PTR_TO_BUF:
13185         case PTR_TO_FUNC:
13186         case CONST_PTR_TO_DYNPTR:
13187                 break;
13188         case PTR_TO_FLOW_KEYS:
13189                 if (known)
13190                         break;
13191                 fallthrough;
13192         case CONST_PTR_TO_MAP:
13193                 /* smin_val represents the known value */
13194                 if (known && smin_val == 0 && opcode == BPF_ADD)
13195                         break;
13196                 fallthrough;
13197         default:
13198                 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
13199                         dst, reg_type_str(env, ptr_reg->type));
13200                 return -EACCES;
13201         }
13202 
13203         /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
13204          * The id may be overwritten later if we create a new variable offset.
13205          */
13206         dst_reg->type = ptr_reg->type;
13207         dst_reg->id = ptr_reg->id;
13208 
13209         if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
13210             !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
13211                 return -EINVAL;
13212 
13213         /* pointer types do not carry 32-bit bounds at the moment. */
13214         __mark_reg32_unbounded(dst_reg);
13215 
13216         if (sanitize_needed(opcode)) {
13217                 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
13218                                        &info, false);
13219                 if (ret < 0)
13220                         return sanitize_err(env, insn, ret, off_reg, dst_reg);
13221         }
13222 
13223         switch (opcode) {
13224         case BPF_ADD:
13225                 /* We can take a fixed offset as long as it doesn't overflow
13226                  * the s32 'off' field
13227                  */
13228                 if (known && (ptr_reg->off + smin_val ==
13229                               (s64)(s32)(ptr_reg->off + smin_val))) {
13230                         /* pointer += K.  Accumulate it into fixed offset */
13231                         dst_reg->smin_value = smin_ptr;
13232                         dst_reg->smax_value = smax_ptr;
13233                         dst_reg->umin_value = umin_ptr;
13234                         dst_reg->umax_value = umax_ptr;
13235                         dst_reg->var_off = ptr_reg->var_off;
13236                         dst_reg->off = ptr_reg->off + smin_val;
13237                         dst_reg->raw = ptr_reg->raw;
13238                         break;
13239                 }
13240                 /* A new variable offset is created.  Note that off_reg->off
13241                  * == 0, since it's a scalar.
13242                  * dst_reg gets the pointer type and since some positive
13243                  * integer value was added to the pointer, give it a new 'id'
13244                  * if it's a PTR_TO_PACKET.
13245                  * this creates a new 'base' pointer, off_reg (variable) gets
13246                  * added into the variable offset, and we copy the fixed offset
13247                  * from ptr_reg.
13248                  */
13249                 if (check_add_overflow(smin_ptr, smin_val, &dst_reg->smin_value) ||
13250                     check_add_overflow(smax_ptr, smax_val, &dst_reg->smax_value)) {
13251                         dst_reg->smin_value = S64_MIN;
13252                         dst_reg->smax_value = S64_MAX;
13253                 }
13254                 if (check_add_overflow(umin_ptr, umin_val, &dst_reg->umin_value) ||
13255                     check_add_overflow(umax_ptr, umax_val, &dst_reg->umax_value)) {
13256                         dst_reg->umin_value = 0;
13257                         dst_reg->umax_value = U64_MAX;
13258                 }
13259                 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
13260                 dst_reg->off = ptr_reg->off;
13261                 dst_reg->raw = ptr_reg->raw;
13262                 if (reg_is_pkt_pointer(ptr_reg)) {
13263                         dst_reg->id = ++env->id_gen;
13264                         /* something was added to pkt_ptr, set range to zero */
13265                         memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
13266                 }
13267                 break;
13268         case BPF_SUB:
13269                 if (dst_reg == off_reg) {
13270                         /* scalar -= pointer.  Creates an unknown scalar */
13271                         verbose(env, "R%d tried to subtract pointer from scalar\n",
13272                                 dst);
13273                         return -EACCES;
13274                 }
13275                 /* We don't allow subtraction from FP, because (according to
13276                  * test_verifier.c test "invalid fp arithmetic", JITs might not
13277                  * be able to deal with it.
13278                  */
13279                 if (ptr_reg->type == PTR_TO_STACK) {
13280                         verbose(env, "R%d subtraction from stack pointer prohibited\n",
13281                                 dst);
13282                         return -EACCES;
13283                 }
13284                 if (known && (ptr_reg->off - smin_val ==
13285                               (s64)(s32)(ptr_reg->off - smin_val))) {
13286                         /* pointer -= K.  Subtract it from fixed offset */
13287                         dst_reg->smin_value = smin_ptr;
13288                         dst_reg->smax_value = smax_ptr;
13289                         dst_reg->umin_value = umin_ptr;
13290                         dst_reg->umax_value = umax_ptr;
13291                         dst_reg->var_off = ptr_reg->var_off;
13292                         dst_reg->id = ptr_reg->id;
13293                         dst_reg->off = ptr_reg->off - smin_val;
13294                         dst_reg->raw = ptr_reg->raw;
13295                         break;
13296                 }
13297                 /* A new variable offset is created.  If the subtrahend is known
13298                  * nonnegative, then any reg->range we had before is still good.
13299                  */
13300                 if (check_sub_overflow(smin_ptr, smax_val, &dst_reg->smin_value) ||
13301                     check_sub_overflow(smax_ptr, smin_val, &dst_reg->smax_value)) {
13302                         /* Overflow possible, we know nothing */
13303                         dst_reg->smin_value = S64_MIN;
13304                         dst_reg->smax_value = S64_MAX;
13305                 }
13306                 if (umin_ptr < umax_val) {
13307                         /* Overflow possible, we know nothing */
13308                         dst_reg->umin_value = 0;
13309                         dst_reg->umax_value = U64_MAX;
13310                 } else {
13311                         /* Cannot overflow (as long as bounds are consistent) */
13312                         dst_reg->umin_value = umin_ptr - umax_val;
13313                         dst_reg->umax_value = umax_ptr - umin_val;
13314                 }
13315                 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
13316                 dst_reg->off = ptr_reg->off;
13317                 dst_reg->raw = ptr_reg->raw;
13318                 if (reg_is_pkt_pointer(ptr_reg)) {
13319                         dst_reg->id = ++env->id_gen;
13320                         /* if a possibly negative value was subtracted from pkt_ptr, it may have moved forward; reset the range */
13321                         if (smin_val < 0)
13322                                 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
13323                 }
13324                 break;
13325         case BPF_AND:
13326         case BPF_OR:
13327         case BPF_XOR:
13328                 /* bitwise ops on pointers are troublesome, prohibit. */
13329                 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
13330                         dst, bpf_alu_string[opcode >> 4]);
13331                 return -EACCES;
13332         default:
13333                 /* other operators (e.g. MUL, LSH) produce non-pointer results */
13334                 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
13335                         dst, bpf_alu_string[opcode >> 4]);
13336                 return -EACCES;
13337         }
13338 
13339         if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
13340                 return -EINVAL;
13341         reg_bounds_sync(dst_reg);
13342         if (sanitize_check_bounds(env, insn, dst_reg) < 0)
13343                 return -EACCES;
13344         if (sanitize_needed(opcode)) {
13345                 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
13346                                        &info, true);
13347                 if (ret < 0)
13348                         return sanitize_err(env, insn, ret, off_reg, dst_reg);
13349         }
13350 
13351         return 0;
13352 }
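      /* The scalar{32,}_min_max_*() helpers below compute the new signed and
       * unsigned bounds of dst_reg for 'dst op= src' on two scalars; the
       * 32-bit variants update the u32/s32 subregister bounds.  Whenever an
       * overflow is possible the affected bounds are widened to the full
       * range: e.g. for ADD with dst in [10, 20] and src in [1, 5] the
       * result is [11, 25], but if smax_dst + smax_src overflows, the signed
       * bounds become [S64_MIN, S64_MAX].
       */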
13353 
13354 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
13355                                  struct bpf_reg_state *src_reg)
13356 {
13357         s32 *dst_smin = &dst_reg->s32_min_value;
13358         s32 *dst_smax = &dst_reg->s32_max_value;
13359         u32 *dst_umin = &dst_reg->u32_min_value;
13360         u32 *dst_umax = &dst_reg->u32_max_value;
13361 
13362         if (check_add_overflow(*dst_smin, src_reg->s32_min_value, dst_smin) ||
13363             check_add_overflow(*dst_smax, src_reg->s32_max_value, dst_smax)) {
13364                 *dst_smin = S32_MIN;
13365                 *dst_smax = S32_MAX;
13366         }
13367         if (check_add_overflow(*dst_umin, src_reg->u32_min_value, dst_umin) ||
13368             check_add_overflow(*dst_umax, src_reg->u32_max_value, dst_umax)) {
13369                 *dst_umin = 0;
13370                 *dst_umax = U32_MAX;
13371         }
13372 }
13373 
13374 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
13375                                struct bpf_reg_state *src_reg)
13376 {
13377         s64 *dst_smin = &dst_reg->smin_value;
13378         s64 *dst_smax = &dst_reg->smax_value;
13379         u64 *dst_umin = &dst_reg->umin_value;
13380         u64 *dst_umax = &dst_reg->umax_value;
13381 
13382         if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) ||
13383             check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) {
13384                 *dst_smin = S64_MIN;
13385                 *dst_smax = S64_MAX;
13386         }
13387         if (check_add_overflow(*dst_umin, src_reg->umin_value, dst_umin) ||
13388             check_add_overflow(*dst_umax, src_reg->umax_value, dst_umax)) {
13389                 *dst_umin = 0;
13390                 *dst_umax = U64_MAX;
13391         }
13392 }
13393 
13394 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
13395                                  struct bpf_reg_state *src_reg)
13396 {
13397         s32 *dst_smin = &dst_reg->s32_min_value;
13398         s32 *dst_smax = &dst_reg->s32_max_value;
13399         u32 umin_val = src_reg->u32_min_value;
13400         u32 umax_val = src_reg->u32_max_value;
13401 
13402         if (check_sub_overflow(*dst_smin, src_reg->s32_max_value, dst_smin) ||
13403             check_sub_overflow(*dst_smax, src_reg->s32_min_value, dst_smax)) {
13404                 /* Overflow possible, we know nothing */
13405                 *dst_smin = S32_MIN;
13406                 *dst_smax = S32_MAX;
13407         }
13408         if (dst_reg->u32_min_value < umax_val) {
13409                 /* Overflow possible, we know nothing */
13410                 dst_reg->u32_min_value = 0;
13411                 dst_reg->u32_max_value = U32_MAX;
13412         } else {
13413                 /* Cannot overflow (as long as bounds are consistent) */
13414                 dst_reg->u32_min_value -= umax_val;
13415                 dst_reg->u32_max_value -= umin_val;
13416         }
13417 }
13418 
13419 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
13420                                struct bpf_reg_state *src_reg)
13421 {
13422         s64 *dst_smin = &dst_reg->smin_value;
13423         s64 *dst_smax = &dst_reg->smax_value;
13424         u64 umin_val = src_reg->umin_value;
13425         u64 umax_val = src_reg->umax_value;
13426 
13427         if (check_sub_overflow(*dst_smin, src_reg->smax_value, dst_smin) ||
13428             check_sub_overflow(*dst_smax, src_reg->smin_value, dst_smax)) {
13429                 /* Overflow possible, we know nothing */
13430                 *dst_smin = S64_MIN;
13431                 *dst_smax = S64_MAX;
13432         }
13433         if (dst_reg->umin_value < umax_val) {
13434                 /* Overflow possible, we know nothing */
13435                 dst_reg->umin_value = 0;
13436                 dst_reg->umax_value = U64_MAX;
13437         } else {
13438                 /* Cannot overflow (as long as bounds are consistent) */
13439                 dst_reg->umin_value -= umax_val;
13440                 dst_reg->umax_value -= umin_val;
13441         }
13442 }
13443 
13444 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
13445                                  struct bpf_reg_state *src_reg)
13446 {
13447         s32 smin_val = src_reg->s32_min_value;
13448         u32 umin_val = src_reg->u32_min_value;
13449         u32 umax_val = src_reg->u32_max_value;
13450 
13451         if (smin_val < 0 || dst_reg->s32_min_value < 0) {
13452                 /* Ain't nobody got time to multiply that sign */
13453                 __mark_reg32_unbounded(dst_reg);
13454                 return;
13455         }
13456         /* Both values are positive, so we can work with unsigned and
13457          * copy the result to signed (unless it exceeds S32_MAX).
13458          */
13459         if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
13460                 /* Potential overflow, we know nothing */
13461                 __mark_reg32_unbounded(dst_reg);
13462                 return;
13463         }
13464         dst_reg->u32_min_value *= umin_val;
13465         dst_reg->u32_max_value *= umax_val;
13466         if (dst_reg->u32_max_value > S32_MAX) {
13467                 /* Overflow possible, we know nothing */
13468                 dst_reg->s32_min_value = S32_MIN;
13469                 dst_reg->s32_max_value = S32_MAX;
13470         } else {
13471                 dst_reg->s32_min_value = dst_reg->u32_min_value;
13472                 dst_reg->s32_max_value = dst_reg->u32_max_value;
13473         }
13474 }
13475 
13476 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
13477                                struct bpf_reg_state *src_reg)
13478 {
13479         s64 smin_val = src_reg->smin_value;
13480         u64 umin_val = src_reg->umin_value;
13481         u64 umax_val = src_reg->umax_value;
13482 
13483         if (smin_val < 0 || dst_reg->smin_value < 0) {
13484                 /* Ain't nobody got time to multiply that sign */
13485                 __mark_reg64_unbounded(dst_reg);
13486                 return;
13487         }
13488         /* Both values are positive, so we can work with unsigned and
13489          * copy the result to signed (unless it exceeds S64_MAX).
13490          */
13491         if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
13492                 /* Potential overflow, we know nothing */
13493                 __mark_reg64_unbounded(dst_reg);
13494                 return;
13495         }
13496         dst_reg->umin_value *= umin_val;
13497         dst_reg->umax_value *= umax_val;
13498         if (dst_reg->umax_value > S64_MAX) {
13499                 /* Overflow possible, we know nothing */
13500                 dst_reg->smin_value = S64_MIN;
13501                 dst_reg->smax_value = S64_MAX;
13502         } else {
13503                 dst_reg->smin_value = dst_reg->umin_value;
13504                 dst_reg->smax_value = dst_reg->umax_value;
13505         }
13506 }
13507 
13508 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
13509                                  struct bpf_reg_state *src_reg)
13510 {
13511         bool src_known = tnum_subreg_is_const(src_reg->var_off);
13512         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
13513         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
13514         u32 umax_val = src_reg->u32_max_value;
13515 
13516         if (src_known && dst_known) {
13517                 __mark_reg32_known(dst_reg, var32_off.value);
13518                 return;
13519         }
13520 
13521         /* We get our minimum from the var_off, since that's inherently
13522          * bitwise.  Our maximum is the minimum of the operands' maxima.
13523          */
13524         dst_reg->u32_min_value = var32_off.value;
13525         dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
13526 
13527         /* Safe to set s32 bounds by casting u32 result into s32 when u32
13528          * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
13529          */
13530         if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
13531                 dst_reg->s32_min_value = dst_reg->u32_min_value;
13532                 dst_reg->s32_max_value = dst_reg->u32_max_value;
13533         } else {
13534                 dst_reg->s32_min_value = S32_MIN;
13535                 dst_reg->s32_max_value = S32_MAX;
13536         }
13537 }
13538 
13539 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
13540                                struct bpf_reg_state *src_reg)
13541 {
13542         bool src_known = tnum_is_const(src_reg->var_off);
13543         bool dst_known = tnum_is_const(dst_reg->var_off);
13544         u64 umax_val = src_reg->umax_value;
13545 
13546         if (src_known && dst_known) {
13547                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
13548                 return;
13549         }
13550 
13551         /* We get our minimum from the var_off, since that's inherently
13552          * bitwise.  Our maximum is the minimum of the operands' maxima.
13553          */
13554         dst_reg->umin_value = dst_reg->var_off.value;
13555         dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
13556 
13557         /* Safe to set s64 bounds by casting u64 result into s64 when u64
13558          * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
13559          */
13560         if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
13561                 dst_reg->smin_value = dst_reg->umin_value;
13562                 dst_reg->smax_value = dst_reg->umax_value;
13563         } else {
13564                 dst_reg->smin_value = S64_MIN;
13565                 dst_reg->smax_value = S64_MAX;
13566         }
13567         /* We may learn something more from the var_off */
13568         __update_reg_bounds(dst_reg);
13569 }
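/* Worked example for the BPF_AND bounds above (editorial annotation, not
 * part of verifier.c): assume dst_reg was an unknown byte, var_off =
 * (value 0, mask 0xff), i.e. range [0, 0xff], and src_reg is the known
 * constant 0xf0.  tnum_and() has already narrowed dst_reg->var_off to
 * (value 0, mask 0xf0), so:
 *   umin_value = var_off.value   = 0
 *   umax_value = min(0xff, 0xf0) = 0xf0
 * The result does not cross the sign boundary, so smin/smax become 0/0xf0.
 */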
13570 
13571 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
13572                                 struct bpf_reg_state *src_reg)
13573 {
13574         bool src_known = tnum_subreg_is_const(src_reg->var_off);
13575         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
13576         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
13577         u32 umin_val = src_reg->u32_min_value;
13578 
13579         if (src_known && dst_known) {
13580                 __mark_reg32_known(dst_reg, var32_off.value);
13581                 return;
13582         }
13583 
13584         /* We get our maximum from the var_off, and our minimum is the
13585          * maximum of the operands' minima
13586          */
13587         dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
13588         dst_reg->u32_max_value = var32_off.value | var32_off.mask;
13589 
13590         /* Safe to set s32 bounds by casting u32 result into s32 when u32
13591          * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
13592          */
13593         if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
13594                 dst_reg->s32_min_value = dst_reg->u32_min_value;
13595                 dst_reg->s32_max_value = dst_reg->u32_max_value;
13596         } else {
13597                 dst_reg->s32_min_value = S32_MIN;
13598                 dst_reg->s32_max_value = S32_MAX;
13599         }
13600 }
13601 
13602 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
13603                               struct bpf_reg_state *src_reg)
13604 {
13605         bool src_known = tnum_is_const(src_reg->var_off);
13606         bool dst_known = tnum_is_const(dst_reg->var_off);
13607         u64 umin_val = src_reg->umin_value;
13608 
13609         if (src_known && dst_known) {
13610                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
13611                 return;
13612         }
13613 
13614         /* We get our maximum from the var_off, and our minimum is the
13615          * maximum of the operands' minima
13616          */
13617         dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
13618         dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
13619 
13620         /* Safe to set s64 bounds by casting u64 result into s64 when u64
13621          * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
13622          */
13623         if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
13624                 dst_reg->smin_value = dst_reg->umin_value;
13625                 dst_reg->smax_value = dst_reg->umax_value;
13626         } else {
13627                 dst_reg->smin_value = S64_MIN;
13628                 dst_reg->smax_value = S64_MAX;
13629         }
13630         /* We may learn something more from the var_off */
13631         __update_reg_bounds(dst_reg);
13632 }
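/* Worked example for the BPF_OR bounds above (editorial annotation, not
 * part of verifier.c): assume dst_reg is an unknown byte, var_off =
 * (value 0, mask 0xff), and src_reg is the known constant 0x100.
 * tnum_or() has already set dst_reg->var_off to (value 0x100, mask 0xff),
 * so:
 *   umin_value = max(0, 0x100)                = 0x100
 *   umax_value = var_off.value | var_off.mask = 0x1ff
 * No sign boundary is crossed, so smin/smax become 0x100/0x1ff.
 */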
13633 
13634 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
13635                                  struct bpf_reg_state *src_reg)
13636 {
13637         bool src_known = tnum_subreg_is_const(src_reg->var_off);
13638         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
13639         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
13640 
13641         if (src_known && dst_known) {
13642                 __mark_reg32_known(dst_reg, var32_off.value);
13643                 return;
13644         }
13645 
13646         /* We get both minimum and maximum from the var32_off. */
13647         dst_reg->u32_min_value = var32_off.value;
13648         dst_reg->u32_max_value = var32_off.value | var32_off.mask;
13649 
13650         /* Safe to set s32 bounds by casting u32 result into s32 when u32
13651          * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
13652          */
13653         if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
13654                 dst_reg->s32_min_value = dst_reg->u32_min_value;
13655                 dst_reg->s32_max_value = dst_reg->u32_max_value;
13656         } else {
13657                 dst_reg->s32_min_value = S32_MIN;
13658                 dst_reg->s32_max_value = S32_MAX;
13659         }
13660 }
13661 
13662 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
13663                                struct bpf_reg_state *src_reg)
13664 {
13665         bool src_known = tnum_is_const(src_reg->var_off);
13666         bool dst_known = tnum_is_const(dst_reg->var_off);
13667 
13668         if (src_known && dst_known) {
13669                 /* dst_reg->var_off.value has been updated earlier */
13670                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
13671                 return;
13672         }
13673 
13674         /* We get both minimum and maximum from the var_off. */
13675         dst_reg->umin_value = dst_reg->var_off.value;
13676         dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
13677 
13678         /* Safe to set s64 bounds by casting u64 result into s64 when u64
13679          * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
13680          */
13681         if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
13682                 dst_reg->smin_value = dst_reg->umin_value;
13683                 dst_reg->smax_value = dst_reg->umax_value;
13684         } else {
13685                 dst_reg->smin_value = S64_MIN;
13686                 dst_reg->smax_value = S64_MAX;
13687         }
13688 
13689         __update_reg_bounds(dst_reg);
13690 }
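/* Worked example for the BPF_XOR bounds above (editorial annotation, not
 * part of verifier.c): assume dst_reg is an unknown byte, var_off =
 * (value 0, mask 0xff), and src_reg is the known constant 0x0f.
 * tnum_xor() leaves dst_reg->var_off as (value 0, mask 0xff), since every
 * unknown bit stays unknown, so:
 *   umin_value = var_off.value                = 0
 *   umax_value = var_off.value | var_off.mask = 0xff
 * i.e. XOR with a constant cannot shrink an already-unknown byte.
 */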
13691 
13692 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
13693                                    u64 umin_val, u64 umax_val)
13694 {
13695         /* We lose all sign bit information (except what we can pick
13696          * up from var_off)
13697          */
13698         dst_reg->s32_min_value = S32_MIN;
13699         dst_reg->s32_max_value = S32_MAX;
13700         /* If we might shift our top bit out, then we know nothing */
13701         if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
13702                 dst_reg->u32_min_value = 0;
13703                 dst_reg->u32_max_value = U32_MAX;
13704         } else {
13705                 dst_reg->u32_min_value <<= umin_val;
13706                 dst_reg->u32_max_value <<= umax_val;
13707         }
13708 }
13709 
13710 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
13711                                  struct bpf_reg_state *src_reg)
13712 {
13713         u32 umax_val = src_reg->u32_max_value;
13714         u32 umin_val = src_reg->u32_min_value;
13715         /* u32 alu operation will zext upper bits */
13716         struct tnum subreg = tnum_subreg(dst_reg->var_off);
13717 
13718         __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
13719         dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
13720         /* Not required, but being careful: mark reg64 bounds as unknown so
13721          * that we are forced to pick them up from the tnum and zext later, and
13722          * so that we are still safe if some path skips this step.
13723          */
13724         __mark_reg64_unbounded(dst_reg);
13725         __update_reg32_bounds(dst_reg);
13726 }
13727 
13728 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
13729                                    u64 umin_val, u64 umax_val)
13730 {
13731         /* Special case <<32 because it is a common compiler pattern to sign
13732          * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
13733          * positive we know this shift will also be positive so we can track
13734          * bounds correctly. Otherwise we lose all sign bit information except
13735          * what we can pick up from var_off. Perhaps we can generalize this
13736          * later to shifts of any length.
13737          */
13738         if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
13739                 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
13740         else
13741                 dst_reg->smax_value = S64_MAX;
13742 
13743         if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
13744                 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
13745         else
13746                 dst_reg->smin_value = S64_MIN;
13747 
13748         /* If we might shift our top bit out, then we know nothing */
13749         if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
13750                 dst_reg->umin_value = 0;
13751                 dst_reg->umax_value = U64_MAX;
13752         } else {
13753                 dst_reg->umin_value <<= umin_val;
13754                 dst_reg->umax_value <<= umax_val;
13755         }
13756 }
13757 
13758 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
13759                                struct bpf_reg_state *src_reg)
13760 {
13761         u64 umax_val = src_reg->umax_value;
13762         u64 umin_val = src_reg->umin_value;
13763 
13764         /* scalar64 calc uses 32bit unshifted bounds so must be called first */
13765         __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
13766         __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
13767 
13768         dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
13769         /* We may learn something more from the var_off */
13770         __update_reg_bounds(dst_reg);
13771 }
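/* Worked example for the <<32 special case above (editorial annotation,
 * not part of verifier.c): compilers commonly emit
 *   r1 <<= 32
 *   r1 s>>= 32
 * to sign-extend a 32-bit subregister.  If r1's 32-bit bounds before the
 * shift are s32_min/s32_max = [0, 100], then after "r1 <<= 32":
 *   smin_value = (s64)0   << 32 = 0
 *   smax_value = (s64)100 << 32 = 0x6400000000
 * so the following s>>= 32 can recover the [0, 100] range instead of
 * collapsing to unknown.
 */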
13772 
13773 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
13774                                  struct bpf_reg_state *src_reg)
13775 {
13776         struct tnum subreg = tnum_subreg(dst_reg->var_off);
13777         u32 umax_val = src_reg->u32_max_value;
13778         u32 umin_val = src_reg->u32_min_value;
13779 
13780         /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
13781          * be negative, then either:
13782          * 1) src_reg might be zero, so the sign bit of the result is
13783          *    unknown, so we lose our signed bounds
13784          * 2) it's known negative, thus the unsigned bounds capture the
13785          *    signed bounds
13786          * 3) the signed bounds cross zero, so they tell us nothing
13787          *    about the result
13788          * If the value in dst_reg is known nonnegative, then again the
13789          * unsigned bounds capture the signed bounds.
13790          * Thus, in all cases it suffices to blow away our signed bounds
13791          * and rely on inferring new ones from the unsigned bounds and
13792          * var_off of the result.
13793          */
13794         dst_reg->s32_min_value = S32_MIN;
13795         dst_reg->s32_max_value = S32_MAX;
13796 
13797         dst_reg->var_off = tnum_rshift(subreg, umin_val);
13798         dst_reg->u32_min_value >>= umax_val;
13799         dst_reg->u32_max_value >>= umin_val;
13800 
13801         __mark_reg64_unbounded(dst_reg);
13802         __update_reg32_bounds(dst_reg);
13803 }
13804 
13805 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
13806                                struct bpf_reg_state *src_reg)
13807 {
13808         u64 umax_val = src_reg->umax_value;
13809         u64 umin_val = src_reg->umin_value;
13810 
13811         /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
13812          * be negative, then either:
13813          * 1) src_reg might be zero, so the sign bit of the result is
13814          *    unknown, so we lose our signed bounds
13815          * 2) it's known negative, thus the unsigned bounds capture the
13816          *    signed bounds
13817          * 3) the signed bounds cross zero, so they tell us nothing
13818          *    about the result
13819          * If the value in dst_reg is known nonnegative, then again the
13820          * unsigned bounds capture the signed bounds.
13821          * Thus, in all cases it suffices to blow away our signed bounds
13822          * and rely on inferring new ones from the unsigned bounds and
13823          * var_off of the result.
13824          */
13825         dst_reg->smin_value = S64_MIN;
13826         dst_reg->smax_value = S64_MAX;
13827         dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
13828         dst_reg->umin_value >>= umax_val;
13829         dst_reg->umax_value >>= umin_val;
13830 
13831         /* It's not easy to operate on alu32 bounds here because it depends
13832          * on bits being shifted in. Take easy way out and mark unbounded
13833          * so we can recalculate later from tnum.
13834          */
13835         __mark_reg32_unbounded(dst_reg);
13836         __update_reg_bounds(dst_reg);
13837 }
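/* Worked example for the unsigned right shift above (editorial annotation,
 * not part of verifier.c): assume dst_reg has umin/umax = [0x100, 0xfff]
 * and the shift amount is the constant 4 (so umin_val == umax_val == 4,
 * as guaranteed by is_safe_to_compute_dst_reg_range() below).  Then:
 *   umin_value = 0x100 >> 4 = 0x10
 *   umax_value = 0xfff >> 4 = 0xff
 * The signed bounds are blown away and later re-derived from these
 * unsigned bounds and the tnum.
 */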
13838 
13839 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
13840                                   struct bpf_reg_state *src_reg)
13841 {
13842         u64 umin_val = src_reg->u32_min_value;
13843 
13844         /* Upon reaching here, src_known is true and
13845          * umax_val is equal to umin_val.
13846          */
13847         dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
13848         dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
13849 
13850         dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
13851 
13852         /* blow away the dst_reg umin_value/umax_value and rely on
13853          * dst_reg var_off to refine the result.
13854          */
13855         dst_reg->u32_min_value = 0;
13856         dst_reg->u32_max_value = U32_MAX;
13857 
13858         __mark_reg64_unbounded(dst_reg);
13859         __update_reg32_bounds(dst_reg);
13860 }
13861 
13862 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
13863                                 struct bpf_reg_state *src_reg)
13864 {
13865         u64 umin_val = src_reg->umin_value;
13866 
13867         /* Upon reaching here, src_known is true and umax_val is equal
13868          * to umin_val.
13869          */
13870         dst_reg->smin_value >>= umin_val;
13871         dst_reg->smax_value >>= umin_val;
13872 
13873         dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
13874 
13875         /* blow away the dst_reg umin_value/umax_value and rely on
13876          * dst_reg var_off to refine the result.
13877          */
13878         dst_reg->umin_value = 0;
13879         dst_reg->umax_value = U64_MAX;
13880 
13881         /* It's not easy to operate on alu32 bounds here because it depends
13882          * on bits being shifted in from upper 32-bits. Take easy way out
13883          * and mark unbounded so we can recalculate later from tnum.
13884          */
13885         __mark_reg32_unbounded(dst_reg);
13886         __update_reg_bounds(dst_reg);
13887 }
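/* Worked example for the arithmetic right shift above (editorial
 * annotation, not part of verifier.c): the shift amount is a known
 * constant here, say 4, and assume dst_reg has smin/smax = [-64, 16].
 * Arithmetic shift preserves the sign bit, so:
 *   smin_value = -64 >> 4 = -4
 *   smax_value =  16 >> 4 =  1
 * The unsigned bounds are reset to [0, U64_MAX] and later refined from
 * var_off by __update_reg_bounds().
 */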
13888 
13889 static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
13890                                              const struct bpf_reg_state *src_reg)
13891 {
13892         bool src_is_const = false;
13893         u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
13894 
13895         if (insn_bitness == 32) {
13896                 if (tnum_subreg_is_const(src_reg->var_off)
13897                     && src_reg->s32_min_value == src_reg->s32_max_value
13898                     && src_reg->u32_min_value == src_reg->u32_max_value)
13899                         src_is_const = true;
13900         } else {
13901                 if (tnum_is_const(src_reg->var_off)
13902                     && src_reg->smin_value == src_reg->smax_value
13903                     && src_reg->umin_value == src_reg->umax_value)
13904                         src_is_const = true;
13905         }
13906 
13907         switch (BPF_OP(insn->code)) {
13908         case BPF_ADD:
13909         case BPF_SUB:
13910         case BPF_AND:
13911         case BPF_XOR:
13912         case BPF_OR:
13913         case BPF_MUL:
13914                 return true;
13915 
13916         /* The range of shift operators is only computable if the shift amount
13917          * operand is a constant. Shifts greater than 31 (alu32) or 63 (alu64) are
13918          * undefined, which includes shifts by a negative number.
13919          */
13920         case BPF_LSH:
13921         case BPF_RSH:
13922         case BPF_ARSH:
13923                 return (src_is_const && src_reg->umax_value < insn_bitness);
13924         default:
13925                 return false;
13926         }
13927 }
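/* Illustrative consequence of the check above (editorial annotation, not
 * part of verifier.c): for
 *   r1 <<= r2
 * where r2 is not a known constant, or is a constant >= 64 in the alu64
 * case, the helper returns false, so adjust_scalar_min_max_vals() simply
 * marks r1 as unknown instead of trying to track bounds through an unsafe
 * or undefined shift.
 */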
13928 
13929 /* WARNING: This function does calculations on 64-bit values, but the actual
13930  * execution may occur on 32-bit values. Therefore, things like bitshifts
13931  * need extra checks in the 32-bit case.
13932  */
13933 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
13934                                       struct bpf_insn *insn,
13935                                       struct bpf_reg_state *dst_reg,
13936                                       struct bpf_reg_state src_reg)
13937 {
13938         u8 opcode = BPF_OP(insn->code);
13939         bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
13940         int ret;
13941 
13942         if (!is_safe_to_compute_dst_reg_range(insn, &src_reg)) {
13943                 __mark_reg_unknown(env, dst_reg);
13944                 return 0;
13945         }
13946 
13947         if (sanitize_needed(opcode)) {
13948                 ret = sanitize_val_alu(env, insn);
13949                 if (ret < 0)
13950                         return sanitize_err(env, insn, ret, NULL, NULL);
13951         }
13952 
13953         /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
13954          * There are two classes of instructions: for the first class we track both
13955          * alu32 and alu64 sign/unsigned bounds independently; this provides the
13956          * greatest amount of precision when alu operations are mixed with jmp32
13957          * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_AND,
13958          * BPF_OR and BPF_XOR. This is possible because these ops have fairly easy
13959          * to understand and calculate behavior in both 32-bit and 64-bit alu ops.
13960          * See alu32 verifier tests for examples. The second class of
13961          * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
13962          * with regards to tracking sign/unsigned bounds because the bits may
13963          * cross subreg boundaries in the alu64 case. When this happens we mark
13964          * the reg unbounded in the subreg bound space and use the resulting
13965          * tnum to calculate an approximation of the sign/unsigned bounds.
13966          */
13967         switch (opcode) {
13968         case BPF_ADD:
13969                 scalar32_min_max_add(dst_reg, &src_reg);
13970                 scalar_min_max_add(dst_reg, &src_reg);
13971                 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
13972                 break;
13973         case BPF_SUB:
13974                 scalar32_min_max_sub(dst_reg, &src_reg);
13975                 scalar_min_max_sub(dst_reg, &src_reg);
13976                 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
13977                 break;
13978         case BPF_MUL:
13979                 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
13980                 scalar32_min_max_mul(dst_reg, &src_reg);
13981                 scalar_min_max_mul(dst_reg, &src_reg);
13982                 break;
13983         case BPF_AND:
13984                 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
13985                 scalar32_min_max_and(dst_reg, &src_reg);
13986                 scalar_min_max_and(dst_reg, &src_reg);
13987                 break;
13988         case BPF_OR:
13989                 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
13990                 scalar32_min_max_or(dst_reg, &src_reg);
13991                 scalar_min_max_or(dst_reg, &src_reg);
13992                 break;
13993         case BPF_XOR:
13994                 dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
13995                 scalar32_min_max_xor(dst_reg, &src_reg);
13996                 scalar_min_max_xor(dst_reg, &src_reg);
13997                 break;
13998         case BPF_LSH:
13999                 if (alu32)
14000                         scalar32_min_max_lsh(dst_reg, &src_reg);
14001                 else
14002                         scalar_min_max_lsh(dst_reg, &src_reg);
14003                 break;
14004         case BPF_RSH:
14005                 if (alu32)
14006                         scalar32_min_max_rsh(dst_reg, &src_reg);
14007                 else
14008                         scalar_min_max_rsh(dst_reg, &src_reg);
14009                 break;
14010         case BPF_ARSH:
14011                 if (alu32)
14012                         scalar32_min_max_arsh(dst_reg, &src_reg);
14013                 else
14014                         scalar_min_max_arsh(dst_reg, &src_reg);
14015                 break;
14016         default:
14017                 break;
14018         }
14019 
14020         /* ALU32 ops are zero extended into 64bit register */
14021         if (alu32)
14022                 zext_32_to_64(dst_reg);
14023         reg_bounds_sync(dst_reg);
14024         return 0;
14025 }
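/* Example of the alu32 zero-extension step above (editorial annotation,
 * not part of verifier.c): for a 32-bit op such as
 *   w1 += w2
 * the interpreter/JIT leaves the upper 32 bits of r1 zero, so
 * zext_32_to_64() zero-extends dst_reg's var_off and lifts the 32-bit
 * bounds into the 64-bit ones, and reg_bounds_sync() then reconciles the
 * signed/unsigned bounds with the tnum.
 */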
14026 
14027 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
14028  * and var_off.
14029  */
14030 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
14031                                    struct bpf_insn *insn)
14032 {
14033         struct bpf_verifier_state *vstate = env->cur_state;
14034         struct bpf_func_state *state = vstate->frame[vstate->curframe];
14035         struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
14036         struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
14037         bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
14038         u8 opcode = BPF_OP(insn->code);
14039         int err;
14040 
14041         dst_reg = &regs[insn->dst_reg];
14042         src_reg = NULL;
14043 
14044         if (dst_reg->type == PTR_TO_ARENA) {
14045                 struct bpf_insn_aux_data *aux = cur_aux(env);
14046 
14047                 if (BPF_CLASS(insn->code) == BPF_ALU64)
14048                         /*
14049                          * 32-bit operations zero upper bits automatically.
14050                          * 64-bit operations need to be converted to 32.
14051                          */
14052                         aux->needs_zext = true;
14053 
14054                 /* Any arithmetic operations are allowed on arena pointers */
14055                 return 0;
14056         }
14057 
14058         if (dst_reg->type != SCALAR_VALUE)
14059                 ptr_reg = dst_reg;
14060 
14061         if (BPF_SRC(insn->code) == BPF_X) {
14062                 src_reg = &regs[insn->src_reg];
14063                 if (src_reg->type != SCALAR_VALUE) {
14064                         if (dst_reg->type != SCALAR_VALUE) {
14065                                 /* Combining two pointers by any ALU op yields
14066                                  * an arbitrary scalar. Disallow all math except
14067                                  * pointer subtraction
14068                                  */
14069                                 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
14070                                         mark_reg_unknown(env, regs, insn->dst_reg);
14071                                         return 0;
14072                                 }
14073                                 verbose(env, "R%d pointer %s pointer prohibited\n",
14074                                         insn->dst_reg,
14075                                         bpf_alu_string[opcode >> 4]);
14076                                 return -EACCES;
14077                         } else {
14078                                 /* scalar += pointer
14079                                  * This is legal, but we have to reverse our
14080                                  * src/dest handling in computing the range
14081                                  */
14082                                 err = mark_chain_precision(env, insn->dst_reg);
14083                                 if (err)
14084                                         return err;
14085                                 return adjust_ptr_min_max_vals(env, insn,
14086                                                                src_reg, dst_reg);
14087                         }
14088                 } else if (ptr_reg) {
14089                         /* pointer += scalar */
14090                         err = mark_chain_precision(env, insn->src_reg);
14091                         if (err)
14092                                 return err;
14093                         return adjust_ptr_min_max_vals(env, insn,
14094                                                        dst_reg, src_reg);
14095                 } else if (dst_reg->precise) {
14096                         /* if dst_reg is precise, src_reg should be precise as well */
14097                         err = mark_chain_precision(env, insn->src_reg);
14098                         if (err)
14099                                 return err;
14100                 }
14101         } else {
14102                 /* Pretend the src is a reg with a known value, since we only
14103                  * need to be able to read from this state.
14104                  */
14105                 off_reg.type = SCALAR_VALUE;
14106                 __mark_reg_known(&off_reg, insn->imm);
14107                 src_reg = &off_reg;
14108                 if (ptr_reg) /* pointer += K */
14109                         return adjust_ptr_min_max_vals(env, insn,
14110                                                        ptr_reg, src_reg);
14111         }
14112 
14113         /* Got here implies adding two SCALAR_VALUEs */
14114         if (WARN_ON_ONCE(ptr_reg)) {
14115                 print_verifier_state(env, state, true);
14116                 verbose(env, "verifier internal error: unexpected ptr_reg\n");
14117                 return -EINVAL;
14118         }
14119         if (WARN_ON(!src_reg)) {
14120                 print_verifier_state(env, state, true);
14121                 verbose(env, "verifier internal error: no src_reg\n");
14122                 return -EINVAL;
14123         }
14124         err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
14125         if (err)
14126                 return err;
14127         /*
14128          * Compilers can generate the code
14129          * r1 = r2
14130          * r1 += 0x1
14131          * if r2 < 1000 goto ...
14132          * use r1 in memory access
14133          * So remember constant delta between r2 and r1 and update r1 after
14134          * 'if' condition.
14135          */
14136         if (env->bpf_capable && BPF_OP(insn->code) == BPF_ADD &&
14137             dst_reg->id && is_reg_const(src_reg, alu32)) {
14138                 u64 val = reg_const_value(src_reg, alu32);
14139 
14140                 if ((dst_reg->id & BPF_ADD_CONST) ||
14141                     /* prevent overflow in find_equal_scalars() later */
14142                     val > (u32)S32_MAX) {
14143                         /*
14144                          * If the register already went through rX += val
14145                          * we cannot accumulate another val into rx->off.
14146                          */
14147                         dst_reg->off = 0;
14148                         dst_reg->id = 0;
14149                 } else {
14150                         dst_reg->id |= BPF_ADD_CONST;
14151                         dst_reg->off = val;
14152                 }
14153         } else {
14154                 /*
14155                  * Make sure ID is cleared otherwise dst_reg min/max could be
14156                  * incorrectly propagated into other registers by find_equal_scalars()
14157                  */
14158                 dst_reg->id = 0;
14159         }
14160         return 0;
14161 }
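/* Worked example for the constant-delta tracking above (editorial
 * annotation, not part of verifier.c): for
 *   r1 = r2        // r1 and r2 now share a scalar id
 *   r1 += 0x1      // src is the constant 1, well below (u32)S32_MAX
 * the verifier keeps r1's shared id, sets the BPF_ADD_CONST flag in it and
 * records dst_reg->off = 1, so that a bound learned later on r2 (e.g.
 * "if r2 < 1000") can be applied to r1 adjusted by that delta.
 */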
14162 
14163 /* check validity of 32-bit and 64-bit arithmetic operations */
14164 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
14165 {
14166         struct bpf_reg_state *regs = cur_regs(env);
14167         u8 opcode = BPF_OP(insn->code);
14168         int err;
14169 
14170         if (opcode == BPF_END || opcode == BPF_NEG) {
14171                 if (opcode == BPF_NEG) {
14172                         if (BPF_SRC(insn->code) != BPF_K ||
14173                             insn->src_reg != BPF_REG_0 ||
14174                             insn->off != 0 || insn->imm != 0) {
14175                                 verbose(env, "BPF_NEG uses reserved fields\n");
14176                                 return -EINVAL;
14177                         }
14178                 } else {
14179                         if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
14180                             (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
14181                             (BPF_CLASS(insn->code) == BPF_ALU64 &&
14182                              BPF_SRC(insn->code) != BPF_TO_LE)) {
14183                                 verbose(env, "BPF_END uses reserved fields\n");
14184                                 return -EINVAL;
14185                         }
14186                 }
14187 
14188                 /* check src operand */
14189                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
14190                 if (err)
14191                         return err;
14192 
14193                 if (is_pointer_value(env, insn->dst_reg)) {
14194                         verbose(env, "R%d pointer arithmetic prohibited\n",
14195                                 insn->dst_reg);
14196                         return -EACCES;
14197                 }
14198 
14199                 /* check dest operand */
14200                 err = check_reg_arg(env, insn->dst_reg, DST_OP);
14201                 if (err)
14202                         return err;
14203 
14204         } else if (opcode == BPF_MOV) {
14205 
14206                 if (BPF_SRC(insn->code) == BPF_X) {
14207                         if (BPF_CLASS(insn->code) == BPF_ALU) {
14208                                 if ((insn->off != 0 && insn->off != 8 && insn->off != 16) ||
14209                                     insn->imm) {
14210                                         verbose(env, "BPF_MOV uses reserved fields\n");
14211                                         return -EINVAL;
14212                                 }
14213                         } else if (insn->off == BPF_ADDR_SPACE_CAST) {
14214                                 if (insn->imm != 1 && insn->imm != 1u << 16) {
14215                                         verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
14216                                         return -EINVAL;
14217                                 }
14218                                 if (!env->prog->aux->arena) {
14219                                         verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
14220                                         return -EINVAL;
14221                                 }
14222                         } else {
14223                                 if ((insn->off != 0 && insn->off != 8 && insn->off != 16 &&
14224                                      insn->off != 32) || insn->imm) {
14225                                         verbose(env, "BPF_MOV uses reserved fields\n");
14226                                         return -EINVAL;
14227                                 }
14228                         }
14229 
14230                         /* check src operand */
14231                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
14232                         if (err)
14233                                 return err;
14234                 } else {
14235                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
14236                                 verbose(env, "BPF_MOV uses reserved fields\n");
14237                                 return -EINVAL;
14238                         }
14239                 }
14240 
14241                 /* check dest operand, mark as required later */
14242                 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
14243                 if (err)
14244                         return err;
14245 
14246                 if (BPF_SRC(insn->code) == BPF_X) {
14247                         struct bpf_reg_state *src_reg = regs + insn->src_reg;
14248                         struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
14249 
14250                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
14251                                 if (insn->imm) {
14252                                         /* off == BPF_ADDR_SPACE_CAST */
14253                                         mark_reg_unknown(env, regs, insn->dst_reg);
14254                                         if (insn->imm == 1) { /* cast from as(1) to as(0) */
14255                                                 dst_reg->type = PTR_TO_ARENA;
14256                                                 /* PTR_TO_ARENA is 32-bit */
14257                                                 dst_reg->subreg_def = env->insn_idx + 1;
14258                                         }
14259                                 } else if (insn->off == 0) {
14260                                         /* case: R1 = R2
14261                                          * copy register state to dest reg
14262                                          */
14263                                         assign_scalar_id_before_mov(env, src_reg);
14264                                         copy_register_state(dst_reg, src_reg);
14265                                         dst_reg->live |= REG_LIVE_WRITTEN;
14266                                         dst_reg->subreg_def = DEF_NOT_SUBREG;
14267                                 } else {
14268                                         /* case: R1 = (s8, s16, s32)R2 */
14269                                         if (is_pointer_value(env, insn->src_reg)) {
14270                                                 verbose(env,
14271                                                         "R%d sign-extension part of pointer\n",
14272                                                         insn->src_reg);
14273                                                 return -EACCES;
14274                                         } else if (src_reg->type == SCALAR_VALUE) {
14275                                                 bool no_sext;
14276 
14277                                                 no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
14278                                                 if (no_sext)
14279                                                         assign_scalar_id_before_mov(env, src_reg);
14280                                                 copy_register_state(dst_reg, src_reg);
14281                                                 if (!no_sext)
14282                                                         dst_reg->id = 0;
14283                                                 coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
14284                                                 dst_reg->live |= REG_LIVE_WRITTEN;
14285                                                 dst_reg->subreg_def = DEF_NOT_SUBREG;
14286                                         } else {
14287                                                 mark_reg_unknown(env, regs, insn->dst_reg);
14288                                         }
14289                                 }
14290                         } else {
14291                                 /* R1 = (u32) R2 */
14292                                 if (is_pointer_value(env, insn->src_reg)) {
14293                                         verbose(env,
14294                                                 "R%d partial copy of pointer\n",
14295                                                 insn->src_reg);
14296                                         return -EACCES;
14297                                 } else if (src_reg->type == SCALAR_VALUE) {
14298                                         if (insn->off == 0) {
14299                                                 bool is_src_reg_u32 = get_reg_width(src_reg) <= 32;
14300 
14301                                                 if (is_src_reg_u32)
14302                                                         assign_scalar_id_before_mov(env, src_reg);
14303                                                 copy_register_state(dst_reg, src_reg);
14304                                                 /* Make sure ID is cleared if src_reg is not in u32
14305                                                  * range otherwise dst_reg min/max could be incorrectly
14306                                                  * propagated into src_reg by find_equal_scalars()
14307                                                  */
14308                                                 if (!is_src_reg_u32)
14309                                                         dst_reg->id = 0;
14310                                                 dst_reg->live |= REG_LIVE_WRITTEN;
14311                                                 dst_reg->subreg_def = env->insn_idx + 1;
14312                                         } else {
14313                                                 /* case: W1 = (s8, s16)W2 */
14314                                                 bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
14315 
14316                                                 if (no_sext)
14317                                                         assign_scalar_id_before_mov(env, src_reg);
14318                                                 copy_register_state(dst_reg, src_reg);
14319                                                 if (!no_sext)
14320                                                         dst_reg->id = 0;
14321                                                 dst_reg->live |= REG_LIVE_WRITTEN;
14322                                                 dst_reg->subreg_def = env->insn_idx + 1;
14323                                                 coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
14324                                         }
14325                                 } else {
14326                                         mark_reg_unknown(env, regs,
14327                                                          insn->dst_reg);
14328                                 }
14329                                 zext_32_to_64(dst_reg);
14330                                 reg_bounds_sync(dst_reg);
14331                         }
14332                 } else {
14333                         /* case: R = imm
14334                          * remember the value we stored into this reg
14335                          */
14336                         /* clear any state __mark_reg_known doesn't set */
14337                         mark_reg_unknown(env, regs, insn->dst_reg);
14338                         regs[insn->dst_reg].type = SCALAR_VALUE;
14339                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
14340                                 __mark_reg_known(regs + insn->dst_reg,
14341                                                  insn->imm);
14342                         } else {
14343                                 __mark_reg_known(regs + insn->dst_reg,
14344                                                  (u32)insn->imm);
14345                         }
14346                 }
14347 
14348         } else if (opcode > BPF_END) {
14349                 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
14350                 return -EINVAL;
14351 
14352         } else {        /* all other ALU ops: and, sub, xor, add, ... */
14353 
14354                 if (BPF_SRC(insn->code) == BPF_X) {
14355                         if (insn->imm != 0 || insn->off > 1 ||
14356                             (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
14357                                 verbose(env, "BPF_ALU uses reserved fields\n");
14358                                 return -EINVAL;
14359                         }
14360                         /* check src1 operand */
14361                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
14362                         if (err)
14363                                 return err;
14364                 } else {
14365                         if (insn->src_reg != BPF_REG_0 || insn->off > 1 ||
14366                             (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
14367                                 verbose(env, "BPF_ALU uses reserved fields\n");
14368                                 return -EINVAL;
14369                         }
14370                 }
14371 
14372                 /* check src2 operand */
14373                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
14374                 if (err)
14375                         return err;
14376 
14377                 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
14378                     BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
14379                         verbose(env, "div by zero\n");
14380                         return -EINVAL;
14381                 }
14382 
14383                 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
14384                      opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
14385                         int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
14386 
14387                         if (insn->imm < 0 || insn->imm >= size) {
14388                                 verbose(env, "invalid shift %d\n", insn->imm);
14389                                 return -EINVAL;
14390                         }
14391                 }
14392 
14393                 /* check dest operand */
14394                 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
14395                 err = err ?: adjust_reg_min_max_vals(env, insn);
14396                 if (err)
14397                         return err;
14398         }
14399 
14400         return reg_bounds_sanity_check(env, &regs[insn->dst_reg], "alu");
14401 }
14402 
14403 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
14404                                    struct bpf_reg_state *dst_reg,
14405                                    enum bpf_reg_type type,
14406                                    bool range_right_open)
14407 {
14408         struct bpf_func_state *state;
14409         struct bpf_reg_state *reg;
14410         int new_range;
14411 
14412         if (dst_reg->off < 0 ||
14413             (dst_reg->off == 0 && range_right_open))
14414                 /* This doesn't give us any range */
14415                 return;
14416 
14417         if (dst_reg->umax_value > MAX_PACKET_OFF ||
14418             dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
14419                 /* Risk of overflow.  For instance, ptr + (1<<63) may be less
14420                  * than pkt_end, but that's because it's also less than pkt.
14421                  */
14422                 return;
14423 
14424         new_range = dst_reg->off;
14425         if (range_right_open)
14426                 new_range++;
14427 
14428         /* Examples for register markings:
14429          *
14430          * pkt_data in dst register:
14431          *
14432          *   r2 = r3;
14433          *   r2 += 8;
14434          *   if (r2 > pkt_end) goto <handle exception>
14435          *   <access okay>
14436          *
14437          *   r2 = r3;
14438          *   r2 += 8;
14439          *   if (r2 < pkt_end) goto <access okay>
14440          *   <handle exception>
14441          *
14442          *   Where:
14443          *     r2 == dst_reg, pkt_end == src_reg
14444          *     r2=pkt(id=n,off=8,r=0)
14445          *     r3=pkt(id=n,off=0,r=0)
14446          *
14447          * pkt_data in src register:
14448          *
14449          *   r2 = r3;
14450          *   r2 += 8;
14451          *   if (pkt_end >= r2) goto <access okay>
14452          *   <handle exception>
14453          *
14454          *   r2 = r3;
14455          *   r2 += 8;
14456          *   if (pkt_end <= r2) goto <handle exception>
14457          *   <access okay>
14458          *
14459          *   Where:
14460          *     pkt_end == dst_reg, r2 == src_reg
14461          *     r2=pkt(id=n,off=8,r=0)
14462          *     r3=pkt(id=n,off=0,r=0)
14463          *
14464          * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
14465          * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
14466          * and [r3, r3 + 8-1) respectively is safe to access depending on
14467          * the check.
14468          */
14469 
14470         /* If our ids match, then we must have the same max_value.  And we
14471          * don't care about the other reg's fixed offset, since if it's too big
14472          * the range won't allow anything.
14473          * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
14474          */
14475         bpf_for_each_reg_in_vstate(vstate, state, reg, ({
14476                 if (reg->type == type && reg->id == dst_reg->id)
14477                         /* keep the maximum range already checked */
14478                         reg->range = max(reg->range, new_range);
14479         }));
14480 }
14481 
14482 /*
14483  * <reg1> <op> <reg2>, currently assuming reg2 is a constant
14484  */
14485 static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
14486                                   u8 opcode, bool is_jmp32)
14487 {
14488         struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off;
14489         struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off;
14490         u64 umin1 = is_jmp32 ? (u64)reg1->u32_min_value : reg1->umin_value;
14491         u64 umax1 = is_jmp32 ? (u64)reg1->u32_max_value : reg1->umax_value;
14492         s64 smin1 = is_jmp32 ? (s64)reg1->s32_min_value : reg1->smin_value;
14493         s64 smax1 = is_jmp32 ? (s64)reg1->s32_max_value : reg1->smax_value;
14494         u64 umin2 = is_jmp32 ? (u64)reg2->u32_min_value : reg2->umin_value;
14495         u64 umax2 = is_jmp32 ? (u64)reg2->u32_max_value : reg2->umax_value;
14496         s64 smin2 = is_jmp32 ? (s64)reg2->s32_min_value : reg2->smin_value;
14497         s64 smax2 = is_jmp32 ? (s64)reg2->s32_max_value : reg2->smax_value;
14498 
14499         switch (opcode) {
14500         case BPF_JEQ:
14501                 /* constants, umin/umax and smin/smax checks would be
14502                  * redundant in this case because they all should match
14503                  */
14504                 if (tnum_is_const(t1) && tnum_is_const(t2))
14505                         return t1.value == t2.value;
14506                 /* non-overlapping ranges */
14507                 if (umin1 > umax2 || umax1 < umin2)
14508                         return 0;
14509                 if (smin1 > smax2 || smax1 < smin2)
14510                         return 0;
14511                 if (!is_jmp32) {
14512                         /* if 64-bit ranges are inconclusive, see if we can
14513                          * utilize 32-bit subrange knowledge to eliminate
14514                          * branches that can't be taken a priori
14515                          */
14516                         if (reg1->u32_min_value > reg2->u32_max_value ||
14517                             reg1->u32_max_value < reg2->u32_min_value)
14518                                 return 0;
14519                         if (reg1->s32_min_value > reg2->s32_max_value ||
14520                             reg1->s32_max_value < reg2->s32_min_value)
14521                                 return 0;
14522                 }
14523                 break;
14524         case BPF_JNE:
14525                 /* constants, umin/umax and smin/smax checks would be
14526                  * redundant in this case because they all should match
14527                  */
14528                 if (tnum_is_const(t1) && tnum_is_const(t2))
14529                         return t1.value != t2.value;
14530                 /* non-overlapping ranges */
14531                 if (umin1 > umax2 || umax1 < umin2)
14532                         return 1;
14533                 if (smin1 > smax2 || smax1 < smin2)
14534                         return 1;
14535                 if (!is_jmp32) {
14536                         /* if 64-bit ranges are inconclusive, see if we can
14537                          * utilize 32-bit subrange knowledge to eliminate
14538                          * branches that can't be taken a priori
14539                          */
14540                         if (reg1->u32_min_value > reg2->u32_max_value ||
14541                             reg1->u32_max_value < reg2->u32_min_value)
14542                                 return 1;
14543                         if (reg1->s32_min_value > reg2->s32_max_value ||
14544                             reg1->s32_max_value < reg2->s32_min_value)
14545                                 return 1;
14546                 }
14547                 break;
14548         case BPF_JSET:
14549                 if (!is_reg_const(reg2, is_jmp32)) {
14550                         swap(reg1, reg2);
14551                         swap(t1, t2);
14552                 }
14553                 if (!is_reg_const(reg2, is_jmp32))
14554                         return -1;
14555                 if ((~t1.mask & t1.value) & t2.value)
14556                         return 1;
14557                 if (!((t1.mask | t1.value) & t2.value))
14558                         return 0;
14559                 break;
14560         case BPF_JGT:
14561                 if (umin1 > umax2)
14562                         return 1;
14563                 else if (umax1 <= umin2)
14564                         return 0;
14565                 break;
14566         case BPF_JSGT:
14567                 if (smin1 > smax2)
14568                         return 1;
14569                 else if (smax1 <= smin2)
14570                         return 0;
14571                 break;
14572         case BPF_JLT:
14573                 if (umax1 < umin2)
14574                         return 1;
14575                 else if (umin1 >= umax2)
14576                         return 0;
14577                 break;
14578         case BPF_JSLT:
14579                 if (smax1 < smin2)
14580                         return 1;
14581                 else if (smin1 >= smax2)
14582                         return 0;
14583                 break;
14584         case BPF_JGE:
14585                 if (umin1 >= umax2)
14586                         return 1;
14587                 else if (umax1 < umin2)
14588                         return 0;
14589                 break;
14590         case BPF_JSGE:
14591                 if (smin1 >= smax2)
14592                         return 1;
14593                 else if (smax1 < smin2)
14594                         return 0;
14595                 break;
14596         case BPF_JLE:
14597                 if (umax1 <= umin2)
14598                         return 1;
14599                 else if (umin1 > umax2)
14600                         return 0;
14601                 break;
14602         case BPF_JSLE:
14603                 if (smax1 <= smin2)
14604                         return 1;
14605                 else if (smin1 > smax2)
14606                         return 0;
14607                 break;
14608         }
14609 
14610         return -1;
14611 }
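/* Worked example for the range-based branch pruning above (editorial
 * annotation, not part of verifier.c): for "if r1 > r2 goto ..." (BPF_JGT)
 * with r1 known to be in [10, 20] and r2 in [0, 5], umin1 (10) > umax2 (5),
 * so the function returns 1 and the verifier only explores the taken
 * branch.  With r1 in [0, 10] and r2 the constant 5 the comparison is
 * inconclusive and -1 is returned, so both branches are explored.
 */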
14612 
14613 static int flip_opcode(u32 opcode)
14614 {
14615         /* How can we transform "a <op> b" into "b <op> a"? */
14616         static const u8 opcode_flip[16] = {
14617                 /* these stay the same */
14618                 [BPF_JEQ  >> 4] = BPF_JEQ,
14619                 [BPF_JNE  >> 4] = BPF_JNE,
14620                 [BPF_JSET >> 4] = BPF_JSET,
14621                 /* these swap "lesser" and "greater" (L and G in the opcodes) */
14622                 [BPF_JGE  >> 4] = BPF_JLE,
14623                 [BPF_JGT  >> 4] = BPF_JLT,
14624                 [BPF_JLE  >> 4] = BPF_JGE,
14625                 [BPF_JLT  >> 4] = BPF_JGT,
14626                 [BPF_JSGE >> 4] = BPF_JSLE,
14627                 [BPF_JSGT >> 4] = BPF_JSLT,
14628                 [BPF_JSLE >> 4] = BPF_JSGE,
14629                 [BPF_JSLT >> 4] = BPF_JSGT
14630         };
14631         return opcode_flip[opcode >> 4];
14632 }
14633 
14634 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
14635                                    struct bpf_reg_state *src_reg,
14636                                    u8 opcode)
14637 {
14638         struct bpf_reg_state *pkt;
14639 
14640         if (src_reg->type == PTR_TO_PACKET_END) {
14641                 pkt = dst_reg;
14642         } else if (dst_reg->type == PTR_TO_PACKET_END) {
14643                 pkt = src_reg;
14644                 opcode = flip_opcode(opcode);
14645         } else {
14646                 return -1;
14647         }
14648 
14649         if (pkt->range >= 0)
14650                 return -1;
14651 
14652         switch (opcode) {
14653         case BPF_JLE:
14654                 /* pkt <= pkt_end */
14655                 fallthrough;
14656         case BPF_JGT:
14657                 /* pkt > pkt_end */
14658                 if (pkt->range == BEYOND_PKT_END)
14659                         /* pkt has at least one extra byte beyond pkt_end */
14660                         return opcode == BPF_JGT;
14661                 break;
14662         case BPF_JLT:
14663                 /* pkt < pkt_end */
14664                 fallthrough;
14665         case BPF_JGE:
14666                 /* pkt >= pkt_end */
14667                 if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
14668                         return opcode == BPF_JGE;
14669                 break;
14670         }
14671         return -1;
14672 }
14673 
14674 /* compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;"
14675  * and return:
14676  *  1 - branch will be taken and "goto target" will be executed
14677  *  0 - branch will not be taken and fall-through to next insn
14678  * -1 - unknown. Example: "if (reg1 < 5)" is unknown when the register value
14679  *      range is [0,10]
14680  */
14681 static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
14682                            u8 opcode, bool is_jmp32)
14683 {
14684         if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32)
14685                 return is_pkt_ptr_branch_taken(reg1, reg2, opcode);
14686 
14687         if (__is_pointer_value(false, reg1) || __is_pointer_value(false, reg2)) {
14688                 u64 val;
14689 
14690                 /* arrange that reg2 is a scalar, and reg1 is a pointer */
14691                 if (!is_reg_const(reg2, is_jmp32)) {
14692                         opcode = flip_opcode(opcode);
14693                         swap(reg1, reg2);
14694                 }
14695                 /* and ensure that reg2 is a constant */
14696                 if (!is_reg_const(reg2, is_jmp32))
14697                         return -1;
14698 
14699                 if (!reg_not_null(reg1))
14700                         return -1;
14701 
14702                 /* If the pointer is valid, tests against zero will fail, so we can
14703                  * use this to determine which branch is taken.
14704                  */
14705                 val = reg_const_value(reg2, is_jmp32);
14706                 if (val != 0)
14707                         return -1;
14708 
14709                 switch (opcode) {
14710                 case BPF_JEQ:
14711                         return 0;
14712                 case BPF_JNE:
14713                         return 1;
14714                 default:
14715                         return -1;
14716                 }
14717         }
14718 
14719         /* now deal with two scalars, but not necessarily constants */
14720         return is_scalar_branch_taken(reg1, reg2, opcode, is_jmp32);
14721 }
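
/* For instance, once a pointer register is known to be non-NULL (say r0
 * holding a bpf_map_lookup_elem() result after its NULL check, so that
 * reg_not_null() holds), "if r0 == 0" is predicted not taken (0) and
 * "if r0 != 0" is predicted taken (1); comparisons against a non-zero
 * constant or against another register remain unpredictable (-1).
 */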
14722 
14723 /* Opcode that corresponds to a *false* branch condition.
14724  * E.g., if r1 < r2, then reverse (false) condition is r1 >= r2
14725  */
14726 static u8 rev_opcode(u8 opcode)
14727 {
14728         switch (opcode) {
14729         case BPF_JEQ:           return BPF_JNE;
14730         case BPF_JNE:           return BPF_JEQ;
14731         /* JSET doesn't have its reverse opcode in BPF, so add
14732          * BPF_X flag to denote the reverse of that operation
14733          */
14734         case BPF_JSET:          return BPF_JSET | BPF_X;
14735         case BPF_JSET | BPF_X:  return BPF_JSET;
14736         case BPF_JGE:           return BPF_JLT;
14737         case BPF_JGT:           return BPF_JLE;
14738         case BPF_JLE:           return BPF_JGT;
14739         case BPF_JLT:           return BPF_JGE;
14740         case BPF_JSGE:          return BPF_JSLT;
14741         case BPF_JSGT:          return BPF_JSLE;
14742         case BPF_JSLE:          return BPF_JSGT;
14743         case BPF_JSLT:          return BPF_JSGE;
14744         default:                return 0;
14745         }
14746 }
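
/* For example, rev_opcode(BPF_JGT) == BPF_JLE: for "if r1 > r2 goto l0" the
 * jump-taken state is refined with the BPF_JGT constraint while the
 * fall-through state is refined with BPF_JLE, see reg_set_min_max() below.
 */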
14747 
14748 /* Refine range knowledge for <reg1> <op> <reg2> conditional operation. */
14749 static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
14750                                 u8 opcode, bool is_jmp32)
14751 {
14752         struct tnum t;
14753         u64 val;
14754 
14755         /* In case of GE/GT/SGE/SGT, reuse LE/LT/SLE/SLT logic from below */
14756         switch (opcode) {
14757         case BPF_JGE:
14758         case BPF_JGT:
14759         case BPF_JSGE:
14760         case BPF_JSGT:
14761                 opcode = flip_opcode(opcode);
14762                 swap(reg1, reg2);
14763                 break;
14764         default:
14765                 break;
14766         }
14767 
14768         switch (opcode) {
14769         case BPF_JEQ:
14770                 if (is_jmp32) {
14771                         reg1->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
14772                         reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
14773                         reg1->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
14774                         reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
14775                         reg2->u32_min_value = reg1->u32_min_value;
14776                         reg2->u32_max_value = reg1->u32_max_value;
14777                         reg2->s32_min_value = reg1->s32_min_value;
14778                         reg2->s32_max_value = reg1->s32_max_value;
14779 
14780                         t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off));
14781                         reg1->var_off = tnum_with_subreg(reg1->var_off, t);
14782                         reg2->var_off = tnum_with_subreg(reg2->var_off, t);
14783                 } else {
14784                         reg1->umin_value = max(reg1->umin_value, reg2->umin_value);
14785                         reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
14786                         reg1->smin_value = max(reg1->smin_value, reg2->smin_value);
14787                         reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
14788                         reg2->umin_value = reg1->umin_value;
14789                         reg2->umax_value = reg1->umax_value;
14790                         reg2->smin_value = reg1->smin_value;
14791                         reg2->smax_value = reg1->smax_value;
14792 
14793                         reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off);
14794                         reg2->var_off = reg1->var_off;
14795                 }
14796                 break;
14797         case BPF_JNE:
14798                 if (!is_reg_const(reg2, is_jmp32))
14799                         swap(reg1, reg2);
14800                 if (!is_reg_const(reg2, is_jmp32))
14801                         break;
14802 
14803                 /* try to recompute the bound of reg1 if reg2 is a const and
14804                  * is exactly the edge of reg1.
14805                  */
14806                 val = reg_const_value(reg2, is_jmp32);
14807                 if (is_jmp32) {
14808                         /* u32_min_value is not equal to 0xffffffff at this point,
14809                          * because otherwise u32_max_value is 0xffffffff as well,
14810                          * in such a case both reg1 and reg2 would be constants,
14811                          * jump would be predicted and reg_set_min_max() won't
14812                          * be called.
14813                          *
14814                          * Same reasoning works for all {u,s}{min,max}{32,64} cases
14815                          * below.
14816                          */
14817                         if (reg1->u32_min_value == (u32)val)
14818                                 reg1->u32_min_value++;
14819                         if (reg1->u32_max_value == (u32)val)
14820                                 reg1->u32_max_value--;
14821                         if (reg1->s32_min_value == (s32)val)
14822                                 reg1->s32_min_value++;
14823                         if (reg1->s32_max_value == (s32)val)
14824                                 reg1->s32_max_value--;
14825                 } else {
14826                         if (reg1->umin_value == (u64)val)
14827                                 reg1->umin_value++;
14828                         if (reg1->umax_value == (u64)val)
14829                                 reg1->umax_value--;
14830                         if (reg1->smin_value == (s64)val)
14831                                 reg1->smin_value++;
14832                         if (reg1->smax_value == (s64)val)
14833                                 reg1->smax_value--;
14834                 }
14835                 break;
14836         case BPF_JSET:
14837                 if (!is_reg_const(reg2, is_jmp32))
14838                         swap(reg1, reg2);
14839                 if (!is_reg_const(reg2, is_jmp32))
14840                         break;
14841                 val = reg_const_value(reg2, is_jmp32);
14842                 /* BPF_JSET (i.e., TRUE branch, *not* BPF_JSET | BPF_X)
14843                  * requires a single bit to learn something useful. E.g., if we
14844                  * know that `r1 & 0x3` is true, then which bits (0, 1, or both)
14845                  * are actually set? We can learn something definite only if
14846                  * it's a single-bit value to begin with.
14847                  *
14848                  * BPF_JSET | BPF_X (i.e., negation of BPF_JSET) doesn't have
14849                  * this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor
14850                  * bit 1 is set, which we can readily use in adjustments.
14851                  */
14852                 if (!is_power_of_2(val))
14853                         break;
14854                 if (is_jmp32) {
14855                         t = tnum_or(tnum_subreg(reg1->var_off), tnum_const(val));
14856                         reg1->var_off = tnum_with_subreg(reg1->var_off, t);
14857                 } else {
14858                         reg1->var_off = tnum_or(reg1->var_off, tnum_const(val));
14859                 }
14860                 break;
14861         case BPF_JSET | BPF_X: /* reverse of BPF_JSET, see rev_opcode() */
14862                 if (!is_reg_const(reg2, is_jmp32))
14863                         swap(reg1, reg2);
14864                 if (!is_reg_const(reg2, is_jmp32))
14865                         break;
14866                 val = reg_const_value(reg2, is_jmp32);
14867                 if (is_jmp32) {
14868                         t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val));
14869                         reg1->var_off = tnum_with_subreg(reg1->var_off, t);
14870                 } else {
14871                         reg1->var_off = tnum_and(reg1->var_off, tnum_const(~val));
14872                 }
14873                 break;
14874         case BPF_JLE:
14875                 if (is_jmp32) {
14876                         reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
14877                         reg2->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
14878                 } else {
14879                         reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
14880                         reg2->umin_value = max(reg1->umin_value, reg2->umin_value);
14881                 }
14882                 break;
14883         case BPF_JLT:
14884                 if (is_jmp32) {
14885                         reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value - 1);
14886                         reg2->u32_min_value = max(reg1->u32_min_value + 1, reg2->u32_min_value);
14887                 } else {
14888                         reg1->umax_value = min(reg1->umax_value, reg2->umax_value - 1);
14889                         reg2->umin_value = max(reg1->umin_value + 1, reg2->umin_value);
14890                 }
14891                 break;
14892         case BPF_JSLE:
14893                 if (is_jmp32) {
14894                         reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
14895                         reg2->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
14896                 } else {
14897                         reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
14898                         reg2->smin_value = max(reg1->smin_value, reg2->smin_value);
14899                 }
14900                 break;
14901         case BPF_JSLT:
14902                 if (is_jmp32) {
14903                         reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value - 1);
14904                         reg2->s32_min_value = max(reg1->s32_min_value + 1, reg2->s32_min_value);
14905                 } else {
14906                         reg1->smax_value = min(reg1->smax_value, reg2->smax_value - 1);
14907                         reg2->smin_value = max(reg1->smin_value + 1, reg2->smin_value);
14908                 }
14909                 break;
14910         default:
14911                 return;
14912         }
14913 }
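
/* A worked example (illustrative numbers): for a 64-bit "if r1 < r2"
 * (BPF_JLT) with r1 in [0, 100] and r2 known to be 10:
 *  - true branch:  BPF_JLT trims r1->umax_value to min(100, 10 - 1) = 9,
 *    i.e. r1 is in [0, 9];
 *  - false branch: rev_opcode() yields BPF_JGE, which is flipped above to
 *    BPF_JLE with the operands swapped, raising r1->umin_value to 10,
 *    i.e. r1 is in [10, 100].
 * reg_bounds_sync() in reg_set_min_max() then propagates the refined u64
 * bounds into the signed, 32-bit and var_off views of each register.
 */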
14914 
14915 /* Adjusts the register min/max values in the case that the dst_reg and
14916  * src_reg are both SCALAR_VALUE registers (or we are simply doing a BPF_K
14917  * check, in which case we have a fake SCALAR_VALUE representing insn->imm).
14918  * Technically we can do similar adjustments for pointers to the same object,
14919  * but we don't support that right now.
14920  */
14921 static int reg_set_min_max(struct bpf_verifier_env *env,
14922                            struct bpf_reg_state *true_reg1,
14923                            struct bpf_reg_state *true_reg2,
14924                            struct bpf_reg_state *false_reg1,
14925                            struct bpf_reg_state *false_reg2,
14926                            u8 opcode, bool is_jmp32)
14927 {
14928         int err;
14929 
14930         /* If either register is a pointer, we can't learn anything about its
14931          * variable offset from the compare (unless they were a pointer into
14932          * the same object, but we don't bother with that).
14933          */
14934         if (false_reg1->type != SCALAR_VALUE || false_reg2->type != SCALAR_VALUE)
14935                 return 0;
14936 
14937         /* fallthrough (FALSE) branch */
14938         regs_refine_cond_op(false_reg1, false_reg2, rev_opcode(opcode), is_jmp32);
14939         reg_bounds_sync(false_reg1);
14940         reg_bounds_sync(false_reg2);
14941 
14942         /* jump (TRUE) branch */
14943         regs_refine_cond_op(true_reg1, true_reg2, opcode, is_jmp32);
14944         reg_bounds_sync(true_reg1);
14945         reg_bounds_sync(true_reg2);
14946 
14947         err = reg_bounds_sanity_check(env, true_reg1, "true_reg1");
14948         err = err ?: reg_bounds_sanity_check(env, true_reg2, "true_reg2");
14949         err = err ?: reg_bounds_sanity_check(env, false_reg1, "false_reg1");
14950         err = err ?: reg_bounds_sanity_check(env, false_reg2, "false_reg2");
14951         return err;
14952 }
14953 
14954 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
14955                                  struct bpf_reg_state *reg, u32 id,
14956                                  bool is_null)
14957 {
14958         if (type_may_be_null(reg->type) && reg->id == id &&
14959             (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
14960                 /* Old offset (both fixed and variable parts) should have been
14961                  * known-zero, because we don't allow pointer arithmetic on
14962                  * pointers that might be NULL. If we see this happening, don't
14963                  * convert the register.
14964                  *
14965                  * But in some cases, some helpers that return local kptrs
14966                  * advance offset for the returned pointer. In those cases, it
14967                  * is fine to expect to see reg->off.
14968                  */
14969                 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
14970                         return;
14971                 if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
14972                     WARN_ON_ONCE(reg->off))
14973                         return;
14974 
14975                 if (is_null) {
14976                         reg->type = SCALAR_VALUE;
14977                         /* We don't need id and ref_obj_id from this point
14978                          * onwards anymore, so reset them to give state
14979                          * pruning a chance to take effect.
14980                          */
14981                         reg->id = 0;
14982                         reg->ref_obj_id = 0;
14983 
14984                         return;
14985                 }
14986 
14987                 mark_ptr_not_null_reg(reg);
14988 
14989                 if (!reg_may_point_to_spin_lock(reg)) {
14990                         /* For not-NULL ptr, reg->ref_obj_id will be reset
14991                          * in release_reference().
14992                          *
14993                          * reg->id is still used by spin_lock ptr. Other
14994                          * than spin_lock ptr type, reg->id can be reset.
14995                          */
14996                         reg->id = 0;
14997                 }
14998         }
14999 }
15000 
15001 /* The logic is similar to find_good_pkt_pointers(); the two could
15002  * eventually be folded together.
15003  */
15004 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
15005                                   bool is_null)
15006 {
15007         struct bpf_func_state *state = vstate->frame[vstate->curframe];
15008         struct bpf_reg_state *regs = state->regs, *reg;
15009         u32 ref_obj_id = regs[regno].ref_obj_id;
15010         u32 id = regs[regno].id;
15011 
15012         if (ref_obj_id && ref_obj_id == id && is_null)
15013                 /* regs[regno] is in the " == NULL" branch.
15014                  * No one could have freed the reference state before
15015                  * doing the NULL check.
15016                  */
15017                 WARN_ON_ONCE(release_reference_state(state, id));
15018 
15019         bpf_for_each_reg_in_vstate(vstate, state, reg, ({
15020                 mark_ptr_or_null_reg(state, reg, id, is_null);
15021         }));
15022 }
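
/* The typical pattern this handles (illustrative C-level view):
 *
 *      val = bpf_map_lookup_elem(&map, &key); // PTR_TO_MAP_VALUE_OR_NULL, id = N
 *      copy = val;                            // the copy shares id N
 *      if (!val)                              // "== NULL" branch
 *              return 0;
 *      // here both val and copy are PTR_TO_MAP_VALUE and may be dereferenced
 *
 * In the NULL branch every register carrying id N becomes SCALAR_VALUE; in
 * the other branch the MAYBE_NULL flag is cleared on all of them at once.
 */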
15023 
15024 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
15025                                    struct bpf_reg_state *dst_reg,
15026                                    struct bpf_reg_state *src_reg,
15027                                    struct bpf_verifier_state *this_branch,
15028                                    struct bpf_verifier_state *other_branch)
15029 {
15030         if (BPF_SRC(insn->code) != BPF_X)
15031                 return false;
15032 
15033         /* Pointers are always 64-bit. */
15034         if (BPF_CLASS(insn->code) == BPF_JMP32)
15035                 return false;
15036 
15037         switch (BPF_OP(insn->code)) {
15038         case BPF_JGT:
15039                 if ((dst_reg->type == PTR_TO_PACKET &&
15040                      src_reg->type == PTR_TO_PACKET_END) ||
15041                     (dst_reg->type == PTR_TO_PACKET_META &&
15042                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15043                         /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
15044                         find_good_pkt_pointers(this_branch, dst_reg,
15045                                                dst_reg->type, false);
15046                         mark_pkt_end(other_branch, insn->dst_reg, true);
15047                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
15048                             src_reg->type == PTR_TO_PACKET) ||
15049                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15050                             src_reg->type == PTR_TO_PACKET_META)) {
15051                         /* pkt_end > pkt_data', pkt_data > pkt_meta' */
15052                         find_good_pkt_pointers(other_branch, src_reg,
15053                                                src_reg->type, true);
15054                         mark_pkt_end(this_branch, insn->src_reg, false);
15055                 } else {
15056                         return false;
15057                 }
15058                 break;
15059         case BPF_JLT:
15060                 if ((dst_reg->type == PTR_TO_PACKET &&
15061                      src_reg->type == PTR_TO_PACKET_END) ||
15062                     (dst_reg->type == PTR_TO_PACKET_META &&
15063                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15064                         /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
15065                         find_good_pkt_pointers(other_branch, dst_reg,
15066                                                dst_reg->type, true);
15067                         mark_pkt_end(this_branch, insn->dst_reg, false);
15068                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
15069                             src_reg->type == PTR_TO_PACKET) ||
15070                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15071                             src_reg->type == PTR_TO_PACKET_META)) {
15072                         /* pkt_end < pkt_data', pkt_data > pkt_meta' */
15073                         find_good_pkt_pointers(this_branch, src_reg,
15074                                                src_reg->type, false);
15075                         mark_pkt_end(other_branch, insn->src_reg, true);
15076                 } else {
15077                         return false;
15078                 }
15079                 break;
15080         case BPF_JGE:
15081                 if ((dst_reg->type == PTR_TO_PACKET &&
15082                      src_reg->type == PTR_TO_PACKET_END) ||
15083                     (dst_reg->type == PTR_TO_PACKET_META &&
15084                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15085                         /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
15086                         find_good_pkt_pointers(this_branch, dst_reg,
15087                                                dst_reg->type, true);
15088                         mark_pkt_end(other_branch, insn->dst_reg, false);
15089                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
15090                             src_reg->type == PTR_TO_PACKET) ||
15091                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15092                             src_reg->type == PTR_TO_PACKET_META)) {
15093                         /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
15094                         find_good_pkt_pointers(other_branch, src_reg,
15095                                                src_reg->type, false);
15096                         mark_pkt_end(this_branch, insn->src_reg, true);
15097                 } else {
15098                         return false;
15099                 }
15100                 break;
15101         case BPF_JLE:
15102                 if ((dst_reg->type == PTR_TO_PACKET &&
15103                      src_reg->type == PTR_TO_PACKET_END) ||
15104                     (dst_reg->type == PTR_TO_PACKET_META &&
15105                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15106                         /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
15107                         find_good_pkt_pointers(other_branch, dst_reg,
15108                                                dst_reg->type, false);
15109                         mark_pkt_end(this_branch, insn->dst_reg, true);
15110                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
15111                             src_reg->type == PTR_TO_PACKET) ||
15112                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15113                             src_reg->type == PTR_TO_PACKET_META)) {
15114                         /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
15115                         find_good_pkt_pointers(this_branch, src_reg,
15116                                                src_reg->type, true);
15117                         mark_pkt_end(other_branch, insn->src_reg, false);
15118                 } else {
15119                         return false;
15120                 }
15121                 break;
15122         default:
15123                 return false;
15124         }
15125 
15126         return true;
15127 }
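
/* The canonical sequence this recognizes (illustrative, XDP-style):
 *
 *      r2 = r1              // r1 = pkt_data, r3 = pkt_end
 *      r2 += 14
 *      if r2 > r3 goto drop
 *      // fall-through: find_good_pkt_pointers() records that 14 bytes past
 *      // pkt_data are in bounds, so later loads through r1 within that
 *      // range are accepted; the taken branch gets mark_pkt_end() instead,
 *      // see is_pkt_ptr_branch_taken().
 */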
15128 
15129 static void find_equal_scalars(struct bpf_verifier_state *vstate,
15130                                struct bpf_reg_state *known_reg)
15131 {
15132         struct bpf_reg_state fake_reg;
15133         struct bpf_func_state *state;
15134         struct bpf_reg_state *reg;
15135 
15136         bpf_for_each_reg_in_vstate(vstate, state, reg, ({
15137                 if (reg->type != SCALAR_VALUE || reg == known_reg)
15138                         continue;
15139                 if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST))
15140                         continue;
15141                 if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
15142                     reg->off == known_reg->off) {
15143                         copy_register_state(reg, known_reg);
15144                 } else {
15145                         s32 saved_off = reg->off;
15146 
15147                         fake_reg.type = SCALAR_VALUE;
15148                         __mark_reg_known(&fake_reg, (s32)reg->off - (s32)known_reg->off);
15149 
15150                         /* reg = known_reg; reg += delta */
15151                         copy_register_state(reg, known_reg);
15152                         /*
15153                          * Must preserve off, id and add_const flag,
15154                          * otherwise another find_equal_scalars() will be incorrect.
15155                          */
15156                         reg->off = saved_off;
15157 
15158                         scalar32_min_max_add(reg, &fake_reg);
15159                         scalar_min_max_add(reg, &fake_reg);
15160                         reg->var_off = tnum_add(reg->var_off, fake_reg.var_off);
15161                 }
15162         }));
15163 }
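
/* For example (illustrative):
 *
 *      r6 = r1              // scalar copy, r1 and r6 share one id
 *      if r1 > 100 goto l0
 *      // fall-through: r1 is now known to be <= 100 and find_equal_scalars()
 *      // copies the refined state to r6 as well
 *
 * When the copies differ only by a constant (e.g. r6 = r1; r6 += 4, tracked
 * via BPF_ADD_CONST and reg->off), the known bounds are copied and then
 * shifted by the difference of the recorded offsets, as done above.
 */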
15164 
15165 static int check_cond_jmp_op(struct bpf_verifier_env *env,
15166                              struct bpf_insn *insn, int *insn_idx)
15167 {
15168         struct bpf_verifier_state *this_branch = env->cur_state;
15169         struct bpf_verifier_state *other_branch;
15170         struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
15171         struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
15172         struct bpf_reg_state *eq_branch_regs;
15173         u8 opcode = BPF_OP(insn->code);
15174         bool is_jmp32;
15175         int pred = -1;
15176         int err;
15177 
15178         /* Only conditional jumps are expected to reach here. */
15179         if (opcode == BPF_JA || opcode > BPF_JCOND) {
15180                 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
15181                 return -EINVAL;
15182         }
15183 
15184         if (opcode == BPF_JCOND) {
15185                 struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
15186                 int idx = *insn_idx;
15187 
15188                 if (insn->code != (BPF_JMP | BPF_JCOND) ||
15189                     insn->src_reg != BPF_MAY_GOTO ||
15190                     insn->dst_reg || insn->imm || insn->off == 0) {
15191                         verbose(env, "invalid may_goto off %d imm %d\n",
15192                                 insn->off, insn->imm);
15193                         return -EINVAL;
15194                 }
15195                 prev_st = find_prev_entry(env, cur_st->parent, idx);
15196 
15197                 /* branch out 'fallthrough' insn as a new state to explore */
15198                 queued_st = push_stack(env, idx + 1, idx, false);
15199                 if (!queued_st)
15200                         return -ENOMEM;
15201 
15202                 queued_st->may_goto_depth++;
15203                 if (prev_st)
15204                         widen_imprecise_scalars(env, prev_st, queued_st);
15205                 *insn_idx += insn->off;
15206                 return 0;
15207         }
15208 
15209         /* check src2 operand */
15210         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15211         if (err)
15212                 return err;
15213 
15214         dst_reg = &regs[insn->dst_reg];
15215         if (BPF_SRC(insn->code) == BPF_X) {
15216                 if (insn->imm != 0) {
15217                         verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
15218                         return -EINVAL;
15219                 }
15220 
15221                 /* check src1 operand */
15222                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
15223                 if (err)
15224                         return err;
15225 
15226                 src_reg = &regs[insn->src_reg];
15227                 if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) &&
15228                     is_pointer_value(env, insn->src_reg)) {
15229                         verbose(env, "R%d pointer comparison prohibited\n",
15230                                 insn->src_reg);
15231                         return -EACCES;
15232                 }
15233         } else {
15234                 if (insn->src_reg != BPF_REG_0) {
15235                         verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
15236                         return -EINVAL;
15237                 }
15238                 src_reg = &env->fake_reg[0];
15239                 memset(src_reg, 0, sizeof(*src_reg));
15240                 src_reg->type = SCALAR_VALUE;
15241                 __mark_reg_known(src_reg, insn->imm);
15242         }
15243 
15244         is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
15245         pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32);
15246         if (pred >= 0) {
15247                 /* If we get here with a dst_reg pointer type it is because
15248                  * above is_branch_taken() special cased the 0 comparison.
15249                  */
15250                 if (!__is_pointer_value(false, dst_reg))
15251                         err = mark_chain_precision(env, insn->dst_reg);
15252                 if (BPF_SRC(insn->code) == BPF_X && !err &&
15253                     !__is_pointer_value(false, src_reg))
15254                         err = mark_chain_precision(env, insn->src_reg);
15255                 if (err)
15256                         return err;
15257         }
15258 
15259         if (pred == 1) {
15260                 /* Only follow the goto, ignore fall-through. If needed, push
15261                  * the fall-through branch for simulation under speculative
15262                  * execution.
15263                  */
15264                 if (!env->bypass_spec_v1 &&
15265                     !sanitize_speculative_path(env, insn, *insn_idx + 1,
15266                                                *insn_idx))
15267                         return -EFAULT;
15268                 if (env->log.level & BPF_LOG_LEVEL)
15269                         print_insn_state(env, this_branch->frame[this_branch->curframe]);
15270                 *insn_idx += insn->off;
15271                 return 0;
15272         } else if (pred == 0) {
15273                 /* Only follow the fall-through branch, since that's where the
15274                  * program will go. If needed, push the goto branch for
15275                  * simulation under speculative execution.
15276                  */
15277                 if (!env->bypass_spec_v1 &&
15278                     !sanitize_speculative_path(env, insn,
15279                                                *insn_idx + insn->off + 1,
15280                                                *insn_idx))
15281                         return -EFAULT;
15282                 if (env->log.level & BPF_LOG_LEVEL)
15283                         print_insn_state(env, this_branch->frame[this_branch->curframe]);
15284                 return 0;
15285         }
15286 
15287         other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
15288                                   false);
15289         if (!other_branch)
15290                 return -EFAULT;
15291         other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
15292 
15293         if (BPF_SRC(insn->code) == BPF_X) {
15294                 err = reg_set_min_max(env,
15295                                       &other_branch_regs[insn->dst_reg],
15296                                       &other_branch_regs[insn->src_reg],
15297                                       dst_reg, src_reg, opcode, is_jmp32);
15298         } else /* BPF_SRC(insn->code) == BPF_K */ {
15299                 /* reg_set_min_max() can mangle the fake_reg. Make a copy
15300                  * so that these are two different memory locations. The
15301                  * src_reg is not used beyond here in the BPF_K case.
15302                  */
15303                 memcpy(&env->fake_reg[1], &env->fake_reg[0],
15304                        sizeof(env->fake_reg[0]));
15305                 err = reg_set_min_max(env,
15306                                       &other_branch_regs[insn->dst_reg],
15307                                       &env->fake_reg[0],
15308                                       dst_reg, &env->fake_reg[1],
15309                                       opcode, is_jmp32);
15310         }
15311         if (err)
15312                 return err;
15313 
15314         if (BPF_SRC(insn->code) == BPF_X &&
15315             src_reg->type == SCALAR_VALUE && src_reg->id &&
15316             !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
15317                 find_equal_scalars(this_branch, src_reg);
15318                 find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
15319         }
15320         if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
15321             !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
15322                 find_equal_scalars(this_branch, dst_reg);
15323                 find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
15324         }
15325 
15326         /* if one pointer register is compared to another pointer
15327          * register check if PTR_MAYBE_NULL could be lifted.
15328          * E.g. register A - maybe null
15329          *      register B - not null
15330          * for JNE A, B, ... - A is not null in the false branch;
15331          * for JEQ A, B, ... - A is not null in the true branch.
15332          *
15333          * PTR_TO_BTF_ID points to a kernel struct that does not need
15334          * to be null checked by the BPF program, i.e., it could be
15335          * null even without the PTR_MAYBE_NULL marking, so only
15336          * propagate nullness when neither reg is of that type.
15337          */
15338         if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
15339             __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
15340             type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
15341             base_type(src_reg->type) != PTR_TO_BTF_ID &&
15342             base_type(dst_reg->type) != PTR_TO_BTF_ID) {
15343                 eq_branch_regs = NULL;
15344                 switch (opcode) {
15345                 case BPF_JEQ:
15346                         eq_branch_regs = other_branch_regs;
15347                         break;
15348                 case BPF_JNE:
15349                         eq_branch_regs = regs;
15350                         break;
15351                 default:
15352                         /* do nothing */
15353                         break;
15354                 }
15355                 if (eq_branch_regs) {
15356                         if (type_may_be_null(src_reg->type))
15357                                 mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
15358                         else
15359                                 mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
15360                 }
15361         }
15362 
15363         /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
15364          * NOTE: the optimizations below are related to pointer comparisons,
15365          *       which are never JMP32.
15366          */
15367         if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
15368             insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
15369             type_may_be_null(dst_reg->type)) {
15370                 /* Mark all identical registers in each branch as either
15371                  * safe or unknown depending on the R == 0 or R != 0 condition.
15372                  */
15373                 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
15374                                       opcode == BPF_JNE);
15375                 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
15376                                       opcode == BPF_JEQ);
15377         } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
15378                                            this_branch, other_branch) &&
15379                    is_pointer_value(env, insn->dst_reg)) {
15380                 verbose(env, "R%d pointer comparison prohibited\n",
15381                         insn->dst_reg);
15382                 return -EACCES;
15383         }
15384         if (env->log.level & BPF_LOG_LEVEL)
15385                 print_insn_state(env, this_branch->frame[this_branch->curframe]);
15386         return 0;
15387 }
15388 
15389 /* verify BPF_LD_IMM64 instruction */
15390 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
15391 {
15392         struct bpf_insn_aux_data *aux = cur_aux(env);
15393         struct bpf_reg_state *regs = cur_regs(env);
15394         struct bpf_reg_state *dst_reg;
15395         struct bpf_map *map;
15396         int err;
15397 
15398         if (BPF_SIZE(insn->code) != BPF_DW) {
15399                 verbose(env, "invalid BPF_LD_IMM insn\n");
15400                 return -EINVAL;
15401         }
15402         if (insn->off != 0) {
15403                 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
15404                 return -EINVAL;
15405         }
15406 
15407         err = check_reg_arg(env, insn->dst_reg, DST_OP);
15408         if (err)
15409                 return err;
15410 
15411         dst_reg = &regs[insn->dst_reg];
15412         if (insn->src_reg == 0) {
15413                 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
15414 
15415                 dst_reg->type = SCALAR_VALUE;
15416                 __mark_reg_known(&regs[insn->dst_reg], imm);
15417                 return 0;
15418         }
15419 
15420         /* All special src_reg cases are listed below. From this point onwards
15421          * we either succeed and assign a corresponding dst_reg->type after
15422          * zeroing the offset, or fail and reject the program.
15423          */
15424         mark_reg_known_zero(env, regs, insn->dst_reg);
15425 
15426         if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
15427                 dst_reg->type = aux->btf_var.reg_type;
15428                 switch (base_type(dst_reg->type)) {
15429                 case PTR_TO_MEM:
15430                         dst_reg->mem_size = aux->btf_var.mem_size;
15431                         break;
15432                 case PTR_TO_BTF_ID:
15433                         dst_reg->btf = aux->btf_var.btf;
15434                         dst_reg->btf_id = aux->btf_var.btf_id;
15435                         break;
15436                 default:
15437                         verbose(env, "bpf verifier is misconfigured\n");
15438                         return -EFAULT;
15439                 }
15440                 return 0;
15441         }
15442 
15443         if (insn->src_reg == BPF_PSEUDO_FUNC) {
15444                 struct bpf_prog_aux *aux = env->prog->aux;
15445                 u32 subprogno = find_subprog(env,
15446                                              env->insn_idx + insn->imm + 1);
15447 
15448                 if (!aux->func_info) {
15449                         verbose(env, "missing btf func_info\n");
15450                         return -EINVAL;
15451                 }
15452                 if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
15453                         verbose(env, "callback function not static\n");
15454                         return -EINVAL;
15455                 }
15456 
15457                 dst_reg->type = PTR_TO_FUNC;
15458                 dst_reg->subprogno = subprogno;
15459                 return 0;
15460         }
15461 
15462         map = env->used_maps[aux->map_index];
15463         dst_reg->map_ptr = map;
15464 
15465         if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
15466             insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
15467                 if (map->map_type == BPF_MAP_TYPE_ARENA) {
15468                         __mark_reg_unknown(env, dst_reg);
15469                         return 0;
15470                 }
15471                 dst_reg->type = PTR_TO_MAP_VALUE;
15472                 dst_reg->off = aux->map_off;
15473                 WARN_ON_ONCE(map->max_entries != 1);
15474                 /* We want reg->id to be same (0) as map_value is not distinct */
15475         } else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
15476                    insn->src_reg == BPF_PSEUDO_MAP_IDX) {
15477                 dst_reg->type = CONST_PTR_TO_MAP;
15478         } else {
15479                 verbose(env, "bpf verifier is misconfigured\n");
15480                 return -EINVAL;
15481         }
15482 
15483         return 0;
15484 }
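
/* Recap of the BPF_LD_IMM64 src_reg encodings handled above:
 *
 *      src_reg == 0                     -> plain 64-bit scalar constant
 *      BPF_PSEUDO_BTF_ID                -> PTR_TO_MEM or PTR_TO_BTF_ID (kernel var)
 *      BPF_PSEUDO_FUNC                  -> PTR_TO_FUNC (static callback subprog)
 *      BPF_PSEUDO_MAP_FD / MAP_IDX      -> CONST_PTR_TO_MAP
 *      BPF_PSEUDO_MAP_VALUE / IDX_VALUE -> PTR_TO_MAP_VALUE (unknown scalar
 *                                          for BPF_MAP_TYPE_ARENA maps)
 */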
15485 
15486 static bool may_access_skb(enum bpf_prog_type type)
15487 {
15488         switch (type) {
15489         case BPF_PROG_TYPE_SOCKET_FILTER:
15490         case BPF_PROG_TYPE_SCHED_CLS:
15491         case BPF_PROG_TYPE_SCHED_ACT:
15492                 return true;
15493         default:
15494                 return false;
15495         }
15496 }
15497 
15498 /* verify safety of LD_ABS|LD_IND instructions:
15499  * - they can only appear in the programs where ctx == skb
15500  * - since they are wrappers of function calls, they scratch R1-R5 registers,
15501  *   preserve R6-R9, and store return value into R0
15502  *
15503  * Implicit input:
15504  *   ctx == skb == R6 == CTX
15505  *
15506  * Explicit input:
15507  *   SRC == any register
15508  *   IMM == 32-bit immediate
15509  *
15510  * Output:
15511  *   R0 - 8/16/32-bit skb data converted to cpu endianness
15512  */
15513 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
15514 {
15515         struct bpf_reg_state *regs = cur_regs(env);
15516         static const int ctx_reg = BPF_REG_6;
15517         u8 mode = BPF_MODE(insn->code);
15518         int i, err;
15519 
15520         if (!may_access_skb(resolve_prog_type(env->prog))) {
15521                 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
15522                 return -EINVAL;
15523         }
15524 
15525         if (!env->ops->gen_ld_abs) {
15526                 verbose(env, "bpf verifier is misconfigured\n");
15527                 return -EINVAL;
15528         }
15529 
15530         if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
15531             BPF_SIZE(insn->code) == BPF_DW ||
15532             (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
15533                 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
15534                 return -EINVAL;
15535         }
15536 
15537         /* check whether implicit source operand (register R6) is readable */
15538         err = check_reg_arg(env, ctx_reg, SRC_OP);
15539         if (err)
15540                 return err;
15541 
15542         /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
15543          * gen_ld_abs() may terminate the program at runtime, leading to
15544          * a reference leak.
15545          */
15546         err = check_reference_leak(env, false);
15547         if (err) {
15548                 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
15549                 return err;
15550         }
15551 
15552         if (env->cur_state->active_lock.ptr) {
15553                 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
15554                 return -EINVAL;
15555         }
15556 
15557         if (env->cur_state->active_rcu_lock) {
15558                 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
15559                 return -EINVAL;
15560         }
15561 
15562         if (env->cur_state->active_preempt_lock) {
15563                 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_preempt_disable-ed region\n");
15564                 return -EINVAL;
15565         }
15566 
15567         if (regs[ctx_reg].type != PTR_TO_CTX) {
15568                 verbose(env,
15569                         "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
15570                 return -EINVAL;
15571         }
15572 
15573         if (mode == BPF_IND) {
15574                 /* check explicit source operand */
15575                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
15576                 if (err)
15577                         return err;
15578         }
15579 
15580         err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
15581         if (err < 0)
15582                 return err;
15583 
15584         /* reset caller saved regs to unreadable */
15585         for (i = 0; i < CALLER_SAVED_REGS; i++) {
15586                 mark_reg_not_init(env, regs, caller_saved[i]);
15587                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
15588         }
15589 
15590         /* mark destination R0 register as readable, since it contains
15591          * the value fetched from the packet.
15592          * Already marked as written above.
15593          */
15594         mark_reg_unknown(env, regs, BPF_REG_0);
15595         /* ld_abs loads up to 32 bits of skb data. */
15596         regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
15597         return 0;
15598 }
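
/* For instance (illustrative), a socket filter doing the classic
 *
 *      r0 = *(u16 *)skb[12]   // LD_ABS: load the EtherType, converted to
 *                             // CPU endianness by the generated code
 *
 * behaves like a helper call: r1-r5 are clobbered and only the (up to
 * 32-bit) result in r0 survives, which is why the caller-saved registers
 * are reset above.
 */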
15599 
15600 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name)
15601 {
15602         const char *exit_ctx = "At program exit";
15603         struct tnum enforce_attach_type_range = tnum_unknown;
15604         const struct bpf_prog *prog = env->prog;
15605         struct bpf_reg_state *reg;
15606         struct bpf_retval_range range = retval_range(0, 1);
15607         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
15608         int err;
15609         struct bpf_func_state *frame = env->cur_state->frame[0];
15610         const bool is_subprog = frame->subprogno;
15611         bool return_32bit = false;
15612 
15613         /* LSM and struct_ops func-ptr's return type could be "void" */
15614         if (!is_subprog || frame->in_exception_callback_fn) {
15615                 switch (prog_type) {
15616                 case BPF_PROG_TYPE_LSM:
15617                         if (prog->expected_attach_type == BPF_LSM_CGROUP)
15618                                 /* See below, can be 0 or 0-1 depending on hook. */
15619                                 break;
15620                         fallthrough;
15621                 case BPF_PROG_TYPE_STRUCT_OPS:
15622                         if (!prog->aux->attach_func_proto->type)
15623                                 return 0;
15624                         break;
15625                 default:
15626                         break;
15627                 }
15628         }
15629 
15630         /* The eBPF calling convention is such that R0 is used
15631          * to return the value from the eBPF program.
15632          * Make sure that it is readable at the time of
15633          * bpf_exit, which means that the program wrote
15634          * something into it earlier.
15635          */
15636         err = check_reg_arg(env, regno, SRC_OP);
15637         if (err)
15638                 return err;
15639 
15640         if (is_pointer_value(env, regno)) {
15641                 verbose(env, "R%d leaks addr as return value\n", regno);
15642                 return -EACCES;
15643         }
15644 
15645         reg = cur_regs(env) + regno;
15646 
15647         if (frame->in_async_callback_fn) {
15648                 /* enforce return zero from async callbacks like timer */
15649                 exit_ctx = "At async callback return";
15650                 range = retval_range(0, 0);
15651                 goto enforce_retval;
15652         }
15653 
15654         if (is_subprog && !frame->in_exception_callback_fn) {
15655                 if (reg->type != SCALAR_VALUE) {
15656                         verbose(env, "At subprogram exit the register R%d is not a scalar value (%s)\n",
15657                                 regno, reg_type_str(env, reg->type));
15658                         return -EINVAL;
15659                 }
15660                 return 0;
15661         }
15662 
15663         switch (prog_type) {
15664         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
15665                 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
15666                     env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
15667                     env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG ||
15668                     env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
15669                     env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
15670                     env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME ||
15671                     env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
15672                     env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME ||
15673                     env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME)
15674                         range = retval_range(1, 1);
15675                 if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
15676                     env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
15677                         range = retval_range(0, 3);
15678                 break;
15679         case BPF_PROG_TYPE_CGROUP_SKB:
15680                 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
15681                         range = retval_range(0, 3);
15682                         enforce_attach_type_range = tnum_range(2, 3);
15683                 }
15684                 break;
15685         case BPF_PROG_TYPE_CGROUP_SOCK:
15686         case BPF_PROG_TYPE_SOCK_OPS:
15687         case BPF_PROG_TYPE_CGROUP_DEVICE:
15688         case BPF_PROG_TYPE_CGROUP_SYSCTL:
15689         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
15690                 break;
15691         case BPF_PROG_TYPE_RAW_TRACEPOINT:
15692                 if (!env->prog->aux->attach_btf_id)
15693                         return 0;
15694                 range = retval_range(0, 0);
15695                 break;
15696         case BPF_PROG_TYPE_TRACING:
15697                 switch (env->prog->expected_attach_type) {
15698                 case BPF_TRACE_FENTRY:
15699                 case BPF_TRACE_FEXIT:
15700                         range = retval_range(0, 0);
15701                         break;
15702                 case BPF_TRACE_RAW_TP:
15703                 case BPF_MODIFY_RETURN:
15704                         return 0;
15705                 case BPF_TRACE_ITER:
15706                         break;
15707                 default:
15708                         return -ENOTSUPP;
15709                 }
15710                 break;
15711         case BPF_PROG_TYPE_SK_LOOKUP:
15712                 range = retval_range(SK_DROP, SK_PASS);
15713                 break;
15714 
15715         case BPF_PROG_TYPE_LSM:
15716                 if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
15717                         /* no range found, any return value is allowed */
15718                         if (!get_func_retval_range(env->prog, &range))
15719                                 return 0;
15720                         /* no restricted range, any return value is allowed */
15721                         if (range.minval == S32_MIN && range.maxval == S32_MAX)
15722                                 return 0;
15723                         return_32bit = true;
15724                 } else if (!env->prog->aux->attach_func_proto->type) {
15725                         /* Make sure programs that attach to void
15726                          * hooks don't try to modify return value.
15727                          */
15728                         range = retval_range(1, 1);
15729                 }
15730                 break;
15731 
15732         case BPF_PROG_TYPE_NETFILTER:
15733                 range = retval_range(NF_DROP, NF_ACCEPT);
15734                 break;
15735         case BPF_PROG_TYPE_EXT:
15736                 /* freplace program can return anything as its return value
15737                  * depends on the to-be-replaced kernel func or bpf program.
15738                  */
15739         default:
15740                 return 0;
15741         }
15742 
15743 enforce_retval:
15744         if (reg->type != SCALAR_VALUE) {
15745                 verbose(env, "%s the register R%d is not a known value (%s)\n",
15746                         exit_ctx, regno, reg_type_str(env, reg->type));
15747                 return -EINVAL;
15748         }
15749 
15750         err = mark_chain_precision(env, regno);
15751         if (err)
15752                 return err;
15753 
15754         if (!retval_range_within(range, reg, return_32bit)) {
15755                 verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name);
15756                 if (!is_subprog &&
15757                     prog->expected_attach_type == BPF_LSM_CGROUP &&
15758                     prog_type == BPF_PROG_TYPE_LSM &&
15759                     !prog->aux->attach_func_proto->type)
15760                         verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
15761                 return -EINVAL;
15762         }
15763 
15764         if (!tnum_is_unknown(enforce_attach_type_range) &&
15765             tnum_in(enforce_attach_type_range, reg->var_off))
15766                 env->prog->enforce_expected_attach_type = 1;
15767         return 0;
15768 }
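
/* For instance (illustrative), an SK_LOOKUP program ending in "return 2" is
 * rejected here because its allowed retval_range is [SK_DROP, SK_PASS],
 * i.e. [0, 1], whereas "return SK_PASS" passes the range check above.
 */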
15769 
15770 /* non-recursive DFS pseudo code
15771  * 1  procedure DFS-iterative(G,v):
15772  * 2      label v as discovered
15773  * 3      let S be a stack
15774  * 4      S.push(v)
15775  * 5      while S is not empty
15776  * 6            t <- S.peek()
15777  * 7            if t is what we're looking for:
15778  * 8                return t
15779  * 9            for all edges e in G.adjacentEdges(t) do
15780  * 10               if edge e is already labelled
15781  * 11                   continue with the next edge
15782  * 12               w <- G.adjacentVertex(t,e)
15783  * 13               if vertex w is not discovered and not explored
15784  * 14                   label e as tree-edge
15785  * 15                   label w as discovered
15786  * 16                   S.push(w)
15787  * 17                   continue at 5
15788  * 18               else if vertex w is discovered
15789  * 19                   label e as back-edge
15790  * 20               else
15791  * 21                   // vertex w is explored
15792  * 22                   label e as forward- or cross-edge
15793  * 23           label t as explored
15794  * 24           S.pop()
15795  *
15796  * convention:
15797  * 0x10 - discovered
15798  * 0x11 - discovered and fall-through edge labelled
15799  * 0x12 - discovered and fall-through and branch edges labelled
15800  * 0x20 - explored
15801  */
15802 
15803 enum {
15804         DISCOVERED = 0x10,
15805         EXPLORED = 0x20,
15806         FALLTHROUGH = 1,
15807         BRANCH = 2,
15808 };
15809 
15810 static void mark_prune_point(struct bpf_verifier_env *env, int idx)
15811 {
15812         env->insn_aux_data[idx].prune_point = true;
15813 }
15814 
15815 static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
15816 {
15817         return env->insn_aux_data[insn_idx].prune_point;
15818 }
15819 
15820 static void mark_force_checkpoint(struct bpf_verifier_env *env, int idx)
15821 {
15822         env->insn_aux_data[idx].force_checkpoint = true;
15823 }
15824 
15825 static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
15826 {
15827         return env->insn_aux_data[insn_idx].force_checkpoint;
15828 }
15829 
15830 static void mark_calls_callback(struct bpf_verifier_env *env, int idx)
15831 {
15832         env->insn_aux_data[idx].calls_callback = true;
15833 }
15834 
15835 static bool calls_callback(struct bpf_verifier_env *env, int insn_idx)
15836 {
15837         return env->insn_aux_data[insn_idx].calls_callback;
15838 }
15839 
15840 enum {
15841         DONE_EXPLORING = 0,
15842         KEEP_EXPLORING = 1,
15843 };
15844 
15845 /* t, w, e - match pseudo-code above:
15846  * t - index of current instruction
15847  * w - next instruction
15848  * e - edge
15849  */
15850 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
15851 {
15852         int *insn_stack = env->cfg.insn_stack;
15853         int *insn_state = env->cfg.insn_state;
15854 
15855         if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
15856                 return DONE_EXPLORING;
15857 
15858         if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
15859                 return DONE_EXPLORING;
15860 
15861         if (w < 0 || w >= env->prog->len) {
15862                 verbose_linfo(env, t, "%d: ", t);
15863                 verbose(env, "jump out of range from insn %d to %d\n", t, w);
15864                 return -EINVAL;
15865         }
15866 
15867         if (e == BRANCH) {
15868                 /* mark branch target for state pruning */
15869                 mark_prune_point(env, w);
15870                 mark_jmp_point(env, w);
15871         }
15872 
15873         if (insn_state[w] == 0) {
15874                 /* tree-edge */
15875                 insn_state[t] = DISCOVERED | e;
15876                 insn_state[w] = DISCOVERED;
15877                 if (env->cfg.cur_stack >= env->prog->len)
15878                         return -E2BIG;
15879                 insn_stack[env->cfg.cur_stack++] = w;
15880                 return KEEP_EXPLORING;
15881         } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
15882                 if (env->bpf_capable)
15883                         return DONE_EXPLORING;
15884                 verbose_linfo(env, t, "%d: ", t);
15885                 verbose_linfo(env, w, "%d: ", w);
15886                 verbose(env, "back-edge from insn %d to %d\n", t, w);
15887                 return -EINVAL;
15888         } else if (insn_state[w] == EXPLORED) {
15889                 /* forward- or cross-edge */
15890                 insn_state[t] = DISCOVERED | e;
15891         } else {
15892                 verbose(env, "insn state internal bug\n");
15893                 return -EFAULT;
15894         }
15895         return DONE_EXPLORING;
15896 }
15897 
15898 static int visit_func_call_insn(int t, struct bpf_insn *insns,
15899                                 struct bpf_verifier_env *env,
15900                                 bool visit_callee)
15901 {
15902         int ret, insn_sz;
15903 
15904         insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1;
15905         ret = push_insn(t, t + insn_sz, FALLTHROUGH, env);
15906         if (ret)
15907                 return ret;
15908 
15909         mark_prune_point(env, t + insn_sz);
15910         /* when we exit from subprog, we need to record non-linear history */
15911         mark_jmp_point(env, t + insn_sz);
15912 
15913         if (visit_callee) {
15914                 mark_prune_point(env, t);
15915                 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
15916         }
15917         return ret;
15918 }
15919 
15920 /* Visits the instruction at index t and returns one of the following:
15921  *  < 0 - an error occurred
15922  *  DONE_EXPLORING - the instruction was fully explored
15923  *  KEEP_EXPLORING - there is still work to be done before it is fully explored
15924  */
15925 static int visit_insn(int t, struct bpf_verifier_env *env)
15926 {
15927         struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
15928         int ret, off, insn_sz;
15929 
15930         if (bpf_pseudo_func(insn))
15931                 return visit_func_call_insn(t, insns, env, true);
15932 
15933         /* All non-branch instructions have a single fall-through edge. */
15934         if (BPF_CLASS(insn->code) != BPF_JMP &&
15935             BPF_CLASS(insn->code) != BPF_JMP32) {
15936                 insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
15937                 return push_insn(t, t + insn_sz, FALLTHROUGH, env);
15938         }
15939 
15940         switch (BPF_OP(insn->code)) {
15941         case BPF_EXIT:
15942                 return DONE_EXPLORING;
15943 
15944         case BPF_CALL:
15945                 if (is_async_callback_calling_insn(insn))
15946                         /* Mark this call insn as a prune point to trigger
15947                          * is_state_visited() check before the call itself is
15948                          * processed by __check_func_call(). Otherwise a new
15949                          * async state will be pushed for further exploration.
15950                          */
15951                         mark_prune_point(env, t);
15952                 /* For functions that invoke callbacks it is not known how many times
15953                  * the callback will be called. The verifier models callback calling
15954                  * functions by repeatedly visiting the callback body and returning to
15955                  * the originating call instruction.
15956                  * In order to stop such iteration the verifier needs to identify when a
15957                  * state identical to some state from a previous iteration is reached.
15958                  * The check below forces creation of a checkpoint before the callback
15959                  * calling instruction to allow a search for such identical states.
15960                  */
15961                 if (is_sync_callback_calling_insn(insn)) {
15962                         mark_calls_callback(env, t);
15963                         mark_force_checkpoint(env, t);
15964                         mark_prune_point(env, t);
15965                         mark_jmp_point(env, t);
15966                 }
15967                 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
15968                         struct bpf_kfunc_call_arg_meta meta;
15969 
15970                         ret = fetch_kfunc_meta(env, insn, &meta, NULL);
15971                         if (ret == 0 && is_iter_next_kfunc(&meta)) {
15972                                 mark_prune_point(env, t);
15973                                 /* Checking and saving state checkpoints at iter_next() call
15974                                  * is crucial for fast convergence of open-coded iterator loop
15975                                  * logic, so we need to force it. If we don't do that,
15976                                  * is_state_visited() might skip saving a checkpoint, causing
15977                                  * unnecessarily long sequence of not checkpointed
15978                                  * instructions and jumps, leading to exhaustion of jump
15979                                  * history buffer, and potentially other undesired outcomes.
15980                                  * It is expected that with correct open-coded iterators
15981                                  * convergence will happen quickly, so we don't run a risk of
15982                                  * exhausting memory.
15983                                  */
15984                                 mark_force_checkpoint(env, t);
15985                         }
15986                 }
15987                 return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
15988 
15989         case BPF_JA:
15990                 if (BPF_SRC(insn->code) != BPF_K)
15991                         return -EINVAL;
15992 
15993                 if (BPF_CLASS(insn->code) == BPF_JMP)
15994                         off = insn->off;
15995                 else
15996                         off = insn->imm;
15997 
15998                 /* unconditional jump with single edge */
15999                 ret = push_insn(t, t + off + 1, FALLTHROUGH, env);
16000                 if (ret)
16001                         return ret;
16002 
16003                 mark_prune_point(env, t + off + 1);
16004                 mark_jmp_point(env, t + off + 1);
16005 
16006                 return ret;
16007 
16008         default:
16009                 /* conditional jump with two edges */
16010                 mark_prune_point(env, t);
16011                 if (is_may_goto_insn(insn))
16012                         mark_force_checkpoint(env, t);
16013 
16014                 ret = push_insn(t, t + 1, FALLTHROUGH, env);
16015                 if (ret)
16016                         return ret;
16017 
16018                 return push_insn(t, t + insn->off + 1, BRANCH, env);
16019         }
16020 }
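
/* Illustrative example of prune/jump point marking.  For a program like
 *   0: r1 = 0
 *   1: if r1 != 0 goto pc+2
 *   2: r0 = 1
 *   3: exit
 *   4: r0 = 2
 *   5: exit
 * visit_insn() marks insn 1 (the conditional jump) as a prune point, and
 * push_insn() marks insn 4 (the branch target) as both a prune point and a
 * jump point.
 */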
16021 
16022 /* non-recursive depth-first-search to detect loops in BPF program
16023  * loop == back-edge in directed graph
16024  */
16025 static int check_cfg(struct bpf_verifier_env *env)
16026 {
16027         int insn_cnt = env->prog->len;
16028         int *insn_stack, *insn_state;
16029         int ex_insn_beg, i, ret = 0;
16030         bool ex_done = false;
16031 
16032         insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
16033         if (!insn_state)
16034                 return -ENOMEM;
16035 
16036         insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
16037         if (!insn_stack) {
16038                 kvfree(insn_state);
16039                 return -ENOMEM;
16040         }
16041 
16042         insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
16043         insn_stack[0] = 0; /* 0 is the first instruction */
16044         env->cfg.cur_stack = 1;
16045 
16046 walk_cfg:
16047         while (env->cfg.cur_stack > 0) {
16048                 int t = insn_stack[env->cfg.cur_stack - 1];
16049 
16050                 ret = visit_insn(t, env);
16051                 switch (ret) {
16052                 case DONE_EXPLORING:
16053                         insn_state[t] = EXPLORED;
16054                         env->cfg.cur_stack--;
16055                         break;
16056                 case KEEP_EXPLORING:
16057                         break;
16058                 default:
16059                         if (ret > 0) {
16060                                 verbose(env, "visit_insn internal bug\n");
16061                                 ret = -EFAULT;
16062                         }
16063                         goto err_free;
16064                 }
16065         }
16066 
16067         if (env->cfg.cur_stack < 0) {
16068                 verbose(env, "pop stack internal bug\n");
16069                 ret = -EFAULT;
16070                 goto err_free;
16071         }
16072 
16073         if (env->exception_callback_subprog && !ex_done) {
16074                 ex_insn_beg = env->subprog_info[env->exception_callback_subprog].start;
16075 
16076                 insn_state[ex_insn_beg] = DISCOVERED;
16077                 insn_stack[0] = ex_insn_beg;
16078                 env->cfg.cur_stack = 1;
16079                 ex_done = true;
16080                 goto walk_cfg;
16081         }
16082 
16083         for (i = 0; i < insn_cnt; i++) {
16084                 struct bpf_insn *insn = &env->prog->insnsi[i];
16085 
16086                 if (insn_state[i] != EXPLORED) {
16087                         verbose(env, "unreachable insn %d\n", i);
16088                         ret = -EINVAL;
16089                         goto err_free;
16090                 }
16091                 if (bpf_is_ldimm64(insn)) {
16092                         if (insn_state[i + 1] != 0) {
16093                                 verbose(env, "jump into the middle of ldimm64 insn %d\n", i);
16094                                 ret = -EINVAL;
16095                                 goto err_free;
16096                         }
16097                         i++; /* skip second half of ldimm64 */
16098                 }
16099         }
16100         ret = 0; /* cfg looks good */
16101 
16102 err_free:
16103         kvfree(insn_state);
16104         kvfree(insn_stack);
16105         env->cfg.insn_state = env->cfg.insn_stack = NULL;
16106         return ret;
16107 }
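
/* Examples of programs that check_cfg() rejects (illustrative):
 *   0: r0 = 0
 *   1: goto pc-2        // back-edge from insn 1 to insn 0; rejected unless
 *                       // env->bpf_capable, in which case the loop is
 *                       // examined during the main verification pass instead
 * and:
 *   0: r0 = 0
 *   1: exit
 *   2: r0 = 1           // never reached: "unreachable insn 2"
 *   3: exit
 */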
16108 
16109 static int check_abnormal_return(struct bpf_verifier_env *env)
16110 {
16111         int i;
16112 
16113         for (i = 1; i < env->subprog_cnt; i++) {
16114                 if (env->subprog_info[i].has_ld_abs) {
16115                         verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
16116                         return -EINVAL;
16117                 }
16118                 if (env->subprog_info[i].has_tail_call) {
16119                         verbose(env, "tail_call is not allowed in subprogs without BTF\n");
16120                         return -EINVAL;
16121                 }
16122         }
16123         return 0;
16124 }
16125 
16126 /* The minimum supported BTF func info size */
16127 #define MIN_BPF_FUNCINFO_SIZE   8
16128 #define MAX_FUNCINFO_REC_SIZE   252
16129 
16130 static int check_btf_func_early(struct bpf_verifier_env *env,
16131                                 const union bpf_attr *attr,
16132                                 bpfptr_t uattr)
16133 {
16134         u32 krec_size = sizeof(struct bpf_func_info);
16135         const struct btf_type *type, *func_proto;
16136         u32 i, nfuncs, urec_size, min_size;
16137         struct bpf_func_info *krecord;
16138         struct bpf_prog *prog;
16139         const struct btf *btf;
16140         u32 prev_offset = 0;
16141         bpfptr_t urecord;
16142         int ret = -ENOMEM;
16143 
16144         nfuncs = attr->func_info_cnt;
16145         if (!nfuncs) {
16146                 if (check_abnormal_return(env))
16147                         return -EINVAL;
16148                 return 0;
16149         }
16150 
16151         urec_size = attr->func_info_rec_size;
16152         if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
16153             urec_size > MAX_FUNCINFO_REC_SIZE ||
16154             urec_size % sizeof(u32)) {
16155                 verbose(env, "invalid func info rec size %u\n", urec_size);
16156                 return -EINVAL;
16157         }
16158 
16159         prog = env->prog;
16160         btf = prog->aux->btf;
16161 
16162         urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
16163         min_size = min_t(u32, krec_size, urec_size);
16164 
16165         krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
16166         if (!krecord)
16167                 return -ENOMEM;
16168 
16169         for (i = 0; i < nfuncs; i++) {
16170                 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
16171                 if (ret) {
16172                         if (ret == -E2BIG) {
16173                                 verbose(env, "nonzero trailing record in func info");
16174                                 /* set the size kernel expects so loader can zero
16175                                  * out the rest of the record.
16176                                  */
16177                                 if (copy_to_bpfptr_offset(uattr,
16178                                                           offsetof(union bpf_attr, func_info_rec_size),
16179                                                           &min_size, sizeof(min_size)))
16180                                         ret = -EFAULT;
16181                         }
16182                         goto err_free;
16183                 }
16184 
16185                 if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
16186                         ret = -EFAULT;
16187                         goto err_free;
16188                 }
16189 
16190                 /* check insn_off */
16191                 ret = -EINVAL;
16192                 if (i == 0) {
16193                         if (krecord[i].insn_off) {
16194                                 verbose(env,
16195                                         "nonzero insn_off %u for the first func info record",
16196                                         krecord[i].insn_off);
16197                                 goto err_free;
16198                         }
16199                 } else if (krecord[i].insn_off <= prev_offset) {
16200                         verbose(env,
16201                                 "same or smaller insn offset (%u) than previous func info record (%u)",
16202                                 krecord[i].insn_off, prev_offset);
16203                         goto err_free;
16204                 }
16205 
16206                 /* check type_id */
16207                 type = btf_type_by_id(btf, krecord[i].type_id);
16208                 if (!type || !btf_type_is_func(type)) {
16209                         verbose(env, "invalid type id %d in func info",
16210                                 krecord[i].type_id);
16211                         goto err_free;
16212                 }
16213 
16214                 func_proto = btf_type_by_id(btf, type->type);
16215                 if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
16216                         /* btf_func_check() already verified it during BTF load */
16217                         goto err_free;
16218 
16219                 prev_offset = krecord[i].insn_off;
16220                 bpfptr_add(&urecord, urec_size);
16221         }
16222 
16223         prog->aux->func_info = krecord;
16224         prog->aux->func_info_cnt = nfuncs;
16225         return 0;
16226 
16227 err_free:
16228         kvfree(krecord);
16229         return ret;
16230 }
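
/* A note on the variable record size handling above (illustrative): if
 * userspace was built against a newer UAPI and passes, say,
 * func_info_rec_size == 16 while the kernel's struct bpf_func_info is smaller
 * (currently 8 bytes, matching MIN_BPF_FUNCINFO_SIZE), then
 * bpf_check_uarg_tail_zero() accepts each record only if bytes 8..15 are
 * zero, and only min_size == 8 bytes are copied.  On -E2BIG the expected size
 * is written back into uattr so the loader can zero the extra bytes and retry.
 */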
16231 
16232 static int check_btf_func(struct bpf_verifier_env *env,
16233                           const union bpf_attr *attr,
16234                           bpfptr_t uattr)
16235 {
16236         const struct btf_type *type, *func_proto, *ret_type;
16237         u32 i, nfuncs, urec_size;
16238         struct bpf_func_info *krecord;
16239         struct bpf_func_info_aux *info_aux = NULL;
16240         struct bpf_prog *prog;
16241         const struct btf *btf;
16242         bpfptr_t urecord;
16243         bool scalar_return;
16244         int ret = -ENOMEM;
16245 
16246         nfuncs = attr->func_info_cnt;
16247         if (!nfuncs) {
16248                 if (check_abnormal_return(env))
16249                         return -EINVAL;
16250                 return 0;
16251         }
16252         if (nfuncs != env->subprog_cnt) {
16253                 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
16254                 return -EINVAL;
16255         }
16256 
16257         urec_size = attr->func_info_rec_size;
16258 
16259         prog = env->prog;
16260         btf = prog->aux->btf;
16261 
16262         urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
16263 
16264         krecord = prog->aux->func_info;
16265         info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
16266         if (!info_aux)
16267                 return -ENOMEM;
16268 
16269         for (i = 0; i < nfuncs; i++) {
16270                 /* check insn_off */
16271                 ret = -EINVAL;
16272 
16273                 if (env->subprog_info[i].start != krecord[i].insn_off) {
16274                         verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
16275                         goto err_free;
16276                 }
16277 
16278                 /* Already checked type_id */
16279                 type = btf_type_by_id(btf, krecord[i].type_id);
16280                 info_aux[i].linkage = BTF_INFO_VLEN(type->info);
16281                 /* Already checked func_proto */
16282                 func_proto = btf_type_by_id(btf, type->type);
16283 
16284                 ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
16285                 scalar_return =
16286                         btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
16287                 if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
16288                         verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
16289                         goto err_free;
16290                 }
16291                 if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
16292                         verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
16293                         goto err_free;
16294                 }
16295 
16296                 bpfptr_add(&urecord, urec_size);
16297         }
16298 
16299         prog->aux->func_info_aux = info_aux;
16300         return 0;
16301 
16302 err_free:
16303         kfree(info_aux);
16304         return ret;
16305 }
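
/* In short, func_info is accepted only when there is exactly one record per
 * subprog, the records are sorted by insn_off, and each record's insn_off
 * equals the corresponding subprog start.  check_btf_func_early() validates
 * the records themselves; this later pass cross-checks them against the
 * subprog layout discovered by the verifier.
 */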
16306 
16307 static void adjust_btf_func(struct bpf_verifier_env *env)
16308 {
16309         struct bpf_prog_aux *aux = env->prog->aux;
16310         int i;
16311 
16312         if (!aux->func_info)
16313                 return;
16314 
16315         /* func_info is not available for hidden subprogs */
16316         for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++)
16317                 aux->func_info[i].insn_off = env->subprog_info[i].start;
16318 }
16319 
16320 #define MIN_BPF_LINEINFO_SIZE   offsetofend(struct bpf_line_info, line_col)
16321 #define MAX_LINEINFO_REC_SIZE   MAX_FUNCINFO_REC_SIZE
16322 
16323 static int check_btf_line(struct bpf_verifier_env *env,
16324                           const union bpf_attr *attr,
16325                           bpfptr_t uattr)
16326 {
16327         u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
16328         struct bpf_subprog_info *sub;
16329         struct bpf_line_info *linfo;
16330         struct bpf_prog *prog;
16331         const struct btf *btf;
16332         bpfptr_t ulinfo;
16333         int err;
16334 
16335         nr_linfo = attr->line_info_cnt;
16336         if (!nr_linfo)
16337                 return 0;
16338         if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
16339                 return -EINVAL;
16340 
16341         rec_size = attr->line_info_rec_size;
16342         if (rec_size < MIN_BPF_LINEINFO_SIZE ||
16343             rec_size > MAX_LINEINFO_REC_SIZE ||
16344             rec_size & (sizeof(u32) - 1))
16345                 return -EINVAL;
16346 
16347         /* Need to zero the allocation in case userspace
16348          * passes in a smaller bpf_line_info object.
16349          */
16350         linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
16351                          GFP_KERNEL | __GFP_NOWARN);
16352         if (!linfo)
16353                 return -ENOMEM;
16354 
16355         prog = env->prog;
16356         btf = prog->aux->btf;
16357 
16358         s = 0;
16359         sub = env->subprog_info;
16360         ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
16361         expected_size = sizeof(struct bpf_line_info);
16362         ncopy = min_t(u32, expected_size, rec_size);
16363         for (i = 0; i < nr_linfo; i++) {
16364                 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
16365                 if (err) {
16366                         if (err == -E2BIG) {
16367                                 verbose(env, "nonzero trailing record in line_info");
16368                                 if (copy_to_bpfptr_offset(uattr,
16369                                                           offsetof(union bpf_attr, line_info_rec_size),
16370                                                           &expected_size, sizeof(expected_size)))
16371                                         err = -EFAULT;
16372                         }
16373                         goto err_free;
16374                 }
16375 
16376                 if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
16377                         err = -EFAULT;
16378                         goto err_free;
16379                 }
16380 
16381                 /*
16382                  * Check insn_off to ensure
16383                  * 1) strictly increasing AND
16384                  * 2) bounded by prog->len
16385                  *
16386                  * The linfo[0].insn_off == 0 check logically falls into
16387                  * the later "missing bpf_line_info for func..." case
16388                  * because linfo[0].insn_off must match the start of
16389                  * the first subprog, and the first subprog must have
16390                  * subprog_info[0].start == 0.
16391                  */
16392                 if ((i && linfo[i].insn_off <= prev_offset) ||
16393                     linfo[i].insn_off >= prog->len) {
16394                         verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
16395                                 i, linfo[i].insn_off, prev_offset,
16396                                 prog->len);
16397                         err = -EINVAL;
16398                         goto err_free;
16399                 }
16400 
16401                 if (!prog->insnsi[linfo[i].insn_off].code) {
16402                         verbose(env,
16403                                 "Invalid insn code at line_info[%u].insn_off\n",
16404                                 i);
16405                         err = -EINVAL;
16406                         goto err_free;
16407                 }
16408 
16409                 if (!btf_name_by_offset(btf, linfo[i].line_off) ||
16410                     !btf_name_by_offset(btf, linfo[i].file_name_off)) {
16411                         verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
16412                         err = -EINVAL;
16413                         goto err_free;
16414                 }
16415 
16416                 if (s != env->subprog_cnt) {
16417                         if (linfo[i].insn_off == sub[s].start) {
16418                                 sub[s].linfo_idx = i;
16419                                 s++;
16420                         } else if (sub[s].start < linfo[i].insn_off) {
16421                                 verbose(env, "missing bpf_line_info for func#%u\n", s);
16422                                 err = -EINVAL;
16423                                 goto err_free;
16424                         }
16425                 }
16426 
16427                 prev_offset = linfo[i].insn_off;
16428                 bpfptr_add(&ulinfo, rec_size);
16429         }
16430 
16431         if (s != env->subprog_cnt) {
16432                 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
16433                         env->subprog_cnt - s, s);
16434                 err = -EINVAL;
16435                 goto err_free;
16436         }
16437 
16438         prog->aux->linfo = linfo;
16439         prog->aux->nr_linfo = nr_linfo;
16440 
16441         return 0;
16442 
16443 err_free:
16444         kvfree(linfo);
16445         return err;
16446 }
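
/* Example of a line_info layout the above accepts (illustrative): for a
 * program with two subprogs starting at insns 0 and 5, line_info records
 * might have insn_off 0, 2, 5, 7 - strictly increasing, below prog->len,
 * with a record present at each subprog start (0 and 5) so that
 * sub[s].linfo_idx can be assigned.
 */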
16447 
16448 #define MIN_CORE_RELO_SIZE      sizeof(struct bpf_core_relo)
16449 #define MAX_CORE_RELO_SIZE      MAX_FUNCINFO_REC_SIZE
16450 
16451 static int check_core_relo(struct bpf_verifier_env *env,
16452                            const union bpf_attr *attr,
16453                            bpfptr_t uattr)
16454 {
16455         u32 i, nr_core_relo, ncopy, expected_size, rec_size;
16456         struct bpf_core_relo core_relo = {};
16457         struct bpf_prog *prog = env->prog;
16458         const struct btf *btf = prog->aux->btf;
16459         struct bpf_core_ctx ctx = {
16460                 .log = &env->log,
16461                 .btf = btf,
16462         };
16463         bpfptr_t u_core_relo;
16464         int err;
16465 
16466         nr_core_relo = attr->core_relo_cnt;
16467         if (!nr_core_relo)
16468                 return 0;
16469         if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
16470                 return -EINVAL;
16471 
16472         rec_size = attr->core_relo_rec_size;
16473         if (rec_size < MIN_CORE_RELO_SIZE ||
16474             rec_size > MAX_CORE_RELO_SIZE ||
16475             rec_size % sizeof(u32))
16476                 return -EINVAL;
16477 
16478         u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
16479         expected_size = sizeof(struct bpf_core_relo);
16480         ncopy = min_t(u32, expected_size, rec_size);
16481 
16482         /* Unlike func_info and line_info, copy and apply each CO-RE
16483          * relocation record one at a time.
16484          */
16485         for (i = 0; i < nr_core_relo; i++) {
16486                 /* future proofing when sizeof(bpf_core_relo) changes */
16487                 err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
16488                 if (err) {
16489                         if (err == -E2BIG) {
16490                                 verbose(env, "nonzero trailing record in core_relo");
16491                                 if (copy_to_bpfptr_offset(uattr,
16492                                                           offsetof(union bpf_attr, core_relo_rec_size),
16493                                                           &expected_size, sizeof(expected_size)))
16494                                         err = -EFAULT;
16495                         }
16496                         break;
16497                 }
16498 
16499                 if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
16500                         err = -EFAULT;
16501                         break;
16502                 }
16503 
16504                 if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
16505                         verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
16506                                 i, core_relo.insn_off, prog->len);
16507                         err = -EINVAL;
16508                         break;
16509                 }
16510 
16511                 err = bpf_core_apply(&ctx, &core_relo, i,
16512                                      &prog->insnsi[core_relo.insn_off / 8]);
16513                 if (err)
16514                         break;
16515                 bpfptr_add(&u_core_relo, rec_size);
16516         }
16517         return err;
16518 }
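
/* Note that, unlike func_info/line_info, core_relo.insn_off is expressed in
 * bytes rather than in instruction indices, hence the "% 8" alignment check
 * and the "/ 8" conversion above (8 == sizeof(struct bpf_insn)).  For example,
 * a relocation with insn_off == 24 applies to prog->insnsi[3].
 */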
16519 
16520 static int check_btf_info_early(struct bpf_verifier_env *env,
16521                                 const union bpf_attr *attr,
16522                                 bpfptr_t uattr)
16523 {
16524         struct btf *btf;
16525         int err;
16526 
16527         if (!attr->func_info_cnt && !attr->line_info_cnt) {
16528                 if (check_abnormal_return(env))
16529                         return -EINVAL;
16530                 return 0;
16531         }
16532 
16533         btf = btf_get_by_fd(attr->prog_btf_fd);
16534         if (IS_ERR(btf))
16535                 return PTR_ERR(btf);
16536         if (btf_is_kernel(btf)) {
16537                 btf_put(btf);
16538                 return -EACCES;
16539         }
16540         env->prog->aux->btf = btf;
16541 
16542         err = check_btf_func_early(env, attr, uattr);
16543         if (err)
16544                 return err;
16545         return 0;
16546 }
16547 
16548 static int check_btf_info(struct bpf_verifier_env *env,
16549                           const union bpf_attr *attr,
16550                           bpfptr_t uattr)
16551 {
16552         int err;
16553 
16554         if (!attr->func_info_cnt && !attr->line_info_cnt) {
16555                 if (check_abnormal_return(env))
16556                         return -EINVAL;
16557                 return 0;
16558         }
16559 
16560         err = check_btf_func(env, attr, uattr);
16561         if (err)
16562                 return err;
16563 
16564         err = check_btf_line(env, attr, uattr);
16565         if (err)
16566                 return err;
16567 
16568         err = check_core_relo(env, attr, uattr);
16569         if (err)
16570                 return err;
16571 
16572         return 0;
16573 }
16574 
16575 /* check %cur's range satisfies %old's */
16576 static bool range_within(const struct bpf_reg_state *old,
16577                          const struct bpf_reg_state *cur)
16578 {
16579         return old->umin_value <= cur->umin_value &&
16580                old->umax_value >= cur->umax_value &&
16581                old->smin_value <= cur->smin_value &&
16582                old->smax_value >= cur->smax_value &&
16583                old->u32_min_value <= cur->u32_min_value &&
16584                old->u32_max_value >= cur->u32_max_value &&
16585                old->s32_min_value <= cur->s32_min_value &&
16586                old->s32_max_value >= cur->s32_max_value;
16587 }
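
/* For example, an old (explored) scalar known to be in [0, 100] is satisfied
 * by a current scalar in [10, 50], but not by one in [10, 200].  All four
 * range pairs (u64, s64, u32, s32) must lie within the old bounds for the
 * current register to be considered safe here.
 */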
16588 
16589 /* If in the old state two registers had the same id, then they need to have
16590  * the same id in the new state as well.  But that id could be different from
16591  * the old state, so we need to track the mapping from old to new ids.
16592  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
16593  * regs with old id 5 must also have new id 9 for the new state to be safe.
16594  * The mapping must also be one-to-one: a reg with a different old id must not
16595  * map to new id 9 (note the map[i].cur == cur_id check below).
16596  * So we look through our idmap to see if this old id has been seen before.  If
16597  * so, we require the new id to match; otherwise, we add the id pair to the map.
16598  */
16599 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
16600 {
16601         struct bpf_id_pair *map = idmap->map;
16602         unsigned int i;
16603 
16604         /* either both IDs should be set or both should be zero */
16605         if (!!old_id != !!cur_id)
16606                 return false;
16607 
16608         if (old_id == 0) /* cur_id == 0 as well */
16609                 return true;
16610 
16611         for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
16612                 if (!map[i].old) {
16613                         /* Reached an empty slot; haven't seen this id before */
16614                         map[i].old = old_id;
16615                         map[i].cur = cur_id;
16616                         return true;
16617                 }
16618                 if (map[i].old == old_id)
16619                         return map[i].cur == cur_id;
16620                 if (map[i].cur == cur_id)
16621                         return false;
16622         }
16623         /* We ran out of idmap slots, which should be impossible */
16624         WARN_ON_ONCE(1);
16625         return false;
16626 }
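
/* Example (illustrative): with an empty idmap, check_ids(5, 9, idmap) records
 * the pair and succeeds; a later check_ids(5, 9, idmap) succeeds,
 * check_ids(5, 10, idmap) fails (old id 5 is already bound to 9), and
 * check_ids(6, 9, idmap) fails as well (cur id 9 is already claimed by
 * old id 5).
 */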
16627 
16628 /* Similar to check_ids(), but allocate a unique temporary ID
16629  * for 'old_id' or 'cur_id' of zero.
16630  * This makes pairs like '0 vs unique ID', 'unique ID vs 0' valid.
16631  */
16632 static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
16633 {
16634         old_id = old_id ? old_id : ++idmap->tmp_id_gen;
16635         cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
16636 
16637         return check_ids(old_id, cur_id, idmap);
16638 }
16639 
16640 static void clean_func_state(struct bpf_verifier_env *env,
16641                              struct bpf_func_state *st)
16642 {
16643         enum bpf_reg_liveness live;
16644         int i, j;
16645 
16646         for (i = 0; i < BPF_REG_FP; i++) {
16647                 live = st->regs[i].live;
16648                 /* liveness must not touch this register anymore */
16649                 st->regs[i].live |= REG_LIVE_DONE;
16650                 if (!(live & REG_LIVE_READ))
16651                         /* since the register is unused, clear its state
16652                          * to make further comparison simpler
16653                          */
16654                         __mark_reg_not_init(env, &st->regs[i]);
16655         }
16656 
16657         for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
16658                 live = st->stack[i].spilled_ptr.live;
16659                 /* liveness must not touch this stack slot anymore */
16660                 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
16661                 if (!(live & REG_LIVE_READ)) {
16662                         __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
16663                         for (j = 0; j < BPF_REG_SIZE; j++)
16664                                 st->stack[i].slot_type[j] = STACK_INVALID;
16665                 }
16666         }
16667 }
16668 
16669 static void clean_verifier_state(struct bpf_verifier_env *env,
16670                                  struct bpf_verifier_state *st)
16671 {
16672         int i;
16673 
16674         if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
16675                 /* all regs in this state in all frames were already marked */
16676                 return;
16677 
16678         for (i = 0; i <= st->curframe; i++)
16679                 clean_func_state(env, st->frame[i]);
16680 }
16681 
16682 /* the parentage chains form a tree.
16683  * the verifier states are added to state lists at given insns and
16684  * pushed into the state stack for future exploration.
16685  * when the verifier reaches a bpf_exit insn some of the verifier states
16686  * stored in the state lists have their final liveness state already,
16687  * but a lot of states will get revised from liveness point of view when
16688  * the verifier explores other branches.
16689  * Example:
16690  * 1: r0 = 1
16691  * 2: if r1 == 100 goto pc+1
16692  * 3: r0 = 2
16693  * 4: exit
16694  * when the verifier reaches exit insn the register r0 in the state list of
16695  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
16696  * of insn 2 and goes exploring further. At the insn 4 it will walk the
16697  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
16698  *
16699  * Since the verifier pushes the branch states as it sees them while exploring
16700  * the program the condition of walking the branch instruction for the second
16701  * time means that all states below this branch were already explored and
16702  * their final liveness marks are already propagated.
16703  * Hence when the verifier completes the search of state list in is_state_visited()
16704  * we can call this clean_live_states() function to mark all liveness states
16705  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
16706  * will not be used.
16707  * This function also clears the registers and stack slots that were not
16708  * marked REG_LIVE_READ, to simplify state comparison.
16709  *
16710  * An important note here is that walking the same branch instruction in the
16711  * callee doesn't mean that the states are DONE. The verifier also has to
16712  * compare the callsites.
16713  */
16714 static void clean_live_states(struct bpf_verifier_env *env, int insn,
16715                               struct bpf_verifier_state *cur)
16716 {
16717         struct bpf_verifier_state_list *sl;
16718 
16719         sl = *explored_state(env, insn);
16720         while (sl) {
16721                 if (sl->state.branches)
16722                         goto next;
16723                 if (sl->state.insn_idx != insn ||
16724                     !same_callsites(&sl->state, cur))
16725                         goto next;
16726                 clean_verifier_state(env, &sl->state);
16727 next:
16728                 sl = sl->next;
16729         }
16730 }
16731 
16732 static bool regs_exact(const struct bpf_reg_state *rold,
16733                        const struct bpf_reg_state *rcur,
16734                        struct bpf_idmap *idmap)
16735 {
16736         return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
16737                check_ids(rold->id, rcur->id, idmap) &&
16738                check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
16739 }
16740 
16741 enum exact_level {
16742         NOT_EXACT,
16743         EXACT,
16744         RANGE_WITHIN
16745 };
16746 
16747 /* Returns true if (rold safe implies rcur safe) */
16748 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
16749                     struct bpf_reg_state *rcur, struct bpf_idmap *idmap,
16750                     enum exact_level exact)
16751 {
16752         if (exact == EXACT)
16753                 return regs_exact(rold, rcur, idmap);
16754 
16755         if (!(rold->live & REG_LIVE_READ) && exact == NOT_EXACT)
16756                 /* explored state didn't use this */
16757                 return true;
16758         if (rold->type == NOT_INIT) {
16759                 if (exact == NOT_EXACT || rcur->type == NOT_INIT)
16760                         /* explored state can't have used this */
16761                         return true;
16762         }
16763 
16764         /* Enforce that register types have to match exactly, including their
16765          * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
16766          * rule.
16767          *
16768          * One can make a point that using a pointer register as unbounded
16769          * SCALAR would be technically acceptable, but this could lead to
16770          * pointer leaks because scalars are allowed to leak while pointers
16771          * are not. We could make this safe in special cases if root is
16772          * calling us, but it's probably not worth the hassle.
16773          *
16774          * Also, register types that are *not* MAYBE_NULL could technically be
16775          * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
16776          * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
16777          * to the same map).
16778          * However, if the old MAYBE_NULL register then got NULL checked,
16779          * doing so could have affected others with the same id, and we can't
16780          * check for that because we lost the id when we converted to
16781          * a non-MAYBE_NULL variant.
16782          * So, as a general rule we don't allow mixing MAYBE_NULL and
16783          * non-MAYBE_NULL registers as well.
16784          */
16785         if (rold->type != rcur->type)
16786                 return false;
16787 
16788         switch (base_type(rold->type)) {
16789         case SCALAR_VALUE:
16790                 if (env->explore_alu_limits) {
16791                         /* explore_alu_limits disables tnum_in() and range_within()
16792                          * logic and requires everything to be strict
16793                          */
16794                         return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
16795                                check_scalar_ids(rold->id, rcur->id, idmap);
16796                 }
16797                 if (!rold->precise && exact == NOT_EXACT)
16798                         return true;
16799                 if ((rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST))
16800                         return false;
16801                 if ((rold->id & BPF_ADD_CONST) && (rold->off != rcur->off))
16802                         return false;
16803                 /* Why check_ids() for scalar registers?
16804                  *
16805                  * Consider the following BPF code:
16806                  *   1: r6 = ... unbound scalar, ID=a ...
16807                  *   2: r7 = ... unbound scalar, ID=b ...
16808                  *   3: if (r6 > r7) goto +1
16809                  *   4: r6 = r7
16810                  *   5: if (r6 > X) goto ...
16811                  *   6: ... memory operation using r7 ...
16812                  *
16813                  * First verification path is [1-6]:
16814                  * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
16815                  * - at (5) r6 would be marked <= X, find_equal_scalars() would also mark
16816                  *   r7 <= X, because r6 and r7 share same id.
16817                  * Next verification path is [1-4, 6].
16818                  *
16819                  * Instruction (6) would be reached in two states:
16820                  *   I.  r6{.id=b}, r7{.id=b} via path 1-6;
16821                  *   II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
16822                  *
16823                  * Use check_ids() to distinguish these states.
16824                  * ---
16825                  * Also verify that new value satisfies old value range knowledge.
16826                  */
16827                 return range_within(rold, rcur) &&
16828                        tnum_in(rold->var_off, rcur->var_off) &&
16829                        check_scalar_ids(rold->id, rcur->id, idmap);
16830         case PTR_TO_MAP_KEY:
16831         case PTR_TO_MAP_VALUE:
16832         case PTR_TO_MEM:
16833         case PTR_TO_BUF:
16834         case PTR_TO_TP_BUFFER:
16835                 /* If the new min/max/var_off satisfy the old ones and
16836                  * everything else matches, we are OK.
16837                  */
16838                 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
16839                        range_within(rold, rcur) &&
16840                        tnum_in(rold->var_off, rcur->var_off) &&
16841                        check_ids(rold->id, rcur->id, idmap) &&
16842                        check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
16843         case PTR_TO_PACKET_META:
16844         case PTR_TO_PACKET:
16845                 /* We must have at least as much range as the old ptr
16846                  * did, so that any accesses which were safe before are
16847                  * still safe.  This is true even if old range < old off,
16848                  * since someone could have accessed through (ptr - k), or
16849                  * even done ptr -= k in a register, to get a safe access.
16850                  */
16851                 if (rold->range > rcur->range)
16852                         return false;
16853                 /* If the offsets don't match, we can't trust our alignment;
16854                  * nor can we be sure that we won't fall out of range.
16855                  */
16856                 if (rold->off != rcur->off)
16857                         return false;
16858                 /* id relations must be preserved */
16859                 if (!check_ids(rold->id, rcur->id, idmap))
16860                         return false;
16861                 /* new val must satisfy old val knowledge */
16862                 return range_within(rold, rcur) &&
16863                        tnum_in(rold->var_off, rcur->var_off);
16864         case PTR_TO_STACK:
16865                 /* two stack pointers are equal only if they're pointing to
16866                  * the same stack frame, since fp-8 in foo != fp-8 in bar
16867                  */
16868                 return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
16869         case PTR_TO_ARENA:
16870                 return true;
16871         default:
16872                 return regs_exact(rold, rcur, idmap);
16873         }
16874 }
16875 
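/* Template register representing "unbound scalar that was read": an imprecise
 * unknown scalar with REG_LIVE_READ set.  scalar_reg_for_stack() below returns
 * it when a stack region consists of STACK_MISC slots (or STACK_INVALID ones,
 * if uninitialized stack reads are allowed), so that stacksafe() can compare
 * such a region against a spilled 64-bit scalar via regsafe().
 */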
16876 static struct bpf_reg_state unbound_reg;
16877 
16878 static __init int unbound_reg_init(void)
16879 {
16880         __mark_reg_unknown_imprecise(&unbound_reg);
16881         unbound_reg.live |= REG_LIVE_READ;
16882         return 0;
16883 }
16884 late_initcall(unbound_reg_init);
16885 
16886 static bool is_stack_all_misc(struct bpf_verifier_env *env,
16887                               struct bpf_stack_state *stack)
16888 {
16889         u32 i;
16890 
16891         for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i) {
16892                 if ((stack->slot_type[i] == STACK_MISC) ||
16893                     (stack->slot_type[i] == STACK_INVALID && env->allow_uninit_stack))
16894                         continue;
16895                 return false;
16896         }
16897 
16898         return true;
16899 }
16900 
16901 static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env,
16902                                                   struct bpf_stack_state *stack)
16903 {
16904         if (is_spilled_scalar_reg64(stack))
16905                 return &stack->spilled_ptr;
16906 
16907         if (is_stack_all_misc(env, stack))
16908                 return &unbound_reg;
16909 
16910         return NULL;
16911 }
16912 
16913 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
16914                       struct bpf_func_state *cur, struct bpf_idmap *idmap,
16915                       enum exact_level exact)
16916 {
16917         int i, spi;
16918 
16919         /* walk slots of the explored stack and ignore any additional
16920          * slots in the current stack, since explored(safe) state
16921          * didn't use them
16922          */
16923         for (i = 0; i < old->allocated_stack; i++) {
16924                 struct bpf_reg_state *old_reg, *cur_reg;
16925 
16926                 spi = i / BPF_REG_SIZE;
16927 
16928                 if (exact != NOT_EXACT &&
16929                     (i >= cur->allocated_stack ||
16930                      old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
16931                      cur->stack[spi].slot_type[i % BPF_REG_SIZE]))
16932                         return false;
16933 
16934                 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)
16935                     && exact == NOT_EXACT) {
16936                         i += BPF_REG_SIZE - 1;
16937                         /* explored state didn't use this */
16938                         continue;
16939                 }
16940 
16941                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
16942                         continue;
16943 
16944                 if (env->allow_uninit_stack &&
16945                     old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
16946                         continue;
16947 
16948                 /* explored stack has more populated slots than current stack
16949                  * and these slots were used
16950                  */
16951                 if (i >= cur->allocated_stack)
16952                         return false;
16953 
16954                 /* 64-bit scalar spill vs all slots MISC and vice versa.
16955                  * A load from all-MISC slots produces an unbound scalar.
16956                  * Construct a fake register for such a stack region and call
16957                  * regsafe() to ensure scalar ids are compared.
16958                  */
16959                 old_reg = scalar_reg_for_stack(env, &old->stack[spi]);
16960                 cur_reg = scalar_reg_for_stack(env, &cur->stack[spi]);
16961                 if (old_reg && cur_reg) {
16962                         if (!regsafe(env, old_reg, cur_reg, idmap, exact))
16963                                 return false;
16964                         i += BPF_REG_SIZE - 1;
16965                         continue;
16966                 }
16967 
16968                 /* if old state was safe with misc data in the stack
16969                  * it will be safe with zero-initialized stack.
16970                  * The opposite is not true
16971                  */
16972                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
16973                     cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
16974                         continue;
16975                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
16976                     cur->stack[spi].slot_type[i % BPF_REG_SIZE])
16977                         /* Ex: old explored (safe) state has STACK_SPILL in
16978                          * this stack slot, but current has STACK_MISC ->
16979                          * these verifier states are not equivalent,
16980                          * return false to continue verification of this path
16981                          */
16982                         return false;
16983                 if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
16984                         continue;
16985                 /* Both old and cur have the same slot_type */
16986                 switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
16987                 case STACK_SPILL:
16988                         /* when explored and current stack slot are both storing
16989                          * spilled registers, check that the stored pointer types
16990                          * are the same as well.
16991                          * Ex: explored safe path could have stored
16992                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
16993                          * but current path has stored:
16994                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
16995                          * such verifier states are not equivalent.
16996                          * return false to continue verification of this path
16997                          */
16998                         if (!regsafe(env, &old->stack[spi].spilled_ptr,
16999                                      &cur->stack[spi].spilled_ptr, idmap, exact))
17000                                 return false;
17001                         break;
17002                 case STACK_DYNPTR:
17003                         old_reg = &old->stack[spi].spilled_ptr;
17004                         cur_reg = &cur->stack[spi].spilled_ptr;
17005                         if (old_reg->dynptr.type != cur_reg->dynptr.type ||
17006                             old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
17007                             !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
17008                                 return false;
17009                         break;
17010                 case STACK_ITER:
17011                         old_reg = &old->stack[spi].spilled_ptr;
17012                         cur_reg = &cur->stack[spi].spilled_ptr;
17013                         /* iter.depth is not compared between states as it
17014                          * doesn't matter for correctness and would otherwise
17015                          * prevent convergence; we maintain it only to prevent
17016                          * infinite loop check triggering, see
17017                          * iter_active_depths_differ()
17018                          */
17019                         if (old_reg->iter.btf != cur_reg->iter.btf ||
17020                             old_reg->iter.btf_id != cur_reg->iter.btf_id ||
17021                             old_reg->iter.state != cur_reg->iter.state ||
17022                             /* ignore {old_reg,cur_reg}->iter.depth, see above */
17023                             !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
17024                                 return false;
17025                         break;
17026                 case STACK_MISC:
17027                 case STACK_ZERO:
17028                 case STACK_INVALID:
17029                         continue;
17030                 /* Ensure that new unhandled slot types return false by default */
17031                 default:
17032                         return false;
17033                 }
17034         }
17035         return true;
17036 }
17037 
17038 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
17039                     struct bpf_idmap *idmap)
17040 {
17041         int i;
17042 
17043         if (old->acquired_refs != cur->acquired_refs)
17044                 return false;
17045 
17046         for (i = 0; i < old->acquired_refs; i++) {
17047                 if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap))
17048                         return false;
17049         }
17050 
17051         return true;
17052 }
17053 
17054 /* compare two verifier states
17055  *
17056  * all states stored in state_list are known to be valid, since
17057  * verifier reached 'bpf_exit' instruction through them
17058  *
17059  * this function is called when the verifier explores different branches of
17060  * execution popped from the state stack. If it sees an old state that has
17061  * a stricter register state and a stricter stack state, then this execution
17062  * branch doesn't need to be explored further, since verifier already
17063  * concluded that more strict state leads to valid finish.
17064  *
17065  * Therefore two states are equivalent if the explored register state and
17066  * stack state are more conservative than the current ones.
17067  * Example:
17068  *       explored                   current
17069  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
17070  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
17071  *
17072  * In other words if current stack state (one being explored) has more
17073  * valid slots than old one that already passed validation, it means
17074  * the verifier can stop exploring and conclude that current state is valid too
17075  *
17076  * Similarly with registers. If the explored state has a register marked as
17077  * invalid whereas the register in the current state is meaningful, it means that
17078  * the current state will reach 'bpf_exit' instruction safely
17079  */
17080 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
17081                               struct bpf_func_state *cur, enum exact_level exact)
17082 {
17083         int i;
17084 
17085         if (old->callback_depth > cur->callback_depth)
17086                 return false;
17087 
17088         for (i = 0; i < MAX_BPF_REG; i++)
17089                 if (!regsafe(env, &old->regs[i], &cur->regs[i],
17090                              &env->idmap_scratch, exact))
17091                         return false;
17092 
17093         if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
17094                 return false;
17095 
17096         if (!refsafe(old, cur, &env->idmap_scratch))
17097                 return false;
17098 
17099         return true;
17100 }
17101 
17102 static void reset_idmap_scratch(struct bpf_verifier_env *env)
17103 {
17104         env->idmap_scratch.tmp_id_gen = env->id_gen;
17105         memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
17106 }
17107 
17108 static bool states_equal(struct bpf_verifier_env *env,
17109                          struct bpf_verifier_state *old,
17110                          struct bpf_verifier_state *cur,
17111                          enum exact_level exact)
17112 {
17113         int i;
17114 
17115         if (old->curframe != cur->curframe)
17116                 return false;
17117 
17118         reset_idmap_scratch(env);
17119 
17120         /* Verification state from speculative execution simulation
17121          * must never prune a non-speculative execution one.
17122          */
17123         if (old->speculative && !cur->speculative)
17124                 return false;
17125 
17126         if (old->active_lock.ptr != cur->active_lock.ptr)
17127                 return false;
17128 
17129         /* Old and cur active_lock ids have to be either both present
17130          * or both absent.
17131          */
17132         if (!!old->active_lock.id != !!cur->active_lock.id)
17133                 return false;
17134 
17135         if (old->active_lock.id &&
17136             !check_ids(old->active_lock.id, cur->active_lock.id, &env->idmap_scratch))
17137                 return false;
17138 
17139         if (old->active_rcu_lock != cur->active_rcu_lock)
17140                 return false;
17141 
17142         if (old->active_preempt_lock != cur->active_preempt_lock)
17143                 return false;
17144 
17145         if (old->in_sleepable != cur->in_sleepable)
17146                 return false;
17147 
17148         /* for states to be equal callsites have to be the same
17149          * and all frame states need to be equivalent
17150          */
17151         for (i = 0; i <= old->curframe; i++) {
17152                 if (old->frame[i]->callsite != cur->frame[i]->callsite)
17153                         return false;
17154                 if (!func_states_equal(env, old->frame[i], cur->frame[i], exact))
17155                         return false;
17156         }
17157         return true;
17158 }
17159 
17160 /* Return 0 if no propagation happened. Return negative error code if error
17161  * happened. Otherwise, return the propagated bit.
17162  */
17163 static int propagate_liveness_reg(struct bpf_verifier_env *env,
17164                                   struct bpf_reg_state *reg,
17165                                   struct bpf_reg_state *parent_reg)
17166 {
17167         u8 parent_flag = parent_reg->live & REG_LIVE_READ;
17168         u8 flag = reg->live & REG_LIVE_READ;
17169         int err;
17170 
17171         /* By the time we get here, the read flags of PARENT_REG or REG could be any
17172          * of REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
17173          * for propagation if PARENT_REG already has the strongest REG_LIVE_READ64.
17174          */
17175         if (parent_flag == REG_LIVE_READ64 ||
17176             /* Or if there is no read flag from REG. */
17177             !flag ||
17178             /* Or if the read flag from REG is the same as PARENT_REG. */
17179             parent_flag == flag)
17180                 return 0;
17181 
17182         err = mark_reg_read(env, reg, parent_reg, flag);
17183         if (err)
17184                 return err;
17185 
17186         return flag;
17187 }
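
/* Illustrative sketch (hypothetical pseudo-BPF, not from an actual test):
 * suppose a checkpoint's r6 already carries REG_LIVE_READ32 because one
 * pruned path only did
 *
 *   w0 = w6                   ; 32-bit read of r6
 *
 * while a later equivalent state's continuation also did
 *
 *   r0 = r6                   ; 64-bit read of r6
 *
 * When the current path is pruned against that state, propagate_liveness_reg()
 * must upgrade the mark to REG_LIVE_READ64 even though a read flag is already
 * present; otherwise mark_insn_zext() would not learn that all 64 bits of r6
 * are needed, and a required zero-extension could be omitted on 32-bit JITs.
 */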
17188 
17189 /* A write screens off any subsequent reads; but write marks come from the
17190  * straight-line code between a state and its parent.  When we arrive at an
17191  * equivalent state (jump target or such) we didn't arrive by the straight-line
17192  * code, so read marks in the state must propagate to the parent regardless
17193  * of the state's write marks. That's what 'parent == state->parent' comparison
17194  * in mark_reg_read() is for.
17195  */
17196 static int propagate_liveness(struct bpf_verifier_env *env,
17197                               const struct bpf_verifier_state *vstate,
17198                               struct bpf_verifier_state *vparent)
17199 {
17200         struct bpf_reg_state *state_reg, *parent_reg;
17201         struct bpf_func_state *state, *parent;
17202         int i, frame, err = 0;
17203 
17204         if (vparent->curframe != vstate->curframe) {
17205                 WARN(1, "propagate_live: parent frame %d current frame %d\n",
17206                      vparent->curframe, vstate->curframe);
17207                 return -EFAULT;
17208         }
17209         /* Propagate read liveness of registers... */
17210         BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
17211         for (frame = 0; frame <= vstate->curframe; frame++) {
17212                 parent = vparent->frame[frame];
17213                 state = vstate->frame[frame];
17214                 parent_reg = parent->regs;
17215                 state_reg = state->regs;
17216                 /* We don't need to worry about FP liveness, it's read-only */
17217                 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
17218                         err = propagate_liveness_reg(env, &state_reg[i],
17219                                                      &parent_reg[i]);
17220                         if (err < 0)
17221                                 return err;
17222                         if (err == REG_LIVE_READ64)
17223                                 mark_insn_zext(env, &parent_reg[i]);
17224                 }
17225 
17226                 /* Propagate stack slots. */
17227                 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
17228                             i < parent->allocated_stack / BPF_REG_SIZE; i++) {
17229                         parent_reg = &parent->stack[i].spilled_ptr;
17230                         state_reg = &state->stack[i].spilled_ptr;
17231                         err = propagate_liveness_reg(env, state_reg,
17232                                                      parent_reg);
17233                         if (err < 0)
17234                                 return err;
17235                 }
17236         }
17237         return 0;
17238 }
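
/* Illustrative sketch of the write-mark caveat described above
 * propagate_liveness() (hypothetical pseudo-BPF, not from an actual test):
 *
 *   0: r6 = 1
 *   1: if r1 != 0 goto +1
 *   2: r6 = 2                 ; straight-line write on the fall-through path
 *   3: r0 = r6                ; <- jump target; equivalent states meet here
 *
 * Suppose the fall-through path (0,1,2,3) is verified first and a state is
 * stored at insn 3: it carries a write mark for r6 (from insn 2) and, once
 * its continuation runs, a read mark for r6 (from insn 3). When the jump
 * path (0,1,3) is later pruned at insn 3, that read mark must still be
 * propagated into the jump path's parentage chain, because the jump path
 * never executed insn 2 and so the stored state's write mark must not screen
 * the read off.
 */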
17239 
17240 /* find precise scalars in the previous equivalent state and
17241  * propagate them into the current state
17242  */
17243 static int propagate_precision(struct bpf_verifier_env *env,
17244                                const struct bpf_verifier_state *old)
17245 {
17246         struct bpf_reg_state *state_reg;
17247         struct bpf_func_state *state;
17248         int i, err = 0, fr;
17249         bool first;
17250 
17251         for (fr = old->curframe; fr >= 0; fr--) {
17252                 state = old->frame[fr];
17253                 state_reg = state->regs;
17254                 first = true;
17255                 for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
17256                         if (state_reg->type != SCALAR_VALUE ||
17257                             !state_reg->precise ||
17258                             !(state_reg->live & REG_LIVE_READ))
17259                                 continue;
17260                         if (env->log.level & BPF_LOG_LEVEL2) {
17261                                 if (first)
17262                                         verbose(env, "frame %d: propagating r%d", fr, i);
17263                                 else
17264                                         verbose(env, ",r%d", i);
17265                         }
17266                         bt_set_frame_reg(&env->bt, fr, i);
17267                         first = false;
17268                 }
17269 
17270                 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
17271                         if (!is_spilled_reg(&state->stack[i]))
17272                                 continue;
17273                         state_reg = &state->stack[i].spilled_ptr;
17274                         if (state_reg->type != SCALAR_VALUE ||
17275                             !state_reg->precise ||
17276                             !(state_reg->live & REG_LIVE_READ))
17277                                 continue;
17278                         if (env->log.level & BPF_LOG_LEVEL2) {
17279                                 if (first)
17280                                         verbose(env, "frame %d: propagating fp%d",
17281                                                 fr, (-i - 1) * BPF_REG_SIZE);
17282                                 else
17283                                         verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
17284                         }
17285                         bt_set_frame_slot(&env->bt, fr, i);
17286                         first = false;
17287                 }
17288                 if (!first)
17289                         verbose(env, "\n");
17290         }
17291 
17292         err = mark_chain_precision_batch(env);
17293         if (err < 0)
17294                 return err;
17295 
17296         return 0;
17297 }
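
/* Illustrative sketch (hypothetical pseudo-BPF, not from an actual test) of
 * why precision marks must be copied back on pruning:
 *
 *   ...
 *   5: r0 = r10
 *   6: r0 += r6               ; r6 is used as a stack offset -> marked precise
 *   7: r1 = *(u64 *)(r0 + 0)
 *
 * If the old, already-verified state proved this access safe only because its
 * r6 was tracked precisely, then a current state pruned against it must push
 * the same precision back through its parentage chain (bt_set_frame_reg() /
 * bt_set_frame_slot() plus mark_chain_precision_batch()). Otherwise the
 * checkpoints on the current path would keep r6 imprecise and could later be
 * used to prune a state with a different, unsafe r6 value.
 */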
17298 
17299 static bool states_maybe_looping(struct bpf_verifier_state *old,
17300                                  struct bpf_verifier_state *cur)
17301 {
17302         struct bpf_func_state *fold, *fcur;
17303         int i, fr = cur->curframe;
17304 
17305         if (old->curframe != fr)
17306                 return false;
17307 
17308         fold = old->frame[fr];
17309         fcur = cur->frame[fr];
17310         for (i = 0; i < MAX_BPF_REG; i++)
17311                 if (memcmp(&fold->regs[i], &fcur->regs[i],
17312                            offsetof(struct bpf_reg_state, parent)))
17313                         return false;
17314         return true;
17315 }
17316 
17317 static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
17318 {
17319         return env->insn_aux_data[insn_idx].is_iter_next;
17320 }
17321 
17322 /* is_state_visited() handles iter_next() (see process_iter_next_call() for
17323  * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
17324  * states to match, which otherwise would look like an infinite loop. So while
17325  * iter_next() calls are taken care of, we still need to be careful and
17326  * prevent erroneous and too eager declaration of "infinite loop", when
17327  * iterators are involved.
17328  *
17329  * Here's a situation in pseudo-BPF assembly form:
17330  *
17331  *   0: again:                          ; set up iter_next() call args
17332  *   1:   r1 = &it                      ; <CHECKPOINT HERE>
17333  *   2:   call bpf_iter_num_next        ; this is iter_next() call
17334  *   3:   if r0 == 0 goto done
17335  *   4:   ... something useful here ...
17336  *   5:   goto again                    ; another iteration
17337  *   6: done:
17338  *   7:   r1 = &it
17339  *   8:   call bpf_iter_num_destroy     ; clean up iter state
17340  *   9:   exit
17341  *
17342  * This is a typical loop. Let's assume that we have a prune point at 1:,
17343  * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
17344  * again`, assuming other heuristics don't get in the way).
17345  *
17346  * When we first come to 1:, let's say we have some state X. We proceed
17347  * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
17348  * Now we come back to validate that forked ACTIVE state. We proceed through
17349  * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
17350  * are converging. But the problem is that we don't know that yet, as this
17351  * convergence has to happen at iter_next() call site only. So if nothing is
17352  * done, at 1: verifier will use bounded loop logic and declare infinite
17353  * looping (and would be *technically* correct, if not for iterator's
17354  * "eventual sticky NULL" contract, see process_iter_next_call()). But we
17355  * don't want that. So what we do in process_iter_next_call() when we go on
17356  * don't want that. So what we do in process_iter_next_call(), when we go on
17357  * another ACTIVE iteration, is bump slot->iter.depth to mark that it's
17358  * check if any of the *ACTIVE* iterator states depths differ. If yes, we
17359  * pretend we are not looping and wait for next iter_next() call.
17360  *
17361  * This only applies to ACTIVE state. In DRAINED state we don't expect to
17362  * loop, because that would actually mean infinite loop, as DRAINED state is
17363  * "sticky", and so we'll keep returning into the same instruction with the
17364  * same state (at least in one of possible code paths).
17365  *
17366  * This approach allows us to keep the infinite loop heuristic even in the face
17367  * of an active iterator. E.g., the C snippet below is (and will be) detected as
17368  * infinitely looping:
17369  *
17370  *   struct bpf_iter_num it;
17371  *   int *p, x;
17372  *
17373  *   bpf_iter_num_new(&it, 0, 10);
17374  *   while ((p = bpf_iter_num_next(&it))) {
17375  *       x = *p;
17376  *       while (x--) {} // <<-- infinite loop here
17377  *   }
17378  *
17379  */
17380 static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
17381 {
17382         struct bpf_reg_state *slot, *cur_slot;
17383         struct bpf_func_state *state;
17384         int i, fr;
17385 
17386         for (fr = old->curframe; fr >= 0; fr--) {
17387                 state = old->frame[fr];
17388                 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
17389                         if (state->stack[i].slot_type[0] != STACK_ITER)
17390                                 continue;
17391 
17392                         slot = &state->stack[i].spilled_ptr;
17393                         if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
17394                                 continue;
17395 
17396                         cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
17397                         if (cur_slot->iter.depth != slot->iter.depth)
17398                                 return true;
17399                 }
17400         }
17401         return false;
17402 }
17403 
17404 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
17405 {
17406         struct bpf_verifier_state_list *new_sl;
17407         struct bpf_verifier_state_list *sl, **pprev;
17408         struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry;
17409         int i, j, n, err, states_cnt = 0;
17410         bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx);
17411         bool add_new_state = force_new_state;
17412         bool force_exact;
17413 
17414         /* bpf progs typically have a pruning point every 4 instructions
17415          * http://vger.kernel.org/bpfconf2019.html#session-1
17416          * Do not add new state for future pruning if the verifier hasn't seen
17417          * at least 2 jumps and at least 8 instructions.
17418          * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
17419          * In tests that amounts to up to a 50% reduction in total verifier
17420          * memory consumption and a 20% verifier time speedup.
17421          */
17422         if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
17423             env->insn_processed - env->prev_insn_processed >= 8)
17424                 add_new_state = true;
17425 
17426         pprev = explored_state(env, insn_idx);
17427         sl = *pprev;
17428 
17429         clean_live_states(env, insn_idx, cur);
17430 
17431         while (sl) {
17432                 states_cnt++;
17433                 if (sl->state.insn_idx != insn_idx)
17434                         goto next;
17435 
17436                 if (sl->state.branches) {
17437                         struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
17438 
17439                         if (frame->in_async_callback_fn &&
17440                             frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
17441                                 /* Different async_entry_cnt means that the verifier is
17442                                  * processing another entry into an async callback.
17443                                  * Seeing the same state is not an indication of infinite
17444                                  * loop or infinite recursion.
17445                                  * But finding the same state doesn't mean that it's safe
17446                                  * to stop processing the current state. The previous state
17447                                  * hasn't yet reached bpf_exit, since state.branches > 0.
17448                                  * Checking in_async_callback_fn alone is not enough either,
17449                                  * since the verifier still needs to catch infinite loops
17450                                  * inside async callbacks.
17451                                  */
17452                                 goto skip_inf_loop_check;
17453                         }
17454                         /* BPF open-coded iterator loop detection is special.
17455                          * states_maybe_looping() logic is too simplistic in detecting
17456                          * states that *might* be equivalent, because it doesn't know
17457                          * about ID remapping, so don't even perform it.
17458                          * See process_iter_next_call() and iter_active_depths_differ()
17459                          * for an overview of the logic. When the current and one of the parent
17460                          * states are detected as equivalent, it's a good thing: we prove
17461                          * convergence and can stop simulating further iterations.
17462                          * It's safe to assume that the iterator loop will finish, taking into
17463                          * account the iter_next() contract of eventually returning a
17464                          * sticky NULL result.
17465                          *
17466                          * Note that states have to be compared exactly in this case because
17467                          * read and precision marks might not be finalized inside the loop.
17468                          * E.g. as in the program below:
17469                          *
17470                          *     1. r7 = -16
17471                          *     2. r6 = bpf_get_prandom_u32()
17472                          *     3. while (bpf_iter_num_next(&fp[-8])) {
17473                          *     4.   if (r6 != 42) {
17474                          *     5.     r7 = -32
17475                          *     6.     r6 = bpf_get_prandom_u32()
17476                          *     7.     continue
17477                          *     8.   }
17478                          *     9.   r0 = r10
17479                          *    10.   r0 += r7
17480                          *    11.   r8 = *(u64 *)(r0 + 0)
17481                          *    12.   r6 = bpf_get_prandom_u32()
17482                          *    13. }
17483                          *
17484                          * Here the verifier would first visit path 1-3, create a checkpoint at 3
17485                          * with r7=-16, then continue to 4-7,3. The existing checkpoint at 3 does
17486                          * not have a read or precision mark for r7 yet, thus an inexact state
17487                          * comparison would discard the current state with r7=-32
17488                          * => the unsafe memory access at 11 would not be caught.
17489                          */
17490                         if (is_iter_next_insn(env, insn_idx)) {
17491                                 if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
17492                                         struct bpf_func_state *cur_frame;
17493                                         struct bpf_reg_state *iter_state, *iter_reg;
17494                                         int spi;
17495 
17496                                         cur_frame = cur->frame[cur->curframe];
17497                                         /* btf_check_iter_kfuncs() enforces that
17498                                          * iter state pointer is always the first arg
17499                                          */
17500                                         iter_reg = &cur_frame->regs[BPF_REG_1];
17501                                         /* current state is valid due to states_equal(),
17502                                          * so we can assume valid iter and reg state,
17503                                          * no need for extra (re-)validations
17504                                          */
17505                                         spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
17506                                         iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
17507                                         if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
17508                                                 update_loop_entry(cur, &sl->state);
17509                                                 goto hit;
17510                                         }
17511                                 }
17512                                 goto skip_inf_loop_check;
17513                         }
17514                         if (is_may_goto_insn_at(env, insn_idx)) {
17515                                 if (sl->state.may_goto_depth != cur->may_goto_depth &&
17516                                     states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
17517                                         update_loop_entry(cur, &sl->state);
17518                                         goto hit;
17519                                 }
17520                         }
17521                         if (calls_callback(env, insn_idx)) {
17522                                 if (states_equal(env, &sl->state, cur, RANGE_WITHIN))
17523                                         goto hit;
17524                                 goto skip_inf_loop_check;
17525                         }
17526                         /* attempt to detect infinite loop to avoid unnecessary doomed work */
17527                         if (states_maybe_looping(&sl->state, cur) &&
17528                             states_equal(env, &sl->state, cur, EXACT) &&
17529                             !iter_active_depths_differ(&sl->state, cur) &&
17530                             sl->state.may_goto_depth == cur->may_goto_depth &&
17531                             sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
17532                                 verbose_linfo(env, insn_idx, "; ");
17533                                 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
17534                                 verbose(env, "cur state:");
17535                                 print_verifier_state(env, cur->frame[cur->curframe], true);
17536                                 verbose(env, "old state:");
17537                                 print_verifier_state(env, sl->state.frame[cur->curframe], true);
17538                                 return -EINVAL;
17539                         }
17540                         /* if the verifier is processing a loop, avoid adding new state
17541                          * too often, since different loop iterations have distinct
17542                          * states and may not help future pruning.
17543                          * This threshold shouldn't be too low to make sure that
17544                          * a loop with large bound will be rejected quickly.
17545                          * The most abusive loop will be:
17546                          * r1 += 1
17547                          * if r1 < 1000000 goto pc-2
17548                          * 1M insn_processed limit / 100 == 10k peak states.
17549                          * This threshold shouldn't be too high either, since states
17550                          * at the end of the loop are likely to be useful in pruning.
17551                          */
17552 skip_inf_loop_check:
17553                         if (!force_new_state &&
17554                             env->jmps_processed - env->prev_jmps_processed < 20 &&
17555                             env->insn_processed - env->prev_insn_processed < 100)
17556                                 add_new_state = false;
17557                         goto miss;
17558                 }
17559                 /* If sl->state is a part of a loop and this loop's entry is a part of
17560                  * current verification path then states have to be compared exactly.
17561                  * 'force_exact' is needed to catch the following case:
17562                  *
17563                  *                initial     Here state 'succ' was processed first,
17564                  *                  |         it was eventually tracked to produce a
17565                  *                  V         state identical to 'hdr'.
17566                  *     .---------> hdr        All branches from 'succ' had been explored
17567                  *     |            |         and thus 'succ' has its .branches == 0.
17568                  *     |            V
17569                  *     |    .------...        Suppose states 'cur' and 'succ' correspond
17570                  *     |    |       |         to the same instruction + callsites.
17571                  *     |    V       V         In such case it is necessary to check
17572                  *     |   ...     ...        if 'succ' and 'cur' are states_equal().
17573                  *     |    |       |         If 'succ' and 'cur' are a part of the
17574                  *     |    V       V         same loop exact flag has to be set.
17575                  *     |   succ <- cur        To check if that is the case, verify
17576                  *     |    |                 if loop entry of 'succ' is in current
17577                  *     |    V                 DFS path.
17578                  *     |   ...
17579                  *     |    |
17580                  *     '----'
17581                  *
17582                  * Additional details are in the comment before get_loop_entry().
17583                  */
17584                 loop_entry = get_loop_entry(&sl->state);
17585                 force_exact = loop_entry && loop_entry->branches > 0;
17586                 if (states_equal(env, &sl->state, cur, force_exact ? RANGE_WITHIN : NOT_EXACT)) {
17587                         if (force_exact)
17588                                 update_loop_entry(cur, loop_entry);
17589 hit:
17590                         sl->hit_cnt++;
17591                         /* reached equivalent register/stack state,
17592                          * prune the search.
17593                          * Registers read by the continuation are read by us.
17594                          * If we have any write marks in env->cur_state, they
17595                          * will prevent corresponding reads in the continuation
17596                          * from reaching our parent (an explored_state).  Our
17597                          * own state will get the read marks recorded, but
17598                          * they'll be immediately forgotten as we're pruning
17599                          * this state and will pop a new one.
17600                          */
17601                         err = propagate_liveness(env, &sl->state, cur);
17602 
17603                         /* if previous state reached the exit with precision and
17604                          * current state is equivalent to it (except precision marks)
17605                          * the precision needs to be propagated back in
17606                          * the current state.
17607                          */
17608                         if (is_jmp_point(env, env->insn_idx))
17609                                 err = err ? : push_jmp_history(env, cur, 0);
17610                         err = err ? : propagate_precision(env, &sl->state);
17611                         if (err)
17612                                 return err;
17613                         return 1;
17614                 }
17615 miss:
17616                 /* when a new state is not going to be added, do not increase the miss count.
17617                  * Otherwise several loop iterations will remove the state
17618                  * recorded earlier. The goal of these heuristics is to have
17619                  * states from some iterations of the loop (some in the beginning
17620                  * and some at the end) to help pruning.
17621                  */
17622                 if (add_new_state)
17623                         sl->miss_cnt++;
17624                 /* heuristic to determine whether this state is beneficial
17625                  * to keep checking from state equivalence point of view.
17626                  * Higher numbers increase max_states_per_insn and verification time,
17627                  * but do not meaningfully decrease insn_processed.
17628                  * 'n' controls how many times state could miss before eviction.
17629                  * Use bigger 'n' for checkpoints because evicting checkpoint states
17630                  * too early would hinder iterator convergence.
17631                  */
17632                 n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
17633                 if (sl->miss_cnt > sl->hit_cnt * n + n) {
17634                         /* the state is unlikely to be useful. Remove it to
17635                          * speed up verification
17636                          */
17637                         *pprev = sl->next;
17638                         if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE &&
17639                             !sl->state.used_as_loop_entry) {
17640                                 u32 br = sl->state.branches;
17641 
17642                                 WARN_ONCE(br,
17643                                           "BUG live_done but branches_to_explore %d\n",
17644                                           br);
17645                                 free_verifier_state(&sl->state, false);
17646                                 kfree(sl);
17647                                 env->peak_states--;
17648                         } else {
17649                                 /* cannot free this state, since the parentage chain may
17650                                  * still walk it later. Add it to the free_list instead, to
17651                                  * be freed at the end of verification
17652                                  */
17653                                 sl->next = env->free_list;
17654                                 env->free_list = sl;
17655                         }
17656                         sl = *pprev;
17657                         continue;
17658                 }
17659 next:
17660                 pprev = &sl->next;
17661                 sl = *pprev;
17662         }
17663 
17664         if (env->max_states_per_insn < states_cnt)
17665                 env->max_states_per_insn = states_cnt;
17666 
17667         if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
17668                 return 0;
17669 
17670         if (!add_new_state)
17671                 return 0;
17672 
17673         /* There were no equivalent states, remember the current one.
17674          * Technically the current state is not proven to be safe yet,
17675          * but it will either reach the outermost bpf_exit (which means it's safe)
17676          * or it will be rejected. When there are no loops the verifier won't be
17677          * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
17678          * again on the way to bpf_exit.
17679          * When looping, sl->state.branches will be > 0 and this state
17680          * will not be considered for equivalence until branches == 0.
17681          */
17682         new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
17683         if (!new_sl)
17684                 return -ENOMEM;
17685         env->total_states++;
17686         env->peak_states++;
17687         env->prev_jmps_processed = env->jmps_processed;
17688         env->prev_insn_processed = env->insn_processed;
17689 
17690         /* forget precise markings we inherited, see __mark_chain_precision */
17691         if (env->bpf_capable)
17692                 mark_all_scalars_imprecise(env, cur);
17693 
17694         /* add new state to the head of linked list */
17695         new = &new_sl->state;
17696         err = copy_verifier_state(new, cur);
17697         if (err) {
17698                 free_verifier_state(new, false);
17699                 kfree(new_sl);
17700                 return err;
17701         }
17702         new->insn_idx = insn_idx;
17703         WARN_ONCE(new->branches != 1,
17704                   "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
17705 
17706         cur->parent = new;
17707         cur->first_insn_idx = insn_idx;
17708         cur->dfs_depth = new->dfs_depth + 1;
17709         clear_jmp_history(cur);
17710         new_sl->next = *explored_state(env, insn_idx);
17711         *explored_state(env, insn_idx) = new_sl;
17712         /* connect new state to parentage chain. Current frame needs all
17713          * registers connected. Only r6 - r9 of the callers are alive (pushed
17714          * to the stack implicitly by JITs) so in callers' frames connect just
17715          * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
17716          * the state of the call instruction (with WRITTEN set), and r0 comes
17717          * from callee with its full parentage chain, anyway.
17718          */
17719         /* clear write marks in current state: the writes we did are not writes
17720          * our child did, so they don't screen off its reads from us.
17721          * (There are no read marks in current state, because reads always mark
17722          * their parent and current state never has children yet.  Only
17723          * explored_states can get read marks.)
17724          */
17725         for (j = 0; j <= cur->curframe; j++) {
17726                 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
17727                         cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
17728                 for (i = 0; i < BPF_REG_FP; i++)
17729                         cur->frame[j]->regs[i].live = REG_LIVE_NONE;
17730         }
17731 
17732         /* all stack frames are accessible from callee, clear them all */
17733         for (j = 0; j <= cur->curframe; j++) {
17734                 struct bpf_func_state *frame = cur->frame[j];
17735                 struct bpf_func_state *newframe = new->frame[j];
17736 
17737                 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
17738                         frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
17739                         frame->stack[i].spilled_ptr.parent =
17740                                                 &newframe->stack[i].spilled_ptr;
17741                 }
17742         }
17743         return 0;
17744 }
17745 
17746 /* Return true if it's OK to have the same insn return a different type. */
17747 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
17748 {
17749         switch (base_type(type)) {
17750         case PTR_TO_CTX:
17751         case PTR_TO_SOCKET:
17752         case PTR_TO_SOCK_COMMON:
17753         case PTR_TO_TCP_SOCK:
17754         case PTR_TO_XDP_SOCK:
17755         case PTR_TO_BTF_ID:
17756         case PTR_TO_ARENA:
17757                 return false;
17758         default:
17759                 return true;
17760         }
17761 }
17762 
17763 /* If an instruction was previously used with particular pointer types, then we
17764  * need to be careful to avoid cases such as the one below, where it may be ok
17765  * for one branch to access the pointer, but not ok for the other branch:
17766  *
17767  * R1 = sock_ptr
17768  * goto X;
17769  * ...
17770  * R1 = some_other_valid_ptr;
17771  * goto X;
17772  * ...
17773  * R2 = *(u32 *)(R1 + 0);
17774  */
17775 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
17776 {
17777         return src != prev && (!reg_type_mismatch_ok(src) ||
17778                                !reg_type_mismatch_ok(prev));
17779 }
17780 
17781 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
17782                              bool allow_trust_mismatch)
17783 {
17784         enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
17785 
17786         if (*prev_type == NOT_INIT) {
17787                 /* Saw a valid insn
17788                  * dst_reg = *(u32 *)(src_reg + off)
17789                  * save type to validate intersecting paths
17790                  */
17791                 *prev_type = type;
17792         } else if (reg_type_mismatch(type, *prev_type)) {
17793                 /* An abusive program is trying to use the same insn
17794                  * dst_reg = *(u32*) (src_reg + off)
17795                  * with different pointer types:
17796                  * src_reg == ctx in one branch and
17797                  * src_reg == stack|map in some other branch.
17798                  * Reject it.
17799                  */
17800                 if (allow_trust_mismatch &&
17801                     base_type(type) == PTR_TO_BTF_ID &&
17802                     base_type(*prev_type) == PTR_TO_BTF_ID) {
17803                         /*
17804                          * Have to support a use case when one path through
17805                          * the program yields TRUSTED pointer while another
17806                          * is UNTRUSTED. Fallback to UNTRUSTED to generate
17807                          * BPF_PROBE_MEM/BPF_PROBE_MEMSX.
17808                          */
17809                         *prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
17810                 } else {
17811                         verbose(env, "same insn cannot be used with different pointers\n");
17812                         return -EINVAL;
17813                 }
17814         }
17815 
17816         return 0;
17817 }
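
/* Illustrative sketch in restricted C (hypothetical program, not from an
 * actual test) of the pattern save_aux_ptr_type() rejects: two branches
 * funnel pointers of different types into the very same load instruction.
 *
 *   void *p;
 *   u32 x;
 *
 *   if (cond)
 *       p = ctx;                          // PTR_TO_CTX
 *   else
 *       p = bpf_map_lookup_elem(&m, &k);  // map value pointer
 *   if (p)
 *       x = *(u32 *)p;                    // same insn, different pointer types
 *
 * ctx accesses are rewritten per program type (see convert_ctx_accesses()),
 * so a single instruction cannot be correct for both branches and the
 * verifier reports "same insn cannot be used with different pointers".
 */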
17818 
17819 static int do_check(struct bpf_verifier_env *env)
17820 {
17821         bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
17822         struct bpf_verifier_state *state = env->cur_state;
17823         struct bpf_insn *insns = env->prog->insnsi;
17824         struct bpf_reg_state *regs;
17825         int insn_cnt = env->prog->len;
17826         bool do_print_state = false;
17827         int prev_insn_idx = -1;
17828 
17829         for (;;) {
17830                 bool exception_exit = false;
17831                 struct bpf_insn *insn;
17832                 u8 class;
17833                 int err;
17834 
17835                 /* reset current history entry on each new instruction */
17836                 env->cur_hist_ent = NULL;
17837 
17838                 env->prev_insn_idx = prev_insn_idx;
17839                 if (env->insn_idx >= insn_cnt) {
17840                         verbose(env, "invalid insn idx %d insn_cnt %d\n",
17841                                 env->insn_idx, insn_cnt);
17842                         return -EFAULT;
17843                 }
17844 
17845                 insn = &insns[env->insn_idx];
17846                 class = BPF_CLASS(insn->code);
17847 
17848                 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
17849                         verbose(env,
17850                                 "BPF program is too large. Processed %d insn\n",
17851                                 env->insn_processed);
17852                         return -E2BIG;
17853                 }
17854 
17855                 state->last_insn_idx = env->prev_insn_idx;
17856 
17857                 if (is_prune_point(env, env->insn_idx)) {
17858                         err = is_state_visited(env, env->insn_idx);
17859                         if (err < 0)
17860                                 return err;
17861                         if (err == 1) {
17862                                 /* found equivalent state, can prune the search */
17863                                 if (env->log.level & BPF_LOG_LEVEL) {
17864                                         if (do_print_state)
17865                                                 verbose(env, "\nfrom %d to %d%s: safe\n",
17866                                                         env->prev_insn_idx, env->insn_idx,
17867                                                         env->cur_state->speculative ?
17868                                                         " (speculative execution)" : "");
17869                                         else
17870                                                 verbose(env, "%d: safe\n", env->insn_idx);
17871                                 }
17872                                 goto process_bpf_exit;
17873                         }
17874                 }
17875 
17876                 if (is_jmp_point(env, env->insn_idx)) {
17877                         err = push_jmp_history(env, state, 0);
17878                         if (err)
17879                                 return err;
17880                 }
17881 
17882                 if (signal_pending(current))
17883                         return -EAGAIN;
17884 
17885                 if (need_resched())
17886                         cond_resched();
17887 
17888                 if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
17889                         verbose(env, "\nfrom %d to %d%s:",
17890                                 env->prev_insn_idx, env->insn_idx,
17891                                 env->cur_state->speculative ?
17892                                 " (speculative execution)" : "");
17893                         print_verifier_state(env, state->frame[state->curframe], true);
17894                         do_print_state = false;
17895                 }
17896 
17897                 if (env->log.level & BPF_LOG_LEVEL) {
17898                         const struct bpf_insn_cbs cbs = {
17899                                 .cb_call        = disasm_kfunc_name,
17900                                 .cb_print       = verbose,
17901                                 .private_data   = env,
17902                         };
17903 
17904                         if (verifier_state_scratched(env))
17905                                 print_insn_state(env, state->frame[state->curframe]);
17906 
17907                         verbose_linfo(env, env->insn_idx, "; ");
17908                         env->prev_log_pos = env->log.end_pos;
17909                         verbose(env, "%d: ", env->insn_idx);
17910                         print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
17911                         env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
17912                         env->prev_log_pos = env->log.end_pos;
17913                 }
17914 
17915                 if (bpf_prog_is_offloaded(env->prog->aux)) {
17916                         err = bpf_prog_offload_verify_insn(env, env->insn_idx,
17917                                                            env->prev_insn_idx);
17918                         if (err)
17919                                 return err;
17920                 }
17921 
17922                 regs = cur_regs(env);
17923                 sanitize_mark_insn_seen(env);
17924                 prev_insn_idx = env->insn_idx;
17925 
17926                 if (class == BPF_ALU || class == BPF_ALU64) {
17927                         err = check_alu_op(env, insn);
17928                         if (err)
17929                                 return err;
17930 
17931                 } else if (class == BPF_LDX) {
17932                         enum bpf_reg_type src_reg_type;
17933 
17934                         /* check for reserved fields is already done */
17935 
17936                         /* check src operand */
17937                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
17938                         if (err)
17939                                 return err;
17940 
17941                         err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17942                         if (err)
17943                                 return err;
17944 
17945                         src_reg_type = regs[insn->src_reg].type;
17946 
17947                         /* check that memory (src_reg + off) is readable,
17948                          * the state of dst_reg will be updated by this func
17949                          */
17950                         err = check_mem_access(env, env->insn_idx, insn->src_reg,
17951                                                insn->off, BPF_SIZE(insn->code),
17952                                                BPF_READ, insn->dst_reg, false,
17953                                                BPF_MODE(insn->code) == BPF_MEMSX);
17954                         err = err ?: save_aux_ptr_type(env, src_reg_type, true);
17955                         err = err ?: reg_bounds_sanity_check(env, &regs[insn->dst_reg], "ldx");
17956                         if (err)
17957                                 return err;
17958                 } else if (class == BPF_STX) {
17959                         enum bpf_reg_type dst_reg_type;
17960 
17961                         if (BPF_MODE(insn->code) == BPF_ATOMIC) {
17962                                 err = check_atomic(env, env->insn_idx, insn);
17963                                 if (err)
17964                                         return err;
17965                                 env->insn_idx++;
17966                                 continue;
17967                         }
17968 
17969                         if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
17970                                 verbose(env, "BPF_STX uses reserved fields\n");
17971                                 return -EINVAL;
17972                         }
17973 
17974                         /* check src1 operand */
17975                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
17976                         if (err)
17977                                 return err;
17978                         /* check src2 operand */
17979                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17980                         if (err)
17981                                 return err;
17982 
17983                         dst_reg_type = regs[insn->dst_reg].type;
17984 
17985                         /* check that memory (dst_reg + off) is writeable */
17986                         err = check_mem_access(env, env->insn_idx, insn->dst_reg,
17987                                                insn->off, BPF_SIZE(insn->code),
17988                                                BPF_WRITE, insn->src_reg, false, false);
17989                         if (err)
17990                                 return err;
17991 
17992                         err = save_aux_ptr_type(env, dst_reg_type, false);
17993                         if (err)
17994                                 return err;
17995                 } else if (class == BPF_ST) {
17996                         enum bpf_reg_type dst_reg_type;
17997 
17998                         if (BPF_MODE(insn->code) != BPF_MEM ||
17999                             insn->src_reg != BPF_REG_0) {
18000                                 verbose(env, "BPF_ST uses reserved fields\n");
18001                                 return -EINVAL;
18002                         }
18003                         /* check src operand */
18004                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
18005                         if (err)
18006                                 return err;
18007 
18008                         dst_reg_type = regs[insn->dst_reg].type;
18009 
18010                         /* check that memory (dst_reg + off) is writeable */
18011                         err = check_mem_access(env, env->insn_idx, insn->dst_reg,
18012                                                insn->off, BPF_SIZE(insn->code),
18013                                                BPF_WRITE, -1, false, false);
18014                         if (err)
18015                                 return err;
18016 
18017                         err = save_aux_ptr_type(env, dst_reg_type, false);
18018                         if (err)
18019                                 return err;
18020                 } else if (class == BPF_JMP || class == BPF_JMP32) {
18021                         u8 opcode = BPF_OP(insn->code);
18022 
18023                         env->jmps_processed++;
18024                         if (opcode == BPF_CALL) {
18025                                 if (BPF_SRC(insn->code) != BPF_K ||
18026                                     (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
18027                                      && insn->off != 0) ||
18028                                     (insn->src_reg != BPF_REG_0 &&
18029                                      insn->src_reg != BPF_PSEUDO_CALL &&
18030                                      insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
18031                                     insn->dst_reg != BPF_REG_0 ||
18032                                     class == BPF_JMP32) {
18033                                         verbose(env, "BPF_CALL uses reserved fields\n");
18034                                         return -EINVAL;
18035                                 }
18036 
18037                                 if (env->cur_state->active_lock.ptr) {
18038                                         if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
18039                                             (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
18040                                              (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) {
18041                                                 verbose(env, "function calls are not allowed while holding a lock\n");
18042                                                 return -EINVAL;
18043                                         }
18044                                 }
18045                                 if (insn->src_reg == BPF_PSEUDO_CALL) {
18046                                         err = check_func_call(env, insn, &env->insn_idx);
18047                                 } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
18048                                         err = check_kfunc_call(env, insn, &env->insn_idx);
18049                                         if (!err && is_bpf_throw_kfunc(insn)) {
18050                                                 exception_exit = true;
18051                                                 goto process_bpf_exit_full;
18052                                         }
18053                                 } else {
18054                                         err = check_helper_call(env, insn, &env->insn_idx);
18055                                 }
18056                                 if (err)
18057                                         return err;
18058 
18059                                 mark_reg_scratched(env, BPF_REG_0);
18060                         } else if (opcode == BPF_JA) {
18061                                 if (BPF_SRC(insn->code) != BPF_K ||
18062                                     insn->src_reg != BPF_REG_0 ||
18063                                     insn->dst_reg != BPF_REG_0 ||
18064                                     (class == BPF_JMP && insn->imm != 0) ||
18065                                     (class == BPF_JMP32 && insn->off != 0)) {
18066                                         verbose(env, "BPF_JA uses reserved fields\n");
18067                                         return -EINVAL;
18068                                 }
18069 
18070                                 if (class == BPF_JMP)
18071                                         env->insn_idx += insn->off + 1;
18072                                 else
18073                                         env->insn_idx += insn->imm + 1;
18074                                 continue;
18075 
18076                         } else if (opcode == BPF_EXIT) {
18077                                 if (BPF_SRC(insn->code) != BPF_K ||
18078                                     insn->imm != 0 ||
18079                                     insn->src_reg != BPF_REG_0 ||
18080                                     insn->dst_reg != BPF_REG_0 ||
18081                                     class == BPF_JMP32) {
18082                                         verbose(env, "BPF_EXIT uses reserved fields\n");
18083                                         return -EINVAL;
18084                                 }
18085 process_bpf_exit_full:
18086                                 if (env->cur_state->active_lock.ptr && !env->cur_state->curframe) {
18087                                         verbose(env, "bpf_spin_unlock is missing\n");
18088                                         return -EINVAL;
18089                                 }
18090 
18091                                 if (env->cur_state->active_rcu_lock && !env->cur_state->curframe) {
18092                                         verbose(env, "bpf_rcu_read_unlock is missing\n");
18093                                         return -EINVAL;
18094                                 }
18095 
18096                                 if (env->cur_state->active_preempt_lock && !env->cur_state->curframe) {
18097                                         verbose(env, "%d bpf_preempt_enable%s missing\n",
18098                                                 env->cur_state->active_preempt_lock,
18099                                                 env->cur_state->active_preempt_lock == 1 ? " is" : "(s) are");
18100                                         return -EINVAL;
18101                                 }
18102 
18103                                 /* We must do check_reference_leak here before
18104                                  * prepare_func_exit to handle the case when
18105                                  * state->curframe > 0: it may be a callback
18106                                  * function, for which reference_state must
18107                                  * match the caller's reference state when it exits.
18108                                  */
18109                                 err = check_reference_leak(env, exception_exit);
18110                                 if (err)
18111                                         return err;
18112 
18113                                 /* The side effect of the prepare_func_exit
18114                                  * which is being skipped is that it frees
18115                                  * bpf_func_state. Typically, process_bpf_exit
18116                                  * will only be hit with outermost exit.
18117                                  * copy_verifier_state in pop_stack will handle
18118                                  * freeing of any extra bpf_func_state left over
18119                                  * from not processing all nested function
18120                                  * exits. We also skip return code checks as
18121                                  * they are not needed for exceptional exits.
18122                                  */
18123                                 if (exception_exit)
18124                                         goto process_bpf_exit;
18125 
18126                                 if (state->curframe) {
18127                                         /* exit from nested function */
18128                                         err = prepare_func_exit(env, &env->insn_idx);
18129                                         if (err)
18130                                                 return err;
18131                                         do_print_state = true;
18132                                         continue;
18133                                 }
18134 
18135                                 err = check_return_code(env, BPF_REG_0, "R0");
18136                                 if (err)
18137                                         return err;
18138 process_bpf_exit:
18139                                 mark_verifier_state_scratched(env);
18140                                 update_branch_counts(env, env->cur_state);
18141                                 err = pop_stack(env, &prev_insn_idx,
18142                                                 &env->insn_idx, pop_log);
18143                                 if (err < 0) {
18144                                         if (err != -ENOENT)
18145                                                 return err;
18146                                         break;
18147                                 } else {
18148                                         do_print_state = true;
18149                                         continue;
18150                                 }
18151                         } else {
18152                                 err = check_cond_jmp_op(env, insn, &env->insn_idx);
18153                                 if (err)
18154                                         return err;
18155                         }
18156                 } else if (class == BPF_LD) {
18157                         u8 mode = BPF_MODE(insn->code);
18158 
18159                         if (mode == BPF_ABS || mode == BPF_IND) {
18160                                 err = check_ld_abs(env, insn);
18161                                 if (err)
18162                                         return err;
18163 
18164                         } else if (mode == BPF_IMM) {
18165                                 err = check_ld_imm(env, insn);
18166                                 if (err)
18167                                         return err;
18168 
18169                                 env->insn_idx++;
18170                                 sanitize_mark_insn_seen(env);
18171                         } else {
18172                                 verbose(env, "invalid BPF_LD mode\n");
18173                                 return -EINVAL;
18174                         }
18175                 } else {
18176                         verbose(env, "unknown insn class %d\n", class);
18177                         return -EINVAL;
18178                 }
18179 
18180                 env->insn_idx++;
18181         }
18182 
18183         return 0;
18184 }
18185 
18186 static int find_btf_percpu_datasec(struct btf *btf)
18187 {
18188         const struct btf_type *t;
18189         const char *tname;
18190         int i, n;
18191 
18192         /*
18193          * vmlinux and each module have their own ".data..percpu"
18194          * DATASECs in BTF. So for the module case, we need to skip vmlinux BTF
18195          * types and look only at the module's own BTF types.
18196          */
18197         n = btf_nr_types(btf);
18198         if (btf_is_module(btf))
18199                 i = btf_nr_types(btf_vmlinux);
18200         else
18201                 i = 1;
18202 
18203         for (; i < n; i++) {
18204                 t = btf_type_by_id(btf, i);
18205                 if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
18206                         continue;
18207 
18208                 tname = btf_name_by_offset(btf, t->name_off);
18209                 if (!strcmp(tname, ".data..percpu"))
18210                         return i;
18211         }
18212 
18213         return -ENOENT;
18214 }
18215 
18216 /* replace pseudo btf_id with kernel symbol address */
18217 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
18218                                struct bpf_insn *insn,
18219                                struct bpf_insn_aux_data *aux)
18220 {
18221         const struct btf_var_secinfo *vsi;
18222         const struct btf_type *datasec;
18223         struct btf_mod_pair *btf_mod;
18224         const struct btf_type *t;
18225         const char *sym_name;
18226         bool percpu = false;
18227         u32 type, id = insn->imm;
18228         struct btf *btf;
18229         s32 datasec_id;
18230         u64 addr;
18231         int i, btf_fd, err;
18232 
18233         btf_fd = insn[1].imm;
18234         if (btf_fd) {
18235                 btf = btf_get_by_fd(btf_fd);
18236                 if (IS_ERR(btf)) {
18237                         verbose(env, "invalid module BTF object FD specified.\n");
18238                         return -EINVAL;
18239                 }
18240         } else {
18241                 if (!btf_vmlinux) {
18242                         verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
18243                         return -EINVAL;
18244                 }
18245                 btf = btf_vmlinux;
18246                 btf_get(btf);
18247         }
18248 
18249         t = btf_type_by_id(btf, id);
18250         if (!t) {
18251                 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
18252                 err = -ENOENT;
18253                 goto err_put;
18254         }
18255 
18256         if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
18257                 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
18258                 err = -EINVAL;
18259                 goto err_put;
18260         }
18261 
18262         sym_name = btf_name_by_offset(btf, t->name_off);
18263         addr = kallsyms_lookup_name(sym_name);
18264         if (!addr) {
18265                 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
18266                         sym_name);
18267                 err = -ENOENT;
18268                 goto err_put;
18269         }
18270         insn[0].imm = (u32)addr;
18271         insn[1].imm = addr >> 32;
18272 
18273         if (btf_type_is_func(t)) {
18274                 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
18275                 aux->btf_var.mem_size = 0;
18276                 goto check_btf;
18277         }
18278 
18279         datasec_id = find_btf_percpu_datasec(btf);
18280         if (datasec_id > 0) {
18281                 datasec = btf_type_by_id(btf, datasec_id);
18282                 for_each_vsi(i, datasec, vsi) {
18283                         if (vsi->type == id) {
18284                                 percpu = true;
18285                                 break;
18286                         }
18287                 }
18288         }
18289 
18290         type = t->type;
18291         t = btf_type_skip_modifiers(btf, type, NULL);
18292         if (percpu) {
18293                 aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
18294                 aux->btf_var.btf = btf;
18295                 aux->btf_var.btf_id = type;
18296         } else if (!btf_type_is_struct(t)) {
18297                 const struct btf_type *ret;
18298                 const char *tname;
18299                 u32 tsize;
18300 
18301                 /* resolve the type size of ksym. */
18302                 ret = btf_resolve_size(btf, t, &tsize);
18303                 if (IS_ERR(ret)) {
18304                         tname = btf_name_by_offset(btf, t->name_off);
18305                         verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
18306                                 tname, PTR_ERR(ret));
18307                         err = -EINVAL;
18308                         goto err_put;
18309                 }
18310                 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
18311                 aux->btf_var.mem_size = tsize;
18312         } else {
18313                 aux->btf_var.reg_type = PTR_TO_BTF_ID;
18314                 aux->btf_var.btf = btf;
18315                 aux->btf_var.btf_id = type;
18316         }
18317 check_btf:
18318         /* check whether we recorded this BTF (and maybe module) already */
18319         for (i = 0; i < env->used_btf_cnt; i++) {
18320                 if (env->used_btfs[i].btf == btf) {
18321                         btf_put(btf);
18322                         return 0;
18323                 }
18324         }
18325 
18326         if (env->used_btf_cnt >= MAX_USED_BTFS) {
18327                 err = -E2BIG;
18328                 goto err_put;
18329         }
18330 
18331         btf_mod = &env->used_btfs[env->used_btf_cnt];
18332         btf_mod->btf = btf;
18333         btf_mod->module = NULL;
18334 
18335         /* if we reference variables from kernel module, bump its refcount */
18336         if (btf_is_module(btf)) {
18337                 btf_mod->module = btf_try_get_module(btf);
18338                 if (!btf_mod->module) {
18339                         err = -ENXIO;
18340                         goto err_put;
18341                 }
18342         }
18343 
18344         env->used_btf_cnt++;
18345 
18346         return 0;
18347 err_put:
18348         btf_put(btf);
18349         return err;
18350 }
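
/* [Editorial sketch, not part of verifier.c] Minimal illustration of the
 * ld_imm64 encoding that check_pseudo_btf_id() consumes. The helper name is
 * hypothetical; struct bpf_insn, BPF_PSEUDO_BTF_ID and the opcode macros are
 * the uapi ones. A loader places the ksym's BTF id in insn[0].imm and a
 * module BTF object fd (0 for vmlinux BTF) in insn[1].imm; the verifier then
 * overwrites both imm fields with the 64-bit address from kallsyms.
 */
#if 0
static void emit_ksym_ld_imm64(struct bpf_insn *insn, int dst_reg,
                               int btf_id, int btf_fd)
{
        insn[0] = (struct bpf_insn) {
                .code    = BPF_LD | BPF_IMM | BPF_DW,
                .dst_reg = dst_reg,
                .src_reg = BPF_PSEUDO_BTF_ID,   /* marks a ksym ld_imm64 */
                .imm     = btf_id,              /* BTF id of the VAR/FUNC */
        };
        insn[1] = (struct bpf_insn) {
                .imm     = btf_fd,              /* module BTF fd, 0 => vmlinux */
        };
}
#endif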
18351 
18352 static bool is_tracing_prog_type(enum bpf_prog_type type)
18353 {
18354         switch (type) {
18355         case BPF_PROG_TYPE_KPROBE:
18356         case BPF_PROG_TYPE_TRACEPOINT:
18357         case BPF_PROG_TYPE_PERF_EVENT:
18358         case BPF_PROG_TYPE_RAW_TRACEPOINT:
18359         case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
18360                 return true;
18361         default:
18362                 return false;
18363         }
18364 }
18365 
18366 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
18367                                         struct bpf_map *map,
18368                                         struct bpf_prog *prog)
18370 {
18371         enum bpf_prog_type prog_type = resolve_prog_type(prog);
18372 
18373         if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
18374             btf_record_has_field(map->record, BPF_RB_ROOT)) {
18375                 if (is_tracing_prog_type(prog_type)) {
18376                         verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
18377                         return -EINVAL;
18378                 }
18379         }
18380 
18381         if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
18382                 if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
18383                         verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
18384                         return -EINVAL;
18385                 }
18386 
18387                 if (is_tracing_prog_type(prog_type)) {
18388                         verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
18389                         return -EINVAL;
18390                 }
18391         }
18392 
18393         if (btf_record_has_field(map->record, BPF_TIMER)) {
18394                 if (is_tracing_prog_type(prog_type)) {
18395                         verbose(env, "tracing progs cannot use bpf_timer yet\n");
18396                         return -EINVAL;
18397                 }
18398         }
18399 
18400         if (btf_record_has_field(map->record, BPF_WORKQUEUE)) {
18401                 if (is_tracing_prog_type(prog_type)) {
18402                         verbose(env, "tracing progs cannot use bpf_wq yet\n");
18403                         return -EINVAL;
18404                 }
18405         }
18406 
18407         if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
18408             !bpf_offload_prog_map_match(prog, map)) {
18409                 verbose(env, "offload device mismatch between prog and map\n");
18410                 return -EINVAL;
18411         }
18412 
18413         if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
18414                 verbose(env, "bpf_struct_ops map cannot be used in prog\n");
18415                 return -EINVAL;
18416         }
18417 
18418         if (prog->sleepable)
18419                 switch (map->map_type) {
18420                 case BPF_MAP_TYPE_HASH:
18421                 case BPF_MAP_TYPE_LRU_HASH:
18422                 case BPF_MAP_TYPE_ARRAY:
18423                 case BPF_MAP_TYPE_PERCPU_HASH:
18424                 case BPF_MAP_TYPE_PERCPU_ARRAY:
18425                 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
18426                 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
18427                 case BPF_MAP_TYPE_HASH_OF_MAPS:
18428                 case BPF_MAP_TYPE_RINGBUF:
18429                 case BPF_MAP_TYPE_USER_RINGBUF:
18430                 case BPF_MAP_TYPE_INODE_STORAGE:
18431                 case BPF_MAP_TYPE_SK_STORAGE:
18432                 case BPF_MAP_TYPE_TASK_STORAGE:
18433                 case BPF_MAP_TYPE_CGRP_STORAGE:
18434                 case BPF_MAP_TYPE_QUEUE:
18435                 case BPF_MAP_TYPE_STACK:
18436                 case BPF_MAP_TYPE_ARENA:
18437                         break;
18438                 default:
18439                         verbose(env,
18440                                 "Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
18441                         return -EINVAL;
18442                 }
18443 
18444         return 0;
18445 }
18446 
18447 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
18448 {
18449         return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
18450                 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
18451 }
18452 
18453 /* find and rewrite pseudo imm in ld_imm64 instructions:
18454  *
18455  * 1. if it accesses map FD, replace it with actual map pointer.
18456  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
18457  *
18458  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
18459  */
18460 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
18461 {
18462         struct bpf_insn *insn = env->prog->insnsi;
18463         int insn_cnt = env->prog->len;
18464         int i, j, err;
18465 
18466         err = bpf_prog_calc_tag(env->prog);
18467         if (err)
18468                 return err;
18469 
18470         for (i = 0; i < insn_cnt; i++, insn++) {
18471                 if (BPF_CLASS(insn->code) == BPF_LDX &&
18472                     ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) ||
18473                     insn->imm != 0)) {
18474                         verbose(env, "BPF_LDX uses reserved fields\n");
18475                         return -EINVAL;
18476                 }
18477 
18478                 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
18479                         struct bpf_insn_aux_data *aux;
18480                         struct bpf_map *map;
18481                         struct fd f;
18482                         u64 addr;
18483                         u32 fd;
18484 
18485                         if (i == insn_cnt - 1 || insn[1].code != 0 ||
18486                             insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
18487                             insn[1].off != 0) {
18488                                 verbose(env, "invalid bpf_ld_imm64 insn\n");
18489                                 return -EINVAL;
18490                         }
18491 
18492                         if (insn[0].src_reg == 0)
18493                                 /* valid generic load 64-bit imm */
18494                                 goto next_insn;
18495 
18496                         if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
18497                                 aux = &env->insn_aux_data[i];
18498                                 err = check_pseudo_btf_id(env, insn, aux);
18499                                 if (err)
18500                                         return err;
18501                                 goto next_insn;
18502                         }
18503 
18504                         if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
18505                                 aux = &env->insn_aux_data[i];
18506                                 aux->ptr_type = PTR_TO_FUNC;
18507                                 goto next_insn;
18508                         }
18509 
18510                         /* In the final convert_pseudo_ld_imm64() step, this is
18511                          * converted into a regular 64-bit imm load insn.
18512                          */
18513                         switch (insn[0].src_reg) {
18514                         case BPF_PSEUDO_MAP_VALUE:
18515                         case BPF_PSEUDO_MAP_IDX_VALUE:
18516                                 break;
18517                         case BPF_PSEUDO_MAP_FD:
18518                         case BPF_PSEUDO_MAP_IDX:
18519                                 if (insn[1].imm == 0)
18520                                         break;
18521                                 fallthrough;
18522                         default:
18523                                 verbose(env, "unrecognized bpf_ld_imm64 insn\n");
18524                                 return -EINVAL;
18525                         }
18526 
18527                         switch (insn[0].src_reg) {
18528                         case BPF_PSEUDO_MAP_IDX_VALUE:
18529                         case BPF_PSEUDO_MAP_IDX:
18530                                 if (bpfptr_is_null(env->fd_array)) {
18531                                         verbose(env, "fd_idx without fd_array is invalid\n");
18532                                         return -EPROTO;
18533                                 }
18534                                 if (copy_from_bpfptr_offset(&fd, env->fd_array,
18535                                                             insn[0].imm * sizeof(fd),
18536                                                             sizeof(fd)))
18537                                         return -EFAULT;
18538                                 break;
18539                         default:
18540                                 fd = insn[0].imm;
18541                                 break;
18542                         }
18543 
18544                         f = fdget(fd);
18545                         map = __bpf_map_get(f);
18546                         if (IS_ERR(map)) {
18547                                 verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
18548                                 return PTR_ERR(map);
18549                         }
18550 
18551                         err = check_map_prog_compatibility(env, map, env->prog);
18552                         if (err) {
18553                                 fdput(f);
18554                                 return err;
18555                         }
18556 
18557                         aux = &env->insn_aux_data[i];
18558                         if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
18559                             insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
18560                                 addr = (unsigned long)map;
18561                         } else {
18562                                 u32 off = insn[1].imm;
18563 
18564                                 if (off >= BPF_MAX_VAR_OFF) {
18565                                         verbose(env, "direct value offset of %u is not allowed\n", off);
18566                                         fdput(f);
18567                                         return -EINVAL;
18568                                 }
18569 
18570                                 if (!map->ops->map_direct_value_addr) {
18571                                         verbose(env, "no direct value access support for this map type\n");
18572                                         fdput(f);
18573                                         return -EINVAL;
18574                                 }
18575 
18576                                 err = map->ops->map_direct_value_addr(map, &addr, off);
18577                                 if (err) {
18578                                         verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
18579                                                 map->value_size, off);
18580                                         fdput(f);
18581                                         return err;
18582                                 }
18583 
18584                                 aux->map_off = off;
18585                                 addr += off;
18586                         }
18587 
18588                         insn[0].imm = (u32)addr;
18589                         insn[1].imm = addr >> 32;
18590 
18591                         /* check whether we recorded this map already */
18592                         for (j = 0; j < env->used_map_cnt; j++) {
18593                                 if (env->used_maps[j] == map) {
18594                                         aux->map_index = j;
18595                                         fdput(f);
18596                                         goto next_insn;
18597                                 }
18598                         }
18599 
18600                         if (env->used_map_cnt >= MAX_USED_MAPS) {
18601                                 verbose(env, "The total number of maps per program has reached the limit of %u\n",
18602                                         MAX_USED_MAPS);
18603                                 fdput(f);
18604                                 return -E2BIG;
18605                         }
18606 
18607                         if (env->prog->sleepable)
18608                                 atomic64_inc(&map->sleepable_refcnt);
18609                         /* hold the map. If the program is rejected by verifier,
18610                          * the map will be released by release_maps() or it
18611                          * will be used by the valid program until it's unloaded
18612                          * and all maps are released in bpf_free_used_maps()
18613                          */
18614                         bpf_map_inc(map);
18615 
18616                         aux->map_index = env->used_map_cnt;
18617                         env->used_maps[env->used_map_cnt++] = map;
18618 
18619                         if (bpf_map_is_cgroup_storage(map) &&
18620                             bpf_cgroup_storage_assign(env->prog->aux, map)) {
18621                                 verbose(env, "only one cgroup storage of each type is allowed\n");
18622                                 fdput(f);
18623                                 return -EBUSY;
18624                         }
18625                         if (map->map_type == BPF_MAP_TYPE_ARENA) {
18626                                 if (env->prog->aux->arena) {
18627                                         verbose(env, "Only one arena per program\n");
18628                                         fdput(f);
18629                                         return -EBUSY;
18630                                 }
18631                                 if (!env->allow_ptr_leaks || !env->bpf_capable) {
18632                                         verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n");
18633                                         fdput(f);
18634                                         return -EPERM;
18635                                 }
18636                                 if (!env->prog->jit_requested) {
18637                                         verbose(env, "JIT is required to use arena\n");
18638                                         fdput(f);
18639                                         return -EOPNOTSUPP;
18640                                 }
18641                                 if (!bpf_jit_supports_arena()) {
18642                                         verbose(env, "JIT doesn't support arena\n");
18643                                         fdput(f);
18644                                         return -EOPNOTSUPP;
18645                                 }
18646                                 env->prog->aux->arena = (void *)map;
18647                                 if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
18648                                         verbose(env, "arena's user address must be set via map_extra or mmap()\n");
18649                                         fdput(f);
18650                                         return -EINVAL;
18651                                 }
18652                         }
18653 
18654                         fdput(f);
18655 next_insn:
18656                         insn++;
18657                         i++;
18658                         continue;
18659                 }
18660 
18661                 /* Basic sanity check before we invest more work here. */
18662                 if (!bpf_opcode_in_insntable(insn->code)) {
18663                         verbose(env, "unknown opcode %02x\n", insn->code);
18664                         return -EINVAL;
18665                 }
18666         }
18667 
18668         /* now all pseudo BPF_LD_IMM64 instructions load valid
18669          * 'struct bpf_map *' into a register instead of user map_fd.
18670          * These pointers will be used later by verifier to validate map access.
18671          */
18672         return 0;
18673 }
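
/* [Editorial sketch, not part of verifier.c] Before/after view of the map-fd
 * rewrite performed by resolve_pseudo_ldimm64() on the two-insn ld_imm64
 * pair; 'map_fd' and 'map' are placeholders for illustration only.
 */
#if 0
        /* before: as emitted by the loader */
        insn[0].code    = BPF_LD | BPF_IMM | BPF_DW;
        insn[0].src_reg = BPF_PSEUDO_MAP_FD;
        insn[0].imm     = map_fd;                       /* userspace fd of the map */
        insn[1].imm     = 0;                            /* must be 0 for MAP_FD    */

        /* after: the fd has been resolved to a kernel 'struct bpf_map *' */
        insn[0].imm     = (u32)(unsigned long)map;      /* low 32 bits of pointer  */
        insn[1].imm     = (unsigned long)map >> 32;     /* high 32 bits of pointer */
        /* insn[0].src_reg is cleared later by convert_pseudo_ld_imm64() */
#endif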
18674 
18675 /* drop refcnt of maps used by the rejected program */
18676 static void release_maps(struct bpf_verifier_env *env)
18677 {
18678         __bpf_free_used_maps(env->prog->aux, env->used_maps,
18679                              env->used_map_cnt);
18680 }
18681 
18682 /* drop refcnt of btfs used by the rejected program */
18683 static void release_btfs(struct bpf_verifier_env *env)
18684 {
18685         __bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt);
18686 }
18687 
18688 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
18689 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
18690 {
18691         struct bpf_insn *insn = env->prog->insnsi;
18692         int insn_cnt = env->prog->len;
18693         int i;
18694 
18695         for (i = 0; i < insn_cnt; i++, insn++) {
18696                 if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
18697                         continue;
18698                 if (insn->src_reg == BPF_PSEUDO_FUNC)
18699                         continue;
18700                 insn->src_reg = 0;
18701         }
18702 }
18703 
18704 /* single env->prog->insnsi[off] instruction was replaced with the range
18705  * insnsi[off, off + cnt).  Adjust corresponding insn_aux_data by copying
18706  * [0, off) and [off, end) to new locations, so the patched range stays zero
18707  */
18708 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
18709                                  struct bpf_insn_aux_data *new_data,
18710                                  struct bpf_prog *new_prog, u32 off, u32 cnt)
18711 {
18712         struct bpf_insn_aux_data *old_data = env->insn_aux_data;
18713         struct bpf_insn *insn = new_prog->insnsi;
18714         u32 old_seen = old_data[off].seen;
18715         u32 prog_len;
18716         int i;
18717 
18718         /* aux info at OFF always needs adjustment, no matter whether the fast
18719          * path (cnt == 1) is taken or not. There is no guarantee that the insn
18720          * at OFF is the same as the original insn in the old prog.
18721          */
18722         old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
18723 
18724         if (cnt == 1)
18725                 return;
18726         prog_len = new_prog->len;
18727 
18728         memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
18729         memcpy(new_data + off + cnt - 1, old_data + off,
18730                sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
18731         for (i = off; i < off + cnt - 1; i++) {
18732                 /* Expand insnsi[off]'s seen count to the patched range. */
18733                 new_data[i].seen = old_seen;
18734                 new_data[i].zext_dst = insn_has_def32(env, insn + i);
18735         }
18736         env->insn_aux_data = new_data;
18737         vfree(old_data);
18738 }
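
/* [Editorial note] Worked example for the copy above, assuming a patch of
 * cnt == 3 at OFF: new_data[0..off) keeps the old prefix, the original aux
 * entry for OFF (with zext_dst recomputed for the last patched insn) lands
 * at index off + 2 followed by the old suffix, and the two fresh slots at
 * off and off + 1 inherit only the old 'seen' flag plus a newly computed
 * zext_dst.
 */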
18739 
18740 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
18741 {
18742         int i;
18743 
18744         if (len == 1)
18745                 return;
18746         /* NOTE: fake 'exit' subprog should be updated as well. */
18747         for (i = 0; i <= env->subprog_cnt; i++) {
18748                 if (env->subprog_info[i].start <= off)
18749                         continue;
18750                 env->subprog_info[i].start += len - 1;
18751         }
18752 }
18753 
18754 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
18755 {
18756         struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
18757         int i, sz = prog->aux->size_poke_tab;
18758         struct bpf_jit_poke_descriptor *desc;
18759 
18760         for (i = 0; i < sz; i++) {
18761                 desc = &tab[i];
18762                 if (desc->insn_idx <= off)
18763                         continue;
18764                 desc->insn_idx += len - 1;
18765         }
18766 }
18767 
18768 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
18769                                             const struct bpf_insn *patch, u32 len)
18770 {
18771         struct bpf_prog *new_prog;
18772         struct bpf_insn_aux_data *new_data = NULL;
18773 
18774         if (len > 1) {
18775                 new_data = vzalloc(array_size(env->prog->len + len - 1,
18776                                               sizeof(struct bpf_insn_aux_data)));
18777                 if (!new_data)
18778                         return NULL;
18779         }
18780 
18781         new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
18782         if (IS_ERR(new_prog)) {
18783                 if (PTR_ERR(new_prog) == -ERANGE)
18784                         verbose(env,
18785                                 "insn %d cannot be patched due to 16-bit range\n",
18786                                 env->insn_aux_data[off].orig_idx);
18787                 vfree(new_data);
18788                 return NULL;
18789         }
18790         adjust_insn_aux_data(env, new_data, new_prog, off, len);
18791         adjust_subprog_starts(env, off, len);
18792         adjust_poke_descs(new_prog, off, len);
18793         return new_prog;
18794 }
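
/* [Editorial sketch, not part of verifier.c] Canonical usage pattern of
 * bpf_patch_insn_data() in the rewrite passes further below; 'patch' is a
 * caller-built replacement sequence of 'cnt' insns and the variable names
 * mirror those passes.
 */
#if 0
        new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
        if (!new_prog)
                return -ENOMEM;
        delta    += cnt - 1;                            /* one insn grew into cnt   */
        env->prog = new_prog;
        insn      = new_prog->insnsi + i + delta;       /* re-point the insn cursor */
#endif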
18795 
18796 /*
18797  * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
18798  * jump offset by 'delta'.
18799  */
18800 static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
18801 {
18802         struct bpf_insn *insn = prog->insnsi;
18803         u32 insn_cnt = prog->len, i;
18804         s32 imm;
18805         s16 off;
18806 
18807         for (i = 0; i < insn_cnt; i++, insn++) {
18808                 u8 code = insn->code;
18809 
18810                 if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
18811                     BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
18812                         continue;
18813 
18814                 if (insn->code == (BPF_JMP32 | BPF_JA)) {
18815                         if (i + 1 + insn->imm != tgt_idx)
18816                                 continue;
18817                         if (check_add_overflow(insn->imm, delta, &imm))
18818                                 return -ERANGE;
18819                         insn->imm = imm;
18820                 } else {
18821                         if (i + 1 + insn->off != tgt_idx)
18822                                 continue;
18823                         if (check_add_overflow(insn->off, delta, &off))
18824                                 return -ERANGE;
18825                         insn->off = off;
18826                 }
18827         }
18828         return 0;
18829 }
18830 
18831 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
18832                                               u32 off, u32 cnt)
18833 {
18834         int i, j;
18835 
18836         /* find first prog starting at or after off (first to remove) */
18837         for (i = 0; i < env->subprog_cnt; i++)
18838                 if (env->subprog_info[i].start >= off)
18839                         break;
18840         /* find first prog starting at or after off + cnt (first to stay) */
18841         for (j = i; j < env->subprog_cnt; j++)
18842                 if (env->subprog_info[j].start >= off + cnt)
18843                         break;
18844         /* if subprog j doesn't start exactly at off + cnt, we are just
18845          * removing the front of the previous subprog
18846          */
18847         if (env->subprog_info[j].start != off + cnt)
18848                 j--;
18849 
18850         if (j > i) {
18851                 struct bpf_prog_aux *aux = env->prog->aux;
18852                 int move;
18853 
18854                 /* move fake 'exit' subprog as well */
18855                 move = env->subprog_cnt + 1 - j;
18856 
18857                 memmove(env->subprog_info + i,
18858                         env->subprog_info + j,
18859                         sizeof(*env->subprog_info) * move);
18860                 env->subprog_cnt -= j - i;
18861 
18862                 /* remove func_info */
18863                 if (aux->func_info) {
18864                         move = aux->func_info_cnt - j;
18865 
18866                         memmove(aux->func_info + i,
18867                                 aux->func_info + j,
18868                                 sizeof(*aux->func_info) * move);
18869                         aux->func_info_cnt -= j - i;
18870                         /* func_info->insn_off is set after all code rewrites,
18871                          * in adjust_btf_func() - no need to adjust
18872                          */
18873                 }
18874         } else {
18875                 /* convert i from "first prog to remove" to "first to adjust" */
18876                 if (env->subprog_info[i].start == off)
18877                         i++;
18878         }
18879 
18880         /* update fake 'exit' subprog as well */
18881         for (; i <= env->subprog_cnt; i++)
18882                 env->subprog_info[i].start -= cnt;
18883 
18884         return 0;
18885 }
18886 
18887 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
18888                                       u32 cnt)
18889 {
18890         struct bpf_prog *prog = env->prog;
18891         u32 i, l_off, l_cnt, nr_linfo;
18892         struct bpf_line_info *linfo;
18893 
18894         nr_linfo = prog->aux->nr_linfo;
18895         if (!nr_linfo)
18896                 return 0;
18897 
18898         linfo = prog->aux->linfo;
18899 
18900         /* find first line info to remove, count lines to be removed */
18901         for (i = 0; i < nr_linfo; i++)
18902                 if (linfo[i].insn_off >= off)
18903                         break;
18904 
18905         l_off = i;
18906         l_cnt = 0;
18907         for (; i < nr_linfo; i++)
18908                 if (linfo[i].insn_off < off + cnt)
18909                         l_cnt++;
18910                 else
18911                         break;
18912 
18913         /* If the first live insn doesn't match the first live linfo, it must "inherit"
18914          * the last removed linfo.  prog is already modified, so prog->len == off
18915          * means no live instructions remain after it (the tail of the program was removed).
18916          */
18917         if (prog->len != off && l_cnt &&
18918             (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
18919                 l_cnt--;
18920                 linfo[--i].insn_off = off + cnt;
18921         }
18922 
18923         /* remove the line info entries that refer to the removed instructions */
18924         if (l_cnt) {
18925                 memmove(linfo + l_off, linfo + i,
18926                         sizeof(*linfo) * (nr_linfo - i));
18927 
18928                 prog->aux->nr_linfo -= l_cnt;
18929                 nr_linfo = prog->aux->nr_linfo;
18930         }
18931 
18932         /* pull all linfo[i].insn_off >= off + cnt in by cnt */
18933         for (i = l_off; i < nr_linfo; i++)
18934                 linfo[i].insn_off -= cnt;
18935 
18936         /* fix up all subprogs (incl. 'exit') which start >= off */
18937         for (i = 0; i <= env->subprog_cnt; i++)
18938                 if (env->subprog_info[i].linfo_idx > l_off) {
18939                         /* program may have started in the removed region but
18940                          * may not be fully removed
18941                          */
18942                         if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
18943                                 env->subprog_info[i].linfo_idx -= l_cnt;
18944                         else
18945                                 env->subprog_info[i].linfo_idx = l_off;
18946                 }
18947 
18948         return 0;
18949 }
18950 
18951 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
18952 {
18953         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18954         unsigned int orig_prog_len = env->prog->len;
18955         int err;
18956 
18957         if (bpf_prog_is_offloaded(env->prog->aux))
18958                 bpf_prog_offload_remove_insns(env, off, cnt);
18959 
18960         err = bpf_remove_insns(env->prog, off, cnt);
18961         if (err)
18962                 return err;
18963 
18964         err = adjust_subprog_starts_after_remove(env, off, cnt);
18965         if (err)
18966                 return err;
18967 
18968         err = bpf_adj_linfo_after_remove(env, off, cnt);
18969         if (err)
18970                 return err;
18971 
18972         memmove(aux_data + off, aux_data + off + cnt,
18973                 sizeof(*aux_data) * (orig_prog_len - off - cnt));
18974 
18975         return 0;
18976 }
18977 
18978 /* The verifier does more data flow analysis than llvm and will not
18979  * explore branches that are dead at run time. Malicious programs can
18980  * have dead code too. Therefore replace all dead at-run-time code
18981  * with 'ja -1'.
18982  *
18983  * Plain nops would not be optimal: e.g. if they sat at the end of the
18984  * program and, through another bug, we managed to jump there, we would
18985  * execute beyond program memory. Returning an exception code also
18986  * wouldn't work, since the dead code may be located inside a subprog
18987  * rather than in the main program.
18988  */
18989 static void sanitize_dead_code(struct bpf_verifier_env *env)
18990 {
18991         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18992         struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
18993         struct bpf_insn *insn = env->prog->insnsi;
18994         const int insn_cnt = env->prog->len;
18995         int i;
18996 
18997         for (i = 0; i < insn_cnt; i++) {
18998                 if (aux_data[i].seen)
18999                         continue;
19000                 memcpy(insn + i, &trap, sizeof(trap));
19001                 aux_data[i].zext_dst = false;
19002         }
19003 }
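
/* [Editorial note] The 'ja -1' trap above is BPF_JMP_IMM(BPF_JA, 0, 0, -1),
 * an unconditional jump with off == -1. Since the next pc is computed as
 * pc + off + 1, the insn branches to itself, so dead code that is somehow
 * reached spins in place instead of running past the end of the program.
 */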
19004 
19005 static bool insn_is_cond_jump(u8 code)
19006 {
19007         u8 op;
19008 
19009         op = BPF_OP(code);
19010         if (BPF_CLASS(code) == BPF_JMP32)
19011                 return op != BPF_JA;
19012 
19013         if (BPF_CLASS(code) != BPF_JMP)
19014                 return false;
19015 
19016         return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
19017 }
19018 
19019 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
19020 {
19021         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
19022         struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
19023         struct bpf_insn *insn = env->prog->insnsi;
19024         const int insn_cnt = env->prog->len;
19025         int i;
19026 
19027         for (i = 0; i < insn_cnt; i++, insn++) {
19028                 if (!insn_is_cond_jump(insn->code))
19029                         continue;
19030 
19031                 if (!aux_data[i + 1].seen)
19032                         ja.off = insn->off;
19033                 else if (!aux_data[i + 1 + insn->off].seen)
19034                         ja.off = 0;
19035                 else
19036                         continue;
19037 
19038                 if (bpf_prog_is_offloaded(env->prog->aux))
19039                         bpf_prog_offload_replace_insn(env, i, &ja);
19040 
19041                 memcpy(insn, &ja, sizeof(ja));
19042         }
19043 }
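
/* [Editorial note] Illustration of the two hard-wiring cases above, using a
 * made-up conditional jump 'if r1 == 0 goto +5' at insn i. If the
 * fall-through insn (i + 1) was never seen, the branch is always taken and
 * the insn becomes 'goto +5' (ja.off = insn->off). If instead the branch
 * target (i + 1 + 5) was never seen, the branch is never taken and the insn
 * becomes 'goto +0', i.e. a plain fall-through (ja.off = 0).
 */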
19044 
19045 static int opt_remove_dead_code(struct bpf_verifier_env *env)
19046 {
19047         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
19048         int insn_cnt = env->prog->len;
19049         int i, err;
19050 
19051         for (i = 0; i < insn_cnt; i++) {
19052                 int j;
19053 
19054                 j = 0;
19055                 while (i + j < insn_cnt && !aux_data[i + j].seen)
19056                         j++;
19057                 if (!j)
19058                         continue;
19059 
19060                 err = verifier_remove_insns(env, i, j);
19061                 if (err)
19062                         return err;
19063                 insn_cnt = env->prog->len;
19064         }
19065 
19066         return 0;
19067 }
19068 
19069 static int opt_remove_nops(struct bpf_verifier_env *env)
19070 {
19071         const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
19072         struct bpf_insn *insn = env->prog->insnsi;
19073         int insn_cnt = env->prog->len;
19074         int i, err;
19075 
19076         for (i = 0; i < insn_cnt; i++) {
19077                 if (memcmp(&insn[i], &ja, sizeof(ja)))
19078                         continue;
19079 
19080                 err = verifier_remove_insns(env, i, 1);
19081                 if (err)
19082                         return err;
19083                 insn_cnt--;
19084                 i--;
19085         }
19086 
19087         return 0;
19088 }
19089 
19090 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
19091                                          const union bpf_attr *attr)
19092 {
19093         struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
19094         struct bpf_insn_aux_data *aux = env->insn_aux_data;
19095         int i, patch_len, delta = 0, len = env->prog->len;
19096         struct bpf_insn *insns = env->prog->insnsi;
19097         struct bpf_prog *new_prog;
19098         bool rnd_hi32;
19099 
19100         rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
19101         zext_patch[1] = BPF_ZEXT_REG(0);
19102         rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
19103         rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
19104         rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
19105         for (i = 0; i < len; i++) {
19106                 int adj_idx = i + delta;
19107                 struct bpf_insn insn;
19108                 int load_reg;
19109 
19110                 insn = insns[adj_idx];
19111                 load_reg = insn_def_regno(&insn);
19112                 if (!aux[adj_idx].zext_dst) {
19113                         u8 code, class;
19114                         u32 imm_rnd;
19115 
19116                         if (!rnd_hi32)
19117                                 continue;
19118 
19119                         code = insn.code;
19120                         class = BPF_CLASS(code);
19121                         if (load_reg == -1)
19122                                 continue;
19123 
19124                         /* NOTE: arg "reg" (the fourth one) is only used for
19125                          *       BPF_STX + SRC_OP, so it is safe to pass NULL
19126                          *       here.
19127                          */
19128                         if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
19129                                 if (class == BPF_LD &&
19130                                     BPF_MODE(code) == BPF_IMM)
19131                                         i++;
19132                                 continue;
19133                         }
19134 
19135                         /* ctx load could be transformed into wider load. */
19136                         if (class == BPF_LDX &&
19137                             aux[adj_idx].ptr_type == PTR_TO_CTX)
19138                                 continue;
19139 
19140                         imm_rnd = get_random_u32();
19141                         rnd_hi32_patch[0] = insn;
19142                         rnd_hi32_patch[1].imm = imm_rnd;
19143                         rnd_hi32_patch[3].dst_reg = load_reg;
19144                         patch = rnd_hi32_patch;
19145                         patch_len = 4;
19146                         goto apply_patch_buffer;
19147                 }
19148 
19149                 /* Add in a zero-extend instruction if a) the JIT has requested
19150                  * it or b) it's a CMPXCHG.
19151                  *
19152                  * The latter is because: BPF_CMPXCHG always loads a value into
19153                  * R0, therefore always zero-extends. However some archs'
19154                  * equivalent instruction only does this load when the
19155                  * comparison is successful. This detail of CMPXCHG is
19156                  * orthogonal to the general zero-extension behaviour of the
19157                  * CPU, so it's treated independently of bpf_jit_needs_zext.
19158                  */
19159                 if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
19160                         continue;
19161 
19162                 /* Zero-extension is done by the caller. */
19163                 if (bpf_pseudo_kfunc_call(&insn))
19164                         continue;
19165 
19166                 if (WARN_ON(load_reg == -1)) {
19167                         verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
19168                         return -EFAULT;
19169                 }
19170 
19171                 zext_patch[0] = insn;
19172                 zext_patch[1].dst_reg = load_reg;
19173                 zext_patch[1].src_reg = load_reg;
19174                 patch = zext_patch;
19175                 patch_len = 2;
19176 apply_patch_buffer:
19177                 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
19178                 if (!new_prog)
19179                         return -ENOMEM;
19180                 env->prog = new_prog;
19181                 insns = new_prog->insnsi;
19182                 aux = env->insn_aux_data;
19183                 delta += patch_len - 1;
19184         }
19185 
19186         return 0;
19187 }
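
/* [Editorial note] Shape of the two patch buffers used above. The zext patch
 * is { original insn, BPF_ZEXT_REG(dst) }, i.e. a 32-bit 'mov dst, dst' with
 * imm set to 1 so JITs can recognize it as an explicit zero-extension
 * request. The rnd_hi32 test patch instead poisons the upper half of dst:
 * { original insn, mov64 AX, imm_rnd; lsh64 AX, 32; or64 dst, AX }, which is
 * dst |= (u64)imm_rnd << 32.
 */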
19188 
19189 /* convert load instructions that access fields of a context type into a
19190  * sequence of instructions that access fields of the underlying structure:
19191  *     struct __sk_buff    -> struct sk_buff
19192  *     struct bpf_sock_ops -> struct sock
19193  */
19194 static int convert_ctx_accesses(struct bpf_verifier_env *env)
19195 {
19196         const struct bpf_verifier_ops *ops = env->ops;
19197         int i, cnt, size, ctx_field_size, delta = 0;
19198         const int insn_cnt = env->prog->len;
19199         struct bpf_insn insn_buf[16], *insn;
19200         u32 target_size, size_default, off;
19201         struct bpf_prog *new_prog;
19202         enum bpf_access_type type;
19203         bool is_narrower_load;
19204 
19205         if (ops->gen_prologue || env->seen_direct_write) {
19206                 if (!ops->gen_prologue) {
19207                         verbose(env, "bpf verifier is misconfigured\n");
19208                         return -EINVAL;
19209                 }
19210                 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
19211                                         env->prog);
19212                 if (cnt >= ARRAY_SIZE(insn_buf)) {
19213                         verbose(env, "bpf verifier is misconfigured\n");
19214                         return -EINVAL;
19215                 } else if (cnt) {
19216                         new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
19217                         if (!new_prog)
19218                                 return -ENOMEM;
19219 
19220                         env->prog = new_prog;
19221                         delta += cnt - 1;
19222                 }
19223         }
19224 
19225         if (bpf_prog_is_offloaded(env->prog->aux))
19226                 return 0;
19227 
19228         insn = env->prog->insnsi + delta;
19229 
19230         for (i = 0; i < insn_cnt; i++, insn++) {
19231                 bpf_convert_ctx_access_t convert_ctx_access;
19232                 u8 mode;
19233 
19234                 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
19235                     insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
19236                     insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
19237                     insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
19238                     insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
19239                     insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
19240                     insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
19241                         type = BPF_READ;
19242                 } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
19243                            insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
19244                            insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
19245                            insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
19246                            insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
19247                            insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
19248                            insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
19249                            insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
19250                         type = BPF_WRITE;
19251                 } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
19252                             insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
19253                            env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
19254                         insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
19255                         env->prog->aux->num_exentries++;
19256                         continue;
19257                 } else {
19258                         continue;
19259                 }
19260 
19261                 if (type == BPF_WRITE &&
19262                     env->insn_aux_data[i + delta].sanitize_stack_spill) {
19263                         struct bpf_insn patch[] = {
19264                                 *insn,
19265                                 BPF_ST_NOSPEC(),
19266                         };
19267 
19268                         cnt = ARRAY_SIZE(patch);
19269                         new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
19270                         if (!new_prog)
19271                                 return -ENOMEM;
19272 
19273                         delta    += cnt - 1;
19274                         env->prog = new_prog;
19275                         insn      = new_prog->insnsi + i + delta;
19276                         continue;
19277                 }
19278 
19279                 switch ((int)env->insn_aux_data[i + delta].ptr_type) {
19280                 case PTR_TO_CTX:
19281                         if (!ops->convert_ctx_access)
19282                                 continue;
19283                         convert_ctx_access = ops->convert_ctx_access;
19284                         break;
19285                 case PTR_TO_SOCKET:
19286                 case PTR_TO_SOCK_COMMON:
19287                         convert_ctx_access = bpf_sock_convert_ctx_access;
19288                         break;
19289                 case PTR_TO_TCP_SOCK:
19290                         convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
19291                         break;
19292                 case PTR_TO_XDP_SOCK:
19293                         convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
19294                         break;
19295                 case PTR_TO_BTF_ID:
19296                 case PTR_TO_BTF_ID | PTR_UNTRUSTED:
19297                 /* Unlike plain PTR_TO_BTF_ID, PTR_TO_BTF_ID | MEM_ALLOC always
19298                  * has a valid lifetime and an active ref_obj_id, but the same
19299                  * cannot be said once it is marked PTR_UNTRUSTED, hence we must
19300                  * handle any faults for loads from such types. BPF_WRITE is
19301                  * disallowed for this case.
19302                  */
19303                 case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
19304                         if (type == BPF_READ) {
19305                                 if (BPF_MODE(insn->code) == BPF_MEM)
19306                                         insn->code = BPF_LDX | BPF_PROBE_MEM |
19307                                                      BPF_SIZE((insn)->code);
19308                                 else
19309                                         insn->code = BPF_LDX | BPF_PROBE_MEMSX |
19310                                                      BPF_SIZE((insn)->code);
19311                                 env->prog->aux->num_exentries++;
19312                         }
19313                         continue;
19314                 case PTR_TO_ARENA:
19315                         if (BPF_MODE(insn->code) == BPF_MEMSX) {
19316                                 verbose(env, "sign extending loads from arena are not supported yet\n");
19317                                 return -EOPNOTSUPP;
19318                         }
19319                         insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
19320                         env->prog->aux->num_exentries++;
19321                         continue;
19322                 default:
19323                         continue;
19324                 }
19325 
19326                 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
19327                 size = BPF_LDST_BYTES(insn);
19328                 mode = BPF_MODE(insn->code);
19329 
19330                 /* If the read access is a narrower load of the field,
19331                  * convert it to a 4/8-byte load, to minimize program-type-specific
19332                  * convert_ctx_access changes. If the conversion is successful,
19333                  * we will apply the proper mask to the result.
19334                  */
19335                 is_narrower_load = size < ctx_field_size;
19336                 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
19337                 off = insn->off;
19338                 if (is_narrower_load) {
19339                         u8 size_code;
19340 
19341                         if (type == BPF_WRITE) {
19342                                 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
19343                                 return -EINVAL;
19344                         }
19345 
19346                         size_code = BPF_H;
19347                         if (ctx_field_size == 4)
19348                                 size_code = BPF_W;
19349                         else if (ctx_field_size == 8)
19350                                 size_code = BPF_DW;
19351 
19352                         insn->off = off & ~(size_default - 1);
19353                         insn->code = BPF_LDX | BPF_MEM | size_code;
19354                 }
19355 
19356                 target_size = 0;
19357                 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
19358                                          &target_size);
19359                 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
19360                     (ctx_field_size && !target_size)) {
19361                         verbose(env, "bpf verifier is misconfigured\n");
19362                         return -EINVAL;
19363                 }
19364 
19365                 if (is_narrower_load && size < target_size) {
19366                         u8 shift = bpf_ctx_narrow_access_offset(
19367                                 off, size, size_default) * 8;
19368                         if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
19369                                 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
19370                                 return -EINVAL;
19371                         }
19372                         if (ctx_field_size <= 4) {
19373                                 if (shift)
19374                                         insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
19375                                                                         insn->dst_reg,
19376                                                                         shift);
19377                                 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
19378                                                                 (1 << size * 8) - 1);
19379                         } else {
19380                                 if (shift)
19381                                         insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
19382                                                                         insn->dst_reg,
19383                                                                         shift);
19384                                 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
19385                                                                 (1ULL << size * 8) - 1);
19386                         }
19387                 }
19388                 if (mode == BPF_MEMSX)
19389                         insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
19390                                                        insn->dst_reg, insn->dst_reg,
19391                                                        size * 8, 0);
19392 
19393                 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
19394                 if (!new_prog)
19395                         return -ENOMEM;
19396 
19397                 delta += cnt - 1;
19398 
19399                 /* keep walking new program and skip insns we just inserted */
19400                 env->prog = new_prog;
19401                 insn      = new_prog->insnsi + i + delta;
19402         }
19403 
19404         return 0;
19405 }
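
/* [Editorial note] Worked example of the narrow-load handling above,
 * assuming a little-endian host: a 1-byte read at byte 1 of a 4-byte ctx
 * field is widened to a 4-byte load at the field-aligned offset, and the
 * patch buffer is extended with 'rsh32 dst, 8' (shift = narrow offset * 8)
 * and 'and32 dst, 0xff' so that only the requested byte remains in dst.
 */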
19406 
19407 static int jit_subprogs(struct bpf_verifier_env *env)
19408 {
19409         struct bpf_prog *prog = env->prog, **func, *tmp;
19410         int i, j, subprog_start, subprog_end = 0, len, subprog;
19411         struct bpf_map *map_ptr;
19412         struct bpf_insn *insn;
19413         void *old_bpf_func;
19414         int err, num_exentries;
19415 
19416         if (env->subprog_cnt <= 1)
19417                 return 0;
19418 
19419         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
19420                 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
19421                         continue;
19422 
19423                 /* Upon error here we cannot fall back to interpreter but
19424                  * need a hard reject of the program. Thus -EFAULT is
19425                  * propagated in any case.
19426                  */
19427                 subprog = find_subprog(env, i + insn->imm + 1);
19428                 if (subprog < 0) {
19429                         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
19430                                   i + insn->imm + 1);
19431                         return -EFAULT;
19432                 }
19433                 /* temporarily remember subprog id inside insn instead of
19434                  * aux_data, since next loop will split up all insns into funcs
19435                  */
19436                 insn->off = subprog;
19437                 /* remember original imm in case JIT fails and fallback
19438                  * to interpreter will be needed
19439                  */
19440                 env->insn_aux_data[i].call_imm = insn->imm;
19441                 /* point imm to __bpf_call_base+1 from the JIT's point of view */
19442                 insn->imm = 1;
19443                 if (bpf_pseudo_func(insn)) {
19444 #if defined(MODULES_VADDR)
19445                         u64 addr = MODULES_VADDR;
19446 #else
19447                         u64 addr = VMALLOC_START;
19448 #endif
19449                         /* The JIT (e.g. x86_64) may emit fewer instructions
19450                          * if it learns that a u32 imm is the same as a u64 imm.
19451                          * Set it close enough to a possible prog address.
19452                          */
19453                         insn[0].imm = (u32)addr;
19454                         insn[1].imm = addr >> 32;
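                              /* e.g. on x86_64 a BPF_LD_IMM64 whose upper 32 bits
                               * are zero can be emitted as a short 32-bit mov
                               * instead of a 10-byte movabs; seeding a full 64-bit
                               * kernel address here makes the first JIT pass
                               * reserve room for the wide form, so patching in the
                               * real function address later cannot change the
                               * image size.
                               */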
19455                 }
19456         }
19457 
19458         err = bpf_prog_alloc_jited_linfo(prog);
19459         if (err)
19460                 goto out_undo_insn;
19461 
19462         err = -ENOMEM;
19463         func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
19464         if (!func)
19465                 goto out_undo_insn;
19466 
19467         for (i = 0; i < env->subprog_cnt; i++) {
19468                 subprog_start = subprog_end;
19469                 subprog_end = env->subprog_info[i + 1].start;
19470 
19471                 len = subprog_end - subprog_start;
19472                 /* bpf_prog_run() doesn't call subprogs directly,
19473                  * hence main prog stats include the runtime of subprogs.
19474                  * subprogs don't have IDs and are not reachable via prog_get_next_id;
19475                  * func[i]->stats will never be accessed and stays NULL
19476                  */
19477                 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
19478                 if (!func[i])
19479                         goto out_free;
19480                 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
19481                        len * sizeof(struct bpf_insn));
19482                 func[i]->type = prog->type;
19483                 func[i]->len = len;
19484                 if (bpf_prog_calc_tag(func[i]))
19485                         goto out_free;
19486                 func[i]->is_func = 1;
19487                 func[i]->sleepable = prog->sleepable;
19488                 func[i]->aux->func_idx = i;
19489                 /* Below members will be freed only at prog->aux */
19490                 func[i]->aux->btf = prog->aux->btf;
19491                 func[i]->aux->func_info = prog->aux->func_info;
19492                 func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
19493                 func[i]->aux->poke_tab = prog->aux->poke_tab;
19494                 func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
19495 
19496                 for (j = 0; j < prog->aux->size_poke_tab; j++) {
19497                         struct bpf_jit_poke_descriptor *poke;
19498 
19499                         poke = &prog->aux->poke_tab[j];
19500                         if (poke->insn_idx < subprog_end &&
19501                             poke->insn_idx >= subprog_start)
19502                                 poke->aux = func[i]->aux;
19503                 }
19504 
19505                 func[i]->aux->name[0] = 'F';
19506                 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
19507                 func[i]->jit_requested = 1;
19508                 func[i]->blinding_requested = prog->blinding_requested;
19509                 func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
19510                 func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
19511                 func[i]->aux->linfo = prog->aux->linfo;
19512                 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
19513                 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
19514                 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
19515                 func[i]->aux->arena = prog->aux->arena;
19516                 num_exentries = 0;
19517                 insn = func[i]->insnsi;
19518                 for (j = 0; j < func[i]->len; j++, insn++) {
19519                         if (BPF_CLASS(insn->code) == BPF_LDX &&
19520                             (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
19521                              BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
19522                              BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
19523                                 num_exentries++;
19524                         if ((BPF_CLASS(insn->code) == BPF_STX ||
19525                              BPF_CLASS(insn->code) == BPF_ST) &&
19526                              BPF_MODE(insn->code) == BPF_PROBE_MEM32)
19527                                 num_exentries++;
19528                         if (BPF_CLASS(insn->code) == BPF_STX &&
19529                              BPF_MODE(insn->code) == BPF_PROBE_ATOMIC)
19530                                 num_exentries++;
19531                 }
19532                 func[i]->aux->num_exentries = num_exentries;
19533                 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
19534                 func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
19535                 if (!i)
19536                         func[i]->aux->exception_boundary = env->seen_exception;
19537                 func[i] = bpf_int_jit_compile(func[i]);
19538                 if (!func[i]->jited) {
19539                         err = -ENOTSUPP;
19540                         goto out_free;
19541                 }
19542                 cond_resched();
19543         }
19544 
19545         /* At this point all bpf functions were successfully JITed;
19546          * now populate all bpf_calls with correct addresses and
19547          * run the last pass of the JIT.
19548          */
19549         for (i = 0; i < env->subprog_cnt; i++) {
19550                 insn = func[i]->insnsi;
19551                 for (j = 0; j < func[i]->len; j++, insn++) {
19552                         if (bpf_pseudo_func(insn)) {
19553                                 subprog = insn->off;
19554                                 insn[0].imm = (u32)(long)func[subprog]->bpf_func;
19555                                 insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
19556                                 continue;
19557                         }
19558                         if (!bpf_pseudo_call(insn))
19559                                 continue;
19560                         subprog = insn->off;
19561                         insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
19562                 }
19563 
19564                 /* we use the aux data to keep a list of the start addresses
19565                  * of the JITed images for each function in the program
19566                  *
19567                  * for some architectures, such as powerpc64, the imm field
19568                  * might not be large enough to hold the offset of the start
19569                  * address of the callee's JITed image from __bpf_call_base
19570                  *
19571                  * in such cases, we can look up the start address of a callee
19572                  * by using its subprog id, available from the off field of
19573                  * the call instruction, as an index for this list
19574                  */
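                      /* e.g. bpf_jit_get_func_addr() hands such JITs the absolute
                       * callee address during the extra pass by indexing
                       * prog->aux->func[] with the subprog id kept in insn->off.
                       */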
19575                 func[i]->aux->func = func;
19576                 func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
19577                 func[i]->aux->real_func_cnt = env->subprog_cnt;
19578         }
19579         for (i = 0; i < env->subprog_cnt; i++) {
19580                 old_bpf_func = func[i]->bpf_func;
19581                 tmp = bpf_int_jit_compile(func[i]);
19582                 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
19583                         verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
19584                         err = -ENOTSUPP;
19585                         goto out_free;
19586                 }
19587                 cond_resched();
19588         }
19589 
19590         /* finally lock prog and jit images for all functions and
19591          * populate kallsyms. Begin at the first subprogram, since
19592          * bpf_prog_load will add the kallsyms for the main program.
19593          */
19594         for (i = 1; i < env->subprog_cnt; i++) {
19595                 err = bpf_prog_lock_ro(func[i]);
19596                 if (err)
19597                         goto out_free;
19598         }
19599 
19600         for (i = 1; i < env->subprog_cnt; i++)
19601                 bpf_prog_kallsyms_add(func[i]);
19602 
19603         /* Last step: make the now-unused interpreter insns from the main
19604          * prog consistent for later dump requests, so they look the same
19605          * as if they had only been interpreted.
19606          */
19607         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
19608                 if (bpf_pseudo_func(insn)) {
19609                         insn[0].imm = env->insn_aux_data[i].call_imm;
19610                         insn[1].imm = insn->off;
19611                         insn->off = 0;
19612                         continue;
19613                 }
19614                 if (!bpf_pseudo_call(insn))
19615                         continue;
19616                 insn->off = env->insn_aux_data[i].call_imm;
19617                 subprog = find_subprog(env, i + insn->off + 1);
19618                 insn->imm = subprog;
19619         }
19620 
19621         prog->jited = 1;
19622         prog->bpf_func = func[0]->bpf_func;
19623         prog->jited_len = func[0]->jited_len;
19624         prog->aux->extable = func[0]->aux->extable;
19625         prog->aux->num_exentries = func[0]->aux->num_exentries;
19626         prog->aux->func = func;
19627         prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
19628         prog->aux->real_func_cnt = env->subprog_cnt;
19629         prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
19630         prog->aux->exception_boundary = func[0]->aux->exception_boundary;
19631         bpf_prog_jit_attempt_done(prog);
19632         return 0;
19633 out_free:
19634         /* We failed JIT'ing, so at this point we need to unregister poke
19635          * descriptors from subprogs, so that the kernel is not attempting to
19636          * patch them anymore as we're freeing the subprog JIT memory.
19637          */
19638         for (i = 0; i < prog->aux->size_poke_tab; i++) {
19639                 map_ptr = prog->aux->poke_tab[i].tail_call.map;
19640                 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
19641         }
19642         /* At this point we're guaranteed that poke descriptors are not
19643          * live anymore. We can just unlink each subprog's descriptor table,
19644          * as it is released with the main prog.
19645          */
19646         for (i = 0; i < env->subprog_cnt; i++) {
19647                 if (!func[i])
19648                         continue;
19649                 func[i]->aux->poke_tab = NULL;
19650                 bpf_jit_free(func[i]);
19651         }
19652         kfree(func);
19653 out_undo_insn:
19654         /* cleanup main prog to be interpreted */
19655         prog->jit_requested = 0;
19656         prog->blinding_requested = 0;
19657         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
19658                 if (!bpf_pseudo_call(insn))
19659                         continue;
19660                 insn->off = 0;
19661                 insn->imm = env->insn_aux_data[i].call_imm;
19662         }
19663         bpf_prog_jit_attempt_done(prog);
19664         return err;
19665 }
19666 
19667 static int fixup_call_args(struct bpf_verifier_env *env)
19668 {
19669 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
19670         struct bpf_prog *prog = env->prog;
19671         struct bpf_insn *insn = prog->insnsi;
19672         bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
19673         int i, depth;
19674 #endif
19675         int err = 0;
19676 
19677         if (env->prog->jit_requested &&
19678             !bpf_prog_is_offloaded(env->prog->aux)) {
19679                 err = jit_subprogs(env);
19680                 if (err == 0)
19681                         return 0;
19682                 if (err == -EFAULT)
19683                         return err;
19684         }
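              /* JIT'ing was skipped or failed non-fatally: fall back to the
               * interpreter. bpf-to-bpf calls are then lowered below via
               * bpf_patch_call_args(), which selects an interpreter entry
               * sized for the callee's stack depth.
               */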
19685 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
19686         if (has_kfunc_call) {
19687                 verbose(env, "calling kernel functions is not allowed in non-JITed programs\n");
19688                 return -EINVAL;
19689         }
19690         if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
19691                 /* When the JIT fails, progs with bpf2bpf calls and tail_calls
19692                  * have to be rejected, since the interpreter doesn't support them yet.
19693                  */
19694                 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
19695                 return -EINVAL;
19696         }
19697         for (i = 0; i < prog->len; i++, insn++) {
19698                 if (bpf_pseudo_func(insn)) {
19699                         /* When the JIT fails, progs with callback calls
19700                          * have to be rejected, since the interpreter doesn't support them yet.
19701                          */
19702                         verbose(env, "callbacks are not allowed in non-JITed programs\n");
19703                         return -EINVAL;
19704                 }
19705 
19706                 if (!bpf_pseudo_call(insn))
19707                         continue;
19708                 depth = get_callee_stack_depth(env, insn, i);
19709                 if (depth < 0)
19710                         return depth;
19711                 bpf_patch_call_args(insn, depth);
19712         }
19713         err = 0;
19714 #endif
19715         return err;
19716 }
19717 
19718 /* replace a generic kfunc with a specialized version if necessary */
19719 static void specialize_kfunc(struct bpf_verifier_env *env,
19720                              u32 func_id, u16 offset, unsigned long *addr)
19721 {
19722         struct bpf_prog *prog = env->prog;
19723         bool seen_direct_write;
19724         void *xdp_kfunc;
19725         bool is_rdonly;
19726 
19727         if (bpf_dev_bound_kfunc_id(func_id)) {
19728                 xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
19729                 if (xdp_kfunc) {
19730                         *addr = (unsigned long)xdp_kfunc;
19731                         return;
19732                 }
19733                 /* fall back to the default kfunc when not supported by netdev */
19734         }
19735 
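              /* A non-zero offset means the kfunc is resolved against a module
               * BTF; the specializations below only apply to vmlinux kfuncs.
               */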
19736         if (offset)
19737                 return;
19738 
19739         if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
19740                 seen_direct_write = env->seen_direct_write;
19741                 is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
19742 
19743                 if (is_rdonly)
19744                         *addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
19745 
19746                 /* restore env->seen_direct_write to its original value, since
19747                  * may_access_direct_pkt_data mutates it
19748                  */
19749                 env->seen_direct_write = seen_direct_write;
19750         }
19751 }
19752 
19753 static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
19754                                             u16 struct_meta_reg,
19755                                             u16 node_offset_reg,
19756                                             struct bpf_insn *insn,
19757                                             struct bpf_insn *insn_buf,
19758                                             int *cnt)
19759 {
19760         struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
19761         struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
19762 
19763         insn_buf[0] = addr[0];
19764         insn_buf[1] = addr[1];
19765         insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
19766         insn_buf[3] = *insn;
19767         *cnt = 4;
19768 }
19769 
19770 static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
19771                             struct bpf_insn *insn_buf, int insn_idx, int *cnt)
19772 {
19773         const struct bpf_kfunc_desc *desc;
19774 
19775         if (!insn->imm) {
19776                 verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
19777                 return -EINVAL;
19778         }
19779 
19780         *cnt = 0;
19781 
19782         /* insn->imm has the btf func_id. Replace it with an offset relative to
19783          * __bpf_call_base, unless the JIT needs to call functions that are
19784          * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
19785          */
19786         desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
19787         if (!desc) {
19788                 verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
19789                         insn->imm);
19790                 return -EFAULT;
19791         }
19792 
19793         if (!bpf_jit_supports_far_kfunc_call())
19794                 insn->imm = BPF_CALL_IMM(desc->addr);
19795         if (insn->off)
19796                 return 0;
19797         if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
19798             desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
19799                 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19800                 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
19801                 u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
19802 
19803                 if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) {
19804                         verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
19805                                 insn_idx);
19806                         return -EFAULT;
19807                 }
19808 
19809                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
19810                 insn_buf[1] = addr[0];
19811                 insn_buf[2] = addr[1];
19812                 insn_buf[3] = *insn;
19813                 *cnt = 4;
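                      /* The resulting sequence for, e.g., bpf_obj_new(struct foo)
                       * is roughly:
                       *   r1 = sizeof(struct foo)         // obj_new_size
                       *   r2 = &kptr_struct_meta or NULL  // ld_imm64, two insns
                       *   call bpf_obj_new_impl
                       */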
19814         } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
19815                    desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] ||
19816                    desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
19817                 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19818                 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
19819 
19820                 if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) {
19821                         verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
19822                                 insn_idx);
19823                         return -EFAULT;
19824                 }
19825 
19826                 if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
19827                     !kptr_struct_meta) {
19828                         verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
19829                                 insn_idx);
19830                         return -EFAULT;
19831                 }
19832 
19833                 insn_buf[0] = addr[0];
19834                 insn_buf[1] = addr[1];
19835                 insn_buf[2] = *insn;
19836                 *cnt = 3;
19837         } else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
19838                    desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
19839                    desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
19840                 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19841                 int struct_meta_reg = BPF_REG_3;
19842                 int node_offset_reg = BPF_REG_4;
19843 
19844                 /* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
19845                 if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
19846                         struct_meta_reg = BPF_REG_4;
19847                         node_offset_reg = BPF_REG_5;
19848                 }
19849 
19850                 if (!kptr_struct_meta) {
19851                         verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
19852                                 insn_idx);
19853                         return -EFAULT;
19854                 }
19855 
19856                 __fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
19857                                                 node_offset_reg, insn, insn_buf, cnt);
19858         } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
19859                    desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
19860                 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
19861                 *cnt = 1;
19862         } else if (is_bpf_wq_set_callback_impl_kfunc(desc->func_id)) {
19863                 struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(BPF_REG_4, (long)env->prog->aux) };
19864 
19865                 insn_buf[0] = ld_addrs[0];
19866                 insn_buf[1] = ld_addrs[1];
19867                 insn_buf[2] = *insn;
19868                 *cnt = 3;
19869         }
19870         return 0;
19871 }
19872 
19873 /* The function requires that the first instruction in 'patch' is insnsi[prog->len - 1] */
19874 static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
19875 {
19876         struct bpf_subprog_info *info = env->subprog_info;
19877         int cnt = env->subprog_cnt;
19878         struct bpf_prog *prog;
19879 
19880         /* We only reserve one slot for hidden subprogs in subprog_info. */
19881         if (env->hidden_subprog_cnt) {
19882                 verbose(env, "verifier internal error: only one hidden subprog supported\n");
19883                 return -EFAULT;
19884         }
19885         /* We're not patching any existing instruction, just appending the new
19886          * ones for the hidden subprog. Hence all of the adjustment operations
19887          * in bpf_patch_insn_data are no-ops.
19888          */
19889         prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
19890         if (!prog)
19891                 return -ENOMEM;
19892         env->prog = prog;
19893         info[cnt + 1].start = info[cnt].start;
19894         info[cnt].start = prog->len - len + 1;
19895         env->subprog_cnt++;
19896         env->hidden_subprog_cnt++;
19897         return 0;
19898 }
19899 
19900 /* Do various post-verification rewrites in a single program pass.
19901  * These rewrites simplify JIT and interpreter implementations.
19902  */
19903 static int do_misc_fixups(struct bpf_verifier_env *env)
19904 {
19905         struct bpf_prog *prog = env->prog;
19906         enum bpf_attach_type eatype = prog->expected_attach_type;
19907         enum bpf_prog_type prog_type = resolve_prog_type(prog);
19908         struct bpf_insn *insn = prog->insnsi;
19909         const struct bpf_func_proto *fn;
19910         const int insn_cnt = prog->len;
19911         const struct bpf_map_ops *ops;
19912         struct bpf_insn_aux_data *aux;
19913         struct bpf_insn insn_buf[16];
19914         struct bpf_prog *new_prog;
19915         struct bpf_map *map_ptr;
19916         int i, ret, cnt, delta = 0, cur_subprog = 0;
19917         struct bpf_subprog_info *subprogs = env->subprog_info;
19918         u16 stack_depth = subprogs[cur_subprog].stack_depth;
19919         u16 stack_depth_extra = 0;
19920 
19921         if (env->seen_exception && !env->exception_callback_subprog) {
19922                 struct bpf_insn patch[] = {
19923                         env->prog->insnsi[insn_cnt - 1],
19924                         BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
19925                         BPF_EXIT_INSN(),
19926                 };
19927 
19928                 ret = add_hidden_subprog(env, patch, ARRAY_SIZE(patch));
19929                 if (ret < 0)
19930                         return ret;
19931                 prog = env->prog;
19932                 insn = prog->insnsi;
19933 
19934                 env->exception_callback_subprog = env->subprog_cnt - 1;
19935                 /* Don't update insn_cnt, as add_hidden_subprog always appends insns */
19936                 mark_subprog_exc_cb(env, env->exception_callback_subprog);
19937         }
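              /* The hidden subprog appended above is the default exception
               * callback: it moves the cookie passed in R1 into R0 and exits,
               * so a bpf_throw() without a user-supplied callback makes the
               * program return that cookie.
               */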
19938 
19939         for (i = 0; i < insn_cnt;) {
19940                 if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) {
19941                         if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) ||
19942                             (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
19943                                 /* convert to 32-bit mov that clears upper 32-bit */
19944                                 insn->code = BPF_ALU | BPF_MOV | BPF_X;
19945                                 /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
19946                                 insn->off = 0;
19947                                 insn->imm = 0;
19948                         } /* cast from as(0) to as(1) should be handled by JIT */
19949                         goto next_insn;
19950                 }
19951 
19952                 if (env->insn_aux_data[i + delta].needs_zext)
19953                         /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
19954                         insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
19955 
19956                 /* Make sdiv/smod divide-by-minus-one exceptions impossible. */
19957                 if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
19958                      insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
19959                      insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
19960                      insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
19961                     insn->off == 1 && insn->imm == -1) {
19962                         bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
19963                         bool isdiv = BPF_OP(insn->code) == BPF_DIV;
19964                         struct bpf_insn *patchlet;
19965                         struct bpf_insn chk_and_sdiv[] = {
19966                                 BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
19967                                              BPF_NEG | BPF_K, insn->dst_reg,
19968                                              0, 0, 0),
19969                         };
19970                         struct bpf_insn chk_and_smod[] = {
19971                                 BPF_MOV32_IMM(insn->dst_reg, 0),
19972                         };
19973 
19974                         patchlet = isdiv ? chk_and_sdiv : chk_and_smod;
19975                         cnt = isdiv ? ARRAY_SIZE(chk_and_sdiv) : ARRAY_SIZE(chk_and_smod);
19976 
19977                         new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
19978                         if (!new_prog)
19979                                 return -ENOMEM;
19980 
19981                         delta    += cnt - 1;
19982                         env->prog = prog = new_prog;
19983                         insn      = new_prog->insnsi + i + delta;
19984                         goto next_insn;
19985                 }
19986 
19987                 /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
19988                 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
19989                     insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
19990                     insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
19991                     insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
19992                         bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
19993                         bool isdiv = BPF_OP(insn->code) == BPF_DIV;
19994                         bool is_sdiv = isdiv && insn->off == 1;
19995                         bool is_smod = !isdiv && insn->off == 1;
19996                         struct bpf_insn *patchlet;
19997                         struct bpf_insn chk_and_div[] = {
19998                                 /* [R,W]x div 0 -> 0 */
19999                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
20000                                              BPF_JNE | BPF_K, insn->src_reg,
20001                                              0, 2, 0),
20002                                 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
20003                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
20004                                 *insn,
20005                         };
20006                         struct bpf_insn chk_and_mod[] = {
20007                                 /* [R,W]x mod 0 -> [R,W]x */
20008                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
20009                                              BPF_JEQ | BPF_K, insn->src_reg,
20010                                              0, 1 + (is64 ? 0 : 1), 0),
20011                                 *insn,
20012                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
20013                                 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
20014                         };
20015                         struct bpf_insn chk_and_sdiv[] = {
20016                                 /* [R,W]x sdiv 0 -> 0
20017                                  * LLONG_MIN sdiv -1 -> LLONG_MIN
20018                                  * INT_MIN sdiv -1 -> INT_MIN
20019                                  */
20020                                 BPF_MOV64_REG(BPF_REG_AX, insn->src_reg),
20021                                 BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
20022                                              BPF_ADD | BPF_K, BPF_REG_AX,
20023                                              0, 0, 1),
20024                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
20025                                              BPF_JGT | BPF_K, BPF_REG_AX,
20026                                              0, 4, 1),
20027                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
20028                                              BPF_JEQ | BPF_K, BPF_REG_AX,
20029                                              0, 1, 0),
20030                                 BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
20031                                              BPF_MOV | BPF_K, insn->dst_reg,
20032                                              0, 0, 0),
20033                                 /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
20034                                 BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
20035                                              BPF_NEG | BPF_K, insn->dst_reg,
20036                                              0, 0, 0),
20037                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
20038                                 *insn,
20039                         };
20040                         struct bpf_insn chk_and_smod[] = {
20041                                 /* [R,W]x mod 0 -> [R,W]x */
20042                                 /* [R,W]x mod -1 -> 0 */
20043                                 BPF_MOV64_REG(BPF_REG_AX, insn->src_reg),
20044                                 BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
20045                                              BPF_ADD | BPF_K, BPF_REG_AX,
20046                                              0, 0, 1),
20047                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
20048                                              BPF_JGT | BPF_K, BPF_REG_AX,
20049                                              0, 3, 1),
20050                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
20051                                              BPF_JEQ | BPF_K, BPF_REG_AX,
20052                                              0, 3 + (is64 ? 0 : 1), 1),
20053                                 BPF_MOV32_IMM(insn->dst_reg, 0),
20054                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
20055                                 *insn,
20056                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
20057                                 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
20058                         };
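                              /* Both signed patchlets rely on the same trick:
                               * AX holds src + 1, so a single unsigned "AX > 1"
                               * test filters out both src == 0 and src == -1
                               * with one branch, and a follow-up equality test
                               * separates the two special cases.
                               */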
20059 
20060                         if (is_sdiv) {
20061                                 patchlet = chk_and_sdiv;
20062                                 cnt = ARRAY_SIZE(chk_and_sdiv);
20063                         } else if (is_smod) {
20064                                 patchlet = chk_and_smod;
20065                                 cnt = ARRAY_SIZE(chk_and_smod) - (is64 ? 2 : 0);
20066                         } else {
20067                                 patchlet = isdiv ? chk_and_div : chk_and_mod;
20068                                 cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
20069                                               ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
20070                         }
20071 
20072                         new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
20073                         if (!new_prog)
20074                                 return -ENOMEM;
20075 
20076                         delta    += cnt - 1;
20077                         env->prog = prog = new_prog;
20078                         insn      = new_prog->insnsi + i + delta;
20079                         goto next_insn;
20080                 }
20081 
20082                 /* Make it impossible to de-reference a userspace address */
20083                 if (BPF_CLASS(insn->code) == BPF_LDX &&
20084                     (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
20085                      BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
20086                         struct bpf_insn *patch = &insn_buf[0];
20087                         u64 uaddress_limit = bpf_arch_uaddress_limit();
20088 
20089                         if (!uaddress_limit)
20090                                 goto next_insn;
20091 
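                              /* The patch below copies src + off into AX and
                               * compares only its upper 32 bits against those of
                               * bpf_arch_uaddress_limit(): if they do not exceed
                               * the limit's, the probe load is skipped and
                               * dst_reg is simply zeroed.
                               */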
20092                         *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
20093                         if (insn->off)
20094                                 *patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
20095                         *patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
20096                         *patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
20097                         *patch++ = *insn;
20098                         *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
20099                         *patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
20100 
20101                         cnt = patch - insn_buf;
20102                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
20103                         if (!new_prog)
20104                                 return -ENOMEM;
20105 
20106                         delta    += cnt - 1;
20107                         env->prog = prog = new_prog;
20108                         insn      = new_prog->insnsi + i + delta;
20109                         goto next_insn;
20110                 }
20111 
20112                 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
20113                 if (BPF_CLASS(insn->code) == BPF_LD &&
20114                     (BPF_MODE(insn->code) == BPF_ABS ||
20115                      BPF_MODE(insn->code) == BPF_IND)) {
20116                         cnt = env->ops->gen_ld_abs(insn, insn_buf);
20117                         if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
20118                                 verbose(env, "bpf verifier is misconfigured\n");
20119                                 return -EINVAL;
20120                         }
20121 
20122                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
20123                         if (!new_prog)
20124                                 return -ENOMEM;
20125 
20126                         delta    += cnt - 1;
20127                         env->prog = prog = new_prog;
20128                         insn      = new_prog->insnsi + i + delta;
20129                         goto next_insn;
20130                 }
20131 
20132                 /* Rewrite pointer arithmetic to mitigate speculation attacks. */
20133                 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
20134                     insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
20135                         const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
20136                         const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
20137                         struct bpf_insn *patch = &insn_buf[0];
20138                         bool issrc, isneg, isimm;
20139                         u32 off_reg;
20140 
20141                         aux = &env->insn_aux_data[i + delta];
20142                         if (!aux->alu_state ||
20143                             aux->alu_state == BPF_ALU_NON_POINTER)
20144                                 goto next_insn;
20145 
20146                         isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
20147                         issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
20148                                 BPF_ALU_SANITIZE_SRC;
20149                         isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
20150 
20151                         off_reg = issrc ? insn->src_reg : insn->dst_reg;
20152                         if (isimm) {
20153                                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
20154                         } else {
20155                                 if (isneg)
20156                                         *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
20157                                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
20158                                 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
20159                                 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
20160                                 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
20161                                 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
20162                                 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
20163                         }
20164                         if (!issrc)
20165                                 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
20166                         insn->src_reg = BPF_REG_AX;
20167                         if (isneg)
20168                                 insn->code = insn->code == code_add ?
20169                                              code_sub : code_add;
20170                         *patch++ = *insn;
20171                         if (issrc && isneg && !isimm)
20172                                 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
20173                         cnt = patch - insn_buf;
20174 
20175                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
20176                         if (!new_prog)
20177                                 return -ENOMEM;
20178 
20179                         delta    += cnt - 1;
20180                         env->prog = prog = new_prog;
20181                         insn      = new_prog->insnsi + i + delta;
20182                         goto next_insn;
20183                 }
20184 
20185                 if (is_may_goto_insn(insn)) {
20186                         int stack_off = -stack_depth - 8;
20187 
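                              /* may_goto is lowered into a bounded-iteration
                               * check: an 8-byte counter lives in a hidden stack
                               * slot just below the subprog's frame. Each
                               * may_goto loads it, takes the original jump
                               * target once the counter hits zero, and otherwise
                               * decrements and stores it back. The slot itself
                               * is seeded with the iteration limit by a separate
                               * prologue patch later in this pass.
                               */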
20188                         stack_depth_extra = 8;
20189                         insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
20190                         if (insn->off >= 0)
20191                                 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
20192                         else
20193                                 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
20194                         insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
20195                         insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
20196                         cnt = 4;
20197 
20198                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
20199                         if (!new_prog)
20200                                 return -ENOMEM;
20201 
20202                         delta += cnt - 1;
20203                         env->prog = prog = new_prog;
20204                         insn = new_prog->insnsi + i + delta;
20205                         goto next_insn;
20206                 }
20207 
20208                 if (insn->code != (BPF_JMP | BPF_CALL))
20209                         goto next_insn;
20210                 if (insn->src_reg == BPF_PSEUDO_CALL)
20211                         goto next_insn;
20212                 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
20213                         ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
20214                         if (ret)
20215                                 return ret;
20216                         if (cnt == 0)
20217                                 goto next_insn;
20218 
20219                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
20220                         if (!new_prog)
20221                                 return -ENOMEM;
20222 
20223                         delta    += cnt - 1;
20224                         env->prog = prog = new_prog;
20225                         insn      = new_prog->insnsi + i + delta;
20226                         goto next_insn;
20227                 }
20228 
20229                 /* Skip inlining the helper call if the JIT does it. */
20230                 if (bpf_jit_inlines_helper_call(insn->imm))
20231                         goto next_insn;
20232 
20233                 if (insn->imm == BPF_FUNC_get_route_realm)
20234                         prog->dst_needed = 1;
20235                 if (insn->imm == BPF_FUNC_get_prandom_u32)
20236                         bpf_user_rnd_init_once();
20237                 if (insn->imm == BPF_FUNC_override_return)
20238                         prog->kprobe_override = 1;
20239                 if (insn->imm == BPF_FUNC_tail_call) {
20240                         /* If we tail call into other programs, we
20241                          * cannot make any assumptions since they can
20242                          * be replaced dynamically during runtime in
20243                          * the program array.
20244                          */
20245                         prog->cb_access = 1;
20246                         if (!allow_tail_call_in_subprogs(env))
20247                                 prog->aux->stack_depth = MAX_BPF_STACK;
20248                         prog->aux->max_pkt_offset = MAX_PACKET_OFF;
20249 
20250                         /* mark bpf_tail_call as a different opcode to avoid a
20251                          * conditional branch in the interpreter for every normal
20252                          * call and to prevent accidental JITing by a JIT compiler
20253                          * that doesn't support bpf_tail_call yet
20254                          */
20255                         insn->imm = 0;
20256                         insn->code = BPF_JMP | BPF_TAIL_CALL;
20257 
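                              /* If the prog_array map and the index are known
                               * constants and nothing is poisoned, register a
                               * poke descriptor so the JIT can emit a direct
                               * jump to the target program, re-patched whenever
                               * the corresponding map slot changes; otherwise
                               * fall back to the masked, indirect tail call
                               * emitted below.
                               */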
20258                         aux = &env->insn_aux_data[i + delta];
20259                         if (env->bpf_capable && !prog->blinding_requested &&
20260                             prog->jit_requested &&
20261                             !bpf_map_key_poisoned(aux) &&
20262                             !bpf_map_ptr_poisoned(aux) &&
20263                             !bpf_map_ptr_unpriv(aux)) {
20264                                 struct bpf_jit_poke_descriptor desc = {
20265                                         .reason = BPF_POKE_REASON_TAIL_CALL,
20266                                         .tail_call.map = aux->map_ptr_state.map_ptr,
20267                                         .tail_call.key = bpf_map_key_immediate(aux),
20268                                         .insn_idx = i + delta,
20269                                 };
20270 
20271                                 ret = bpf_jit_add_poke_descriptor(prog, &desc);
20272                                 if (ret < 0) {
20273                                         verbose(env, "adding tail call poke descriptor failed\n");
20274                                         return ret;
20275                                 }
20276 
20277                                 insn->imm = ret + 1;
20278                                 goto next_insn;
20279                         }
20280 
20281                         if (!bpf_map_ptr_unpriv(aux))
20282                                 goto next_insn;
20283 
20284                         /* instead of changing every JIT dealing with tail_call,
20285                          * emit two extra insns:
20286                          * if (index >= max_entries) goto out;
20287                          * index &= array->index_mask;
20288                          * to avoid out-of-bounds cpu speculation
20289                          */
20290                         if (bpf_map_ptr_poisoned(aux)) {
20291                                 verbose(env, "tail_call abusing map_ptr\n");
20292                                 return -EINVAL;
20293                         }
20294 
20295                         map_ptr = aux->map_ptr_state.map_ptr;
20296                         insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
20297                                                   map_ptr->max_entries, 2);
20298                         insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
20299                                                     container_of(map_ptr,
20300                                                                  struct bpf_array,
20301                                                                  map)->index_mask);
20302                         insn_buf[2] = *insn;
20303                         cnt = 3;
20304                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
20305                         if (!new_prog)
20306                                 return -ENOMEM;
20307 
20308                         delta    += cnt - 1;
20309                         env->prog = prog = new_prog;
20310                         insn      = new_prog->insnsi + i + delta;
20311                         goto next_insn;
20312                 }
20313 
20314                 if (insn->imm == BPF_FUNC_timer_set_callback) {
20315                         /* The verifier will process callback_fn as many times as necessary
20316                          * with different maps and the register states prepared by
20317                          * set_timer_callback_state will be accurate.
20318                          *
20319                          * The following use case is valid:
20320                          *   map1 is shared by prog1, prog2, prog3.
20321                          *   prog1 calls bpf_timer_init for some map1 elements
20322                          *   prog2 calls bpf_timer_set_callback for some map1 elements.
20323                          *     Those that were not bpf_timer_init-ed will return -EINVAL.
20324                          *   prog3 calls bpf_timer_start for some map1 elements.
20325                          *     Those that were not both bpf_timer_init-ed and
20326                          *     bpf_timer_set_callback-ed will return -EINVAL.
20327                          */
20328                         struct bpf_insn ld_addrs[2] = {
20329                                 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
20330                         };
20331 
20332                         insn_buf[0] = ld_addrs[0];
20333                         insn_buf[1] = ld_addrs[1];
20334                         insn_buf[2] = *insn;
20335                         cnt = 3;
20336 
20337                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
20338                         if (!new_prog)
20339                                 return -ENOMEM;
20340 
20341                         delta    += cnt - 1;
20342                         env->prog = prog = new_prog;
20343                         insn      = new_prog->insnsi + i + delta;
20344                         goto patch_call_imm;
20345                 }
20346 
20347                 if (is_storage_get_function(insn->imm)) {
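                              /* The *_storage_get() helpers take a hidden gfp_t
                               * allocation flag in R5: sleepable programs may
                               * allocate with GFP_KERNEL, everything else must
                               * stay GFP_ATOMIC.
                               */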
20348                         if (!in_sleepable(env) ||
20349                             env->insn_aux_data[i + delta].storage_get_func_atomic)
20350                                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
20351                         else
20352                                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
20353                         insn_buf[1] = *insn;
20354                         cnt = 2;
20355 
20356                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
20357                         if (!new_prog)
20358                                 return -ENOMEM;
20359 
20360                         delta += cnt - 1;
20361                         env->prog = prog = new_prog;
20362                         insn = new_prog->insnsi + i + delta;
20363                         goto patch_call_imm;
20364                 }
20365 
20366                 /* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
20367                 if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
20368                         /* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
20369                          * bpf_mem_alloc() returns a ptr to the percpu data ptr.
20370                          */
20371                         insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
20372                         insn_buf[1] = *insn;
20373                         cnt = 2;
20374 
20375                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
20376                         if (!new_prog)
20377                                 return -ENOMEM;
20378 
20379                         delta += cnt - 1;
20380                         env->prog = prog = new_prog;
20381                         insn = new_prog->insnsi + i + delta;
20382                         goto patch_call_imm;
20383                 }
20384 
20385                 /* The BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
20386                  * and other inlining handlers currently hold on 64-bit
20387                  * architectures only.
20388                  */
20389                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
20390                     (insn->imm == BPF_FUNC_map_lookup_elem ||
20391                      insn->imm == BPF_FUNC_map_update_elem ||
20392                      insn->imm == BPF_FUNC_map_delete_elem ||
20393                      insn->imm == BPF_FUNC_map_push_elem   ||
20394                      insn->imm == BPF_FUNC_map_pop_elem    ||
20395                      insn->imm == BPF_FUNC_map_peek_elem   ||
20396                      insn->imm == BPF_FUNC_redirect_map    ||
20397                      insn->imm == BPF_FUNC_for_each_map_elem ||
20398                      insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
20399                         aux = &env->insn_aux_data[i + delta];
20400                         if (bpf_map_ptr_poisoned(aux))
20401                                 goto patch_call_imm;
20402 
20403                         map_ptr = aux->map_ptr_state.map_ptr;
20404                         ops = map_ptr->ops;
20405                         if (insn->imm == BPF_FUNC_map_lookup_elem &&
20406                             ops->map_gen_lookup) {
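                                      /* e.g. array_map_gen_lookup() expands the
                                       * helper call into an inline bounds check
                                       * plus direct pointer arithmetic on the
                                       * value area, removing the call entirely.
                                       */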
20407                                 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
20408                                 if (cnt == -EOPNOTSUPP)
20409                                         goto patch_map_ops_generic;
20410                                 if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
20411                                         verbose(env, "bpf verifier is misconfigured\n");
20412                                         return -EINVAL;
20413                                 }
20414 
20415                                 new_prog = bpf_patch_insn_data(env, i + delta,
20416                                                                insn_buf, cnt);
20417                                 if (!new_prog)
20418                                         return -ENOMEM;
20419 
20420                                 delta    += cnt - 1;
20421                                 env->prog = prog = new_prog;
20422                                 insn      = new_prog->insnsi + i + delta;
20423                                 goto next_insn;
20424                         }
20425 
20426                         BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
20427                                      (void *(*)(struct bpf_map *map, void *key))NULL));
20428                         BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
20429                                      (long (*)(struct bpf_map *map, void *key))NULL));
20430                         BUILD_BUG_ON(!__same_type(ops->map_update_elem,
20431                                      (long (*)(struct bpf_map *map, void *key, void *value,
20432                                               u64 flags))NULL));
20433                         BUILD_BUG_ON(!__same_type(ops->map_push_elem,
20434                                      (long (*)(struct bpf_map *map, void *value,
20435                                               u64 flags))NULL));
20436                         BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
20437                                      (long (*)(struct bpf_map *map, void *value))NULL));
20438                         BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
20439                                      (long (*)(struct bpf_map *map, void *value))NULL));
20440                         BUILD_BUG_ON(!__same_type(ops->map_redirect,
20441                                      (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
20442                         BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
20443                                      (long (*)(struct bpf_map *map,
20444                                               bpf_callback_t callback_fn,
20445                                               void *callback_ctx,
20446                                               u64 flags))NULL));
20447                         BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
20448                                      (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
20449 
20450 patch_map_ops_generic:
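                              /* Roughly: instead of calling the generic helper
                               * (e.g. bpf_map_lookup_elem()), rewrite the call to
                               * target the map-type-specific implementation
                               * directly. BPF_CALL_IMM() encodes a kernel function
                               * as an offset from __bpf_call_base, the same
                               * encoding patch_call_imm below uses for ordinary
                               * helper calls.
                               */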
20451                         switch (insn->imm) {
20452                         case BPF_FUNC_map_lookup_elem:
20453                                 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
20454                                 goto next_insn;
20455                         case BPF_FUNC_map_update_elem:
20456                                 insn->imm = BPF_CALL_IMM(ops->map_update_elem);
20457                                 goto next_insn;
20458                         case BPF_FUNC_map_delete_elem:
20459                                 insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
20460                                 goto next_insn;
20461                         case BPF_FUNC_map_push_elem:
20462                                 insn->imm = BPF_CALL_IMM(ops->map_push_elem);
20463                                 goto next_insn;
20464                         case BPF_FUNC_map_pop_elem:
20465                                 insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
20466                                 goto next_insn;
20467                         case BPF_FUNC_map_peek_elem:
20468                                 insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
20469                                 goto next_insn;
20470                         case BPF_FUNC_redirect_map:
20471                                 insn->imm = BPF_CALL_IMM(ops->map_redirect);
20472                                 goto next_insn;
20473                         case BPF_FUNC_for_each_map_elem:
20474                                 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
20475                                 goto next_insn;
20476                         case BPF_FUNC_map_lookup_percpu_elem:
20477                                 insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
20478                                 goto next_insn;
20479                         }
20480 
20481                         goto patch_call_imm;
20482                 }
20483 
20484                 /* Implement bpf_jiffies64 inline. */
20485                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
20486                     insn->imm == BPF_FUNC_jiffies64) {
20487                         struct bpf_insn ld_jiffies_addr[2] = {
20488                                 BPF_LD_IMM64(BPF_REG_0,
20489                                              (unsigned long)&jiffies),
20490                         };
20491 
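                              /* Roughly, the patched sequence below behaves like
                               *   r0 = (u64)&jiffies;
                               *   r0 = *(u64 *)(r0 + 0);
                               * i.e. the helper call becomes a direct 64-bit load
                               * of the jiffies counter (which is why this is only
                               * done when BITS_PER_LONG == 64).
                               */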
20492                         insn_buf[0] = ld_jiffies_addr[0];
20493                         insn_buf[1] = ld_jiffies_addr[1];
20494                         insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
20495                                                   BPF_REG_0, 0);
20496                         cnt = 3;
20497 
20498                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
20499                                                        cnt);
20500                         if (!new_prog)
20501                                 return -ENOMEM;
20502 
20503                         delta    += cnt - 1;
20504                         env->prog = prog = new_prog;
20505                         insn      = new_prog->insnsi + i + delta;
20506                         goto next_insn;
20507                 }
20508 
20509 #if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
20510                 /* Implement bpf_get_smp_processor_id() inline. */
20511                 if (insn->imm == BPF_FUNC_get_smp_processor_id &&
20512                     prog->jit_requested && bpf_jit_supports_percpu_insn()) {
20513                         /* BPF_FUNC_get_smp_processor_id inlining is an
20514                          * optimization, so if pcpu_hot.cpu_number is ever
20515                          * changed in some incompatible and hard to support
20516                          * way, it's fine to back out this inlining logic
20517                          */
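                              /* Roughly:
                               *   r0 = (u32)(unsigned long)&pcpu_hot.cpu_number;
                               *   r0 = this CPU's copy of r0;  (BPF_MOV64_PERCPU_REG)
                               *   r0 = *(u32 *)(r0 + 0);
                               * i.e. read pcpu_hot.cpu_number for the current CPU.
                               */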
20518                         insn_buf[0] = BPF_MOV32_IMM(BPF_REG_0, (u32)(unsigned long)&pcpu_hot.cpu_number);
20519                         insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
20520                         insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
20521                         cnt = 3;
20522 
20523                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
20524                         if (!new_prog)
20525                                 return -ENOMEM;
20526 
20527                         delta    += cnt - 1;
20528                         env->prog = prog = new_prog;
20529                         insn      = new_prog->insnsi + i + delta;
20530                         goto next_insn;
20531                 }
20532 #endif
20533                 /* Implement bpf_get_func_arg inline. */
20534                 if (prog_type == BPF_PROG_TYPE_TRACING &&
20535                     insn->imm == BPF_FUNC_get_func_arg) {
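                              /* Roughly equivalent to:
                               *   u64 nr_args = *(u64 *)(ctx - 8);
                               *   if (n >= nr_args)
                               *           return -EINVAL;
                               *   *value = *(u64 *)(ctx + n * 8);
                               *   return 0;
                               * with r1 = ctx, r2 = n, r3 = value.
                               */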
20536                         /* Load nr_args from ctx - 8 */
20537                         insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
20538                         insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
20539                         insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
20540                         insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
20541                         insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
20542                         insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
20543                         insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
20544                         insn_buf[7] = BPF_JMP_A(1);
20545                         insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
20546                         cnt = 9;
20547 
20548                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
20549                         if (!new_prog)
20550                                 return -ENOMEM;
20551 
20552                         delta    += cnt - 1;
20553                         env->prog = prog = new_prog;
20554                         insn      = new_prog->insnsi + i + delta;
20555                         goto next_insn;
20556                 }
20557 
20558                 /* Implement bpf_get_func_ret inline. */
20559                 if (prog_type == BPF_PROG_TYPE_TRACING &&
20560                     insn->imm == BPF_FUNC_get_func_ret) {
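                              /* For fexit/fmod_ret the return value is stored right
                               * after the arguments in the trampoline-built ctx, so
                               * this is roughly:
                               *   u64 nr_args = *(u64 *)(ctx - 8);
                               *   *value = *(u64 *)(ctx + nr_args * 8);
                               *   return 0;
                               * and -EOPNOTSUPP for other attach types.
                               */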
20561                         if (eatype == BPF_TRACE_FEXIT ||
20562                             eatype == BPF_MODIFY_RETURN) {
20563                                 /* Load nr_args from ctx - 8 */
20564                                 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
20565                                 insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
20566                                 insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
20567                                 insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
20568                                 insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
20569                                 insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
20570                                 cnt = 6;
20571                         } else {
20572                                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
20573                                 cnt = 1;
20574                         }
20575 
20576                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
20577                         if (!new_prog)
20578                                 return -ENOMEM;
20579 
20580                         delta    += cnt - 1;
20581                         env->prog = prog = new_prog;
20582                         insn      = new_prog->insnsi + i + delta;
20583                         goto next_insn;
20584                 }
20585 
20586                 /* Implement get_func_arg_cnt inline. */
20587                 if (prog_type == BPF_PROG_TYPE_TRACING &&
20588                     insn->imm == BPF_FUNC_get_func_arg_cnt) {
20589                         /* Load nr_args from ctx - 8 */
20590                         insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
20591 
20592                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
20593                         if (!new_prog)
20594                                 return -ENOMEM;
20595 
20596                         env->prog = prog = new_prog;
20597                         insn      = new_prog->insnsi + i + delta;
20598                         goto next_insn;
20599                 }
20600 
20601                 /* Implement bpf_get_func_ip inline. */
20602                 if (prog_type == BPF_PROG_TYPE_TRACING &&
20603                     insn->imm == BPF_FUNC_get_func_ip) {
20604                         /* Load IP address from ctx - 16 */
20605                         insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
20606 
20607                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
20608                         if (!new_prog)
20609                                 return -ENOMEM;
20610 
20611                         env->prog = prog = new_prog;
20612                         insn      = new_prog->insnsi + i + delta;
20613                         goto next_insn;
20614                 }
20615 
20616                 /* Implement bpf_get_branch_snapshot inline. */
20617                 if (IS_ENABLED(CONFIG_PERF_EVENTS) &&
20618                     prog->jit_requested && BITS_PER_LONG == 64 &&
20619                     insn->imm == BPF_FUNC_get_branch_snapshot) {
20620                         /* We are dealing with the following func protos:
20621                          * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags);
20622                          * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt);
20623                          */
20624                         const u32 br_entry_size = sizeof(struct perf_branch_entry);
20625 
20626                         /* struct perf_branch_entry is part of UAPI and is
20627                          * used as an array element, so extremely unlikely to
20628                          * ever grow or shrink
20629                          */
20630                         BUILD_BUG_ON(br_entry_size != 24);
20631 
20632                         /* if (unlikely(flags)) return -EINVAL */
20633                         insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7);
20634 
20635                         /* Transform size (bytes) into number of entries (cnt = size / 24).
20636                          * But to avoid expensive division instruction, we implement
20637                          * divide-by-3 through multiplication, followed by further
20638                          * division by 8 through 3-bit right shift.
20639                          * Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr.,
20640                          * p. 227, chapter "Unsigned Division by 3" for details and proofs.
20641                          *
20642                          * N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab.
20643                          */
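                              /* e.g. size == 72 (room for 3 entries):
                               *   72 * 0xaaaaaaab == 0x3000000018,
                               *   0x3000000018 >> 36 == 3
                               */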
20644                         insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab);
20645                         insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0);
20646                         insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36);
20647 
20648                         /* call perf_snapshot_branch_stack implementation */
20649                         insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack));
20650                         /* if (entry_cnt == 0) return -ENOENT */
20651                         insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4);
20652                         /* return entry_cnt * sizeof(struct perf_branch_entry) */
20653                         insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size);
20654                         insn_buf[7] = BPF_JMP_A(3);
20655                         /* return -EINVAL; */
20656                         insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
20657                         insn_buf[9] = BPF_JMP_A(1);
20658                         /* return -ENOENT; */
20659                         insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT);
20660                         cnt = 11;
20661 
20662                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
20663                         if (!new_prog)
20664                                 return -ENOMEM;
20665 
20666                         delta    += cnt - 1;
20667                         env->prog = prog = new_prog;
20668                         insn      = new_prog->insnsi + i + delta;
20669                         continue;
20670                 }
20671 
20672                 /* Implement bpf_kptr_xchg inline */
20673                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
20674                     insn->imm == BPF_FUNC_kptr_xchg &&
20675                     bpf_jit_supports_ptr_xchg()) {
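                              /* Roughly:
                               *   r0 = r2;
                               *   r0 = xchg((u64 *)(r1 + 0), r0);
                               * i.e. the helper call becomes a single exchange
                               * instruction and r0 ends up holding the old kptr,
                               * matching what bpf_kptr_xchg() returns.
                               */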
20676                         insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
20677                         insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
20678                         cnt = 2;
20679 
20680                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
20681                         if (!new_prog)
20682                                 return -ENOMEM;
20683 
20684                         delta    += cnt - 1;
20685                         env->prog = prog = new_prog;
20686                         insn      = new_prog->insnsi + i + delta;
20687                         goto next_insn;
20688                 }
20689 patch_call_imm:
20690                 fn = env->ops->get_func_proto(insn->imm, env->prog);
20691                 /* all functions that have a prototype and that the verifier
20692                  * allowed programs to call must be real in-kernel functions
20693                  */
20694                 if (!fn->func) {
20695                         verbose(env,
20696                                 "kernel subsystem misconfigured func %s#%d\n",
20697                                 func_id_name(insn->imm), insn->imm);
20698                         return -EFAULT;
20699                 }
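                      /* Encode the callee as an offset from __bpf_call_base; the
                       * interpreter and JITs resolve it back to the real helper
                       * address at call time.
                       */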
20700                 insn->imm = fn->func - __bpf_call_base;
20701 next_insn:
20702                 if (subprogs[cur_subprog + 1].start == i + delta + 1) {
20703                         subprogs[cur_subprog].stack_depth += stack_depth_extra;
20704                         subprogs[cur_subprog].stack_extra = stack_depth_extra;
20705                         cur_subprog++;
20706                         stack_depth = subprogs[cur_subprog].stack_depth;
20707                         stack_depth_extra = 0;
20708                 }
20709                 i++;
20710                 insn++;
20711         }
20712 
20713         env->prog->aux->stack_depth = subprogs[0].stack_depth;
20714         for (i = 0; i < env->subprog_cnt; i++) {
20715                 int subprog_start = subprogs[i].start;
20716                 int stack_slots = subprogs[i].stack_extra / 8;
20717 
20718                 if (!stack_slots)
20719                         continue;
20720                 if (stack_slots > 1) {
20721                         verbose(env, "verifier bug: stack_slots supports may_goto only\n");
20722                         return -EFAULT;
20723                 }
20724 
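                      /* The single extra slot holds the may_goto iteration count:
                       * it is seeded with BPF_MAX_LOOPS here and, roughly, each
                       * may_goto decrements it and takes the jump once it reaches
                       * zero.
                       */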
20725                 /* Add ST insn to subprog prologue to init extra stack */
20726                 insn_buf[0] = BPF_ST_MEM(BPF_DW, BPF_REG_FP,
20727                                          -subprogs[i].stack_depth, BPF_MAX_LOOPS);
20728                 /* Copy first actual insn to preserve it */
20729                 insn_buf[1] = env->prog->insnsi[subprog_start];
20730 
20731                 new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, 2);
20732                 if (!new_prog)
20733                         return -ENOMEM;
20734                 env->prog = prog = new_prog;
20735                 /*
20736                  * If may_goto is the first insn of a prog there could be a jmp
20737                  * insn that points to it, hence adjust all such jmps to point
20738                  * to the insn after the BPF_ST that inits the may_goto count.
20739                  * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
20740                  */
20741                 WARN_ON(adjust_jmp_off(env->prog, subprog_start, 1));
20742         }
20743 
20744         /* Since poke tab is now finalized, publish aux to tracker. */
20745         for (i = 0; i < prog->aux->size_poke_tab; i++) {
20746                 map_ptr = prog->aux->poke_tab[i].tail_call.map;
20747                 if (!map_ptr->ops->map_poke_track ||
20748                     !map_ptr->ops->map_poke_untrack ||
20749                     !map_ptr->ops->map_poke_run) {
20750                         verbose(env, "bpf verifier is misconfigured\n");
20751                         return -EINVAL;
20752                 }
20753 
20754                 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
20755                 if (ret < 0) {
20756                         verbose(env, "tracking tail call prog failed\n");
20757                         return ret;
20758                 }
20759         }
20760 
20761         sort_kfunc_descs_by_imm_off(env->prog);
20762 
20763         return 0;
20764 }
20765 
20766 static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
20767                                         int position,
20768                                         s32 stack_base,
20769                                         u32 callback_subprogno,
20770                                         u32 *cnt)
20771 {
20772         s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
20773         s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
20774         s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
20775         int reg_loop_max = BPF_REG_6;
20776         int reg_loop_cnt = BPF_REG_7;
20777         int reg_loop_ctx = BPF_REG_8;
20778 
20779         struct bpf_prog *new_prog;
20780         u32 callback_start;
20781         u32 call_insn_offset;
20782         s32 callback_offset;
20783 
20784         /* This represents an inlined version of bpf_iter.c:bpf_loop;
20785          * take care to keep the two in sync when modifying either one.
20786          */
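              /* Roughly:
               *   if (nr_loops > BPF_MAX_LOOPS)
               *           return -E2BIG;
               *   for (i = 0; i < nr_loops; i++) {
               *           if (callback_fn(i, callback_ctx))
               *                   return i + 1;
               *   }
               *   return i;
               * with R6/R7/R8 spilled to the stack so they can carry
               * nr_loops, i and callback_ctx across the callback calls.
               */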
20787         struct bpf_insn insn_buf[] = {
20788                 /* Return error and jump to the end of the patch if
20789                  * expected number of iterations is too big.
20790                  */
20791                 BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
20792                 BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
20793                 BPF_JMP_IMM(BPF_JA, 0, 0, 16),
20794                 /* spill R6, R7, R8 to use these as loop vars */
20795                 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
20796                 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
20797                 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
20798                 /* initialize loop vars */
20799                 BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
20800                 BPF_MOV32_IMM(reg_loop_cnt, 0),
20801                 BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
20802                 /* loop header,
20803                  * if reg_loop_cnt >= reg_loop_max skip the loop body
20804                  */
20805                 BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
20806                 /* callback call,
20807                  * correct callback offset would be set after patching
20808                  */
20809                 BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
20810                 BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
20811                 BPF_CALL_REL(0),
20812                 /* increment loop counter */
20813                 BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
20814                 /* jump to loop header if callback returned 0 */
20815                 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
20816                 /* return value of bpf_loop,
20817                  * set R0 to the number of iterations
20818                  */
20819                 BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
20820                 /* restore original values of R6, R7, R8 */
20821                 BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
20822                 BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
20823                 BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
20824         };
20825 
20826         *cnt = ARRAY_SIZE(insn_buf);
20827         new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
20828         if (!new_prog)
20829                 return new_prog;
20830 
20831         /* callback start is known only after patching */
20832         callback_start = env->subprog_info[callback_subprogno].start;
20833         /* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
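              /* The imm of a relative call is taken relative to the insn that
               * follows the call, hence the extra "- 1" below.
               */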
20834         call_insn_offset = position + 12;
20835         callback_offset = callback_start - call_insn_offset - 1;
20836         new_prog->insnsi[call_insn_offset].imm = callback_offset;
20837 
20838         return new_prog;
20839 }
20840 
20841 static bool is_bpf_loop_call(struct bpf_insn *insn)
20842 {
20843         return insn->code == (BPF_JMP | BPF_CALL) &&
20844                 insn->src_reg == 0 &&
20845                 insn->imm == BPF_FUNC_loop;
20846 }
20847 
20848 /* For all sub-programs in the program (including main), check
20849  * insn_aux_data to see if there are bpf_loop calls that require
20850  * inlining. If such calls are found, they are replaced with the
20851  * sequence of instructions produced by inline_bpf_loop() and the
20852  * subprog's stack_depth is increased by the size of 3 registers.
20853  * This stack space is used to spill R6, R7 and R8, which are then
20854  * used to store the loop bound, counter and context variables
20855  * within the inlined loop.
20856  */
20857 static int optimize_bpf_loop(struct bpf_verifier_env *env)
20858 {
20859         struct bpf_subprog_info *subprogs = env->subprog_info;
20860         int i, cur_subprog = 0, cnt, delta = 0;
20861         struct bpf_insn *insn = env->prog->insnsi;
20862         int insn_cnt = env->prog->len;
20863         u16 stack_depth = subprogs[cur_subprog].stack_depth;
20864         u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
20865         u16 stack_depth_extra = 0;
20866 
20867         for (i = 0; i < insn_cnt; i++, insn++) {
20868                 struct bpf_loop_inline_state *inline_state =
20869                         &env->insn_aux_data[i + delta].loop_inline_state;
20870 
20871                 if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
20872                         struct bpf_prog *new_prog;
20873 
20874                         stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
20875                         new_prog = inline_bpf_loop(env,
20876                                                    i + delta,
20877                                                    -(stack_depth + stack_depth_extra),
20878                                                    inline_state->callback_subprogno,
20879                                                    &cnt);
20880                         if (!new_prog)
20881                                 return -ENOMEM;
20882 
20883                         delta     += cnt - 1;
20884                         env->prog  = new_prog;
20885                         insn       = new_prog->insnsi + i + delta;
20886                 }
20887 
20888                 if (subprogs[cur_subprog + 1].start == i + delta + 1) {
20889                         subprogs[cur_subprog].stack_depth += stack_depth_extra;
20890                         cur_subprog++;
20891                         stack_depth = subprogs[cur_subprog].stack_depth;
20892                         stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
20893                         stack_depth_extra = 0;
20894                 }
20895         }
20896 
20897         env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
20898 
20899         return 0;
20900 }
20901 
20902 static void free_states(struct bpf_verifier_env *env)
20903 {
20904         struct bpf_verifier_state_list *sl, *sln;
20905         int i;
20906 
20907         sl = env->free_list;
20908         while (sl) {
20909                 sln = sl->next;
20910                 free_verifier_state(&sl->state, false);
20911                 kfree(sl);
20912                 sl = sln;
20913         }
20914         env->free_list = NULL;
20915 
20916         if (!env->explored_states)
20917                 return;
20918 
20919         for (i = 0; i < state_htab_size(env); i++) {
20920                 sl = env->explored_states[i];
20921 
20922                 while (sl) {
20923                         sln = sl->next;
20924                         free_verifier_state(&sl->state, false);
20925                         kfree(sl);
20926                         sl = sln;
20927                 }
20928                 env->explored_states[i] = NULL;
20929         }
20930 }
20931 
20932 static int do_check_common(struct bpf_verifier_env *env, int subprog)
20933 {
20934         bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
20935         struct bpf_subprog_info *sub = subprog_info(env, subprog);
20936         struct bpf_verifier_state *state;
20937         struct bpf_reg_state *regs;
20938         int ret, i;
20939 
20940         env->prev_linfo = NULL;
20941         env->pass_cnt++;
20942 
20943         state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
20944         if (!state)
20945                 return -ENOMEM;
20946         state->curframe = 0;
20947         state->speculative = false;
20948         state->branches = 1;
20949         state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
20950         if (!state->frame[0]) {
20951                 kfree(state);
20952                 return -ENOMEM;
20953         }
20954         env->cur_state = state;
20955         init_func_state(env, state->frame[0],
20956                         BPF_MAIN_FUNC /* callsite */,
20957                         0 /* frameno */,
20958                         subprog);
20959         state->first_insn_idx = env->subprog_info[subprog].start;
20960         state->last_insn_idx = -1;
20961 
20962         regs = state->frame[state->curframe]->regs;
20963         if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
20964                 const char *sub_name = subprog_name(env, subprog);
20965                 struct bpf_subprog_arg_info *arg;
20966                 struct bpf_reg_state *reg;
20967 
20968                 verbose(env, "Validating %s() func#%d...\n", sub_name, subprog);
20969                 ret = btf_prepare_func_args(env, subprog);
20970                 if (ret)
20971                         goto out;
20972 
20973                 if (subprog_is_exc_cb(env, subprog)) {
20974                         state->frame[0]->in_exception_callback_fn = true;
20975                         /* We have already ensured that the callback returns an integer, just
20976                          * like all global subprogs. Here we only need to ensure that it takes
20977                          * a single scalar argument.
20978                          */
20979                         if (sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_ANYTHING) {
20980                                 verbose(env, "exception cb only supports single integer argument\n");
20981                                 ret = -EINVAL;
20982                                 goto out;
20983                         }
20984                 }
20985                 for (i = BPF_REG_1; i <= sub->arg_cnt; i++) {
20986                         arg = &sub->args[i - BPF_REG_1];
20987                         reg = &regs[i];
20988 
20989                         if (arg->arg_type == ARG_PTR_TO_CTX) {
20990                                 reg->type = PTR_TO_CTX;
20991                                 mark_reg_known_zero(env, regs, i);
20992                         } else if (arg->arg_type == ARG_ANYTHING) {
20993                                 reg->type = SCALAR_VALUE;
20994                                 mark_reg_unknown(env, regs, i);
20995                         } else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) {
20996                                 /* assume the default (non-special) LOCAL dynptr type */
20997                                 __mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen);
20998                         } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
20999                                 reg->type = PTR_TO_MEM;
21000                                 if (arg->arg_type & PTR_MAYBE_NULL)
21001                                         reg->type |= PTR_MAYBE_NULL;
21002                                 mark_reg_known_zero(env, regs, i);
21003                                 reg->mem_size = arg->mem_size;
21004                                 reg->id = ++env->id_gen;
21005                         } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
21006                                 reg->type = PTR_TO_BTF_ID;
21007                                 if (arg->arg_type & PTR_MAYBE_NULL)
21008                                         reg->type |= PTR_MAYBE_NULL;
21009                                 if (arg->arg_type & PTR_UNTRUSTED)
21010                                         reg->type |= PTR_UNTRUSTED;
21011                                 if (arg->arg_type & PTR_TRUSTED)
21012                                         reg->type |= PTR_TRUSTED;
21013                                 mark_reg_known_zero(env, regs, i);
21014                                 reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */
21015                                 reg->btf_id = arg->btf_id;
21016                                 reg->id = ++env->id_gen;
21017                         } else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
21018                                 /* caller can pass either PTR_TO_ARENA or SCALAR */
21019                                 mark_reg_unknown(env, regs, i);
21020                         } else {
21021                                 WARN_ONCE(1, "BUG: unhandled arg#%d type %d\n",
21022                                           i - BPF_REG_1, arg->arg_type);
21023                                 ret = -EFAULT;
21024                                 goto out;
21025                         }
21026                 }
21027         } else {
21028                 /* if the main BPF program has associated BTF info, validate
21029                  * that it matches the expected signature; otherwise mark the
21030                  * BTF info for the main program as unreliable
21031                  */
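                      /* e.g. a main program whose BTF signature is
                       *   int prog(struct xdp_md *ctx);
                       * (a single context argument) keeps its func_info marked
                       * reliable; any other signature (or a failed
                       * btf_prepare_func_args()) only marks the BTF info
                       * unreliable rather than rejecting the program.
                       */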
21032                 if (env->prog->aux->func_info_aux) {
21033                         ret = btf_prepare_func_args(env, 0);
21034                         if (ret || sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_PTR_TO_CTX)
21035                                 env->prog->aux->func_info_aux[0].unreliable = true;
21036                 }
21037 
21038                 /* 1st arg to a function */
21039                 regs[BPF_REG_1].type = PTR_TO_CTX;
21040                 mark_reg_known_zero(env, regs, BPF_REG_1);
21041         }
21042 
21043         ret = do_check(env);
21044 out:
21045         /* check for NULL is necessary, since cur_state can be freed inside
21046          * do_check() under memory pressure.
21047          */
21048         if (env->cur_state) {
21049                 free_verifier_state(env->cur_state, true);
21050                 env->cur_state = NULL;
21051         }
21052         while (!pop_stack(env, NULL, NULL, false));
21053         if (!ret && pop_log)
21054                 bpf_vlog_reset(&env->log, 0);
21055         free_states(env);
21056         return ret;
21057 }
21058 
21059 /* Lazily verify all global functions based on their BTF, if they are called
21060  * from main BPF program or any of subprograms transitively.
21061  * BPF global subprogs called from dead code are not validated.
21062  * All callable global functions must pass verification.
21063  * Otherwise the whole program is rejected.
21064  * Consider:
21065  * int bar(int);
21066  * int foo(int f)
21067  * {
21068  *    return bar(f);
21069  * }
21070  * int bar(int b)
21071  * {
21072  *    ...
21073  * }
21074  * foo() will be verified first for R1=any_scalar_value. During its verification
21075  * bar() is assumed to have already been verified successfully, so the call to
21076  * bar() from foo() is checked for type match only. Later bar() is verified
21077  * independently to check that it is safe for R1=any_scalar_value.
21078  */
21079 static int do_check_subprogs(struct bpf_verifier_env *env)
21080 {
21081         struct bpf_prog_aux *aux = env->prog->aux;
21082         struct bpf_func_info_aux *sub_aux;
21083         int i, ret, new_cnt;
21084 
21085         if (!aux->func_info)
21086                 return 0;
21087 
21088         /* exception callback is presumed to be always called */
21089         if (env->exception_callback_subprog)
21090                 subprog_aux(env, env->exception_callback_subprog)->called = true;
21091 
21092 again:
21093         new_cnt = 0;
21094         for (i = 1; i < env->subprog_cnt; i++) {
21095                 if (!subprog_is_global(env, i))
21096                         continue;
21097 
21098                 sub_aux = subprog_aux(env, i);
21099                 if (!sub_aux->called || sub_aux->verified)
21100                         continue;
21101 
21102                 env->insn_idx = env->subprog_info[i].start;
21103                 WARN_ON_ONCE(env->insn_idx == 0);
21104                 ret = do_check_common(env, i);
21105                 if (ret) {
21106                         return ret;
21107                 } else if (env->log.level & BPF_LOG_LEVEL) {
21108                         verbose(env, "Func#%d ('%s') is safe for any args that match its prototype\n",
21109                                 i, subprog_name(env, i));
21110                 }
21111 
21112                 /* We verified new global subprog, it might have called some
21113                  * more global subprogs that we haven't verified yet, so we
21114                  * need to do another pass over subprogs to verify those.
21115                  */
21116                 sub_aux->verified = true;
21117                 new_cnt++;
21118         }
21119 
21120         /* We can't loop forever as we verify at least one global subprog on
21121          * each pass.
21122          */
21123         if (new_cnt)
21124                 goto again;
21125 
21126         return 0;
21127 }
21128 
21129 static int do_check_main(struct bpf_verifier_env *env)
21130 {
21131         int ret;
21132 
21133         env->insn_idx = 0;
21134         ret = do_check_common(env, 0);
21135         if (!ret)
21136                 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
21137         return ret;
21138 }
21139 
21140 
21141 static void print_verification_stats(struct bpf_verifier_env *env)
21142 {
21143         int i;
21144 
21145         if (env->log.level & BPF_LOG_STATS) {
21146                 verbose(env, "verification time %lld usec\n",
21147                         div_u64(env->verification_time, 1000));
21148                 verbose(env, "stack depth ");
21149                 for (i = 0; i < env->subprog_cnt; i++) {
21150                         u32 depth = env->subprog_info[i].stack_depth;
21151 
21152                         verbose(env, "%d", depth);
21153                         if (i + 1 < env->subprog_cnt)
21154                                 verbose(env, "+");
21155                 }
21156                 verbose(env, "\n");
21157         }
21158         verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
21159                 "total_states %d peak_states %d mark_read %d\n",
21160                 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
21161                 env->max_states_per_insn, env->total_states,
21162                 env->peak_states, env->longest_mark_read_walk);
21163 }
21164 
21165 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
21166 {
21167         const struct btf_type *t, *func_proto;
21168         const struct bpf_struct_ops_desc *st_ops_desc;
21169         const struct bpf_struct_ops *st_ops;
21170         const struct btf_member *member;
21171         struct bpf_prog *prog = env->prog;
21172         u32 btf_id, member_idx;
21173         struct btf *btf;
21174         const char *mname;
21175 
21176         if (!prog->gpl_compatible) {
21177                 verbose(env, "struct ops programs must have a GPL compatible license\n");
21178                 return -EINVAL;
21179         }
21180 
21181         if (!prog->aux->attach_btf_id)
21182                 return -ENOTSUPP;
21183 
21184         btf = prog->aux->attach_btf;
21185         if (btf_is_module(btf)) {
21186                 /* Make sure st_ops is valid through the lifetime of env */
21187                 env->attach_btf_mod = btf_try_get_module(btf);
21188                 if (!env->attach_btf_mod) {
21189                         verbose(env, "struct_ops module %s is not found\n",
21190                                 btf_get_name(btf));
21191                         return -ENOTSUPP;
21192                 }
21193         }
21194 
21195         btf_id = prog->aux->attach_btf_id;
21196         st_ops_desc = bpf_struct_ops_find(btf, btf_id);
21197         if (!st_ops_desc) {
21198                 verbose(env, "attach_btf_id %u is not a supported struct\n",
21199                         btf_id);
21200                 return -ENOTSUPP;
21201         }
21202         st_ops = st_ops_desc->st_ops;
21203 
21204         t = st_ops_desc->type;
21205         member_idx = prog->expected_attach_type;
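              /* For struct_ops programs, expected_attach_type carries the index
               * of the struct member being implemented, e.g. the .ssthresh slot
               * of struct tcp_congestion_ops for a BPF TCP congestion control.
               */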
21206         if (member_idx >= btf_type_vlen(t)) {
21207                 verbose(env, "attach to invalid member idx %u of struct %s\n",
21208                         member_idx, st_ops->name);
21209                 return -EINVAL;
21210         }
21211 
21212         member = &btf_type_member(t)[member_idx];
21213         mname = btf_name_by_offset(btf, member->name_off);
21214         func_proto = btf_type_resolve_func_ptr(btf, member->type,
21215                                                NULL);
21216         if (!func_proto) {
21217                 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
21218                         mname, member_idx, st_ops->name);
21219                 return -EINVAL;
21220         }
21221 
21222         if (st_ops->check_member) {
21223                 int err = st_ops->check_member(t, member, prog);
21224 
21225                 if (err) {
21226                         verbose(env, "attach to unsupported member %s of struct %s\n",
21227                                 mname, st_ops->name);
21228                         return err;
21229                 }
21230         }
21231 
21232         /* btf_ctx_access() uses this to provide argument type info */
21233         prog->aux->ctx_arg_info =
21234                 st_ops_desc->arg_info[member_idx].info;
21235         prog->aux->ctx_arg_info_size =
21236                 st_ops_desc->arg_info[member_idx].cnt;
21237 
21238         prog->aux->attach_func_proto = func_proto;
21239         prog->aux->attach_func_name = mname;
21240         env->ops = st_ops->verifier_ops;
21241 
21242         return 0;
21243 }
21244 #define SECURITY_PREFIX "security_"
21245 
21246 static int check_attach_modify_return(unsigned long addr, const char *func_name)
21247 {
21248         if (within_error_injection_list(addr) ||
21249             !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
21250                 return 0;
21251 
21252         return -EINVAL;
21253 }
21254 
21255 /* list of non-sleepable functions that are otherwise on the
21256  * ALLOW_ERROR_INJECTION list
21257  */
21258 BTF_SET_START(btf_non_sleepable_error_inject)
21259 /* The three functions below can be called from both sleepable and non-sleepable
21260  * context. Assume they are non-sleepable from the BPF safety point of view.
21261  */
21262 BTF_ID(func, __filemap_add_folio)
21263 #ifdef CONFIG_FAIL_PAGE_ALLOC
21264 BTF_ID(func, should_fail_alloc_page)
21265 #endif
21266 #ifdef CONFIG_FAILSLAB
21267 BTF_ID(func, should_failslab)
21268 #endif
21269 BTF_SET_END(btf_non_sleepable_error_inject)
21270 
21271 static int check_non_sleepable_error_inject(u32 btf_id)
21272 {
21273         return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
21274 }
21275 
21276 int bpf_check_attach_target(struct bpf_verifier_log *log,
21277                             const struct bpf_prog *prog,
21278                             const struct bpf_prog *tgt_prog,
21279                             u32 btf_id,
21280                             struct bpf_attach_target_info *tgt_info)
21281 {
21282         bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
21283         bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
21284         const char prefix[] = "btf_trace_";
21285         int ret = 0, subprog = -1, i;
21286         const struct btf_type *t;
21287         bool conservative = true;
21288         const char *tname;
21289         struct btf *btf;
21290         long addr = 0;
21291         struct module *mod = NULL;
21292 
21293         if (!btf_id) {
21294                 bpf_log(log, "Tracing programs must provide btf_id\n");
21295                 return -EINVAL;
21296         }
21297         btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
21298         if (!btf) {
21299                 bpf_log(log,
21300                         "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
21301                 return -EINVAL;
21302         }
21303         t = btf_type_by_id(btf, btf_id);
21304         if (!t) {
21305                 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
21306                 return -EINVAL;
21307         }
21308         tname = btf_name_by_offset(btf, t->name_off);
21309         if (!tname) {
21310                 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
21311                 return -EINVAL;
21312         }
21313         if (tgt_prog) {
21314                 struct bpf_prog_aux *aux = tgt_prog->aux;
21315 
21316                 if (bpf_prog_is_dev_bound(prog->aux) &&
21317                     !bpf_prog_dev_bound_match(prog, tgt_prog)) {
21318                         bpf_log(log, "Target program bound device mismatch");
21319                         return -EINVAL;
21320                 }
21321 
21322                 for (i = 0; i < aux->func_info_cnt; i++)
21323                         if (aux->func_info[i].type_id == btf_id) {
21324                                 subprog = i;
21325                                 break;
21326                         }
21327                 if (subprog == -1) {
21328                         bpf_log(log, "Subprog %s doesn't exist\n", tname);
21329                         return -EINVAL;
21330                 }
21331                 if (aux->func && aux->func[subprog]->aux->exception_cb) {
21332                         bpf_log(log,
21333                                 "%s programs cannot attach to exception callback\n",
21334                                 prog_extension ? "Extension" : "FENTRY/FEXIT");
21335                         return -EINVAL;
21336                 }
21337                 conservative = aux->func_info_aux[subprog].unreliable;
21338                 if (prog_extension) {
21339                         if (conservative) {
21340                                 bpf_log(log,
21341                                         "Cannot replace static functions\n");
21342                                 return -EINVAL;
21343                         }
21344                         if (!prog->jit_requested) {
21345                                 bpf_log(log,
21346                                         "Extension programs should be JITed\n");
21347                                 return -EINVAL;
21348                         }
21349                 }
21350                 if (!tgt_prog->jited) {
21351                         bpf_log(log, "Can attach to only JITed progs\n");
21352                         return -EINVAL;
21353                 }
21354                 if (prog_tracing) {
21355                         if (aux->attach_tracing_prog) {
21356                                 /*
21357                                  * Target program is an fentry/fexit which is already attached
21358                                  * to another tracing program. More levels of nesting
21359                                  * attachment are not allowed.
21360                                  */
21361                                 bpf_log(log, "Cannot nest tracing program attach more than once\n");
21362                                 return -EINVAL;
21363                         }
21364                 } else if (tgt_prog->type == prog->type) {
21365                         /*
21366                          * To avoid potential call chain cycles, prevent attaching a
21367                          * program extension to another extension. It's ok to attach
21368                          * fentry/fexit to an extension program.
21369                          */
21370                         bpf_log(log, "Cannot recursively attach\n");
21371                         return -EINVAL;
21372                 }
21373                 if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
21374                     prog_extension &&
21375                     (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
21376                      tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
21377                         /* Program extensions can extend all program types
21378                          * except fentry/fexit, for the following reason:
21379                          * fentry/fexit programs are used for performance
21380                          * analysis and stats, and can be attached to any
21381                          * program type. When an extension replaces an XDP
21382                          * function, performance analysis of all functions,
21383                          * both the original XDP program and its extension,
21384                          * must remain possible, hence attaching fentry/fexit
21385                          * to BPF_PROG_TYPE_EXT is allowed. If extending
21386                          * fentry/fexit were allowed, a long call chain
21387                          * fentry->extension->fentry->extension could be
21388                          * created, exceeding a reasonable stack size. Hence
21389                          * extending fentry/fexit is not allowed.
21390                          */
21391                         bpf_log(log, "Cannot extend fentry/fexit\n");
21392                         return -EINVAL;
21393                 }
21394         } else {
21395                 if (prog_extension) {
21396                         bpf_log(log, "Cannot replace kernel functions\n");
21397                         return -EINVAL;
21398                 }
21399         }
21400 
21401         switch (prog->expected_attach_type) {
21402         case BPF_TRACE_RAW_TP:
21403                 if (tgt_prog) {
21404                         bpf_log(log,
21405                                 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
21406                         return -EINVAL;
21407                 }
21408                 if (!btf_type_is_typedef(t)) {
21409                         bpf_log(log, "attach_btf_id %u is not a typedef\n",
21410                                 btf_id);
21411                         return -EINVAL;
21412                 }
21413                 if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
21414                         bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
21415                                 btf_id, tname);
21416                         return -EINVAL;
21417                 }
21418                 tname += sizeof(prefix) - 1;
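                      /* e.g. for the sched_switch tracepoint, attach_btf_id names
                       * the "btf_trace_sched_switch" typedef; stripping the prefix
                       * leaves tname == "sched_switch", and the typedef resolves
                       * to a pointer to the tracepoint's function prototype, which
                       * is checked below.
                       */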
21419                 t = btf_type_by_id(btf, t->type);
21420                 if (!btf_type_is_ptr(t))
21421                         /* should never happen in valid vmlinux build */
21422                         return -EINVAL;
21423                 t = btf_type_by_id(btf, t->type);
21424                 if (!btf_type_is_func_proto(t))
21425                         /* should never happen in valid vmlinux build */
21426                         return -EINVAL;
21427 
21428                 break;
21429         case BPF_TRACE_ITER:
21430                 if (!btf_type_is_func(t)) {
21431                         bpf_log(log, "attach_btf_id %u is not a function\n",
21432                                 btf_id);
21433                         return -EINVAL;
21434                 }
21435                 t = btf_type_by_id(btf, t->type);
21436                 if (!btf_type_is_func_proto(t))
21437                         return -EINVAL;
21438                 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
21439                 if (ret)
21440                         return ret;
21441                 break;
21442         default:
21443                 if (!prog_extension)
21444                         return -EINVAL;
21445                 fallthrough;
21446         case BPF_MODIFY_RETURN:
21447         case BPF_LSM_MAC:
21448         case BPF_LSM_CGROUP:
21449         case BPF_TRACE_FENTRY:
21450         case BPF_TRACE_FEXIT:
21451                 if (!btf_type_is_func(t)) {
21452                         bpf_log(log, "attach_btf_id %u is not a function\n",
21453                                 btf_id);
21454                         return -EINVAL;
21455                 }
21456                 if (prog_extension &&
21457                     btf_check_type_match(log, prog, btf, t))
21458                         return -EINVAL;
21459                 t = btf_type_by_id(btf, t->type);
21460                 if (!btf_type_is_func_proto(t))
21461                         return -EINVAL;
21462 
21463                 if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
21464                     (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
21465                      prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
21466                         return -EINVAL;
21467 
21468                 if (tgt_prog && conservative)
21469                         t = NULL;
21470 
21471                 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
21472                 if (ret < 0)
21473                         return ret;
21474 
21475                 if (tgt_prog) {
21476                         if (subprog == 0)
21477                                 addr = (long) tgt_prog->bpf_func;
21478                         else
21479                                 addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
21480                 } else {
21481                         if (btf_is_module(btf)) {
21482                                 mod = btf_try_get_module(btf);
21483                                 if (mod)
21484                                         addr = find_kallsyms_symbol_value(mod, tname);
21485                                 else
21486                                         addr = 0;
21487                         } else {
21488                                 addr = kallsyms_lookup_name(tname);
21489                         }
21490                         if (!addr) {
21491                                 module_put(mod);
21492                                 bpf_log(log,
21493                                         "The address of function %s cannot be found\n",
21494                                         tname);
21495                                 return -ENOENT;
21496                         }
21497                 }
21498 
21499                 if (prog->sleepable) {
21500                         ret = -EINVAL;
21501                         switch (prog->type) {
21502                         case BPF_PROG_TYPE_TRACING:
21503 
21504                                 /* fentry/fexit/fmod_ret progs can be sleepable if they are
21505                                  * attached to ALLOW_ERROR_INJECTION and are not in denylist.
21506                                  */
21507                                 if (!check_non_sleepable_error_inject(btf_id) &&
21508                                     within_error_injection_list(addr))
21509                                         ret = 0;
21510                                 /* fentry/fexit/fmod_ret progs can also be sleepable if they are
21511                                  * in the fmodret id set with the KF_SLEEPABLE flag.
21512                                  */
21513                                 else {
21514                                         u32 *flags = btf_kfunc_is_modify_return(btf, btf_id,
21515                                                                                 prog);
21516 
21517                                         if (flags && (*flags & KF_SLEEPABLE))
21518                                                 ret = 0;
21519                                 }
21520                                 break;
21521                         case BPF_PROG_TYPE_LSM:
21522                                 /* LSM progs check that they are attached to bpf_lsm_*() funcs.
21523                                  * Only some of them are sleepable.
21524                                  */
21525                                 if (bpf_lsm_is_sleepable_hook(btf_id))
21526                                         ret = 0;
21527                                 break;
21528                         default:
21529                                 break;
21530                         }
21531                         if (ret) {
21532                                 module_put(mod);
21533                                 bpf_log(log, "%s is not sleepable\n", tname);
21534                                 return ret;
21535                         }
21536                 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
21537                         if (tgt_prog) {
21538                                 module_put(mod);
21539                                 bpf_log(log, "can't modify return codes of BPF programs\n");
21540                                 return -EINVAL;
21541                         }
21542                         ret = -EINVAL;
21543                         if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
21544                             !check_attach_modify_return(addr, tname))
21545                                 ret = 0;
21546                         if (ret) {
21547                                 module_put(mod);
21548                                 bpf_log(log, "%s() is not modifiable\n", tname);
21549                                 return ret;
21550                         }
21551                 }
21552 
21553                 break;
21554         }
21555         tgt_info->tgt_addr = addr;
21556         tgt_info->tgt_name = tname;
21557         tgt_info->tgt_type = t;
21558         tgt_info->tgt_mod = mod;
21559         return 0;
21560 }
21561 
21562 BTF_SET_START(btf_id_deny)
21563 BTF_ID_UNUSED
21564 #ifdef CONFIG_SMP
21565 BTF_ID(func, migrate_disable)
21566 BTF_ID(func, migrate_enable)
21567 #endif
21568 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
21569 BTF_ID(func, rcu_read_unlock_strict)
21570 #endif
21571 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
21572 BTF_ID(func, preempt_count_add)
21573 BTF_ID(func, preempt_count_sub)
21574 #endif
21575 #ifdef CONFIG_PREEMPT_RCU
21576 BTF_ID(func, __rcu_read_lock)
21577 BTF_ID(func, __rcu_read_unlock)
21578 #endif
21579 BTF_SET_END(btf_id_deny)
21580 
21581 static bool can_be_sleepable(struct bpf_prog *prog)
21582 {
21583         if (prog->type == BPF_PROG_TYPE_TRACING) {
21584                 switch (prog->expected_attach_type) {
21585                 case BPF_TRACE_FENTRY:
21586                 case BPF_TRACE_FEXIT:
21587                 case BPF_MODIFY_RETURN:
21588                 case BPF_TRACE_ITER:
21589                         return true;
21590                 default:
21591                         return false;
21592                 }
21593         }
21594         return prog->type == BPF_PROG_TYPE_LSM ||
21595                prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
21596                prog->type == BPF_PROG_TYPE_STRUCT_OPS;
21597 }
21598 
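      /* Validate prog->aux->attach_btf_id for tracing, LSM and extension
       * (freplace) programs: resolve the attach target via
       * bpf_check_attach_target(), let freplace programs inherit env->ops and
       * expected_attach_type from their target, apply the btf_id_deny set and
       * pre-allocate the trampoline the program will attach through.
       */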
21599 static int check_attach_btf_id(struct bpf_verifier_env *env)
21600 {
21601         struct bpf_prog *prog = env->prog;
21602         struct bpf_prog *tgt_prog = prog->aux->dst_prog;
21603         struct bpf_attach_target_info tgt_info = {};
21604         u32 btf_id = prog->aux->attach_btf_id;
21605         struct bpf_trampoline *tr;
21606         int ret;
21607         u64 key;
21608 
21609         if (prog->type == BPF_PROG_TYPE_SYSCALL) {
21610                 if (prog->sleepable)
21611                         /* attach_btf_id checked to be zero already */
21612                         return 0;
21613                 verbose(env, "Syscall programs can only be sleepable\n");
21614                 return -EINVAL;
21615         }
21616 
21617         if (prog->sleepable && !can_be_sleepable(prog)) {
21618                 verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
21619                 return -EINVAL;
21620         }
21621 
21622         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
21623                 return check_struct_ops_btf_id(env);
21624 
21625         if (prog->type != BPF_PROG_TYPE_TRACING &&
21626             prog->type != BPF_PROG_TYPE_LSM &&
21627             prog->type != BPF_PROG_TYPE_EXT)
21628                 return 0;
21629 
21630         ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
21631         if (ret)
21632                 return ret;
21633 
21634         if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
21635                 /* to make freplace programs equivalent to their targets, they
21636                  * need to inherit env->ops and expected_attach_type for the
21637                  * rest of the verification
21638                  */
21639                 env->ops = bpf_verifier_ops[tgt_prog->type];
21640                 prog->expected_attach_type = tgt_prog->expected_attach_type;
21641         }
21642 
21643         /* store info about the attachment target that will be used later */
21644         prog->aux->attach_func_proto = tgt_info.tgt_type;
21645         prog->aux->attach_func_name = tgt_info.tgt_name;
21646         prog->aux->mod = tgt_info.tgt_mod;
21647 
21648         if (tgt_prog) {
21649                 prog->aux->saved_dst_prog_type = tgt_prog->type;
21650                 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
21651         }
21652 
21653         if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
21654                 prog->aux->attach_btf_trace = true;
21655                 return 0;
21656         } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
21657                 if (!bpf_iter_prog_supported(prog))
21658                         return -EINVAL;
21659                 return 0;
21660         }
21661 
21662         if (prog->type == BPF_PROG_TYPE_LSM) {
21663                 ret = bpf_lsm_verify_prog(&env->log, prog);
21664                 if (ret < 0)
21665                         return ret;
21666         } else if (prog->type == BPF_PROG_TYPE_TRACING &&
21667                    btf_id_set_contains(&btf_id_deny, btf_id)) {
21668                 return -EINVAL;
21669         }
21670 
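              /* Trampolines are shared: the key is derived from the target
               * prog (or the attach BTF) and the target's BTF ID, and
               * bpf_trampoline_get() looks up (or creates) the trampoline
               * for that key.
               */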
21671         key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
21672         tr = bpf_trampoline_get(key, &tgt_info);
21673         if (!tr)
21674                 return -ENOMEM;
21675 
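              /* If the target program may perform tail calls, flag the
               * trampoline accordingly; BPF_TRAMP_F_TAIL_CALL_CTX presumably
               * makes the generated trampoline preserve the tail-call context
               * the target expects.
               */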
21676         if (tgt_prog && tgt_prog->aux->tail_call_reachable)
21677                 tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
21678 
21679         prog->aux->dst_trampoline = tr;
21680         return 0;
21681 }
21682 
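      /* Lazily parse the kernel's own BTF.  The double-checked locking under
       * bpf_verifier_lock makes sure btf_parse_vmlinux() runs at most once;
       * later callers just get the cached pointer, which may be an ERR_PTR if
       * parsing failed (handled in bpf_check() below).
       */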
21683 struct btf *bpf_get_btf_vmlinux(void)
21684 {
21685         if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
21686                 mutex_lock(&bpf_verifier_lock);
21687                 if (!btf_vmlinux)
21688                         btf_vmlinux = btf_parse_vmlinux();
21689                 mutex_unlock(&bpf_verifier_lock);
21690         }
21691         return btf_vmlinux;
21692 }
21693 
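      /* Main entry point of the verifier.  In order: allocate the
       * bpf_verifier_env and per-insn aux data, set up the log, discover
       * subprograms and kfuncs, check BTF info and the attach target, verify
       * the CFG, run do_check_main()/do_check_subprogs(), and, only if
       * verification succeeded, apply the rewrite passes (dead code handling,
       * ctx access conversion, misc fixups, zero-extension, call fixups)
       * before handing the program back to the syscall layer.
       */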
21694 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
21695 {
21696         u64 start_time = ktime_get_ns();
21697         struct bpf_verifier_env *env;
21698         int i, len, ret = -EINVAL, err;
21699         u32 log_true_size;
21700         bool is_priv;
21701 
21702         /* no program is valid */
21703         if (ARRAY_SIZE(bpf_verifier_ops) == 0)
21704                 return -EINVAL;
21705 
21706         /* 'struct bpf_verifier_env' can be global, but since it's not small,
21707          * allocate/free it every time bpf_check() is called
21708          */
21709         env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
21710         if (!env)
21711                 return -ENOMEM;
21712 
21713         env->bt.env = env;
21714 
21715         len = (*prog)->len;
21716         env->insn_aux_data =
21717                 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
21718         ret = -ENOMEM;
21719         if (!env->insn_aux_data)
21720                 goto err_free_env;
21721         for (i = 0; i < len; i++)
21722                 env->insn_aux_data[i].orig_idx = i;
21723         env->prog = *prog;
21724         env->ops = bpf_verifier_ops[env->prog->type];
21725         env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
21726 
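              /* Capabilities attached to the program's token decide how lax
               * the verifier may be: whether pointer values may be leaked,
               * whether reads of uninitialized stack are allowed, and whether
               * the Spectre v1/v4 mitigations can be skipped.
               */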
21727         env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
21728         env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
21729         env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token);
21730         env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token);
21731         env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF);
21732 
21733         bpf_get_btf_vmlinux();
21734 
21735         /* grab the mutex to protect a few globals used by the verifier */
21736         if (!is_priv)
21737                 mutex_lock(&bpf_verifier_lock);
21738 
21739         /* user could have requested verbose verifier output
21740          * and supplied a buffer to store the verification trace
21741          */
21742         ret = bpf_vlog_init(&env->log, attr->log_level,
21743                             (char __user *) (unsigned long) attr->log_buf,
21744                             attr->log_size);
21745         if (ret)
21746                 goto err_unlock;
21747 
21748         mark_verifier_state_clean(env);
21749 
21750         if (IS_ERR(btf_vmlinux)) {
21751                 /* Either gcc or pahole or the kernel is broken. */
21752                 verbose(env, "in-kernel BTF is malformed\n");
21753                 ret = PTR_ERR(btf_vmlinux);
21754                 goto skip_full_check;
21755         }
21756 
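              /* Alignment policy: BPF_F_STRICT_ALIGNMENT opts into strict
               * checks, architectures without efficient unaligned access
               * force them on, and BPF_F_ANY_ALIGNMENT (checked last, so it
               * wins) switches them off again.
               */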
21757         env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
21758         if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
21759                 env->strict_alignment = true;
21760         if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
21761                 env->strict_alignment = false;
21762 
21763         if (is_priv)
21764                 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
21765         env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;
21766 
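              /* Hash table of previously explored verifier states, used to
               * prune branches that reach a state equivalent to one already
               * verified.
               */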
21767         env->explored_states = kvcalloc(state_htab_size(env),
21768                                        sizeof(struct bpf_verifier_state_list *),
21769                                        GFP_USER);
21770         ret = -ENOMEM;
21771         if (!env->explored_states)
21772                 goto skip_full_check;
21773 
21774         ret = check_btf_info_early(env, attr, uattr);
21775         if (ret < 0)
21776                 goto skip_full_check;
21777 
21778         ret = add_subprog_and_kfunc(env);
21779         if (ret < 0)
21780                 goto skip_full_check;
21781 
21782         ret = check_subprogs(env);
21783         if (ret < 0)
21784                 goto skip_full_check;
21785 
21786         ret = check_btf_info(env, attr, uattr);
21787         if (ret < 0)
21788                 goto skip_full_check;
21789 
21790         ret = check_attach_btf_id(env);
21791         if (ret)
21792                 goto skip_full_check;
21793 
21794         ret = resolve_pseudo_ldimm64(env);
21795         if (ret < 0)
21796                 goto skip_full_check;
21797 
21798         if (bpf_prog_is_offloaded(env->prog->aux)) {
21799                 ret = bpf_prog_offload_verifier_prep(env->prog);
21800                 if (ret)
21801                         goto skip_full_check;
21802         }
21803 
21804         ret = check_cfg(env);
21805         if (ret < 0)
21806                 goto skip_full_check;
21807 
21808         ret = do_check_main(env);
21809         ret = ret ?: do_check_subprogs(env);
21810 
21811         if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
21812                 ret = bpf_prog_offload_finalize(env);
21813 
21814 skip_full_check:
21815         kvfree(env->explored_states);
21816 
21817         if (ret == 0)
21818                 ret = check_max_stack_depth(env);
21819 
21820         /* instruction rewrites happen after this point */
21821         if (ret == 0)
21822                 ret = optimize_bpf_loop(env);
21823 
21824         if (is_priv) {
21825                 if (ret == 0)
21826                         opt_hard_wire_dead_code_branches(env);
21827                 if (ret == 0)
21828                         ret = opt_remove_dead_code(env);
21829                 if (ret == 0)
21830                         ret = opt_remove_nops(env);
21831         } else {
21832                 if (ret == 0)
21833                         sanitize_dead_code(env);
21834         }
21835 
21836         if (ret == 0)
21837                 /* program is valid, convert *(u32*)(ctx + off) accesses */
21838                 ret = convert_ctx_accesses(env);
21839 
21840         if (ret == 0)
21841                 ret = do_misc_fixups(env);
21842 
21843         /* do 32-bit optimization after insn patching is done, so those patched
21844          * insns can be handled correctly.
21845          */
21846         if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
21847                 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
21848                 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
21849                                                                      : false;
21850         }
21851 
21852         if (ret == 0)
21853                 ret = fixup_call_args(env);
21854 
21855         env->verification_time = ktime_get_ns() - start_time;
21856         print_verification_stats(env);
21857         env->prog->aux->verified_insns = env->insn_processed;
21858 
21859         /* preserve original error even if log finalization is successful */
21860         err = bpf_vlog_finalize(&env->log, &log_true_size);
21861         if (err)
21862                 ret = err;
21863 
21864         if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
21865             copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
21866                                   &log_true_size, sizeof(log_true_size))) {
21867                 ret = -EFAULT;
21868                 goto err_release_maps;
21869         }
21870 
21871         if (ret)
21872                 goto err_release_maps;
21873 
21874         if (env->used_map_cnt) {
21875                 /* if program passed verifier, update used_maps in bpf_prog_info */
21876                 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
21877                                                           sizeof(env->used_maps[0]),
21878                                                           GFP_KERNEL);
21879 
21880                 if (!env->prog->aux->used_maps) {
21881                         ret = -ENOMEM;
21882                         goto err_release_maps;
21883                 }
21884 
21885                 memcpy(env->prog->aux->used_maps, env->used_maps,
21886                        sizeof(env->used_maps[0]) * env->used_map_cnt);
21887                 env->prog->aux->used_map_cnt = env->used_map_cnt;
21888         }
21889         if (env->used_btf_cnt) {
21890                 /* if program passed verifier, update used_btfs in bpf_prog_aux */
21891                 env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
21892                                                           sizeof(env->used_btfs[0]),
21893                                                           GFP_KERNEL);
21894                 if (!env->prog->aux->used_btfs) {
21895                         ret = -ENOMEM;
21896                         goto err_release_maps;
21897                 }
21898 
21899                 memcpy(env->prog->aux->used_btfs, env->used_btfs,
21900                        sizeof(env->used_btfs[0]) * env->used_btf_cnt);
21901                 env->prog->aux->used_btf_cnt = env->used_btf_cnt;
21902         }
21903         if (env->used_map_cnt || env->used_btf_cnt) {
21904                 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
21905                  * bpf_ld_imm64 instructions
21906                  */
21907                 convert_pseudo_ld_imm64(env);
21908         }
21909 
21910         adjust_btf_func(env);
21911 
21912 err_release_maps:
21913         if (!env->prog->aux->used_maps)
21914                 /* if we didn't copy map pointers into bpf_prog_info, release
21915                  * them now. Otherwise free_used_maps() will release them.
21916                  */
21917                 release_maps(env);
21918         if (!env->prog->aux->used_btfs)
21919                 release_btfs(env);
21920 
21921         /* extension progs temporarily inherit the attach_type of their targets
21922          * for verification purposes, so set it back to zero before returning
21923          */
21924         if (env->prog->type == BPF_PROG_TYPE_EXT)
21925                 env->prog->expected_attach_type = 0;
21926 
21927         *prog = env->prog;
21928 
21929         module_put(env->attach_btf_mod);
21930 err_unlock:
21931         if (!is_priv)
21932                 mutex_unlock(&bpf_verifier_lock);
21933         vfree(env->insn_aux_data);
21934 err_free_env:
21935         kfree(env);
21936         return ret;
21937 }
21938 
