1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * BPF JIT compiler for ARM64 4 * 5 * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com> 6 */ 7 8 #define pr_fmt(fmt) "bpf_jit: " fmt 9 10 #include <linux/bitfield.h> 11 #include <linux/bpf.h> 12 #include <linux/filter.h> 13 #include <linux/memory.h> 14 #include <linux/printk.h> 15 #include <linux/slab.h> 16 17 #include <asm/asm-extable.h> 18 #include <asm/byteorder.h> 19 #include <asm/cacheflush.h> 20 #include <asm/debug-monitors.h> 21 #include <asm/insn.h> 22 #include <asm/patching.h> 23 #include <asm/set_memory.h> 24 25 #include "bpf_jit.h" 26 27 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) 28 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) 29 #define TCCNT_PTR (MAX_BPF_JIT_REG + 2) 30 #define TMP_REG_3 (MAX_BPF_JIT_REG + 3) 31 #define FP_BOTTOM (MAX_BPF_JIT_REG + 4) 32 #define ARENA_VM_START (MAX_BPF_JIT_REG + 5) 33 34 #define check_imm(bits, imm) do { \ 35 if ((((imm) > 0) && ((imm) >> (bits))) || \ 36 (((imm) < 0) && (~(imm) >> (bits)))) { \ 37 pr_info("[%2d] imm=%d(0x%x) out of range\n", \ 38 i, imm, imm); \ 39 return -EINVAL; \ 40 } \ 41 } while (0) 42 #define check_imm19(imm) check_imm(19, imm) 43 #define check_imm26(imm) check_imm(26, imm) 44 45 /* Map BPF registers to A64 registers */ 46 static const int bpf2a64[] = { 47 /* return value from in-kernel function, and exit value from eBPF */ 48 [BPF_REG_0] = A64_R(7), 49 /* arguments from eBPF program to in-kernel function */ 50 [BPF_REG_1] = A64_R(0), 51 [BPF_REG_2] = A64_R(1), 52 [BPF_REG_3] = A64_R(2), 53 [BPF_REG_4] = A64_R(3), 54 [BPF_REG_5] = A64_R(4), 55 /* callee saved registers that in-kernel function will preserve */ 56 [BPF_REG_6] = A64_R(19), 57 [BPF_REG_7] = A64_R(20), 58 [BPF_REG_8] = A64_R(21), 59 [BPF_REG_9] = A64_R(22), 60 /* read-only frame pointer to access stack */ 61 [BPF_REG_FP] = A64_R(25), 62 /* temporary registers for BPF JIT */ 63 [TMP_REG_1] = A64_R(10), 64 [TMP_REG_2] = A64_R(11), 65 [TMP_REG_3] = A64_R(12), 66 /* tail_call_cnt_ptr */ 67 [TCCNT_PTR] = A64_R(26), 68 /* temporary register for blinding constants */ 69 [BPF_REG_AX] = A64_R(9), 70 [FP_BOTTOM] = A64_R(27), 71 /* callee saved register for kern_vm_start address */ 72 [ARENA_VM_START] = A64_R(28), 73 }; 74 75 struct jit_ctx { 76 const struct bpf_prog *prog; 77 int idx; 78 int epilogue_offset; 79 int *offset; 80 int exentry_idx; 81 __le32 *image; 82 __le32 *ro_image; 83 u32 stack_size; 84 int fpb_offset; 85 u64 user_vm_start; 86 }; 87 88 struct bpf_plt { 89 u32 insn_ldr; /* load target */ 90 u32 insn_br; /* branch to target */ 91 u64 target; /* target value */ 92 }; 93 94 #define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target) 95 #define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target) 96 97 static inline void emit(const u32 insn, struct jit_ctx *ctx) 98 { 99 if (ctx->image != NULL) 100 ctx->image[ctx->idx] = cpu_to_le32(insn); 101 102 ctx->idx++; 103 } 104 105 static inline void emit_a64_mov_i(const int is64, const int reg, 106 const s32 val, struct jit_ctx *ctx) 107 { 108 u16 hi = val >> 16; 109 u16 lo = val & 0xffff; 110 111 if (hi & 0x8000) { 112 if (hi == 0xffff) { 113 emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx); 114 } else { 115 emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx); 116 if (lo != 0xffff) 117 emit(A64_MOVK(is64, reg, lo, 0), ctx); 118 } 119 } else { 120 emit(A64_MOVZ(is64, reg, lo, 0), ctx); 121 if (hi) 122 emit(A64_MOVK(is64, reg, hi, 16), ctx); 123 } 124 } 125 126 static int i64_i16_blocks(const u64 val, bool inverse) 127 { 128 return (((val >> 0) & 0xffff) != (inverse ? 
0xffff : 0x0000)) + 129 (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) + 130 (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) + 131 (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000)); 132 } 133 134 static inline void emit_a64_mov_i64(const int reg, const u64 val, 135 struct jit_ctx *ctx) 136 { 137 u64 nrm_tmp = val, rev_tmp = ~val; 138 bool inverse; 139 int shift; 140 141 if (!(nrm_tmp >> 32)) 142 return emit_a64_mov_i(0, reg, (u32)val, ctx); 143 144 inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false); 145 shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) : 146 (fls64(nrm_tmp) - 1)), 16), 0); 147 if (inverse) 148 emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx); 149 else 150 emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx); 151 shift -= 16; 152 while (shift >= 0) { 153 if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000)) 154 emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx); 155 shift -= 16; 156 } 157 } 158 159 static inline void emit_bti(u32 insn, struct jit_ctx *ctx) 160 { 161 if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) 162 emit(insn, ctx); 163 } 164 165 /* 166 * Kernel addresses in the vmalloc space use at most 48 bits, and the 167 * remaining bits are guaranteed to be 0x1. So we can compose the address 168 * with a fixed length movn/movk/movk sequence. 169 */ 170 static inline void emit_addr_mov_i64(const int reg, const u64 val, 171 struct jit_ctx *ctx) 172 { 173 u64 tmp = val; 174 int shift = 0; 175 176 emit(A64_MOVN(1, reg, ~tmp & 0xffff, shift), ctx); 177 while (shift < 32) { 178 tmp >>= 16; 179 shift += 16; 180 emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx); 181 } 182 } 183 184 static inline void emit_call(u64 target, struct jit_ctx *ctx) 185 { 186 u8 tmp = bpf2a64[TMP_REG_1]; 187 188 emit_addr_mov_i64(tmp, target, ctx); 189 emit(A64_BLR(tmp), ctx); 190 } 191 192 static inline int bpf2a64_offset(int bpf_insn, int off, 193 const struct jit_ctx *ctx) 194 { 195 /* BPF JMP offset is relative to the next instruction */ 196 bpf_insn++; 197 /* 198 * Whereas arm64 branch instructions encode the offset 199 * from the branch itself, so we must subtract 1 from the 200 * instruction offset. 201 */ 202 return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1); 203 } 204 205 static void jit_fill_hole(void *area, unsigned int size) 206 { 207 __le32 *ptr; 208 /* We are guaranteed to have aligned memory. */ 209 for (ptr = area; size >= sizeof(u32); size -= sizeof(u32)) 210 *ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT); 211 } 212 213 int bpf_arch_text_invalidate(void *dst, size_t len) 214 { 215 if (!aarch64_insn_set(dst, AARCH64_BREAK_FAULT, len)) 216 return -EINVAL; 217 218 return 0; 219 } 220 221 static inline int epilogue_offset(const struct jit_ctx *ctx) 222 { 223 int to = ctx->epilogue_offset; 224 int from = ctx->idx; 225 226 return to - from; 227 } 228 229 static bool is_addsub_imm(u32 imm) 230 { 231 /* Either imm12 or shifted imm12. */ 232 return !(imm & ~0xfff) || !(imm & ~0xfff000); 233 } 234 235 /* 236 * There are 3 types of AArch64 LDR/STR (immediate) instruction: 237 * Post-index, Pre-index, Unsigned offset. 238 * 239 * For BPF ldr/str, the "unsigned offset" type is sufficient. 
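 *
 * A rough worked example of what is_lsi_offset() below accepts (an
 * illustration only, registers chosen arbitrarily): with scale = 3
 * (BPF_DW), imm12 can encode byte offsets 0, 8, 16, ..., 32760, so
 *
 *   ldr x0, [x1, #32760]   // fits the "unsigned offset" form
 *   ldr x0, [x1, #32768]   // does not fit; the JIT instead emits
 *                          //   mov tmp, #32768
 *                          //   ldr x0, [x1, tmp]
 *
 * Negative or non-size-aligned offsets likewise take the
 * register-offset fallback path.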
240 * 241 * "Unsigned offset" type LDR(immediate) format: 242 * 243 * 3 2 1 0 244 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 245 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 246 * |x x|1 1 1 0 0 1 0 1| imm12 | Rn | Rt | 247 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 248 * scale 249 * 250 * "Unsigned offset" type STR(immediate) format: 251 * 3 2 1 0 252 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 253 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 254 * |x x|1 1 1 0 0 1 0 0| imm12 | Rn | Rt | 255 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 256 * scale 257 * 258 * The offset is calculated from imm12 and scale in the following way: 259 * 260 * offset = (u64)imm12 << scale 261 */ 262 static bool is_lsi_offset(int offset, int scale) 263 { 264 if (offset < 0) 265 return false; 266 267 if (offset > (0xFFF << scale)) 268 return false; 269 270 if (offset & ((1 << scale) - 1)) 271 return false; 272 273 return true; 274 } 275 276 /* generated prologue: 277 * bti c // if CONFIG_ARM64_BTI_KERNEL 278 * mov x9, lr 279 * nop // POKE_OFFSET 280 * paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL 281 * stp x29, lr, [sp, #-16]! 282 * mov x29, sp 283 * stp x19, x20, [sp, #-16]! 284 * stp x21, x22, [sp, #-16]! 285 * stp x26, x25, [sp, #-16]! 286 * stp x26, x25, [sp, #-16]! 287 * stp x27, x28, [sp, #-16]! 288 * mov x25, sp 289 * mov tcc, #0 290 * // PROLOGUE_OFFSET 291 */ 292 293 static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx) 294 { 295 const struct bpf_prog *prog = ctx->prog; 296 const bool is_main_prog = !bpf_is_subprog(prog); 297 const u8 ptr = bpf2a64[TCCNT_PTR]; 298 const u8 fp = bpf2a64[BPF_REG_FP]; 299 const u8 tcc = ptr; 300 301 emit(A64_PUSH(ptr, fp, A64_SP), ctx); 302 if (is_main_prog) { 303 /* Initialize tail_call_cnt. */ 304 emit(A64_MOVZ(1, tcc, 0, 0), ctx); 305 emit(A64_PUSH(tcc, fp, A64_SP), ctx); 306 emit(A64_MOV(1, ptr, A64_SP), ctx); 307 } else { 308 emit(A64_PUSH(ptr, fp, A64_SP), ctx); 309 emit(A64_NOP, ctx); 310 emit(A64_NOP, ctx); 311 } 312 } 313 314 #define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0) 315 #define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0) 316 317 /* Offset of nop instruction in bpf prog entry to be poked */ 318 #define POKE_OFFSET (BTI_INSNS + 1) 319 320 /* Tail call offset to jump into */ 321 #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 10) 322 323 static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf, 324 bool is_exception_cb, u64 arena_vm_start) 325 { 326 const struct bpf_prog *prog = ctx->prog; 327 const bool is_main_prog = !bpf_is_subprog(prog); 328 const u8 r6 = bpf2a64[BPF_REG_6]; 329 const u8 r7 = bpf2a64[BPF_REG_7]; 330 const u8 r8 = bpf2a64[BPF_REG_8]; 331 const u8 r9 = bpf2a64[BPF_REG_9]; 332 const u8 fp = bpf2a64[BPF_REG_FP]; 333 const u8 fpb = bpf2a64[FP_BOTTOM]; 334 const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; 335 const int idx0 = ctx->idx; 336 int cur_offset; 337 338 /* 339 * BPF prog stack layout 340 * 341 * high 342 * original A64_SP => 0:+-----+ BPF prologue 343 * |FP/LR| 344 * current A64_FP => -16:+-----+ 345 * | ... | callee saved registers 346 * BPF fp register => -64:+-----+ <= (BPF_FP) 347 * | | 348 * | ... | BPF prog stack 349 * | | 350 * +-----+ <= (BPF_FP - prog->aux->stack_depth) 351 * |RSVD | padding 352 * current A64_SP => +-----+ <= (BPF_FP - ctx->stack_size) 353 * | | 354 * | ... 
| Function call stack
         *                        |     |
         *                        +-----+
         *                          low
         *
         */

        /* bpf function may be invoked by 3 instruction types:
         * 1. bl, attached via freplace to bpf prog via short jump
         * 2. br, attached via freplace to bpf prog via long jump
         * 3. blr, working as a function pointer, used by emit_call.
         * So BTI_JC should be used here to support both br and blr.
         */
        emit_bti(A64_BTI_JC, ctx);

        emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
        emit(A64_NOP, ctx);

        if (!is_exception_cb) {
                /* Sign lr */
                if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
                        emit(A64_PACIASP, ctx);
                /* Save FP and LR registers to stay aligned with ARM64 AAPCS */
                emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
                emit(A64_MOV(1, A64_FP, A64_SP), ctx);

                /* Save callee-saved registers */
                emit(A64_PUSH(r6, r7, A64_SP), ctx);
                emit(A64_PUSH(r8, r9, A64_SP), ctx);
                prepare_bpf_tail_call_cnt(ctx);
                emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);
        } else {
                /*
                 * Exception callback receives FP of Main Program as third
                 * parameter
                 */
                emit(A64_MOV(1, A64_FP, A64_R(2)), ctx);
                /*
                 * Main Program already pushed the frame record and the
                 * callee-saved registers. The exception callback will not push
                 * anything and re-use the main program's stack.
                 *
                 * 12 registers are on the stack
                 */
                emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
        }

        /* Set up BPF prog stack base register */
        emit(A64_MOV(1, fp, A64_SP), ctx);

        if (!ebpf_from_cbpf && is_main_prog) {
                cur_offset = ctx->idx - idx0;
                if (cur_offset != PROLOGUE_OFFSET) {
                        pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
                                    cur_offset, PROLOGUE_OFFSET);
                        return -1;
                }

                /* BTI landing pad for the tail call, done with a BR */
                emit_bti(A64_BTI_J, ctx);
        }

        /*
         * Program acting as exception boundary should save all ARM64
         * Callee-saved registers as the exception callback needs to recover
         * all ARM64 Callee-saved registers in its epilogue.
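         *
         * The regular prologue only spills the callee-saved registers the
         * JIT actually uses (x19-x22, x25-x28); pushing the extra x23/x24
         * pair below means the full x19-x28 set is on this stack frame for
         * the exception callback's epilogue to restore.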
420 */ 421 if (prog->aux->exception_boundary) { 422 /* 423 * As we are pushing two more registers, BPF_FP should be moved 424 * 16 bytes 425 */ 426 emit(A64_SUB_I(1, fp, fp, 16), ctx); 427 emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx); 428 } 429 430 emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx); 431 432 /* Stack must be multiples of 16B */ 433 ctx->stack_size = round_up(prog->aux->stack_depth, 16); 434 435 /* Set up function call stack */ 436 emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); 437 438 if (arena_vm_start) 439 emit_a64_mov_i64(arena_vm_base, arena_vm_start, ctx); 440 441 return 0; 442 } 443 444 static int out_offset = -1; /* initialized on the first pass of build_body() */ 445 static int emit_bpf_tail_call(struct jit_ctx *ctx) 446 { 447 /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ 448 const u8 r2 = bpf2a64[BPF_REG_2]; 449 const u8 r3 = bpf2a64[BPF_REG_3]; 450 451 const u8 tmp = bpf2a64[TMP_REG_1]; 452 const u8 prg = bpf2a64[TMP_REG_2]; 453 const u8 tcc = bpf2a64[TMP_REG_3]; 454 const u8 ptr = bpf2a64[TCCNT_PTR]; 455 const int idx0 = ctx->idx; 456 #define cur_offset (ctx->idx - idx0) 457 #define jmp_offset (out_offset - (cur_offset)) 458 size_t off; 459 460 /* if (index >= array->map.max_entries) 461 * goto out; 462 */ 463 off = offsetof(struct bpf_array, map.max_entries); 464 emit_a64_mov_i64(tmp, off, ctx); 465 emit(A64_LDR32(tmp, r2, tmp), ctx); 466 emit(A64_MOV(0, r3, r3), ctx); 467 emit(A64_CMP(0, r3, tmp), ctx); 468 emit(A64_B_(A64_COND_CS, jmp_offset), ctx); 469 470 /* 471 * if ((*tail_call_cnt_ptr) >= MAX_TAIL_CALL_CNT) 472 * goto out; 473 * (*tail_call_cnt_ptr)++; 474 */ 475 emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); 476 emit(A64_LDR64I(tcc, ptr, 0), ctx); 477 emit(A64_CMP(1, tcc, tmp), ctx); 478 emit(A64_B_(A64_COND_CS, jmp_offset), ctx); 479 emit(A64_ADD_I(1, tcc, tcc, 1), ctx); 480 481 /* prog = array->ptrs[index]; 482 * if (prog == NULL) 483 * goto out; 484 */ 485 off = offsetof(struct bpf_array, ptrs); 486 emit_a64_mov_i64(tmp, off, ctx); 487 emit(A64_ADD(1, tmp, r2, tmp), ctx); 488 emit(A64_LSL(1, prg, r3, 3), ctx); 489 emit(A64_LDR64(prg, tmp, prg), ctx); 490 emit(A64_CBZ(1, prg, jmp_offset), ctx); 491 492 /* Update tail_call_cnt if the slot is populated. 
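         *
         * The counter itself lives on the main program's stack and is always
         * reached through TCCNT_PTR (x26), so tail calls made from
         * subprograms share one count. A sketch of the sequence emitted
         * above/below (not verbatim; registers per bpf2a64[]):
         *
         *   ldr  tcc, [ptr]              // *tail_call_cnt_ptr
         *   cmp  tcc, #MAX_TAIL_CALL_CNT
         *   b.cs out                     // limit reached, skip the call
         *   add  tcc, tcc, #1
         *   ...                          // prog NULL check
         *   str  tcc, [ptr]              // commit only for a populated slot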
*/ 493 emit(A64_STR64I(tcc, ptr, 0), ctx); 494 495 /* goto *(prog->bpf_func + prologue_offset); */ 496 off = offsetof(struct bpf_prog, bpf_func); 497 emit_a64_mov_i64(tmp, off, ctx); 498 emit(A64_LDR64(tmp, prg, tmp), ctx); 499 emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx); 500 emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); 501 emit(A64_BR(tmp), ctx); 502 503 /* out: */ 504 if (out_offset == -1) 505 out_offset = cur_offset; 506 if (cur_offset != out_offset) { 507 pr_err_once("tail_call out_offset = %d, expected %d!\n", 508 cur_offset, out_offset); 509 return -1; 510 } 511 return 0; 512 #undef cur_offset 513 #undef jmp_offset 514 } 515 516 #ifdef CONFIG_ARM64_LSE_ATOMICS 517 static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) 518 { 519 const u8 code = insn->code; 520 const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; 521 const u8 dst = bpf2a64[insn->dst_reg]; 522 const u8 src = bpf2a64[insn->src_reg]; 523 const u8 tmp = bpf2a64[TMP_REG_1]; 524 const u8 tmp2 = bpf2a64[TMP_REG_2]; 525 const bool isdw = BPF_SIZE(code) == BPF_DW; 526 const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC; 527 const s16 off = insn->off; 528 u8 reg = dst; 529 530 if (off || arena) { 531 if (off) { 532 emit_a64_mov_i(1, tmp, off, ctx); 533 emit(A64_ADD(1, tmp, tmp, dst), ctx); 534 reg = tmp; 535 } 536 if (arena) { 537 emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx); 538 reg = tmp; 539 } 540 } 541 542 switch (insn->imm) { 543 /* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */ 544 case BPF_ADD: 545 emit(A64_STADD(isdw, reg, src), ctx); 546 break; 547 case BPF_AND: 548 emit(A64_MVN(isdw, tmp2, src), ctx); 549 emit(A64_STCLR(isdw, reg, tmp2), ctx); 550 break; 551 case BPF_OR: 552 emit(A64_STSET(isdw, reg, src), ctx); 553 break; 554 case BPF_XOR: 555 emit(A64_STEOR(isdw, reg, src), ctx); 556 break; 557 /* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */ 558 case BPF_ADD | BPF_FETCH: 559 emit(A64_LDADDAL(isdw, src, reg, src), ctx); 560 break; 561 case BPF_AND | BPF_FETCH: 562 emit(A64_MVN(isdw, tmp2, src), ctx); 563 emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx); 564 break; 565 case BPF_OR | BPF_FETCH: 566 emit(A64_LDSETAL(isdw, src, reg, src), ctx); 567 break; 568 case BPF_XOR | BPF_FETCH: 569 emit(A64_LDEORAL(isdw, src, reg, src), ctx); 570 break; 571 /* src_reg = atomic_xchg(dst_reg + off, src_reg); */ 572 case BPF_XCHG: 573 emit(A64_SWPAL(isdw, src, reg, src), ctx); 574 break; 575 /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */ 576 case BPF_CMPXCHG: 577 emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx); 578 break; 579 default: 580 pr_err_once("unknown atomic op code %02x\n", insn->imm); 581 return -EINVAL; 582 } 583 584 return 0; 585 } 586 #else 587 static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) 588 { 589 return -EINVAL; 590 } 591 #endif 592 593 static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) 594 { 595 const u8 code = insn->code; 596 const u8 dst = bpf2a64[insn->dst_reg]; 597 const u8 src = bpf2a64[insn->src_reg]; 598 const u8 tmp = bpf2a64[TMP_REG_1]; 599 const u8 tmp2 = bpf2a64[TMP_REG_2]; 600 const u8 tmp3 = bpf2a64[TMP_REG_3]; 601 const int i = insn - ctx->prog->insnsi; 602 const s32 imm = insn->imm; 603 const s16 off = insn->off; 604 const bool isdw = BPF_SIZE(code) == BPF_DW; 605 u8 reg; 606 s32 jmp_offset; 607 608 if (BPF_MODE(code) == BPF_PROBE_ATOMIC) { 609 /* ll_sc based atomics don't support unsafe pointers yet. 
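                 *
                 * (BPF_PROBE_ATOMIC, i.e. arena/unsafe pointers, is only
                 * handled by the LSE path in this file, so reject it here.)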
*/ 610 pr_err_once("unknown atomic opcode %02x\n", code); 611 return -EINVAL; 612 } 613 614 if (!off) { 615 reg = dst; 616 } else { 617 emit_a64_mov_i(1, tmp, off, ctx); 618 emit(A64_ADD(1, tmp, tmp, dst), ctx); 619 reg = tmp; 620 } 621 622 if (imm == BPF_ADD || imm == BPF_AND || 623 imm == BPF_OR || imm == BPF_XOR) { 624 /* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */ 625 emit(A64_LDXR(isdw, tmp2, reg), ctx); 626 if (imm == BPF_ADD) 627 emit(A64_ADD(isdw, tmp2, tmp2, src), ctx); 628 else if (imm == BPF_AND) 629 emit(A64_AND(isdw, tmp2, tmp2, src), ctx); 630 else if (imm == BPF_OR) 631 emit(A64_ORR(isdw, tmp2, tmp2, src), ctx); 632 else 633 emit(A64_EOR(isdw, tmp2, tmp2, src), ctx); 634 emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx); 635 jmp_offset = -3; 636 check_imm19(jmp_offset); 637 emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); 638 } else if (imm == (BPF_ADD | BPF_FETCH) || 639 imm == (BPF_AND | BPF_FETCH) || 640 imm == (BPF_OR | BPF_FETCH) || 641 imm == (BPF_XOR | BPF_FETCH)) { 642 /* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */ 643 const u8 ax = bpf2a64[BPF_REG_AX]; 644 645 emit(A64_MOV(isdw, ax, src), ctx); 646 emit(A64_LDXR(isdw, src, reg), ctx); 647 if (imm == (BPF_ADD | BPF_FETCH)) 648 emit(A64_ADD(isdw, tmp2, src, ax), ctx); 649 else if (imm == (BPF_AND | BPF_FETCH)) 650 emit(A64_AND(isdw, tmp2, src, ax), ctx); 651 else if (imm == (BPF_OR | BPF_FETCH)) 652 emit(A64_ORR(isdw, tmp2, src, ax), ctx); 653 else 654 emit(A64_EOR(isdw, tmp2, src, ax), ctx); 655 emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx); 656 jmp_offset = -3; 657 check_imm19(jmp_offset); 658 emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); 659 emit(A64_DMB_ISH, ctx); 660 } else if (imm == BPF_XCHG) { 661 /* src_reg = atomic_xchg(dst_reg + off, src_reg); */ 662 emit(A64_MOV(isdw, tmp2, src), ctx); 663 emit(A64_LDXR(isdw, src, reg), ctx); 664 emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx); 665 jmp_offset = -2; 666 check_imm19(jmp_offset); 667 emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); 668 emit(A64_DMB_ISH, ctx); 669 } else if (imm == BPF_CMPXCHG) { 670 /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */ 671 const u8 r0 = bpf2a64[BPF_REG_0]; 672 673 emit(A64_MOV(isdw, tmp2, r0), ctx); 674 emit(A64_LDXR(isdw, r0, reg), ctx); 675 emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx); 676 jmp_offset = 4; 677 check_imm19(jmp_offset); 678 emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx); 679 emit(A64_STLXR(isdw, src, reg, tmp3), ctx); 680 jmp_offset = -4; 681 check_imm19(jmp_offset); 682 emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); 683 emit(A64_DMB_ISH, ctx); 684 } else { 685 pr_err_once("unknown atomic op code %02x\n", imm); 686 return -EINVAL; 687 } 688 689 return 0; 690 } 691 692 void dummy_tramp(void); 693 694 asm ( 695 " .pushsection .text, \"ax\", @progbits\n" 696 " .global dummy_tramp\n" 697 " .type dummy_tramp, %function\n" 698 "dummy_tramp:" 699 #if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) 700 " bti j\n" /* dummy_tramp is called via "br x10" */ 701 #endif 702 " mov x10, x30\n" 703 " mov x30, x9\n" 704 " ret x10\n" 705 " .size dummy_tramp, .-dummy_tramp\n" 706 " .popsection\n" 707 ); 708 709 /* build a plt initialized like this: 710 * 711 * plt: 712 * ldr tmp, target 713 * br tmp 714 * target: 715 * .quad dummy_tramp 716 * 717 * when a long jump trampoline is attached, target is filled with the 718 * trampoline address, and when the trampoline is removed, target is 719 * restored to dummy_tramp address. 
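 *
 * target is kept 8-byte aligned (build_plt() below pads with a nop when
 * needed), presumably so that it can be rewritten with a single aligned
 * 64-bit store while the plt stays live; the ldr is a literal load of
 * the doubleword two instructions ahead (2 * AARCH64_INSN_SIZE).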
720 */ 721 static void build_plt(struct jit_ctx *ctx) 722 { 723 const u8 tmp = bpf2a64[TMP_REG_1]; 724 struct bpf_plt *plt = NULL; 725 726 /* make sure target is 64-bit aligned */ 727 if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2) 728 emit(A64_NOP, ctx); 729 730 plt = (struct bpf_plt *)(ctx->image + ctx->idx); 731 /* plt is called via bl, no BTI needed here */ 732 emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx); 733 emit(A64_BR(tmp), ctx); 734 735 if (ctx->image) 736 plt->target = (u64)&dummy_tramp; 737 } 738 739 static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb) 740 { 741 const u8 r0 = bpf2a64[BPF_REG_0]; 742 const u8 r6 = bpf2a64[BPF_REG_6]; 743 const u8 r7 = bpf2a64[BPF_REG_7]; 744 const u8 r8 = bpf2a64[BPF_REG_8]; 745 const u8 r9 = bpf2a64[BPF_REG_9]; 746 const u8 fp = bpf2a64[BPF_REG_FP]; 747 const u8 ptr = bpf2a64[TCCNT_PTR]; 748 const u8 fpb = bpf2a64[FP_BOTTOM]; 749 750 /* We're done with BPF stack */ 751 emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); 752 753 /* 754 * Program acting as exception boundary pushes R23 and R24 in addition 755 * to BPF callee-saved registers. Exception callback uses the boundary 756 * program's stack frame, so recover these extra registers in the above 757 * two cases. 758 */ 759 if (ctx->prog->aux->exception_boundary || is_exception_cb) 760 emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx); 761 762 /* Restore x27 and x28 */ 763 emit(A64_POP(fpb, A64_R(28), A64_SP), ctx); 764 /* Restore fs (x25) and x26 */ 765 emit(A64_POP(ptr, fp, A64_SP), ctx); 766 emit(A64_POP(ptr, fp, A64_SP), ctx); 767 768 /* Restore callee-saved register */ 769 emit(A64_POP(r8, r9, A64_SP), ctx); 770 emit(A64_POP(r6, r7, A64_SP), ctx); 771 772 /* Restore FP/LR registers */ 773 emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); 774 775 /* Set return value */ 776 emit(A64_MOV(1, A64_R(0), r0), ctx); 777 778 /* Authenticate lr */ 779 if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) 780 emit(A64_AUTIASP, ctx); 781 782 emit(A64_RET(A64_LR), ctx); 783 } 784 785 #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) 786 #define BPF_FIXUP_REG_MASK GENMASK(31, 27) 787 #define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */ 788 789 bool ex_handler_bpf(const struct exception_table_entry *ex, 790 struct pt_regs *regs) 791 { 792 off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); 793 int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); 794 795 if (dst_reg != DONT_CLEAR) 796 regs->regs[dst_reg] = 0; 797 regs->pc = (unsigned long)&ex->fixup - offset; 798 return true; 799 } 800 801 /* For accesses to BTF pointers, add an entry to the exception table */ 802 static int add_exception_handler(const struct bpf_insn *insn, 803 struct jit_ctx *ctx, 804 int dst_reg) 805 { 806 off_t ins_offset; 807 off_t fixup_offset; 808 unsigned long pc; 809 struct exception_table_entry *ex; 810 811 if (!ctx->image) 812 /* First pass */ 813 return 0; 814 815 if (BPF_MODE(insn->code) != BPF_PROBE_MEM && 816 BPF_MODE(insn->code) != BPF_PROBE_MEMSX && 817 BPF_MODE(insn->code) != BPF_PROBE_MEM32 && 818 BPF_MODE(insn->code) != BPF_PROBE_ATOMIC) 819 return 0; 820 821 if (!ctx->prog->aux->extable || 822 WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries)) 823 return -EINVAL; 824 825 ex = &ctx->prog->aux->extable[ctx->exentry_idx]; 826 pc = (unsigned long)&ctx->ro_image[ctx->idx - 1]; 827 828 /* 829 * This is the relative offset of the instruction that may fault from 830 * the exception table itself. 
This will be written to the exception
         * table and if this instruction faults, the destination register will
         * be set to '0' and the execution will jump to the next instruction.
         */
        ins_offset = pc - (long)&ex->insn;
        if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
                return -ERANGE;

        /*
         * Since the extable follows the program, the fixup offset is always
         * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
         * to keep things simple, and put the destination register in the upper
         * bits. We don't need to worry about buildtime or runtime sort
         * modifying the upper bits because the table is already sorted, and
         * isn't part of the main exception table.
         *
         * The fixup_offset is set to the next instruction from the instruction
         * that may fault. The execution will jump to this after handling the
         * fault.
         */
        fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
        if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
                return -ERANGE;

        /*
         * The offsets above have been calculated using the RO buffer but we
         * need to use the R/W buffer for writes.
         * switch ex to rw buffer for writing.
         */
        ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image);

        ex->insn = ins_offset;

        if (BPF_CLASS(insn->code) != BPF_LDX)
                dst_reg = DONT_CLEAR;

        ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
                    FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);

        ex->type = EX_TYPE_BPF;

        ctx->exentry_idx++;
        return 0;
}

/* JITs an eBPF instruction.
 * Returns:
 * 0 - successfully JITed an 8-byte eBPF instruction.
 * >0 - successfully JITed a 16-byte eBPF instruction.
 * <0 - failed to JIT.
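 *
 * The only >0 case today is BPF_LD | BPF_IMM | BPF_DW, which spans two
 * 8-byte BPF instructions; build_insn() returns 1 for it so that
 * build_body() can skip the second half.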
880 */ 881 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, 882 bool extra_pass) 883 { 884 const u8 code = insn->code; 885 u8 dst = bpf2a64[insn->dst_reg]; 886 u8 src = bpf2a64[insn->src_reg]; 887 const u8 tmp = bpf2a64[TMP_REG_1]; 888 const u8 tmp2 = bpf2a64[TMP_REG_2]; 889 const u8 fp = bpf2a64[BPF_REG_FP]; 890 const u8 fpb = bpf2a64[FP_BOTTOM]; 891 const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; 892 const s16 off = insn->off; 893 const s32 imm = insn->imm; 894 const int i = insn - ctx->prog->insnsi; 895 const bool is64 = BPF_CLASS(code) == BPF_ALU64 || 896 BPF_CLASS(code) == BPF_JMP; 897 u8 jmp_cond; 898 s32 jmp_offset; 899 u32 a64_insn; 900 u8 src_adj; 901 u8 dst_adj; 902 int off_adj; 903 int ret; 904 bool sign_extend; 905 906 switch (code) { 907 /* dst = src */ 908 case BPF_ALU | BPF_MOV | BPF_X: 909 case BPF_ALU64 | BPF_MOV | BPF_X: 910 if (insn_is_cast_user(insn)) { 911 emit(A64_MOV(0, tmp, src), ctx); // 32-bit mov clears the upper 32 bits 912 emit_a64_mov_i(0, dst, ctx->user_vm_start >> 32, ctx); 913 emit(A64_LSL(1, dst, dst, 32), ctx); 914 emit(A64_CBZ(1, tmp, 2), ctx); 915 emit(A64_ORR(1, tmp, dst, tmp), ctx); 916 emit(A64_MOV(1, dst, tmp), ctx); 917 break; 918 } else if (insn_is_mov_percpu_addr(insn)) { 919 if (dst != src) 920 emit(A64_MOV(1, dst, src), ctx); 921 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) 922 emit(A64_MRS_TPIDR_EL2(tmp), ctx); 923 else 924 emit(A64_MRS_TPIDR_EL1(tmp), ctx); 925 emit(A64_ADD(1, dst, dst, tmp), ctx); 926 break; 927 } 928 switch (insn->off) { 929 case 0: 930 emit(A64_MOV(is64, dst, src), ctx); 931 break; 932 case 8: 933 emit(A64_SXTB(is64, dst, src), ctx); 934 break; 935 case 16: 936 emit(A64_SXTH(is64, dst, src), ctx); 937 break; 938 case 32: 939 emit(A64_SXTW(is64, dst, src), ctx); 940 break; 941 } 942 break; 943 /* dst = dst OP src */ 944 case BPF_ALU | BPF_ADD | BPF_X: 945 case BPF_ALU64 | BPF_ADD | BPF_X: 946 emit(A64_ADD(is64, dst, dst, src), ctx); 947 break; 948 case BPF_ALU | BPF_SUB | BPF_X: 949 case BPF_ALU64 | BPF_SUB | BPF_X: 950 emit(A64_SUB(is64, dst, dst, src), ctx); 951 break; 952 case BPF_ALU | BPF_AND | BPF_X: 953 case BPF_ALU64 | BPF_AND | BPF_X: 954 emit(A64_AND(is64, dst, dst, src), ctx); 955 break; 956 case BPF_ALU | BPF_OR | BPF_X: 957 case BPF_ALU64 | BPF_OR | BPF_X: 958 emit(A64_ORR(is64, dst, dst, src), ctx); 959 break; 960 case BPF_ALU | BPF_XOR | BPF_X: 961 case BPF_ALU64 | BPF_XOR | BPF_X: 962 emit(A64_EOR(is64, dst, dst, src), ctx); 963 break; 964 case BPF_ALU | BPF_MUL | BPF_X: 965 case BPF_ALU64 | BPF_MUL | BPF_X: 966 emit(A64_MUL(is64, dst, dst, src), ctx); 967 break; 968 case BPF_ALU | BPF_DIV | BPF_X: 969 case BPF_ALU64 | BPF_DIV | BPF_X: 970 if (!off) 971 emit(A64_UDIV(is64, dst, dst, src), ctx); 972 else 973 emit(A64_SDIV(is64, dst, dst, src), ctx); 974 break; 975 case BPF_ALU | BPF_MOD | BPF_X: 976 case BPF_ALU64 | BPF_MOD | BPF_X: 977 if (!off) 978 emit(A64_UDIV(is64, tmp, dst, src), ctx); 979 else 980 emit(A64_SDIV(is64, tmp, dst, src), ctx); 981 emit(A64_MSUB(is64, dst, dst, tmp, src), ctx); 982 break; 983 case BPF_ALU | BPF_LSH | BPF_X: 984 case BPF_ALU64 | BPF_LSH | BPF_X: 985 emit(A64_LSLV(is64, dst, dst, src), ctx); 986 break; 987 case BPF_ALU | BPF_RSH | BPF_X: 988 case BPF_ALU64 | BPF_RSH | BPF_X: 989 emit(A64_LSRV(is64, dst, dst, src), ctx); 990 break; 991 case BPF_ALU | BPF_ARSH | BPF_X: 992 case BPF_ALU64 | BPF_ARSH | BPF_X: 993 emit(A64_ASRV(is64, dst, dst, src), ctx); 994 break; 995 /* dst = -dst */ 996 case BPF_ALU | BPF_NEG: 997 case BPF_ALU64 | BPF_NEG: 998 
emit(A64_NEG(is64, dst, dst), ctx); 999 break; 1000 /* dst = BSWAP##imm(dst) */ 1001 case BPF_ALU | BPF_END | BPF_FROM_LE: 1002 case BPF_ALU | BPF_END | BPF_FROM_BE: 1003 case BPF_ALU64 | BPF_END | BPF_FROM_LE: 1004 #ifdef CONFIG_CPU_BIG_ENDIAN 1005 if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_BE) 1006 goto emit_bswap_uxt; 1007 #else /* !CONFIG_CPU_BIG_ENDIAN */ 1008 if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_LE) 1009 goto emit_bswap_uxt; 1010 #endif 1011 switch (imm) { 1012 case 16: 1013 emit(A64_REV16(is64, dst, dst), ctx); 1014 /* zero-extend 16 bits into 64 bits */ 1015 emit(A64_UXTH(is64, dst, dst), ctx); 1016 break; 1017 case 32: 1018 emit(A64_REV32(0, dst, dst), ctx); 1019 /* upper 32 bits already cleared */ 1020 break; 1021 case 64: 1022 emit(A64_REV64(dst, dst), ctx); 1023 break; 1024 } 1025 break; 1026 emit_bswap_uxt: 1027 switch (imm) { 1028 case 16: 1029 /* zero-extend 16 bits into 64 bits */ 1030 emit(A64_UXTH(is64, dst, dst), ctx); 1031 break; 1032 case 32: 1033 /* zero-extend 32 bits into 64 bits */ 1034 emit(A64_UXTW(is64, dst, dst), ctx); 1035 break; 1036 case 64: 1037 /* nop */ 1038 break; 1039 } 1040 break; 1041 /* dst = imm */ 1042 case BPF_ALU | BPF_MOV | BPF_K: 1043 case BPF_ALU64 | BPF_MOV | BPF_K: 1044 emit_a64_mov_i(is64, dst, imm, ctx); 1045 break; 1046 /* dst = dst OP imm */ 1047 case BPF_ALU | BPF_ADD | BPF_K: 1048 case BPF_ALU64 | BPF_ADD | BPF_K: 1049 if (is_addsub_imm(imm)) { 1050 emit(A64_ADD_I(is64, dst, dst, imm), ctx); 1051 } else if (is_addsub_imm(-imm)) { 1052 emit(A64_SUB_I(is64, dst, dst, -imm), ctx); 1053 } else { 1054 emit_a64_mov_i(is64, tmp, imm, ctx); 1055 emit(A64_ADD(is64, dst, dst, tmp), ctx); 1056 } 1057 break; 1058 case BPF_ALU | BPF_SUB | BPF_K: 1059 case BPF_ALU64 | BPF_SUB | BPF_K: 1060 if (is_addsub_imm(imm)) { 1061 emit(A64_SUB_I(is64, dst, dst, imm), ctx); 1062 } else if (is_addsub_imm(-imm)) { 1063 emit(A64_ADD_I(is64, dst, dst, -imm), ctx); 1064 } else { 1065 emit_a64_mov_i(is64, tmp, imm, ctx); 1066 emit(A64_SUB(is64, dst, dst, tmp), ctx); 1067 } 1068 break; 1069 case BPF_ALU | BPF_AND | BPF_K: 1070 case BPF_ALU64 | BPF_AND | BPF_K: 1071 a64_insn = A64_AND_I(is64, dst, dst, imm); 1072 if (a64_insn != AARCH64_BREAK_FAULT) { 1073 emit(a64_insn, ctx); 1074 } else { 1075 emit_a64_mov_i(is64, tmp, imm, ctx); 1076 emit(A64_AND(is64, dst, dst, tmp), ctx); 1077 } 1078 break; 1079 case BPF_ALU | BPF_OR | BPF_K: 1080 case BPF_ALU64 | BPF_OR | BPF_K: 1081 a64_insn = A64_ORR_I(is64, dst, dst, imm); 1082 if (a64_insn != AARCH64_BREAK_FAULT) { 1083 emit(a64_insn, ctx); 1084 } else { 1085 emit_a64_mov_i(is64, tmp, imm, ctx); 1086 emit(A64_ORR(is64, dst, dst, tmp), ctx); 1087 } 1088 break; 1089 case BPF_ALU | BPF_XOR | BPF_K: 1090 case BPF_ALU64 | BPF_XOR | BPF_K: 1091 a64_insn = A64_EOR_I(is64, dst, dst, imm); 1092 if (a64_insn != AARCH64_BREAK_FAULT) { 1093 emit(a64_insn, ctx); 1094 } else { 1095 emit_a64_mov_i(is64, tmp, imm, ctx); 1096 emit(A64_EOR(is64, dst, dst, tmp), ctx); 1097 } 1098 break; 1099 case BPF_ALU | BPF_MUL | BPF_K: 1100 case BPF_ALU64 | BPF_MUL | BPF_K: 1101 emit_a64_mov_i(is64, tmp, imm, ctx); 1102 emit(A64_MUL(is64, dst, dst, tmp), ctx); 1103 break; 1104 case BPF_ALU | BPF_DIV | BPF_K: 1105 case BPF_ALU64 | BPF_DIV | BPF_K: 1106 emit_a64_mov_i(is64, tmp, imm, ctx); 1107 if (!off) 1108 emit(A64_UDIV(is64, dst, dst, tmp), ctx); 1109 else 1110 emit(A64_SDIV(is64, dst, dst, tmp), ctx); 1111 break; 1112 case BPF_ALU | BPF_MOD | BPF_K: 1113 case BPF_ALU64 | BPF_MOD | BPF_K: 1114 emit_a64_mov_i(is64, 
tmp2, imm, ctx); 1115 if (!off) 1116 emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); 1117 else 1118 emit(A64_SDIV(is64, tmp, dst, tmp2), ctx); 1119 emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx); 1120 break; 1121 case BPF_ALU | BPF_LSH | BPF_K: 1122 case BPF_ALU64 | BPF_LSH | BPF_K: 1123 emit(A64_LSL(is64, dst, dst, imm), ctx); 1124 break; 1125 case BPF_ALU | BPF_RSH | BPF_K: 1126 case BPF_ALU64 | BPF_RSH | BPF_K: 1127 emit(A64_LSR(is64, dst, dst, imm), ctx); 1128 break; 1129 case BPF_ALU | BPF_ARSH | BPF_K: 1130 case BPF_ALU64 | BPF_ARSH | BPF_K: 1131 emit(A64_ASR(is64, dst, dst, imm), ctx); 1132 break; 1133 1134 /* JUMP off */ 1135 case BPF_JMP | BPF_JA: 1136 case BPF_JMP32 | BPF_JA: 1137 if (BPF_CLASS(code) == BPF_JMP) 1138 jmp_offset = bpf2a64_offset(i, off, ctx); 1139 else 1140 jmp_offset = bpf2a64_offset(i, imm, ctx); 1141 check_imm26(jmp_offset); 1142 emit(A64_B(jmp_offset), ctx); 1143 break; 1144 /* IF (dst COND src) JUMP off */ 1145 case BPF_JMP | BPF_JEQ | BPF_X: 1146 case BPF_JMP | BPF_JGT | BPF_X: 1147 case BPF_JMP | BPF_JLT | BPF_X: 1148 case BPF_JMP | BPF_JGE | BPF_X: 1149 case BPF_JMP | BPF_JLE | BPF_X: 1150 case BPF_JMP | BPF_JNE | BPF_X: 1151 case BPF_JMP | BPF_JSGT | BPF_X: 1152 case BPF_JMP | BPF_JSLT | BPF_X: 1153 case BPF_JMP | BPF_JSGE | BPF_X: 1154 case BPF_JMP | BPF_JSLE | BPF_X: 1155 case BPF_JMP32 | BPF_JEQ | BPF_X: 1156 case BPF_JMP32 | BPF_JGT | BPF_X: 1157 case BPF_JMP32 | BPF_JLT | BPF_X: 1158 case BPF_JMP32 | BPF_JGE | BPF_X: 1159 case BPF_JMP32 | BPF_JLE | BPF_X: 1160 case BPF_JMP32 | BPF_JNE | BPF_X: 1161 case BPF_JMP32 | BPF_JSGT | BPF_X: 1162 case BPF_JMP32 | BPF_JSLT | BPF_X: 1163 case BPF_JMP32 | BPF_JSGE | BPF_X: 1164 case BPF_JMP32 | BPF_JSLE | BPF_X: 1165 emit(A64_CMP(is64, dst, src), ctx); 1166 emit_cond_jmp: 1167 jmp_offset = bpf2a64_offset(i, off, ctx); 1168 check_imm19(jmp_offset); 1169 switch (BPF_OP(code)) { 1170 case BPF_JEQ: 1171 jmp_cond = A64_COND_EQ; 1172 break; 1173 case BPF_JGT: 1174 jmp_cond = A64_COND_HI; 1175 break; 1176 case BPF_JLT: 1177 jmp_cond = A64_COND_CC; 1178 break; 1179 case BPF_JGE: 1180 jmp_cond = A64_COND_CS; 1181 break; 1182 case BPF_JLE: 1183 jmp_cond = A64_COND_LS; 1184 break; 1185 case BPF_JSET: 1186 case BPF_JNE: 1187 jmp_cond = A64_COND_NE; 1188 break; 1189 case BPF_JSGT: 1190 jmp_cond = A64_COND_GT; 1191 break; 1192 case BPF_JSLT: 1193 jmp_cond = A64_COND_LT; 1194 break; 1195 case BPF_JSGE: 1196 jmp_cond = A64_COND_GE; 1197 break; 1198 case BPF_JSLE: 1199 jmp_cond = A64_COND_LE; 1200 break; 1201 default: 1202 return -EFAULT; 1203 } 1204 emit(A64_B_(jmp_cond, jmp_offset), ctx); 1205 break; 1206 case BPF_JMP | BPF_JSET | BPF_X: 1207 case BPF_JMP32 | BPF_JSET | BPF_X: 1208 emit(A64_TST(is64, dst, src), ctx); 1209 goto emit_cond_jmp; 1210 /* IF (dst COND imm) JUMP off */ 1211 case BPF_JMP | BPF_JEQ | BPF_K: 1212 case BPF_JMP | BPF_JGT | BPF_K: 1213 case BPF_JMP | BPF_JLT | BPF_K: 1214 case BPF_JMP | BPF_JGE | BPF_K: 1215 case BPF_JMP | BPF_JLE | BPF_K: 1216 case BPF_JMP | BPF_JNE | BPF_K: 1217 case BPF_JMP | BPF_JSGT | BPF_K: 1218 case BPF_JMP | BPF_JSLT | BPF_K: 1219 case BPF_JMP | BPF_JSGE | BPF_K: 1220 case BPF_JMP | BPF_JSLE | BPF_K: 1221 case BPF_JMP32 | BPF_JEQ | BPF_K: 1222 case BPF_JMP32 | BPF_JGT | BPF_K: 1223 case BPF_JMP32 | BPF_JLT | BPF_K: 1224 case BPF_JMP32 | BPF_JGE | BPF_K: 1225 case BPF_JMP32 | BPF_JLE | BPF_K: 1226 case BPF_JMP32 | BPF_JNE | BPF_K: 1227 case BPF_JMP32 | BPF_JSGT | BPF_K: 1228 case BPF_JMP32 | BPF_JSLT | BPF_K: 1229 case BPF_JMP32 | BPF_JSGE | BPF_K: 1230 case BPF_JMP32 | BPF_JSLE | BPF_K: 
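                /*
                 * Pick the cheapest compare against an immediate: CMP when
                 * imm fits an add/sub imm12 (optionally shifted), CMN with
                 * -imm when the negation fits, otherwise materialize imm
                 * into tmp first. For example (a sketch; r1 lives in x0):
                 *
                 *   if r1 > 0xfff goto L    ->  cmp x0, #0xfff
                 *   if r1 > -16 goto L      ->  cmn x0, #16
                 *   if r1 > 0x12345 goto L  ->  movz tmp, #0x2345
                 *                               movk tmp, #1, lsl #16
                 *                               cmp  x0, tmp
                 */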
1231 if (is_addsub_imm(imm)) { 1232 emit(A64_CMP_I(is64, dst, imm), ctx); 1233 } else if (is_addsub_imm(-imm)) { 1234 emit(A64_CMN_I(is64, dst, -imm), ctx); 1235 } else { 1236 emit_a64_mov_i(is64, tmp, imm, ctx); 1237 emit(A64_CMP(is64, dst, tmp), ctx); 1238 } 1239 goto emit_cond_jmp; 1240 case BPF_JMP | BPF_JSET | BPF_K: 1241 case BPF_JMP32 | BPF_JSET | BPF_K: 1242 a64_insn = A64_TST_I(is64, dst, imm); 1243 if (a64_insn != AARCH64_BREAK_FAULT) { 1244 emit(a64_insn, ctx); 1245 } else { 1246 emit_a64_mov_i(is64, tmp, imm, ctx); 1247 emit(A64_TST(is64, dst, tmp), ctx); 1248 } 1249 goto emit_cond_jmp; 1250 /* function call */ 1251 case BPF_JMP | BPF_CALL: 1252 { 1253 const u8 r0 = bpf2a64[BPF_REG_0]; 1254 bool func_addr_fixed; 1255 u64 func_addr; 1256 u32 cpu_offset; 1257 1258 /* Implement helper call to bpf_get_smp_processor_id() inline */ 1259 if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) { 1260 cpu_offset = offsetof(struct thread_info, cpu); 1261 1262 emit(A64_MRS_SP_EL0(tmp), ctx); 1263 if (is_lsi_offset(cpu_offset, 2)) { 1264 emit(A64_LDR32I(r0, tmp, cpu_offset), ctx); 1265 } else { 1266 emit_a64_mov_i(1, tmp2, cpu_offset, ctx); 1267 emit(A64_LDR32(r0, tmp, tmp2), ctx); 1268 } 1269 break; 1270 } 1271 1272 /* Implement helper call to bpf_get_current_task/_btf() inline */ 1273 if (insn->src_reg == 0 && (insn->imm == BPF_FUNC_get_current_task || 1274 insn->imm == BPF_FUNC_get_current_task_btf)) { 1275 emit(A64_MRS_SP_EL0(r0), ctx); 1276 break; 1277 } 1278 1279 ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, 1280 &func_addr, &func_addr_fixed); 1281 if (ret < 0) 1282 return ret; 1283 emit_call(func_addr, ctx); 1284 emit(A64_MOV(1, r0, A64_R(0)), ctx); 1285 break; 1286 } 1287 /* tail call */ 1288 case BPF_JMP | BPF_TAIL_CALL: 1289 if (emit_bpf_tail_call(ctx)) 1290 return -EFAULT; 1291 break; 1292 /* function return */ 1293 case BPF_JMP | BPF_EXIT: 1294 /* Optimization: when last instruction is EXIT, 1295 simply fallthrough to epilogue. 
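                   Otherwise emit a direct branch; epilogue_offset() is simply
                   ctx->epilogue_offset - ctx->idx, i.e. the distance in
                   instructions from here to the shared epilogue.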
*/ 1296 if (i == ctx->prog->len - 1) 1297 break; 1298 jmp_offset = epilogue_offset(ctx); 1299 check_imm26(jmp_offset); 1300 emit(A64_B(jmp_offset), ctx); 1301 break; 1302 1303 /* dst = imm64 */ 1304 case BPF_LD | BPF_IMM | BPF_DW: 1305 { 1306 const struct bpf_insn insn1 = insn[1]; 1307 u64 imm64; 1308 1309 imm64 = (u64)insn1.imm << 32 | (u32)imm; 1310 if (bpf_pseudo_func(insn)) 1311 emit_addr_mov_i64(dst, imm64, ctx); 1312 else 1313 emit_a64_mov_i64(dst, imm64, ctx); 1314 1315 return 1; 1316 } 1317 1318 /* LDX: dst = (u64)*(unsigned size *)(src + off) */ 1319 case BPF_LDX | BPF_MEM | BPF_W: 1320 case BPF_LDX | BPF_MEM | BPF_H: 1321 case BPF_LDX | BPF_MEM | BPF_B: 1322 case BPF_LDX | BPF_MEM | BPF_DW: 1323 case BPF_LDX | BPF_PROBE_MEM | BPF_DW: 1324 case BPF_LDX | BPF_PROBE_MEM | BPF_W: 1325 case BPF_LDX | BPF_PROBE_MEM | BPF_H: 1326 case BPF_LDX | BPF_PROBE_MEM | BPF_B: 1327 /* LDXS: dst_reg = (s64)*(signed size *)(src_reg + off) */ 1328 case BPF_LDX | BPF_MEMSX | BPF_B: 1329 case BPF_LDX | BPF_MEMSX | BPF_H: 1330 case BPF_LDX | BPF_MEMSX | BPF_W: 1331 case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: 1332 case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: 1333 case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: 1334 case BPF_LDX | BPF_PROBE_MEM32 | BPF_B: 1335 case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: 1336 case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: 1337 case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: 1338 if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { 1339 emit(A64_ADD(1, tmp2, src, arena_vm_base), ctx); 1340 src = tmp2; 1341 } 1342 if (ctx->fpb_offset > 0 && src == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) { 1343 src_adj = fpb; 1344 off_adj = off + ctx->fpb_offset; 1345 } else { 1346 src_adj = src; 1347 off_adj = off; 1348 } 1349 sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX || 1350 BPF_MODE(insn->code) == BPF_PROBE_MEMSX); 1351 switch (BPF_SIZE(code)) { 1352 case BPF_W: 1353 if (is_lsi_offset(off_adj, 2)) { 1354 if (sign_extend) 1355 emit(A64_LDRSWI(dst, src_adj, off_adj), ctx); 1356 else 1357 emit(A64_LDR32I(dst, src_adj, off_adj), ctx); 1358 } else { 1359 emit_a64_mov_i(1, tmp, off, ctx); 1360 if (sign_extend) 1361 emit(A64_LDRSW(dst, src, tmp), ctx); 1362 else 1363 emit(A64_LDR32(dst, src, tmp), ctx); 1364 } 1365 break; 1366 case BPF_H: 1367 if (is_lsi_offset(off_adj, 1)) { 1368 if (sign_extend) 1369 emit(A64_LDRSHI(dst, src_adj, off_adj), ctx); 1370 else 1371 emit(A64_LDRHI(dst, src_adj, off_adj), ctx); 1372 } else { 1373 emit_a64_mov_i(1, tmp, off, ctx); 1374 if (sign_extend) 1375 emit(A64_LDRSH(dst, src, tmp), ctx); 1376 else 1377 emit(A64_LDRH(dst, src, tmp), ctx); 1378 } 1379 break; 1380 case BPF_B: 1381 if (is_lsi_offset(off_adj, 0)) { 1382 if (sign_extend) 1383 emit(A64_LDRSBI(dst, src_adj, off_adj), ctx); 1384 else 1385 emit(A64_LDRBI(dst, src_adj, off_adj), ctx); 1386 } else { 1387 emit_a64_mov_i(1, tmp, off, ctx); 1388 if (sign_extend) 1389 emit(A64_LDRSB(dst, src, tmp), ctx); 1390 else 1391 emit(A64_LDRB(dst, src, tmp), ctx); 1392 } 1393 break; 1394 case BPF_DW: 1395 if (is_lsi_offset(off_adj, 3)) { 1396 emit(A64_LDR64I(dst, src_adj, off_adj), ctx); 1397 } else { 1398 emit_a64_mov_i(1, tmp, off, ctx); 1399 emit(A64_LDR64(dst, src, tmp), ctx); 1400 } 1401 break; 1402 } 1403 1404 ret = add_exception_handler(insn, ctx, dst); 1405 if (ret) 1406 return ret; 1407 break; 1408 1409 /* speculation barrier */ 1410 case BPF_ST | BPF_NOSPEC: 1411 /* 1412 * Nothing required here. 
1413 * 1414 * In case of arm64, we rely on the firmware mitigation of 1415 * Speculative Store Bypass as controlled via the ssbd kernel 1416 * parameter. Whenever the mitigation is enabled, it works 1417 * for all of the kernel code with no need to provide any 1418 * additional instructions. 1419 */ 1420 break; 1421 1422 /* ST: *(size *)(dst + off) = imm */ 1423 case BPF_ST | BPF_MEM | BPF_W: 1424 case BPF_ST | BPF_MEM | BPF_H: 1425 case BPF_ST | BPF_MEM | BPF_B: 1426 case BPF_ST | BPF_MEM | BPF_DW: 1427 case BPF_ST | BPF_PROBE_MEM32 | BPF_B: 1428 case BPF_ST | BPF_PROBE_MEM32 | BPF_H: 1429 case BPF_ST | BPF_PROBE_MEM32 | BPF_W: 1430 case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: 1431 if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { 1432 emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx); 1433 dst = tmp2; 1434 } 1435 if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) { 1436 dst_adj = fpb; 1437 off_adj = off + ctx->fpb_offset; 1438 } else { 1439 dst_adj = dst; 1440 off_adj = off; 1441 } 1442 /* Load imm to a register then store it */ 1443 emit_a64_mov_i(1, tmp, imm, ctx); 1444 switch (BPF_SIZE(code)) { 1445 case BPF_W: 1446 if (is_lsi_offset(off_adj, 2)) { 1447 emit(A64_STR32I(tmp, dst_adj, off_adj), ctx); 1448 } else { 1449 emit_a64_mov_i(1, tmp2, off, ctx); 1450 emit(A64_STR32(tmp, dst, tmp2), ctx); 1451 } 1452 break; 1453 case BPF_H: 1454 if (is_lsi_offset(off_adj, 1)) { 1455 emit(A64_STRHI(tmp, dst_adj, off_adj), ctx); 1456 } else { 1457 emit_a64_mov_i(1, tmp2, off, ctx); 1458 emit(A64_STRH(tmp, dst, tmp2), ctx); 1459 } 1460 break; 1461 case BPF_B: 1462 if (is_lsi_offset(off_adj, 0)) { 1463 emit(A64_STRBI(tmp, dst_adj, off_adj), ctx); 1464 } else { 1465 emit_a64_mov_i(1, tmp2, off, ctx); 1466 emit(A64_STRB(tmp, dst, tmp2), ctx); 1467 } 1468 break; 1469 case BPF_DW: 1470 if (is_lsi_offset(off_adj, 3)) { 1471 emit(A64_STR64I(tmp, dst_adj, off_adj), ctx); 1472 } else { 1473 emit_a64_mov_i(1, tmp2, off, ctx); 1474 emit(A64_STR64(tmp, dst, tmp2), ctx); 1475 } 1476 break; 1477 } 1478 1479 ret = add_exception_handler(insn, ctx, dst); 1480 if (ret) 1481 return ret; 1482 break; 1483 1484 /* STX: *(size *)(dst + off) = src */ 1485 case BPF_STX | BPF_MEM | BPF_W: 1486 case BPF_STX | BPF_MEM | BPF_H: 1487 case BPF_STX | BPF_MEM | BPF_B: 1488 case BPF_STX | BPF_MEM | BPF_DW: 1489 case BPF_STX | BPF_PROBE_MEM32 | BPF_B: 1490 case BPF_STX | BPF_PROBE_MEM32 | BPF_H: 1491 case BPF_STX | BPF_PROBE_MEM32 | BPF_W: 1492 case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: 1493 if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { 1494 emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx); 1495 dst = tmp2; 1496 } 1497 if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) { 1498 dst_adj = fpb; 1499 off_adj = off + ctx->fpb_offset; 1500 } else { 1501 dst_adj = dst; 1502 off_adj = off; 1503 } 1504 switch (BPF_SIZE(code)) { 1505 case BPF_W: 1506 if (is_lsi_offset(off_adj, 2)) { 1507 emit(A64_STR32I(src, dst_adj, off_adj), ctx); 1508 } else { 1509 emit_a64_mov_i(1, tmp, off, ctx); 1510 emit(A64_STR32(src, dst, tmp), ctx); 1511 } 1512 break; 1513 case BPF_H: 1514 if (is_lsi_offset(off_adj, 1)) { 1515 emit(A64_STRHI(src, dst_adj, off_adj), ctx); 1516 } else { 1517 emit_a64_mov_i(1, tmp, off, ctx); 1518 emit(A64_STRH(src, dst, tmp), ctx); 1519 } 1520 break; 1521 case BPF_B: 1522 if (is_lsi_offset(off_adj, 0)) { 1523 emit(A64_STRBI(src, dst_adj, off_adj), ctx); 1524 } else { 1525 emit_a64_mov_i(1, tmp, off, ctx); 1526 emit(A64_STRB(src, dst, tmp), ctx); 1527 } 1528 break; 1529 case BPF_DW: 1530 if 
(is_lsi_offset(off_adj, 3)) { 1531 emit(A64_STR64I(src, dst_adj, off_adj), ctx); 1532 } else { 1533 emit_a64_mov_i(1, tmp, off, ctx); 1534 emit(A64_STR64(src, dst, tmp), ctx); 1535 } 1536 break; 1537 } 1538 1539 ret = add_exception_handler(insn, ctx, dst); 1540 if (ret) 1541 return ret; 1542 break; 1543 1544 case BPF_STX | BPF_ATOMIC | BPF_W: 1545 case BPF_STX | BPF_ATOMIC | BPF_DW: 1546 case BPF_STX | BPF_PROBE_ATOMIC | BPF_W: 1547 case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW: 1548 if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) 1549 ret = emit_lse_atomic(insn, ctx); 1550 else 1551 ret = emit_ll_sc_atomic(insn, ctx); 1552 if (ret) 1553 return ret; 1554 1555 ret = add_exception_handler(insn, ctx, dst); 1556 if (ret) 1557 return ret; 1558 break; 1559 1560 default: 1561 pr_err_once("unknown opcode %02x\n", code); 1562 return -EINVAL; 1563 } 1564 1565 return 0; 1566 } 1567 1568 /* 1569 * Return 0 if FP may change at runtime, otherwise find the minimum negative 1570 * offset to FP, converts it to positive number, and align down to 8 bytes. 1571 */ 1572 static int find_fpb_offset(struct bpf_prog *prog) 1573 { 1574 int i; 1575 int offset = 0; 1576 1577 for (i = 0; i < prog->len; i++) { 1578 const struct bpf_insn *insn = &prog->insnsi[i]; 1579 const u8 class = BPF_CLASS(insn->code); 1580 const u8 mode = BPF_MODE(insn->code); 1581 const u8 src = insn->src_reg; 1582 const u8 dst = insn->dst_reg; 1583 const s32 imm = insn->imm; 1584 const s16 off = insn->off; 1585 1586 switch (class) { 1587 case BPF_STX: 1588 case BPF_ST: 1589 /* fp holds atomic operation result */ 1590 if (class == BPF_STX && mode == BPF_ATOMIC && 1591 ((imm == BPF_XCHG || 1592 imm == (BPF_FETCH | BPF_ADD) || 1593 imm == (BPF_FETCH | BPF_AND) || 1594 imm == (BPF_FETCH | BPF_XOR) || 1595 imm == (BPF_FETCH | BPF_OR)) && 1596 src == BPF_REG_FP)) 1597 return 0; 1598 1599 if (mode == BPF_MEM && dst == BPF_REG_FP && 1600 off < offset) 1601 offset = insn->off; 1602 break; 1603 1604 case BPF_JMP32: 1605 case BPF_JMP: 1606 break; 1607 1608 case BPF_LDX: 1609 case BPF_LD: 1610 /* fp holds load result */ 1611 if (dst == BPF_REG_FP) 1612 return 0; 1613 1614 if (class == BPF_LDX && mode == BPF_MEM && 1615 src == BPF_REG_FP && off < offset) 1616 offset = off; 1617 break; 1618 1619 case BPF_ALU: 1620 case BPF_ALU64: 1621 default: 1622 /* fp holds ALU result */ 1623 if (dst == BPF_REG_FP) 1624 return 0; 1625 } 1626 } 1627 1628 if (offset < 0) { 1629 /* 1630 * safely be converted to a positive 'int', since insn->off 1631 * is 's16' 1632 */ 1633 offset = -offset; 1634 /* align down to 8 bytes */ 1635 offset = ALIGN_DOWN(offset, 8); 1636 } 1637 1638 return offset; 1639 } 1640 1641 static int build_body(struct jit_ctx *ctx, bool extra_pass) 1642 { 1643 const struct bpf_prog *prog = ctx->prog; 1644 int i; 1645 1646 /* 1647 * - offset[0] offset of the end of prologue, 1648 * start of the 1st instruction. 1649 * - offset[1] - offset of the end of 1st instruction, 1650 * start of the 2nd instruction 1651 * [....] 
1652 * - offset[3] - offset of the end of 3rd instruction, 1653 * start of 4th instruction 1654 */ 1655 for (i = 0; i < prog->len; i++) { 1656 const struct bpf_insn *insn = &prog->insnsi[i]; 1657 int ret; 1658 1659 if (ctx->image == NULL) 1660 ctx->offset[i] = ctx->idx; 1661 ret = build_insn(insn, ctx, extra_pass); 1662 if (ret > 0) { 1663 i++; 1664 if (ctx->image == NULL) 1665 ctx->offset[i] = ctx->idx; 1666 continue; 1667 } 1668 if (ret) 1669 return ret; 1670 } 1671 /* 1672 * offset is allocated with prog->len + 1 so fill in 1673 * the last element with the offset after the last 1674 * instruction (end of program) 1675 */ 1676 if (ctx->image == NULL) 1677 ctx->offset[i] = ctx->idx; 1678 1679 return 0; 1680 } 1681 1682 static int validate_code(struct jit_ctx *ctx) 1683 { 1684 int i; 1685 1686 for (i = 0; i < ctx->idx; i++) { 1687 u32 a64_insn = le32_to_cpu(ctx->image[i]); 1688 1689 if (a64_insn == AARCH64_BREAK_FAULT) 1690 return -1; 1691 } 1692 return 0; 1693 } 1694 1695 static int validate_ctx(struct jit_ctx *ctx) 1696 { 1697 if (validate_code(ctx)) 1698 return -1; 1699 1700 if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries)) 1701 return -1; 1702 1703 return 0; 1704 } 1705 1706 static inline void bpf_flush_icache(void *start, void *end) 1707 { 1708 flush_icache_range((unsigned long)start, (unsigned long)end); 1709 } 1710 1711 struct arm64_jit_data { 1712 struct bpf_binary_header *header; 1713 u8 *ro_image; 1714 struct bpf_binary_header *ro_header; 1715 struct jit_ctx ctx; 1716 }; 1717 1718 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) 1719 { 1720 int image_size, prog_size, extable_size, extable_align, extable_offset; 1721 struct bpf_prog *tmp, *orig_prog = prog; 1722 struct bpf_binary_header *header; 1723 struct bpf_binary_header *ro_header; 1724 struct arm64_jit_data *jit_data; 1725 bool was_classic = bpf_prog_was_classic(prog); 1726 bool tmp_blinded = false; 1727 bool extra_pass = false; 1728 struct jit_ctx ctx; 1729 u64 arena_vm_start; 1730 u8 *image_ptr; 1731 u8 *ro_image_ptr; 1732 1733 if (!prog->jit_requested) 1734 return orig_prog; 1735 1736 tmp = bpf_jit_blind_constants(prog); 1737 /* If blinding was requested and we failed during blinding, 1738 * we must fall back to the interpreter. 1739 */ 1740 if (IS_ERR(tmp)) 1741 return orig_prog; 1742 if (tmp != prog) { 1743 tmp_blinded = true; 1744 prog = tmp; 1745 } 1746 1747 arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); 1748 jit_data = prog->aux->jit_data; 1749 if (!jit_data) { 1750 jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); 1751 if (!jit_data) { 1752 prog = orig_prog; 1753 goto out; 1754 } 1755 prog->aux->jit_data = jit_data; 1756 } 1757 if (jit_data->ctx.offset) { 1758 ctx = jit_data->ctx; 1759 ro_image_ptr = jit_data->ro_image; 1760 ro_header = jit_data->ro_header; 1761 header = jit_data->header; 1762 image_ptr = (void *)header + ((void *)ro_image_ptr 1763 - (void *)ro_header); 1764 extra_pass = true; 1765 prog_size = sizeof(u32) * ctx.idx; 1766 goto skip_init_ctx; 1767 } 1768 memset(&ctx, 0, sizeof(ctx)); 1769 ctx.prog = prog; 1770 1771 ctx.offset = kvcalloc(prog->len + 1, sizeof(int), GFP_KERNEL); 1772 if (ctx.offset == NULL) { 1773 prog = orig_prog; 1774 goto out_off; 1775 } 1776 1777 ctx.fpb_offset = find_fpb_offset(prog); 1778 ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); 1779 1780 /* 1781 * 1. Initial fake pass to compute ctx->idx and ctx->offset. 
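         *    (ctx.image is still NULL during this pass, so emit() only
         *    advances ctx->idx without writing anything; the pass exists to
         *    size the image and record per-instruction offsets.)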
1782 * 1783 * BPF line info needs ctx->offset[i] to be the offset of 1784 * instruction[i] in jited image, so build prologue first. 1785 */ 1786 if (build_prologue(&ctx, was_classic, prog->aux->exception_cb, 1787 arena_vm_start)) { 1788 prog = orig_prog; 1789 goto out_off; 1790 } 1791 1792 if (build_body(&ctx, extra_pass)) { 1793 prog = orig_prog; 1794 goto out_off; 1795 } 1796 1797 ctx.epilogue_offset = ctx.idx; 1798 build_epilogue(&ctx, prog->aux->exception_cb); 1799 build_plt(&ctx); 1800 1801 extable_align = __alignof__(struct exception_table_entry); 1802 extable_size = prog->aux->num_exentries * 1803 sizeof(struct exception_table_entry); 1804 1805 /* Now we know the actual image size. */ 1806 prog_size = sizeof(u32) * ctx.idx; 1807 /* also allocate space for plt target */ 1808 extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align); 1809 image_size = extable_offset + extable_size; 1810 ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, 1811 sizeof(u32), &header, &image_ptr, 1812 jit_fill_hole); 1813 if (!ro_header) { 1814 prog = orig_prog; 1815 goto out_off; 1816 } 1817 1818 /* 2. Now, the actual pass. */ 1819 1820 /* 1821 * Use the image(RW) for writing the JITed instructions. But also save 1822 * the ro_image(RX) for calculating the offsets in the image. The RW 1823 * image will be later copied to the RX image from where the program 1824 * will run. The bpf_jit_binary_pack_finalize() will do this copy in the 1825 * final step. 1826 */ 1827 ctx.image = (__le32 *)image_ptr; 1828 ctx.ro_image = (__le32 *)ro_image_ptr; 1829 if (extable_size) 1830 prog->aux->extable = (void *)ro_image_ptr + extable_offset; 1831 skip_init_ctx: 1832 ctx.idx = 0; 1833 ctx.exentry_idx = 0; 1834 1835 build_prologue(&ctx, was_classic, prog->aux->exception_cb, arena_vm_start); 1836 1837 if (build_body(&ctx, extra_pass)) { 1838 prog = orig_prog; 1839 goto out_free_hdr; 1840 } 1841 1842 build_epilogue(&ctx, prog->aux->exception_cb); 1843 build_plt(&ctx); 1844 1845 /* 3. Extra pass to validate JITed code. */ 1846 if (validate_ctx(&ctx)) { 1847 prog = orig_prog; 1848 goto out_free_hdr; 1849 } 1850 1851 /* And we're done. */ 1852 if (bpf_jit_enable > 1) 1853 bpf_jit_dump(prog->len, prog_size, 2, ctx.image); 1854 1855 if (!prog->is_func || extra_pass) { 1856 if (extra_pass && ctx.idx != jit_data->ctx.idx) { 1857 pr_err_once("multi-func JIT bug %d != %d\n", 1858 ctx.idx, jit_data->ctx.idx); 1859 prog->bpf_func = NULL; 1860 prog->jited = 0; 1861 prog->jited_len = 0; 1862 goto out_free_hdr; 1863 } 1864 if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) { 1865 /* ro_header has been freed */ 1866 ro_header = NULL; 1867 prog = orig_prog; 1868 goto out_off; 1869 } 1870 /* 1871 * The instructions have now been copied to the ROX region from 1872 * where they will execute. Now the data cache has to be cleaned to 1873 * the PoU and the I-cache has to be invalidated for the VAs. 
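                 *
                 * bpf_flush_icache() below does that via flush_icache_range()
                 * over [ro_header, ctx.ro_image + ctx.idx), i.e. the binary
                 * header plus all of the JITed instructions.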
1874 */ 1875 bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx); 1876 } else { 1877 jit_data->ctx = ctx; 1878 jit_data->ro_image = ro_image_ptr; 1879 jit_data->header = header; 1880 jit_data->ro_header = ro_header; 1881 } 1882 1883 prog->bpf_func = (void *)ctx.ro_image; 1884 prog->jited = 1; 1885 prog->jited_len = prog_size; 1886 1887 if (!prog->is_func || extra_pass) { 1888 int i; 1889 1890 /* offset[prog->len] is the size of program */ 1891 for (i = 0; i <= prog->len; i++) 1892 ctx.offset[i] *= AARCH64_INSN_SIZE; 1893 bpf_prog_fill_jited_linfo(prog, ctx.offset + 1); 1894 out_off: 1895 kvfree(ctx.offset); 1896 kfree(jit_data); 1897 prog->aux->jit_data = NULL; 1898 } 1899 out: 1900 if (tmp_blinded) 1901 bpf_jit_prog_release_other(prog, prog == orig_prog ? 1902 tmp : orig_prog); 1903 return prog; 1904 1905 out_free_hdr: 1906 if (header) { 1907 bpf_arch_text_copy(&ro_header->size, &header->size, 1908 sizeof(header->size)); 1909 bpf_jit_binary_pack_free(ro_header, header); 1910 } 1911 goto out_off; 1912 } 1913 1914 bool bpf_jit_supports_kfunc_call(void) 1915 { 1916 return true; 1917 } 1918 1919 void *bpf_arch_text_copy(void *dst, void *src, size_t len) 1920 { 1921 if (!aarch64_insn_copy(dst, src, len)) 1922 return ERR_PTR(-EINVAL); 1923 return dst; 1924 } 1925 1926 u64 bpf_jit_alloc_exec_limit(void) 1927 { 1928 return VMALLOC_END - VMALLOC_START; 1929 } 1930 1931 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ 1932 bool bpf_jit_supports_subprog_tailcalls(void) 1933 { 1934 return true; 1935 } 1936 1937 static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, 1938 int args_off, int retval_off, int run_ctx_off, 1939 bool save_ret) 1940 { 1941 __le32 *branch; 1942 u64 enter_prog; 1943 u64 exit_prog; 1944 struct bpf_prog *p = l->link.prog; 1945 int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); 1946 1947 enter_prog = (u64)bpf_trampoline_enter(p); 1948 exit_prog = (u64)bpf_trampoline_exit(p); 1949 1950 if (l->cookie == 0) { 1951 /* if cookie is zero, one instruction is enough to store it */ 1952 emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx); 1953 } else { 1954 emit_a64_mov_i64(A64_R(10), l->cookie, ctx); 1955 emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off), 1956 ctx); 1957 } 1958 1959 /* save p to callee saved register x19 to avoid loading p with mov_i64 1960 * each time. 
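         *
         * x20 is used the same way just below for the start-time value
         * returned by the enter helper, so both survive the call into the
         * program itself and can be handed to the exit helper afterwards.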
static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
			    int args_off, int retval_off, int run_ctx_off,
			    bool save_ret)
{
	__le32 *branch;
	u64 enter_prog;
	u64 exit_prog;
	struct bpf_prog *p = l->link.prog;
	int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);

	enter_prog = (u64)bpf_trampoline_enter(p);
	exit_prog = (u64)bpf_trampoline_exit(p);

	if (l->cookie == 0) {
		/* if cookie is zero, one instruction is enough to store it */
		emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx);
	} else {
		emit_a64_mov_i64(A64_R(10), l->cookie, ctx);
		emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off),
		     ctx);
	}

	/* save p to callee saved register x19 to avoid loading p with mov_i64
	 * each time.
	 */
	emit_addr_mov_i64(A64_R(19), (const u64)p, ctx);

	/* arg1: prog */
	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
	/* arg2: &run_ctx */
	emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx);

	emit_call(enter_prog, ctx);

	/* save return value to callee saved register x20 */
	emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);

	/* if (__bpf_prog_enter(prog) == 0)
	 *	goto skip_exec_of_prog;
	 */
	branch = ctx->image + ctx->idx;
	emit(A64_NOP, ctx);

	emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
	if (!p->jited)
		emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);

	emit_call((const u64)p->bpf_func, ctx);

	if (save_ret)
		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);

	if (ctx->image) {
		int offset = &ctx->image[ctx->idx] - branch;
		*branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset));
	}

	/* arg1: prog */
	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
	/* arg2: start time */
	emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx);
	/* arg3: &run_ctx */
	emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx);

	emit_call(exit_prog, ctx);
}

static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
			       int args_off, int retval_off, int run_ctx_off,
			       __le32 **branches)
{
	int i;

	/* The first fmod_ret program will receive a garbage return value.
	 * Set this to 0 to avoid confusing the program.
	 */
	emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
	for (i = 0; i < tl->nr_links; i++) {
		invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
				run_ctx_off, true);
		/* if (*(u64 *)(sp + retval_off) != 0)
		 *	goto do_fexit;
		 */
		emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx);
		/* Save the location of branch, and generate a nop.
		 * This nop will be replaced with a cbnz later.
		 */
		branches[i] = ctx->image + ctx->idx;
		emit(A64_NOP, ctx);
	}
}

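/* Spill (save_args) or reload (restore_args) the argument registers
 * x0..x(nregs - 1), one 8-byte stack slot per register starting at args_off.
 */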
static void save_args(struct jit_ctx *ctx, int args_off, int nregs)
{
	int i;

	for (i = 0; i < nregs; i++) {
		emit(A64_STR64I(i, A64_SP, args_off), ctx);
		args_off += 8;
	}
}

static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
{
	int i;

	for (i = 0; i < nregs; i++) {
		emit(A64_LDR64I(i, A64_SP, args_off), ctx);
		args_off += 8;
	}
}

/* Based on the x86's implementation of arch_prepare_bpf_trampoline().
 *
 * bpf prog and function entry before a bpf trampoline is hooked:
 *	mov x9, lr
 *	nop
 *
 * bpf prog and function entry after a bpf trampoline is hooked:
 *	mov x9, lr
 *	bl <bpf_trampoline or plt>
 *
 */
static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
			      struct bpf_tramp_links *tlinks, void *func_addr,
			      int nregs, u32 flags)
{
	int i;
	int stack_size;
	int retaddr_off;
	int regs_off;
	int retval_off;
	int args_off;
	int nregs_off;
	int ip_off;
	int run_ctx_off;
	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
	bool save_ret;
	__le32 **branches = NULL;

	/* trampoline stack layout:
	 *                  [ parent ip         ]
	 *                  [ FP                ]
	 * SP + retaddr_off [ self ip           ]
	 *                  [ FP                ]
	 *
	 *                  [ padding           ] align SP to multiples of 16
	 *
	 *                  [ x20               ] callee saved reg x20
	 * SP + regs_off    [ x19               ] callee saved reg x19
	 *
	 * SP + retval_off  [ return value      ] BPF_TRAMP_F_CALL_ORIG or
	 *                                        BPF_TRAMP_F_RET_FENTRY_RET
	 *
	 *                  [ arg reg N         ]
	 *                  [ ...               ]
	 * SP + args_off    [ arg reg 1         ]
	 *
	 * SP + nregs_off   [ arg regs count    ]
	 *
	 * SP + ip_off      [ traced function   ] BPF_TRAMP_F_IP_ARG flag
	 *
	 * SP + run_ctx_off [ bpf_tramp_run_ctx ]
	 */

	stack_size = 0;
	run_ctx_off = stack_size;
	/* room for bpf_tramp_run_ctx */
	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);

	ip_off = stack_size;
	/* room for IP address argument */
	if (flags & BPF_TRAMP_F_IP_ARG)
		stack_size += 8;

	nregs_off = stack_size;
	/* room for args count */
	stack_size += 8;

	args_off = stack_size;
	/* room for args */
	stack_size += nregs * 8;

	/* room for return value */
	retval_off = stack_size;
	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
	if (save_ret)
		stack_size += 8;

	/* room for callee saved registers, currently x19 and x20 are used */
	regs_off = stack_size;
	stack_size += 16;

	/* round up to multiples of 16 to avoid SPAlignmentFault */
	stack_size = round_up(stack_size, 16);

	/* the return address is located above the FP */
	retaddr_off = stack_size + 8;

	/* bpf trampoline may be invoked by 3 instruction types:
	 * 1. bl, attached to bpf prog or kernel function via short jump
	 * 2. br, attached to bpf prog or kernel function via long jump
	 * 3. blr, working as a function pointer, used by struct_ops.
	 * So BTI_JC should be used here to support both br and blr.
	 */
	emit_bti(A64_BTI_JC, ctx);

	/* frame for parent function */
	emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx);
	emit(A64_MOV(1, A64_FP, A64_SP), ctx);

	/* frame for patched function */
	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
	emit(A64_MOV(1, A64_FP, A64_SP), ctx);

	/* allocate stack space */
	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);

	if (flags & BPF_TRAMP_F_IP_ARG) {
		/* save ip address of the traced function */
		emit_addr_mov_i64(A64_R(10), (const u64)func_addr, ctx);
		emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
	}

	/* save arg regs count */
	emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx);
	emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx);

	/* save arg regs */
	save_args(ctx, args_off, nregs);

	/* save callee saved registers */
	emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
	emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
		emit_call((const u64)__bpf_tramp_enter, ctx);
	}

	for (i = 0; i < fentry->nr_links; i++)
		invoke_bpf_prog(ctx, fentry->links[i], args_off,
				retval_off, run_ctx_off,
				flags & BPF_TRAMP_F_RET_FENTRY_RET);

	if (fmod_ret->nr_links) {
		branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *),
				   GFP_KERNEL);
		if (!branches)
			return -ENOMEM;

		invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
				   run_ctx_off, branches);
	}

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		restore_args(ctx, args_off, nregs);
		/* call original func */
		emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
		emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
		emit(A64_RET(A64_R(10)), ctx);
		/* store return value */
		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
		/* reserve a nop for bpf_tramp_image_put */
		im->ip_after_call = ctx->ro_image + ctx->idx;
		emit(A64_NOP, ctx);
	}

	/* update the branches saved in invoke_bpf_mod_ret with cbnz */
	for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) {
		int offset = &ctx->image[ctx->idx] - branches[i];
		*branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset));
	}

	for (i = 0; i < fexit->nr_links; i++)
		invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
				run_ctx_off, false);

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		im->ip_epilogue = ctx->ro_image + ctx->idx;
		emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
		emit_call((const u64)__bpf_tramp_exit, ctx);
	}

	if (flags & BPF_TRAMP_F_RESTORE_REGS)
		restore_args(ctx, args_off, nregs);

	/* restore callee saved registers x19 and x20 */
	emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
	emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx);

	if (save_ret)
		emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx);

	/* reset SP */
	emit(A64_MOV(1, A64_SP, A64_FP), ctx);

	/* pop frames */
	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
	emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx);

	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
		/* skip patched function, return to parent */
		emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
		emit(A64_RET(A64_R(9)), ctx);
	} else {
		/* return to patched function */
		emit(A64_MOV(1, A64_R(10), A64_LR), ctx);
		emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
		emit(A64_RET(A64_R(10)), ctx);
	}

	kfree(branches);

	return ctx->idx;
}

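/*
 * Number of A64 registers needed for the function arguments. A struct
 * argument passed by value needs one register per 8 bytes; e.g. a 12-byte
 * struct argument occupies (12 + 7) / 8 = 2 registers.
 */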
static int btf_func_model_nregs(const struct btf_func_model *m)
{
	int nregs = m->nr_args;
	int i;

	/* extra registers needed for struct argument */
	for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
		/* The arg_size is at most 16 bytes, enforced by the verifier. */
		if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
			nregs += (m->arg_size[i] + 7) / 8 - 1;
	}

	return nregs;
}

int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
			     struct bpf_tramp_links *tlinks, void *func_addr)
{
	struct jit_ctx ctx = {
		.image = NULL,
		.idx = 0,
	};
	struct bpf_tramp_image im;
	int nregs, ret;

	nregs = btf_func_model_nregs(m);
	/* the first 8 registers are used for arguments */
	if (nregs > 8)
		return -ENOTSUPP;

	ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags);
	if (ret < 0)
		return ret;

	return ret < 0 ? ret : ret * AARCH64_INSN_SIZE;
}

void *arch_alloc_bpf_trampoline(unsigned int size)
{
	return bpf_prog_pack_alloc(size, jit_fill_hole);
}

void arch_free_bpf_trampoline(void *image, unsigned int size)
{
	bpf_prog_pack_free(image, size);
}

int arch_protect_bpf_trampoline(void *image, unsigned int size)
{
	return 0;
}

int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
				void *ro_image_end, const struct btf_func_model *m,
				u32 flags, struct bpf_tramp_links *tlinks,
				void *func_addr)
{
	int ret, nregs;
	void *image, *tmp;
	u32 size = ro_image_end - ro_image;

	/* image doesn't need to be in module memory range, so we can
	 * use kvmalloc.
	 */
	image = kvmalloc(size, GFP_KERNEL);
	if (!image)
		return -ENOMEM;

	struct jit_ctx ctx = {
		.image = image,
		.ro_image = ro_image,
		.idx = 0,
	};

	nregs = btf_func_model_nregs(m);
	/* the first 8 registers are used for arguments */
	if (nregs > 8) {
		ret = -ENOTSUPP;
		goto out;
	}

	jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);

	if (ret > 0 && validate_code(&ctx) < 0) {
		ret = -EINVAL;
		goto out;
	}

	if (ret > 0)
		ret *= AARCH64_INSN_SIZE;

	tmp = bpf_arch_text_copy(ro_image, image, size);
	if (IS_ERR(tmp)) {
		ret = PTR_ERR(tmp);
		goto out;
	}

	bpf_flush_icache(ro_image, ro_image + size);
out:
	kvfree(image);
	return ret;
}

static bool is_long_jump(void *ip, void *target)
{
	long offset;

	/* NULL target means this is a NOP */
	if (!target)
		return false;

	offset = (long)target - (long)ip;
	return offset < -SZ_128M || offset >= SZ_128M;
}

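/*
 * Generate the instruction for a patchsite: a nop if @addr is NULL, otherwise
 * a branch of @type from @ip to @addr, routed through @plt when @addr is
 * outside the +/-128MB direct branch range.
 */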
static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip,
			     void *addr, void *plt, u32 *insn)
{
	void *target;

	if (!addr) {
		*insn = aarch64_insn_gen_nop();
		return 0;
	}

	if (is_long_jump(ip, addr))
		target = plt;
	else
		target = addr;

	*insn = aarch64_insn_gen_branch_imm((unsigned long)ip,
					    (unsigned long)target,
					    type);

	return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT;
}

/* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf
 * trampoline with the branch instruction from @ip to @new_addr. If @old_addr
 * or @new_addr is NULL, the old or new instruction is NOP.
 *
 * When @ip is the bpf prog entry, a bpf trampoline is being attached or
 * detached. Since bpf trampoline and bpf prog are allocated separately with
 * vmalloc, the address distance may exceed 128MB, the maximum branch range.
 * So long jumps must be handled.
 *
 * When a bpf prog is constructed, a plt pointing to empty trampoline
 * dummy_tramp is placed at the end:
 *
 *      bpf_prog:
 *              mov x9, lr
 *              nop // patchsite
 *              ...
 *              ret
 *
 *      plt:
 *              ldr x10, target
 *              br x10
 *      target:
 *              .quad dummy_tramp // plt target
 *
 * This is also the state when no trampoline is attached.
 *
 * When a short-jump bpf trampoline is attached, the patchsite is patched
 * to a bl instruction to the trampoline directly:
 *
 *      bpf_prog:
 *              mov x9, lr
 *              bl <short-jump bpf trampoline address> // patchsite
 *              ...
 *              ret
 *
 *      plt:
 *              ldr x10, target
 *              br x10
 *      target:
 *              .quad dummy_tramp // plt target
 *
 * When a long-jump bpf trampoline is attached, the plt target is filled with
 * the trampoline address and the patchsite is patched to a bl instruction to
 * the plt:
 *
 *      bpf_prog:
 *              mov x9, lr
 *              bl plt // patchsite
 *              ...
 *              ret
 *
 *      plt:
 *              ldr x10, target
 *              br x10
 *      target:
 *              .quad <long-jump bpf trampoline address> // plt target
 *
 * The dummy_tramp is used to prevent another CPU from jumping to unknown
 * locations during the patching process, making the patching process easier.
 */
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
		       void *old_addr, void *new_addr)
{
	int ret;
	u32 old_insn;
	u32 new_insn;
	u32 replaced;
	struct bpf_plt *plt = NULL;
	unsigned long size = 0UL;
	unsigned long offset = ~0UL;
	enum aarch64_insn_branch_type branch_type;
	char namebuf[KSYM_NAME_LEN];
	void *image = NULL;
	u64 plt_target = 0ULL;
	bool poking_bpf_entry;

	if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
		/* Only poking bpf text is supported. Since kernel function
		 * entry is set up by ftrace, we rely on ftrace to poke kernel
		 * functions.
		 */
		return -ENOTSUPP;

	image = ip - offset;
	/* zero offset means we're poking bpf prog entry */
	poking_bpf_entry = (offset == 0UL);

	/* bpf prog entry, find plt and the real patchsite */
	if (poking_bpf_entry) {
		/* the plt is located at the end of the bpf prog */
		plt = image + size - PLT_TARGET_OFFSET;

		/* skip to the nop instruction in bpf prog entry:
		 * bti c // if BTI enabled
		 * mov x9, x30
		 * nop
		 */
		ip = image + POKE_OFFSET * AARCH64_INSN_SIZE;
	}

	/* long jump is only possible at bpf prog entry */
	if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) &&
		    !poking_bpf_entry))
		return -EINVAL;

	if (poke_type == BPF_MOD_CALL)
		branch_type = AARCH64_INSN_BRANCH_LINK;
	else
		branch_type = AARCH64_INSN_BRANCH_NOLINK;

	if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0)
		return -EFAULT;

	if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0)
		return -EFAULT;

	if (is_long_jump(ip, new_addr))
		plt_target = (u64)new_addr;
	else if (is_long_jump(ip, old_addr))
		/* if the old target is a long jump and the new target is not,
		 * restore the plt target to dummy_tramp, so there is always a
		 * legal and harmless address stored in plt target, and we'll
		 * never jump from plt to an unknown place.
		 */
		plt_target = (u64)&dummy_tramp;

	if (plt_target) {
		/* non-zero plt_target indicates we're patching a bpf prog,
		 * which is read only.
		 */
		if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1))
			return -EFAULT;
		WRITE_ONCE(plt->target, plt_target);
		set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1);
		/* since plt target points to either the new trampoline
		 * or dummy_tramp, even if another CPU reads the old plt
		 * target value before fetching the bl instruction to plt,
		 * it will be brought back by dummy_tramp, so no barrier is
		 * required here.
		 */
	}

	/* if the old target and the new target are both long jumps, no
	 * patching is required
	 */
	if (old_insn == new_insn)
		return 0;

	mutex_lock(&text_mutex);
	if (aarch64_insn_read(ip, &replaced)) {
		ret = -EFAULT;
		goto out;
	}

	if (replaced != old_insn) {
		ret = -EFAULT;
		goto out;
	}

	/* We call aarch64_insn_patch_text_nosync() to replace the instruction
	 * atomically, so no other CPUs will fetch a half-new and half-old
	 * instruction. But there is a chance that another CPU executes the
	 * old instruction after the patching operation finishes (e.g.,
	 * pipeline not flushed, or icache not synchronized yet).
	 *
	 * 1. when a new trampoline is attached, it is not a problem for
	 * different CPUs to jump to different trampolines temporarily.
	 *
	 * 2. when an old trampoline is freed, we should wait for all other
	 * CPUs to exit the trampoline and make sure the trampoline is no
	 * longer reachable. Since bpf_tramp_image_put() already uses
	 * percpu_ref and task-based RCU to do the sync, there is no need to
	 * call the sync variant here; see bpf_tramp_image_put() for details.
	 */
	ret = aarch64_insn_patch_text_nosync(ip, new_insn);
out:
	mutex_unlock(&text_mutex);

	return ret;
}

bool bpf_jit_supports_ptr_xchg(void)
{
	return true;
}

bool bpf_jit_supports_exceptions(void)
{
	/* We unwind through both kernel frames, starting from within the
	 * bpf_throw call, and BPF frames. Therefore we require the FP
	 * unwinder to be enabled to walk kernel frames and reach BPF frames
	 * in the stack trace. The ARM64 kernel is always compiled with
	 * CONFIG_FRAME_POINTER=y.
	 */
	return true;
}

bool bpf_jit_supports_arena(void)
{
	return true;
}

bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
{
	if (!in_arena)
		return true;
	switch (insn->code) {
	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
			return false;
	}
	return true;
}

bool bpf_jit_supports_percpu_insn(void)
{
	return true;
}

bool bpf_jit_inlines_helper_call(s32 imm)
{
	switch (imm) {
	case BPF_FUNC_get_smp_processor_id:
	case BPF_FUNC_get_current_task:
	case BPF_FUNC_get_current_task_btf:
		return true;
	default:
		return false;
	}
}

void bpf_jit_free(struct bpf_prog *prog)
{
	if (prog->jited) {
		struct arm64_jit_data *jit_data = prog->aux->jit_data;
		struct bpf_binary_header *hdr;

		/*
		 * If we fail the final pass of JIT (from jit_subprogs),
		 * the program may not be finalized yet. Call finalize here
		 * before freeing it.
		 */
		if (jit_data) {
			bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size,
					   sizeof(jit_data->header->size));
			kfree(jit_data);
		}
		hdr = bpf_jit_binary_pack_hdr(prog);
		bpf_jit_binary_pack_free(hdr, NULL);
		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
	}

	bpf_prog_unlock_free(prog);
}