/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _ASM_X86_NOSPEC_BRANCH_H_
#define _ASM_X86_NOSPEC_BRANCH_H_

#include <linux/static_key.h>
#include <linux/objtool.h>
#include <linux/linkage.h>

#include <asm/alternative.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
#include <asm/percpu.h>
#include <asm/current.h>

/*
 * Call depth tracking for Intel SKL CPUs to address the RSB underflow
 * issue in software.
 *
 * The tracking does not use a counter. It uses arithmetic shift
 * right on call entry and logical shift left on return.
 *
 * The depth tracking variable is initialized to 0x8000000000000000 when the call
 * depth is zero. The arithmetic shift right sign extends the MSB and
 * saturates after the 12th call. The shift count is 5 for both directions
 * so the tracking covers 12 nested calls.
 *
 *  Call
 *  0: 0x8000000000000000	0x0000000000000000
 *  1: 0xfc00000000000000	0xf000000000000000
 * ...
 * 11: 0xfffffffffffffff8	0xfffffffffffffc00
 * 12: 0xffffffffffffffff	0xffffffffffffffe0
 *
 * After a return buffer fill the depth is credited 12 calls before the
 * next stuffing has to take place.
 *
 * There is an inaccuracy for situations like this:
 *
 *  10 calls
 *   5 returns
 *   3 calls
 *   4 returns
 *   3 calls
 *   ....
 *
 * The shift count might cause this to be off by one in either direction,
 * but there is still a cushion vs. the RSB depth. The algorithm does not
 * claim to be perfect and it can be speculated around by the CPU, but it
 * is considered that it obfuscates the problem enough to make exploitation
 * extremely difficult.
 */
#define RET_DEPTH_SHIFT			5
#define RSB_RET_STUFF_LOOPS		16
#define RET_DEPTH_INIT			0x8000000000000000ULL
#define RET_DEPTH_INIT_FROM_CALL	0xfc00000000000000ULL
#define RET_DEPTH_CREDIT		0xffffffffffffffffULL

#ifdef CONFIG_CALL_THUNKS_DEBUG
# define CALL_THUNKS_DEBUG_INC_CALLS				\
	incq	PER_CPU_VAR(__x86_call_count);
# define CALL_THUNKS_DEBUG_INC_RETS				\
	incq	PER_CPU_VAR(__x86_ret_count);
# define CALL_THUNKS_DEBUG_INC_STUFFS				\
	incq	PER_CPU_VAR(__x86_stuffs_count);
# define CALL_THUNKS_DEBUG_INC_CTXSW				\
	incq	PER_CPU_VAR(__x86_ctxsw_count);
#else
# define CALL_THUNKS_DEBUG_INC_CALLS
# define CALL_THUNKS_DEBUG_INC_RETS
# define CALL_THUNKS_DEBUG_INC_STUFFS
# define CALL_THUNKS_DEBUG_INC_CTXSW
#endif

#if defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)

#include <asm/asm-offsets.h>

#define CREDIT_CALL_DEPTH					\
	movq	$-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);

#define RESET_CALL_DEPTH					\
	xor	%eax, %eax;					\
	bts	$63, %rax;					\
	movq	%rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);

#define RESET_CALL_DEPTH_FROM_CALL				\
	movb	$0xfc, %al;					\
	shl	$56, %rax;					\
	movq	%rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);	\
	CALL_THUNKS_DEBUG_INC_CALLS

#define INCREMENT_CALL_DEPTH					\
	sarq	$5, PER_CPU_VAR(pcpu_hot + X86_call_depth);	\
	CALL_THUNKS_DEBUG_INC_CALLS

#else
#define CREDIT_CALL_DEPTH
#define RESET_CALL_DEPTH
#define RESET_CALL_DEPTH_FROM_CALL
#define INCREMENT_CALL_DEPTH
#endif

/*
 * Fill the CPU return stack buffer.
 *
 * Each entry in the RSB, if used for a speculative 'ret', contains an
 * infinite 'pause; lfence; jmp' loop to capture speculative execution.
 *
 * This is required in various cases for retpoline and IBRS-based
 * mitigations for the Spectre variant 2 vulnerability. Sometimes to
 * eliminate potentially bogus entries from the RSB, and sometimes
 * purely to ensure that it doesn't get empty, which on some CPUs would
 * allow predictions from other (unwanted!) sources to be used.
 *
 * We define a CPP macro such that it can be used from both .S files and
 * inline assembly. It's possible to do a .macro and then include that
 * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
 */

#define RETPOLINE_THUNK_SIZE	32
#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */

/*
 * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
 */
#define __FILL_RETURN_SLOT			\
	ANNOTATE_INTRA_FUNCTION_CALL;		\
	call	772f;				\
	int3;					\
772:

/*
 * Stuff the entire RSB.
 *
 * Google experimented with loop-unrolling and this turned out to be
 * the optimal version - two calls, each with their own speculation
 * trap should their return address end up getting used, in a loop.
 */
#ifdef CONFIG_X86_64
#define __FILL_RETURN_BUFFER(reg, nr)			\
	mov	$(nr/2), reg;				\
771:							\
	__FILL_RETURN_SLOT				\
	__FILL_RETURN_SLOT				\
	add	$(BITS_PER_LONG/8) * 2, %_ASM_SP;	\
	dec	reg;					\
	jnz	771b;					\
	/* barrier for jnz misprediction */		\
	lfence;						\
	CREDIT_CALL_DEPTH				\
	CALL_THUNKS_DEBUG_INC_CTXSW
#else
/*
 * i386 doesn't unconditionally have LFENCE, as such it can't
 * do a loop.
 */
#define __FILL_RETURN_BUFFER(reg, nr)			\
	.rept nr;					\
	__FILL_RETURN_SLOT;				\
	.endr;						\
	add	$(BITS_PER_LONG/8) * nr, %_ASM_SP;
#endif

/*
 * Stuff a single RSB slot.
 *
 * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
 * forced to retire before letting a RET instruction execute.
 *
 * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
 * before this point.
 */
#define __FILL_ONE_RETURN				\
	__FILL_RETURN_SLOT				\
	add	$(BITS_PER_LONG/8), %_ASM_SP;		\
	lfence;

#ifdef __ASSEMBLY__

/*
 * This should be used immediately before an indirect jump/call. It tells
 * objtool the subsequent indirect jump/call is vouched safe for retpoline
 * builds.
 */
.macro ANNOTATE_RETPOLINE_SAFE
.Lhere_\@:
	.pushsection .discard.retpoline_safe
	.long .Lhere_\@
	.popsection
.endm

/*
 * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
 * vs RETBleed validation.
 */
#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE

/*
 * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
 * eventually turn into its own annotation.
 */
.macro VALIDATE_UNRET_END
#if defined(CONFIG_NOINSTR_VALIDATION) && \
	(defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO))
	ANNOTATE_RETPOLINE_SAFE
	nop
#endif
.endm

/*
 * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call
 * to the retpoline thunk with a CS prefix when the register requires
 * a RAX prefix byte to encode. Also see apply_retpolines().
 */
.macro __CS_PREFIX reg:req
	.irp rs,r8,r9,r10,r11,r12,r13,r14,r15
	.ifc \reg,\rs
	.byte 0x2e
	.endif
	.endr
.endm

/*
 * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
 * indirect jmp/call which may be susceptible to the Spectre variant 2
 * attack.
 *
 * NOTE: these do not take kCFI into account and are thus not comparable to C
 * indirect calls, take care when using. The target of these should be an ENDBR
 * instruction irrespective of kCFI.
 */
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_MITIGATION_RETPOLINE
	__CS_PREFIX \reg
	jmp	__x86_indirect_thunk_\reg
#else
	jmp	*%\reg
	int3
#endif
.endm

.macro CALL_NOSPEC reg:req
#ifdef CONFIG_MITIGATION_RETPOLINE
	__CS_PREFIX \reg
	call	__x86_indirect_thunk_\reg
#else
	call	*%\reg
#endif
.endm

/*
 * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
 * monstrosity above, manually.
 */
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
	ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
		__stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
		__stringify(nop;nop;__FILL_ONE_RETURN), \ftr2

.Lskip_rsb_\@:
.endm

/*
 * The CALL to srso_alias_untrain_ret() must be patched in directly at
 * the spot where untraining must be done, ie., srso_alias_untrain_ret()
 * must be the target of a CALL instruction instead of indirectly
 * jumping to a wrapper which then calls it. Therefore, this macro is
 * called outside of __UNTRAIN_RET below, for the time being, before the
 * kernel can support nested alternatives with arbitrary nesting.
 */
.macro CALL_UNTRAIN_RET
#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
	ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \
			  "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
#endif
.endm

/*
 * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
 * return thunk isn't mapped into the userspace tables (then again, AMD
 * typically has NO_MELTDOWN).
 *
 * While retbleed_untrain_ret() doesn't clobber anything but requires stack,
 * entry_ibpb() will clobber AX, CX, DX.
 *
 * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
 * where we have a stack but before any RET instruction.
 */
.macro __UNTRAIN_RET ibpb_feature, call_depth_insns
#if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY)
	VALIDATE_UNRET_END
	CALL_UNTRAIN_RET
	ALTERNATIVE_2 "",						\
		      "call entry_ibpb", \ibpb_feature,			\
		     __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH
#endif
.endm

#define UNTRAIN_RET \
	__UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH)

#define UNTRAIN_RET_VM \
	__UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH)

#define UNTRAIN_RET_FROM_CALL \
	__UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL)


.macro CALL_DEPTH_ACCOUNT
#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
	ALTERNATIVE "",							\
		    __stringify(INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
#endif
.endm

/*
 * Macro to execute VERW instruction that mitigates transient data sampling
 * attacks such as MDS. On affected systems a microcode update overloaded VERW
 * instruction to also clear the CPU buffers. VERW clobbers CF, ZF flags.
 *
 * Note: Only the memory operand variant of VERW clears the CPU buffers.
 */
.macro CLEAR_CPU_BUFFERS
#ifdef CONFIG_X86_64
	ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF
#else
	/*
	 * In 32bit mode, the memory operand must be a %cs reference. The data
	 * segments may not be usable (vm86 mode), and the stack segment may not
	 * be flat (ESPFIX32).
	 */
	ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF
#endif
.endm

#ifdef CONFIG_X86_64
.macro CLEAR_BRANCH_HISTORY
	ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
.endm

.macro CLEAR_BRANCH_HISTORY_VMEXIT
	ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
.endm
#else
#define CLEAR_BRANCH_HISTORY
#define CLEAR_BRANCH_HISTORY_VMEXIT
#endif

#else /* __ASSEMBLY__ */

#define ANNOTATE_RETPOLINE_SAFE					\
	"999:\n\t"						\
	".pushsection .discard.retpoline_safe\n\t"		\
	".long 999b\n\t"					\
	".popsection\n\t"

typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];
extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];

#ifdef CONFIG_MITIGATION_RETHUNK
extern void __x86_return_thunk(void);
#else
static inline void __x86_return_thunk(void) {}
#endif

#ifdef CONFIG_MITIGATION_UNRET_ENTRY
extern void retbleed_return_thunk(void);
#else
static inline void retbleed_return_thunk(void) {}
#endif

extern void srso_alias_untrain_ret(void);

#ifdef CONFIG_MITIGATION_SRSO
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);
#else
static inline void srso_return_thunk(void) {}
static inline void srso_alias_return_thunk(void) {}
#endif

extern void retbleed_return_thunk(void);
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);

extern void entry_untrain_ret(void);
extern void entry_ibpb(void);

#ifdef CONFIG_X86_64
extern void clear_bhb_loop(void);
#endif

extern void (*x86_return_thunk)(void);

extern void __warn_thunk(void);

#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
extern void call_depth_return_thunk(void);

#define CALL_DEPTH_ACCOUNT						\
	ALTERNATIVE("",							\
		    __stringify(INCREMENT_CALL_DEPTH),			\
		    X86_FEATURE_CALL_DEPTH)

#ifdef CONFIG_CALL_THUNKS_DEBUG
DECLARE_PER_CPU(u64, __x86_call_count);
DECLARE_PER_CPU(u64, __x86_ret_count);
DECLARE_PER_CPU(u64, __x86_stuffs_count);
DECLARE_PER_CPU(u64, __x86_ctxsw_count);
#endif
#else /* !CONFIG_MITIGATION_CALL_DEPTH_TRACKING */

static inline void call_depth_return_thunk(void) {}
#define CALL_DEPTH_ACCOUNT ""

#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */

#ifdef CONFIG_MITIGATION_RETPOLINE

#define GEN(reg) \
	extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#define GEN(reg)						\
	extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#define GEN(reg)						\
	extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#ifdef CONFIG_X86_64

/*
 * Inline asm uses the %V modifier which is only in newer GCC
 * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined.
 */
# define CALL_NOSPEC						\
	ALTERNATIVE_2(						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
	X86_FEATURE_RETPOLINE,					\
	"lfence;\n"						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	X86_FEATURE_RETPOLINE_LFENCE)

# define THUNK_TARGET(addr) [thunk_target] "r" (addr)

#else /* CONFIG_X86_32 */
/*
 * For i386 we use the original ret-equivalent retpoline, because
 * otherwise we'll run out of registers. We don't care about CET
 * here, anyway.
 */
# define CALL_NOSPEC						\
	ALTERNATIVE_2(						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	"       jmp    904f;\n"					\
	"       .align 16\n"					\
	"901:	call   903f;\n"					\
	"902:	pause;\n"					\
	"	lfence;\n"					\
	"       jmp    902b;\n"					\
	"       .align 16\n"					\
	"903:	lea    4(%%esp), %%esp;\n"			\
	"       pushl  %[thunk_target];\n"			\
	"       ret;\n"						\
	"       .align 16\n"					\
	"904:	call   901b;\n",				\
	X86_FEATURE_RETPOLINE,					\
	"lfence;\n"						\
	ANNOTATE_RETPOLINE_SAFE					\
	"call *%[thunk_target]\n",				\
	X86_FEATURE_RETPOLINE_LFENCE)

# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif
#else /* No retpoline for C / inline asm */
# define CALL_NOSPEC "call *%[thunk_target]\n"
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif

/* The Spectre V2 mitigation variants */
enum spectre_v2_mitigation {
	SPECTRE_V2_NONE,
	SPECTRE_V2_RETPOLINE,
	SPECTRE_V2_LFENCE,
	SPECTRE_V2_EIBRS,
	SPECTRE_V2_EIBRS_RETPOLINE,
	SPECTRE_V2_EIBRS_LFENCE,
	SPECTRE_V2_IBRS,
};

/* The indirect branch speculation control variants */
enum spectre_v2_user_mitigation {
	SPECTRE_V2_USER_NONE,
	SPECTRE_V2_USER_STRICT,
	SPECTRE_V2_USER_STRICT_PREFERRED,
	SPECTRE_V2_USER_PRCTL,
	SPECTRE_V2_USER_SECCOMP,
};

/* The Speculative Store Bypass disable variants */
enum ssb_mitigation {
	SPEC_STORE_BYPASS_NONE,
	SPEC_STORE_BYPASS_DISABLE,
	SPEC_STORE_BYPASS_PRCTL,
	SPEC_STORE_BYPASS_SECCOMP,
};

static __always_inline
void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
{
	asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
		: : "c" (msr),
		    "a" ((u32)val),
		    "d" ((u32)(val >> 32)),
		    [feature] "i" (feature)
		: "memory");
}

extern u64 x86_pred_cmd;

static inline void indirect_branch_prediction_barrier(void)
{
	alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB);
}

/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
extern void update_spec_ctrl_cond(u64 val);
extern u64 spec_ctrl_current(void);

/*
 * With retpoline, we must use IBRS to restrict branch prediction
 * before calling into firmware.
 *
 * (Implemented as CPP macros due to header hell.)
 */
#define firmware_restrict_branch_speculation_start()			\
do {									\
	preempt_disable();						\
	alternative_msr_write(MSR_IA32_SPEC_CTRL,			\
			      spec_ctrl_current() | SPEC_CTRL_IBRS,	\
			      X86_FEATURE_USE_IBRS_FW);			\
	alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,		\
			      X86_FEATURE_USE_IBPB_FW);			\
} while (0)

#define firmware_restrict_branch_speculation_end()			\
do {									\
	alternative_msr_write(MSR_IA32_SPEC_CTRL,			\
			      spec_ctrl_current(),			\
			      X86_FEATURE_USE_IBRS_FW);			\
	preempt_enable();						\
} while (0)

DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);

DECLARE_STATIC_KEY_FALSE(mds_idle_clear);

DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);

DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);

extern u16 mds_verw_sel;

#include <asm/segment.h>

/**
 * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
 *
 * This uses the otherwise unused and obsolete VERW instruction in
 * combination with microcode which triggers a CPU buffer flush when the
 * instruction is executed.
 */
static __always_inline void mds_clear_cpu_buffers(void)
{
	static const u16 ds = __KERNEL_DS;

	/*
	 * Has to be the memory-operand variant because only that
	 * guarantees the CPU buffer flush functionality according to
	 * documentation. The register-operand variant does not.
	 * Works with any segment selector, but a valid writable
	 * data segment is the fastest variant.
	 *
	 * "cc" clobber is required because VERW clears ZF.
	 */
	asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
}

/**
 * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
 *
 * Clear CPU buffers if the corresponding static key is enabled
 */
static __always_inline void mds_idle_clear_cpu_buffers(void)
{
	if (static_branch_likely(&mds_idle_clear))
		mds_clear_cpu_buffers();
}

#endif /* __ASSEMBLY__ */

#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
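
/*
 * Illustrative sketch: the C-side CALL_NOSPEC/THUNK_TARGET pair defined above
 * is meant to be dropped into inline asm at an indirect call site. The
 * function pointer 'fn' and the operand/clobber lists below are hypothetical
 * and only show the shape of such a call site on a 64-bit build; a real call
 * site must list every register its callee may clobber:
 *
 *	void (*fn)(void);
 *
 *	asm volatile(CALL_NOSPEC
 *		     : ASM_CALL_CONSTRAINT
 *		     : THUNK_TARGET(fn)
 *		     : "rax", "rcx", "rdx", "rsi", "rdi",
 *		       "r8", "r9", "r10", "r11", "cc", "memory");
 *
 * With CONFIG_MITIGATION_RETPOLINE enabled, this emits either a call through
 * the __x86_indirect_thunk_<reg> retpoline or an LFENCE-preceded indirect
 * call, depending on which feature bit is patched in; otherwise it is a plain
 * indirect call.
 */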