
TOMOYO Linux Cross Reference
Linux/arch/x86/entry/entry_64.S

Version: linux-6.12-rc7



  1 /* SPDX-License-Identifier: GPL-2.0 */            
  2 /*                                                
  3  *  linux/arch/x86_64/entry.S                     
  4  *                                                
  5  *  Copyright (C) 1991, 1992  Linus Torvalds      
  6  *  Copyright (C) 2000, 2001, 2002  Andi Kleen    
  7  *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
  8  *                                                
  9  * entry.S contains the system-call and fault low-level handling routines.
 10  *                                                
 11  * Some of this is documented in Documentation/arch/x86/entry_64.rst
 12  *                                                
 13  * A note on terminology:                         
 14  * - iret frame:        Architecture defined interrupt frame from SS to RIP
 15  *                      at the top of the kernel process stack.
 16  *                                                
 17  * Some macro usage:                              
 18  * - SYM_FUNC_START/END:Define functions in the symbol table.
 19  * - idtentry:          Define exception entry points.
 20  */                                               
 21 #include <linux/export.h>                         
 22 #include <linux/linkage.h>                        
 23 #include <asm/segment.h>                          
 24 #include <asm/cache.h>                            
 25 #include <asm/errno.h>                            
 26 #include <asm/asm-offsets.h>                      
 27 #include <asm/msr.h>                              
 28 #include <asm/unistd.h>                           
 29 #include <asm/thread_info.h>                      
 30 #include <asm/hw_irq.h>                           
 31 #include <asm/page_types.h>                       
 32 #include <asm/irqflags.h>                         
 33 #include <asm/paravirt.h>                         
 34 #include <asm/percpu.h>                           
 35 #include <asm/asm.h>                              
 36 #include <asm/smap.h>                             
 37 #include <asm/pgtable_types.h>                    
 38 #include <asm/frame.h>                            
 39 #include <asm/trapnr.h>                           
 40 #include <asm/nospec-branch.h>                    
 41 #include <asm/fsgsbase.h>                         
 42 #include <linux/err.h>                            
 43                                                   
 44 #include "calling.h"                              
 45                                                   
 46 .code64                                           
 47 .section .entry.text, "ax"                        
 48                                                   
 49 /*                                                
 50  * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
 51  *                                                
 52  * This is the only entry point used for 64-bit system calls.  The
 53  * hardware interface is reasonably well designed and the register to
 54  * argument mapping Linux uses fits well with the registers that are
 55  * available when SYSCALL is used.
 56  *                                                
 57  * SYSCALL instructions can be found inlined in libc implementations as
 58  * well as some other programs and libraries.  There are also a handful
 59  * of SYSCALL instructions in the vDSO used, for example, as a
 60  * clock_gettimeofday fallback.
 61  *                                                
 62  * 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
 63  * then loads new ss, cs, and rip from previously programmed MSRs.
 64  * rflags gets masked by a value from another MSR (so CLD and CLAC
 65  * are not needed). SYSCALL does not save anything on the stack
 66  * and does not change rsp.
 67  *                                                
 68  * Registers on entry:                            
 69  * rax  system call number                        
 70  * rcx  return address                            
 71  * r11  saved rflags (note: r11 is callee-clobbered register in C ABI)
 72  * rdi  arg0                                      
 73  * rsi  arg1                                      
 74  * rdx  arg2                                      
 75  * r10  arg3 (needs to be moved to rcx to conform to C ABI)
 76  * r8   arg4                                      
 77  * r9   arg5                                      
 78  * (note: r12-r15, rbp, rbx are callee-preserved in C ABI)
 79  *                                                
 80  * Only called from user space.                   
 81  *                                                
 82  * When user can change pt_regs->foo always force IRET. That is because
 83  * it deals with uncanonical addresses better. SYSRET has trouble
 84  * with them due to bugs in both AMD and Intel CPUs.
 85  */                                               
 86                                                   
 87 SYM_CODE_START(entry_SYSCALL_64)                  
 88         UNWIND_HINT_ENTRY                         
 89         ENDBR                                     
 90                                                   
 91         swapgs                                    
 92         /* tss.sp2 is scratch space. */           
 93         movq    %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
 94         SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp     
 95         movq    PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
 96                                                   
 97 SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
 98         ANNOTATE_NOENDBR                          
 99                                                   
100         /* Construct struct pt_regs on stack *    
101         pushq   $__USER_DS                        
102         pushq   PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
103         pushq   %r11                              
104         pushq   $__USER_CS                        
105         pushq   %rcx                              
106 SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
107         pushq   %rax                              
108                                                   
109         PUSH_AND_CLEAR_REGS rax=$-ENOSYS          
110                                                   
111         /* IRQs are off. */                       
112         movq    %rsp, %rdi                        
113         /* Sign extend the lower 32bit as syscall numbers are treated as int */
114         movslq  %eax, %rsi                        
115                                                   
116         /* clobbers %rax, make sure it is after saving the syscall nr */
117         IBRS_ENTER                                
118         UNTRAIN_RET                               
119         CLEAR_BRANCH_HISTORY                      
120                                                   
121         call    do_syscall_64           /* returns with IRQs disabled */
122                                                   
123         /*                                        
124          * Try to use SYSRET instead of IRET if we're returning to
125          * a completely clean 64-bit userspace context.  If we're not,
126          * go to the slow exit path.
127          * In the Xen PV case we must use iret anyway.
128          */
129 
130         ALTERNATIVE "testb %al, %al; jz swapgs_restore_regs_and_return_to_usermode", \
131                 "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
132                                                   
133         /*                                        
134          * We win! This label is here just for ease of understanding
135          * perf profiles. Nothing jumps here.
136          */                                       
137 syscall_return_via_sysret:                        
138         IBRS_EXIT                                 
139         POP_REGS pop_rdi=0                        
140                                                   
141         /*                                        
142          * Now all regs are restored except RSP and RDI.
143          * Save old stack pointer and switch to trampoline stack.
144          */                                       
145         movq    %rsp, %rdi                        
146         movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
147         UNWIND_HINT_END_OF_STACK                  
148                                                   
149         pushq   RSP-RDI(%rdi)   /* RSP */         
150         pushq   (%rdi)          /* RDI */         
151                                                   
152         /*                                        
153          * We are on the trampoline stack.  All regs except RDI are live.
154          * We can do future final exit work right here.
155          */                                       
156         STACKLEAK_ERASE_NOCLOBBER                 
157                                                   
158         SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
159                                                   
160         popq    %rdi                              
161         popq    %rsp                              
162 SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
163         ANNOTATE_NOENDBR                          
164         swapgs                                    
165         CLEAR_CPU_BUFFERS                         
166         sysretq                                   
167 SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
168         ANNOTATE_NOENDBR                          
169         int3                                      
170 SYM_CODE_END(entry_SYSCALL_64)                    
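
The register convention documented above (rax = syscall number; rdi, rsi, rdx, r10, r8, r9 = arguments; rcx and r11 clobbered by the SYSCALL instruction itself) is what userspace must follow to reach entry_SYSCALL_64. A minimal userspace sketch of issuing a call with that convention -- my_write is a hypothetical helper for illustration, not part of this file:

    #include <sys/syscall.h>        /* __NR_write */

    /* Hypothetical helper illustrating the SYSCALL register convention. */
    static long my_write(int fd, const void *buf, unsigned long len)
    {
            long ret;

            /*
             * rax = syscall number; rdi, rsi, rdx = first three arguments.
             * A 4th/5th/6th argument would go in r10, r8, r9 -- not rcx,
             * because SYSCALL overwrites rcx with the return address.
             */
            __asm__ volatile ("syscall"
                              : "=a" (ret)
                              : "0" ((long)__NR_write), "D" (fd), "S" (buf), "d" (len)
                              : "rcx", "r11", "memory");
            return ret;
    }
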
171                                                   
172 /*                                                
173  * %rdi: prev task                                
174  * %rsi: next task                                
175  */                                               
176 .pushsection .text, "ax"                          
177 SYM_FUNC_START(__switch_to_asm)                   
178         /*                                        
179          * Save callee-saved registers            
180          * This must match the order in inactive_task_frame
181          */                                       
182         pushq   %rbp                              
183         pushq   %rbx                              
184         pushq   %r12                              
185         pushq   %r13                              
186         pushq   %r14                              
187         pushq   %r15                              
188                                                   
189         /* switch stack */                        
190         movq    %rsp, TASK_threadsp(%rdi)         
191         movq    TASK_threadsp(%rsi), %rsp         
192                                                   
193 #ifdef CONFIG_STACKPROTECTOR                      
194         movq    TASK_stack_canary(%rsi), %rbx     
195         movq    %rbx, PER_CPU_VAR(fixed_percpu_data + FIXED_stack_canary)
196 #endif                                            
197                                                   
198         /*                                        
199          * When switching from a shallower to a deeper call stack
200          * the RSB may either underflow or use entries populated
201          * with userspace addresses. On CPUs where those concerns
202          * exist, overwrite the RSB with entries which capture
203          * speculative execution to prevent attack.
204          */                                       
205         FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
206                                                   
207         /* restore callee-saved registers */      
208         popq    %r15                              
209         popq    %r14                              
210         popq    %r13                              
211         popq    %r12                              
212         popq    %rbx                              
213         popq    %rbp                              
214                                                   
215         jmp     __switch_to                       
216 SYM_FUNC_END(__switch_to_asm)                     
217 .popsection                                       
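
The push order above (rbp, rbx, r12, r13, r14, r15) has to mirror struct inactive_task_frame on the C side so that __switch_to() and the unwinder agree on the saved-register layout. A rough sketch of that layout -- field names are an assumption, not a verbatim copy of the kernel header:

    /*
     * Rough sketch of the frame the pushes above leave on the outgoing
     * task's stack.  Pushing rbp, rbx, r12, r13, r14, r15 builds this
     * layout from high to low addresses, so the struct lists the fields
     * lowest-address first.
     */
    struct inactive_task_frame_sketch {
            unsigned long r15;
            unsigned long r14;
            unsigned long r13;
            unsigned long r12;
            unsigned long bx;
            unsigned long bp;
            unsigned long ret_addr; /* pushed by the call into __switch_to_asm */
    };
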
218                                                   
219 /*                                                
220  * A newly forked process directly context switches into this address.
221  *                                                
222  * rax: prev task we switched from                
223  * rbx: kernel thread func (NULL for user thread)
224  * r12: kernel thread arg                         
225  */                                               
226 .pushsection .text, "ax"                          
227 SYM_CODE_START(ret_from_fork_asm)                 
228         /*                                        
229          * This is the start of the kernel stack; even though there's a
230          * register set at the top, the regset isn't necessarily coherent
231          * (consider kthreads) and one cannot unwind further.
232          *
233          * This ensures stack unwinds of kernel threads terminate in a known
234          * good state.                            
235          */                                       
236         UNWIND_HINT_END_OF_STACK                  
237         ANNOTATE_NOENDBR // copy_thread           
238         CALL_DEPTH_ACCOUNT                        
239                                                   
240         movq    %rax, %rdi              /* prev */
241         movq    %rsp, %rsi              /* regs */
242         movq    %rbx, %rdx              /* fn */
243         movq    %r12, %rcx              /* fn_arg */
244         call    ret_from_fork                     
245                                                   
246         /*                                        
247          * Set the stack state to what is expected for the target function
248          * -- at this point the register set should be a valid user set
249          * and unwind should work normally.
250          */                                       
251         UNWIND_HINT_REGS                          
252                                                   
253 #ifdef CONFIG_X86_FRED                            
254         ALTERNATIVE "jmp swapgs_restore_regs_and_return_to_usermode", \
255                     "jmp asm_fred_exit_user", X86_FEATURE_FRED
256 #else                                             
257         jmp     swapgs_restore_regs_and_return_to_usermode
258 #endif                                            
259 SYM_CODE_END(ret_from_fork_asm)                   
260 .popsection                                       
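
The four movq instructions above marshal the arguments for the C helper; its shape is roughly the following (an assumed prototype sketch -- the authoritative declaration lives in the arch/x86 process code):

    /*
     * Assumed prototype sketch of the C-side helper called above.  The
     * argument order matches the register moves: rdi = prev, rsi = regs,
     * rdx = fn, rcx = fn_arg (fn is NULL for user threads).
     */
    struct task_struct;
    struct pt_regs;

    void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
                       int (*fn)(void *), void *fn_arg);
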
261                                                   
262 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF                
263 #ifdef CONFIG_DEBUG_ENTRY                         
264         pushq %rax                                
265         SAVE_FLAGS                                
266         testl $X86_EFLAGS_IF, %eax                
267         jz .Lokay_\@                              
268         ud2                                       
269 .Lokay_\@:                                        
270         popq %rax                                 
271 #endif                                            
272 .endm                                             
273                                                   
274 SYM_CODE_START(xen_error_entry)                   
275         ANNOTATE_NOENDBR                          
276         UNWIND_HINT_FUNC                          
277         PUSH_AND_CLEAR_REGS save_ret=1            
278         ENCODE_FRAME_POINTER 8                    
279         UNTRAIN_RET_FROM_CALL                     
280         RET                                       
281 SYM_CODE_END(xen_error_entry)                     
282                                                   
283 /**                                               
284  * idtentry_body - Macro to emit code calling cfunc with two parameters
285  * @cfunc:              C function to be called
286  * @has_error_code:     Hardware pushed error code on stack
287  */                                               
288 .macro idtentry_body cfunc has_error_code:req     
289                                                   
290         /*                                        
291          * Call error_entry() and switch to the task stack if from userspace.
292          *
293          * When in XENPV, it is already in the task stack, and it can't fault
294          * for native_iret() nor native_load_gs_index() since XENPV uses its
295          * own pvops for IRET and load_gs_index().  And it doesn't need to
296          * switch the CR3.  So it can skip invoking error_entry().
297          */
298         ALTERNATIVE "call error_entry; movq %rax, %rsp", \
299                     "call xen_error_entry", X86_FEATURE_XENPV
300                                                   
301         ENCODE_FRAME_POINTER                      
302         UNWIND_HINT_REGS                          
303                                                   
304         movq    %rsp, %rdi                        
305                                                   
306         .if \has_error_code == 1                  
307                 movq    ORIG_RAX(%rsp), %rsi      
308                 movq    $-1, ORIG_RAX(%rsp)       
309         .endif                                    
310                                                   
311         call    \cfunc                            
312                                                   
313         /* For some configurations \cfunc ends up being a noreturn. */
314         REACHABLE                                 
315                                                   
316         jmp     error_return                      
317 .endm                                             
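
On the C side, the \cfunc that idtentry_body dispatches to is an exception handler taking pt_regs and, when has_error_code is 1, the hardware error code that was moved into %rsi above. A sketch of the two shapes, with placeholder names (exc_some_trap and exc_some_fault are illustrative, not real symbols):

    /*
     * Sketch of the handler shapes idtentry_body calls.  In the kernel these
     * are generated by the DEFINE_IDTENTRY* macros; the names below are
     * placeholders.
     */
    struct pt_regs;

    /* has_error_code == 0: only the pt_regs pointer (in %rdi) is passed */
    void exc_some_trap(struct pt_regs *regs);

    /* has_error_code == 1: the hardware error code (moved into %rsi) follows */
    void exc_some_fault(struct pt_regs *regs, unsigned long error_code);
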
318                                                   
319 /**                                               
320  * idtentry - Macro to generate entry stubs for simple exceptions and IRQs
321  * @vector:             Vector number
322  * @asmsym:             ASM symbol for the entry point
323  * @cfunc:              C function to be called
324  * @has_error_code:     Hardware pushed error code on stack
325  *
326  * The macro emits code to set up the kernel context for straight forward
327  * and simple IDT entries. No IST stack, no paranoid entry checks.
328  */                                               
329 .macro idtentry vector asmsym cfunc has_error_code:req
330 SYM_CODE_START(\asmsym)                           
331                                                   
332         .if \vector == X86_TRAP_BP                
333                 /* #BP advances %rip to the next instruction */
334                 UNWIND_HINT_IRET_ENTRY offset=\has_error_code*8 signal=0
335         .else
336                 UNWIND_HINT_IRET_ENTRY offset=\has_error_code*8
337         .endif                                    
338                                                   
339         ENDBR                                     
340         ASM_CLAC                                  
341         cld                                       
342                                                   
343         .if \has_error_code == 0                  
344                 pushq   $-1                       
345         .endif                                    
346                                                   
347         .if \vector == X86_TRAP_BP                
348                 /*                                
349                  * If coming from kernel space, create a 6-word gap to allow the
350                  * int3 handler to emulate a call instruction.
351                  */                               
352                 testb   $3, CS-ORIG_RAX(%rsp)     
353                 jnz     .Lfrom_usermode_no_gap_\@
354                 .rept   6                         
355                 pushq   5*8(%rsp)                 
356                 .endr                             
357                 UNWIND_HINT_IRET_REGS offset=8    
358 .Lfrom_usermode_no_gap_\@:                        
359         .endif                                    
360                                                   
361         idtentry_body \cfunc \has_error_code      
362                                                   
363 _ASM_NOKPROBE(\asmsym)                            
364 SYM_CODE_END(\asmsym)                             
365 .endm                                             
366                                                   
367 /*                                                
368  * Interrupt entry/exit.                          
369  *                                                
370  * The interrupt stubs push (vector) onto the stack, which is the error_code
371  * position of idtentry exceptions, and jump to one of the two idtentry points
372  * (common/spurious).
373  *
374  * common_interrupt is a hotpath, align it to a cache line
375  */                                               
376 .macro idtentry_irq vector cfunc                  
377         .p2align CONFIG_X86_L1_CACHE_SHIFT        
378         idtentry \vector asm_\cfunc \cfunc has_error_code=1
379 .endm                                             
380                                                   
381 /**                                               
382  * idtentry_mce_db - Macro to generate entry stubs for #MC and #DB
383  * @vector:             Vector number             
384  * @asmsym:             ASM symbol for the entry point
385  * @cfunc:              C function to be called
386  *                                                
387  * The macro emits code to set up the kernel context for #MC and #DB
388  *
389  * If the entry comes from user space it uses the normal entry path
390  * including the return to user space work and preemption checks on
391  * exit.
392  *
393  * If hits in kernel mode then it needs to go through the paranoid
394  * entry as the exception can hit any random state. No preemption
395  * check on exit to keep the paranoid path simple.
396  */                                               
397 .macro idtentry_mce_db vector asmsym cfunc        
398 SYM_CODE_START(\asmsym)                           
399         UNWIND_HINT_IRET_ENTRY                    
400         ENDBR                                     
401         ASM_CLAC                                  
402         cld                                       
403                                                   
404         pushq   $-1                     /* ORIG_RAX: no syscall to restart */
405                                                   
406         /*                                        
407          * If the entry is from userspace, switch stacks and treat it as
408          * a normal entry.                        
409          */                                       
410         testb   $3, CS-ORIG_RAX(%rsp)             
411         jnz     .Lfrom_usermode_switch_stack_\@
412                                                   
413         /* paranoid_entry returns GS information for paranoid_exit in EBX. */
414         call    paranoid_entry                    
415                                                   
416         UNWIND_HINT_REGS                          
417                                                   
418         movq    %rsp, %rdi              /* pt_regs pointer */
419                                                   
420         call    \cfunc                            
421                                                   
422         jmp     paranoid_exit                     
423                                                   
424         /* Switch to the regular task stack and use the noist entry point */
425 .Lfrom_usermode_switch_stack_\@:                  
426         idtentry_body noist_\cfunc, has_error_code=0
427                                                   
428 _ASM_NOKPROBE(\asmsym)                            
429 SYM_CODE_END(\asmsym)                             
430 .endm                                             
431                                                   
432 #ifdef CONFIG_AMD_MEM_ENCRYPT                     
433 /**                                               
434  * idtentry_vc - Macro to generate entry stub for #VC exception
435  * @vector:             Vector number             
436  * @asmsym:             ASM symbol for the entry point
437  * @cfunc:              C function to be called
438  *                                                
439  * The macro emits code to set up the kernel context for #VC. The #VC handler
440  * runs on an IST stack and needs to be able to cause nested #VC exceptions.
441  *
442  * To make this work the #VC entry code tries its best to pretend it doesn't use
443  * an IST stack by switching to the task stack if coming from user-space (which
444  * includes early SYSCALL entry path) or back to the stack in the IRET frame if
445  * entered from kernel-mode.
446  *
447  * If entered from kernel-mode the return stack is validated first, and if it is
448  * not safe to use (e.g. because it points to the entry stack) the #VC handler
449  * will switch to a fall-back stack (VC2) and call a special handler function.
450  *
451  * The macro is only used for one vector, but it is planned to be extended in
452  * the future for the #HV exception.
453  */                                               
454 .macro idtentry_vc vector asmsym cfunc            
455 SYM_CODE_START(\asmsym)                           
456         UNWIND_HINT_IRET_ENTRY                    
457         ENDBR                                     
458         ASM_CLAC                                  
459         cld                                       
460                                                   
461         /*                                        
462          * If the entry is from userspace, switch stacks and treat it as
463          * a normal entry.                        
464          */                                       
465         testb   $3, CS-ORIG_RAX(%rsp)             
466         jnz     .Lfrom_usermode_switch_stack_\@
467                                                   
468         /*                                        
469          * paranoid_entry returns SWAPGS flag for paranoid_exit in EBX.
470          * EBX == 0 -> SWAPGS, EBX == 1 -> no SWAPGS
471          */                                       
472         call    paranoid_entry                    
473                                                   
474         UNWIND_HINT_REGS                          
475                                                   
476         /*                                        
477          * Switch off the IST stack to make it usable for nested exceptions. The
478          * vc_switch_off_ist() function will switch back to the interrupted
479          * stack if it is safe to do so. If not it switches to the VC fall-back
480          * stack.                                 
481          */                                       
482         movq    %rsp, %rdi              /* pt_regs pointer */
483         call    vc_switch_off_ist
484         movq    %rax, %rsp              /* Switch to new stack */
485                                                   
486         ENCODE_FRAME_POINTER                      
487         UNWIND_HINT_REGS                          
488                                                   
489         /* Update pt_regs */                      
490         movq    ORIG_RAX(%rsp), %rsi    /* get error code into 2nd argument*/
491         movq    $-1, ORIG_RAX(%rsp)     /* no syscall to restart */
492 
493         movq    %rsp, %rdi              /* pt_regs pointer */
494                                                   
495         call    kernel_\cfunc                     
496                                                   
497         /*                                        
498          * No need to switch back to the IST stack. The current stack is either
499          * identical to the stack in the IRET frame or the VC fall-back stack,
500          * so it is definitely mapped even with PTI enabled.
501          */                                       
502         jmp     paranoid_exit                     
503                                                   
504         /* Switch to the regular task stack */    
505 .Lfrom_usermode_switch_stack_\@:                  
506         idtentry_body user_\cfunc, has_error_code=1
507                                                   
508 _ASM_NOKPROBE(\asmsym)                            
509 SYM_CODE_END(\asmsym)                             
510 .endm                                             
511 #endif                                            
512                                                   
513 /*                                                
514  * Double fault entry. Straight paranoid. No checks from which context
515  * this comes because for the espfix induced #DF this would do the wrong
516  * thing.                                         
517  */                                               
518 .macro idtentry_df vector asmsym cfunc            
519 SYM_CODE_START(\asmsym)                           
520         UNWIND_HINT_IRET_ENTRY offset=8           
521         ENDBR                                     
522         ASM_CLAC                                  
523         cld                                       
524                                                   
525         /* paranoid_entry returns GS information for paranoid_exit in EBX. */
526         call    paranoid_entry                    
527         UNWIND_HINT_REGS                          
528                                                   
529         movq    %rsp, %rdi              /* pt_regs pointer into first argument */
530         movq    ORIG_RAX(%rsp), %rsi    /* get error code into 2nd argument*/
531         movq    $-1, ORIG_RAX(%rsp)     /* no syscall to restart */
532         call    \cfunc                            
533                                                   
534         /* For some configurations \cfunc ends up being a noreturn. */
535         REACHABLE                                 
536                                                   
537         jmp     paranoid_exit                     
538                                                   
539 _ASM_NOKPROBE(\asmsym)                            
540 SYM_CODE_END(\asmsym)                             
541 .endm                                             
542                                                   
543 /*                                                
544  * Include the defines which emit the idt entries which are
545  * shared between 32 and 64 bit and emit the __irqentry_text_* markers
546  * so the stacktrace boundary checks work.        
547  */                                               
548         __ALIGN                                   
549         .globl __irqentry_text_start              
550 __irqentry_text_start:                            
551                                                   
552 #include <asm/idtentry.h>                         
553                                                   
554         __ALIGN                                   
555         .globl __irqentry_text_end                
556 __irqentry_text_end:                              
557         ANNOTATE_NOENDBR                          
558                                                   
559 SYM_CODE_START_LOCAL(common_interrupt_return)     
560 SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
561         IBRS_EXIT                                 
562 #ifdef CONFIG_XEN_PV                              
563         ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
564 #endif                                            
565 #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION     
566         ALTERNATIVE "", "jmp .Lpti_restore_regs_and_return_to_usermode", X86_FEATURE_PTI
567 #endif                                            
568                                                   
569         STACKLEAK_ERASE                           
570         POP_REGS                                  
571         add     $8, %rsp        /* orig_ax */     
572         UNWIND_HINT_IRET_REGS                     
573                                                   
574 .Lswapgs_and_iret:                                
575         swapgs                                    
576         CLEAR_CPU_BUFFERS                         
577         /* Assert that the IRET frame indicates user mode. */
578         testb   $3, 8(%rsp)                       
579         jnz     .Lnative_iret                     
580         ud2                                       
581                                                   
582 #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION     
583 .Lpti_restore_regs_and_return_to_usermode:        
584         POP_REGS pop_rdi=0                        
585                                                   
586         /*                                        
587          * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
588          * Save old stack pointer and switch to trampoline stack.
589          */                                       
590         movq    %rsp, %rdi                        
591         movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
592         UNWIND_HINT_END_OF_STACK                  
593                                                   
594         /* Copy the IRET frame to the trampoline stack. */
595         pushq   6*8(%rdi)       /* SS */          
596         pushq   5*8(%rdi)       /* RSP */         
597         pushq   4*8(%rdi)       /* EFLAGS */      
598         pushq   3*8(%rdi)       /* CS */          
599         pushq   2*8(%rdi)       /* RIP */         
600                                                   
601         /* Push user RDI on the trampoline stack. */
602         pushq   (%rdi)                            
603                                                   
604         /*                                        
605          * We are on the trampoline stack.  All regs except RDI are live.
606          * We can do future final exit work right here.
607          */                                       
608         STACKLEAK_ERASE_NOCLOBBER                 
609                                                   
610         push    %rax                              
611         SWITCH_TO_USER_CR3 scratch_reg=%rdi scratch_reg2=%rax
612         pop     %rax                              
613                                                   
614         /* Restore RDI. */                        
615         popq    %rdi                              
616         jmp     .Lswapgs_and_iret                 
617 #endif                                            
618                                                   
619 SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)
620 #ifdef CONFIG_DEBUG_ENTRY                         
621         /* Assert that pt_regs indicates kernel mode. */
622         testb   $3, CS(%rsp)                      
623         jz      1f                                
624         ud2                                       
625 1:                                                
626 #endif                                            
627         POP_REGS                                  
628         addq    $8, %rsp        /* skip regs->orig_ax */
629         /*                                        
630          * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
631          * when returning from IPI handler.       
632          */                                       
633 #ifdef CONFIG_XEN_PV                              
634 SYM_INNER_LABEL(early_xen_iret_patch, SYM_L_GLOBAL)
635         ANNOTATE_NOENDBR                          
636         .byte 0xe9                                
637         .long .Lnative_iret - (. + 4)             
638 #endif                                            
639                                                   
640 .Lnative_iret:                                    
641         UNWIND_HINT_IRET_REGS                     
642         /*                                        
643          * Are we returning to a stack segment from the LDT?  Note: in
644          * 64-bit mode SS:RSP on the exception stack is always valid.
645          */                                       
646 #ifdef CONFIG_X86_ESPFIX64                        
647         testb   $4, (SS-RIP)(%rsp)                
648         jnz     native_irq_return_ldt             
649 #endif                                            
650                                                   
651 SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL)
652         ANNOTATE_NOENDBR // exc_double_fault      
653         /*                                        
654          * This may fault.  Non-paranoid faults on return to userspace are
655          * handled by fixup_bad_iret.  These include #SS, #GP, and #NP.
656          * Double-faults due to espfix64 are handled in exc_double_fault.
657          * Other faults here are fatal.
658          */                                       
659         iretq                                     
660                                                   
661 #ifdef CONFIG_X86_ESPFIX64                        
662 native_irq_return_ldt:                            
663         /*                                        
664          * We are running with user GSBASE.  All GPRs contain their user
665          * values.  We have a percpu ESPFIX stack that is eight slots
666          * long (see ESPFIX_STACK_SIZE).  espfix_waddr points to the bottom
667          * of the ESPFIX stack.
668          *
669          * We clobber RAX and RDI in this code.  We stash RDI on the
670          * normal stack and RAX on the ESPFIX stack.
671          *
672          * The ESPFIX stack layout we set up looks like this:
673          *
674          * --- top of ESPFIX stack ---
675          * SS
676          * RSP
677          * RFLAGS
678          * CS
679          * RIP  <-- RSP points here when we're done
680          * RAX  <-- espfix_waddr points here      
681          * --- bottom of ESPFIX stack ---         
682          */                                       
683                                                   
684         pushq   %rdi                              
685         swapgs                                    
686         SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi     
687                                                   
688         movq    PER_CPU_VAR(espfix_waddr), %rd    
689         movq    %rax, (0*8)(%rdi)                 
690         movq    (1*8)(%rsp), %rax                 
691         movq    %rax, (1*8)(%rdi)                 
692         movq    (2*8)(%rsp), %rax                 
693         movq    %rax, (2*8)(%rdi)                 
694         movq    (3*8)(%rsp), %rax                 
695         movq    %rax, (3*8)(%rdi)                 
696         movq    (5*8)(%rsp), %rax                 
697         movq    %rax, (5*8)(%rdi)                 
698         movq    (4*8)(%rsp), %rax                 
699         movq    %rax, (4*8)(%rdi)                 
700         /* Now RAX == RSP. */                     
701                                                   
702         andl    $0xffff0000, %eax                 
703                                                   
704         /*                                        
705          * espfix_stack[31:16] == 0.  The page tables are set up such that
706          * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of
707          * espfix_waddr for any X.  That is, there are 65536 RO aliases of
708          * the same page.  Set up RSP so that RSP[31:16] contains the
709          * respective 16 bits of the /userspace/ RSP and RSP nonetheless
710          * still points to an RO alias of the ESPFIX stack.
711          */                                       
712         orq     PER_CPU_VAR(espfix_stack), %rax
713                                                   
714         SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
715         swapgs                                    
716         popq    %rdi                              
717                                                   
718         movq    %rax, %rsp                        
719         UNWIND_HINT_IRET_REGS offset=8            
720                                                   
721         /*                                        
722          * At this point, we cannot write to the stack any more, but we can
723          * still read.                            
724          */                                       
725         popq    %rax                              
726                                                   
727         CLEAR_CPU_BUFFERS                         
728                                                   
729         /*                                        
730          * RSP now points to an ordinary IRET frame, except that the page
731          * is read-only and RSP[31:16] are preloaded with the userspace
732          * values.  We can now IRET back to userspace.
733          */                                       
734         jmp     native_irq_return_iret            
735 #endif                                            
736 SYM_CODE_END(common_interrupt_return)             
737 _ASM_NOKPROBE(common_interrupt_return)            
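
The ESPFIX sequence above builds the aliased RSP with an andl/orq pair. A standalone model of just that address computation (illustration only, not kernel code), following the comment above about the 65536 read-only aliases:

    #include <stdint.h>

    /*
     * Model of the RSP value the "andl $0xffff0000, %eax" /
     * "orq PER_CPU_VAR(espfix_stack), %rax" pair constructs: bits 31:16 of
     * the user RSP are OR'ed into the per-CPU espfix_stack base (whose bits
     * 31:16 are zero), giving a kernel pointer to an RO alias of the ESPFIX
     * stack whose bits 31:16 match the userspace RSP.
     */
    static uint64_t espfix_alias_rsp(uint64_t espfix_stack, uint64_t user_rsp)
    {
            return espfix_stack | (user_rsp & 0xffff0000u);
    }
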
738                                                   
739 /*                                                
740  * Reload gs selector with exception handling     
741  *  di:  new selector                             
742  *                                                
743  * Is in entry.text as it shouldn't be instrumented.
744  */                                               
745 SYM_FUNC_START(asm_load_gs_index)                 
746         FRAME_BEGIN                               
747         swapgs                                    
748 .Lgs_change:                                      
749         ANNOTATE_NOENDBR // error_entry           
750         movl    %edi, %gs                         
751 2:      ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
752         swapgs                                    
753         FRAME_END                                 
754         RET                                       
755                                                   
756         /* running with kernelgs */               
757 .Lbad_gs:                                         
758         swapgs                                    
759 .macro ZAP_GS                                     
760         /* This can't be a string because the preprocessor needs to see it. */
761         movl $__USER_DS, %eax                     
762         movl %eax, %gs                            
763 .endm                                             
764         ALTERNATIVE "", "ZAP_GS", X86_BUG_NULL_SEG
765         xorl    %eax, %eax                        
766         movl    %eax, %gs                         
767         jmp     2b                                
768                                                   
769         _ASM_EXTABLE(.Lgs_change, .Lbad_gs)       
770                                                   
771 SYM_FUNC_END(asm_load_gs_index)                   
772 EXPORT_SYMBOL(asm_load_gs_index)                  
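
asm_load_gs_index is normally reached through a C wrapper that keeps interrupts off across the swapgs / mov-to-%gs / swapgs window. A sketch of such a wrapper, modelled on native_load_gs_index() but with the details treated as assumptions rather than quoted verbatim:

    #include <linux/irqflags.h>
    #include <linux/types.h>

    void asm_load_gs_index(u16 selector);

    /* Sketch: interrupts must stay off so nothing observes the swapped GS. */
    static inline void load_gs_index_sketch(u16 selector)
    {
            unsigned long flags;

            local_irq_save(flags);
            asm_load_gs_index(selector);
            local_irq_restore(flags);
    }
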
773                                                   
774 #ifdef CONFIG_XEN_PV                              
775 /*                                                
776  * A note on the "critical region" in our callback handler.
777  * We want to avoid stacking callback handlers due to events occurring
778  * during handling of the last event. To do this, we keep events disabled
779  * until we've done all processing. HOWEVER, we must enable events before
780  * popping the stack frame (can't be done atomically) and so it would still
781  * be possible to get enough handler activations to overflow the stack.
782  * Although unlikely, bugs of that kind are hard to track down, so we'd
783  * like to avoid the possibility.
784  * So, on entry to the handler we detect whether we interrupted an
785  * existing activation in its critical region -- if so, we pop the current
786  * activation and restart the handler using the previous one.
787  *
788  * C calling convention: exc_xen_hypervisor_callback(struct *pt_regs)
789  */                                               
790         __FUNC_ALIGN                              
791 SYM_CODE_START_LOCAL_NOALIGN(exc_xen_hypervisor_callback)
792                                                   
793 /*                                                
794  * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
795  * see the correct pointer to the pt_regs
796  */                                               
797         UNWIND_HINT_FUNC                          
798         movq    %rdi, %rsp                        
799         UNWIND_HINT_REGS                          
800                                                   
801         call    xen_pv_evtchn_do_upcall           
802                                                   
803         jmp     error_return                      
804 SYM_CODE_END(exc_xen_hypervisor_callback)         
805                                                   
806 /*                                                
807  * Hypervisor uses this for application faults while it executes.
808  * We get here for two reasons:                   
809  *  1. Fault while reloading DS, ES, FS or GS     
810  *  2. Fault while executing IRET                 
811  * Category 1 we do not need to fix up as Xen has already reloaded all segment
812  * registers that could be reloaded and zeroed the others.
813  * Category 2 we fix up by killing the current process. We cannot use the
814  * normal Linux return path in this case because if we use the IRET hypercall
815  * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
816  * We distinguish between categories by comparing each saved segment register
817  * with its current contents: any discrepancy means we are in category 1.
818  */                                               
819         __FUNC_ALIGN                              
820 SYM_CODE_START_NOALIGN(xen_failsafe_callback)     
821         UNWIND_HINT_UNDEFINED                     
822         ENDBR                                     
823         movl    %ds, %ecx                         
824         cmpw    %cx, 0x10(%rsp)                   
825         jne     1f                                
826         movl    %es, %ecx                         
827         cmpw    %cx, 0x18(%rsp)                   
828         jne     1f                                
829         movl    %fs, %ecx                         
830         cmpw    %cx, 0x20(%rsp)                   
831         jne     1f                                
832         movl    %gs, %ecx                         
833         cmpw    %cx, 0x28(%rsp)                   
834         jne     1f                                
835         /* All segments match their saved values => Category 2 (Bad IRET). */
836         movq    (%rsp), %rcx                      
837         movq    8(%rsp), %r11                     
838         addq    $0x30, %rsp                       
839         pushq   $0                                
840         UNWIND_HINT_IRET_REGS offset=8            
841         jmp     asm_exc_general_protection        
842 1:      /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
843         movq    (%rsp), %rcx                      
844         movq    8(%rsp), %r11                     
845         addq    $0x30, %rsp                       
846         UNWIND_HINT_IRET_REGS                     
847         pushq   $-1 /* orig_ax = -1 => not a system call */
848         PUSH_AND_CLEAR_REGS                       
849         ENCODE_FRAME_POINTER                      
850         jmp     error_return                      
851 SYM_CODE_END(xen_failsafe_callback)               
852 #endif /* CONFIG_XEN_PV */                        
853                                                   
854 /*                                                
855  * Save all registers in pt_regs. Return GSBASE related information
856  * in EBX depending on the availability of the FSGSBASE instructions:
857  *                                                
858  * FSGSBASE     R/EBX                             
859  *     N        0 -> SWAPGS on exit               
860  *              1 -> no SWAPGS on exit            
861  *                                                
862  *     Y        GSBASE value at entry, must be restored in paranoid_exit
863  *                                                
864  * R14 - old CR3                                  
865  * R15 - old SPEC_CTRL                            
866  */                                               
867 SYM_CODE_START(paranoid_entry)                    
868         ANNOTATE_NOENDBR                          
869         UNWIND_HINT_FUNC                          
870         PUSH_AND_CLEAR_REGS save_ret=1            
871         ENCODE_FRAME_POINTER 8                    
872                                                   
873         /*                                        
874          * Always stash CR3 in %r14.  This value will be restored,
875          * verbatim, at exit.  Needed if paranoid_entry interrupted
876          * another entry that already switched to the user CR3 value
877          * but has not yet returned to userspace.
878          *
879          * This is also why CS (stashed in the "iret frame" by the
880          * hardware at entry) can not be used: this may be a return
881          * to kernel code, but with a user CR3 value.
882          *
883          * Switching CR3 does not depend on kernel GSBASE so it can
884          * be done before switching to the kernel GSBASE. This is
885          * required for FSGSBASE because the kernel GSBASE has to
886          * be retrieved from a kernel internal table.
887          */
888         SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
889                                                   
890         /*                                        
891          * Handling GSBASE depends on the availability of FSGSBASE.
892          *
893          * Without FSGSBASE the kernel enforces that negative GSBASE
894          * values indicate kernel GSBASE. With FSGSBASE no assumptions
895          * can be made about the GSBASE value when entering from user
896          * space.
897          */
898         ALTERNATIVE "jmp .Lparanoid_entry_checkgs", "", X86_FEATURE_FSGSBASE
899                                                   
900         /*                                        
901          * Read the current GSBASE and store it in %rbx unconditionally,
902          * retrieve and set the current CPUs kernel GSBASE. The stored value
903          * has to be restored in paranoid_exit unconditionally.
904          *
905          * The unconditional write to GS base below ensures that no subsequent
906          * loads based on a mispredicted GS base can happen, therefore no LFENCE
907          * is needed here.
908          */
909         SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
910         jmp .Lparanoid_gsbase_done                
911                                                   
912 .Lparanoid_entry_checkgs:                         
913         /* EBX = 1 -> kernel GSBASE active, no restore required */
914         movl    $1, %ebx                          
915                                                   
916         /*                                        
917          * The kernel-enforced convention is a negative GSBASE indicates
918          * a kernel value. No SWAPGS needed on entry and exit.
919          */                                       
920         movl    $MSR_GS_BASE, %ecx                
921         rdmsr                                     
922         testl   %edx, %edx                        
923         js      .Lparanoid_kernel_gsbase          
924                                                   
925         /* EBX = 0 -> SWAPGS required on exit */
926         xorl    %ebx, %ebx                        
927         swapgs                                    
928 .Lparanoid_kernel_gsbase:                         
929         FENCE_SWAPGS_KERNEL_ENTRY                 
930 .Lparanoid_gsbase_done:                           
931                                                   
932         /*                                        
933          * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like
934          * CR3 above, keep the old value in a callee saved register.
935          */                                       
936         IBRS_ENTER save_reg=%r15                  
937         UNTRAIN_RET_FROM_CALL                     
938                                                   
939         RET                                       
940 SYM_CODE_END(paranoid_entry)                      
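
The GSBASE handling above reduces to one decision: with FSGSBASE, save the interrupted GSBASE verbatim and install the kernel one; without it, rely on the sign convention and SWAPGS only when a user GSBASE was active. A standalone C model of that decision (illustration only; the assembly above remains authoritative):

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * Model of the value paranoid_exit later sees in RBX/EBX: the saved
     * GSBASE when FSGSBASE is available, otherwise a 0/1 flag meaning
     * "SWAPGS was done on entry and must be done again on exit".
     */
    static uint64_t paranoid_entry_gs_model(bool has_fsgsbase,
                                            uint64_t entry_gsbase,
                                            bool *swapgs_on_exit)
    {
            if (has_fsgsbase) {
                    /* Kernel GSBASE is installed unconditionally; the saved
                     * value is restored by paranoid_exit via wrgsbase. */
                    *swapgs_on_exit = false;
                    return entry_gsbase;
            }

            /* Kernel-enforced convention: negative GSBASE == kernel GS. */
            if ((int64_t)entry_gsbase < 0) {
                    *swapgs_on_exit = false;        /* EBX = 1 */
                    return 1;
            }

            *swapgs_on_exit = true;                 /* EBX = 0 */
            return 0;
    }
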
941                                                   
942 /*                                                
943  * "Paranoid" exit path from exception stack.     
944  * only on return from non-NMI IST interrupts     
945  * from kernel space.                             
946  *                                                
947  * We may be returning to very strange context    
948  * in syscall entry), so checking for preempti    
949  * be complicated.  Fortunately, there's no go    
950  * to handle preemption here.                     
951  *                                                
952  * R/EBX contains the GSBASE related informati    
953  * availability of the FSGSBASE instructions:     
954  *                                                
955  * FSGSBASE     R/EBX                             
956  *     N        0 -> SWAPGS on exit               
957  *              1 -> no SWAPGS on exit            
958  *                                                
959  *     Y        User space GSBASE, must be res    
960  *                                                
961  * R14 - old CR3                                  
962  * R15 - old SPEC_CTRL                            
963  */                                               
964 SYM_CODE_START_LOCAL(paranoid_exit)               
965         UNWIND_HINT_REGS                          
966                                                   
967         /*                                        
968          * Must restore IBRS state before both CR3 and %GS since we need access
969          * to the per-CPU x86_spec_ctrl_shadow variable.
970          */                                       
971         IBRS_EXIT save_reg=%r15                   
972                                                   
973         /*                                        
974          * The order of operations is important. PARANOID_RESTORE_CR3 requires
975          * kernel GSBASE.
976          *
977          * NB to anyone to try to optimize this code: this code does
978          * not execute at all for exceptions from user mode. Those
979          * exceptions go through error_return instead.
980          */
981         PARANOID_RESTORE_CR3 scratch_reg=%rax save_reg=%r14
982                                                   
983         /* Handle the three GSBASE cases */       
984         ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE
985                                                   
986         /* With FSGSBASE enabled, unconditionally restore GSBASE */
987         wrgsbase        %rbx
988         jmp             restore_regs_and_return_to_kernel
989                                                   
990 .Lparanoid_exit_checkgs:                          
991         /* On non-FSGSBASE systems, conditionally do SWAPGS */
992         testl           %ebx, %ebx
993         jnz             restore_regs_and_return_to_kernel
994                                                   
995         /* We are returning to a context with user GSBASE */
996         swapgs
997         jmp             restore_regs_and_return_to_kernel
998 SYM_CODE_END(paranoid_exit)                       
999                                                   
1000 /*                                               
1001  * Switch GS and CR3 if needed.                  
1002  */                                              
1003 SYM_CODE_START(error_entry)                      
1004         ANNOTATE_NOENDBR                         
1005         UNWIND_HINT_FUNC                         
1006                                                  
1007         PUSH_AND_CLEAR_REGS save_ret=1           
1008         ENCODE_FRAME_POINTER 8                   
1009                                                  
1010         testb   $3, CS+8(%rsp)                   
1011         jz      .Lerror_kernelspace              
1012                                                  
1013         /*                                       
1014          * We entered from user mode or we're pretending to have entered
1015          * from user mode due to an IRET fault.
1016          */                                      
1017         swapgs                                   
1018         FENCE_SWAPGS_USER_ENTRY                  
1019         /* We have user CR3.  Change to kernel CR3. */
1020         SWITCH_TO_KERNEL_CR3 scratch_reg=%rax    
1021         IBRS_ENTER                               
1022         UNTRAIN_RET_FROM_CALL                    
1023                                                  
1024         leaq    8(%rsp), %rdi                   /* arg0 = pt_regs pointer */
1025         /* Put us onto the real thread stack. */
1026         jmp     sync_regs                        
1027                                                  
1028         /*                                       
1029          * There are two places in the kernel that can potentially fault with
1030          * usergs. Handle them here.  B stepping K8s sometimes report a
1031          * truncated RIP for IRET exceptions returning to compat mode. Check
1032          * for these here too.                   
1033          */                                      
1034 .Lerror_kernelspace:                             
1035         leaq    native_irq_return_iret(%rip), %rcx
1036         cmpq    %rcx, RIP+8(%rsp)                
1037         je      .Lerror_bad_iret                 
1038         movl    %ecx, %eax                      /* zero extend */
1039         cmpq    %rax, RIP+8(%rsp)                
1040         je      .Lbstep_iret                     
1041         cmpq    $.Lgs_change, RIP+8(%rsp)        
1042         jne     .Lerror_entry_done_lfence        
1043                                                  
1044         /*                                       
1045          * hack: .Lgs_change can fail with user gsbase.  If this happens, fix up
1046          * gsbase and proceed.  We'll fix up the exception and land in
1047          * .Lgs_change's error handler with kernel gsbase.
1048          */                                      
1049         swapgs                                   
1050                                                  
1051         /*                                       
1052          * Issue an LFENCE to prevent GS speculation, regardless of whether it is a
1053          * kernel or user gsbase.                
1054          */                                      
1055 .Lerror_entry_done_lfence:                       
1056         FENCE_SWAPGS_KERNEL_ENTRY                
1057         CALL_DEPTH_ACCOUNT                       
1058         leaq    8(%rsp), %rax                   /* return pt_regs pointer */
1059         VALIDATE_UNRET_END                       
1060         RET                                      
1061                                                  
1062 .Lbstep_iret:                                    
1063         /* Fix truncated RIP */                  
1064         movq    %rcx, RIP+8(%rsp)                
1065         /* fall through */                       
1066                                                  
1067 .Lerror_bad_iret:                                
1068         /*                                       
1069          * We came from an IRET to user mode, so we have user
1070          * gsbase and CR3.  Switch to kernel gsbase and CR3:
1071          */                                      
1072         swapgs                                   
1073         FENCE_SWAPGS_USER_ENTRY                  
1074         SWITCH_TO_KERNEL_CR3 scratch_reg=%rax    
1075         IBRS_ENTER                               
1076         UNTRAIN_RET_FROM_CALL                    
1077                                                  
1078         /*                                       
1079          * Pretend that the exception came from user mode: set up pt_regs
1080          * as if we faulted immediately after IRET.
1081          */                                      
1082         leaq    8(%rsp), %rdi                    
1083         call    fixup_bad_iret                   
1084         mov     %rax, %rdi                       
1085         jmp     sync_regs                        
1086 SYM_CODE_END(error_entry)                        
1087                                                  
1088 SYM_CODE_START_LOCAL(error_return)               
1089         UNWIND_HINT_REGS                         
1090         DEBUG_ENTRY_ASSERT_IRQS_OFF              
1091         testb   $3, CS(%rsp)                     
1092         jz      restore_regs_and_return_to_kernel
1093         jmp     swapgs_restore_regs_and_return_to_usermode
1094 SYM_CODE_END(error_return)                       
1095                                                  
1096 /*                                               
1097  * Runs on exception stack.  Xen PV does not go through this path at all,
1098  * so we can use real assembly here.             
1099  *                                               
1100  * Registers:                                    
1101  *      %r14: Used to save/restore the CR3 of the interrupted context
1102  *            when MITIGATION_PAGE_TABLE_ISOLATION is in use.  Do not clobber.
1103  */                                              
1104 SYM_CODE_START(asm_exc_nmi)                      
1105         UNWIND_HINT_IRET_ENTRY                   
1106         ENDBR                                    
1107                                                  
1108         /*                                       
1109          * We allow breakpoints in NMIs. If a breakpoint occurs, then
1110          * the iretq it performs will take us out of NMI context.
1111          * This means that we can have nested NMIs where the next
1112          * NMI is using the top of the stack of the previous NMI. We
1113          * can't let it execute because the nested NMI will corrupt the
1114          * stack of the previous NMI. NMI handlers are not re-entrant
1115          * anyway.                               
1116          *                                       
1117          * To handle this case we do the following:
1118          *  Check a special location on the stack that contains a
1119          *  variable that is set when NMIs are executing.
1120          *  The interrupted task's stack is also checked to see if it
1121          *  is an NMI stack.
1122          *  If the variable is not set and the stack is not the NMI
1123          *  stack then:
1124          *    o Set the special variable on the stack
1125          *    o Copy the interrupt frame into an "outermost" location on the
1126          *      stack
1127          *    o Copy the interrupt frame into an "iret" location on the stack
1128          *    o Continue processing the NMI
1129          *  If the variable is set or the previous stack is the NMI stack:
1130          *    o Modify the "iret" location to jump to the repeat_nmi
1131          *    o return back to the first NMI     
1132          *                                       
1133          * Now on exit of the first NMI, we first clear the stack variable.
1134          * The NMI stack will tell any nested NMIs at that point that it is
1135          * nested. Then we pop the stack normally with iret, and if there was
1136          * a nested NMI that updated the copy of the interrupt stack frame, a
1137          * jump will be made to the repeat_nmi code that will handle the second
1138          * NMI.                                  
1139          *                                       
1140          * However, espfix prevents us from directly returning to userspace
1141          * with a single IRET instruction.  Similarly, IRET to user mode
1142          * can fault.  We therefore handle NMIs from user space like
1143          * other IST entries.                    
1144          */                                      
1145                                                  
1146         ASM_CLAC                                 
1147         cld                                      
1148                                                  
1149         /* Use %rdx as our temp variable throughout */
1150         pushq   %rdx                             
1151                                                  
1152         testb   $3, CS-RIP+8(%rsp)               
1153         jz      .Lnmi_from_kernel                
1154                                                  
1155         /*                                       
1156          * NMI from user mode.  We need to run on the thread stack, but we
1157          * can't go through the normal entry paths: NMIs are masked, and
1158          * we don't want to enable interrupts, because then we'll end
1159          * up in an awkward situation in which IRQs are on but NMIs
1160          * are off.                              
1161          *                                       
1162          * We also must not push anything to the stack before switching
1163          * stacks lest we corrupt the "NMI executing" variable.
1164          */                                      
1165                                                  
1166         swapgs                                   
1167         FENCE_SWAPGS_USER_ENTRY                  
1168         SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx    
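             /*
              * Stash the NMI-stack %rsp in %rdx, switch %rsp to this CPU's
              * top-of-thread-stack, then rebuild the hardware iret frame
              * there from the values still saved on the NMI stack via %rdx.
              */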
1169         movq    %rsp, %rdx                       
1170         movq    PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
1171         UNWIND_HINT_IRET_REGS base=%rdx offset=8
1172         pushq   5*8(%rdx)       /* pt_regs->ss */
1173         pushq   4*8(%rdx)       /* pt_regs->rsp */
1174         pushq   3*8(%rdx)       /* pt_regs->flags */
1175         pushq   2*8(%rdx)       /* pt_regs->cs */
1176         pushq   1*8(%rdx)       /* pt_regs->rip */
1177         UNWIND_HINT_IRET_REGS
1178         pushq   $-1             /* pt_regs->orig_ax */
1179         PUSH_AND_CLEAR_REGS rdx=(%rdx)           
1180         ENCODE_FRAME_POINTER                     
1181                                                  
1182         IBRS_ENTER                               
1183         UNTRAIN_RET                              
1184                                                  
1185         /*                                       
1186          * At this point we no longer need to worry about stack damage
1187          * due to nesting -- we're on the normal thread stack and we're
1188          * done with the NMI stack.              
1189          */                                      
1190                                                  
1191         movq    %rsp, %rdi                       
1192         call    exc_nmi                          
1193                                                  
1194         /*                                       
1195          * Return back to user mode.  We must *not* do the normal exit
1196          * work, because we don't want to enable interrupts.
1197          */                                      
1198         jmp     swapgs_restore_regs_and_return_to_usermode
1199                                                  
1200 .Lnmi_from_kernel:                               
1201         /*                                       
1202          * Here's what our stack frame will look like:
1203          * +---------------------------------------------------------+
1204          * | original SS                                             |
1205          * | original Return RSP                                     |
1206          * | original RFLAGS                                         |
1207          * | original CS                                             |
1208          * | original RIP                                            |
1209          * +---------------------------------------------------------+
1210          * | temp storage for rdx                                    |
1211          * +---------------------------------------------------------+
1212          * | "NMI executing" variable                                |
1213          * +---------------------------------------------------------+
1214          * | iret SS          } Copied from "outermost" frame        |
1215          * | iret Return RSP  } on each loop iteration; overwritten  |
1216          * | iret RFLAGS      } by a nested NMI to force another     |
1217          * | iret CS          } iteration if needed.                 |
1218          * | iret RIP         }                                      |
1219          * +---------------------------------------------------------+
1220          * | outermost SS          } initialized in first_nmi;       |
1221          * | outermost Return RSP  } will not be changed before      |
1222          * | outermost RFLAGS      } NMI processing is done          |
1223          * | outermost CS          } Copied to "iret" frame on each  |
1224          * | outermost RIP         } iteration.                      |
1225          * +---------------------------------------------------------+
1226          * | pt_regs                                                 |
1227          * +---------------------------------------------------------+
1228          *                                       
1229          * The "original" frame is used by hardware.  Before re-enabling
1230          * NMIs, we need to be done with it, and we need to leave enough
1231          * space for the asm code here.          
1232          *                                       
1233          * We return by executing IRET while RSP points to the "iret" frame.
1234          * That will either return for real or it will loop back into NMI
1235          * processing.                           
1236          *                                       
1237          * The "outermost" frame is copied to the "iret" frame on each
1238          * iteration of the loop, so each iteration starts with the "iret"
1239          * frame pointing to the final return target.
1240          */                                      
1241                                                  
1242         /*                                       
1243          * Determine whether we're a nested NMI.
1244          *                                       
1245          * If we interrupted kernel code between repeat_nmi and
1246          * end_repeat_nmi, then we are a nested NMI.  We must not
1247          * modify the "iret" frame because it's being written by
1248          * the outer NMI.  That's okay; the outer NMI handler is
1249          * about to call exc_nmi() anyway, so we can just resume
1250          * the outer NMI.                        
1251          */                                      
1252                                                  
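             /*
              * The two unsigned compares below implement the range check
              * repeat_nmi <= RIP < end_repeat_nmi on the interrupted RIP,
              * which sits at 8(%rsp), just above the saved %rdx: the first
              * "ja" skips ahead when RIP is below repeat_nmi, the second
              * branches to nested_nmi_out when RIP is below end_repeat_nmi.
              */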
1253         movq    $repeat_nmi, %rdx                
1254         cmpq    8(%rsp), %rdx                    
1255         ja      1f                               
1256         movq    $end_repeat_nmi, %rdx            
1257         cmpq    8(%rsp), %rdx                    
1258         ja      nested_nmi_out                   
1259 1:                                               
1260                                                  
1261         /*                                       
1262          * Now check "NMI executing".  If it's set, then we're nested.
1263          * This will not detect if we interrupted an outer NMI just
1264          * before IRET.                          
1265          */                                      
1266         cmpl    $1, -8(%rsp)                     
1267         je      nested_nmi                       
1268                                                  
1269         /*                                       
1270          * Now test if the previous stack was an NMI stack.  This covers
1271          * the case where we interrupt an outer NMI after it clears
1272          * "NMI executing" but before IRET.  We need to be careful, though:
1273          * there is one case in which RSP could point to the NMI stack
1274          * despite there being no NMI active: naughty userspace controls
1275          * RSP at the very beginning of the SYSCALL targets.  We can
1276          * pull a fast one on naughty userspace, though: we program
1277          * SYSCALL to mask DF, so userspace cannot cause DF to be set
1278          * if it controls the kernel's RSP.  We set DF before we clear
1279          * "NMI executing".                      
1280          */                                      
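             /*
              * 6*8(%rsp) is the top of this NMI's stack: five words pushed
              * by hardware (SS, RSP, RFLAGS, CS, RIP) plus the %rdx saved at
              * entry.  The interrupted RSP sits at 4*8(%rsp) in that frame.
              */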
1281         lea     6*8(%rsp), %rdx                  
1282         /* Compare the NMI stack (rdx) with the stack pointer */
1283         cmpq    %rdx, 4*8(%rsp)                  
1284         /* If the stack pointer is above the NMI stack, this is a normal NMI */
1285         ja      first_nmi                        
1286                                                  
1287         subq    $EXCEPTION_STKSZ, %rdx           
1288         cmpq    %rdx, 4*8(%rsp)                  
1289         /* If it is below the NMI stack, it is a normal NMI */
1290         jb      first_nmi                        
1291                                                  
1292         /* Ah, it is within the NMI stack. */    
1293                                                  
1294         testb   $(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp)
1295         jz      first_nmi       /* RSP was user controlled. */
1296                                                  
1297         /* This is a nested NMI. */              
1298                                                  
1299 nested_nmi:                                      
1300         /*                                       
1301          * Modify the "iret" frame to point to repeat_nmi, forcing another
1302          * iteration of NMI handling.            
1303          */                                      
1304         subq    $8, %rsp                         
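             /*
              * The "subq $8" above skips the "NMI executing" slot so that the
              * five pushes below land on the "iret" frame; -10*8(%rsp) is the
              * address of "outermost RIP", which becomes the frame's saved RSP
              * so the forced IRET leaves RSP pointing at "outermost RIP".
              */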
1305         leaq    -10*8(%rsp), %rdx                
1306         pushq   $__KERNEL_DS                     
1307         pushq   %rdx                             
1308         pushfq                                   
1309         pushq   $__KERNEL_CS                     
1310         pushq   $repeat_nmi                      
1311                                                  
1312         /* Put stack back */                     
1313         addq    $(6*8), %rsp                     
1314                                                  
1315 nested_nmi_out:                                  
1316         popq    %rdx                             
1317                                                  
1318         /* We are returning to kernel mode, so this cannot result in a fault. */
1319         iretq                                    
1320                                                  
1321 first_nmi:                                       
1322         /* Restore rdx. */                       
1323         movq    (%rsp), %rdx                     
1324                                                  
1325         /* Make room for "NMI executing". */     
1326         pushq   $0                               
1327                                                  
1328         /* Leave room for the "iret" frame */    
1329         subq    $(5*8), %rsp                     
1330                                                  
1331         /* Copy the "original" frame to the "outermost" location */
1332         .rept 5                                  
1333         pushq   11*8(%rsp)                       
1334         .endr                                    
1335         UNWIND_HINT_IRET_REGS                    
1336                                                  
1337         /* Everything up to here is safe from nested NMIs */
1338                                                  
1339 #ifdef CONFIG_DEBUG_ENTRY                        
1340         /*                                       
1341          * For ease of testing, unmask NMIs right away.  Disabled by
1342          * default because IRET is very expensive.
1343          */                                      
1344         pushq   $0              /* SS */         
1345         pushq   %rsp            /* RSP (minus 8 because of the previous push) */
1346         addq    $8, (%rsp)      /* Fix up RSP */
1347         pushfq                  /* RFLAGS */     
1348         pushq   $__KERNEL_CS    /* CS */         
1349         pushq   $1f             /* RIP */        
1350         iretq                   /* continues at repeat_nmi below */
1351         UNWIND_HINT_IRET_REGS                    
1352 1:                                               
1353 #endif                                           
1354                                                  
1355 repeat_nmi:                                      
1356         ANNOTATE_NOENDBR // this code            
1357         /*                                       
1358          * If there was a nested NMI, the first NMI's iret will return
1359          * here. But NMIs are still enabled and we can take another
1360          * nested NMI. The nested NMI checks the interrupted RIP to see
1361          * if it is between repeat_nmi and end_repeat_nmi, and if so
1362          * it will just return, as we are about to repeat an NMI anyway.
1363          * This makes it safe to copy to the "iret" frame that a nested
1364          * NMI will update.                      
1365          *                                       
1366          * RSP is pointing to "outermost RIP".  gsbase is unknown, but, if
1367          * we're repeating an NMI, gsbase has the same value that it had on
1368          * the first iteration.  paranoid_entry will load the kernel
1369          * gsbase if needed before we call exc_nmi().  "NMI executing"
1370          * is zero.                              
1371          */                                      
1372         movq    $1, 10*8(%rsp)          /* Set "NMI executing". */
1373                                                  
1374         /*                                       
1375          * Copy the "outermost" frame to the "iret" frame.  NMIs that nest
1376          * here must not modify the "iret" frame while we're writing to
1377          * it or it will end up containing garbage.
1378          */                                      
1379         addq    $(10*8), %rsp                    
1380         .rept 5                                  
1381         pushq   -6*8(%rsp)                       
1382         .endr                                    
1383         subq    $(5*8), %rsp                     
1384 end_repeat_nmi:                                  
1385         ANNOTATE_NOENDBR // this code            
1386                                                  
1387         /*                                       
1388          * Everything below this point can be preempted by a nested NMI.
1389          * If this happens, then the inner NMI will change the "iret"
1390          * frame to point back to repeat_nmi.    
1391          */                                      
1392         pushq   $-1                             /* ORIG_RAX: no syscall to restart */
1393                                                  
1394         /*                                       
1395          * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
1396          * as we should not be calling schedule in NMI context.
1397          * Even with normal interrupts enabled. An NMI should not be
1398          * setting NEED_RESCHED or anything that normal interrupts and
1399          * exceptions might do.                  
1400          */                                      
1401         call    paranoid_entry                   
1402         UNWIND_HINT_REGS                         
1403                                                  
1404         movq    %rsp, %rdi                       
1405         call    exc_nmi                          
1406                                                  
1407         /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
1408         IBRS_EXIT save_reg=%r15                  
1409                                                  
1410         PARANOID_RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
1411                                                  
1412         /*                                       
1413          * The above invocation of paranoid_entry stored the GSBASE
1414          * related information in R/EBX depending on the availability
1415          * of FSGSBASE.                          
1416          *                                       
1417          * If FSGSBASE is enabled, restore the saved GSBASE value
1418          * unconditionally, otherwise take the conditional SWAPGS path.
1419          */                                      
1420         ALTERNATIVE "jmp nmi_no_fsgsbase", "", X86_FEATURE_FSGSBASE
1421                                                  
1422         wrgsbase        %rbx                     
1423         jmp     nmi_restore                      
1424                                                  
1425 nmi_no_fsgsbase:                                 
1426         /* EBX == 0 -> invoke SWAPGS */          
1427         testl   %ebx, %ebx                       
1428         jnz     nmi_restore                      
1429                                                  
1430 nmi_swapgs:                                      
1431         swapgs                                   
1432                                                  
1433 nmi_restore:                                     
1434         POP_REGS                                 
1435                                                  
1436         /*                                       
1437          * Skip orig_ax and the "outermost" frame to point RSP
1438          * at the "iret" frame.
1439          */                                      
1440         addq    $6*8, %rsp                       
1441                                                  
1442         /*                                       
1443          * Clear "NMI executing".  Set DF first so that we can easily
1444          * distinguish the remaining code between here and IRET from
1445          * the SYSCALL entry and exit paths.     
1446          *                                       
1447          * We arguably should just inspect RIP instead, but I (Andy) wrote
1448          * this code when I had the misapprehension that Xen PV supported
1449          * NMIs, and Xen PV would break that approach.
1450          */                                      
1451         std                                      
1452         movq    $0, 5*8(%rsp)           /* clear "NMI executing" */
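             /*
              * With RSP at "iret RIP", the "NMI executing" slot sits directly
              * above the five-word "iret" frame, hence the 5*8(%rsp) offset.
              */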
1453                                                  
1454         /*                                       
1455          * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like
1456          * NMI in kernel after user state is restored. For an unprivileged user
1457          * these conditions are hard to meet.    
1458          */                                      
1459                                                  
1460         /*                                       
1461          * iretq reads the "iret" frame and exits the NMI stack in a
1462          * single instruction.  We are returning to kernel mode, so this
1463          * cannot result in a fault.  Similarly, we don't need to worry
1464          * about espfix64 on the way back to kernel mode.
1465          */                                      
1466         iretq                                    
1467 SYM_CODE_END(asm_exc_nmi)                        
1468                                                  
1469 /*                                               
1470  * This handles SYSCALL from 32-bit code.  There is no way to program
1471  * MSRs to fully disable 32-bit SYSCALL.         
1472  */                                              
1473 SYM_CODE_START(entry_SYSCALL32_ignore)           
1474         UNWIND_HINT_END_OF_STACK                 
1475         ENDBR                                    
1476         mov     $-ENOSYS, %eax                   
1477         CLEAR_CPU_BUFFERS                        
1478         sysretl                                  
1479 SYM_CODE_END(entry_SYSCALL32_ignore)             
1480                                                  
1481 .pushsection .text, "ax"                         
1482         __FUNC_ALIGN                             
1483 SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
1484         UNWIND_HINT_FUNC                         
1485         /* Prevent any naive code from trying to unwind to our caller. */
1486         xorl    %ebp, %ebp                       
1487                                                  
1488         movq    PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rax
1489         leaq    -PTREGS_SIZE(%rax), %rsp         
1490         UNWIND_HINT_REGS                         
1491                                                  
1492         call    make_task_dead                   
1493 SYM_CODE_END(rewind_stack_and_make_dead)         
1494 .popsection                                      
1495                                                  
1496 /*                                               
1497  * This sequence executes branches in order to remove user branch information
1498  * from the branch history tracker in the Branch Predictor, therefore removing
1499  * user influence on subsequent BTB lookups.     
1500  *                                               
1501  * It should be used on parts prior to Alder Lake. Newer parts should use the
1502  * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
1503  * virtualized on newer hardware the VMM should protect against BHI attacks by
1504  * setting BHI_DIS_S for the guests.             
1505  *                                               
1506  * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
1507  * and not clearing the branch history. The call tree looks like:
1508  *                                               
1509  * call 1                                        
1510  *    call 2                                     
1511  *      call 2                                   
1512  *        call 2                                 
1513  *          call 2                               
1514  *            call 2                             
1515  *            ret                                
1516  *          ret                                  
1517  *        ret                                    
1518  *      ret                                      
1519  *    ret                                        
1520  * ret                                           
1521  *                                               
1522  * This means that the stack is non-constant and ORC can't unwind it with %rsp
1523  * alone.  Therefore we unconditionally set up the frame pointer, which allows
1524  * ORC to unwind properly.                       
1525  *                                               
1526  * The alignment is for performance and not for safety, and may be safely
1527  * refactored in the future if needed.           
1528  */                                              
1529 SYM_FUNC_START(clear_bhb_loop)                   
1530         push    %rbp                             
1531         mov     %rsp, %rbp                       
1532         movl    $5, %ecx                         
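             /*
              * %ecx counts the five outer rounds of the call tree shown in the
              * comment above; %eax (set at label 2) counts the five inner
              * loop iterations.
              */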
1533         ANNOTATE_INTRA_FUNCTION_CALL             
1534         call    1f                               
1535         jmp     5f                               
1536         .align 64, 0xcc                          
1537         ANNOTATE_INTRA_FUNCTION_CALL             
1538 1:      call    2f                               
1539         RET                                      
1540         .align 64, 0xcc                          
1541 2:      movl    $5, %eax                         
1542 3:      jmp     4f                               
1543         nop                                      
1544 4:      sub     $1, %eax                         
1545         jnz     3b                               
1546         sub     $1, %ecx                         
1547         jnz     1b                               
1548         RET                                      
1549 5:      lfence                                   
1550         pop     %rbp                             
1551         RET                                      
1552 SYM_FUNC_END(clear_bhb_loop)                     
1553 EXPORT_SYMBOL_GPL(clear_bhb_loop)                
1554 STACK_FRAME_NON_STANDARD(clear_bhb_loop)         
                                                      
