~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/x86/kvm/mmu.h

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0 */
  2 #ifndef __KVM_X86_MMU_H
  3 #define __KVM_X86_MMU_H
  4 
  5 #include <linux/kvm_host.h>
  6 #include "kvm_cache_regs.h"
  7 #include "cpuid.h"
  8 
/*
 * Declared here, defined elsewhere.
 * NOTE(review): the name suggests a switch for caching MMIO information in
 * SPTEs -- confirm against the definition.
 */
extern bool __read_mostly enable_mmio_caching;
 10 
 11 #define PT_WRITABLE_SHIFT 1
 12 #define PT_USER_SHIFT 2
 13 
 14 #define PT_PRESENT_MASK (1ULL << 0)
 15 #define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
 16 #define PT_USER_MASK (1ULL << PT_USER_SHIFT)
 17 #define PT_PWT_MASK (1ULL << 3)
 18 #define PT_PCD_MASK (1ULL << 4)
 19 #define PT_ACCESSED_SHIFT 5
 20 #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT)
 21 #define PT_DIRTY_SHIFT 6
 22 #define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT)
 23 #define PT_PAGE_SIZE_SHIFT 7
 24 #define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT)
 25 #define PT_PAT_MASK (1ULL << 7)
 26 #define PT_GLOBAL_MASK (1ULL << 8)
 27 #define PT64_NX_SHIFT 63
 28 #define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
 29 
 30 #define PT_PAT_SHIFT 7
 31 #define PT_DIR_PAT_SHIFT 12
 32 #define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
 33 
/*
 * Numeric root levels for the paging modes named by each macro.
 * NOTE(review): presumably the number of page-table levels walked from the
 * root in each mode -- confirm against the users of these constants.
 */
#define PT64_ROOT_5LEVEL 5
#define PT64_ROOT_4LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3
 38 
/*
 * Groups of CR4, CR0 and EFER bits, one macro per register.
 * NOTE(review): the "ROLE_BITS" naming suggests these are the control bits
 * that feed into the MMU role -- confirm against the users of these macros.
 */
#define KVM_MMU_CR4_ROLE_BITS (X86_CR4_PSE | X86_CR4_PAE | X86_CR4_LA57 | \
                               X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE)

#define KVM_MMU_CR0_ROLE_BITS (X86_CR0_PG | X86_CR0_WP)
#define KVM_MMU_EFER_ROLE_BITS (EFER_LME | EFER_NX)
 44 
 45 static __always_inline u64 rsvd_bits(int s, int e)
 46 {
 47         BUILD_BUG_ON(__builtin_constant_p(e) && __builtin_constant_p(s) && e < s);
 48 
 49         if (__builtin_constant_p(e))
 50                 BUILD_BUG_ON(e > 63);
 51         else
 52                 e &= 63;
 53 
 54         if (e < s)
 55                 return 0;
 56 
 57         return ((2ULL << (e - s)) - 1) << s;
 58 }
 59 
 60 static inline gfn_t kvm_mmu_max_gfn(void)
 61 {
 62         /*
 63          * Note that this uses the host MAXPHYADDR, not the guest's.
 64          * EPT/NPT cannot support GPAs that would exceed host.MAXPHYADDR;
 65          * assuming KVM is running on bare metal, guest accesses beyond
 66          * host.MAXPHYADDR will hit a #PF(RSVD) and never cause a vmexit
 67          * (either EPT Violation/Misconfig or #NPF), and so KVM will never
 68          * install a SPTE for such addresses.  If KVM is running as a VM
 69          * itself, on the other hand, it might see a MAXPHYADDR that is less
 70          * than hardware's real MAXPHYADDR.  Using the host MAXPHYADDR
 71          * disallows such SPTEs entirely and simplifies the TDP MMU.
 72          */
 73         int max_gpa_bits = likely(tdp_enabled) ? kvm_host.maxphyaddr : 52;
 74 
 75         return (1ULL << (max_gpa_bits - PAGE_SHIFT)) - 1;
 76 }
 77 
/*
 * The functions below are only declared in this header; their bodies live
 * elsewhere.
 */

/* NOTE(review): presumably the deepest TDP paging level supported -- confirm at the definition. */
u8 kvm_mmu_get_max_tdp_level(void);

/* Setters for SPTE/EPT mask configuration (value/mask pairs or feature flags). */
void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);

/* Per-vCPU MMU initialisation entry points, page-fault handling and helpers. */
void kvm_init_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
                             unsigned long cr4, u64 efer, gpa_t nested_cr3);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
                             int huge_page_level, bool accessed_dirty,
                             gpa_t new_eptp);
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
                                u64 fault_address, char *insn, int insn_len);
/* Out-of-line worker behind the inline kvm_mmu_refresh_passthrough_bits(). */
void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
                                        struct kvm_mmu *mmu);

/* kvm_mmu_load() is what the inline kvm_mmu_reload() calls when the root is invalid. */
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
                         int bytes);
103 
104 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
105 {
106         if (likely(vcpu->arch.mmu->root.hpa != INVALID_PAGE))
107                 return 0;
108 
109         return kvm_mmu_load(vcpu);
110 }
111 
112 static inline unsigned long kvm_get_pcid(struct kvm_vcpu *vcpu, gpa_t cr3)
113 {
114         BUILD_BUG_ON((X86_CR3_PCID_MASK & PAGE_MASK) != 0);
115 
116         return kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE)
117                ? cr3 & X86_CR3_PCID_MASK
118                : 0;
119 }
120 
121 static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
122 {
123         return kvm_get_pcid(vcpu, kvm_read_cr3(vcpu));
124 }
125 
126 static inline unsigned long kvm_get_active_cr3_lam_bits(struct kvm_vcpu *vcpu)
127 {
128         if (!guest_can_use(vcpu, X86_FEATURE_LAM))
129                 return 0;
130 
131         return kvm_read_cr3(vcpu) & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
132 }
133 
134 static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
135 {
136         u64 root_hpa = vcpu->arch.mmu->root.hpa;
137 
138         if (!VALID_PAGE(root_hpa))
139                 return;
140 
141         kvm_x86_call(load_mmu_pgd)(vcpu, root_hpa,
142                                    vcpu->arch.mmu->root_role.level);
143 }
144 
145 static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
146                                                     struct kvm_mmu *mmu)
147 {
148         /*
149          * When EPT is enabled, KVM may passthrough CR0.WP to the guest, i.e.
150          * @mmu's snapshot of CR0.WP and thus all related paging metadata may
151          * be stale.  Refresh CR0.WP and the metadata on-demand when checking
152          * for permission faults.  Exempt nested MMUs, i.e. MMUs for shadowing
153          * nEPT and nNPT, as CR0.WP is ignored in both cases.  Note, KVM does
154          * need to refresh nested_mmu, a.k.a. the walker used to translate L2
155          * GVAs to GPAs, as that "MMU" needs to honor L2's CR0.WP.
156          */
157         if (!tdp_enabled || mmu == &vcpu->arch.guest_mmu)
158                 return;
159 
160         __kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
161 }
162 
/*
 * Check whether a given access (described through the I/D, W/R and U/S bits
 * of a page fault error code, pfec) causes a permission fault with the given
 * PTE access rights (in ACC_* format).
 *
 * @vcpu:       vCPU performing the access; supplies RFLAGS and PKRU.
 * @mmu:        MMU whose permissions[] bitmap and pkru_mask are consulted.
 * @pte_access: access rights of the PTE, in ACC_* format.
 * @pte_pkey:   protection-key index of the PTE.
 * @access:     page fault error code bits describing the access.
 *
 * Return zero if the access does not fault; return the page fault error code
 * if the access faults.  The return type is u8, so only the low eight bits
 * of the computed error code are returned.
 */
static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                                  unsigned pte_access, unsigned pte_pkey,
                                  u64 access)
{
        /*
         * The narrowing assignment drops the upper bits of @access, which
         * strips the nested paging fault error codes.
         */
        unsigned int pfec = access;
        unsigned long rflags = kvm_x86_call(get_rflags)(vcpu);

        /*
         * For explicit supervisor accesses, SMAP is disabled if EFLAGS.AC = 1.
         * For implicit supervisor accesses, SMAP cannot be overridden.
         *
         * SMAP applies to supervisor accesses only; for user accesses,
         * whether not_smap is set or clear has no bearing on the result.
         *
         * We put the SMAP checking bit in place of the PFERR_RSVD_MASK bit;
         * this bit will always be zero in pfec, but it will be one in index
         * if SMAP checks are being disabled.
         */
        u64 implicit_access = access & PFERR_IMPLICIT_ACCESS;
        bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC;
        /* Shift right by one: the lowest pfec bit is not part of the index. */
        int index = (pfec | (not_smap ? PFERR_RSVD_MASK : 0)) >> 1;
        u32 errcode = PFERR_PRESENT_MASK;
        bool fault;

        /* Bring @mmu's metadata up to date before reading permissions[]. */
        kvm_mmu_refresh_passthrough_bits(vcpu, mmu);

        /* Each permissions[] entry is a bitmap indexed by pte_access. */
        fault = (mmu->permissions[index] >> pte_access) & 1;

        /* Callers must not pass PK or RSVD in the error code. */
        WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
        if (unlikely(mmu->pkru_mask)) {
                u32 pkru_bits, offset;

                /*
                 * PKRU defines 32 bits, there are 16 domains and 2
                 * attribute bits per domain in pkru.  pte_pkey is the
                 * index of the protection domain, so pte_pkey * 2 is
                 * the index of the first bit for the domain.
                 */
                pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3;

                /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
                offset = (pfec & ~1) | ((pte_access & PT_USER_MASK) ? PFERR_RSVD_MASK : 0);

                /* Keep only the pkru bits that pkru_mask enables for this access. */
                pkru_bits &= mmu->pkru_mask >> offset;
                /*
                 * Adds PFERR_PK_MASK to errcode when pkru_bits is non-zero
                 * (relies on the mask bit lying above the two pkru bits).
                 */
                errcode |= -pkru_bits & PFERR_PK_MASK;
                fault |= (pkru_bits != 0);
        }

        /* -(u32)fault is all ones when faulting, zero otherwise. */
        return -(u32)fault & errcode;
}
223 
/* Declared here, implemented elsewhere. */
bool kvm_mmu_may_ignore_guest_pat(void);

int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);

/*
 * NOTE(review): by their names these are VM lifecycle hooks (after VM init,
 * before VM destruction) -- confirm against the call sites.
 */
int kvm_mmu_post_init_vm(struct kvm *kvm);
void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
230 
231 static inline bool kvm_shadow_root_allocated(struct kvm *kvm)
232 {
233         /*
234          * Read shadow_root_allocated before related pointers. Hence, threads
235          * reading shadow_root_allocated in any lock context are guaranteed to
236          * see the pointers. Pairs with smp_store_release in
237          * mmu_first_shadow_root_alloc.
238          */
239         return smp_load_acquire(&kvm->arch.shadow_root_allocated);
240 }
241 
/*
 * tdp_mmu_enabled is a real variable only when CONFIG_X86_64 is defined.
 * Otherwise it is the constant false, so tests of it are resolved at
 * compile time.
 */
#ifdef CONFIG_X86_64
extern bool tdp_mmu_enabled;
#else
#define tdp_mmu_enabled false
#endif
247 
248 static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
249 {
250         return !tdp_mmu_enabled || kvm_shadow_root_allocated(kvm);
251 }
252 
253 static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
254 {
255         /* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0. */
256         return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
257                 (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
258 }
259 
260 static inline unsigned long
261 __kvm_mmu_slot_lpages(struct kvm_memory_slot *slot, unsigned long npages,
262                       int level)
263 {
264         return gfn_to_index(slot->base_gfn + npages - 1,
265                             slot->base_gfn, level) + 1;
266 }
267 
268 static inline unsigned long
269 kvm_mmu_slot_lpages(struct kvm_memory_slot *slot, int level)
270 {
271         return __kvm_mmu_slot_lpages(slot, slot->npages, level);
272 }
273 
274 static inline void kvm_update_page_stats(struct kvm *kvm, int level, int count)
275 {
276         atomic64_add(count, &kvm->stat.pages[level - 1]);
277 }
278 
/*
 * Implemented elsewhere; kvm_translate_gpa() below calls it when the MMU in
 * use is the vCPU's nested_mmu.
 */
gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u64 access,
                           struct x86_exception *exception);
281 
282 static inline gpa_t kvm_translate_gpa(struct kvm_vcpu *vcpu,
283                                       struct kvm_mmu *mmu,
284                                       gpa_t gpa, u64 access,
285                                       struct x86_exception *exception)
286 {
287         if (mmu != &vcpu->arch.nested_mmu)
288                 return gpa;
289         return translate_nested_gpa(vcpu, gpa, access, exception);
290 }
291 #endif
292 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php