
TOMOYO Linux Cross Reference
Linux/arch/arm64/kvm/at.c


// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 - Linaro Ltd
 * Author: Jintack Lim <jintack.lim@linaro.org>
 */

#include <linux/kvm_host.h>

#include <asm/esr.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

enum trans_regime {
        TR_EL10,
        TR_EL20,
        TR_EL2,
};

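/*
 * Parameters of an emulated stage-1 walk, as derived from the guest's
 * translation controls by setup_s1_walk().
 */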
struct s1_walk_info {
        u64                     baddr;
        enum trans_regime       regime;
        unsigned int            max_oa_bits;
        unsigned int            pgshift;
        unsigned int            txsz;
        int                     sl;
        bool                    hpd;
        bool                    be;
        bool                    s2;
};

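/*
 * Outcome of a stage-1 walk: either the final descriptor, output address
 * and accumulated table attributes, or a fault description when @failed
 * is set.
 */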
struct s1_walk_result {
        union {
                struct {
                        u64     desc;
                        u64     pa;
                        s8      level;
                        u8      APTable;
                        bool    UXNTable;
                        bool    PXNTable;
                };
                struct {
                        u8      fst;
                        bool    ptw;
                        bool    s2;
                };
        };
        bool    failed;
};

static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2)
{
        wr->fst         = fst;
        wr->ptw         = ptw;
        wr->s2          = s2;
        wr->failed      = true;
}

#define S1_MMU_DISABLED         (-127)

static int get_ia_size(struct s1_walk_info *wi)
{
        return 64 - wi->txsz;
}

/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
        return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}

/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
        /*
         * We only get here from guest EL2, so the translation
         * regime AT applies to is solely defined by {E2H,TGE}.
         */
        switch (op) {
        case OP_AT_S1E2R:
        case OP_AT_S1E2W:
        case OP_AT_S1E2A:
                return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
                break;
        default:
                return (vcpu_el2_e2h_is_set(vcpu) &&
                        vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
        }
}

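/*
 * Decode the translation controls (SCTLR, TCR, TTBRx) of the regime
 * targeted by the AT instruction, sanity-check the VA and the register
 * values, and fill @wi with the resulting walk parameters. Returns 0 on
 * success (with wr->level == S1_MMU_DISABLED denoting a flat mapping),
 * or -EFAULT with @wr describing a level-0 fault.
 */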
static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
                         struct s1_walk_result *wr, u64 va)
{
        u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
        unsigned int stride, x;
        bool va55, tbi, lva, as_el0;

        hcr = __vcpu_sys_reg(vcpu, HCR_EL2);

        wi->regime = compute_translation_regime(vcpu, op);
        as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);

        va55 = va & BIT(55);

        if (wi->regime == TR_EL2 && va55)
                goto addrsz;

        wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));

        switch (wi->regime) {
        case TR_EL10:
                sctlr   = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
                tcr     = vcpu_read_sys_reg(vcpu, TCR_EL1);
                ttbr    = (va55 ?
                           vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
                           vcpu_read_sys_reg(vcpu, TTBR0_EL1));
                break;
        case TR_EL2:
        case TR_EL20:
                sctlr   = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
                tcr     = vcpu_read_sys_reg(vcpu, TCR_EL2);
                ttbr    = (va55 ?
                           vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
                           vcpu_read_sys_reg(vcpu, TTBR0_EL2));
                break;
        default:
                BUG();
        }

        tbi = (wi->regime == TR_EL2 ?
               FIELD_GET(TCR_EL2_TBI, tcr) :
               (va55 ?
                FIELD_GET(TCR_TBI1, tcr) :
                FIELD_GET(TCR_TBI0, tcr)));

        if (!tbi && (u64)sign_extend64(va, 55) != va)
                goto addrsz;

        va = (u64)sign_extend64(va, 55);

        /* Let's put the MMU disabled case aside immediately */
        switch (wi->regime) {
        case TR_EL10:
                /*
                 * If dealing with the EL1&0 translation regime, 3 things
                 * can disable the S1 translation:
                 *
                 * - HCR_EL2.DC = 1
                 * - HCR_EL2.{E2H,TGE} = {0,1}
                 * - SCTLR_EL1.M = 0
                 *
                 * The TGE part is interesting. If we have decided that this
                 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
                 * {0,x}, and we only need to test for TGE == 1.
                 */
                if (hcr & (HCR_DC | HCR_TGE)) {
                        wr->level = S1_MMU_DISABLED;
                        break;
                }
                fallthrough;
        case TR_EL2:
        case TR_EL20:
                if (!(sctlr & SCTLR_ELx_M))
                        wr->level = S1_MMU_DISABLED;
                break;
        }

        if (wr->level == S1_MMU_DISABLED) {
                if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
                        goto addrsz;

                wr->pa = va;
                return 0;
        }

        wi->be = sctlr & SCTLR_ELx_EE;

        wi->hpd  = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
        wi->hpd &= (wi->regime == TR_EL2 ?
                    FIELD_GET(TCR_EL2_HPD, tcr) :
                    (va55 ?
                     FIELD_GET(TCR_HPD1, tcr) :
                     FIELD_GET(TCR_HPD0, tcr)));

        /* Someone was silly enough to encode TG0/TG1 differently */
        if (va55) {
                wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
                tg = FIELD_GET(TCR_TG1_MASK, tcr);

                switch (tg << TCR_TG1_SHIFT) {
                case TCR_TG1_4K:
                        wi->pgshift = 12;        break;
                case TCR_TG1_16K:
                        wi->pgshift = 14;        break;
                case TCR_TG1_64K:
                default:            /* IMPDEF: treat any other value as 64k */
                        wi->pgshift = 16;        break;
                }
        } else {
                wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
                tg = FIELD_GET(TCR_TG0_MASK, tcr);

                switch (tg << TCR_TG0_SHIFT) {
                case TCR_TG0_4K:
                        wi->pgshift = 12;        break;
                case TCR_TG0_16K:
                        wi->pgshift = 14;        break;
                case TCR_TG0_64K:
                default:            /* IMPDEF: treat any other value as 64k */
                        wi->pgshift = 16;        break;
                }
        }

        /* R_PLCGL, R_YXNYW */
        if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
                if (wi->txsz > 39)
                        goto transfault_l0;
        } else {
                if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
                        goto transfault_l0;
        }

        /* R_GTJBY, R_SXWGM */
        switch (BIT(wi->pgshift)) {
        case SZ_4K:
                lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
                lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
                break;
        case SZ_16K:
                lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
                lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
                break;
        case SZ_64K:
                lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
                break;
        }

        if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
                goto transfault_l0;

        ia_bits = get_ia_size(wi);

        /* R_YYVYV, I_THCZK */
        if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
            (va55 && va < GENMASK(63, ia_bits)))
                goto transfault_l0;

        /* I_ZFSYQ */
        if (wi->regime != TR_EL2 &&
            (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
                goto transfault_l0;

        /* R_BNDVG and following statements */
        if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
            as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
                goto transfault_l0;

        /* AArch64.S1StartLevel() */
        stride = wi->pgshift - 3;
        wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);

        ps = (wi->regime == TR_EL2 ?
              FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));

        wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));

        /* Compute minimal alignment */
        x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);

        wi->baddr = ttbr & TTBRx_EL1_BADDR;

        /* R_VPBBF */
        if (check_output_size(wi->baddr, wi))
                goto addrsz;

        wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);

        return 0;

addrsz:                         /* Address Size Fault level 0 */
        fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false);
        return -EFAULT;

transfault_l0:                  /* Translation Fault level 0 */
        fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false);
        return -EFAULT;
}

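/*
 * Software walk of the guest's stage-1 page tables, using the parameters
 * computed by setup_s1_walk(). Each table access is itself translated
 * through stage-2 when one is in effect (wi->s2).
 */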
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
                   struct s1_walk_result *wr, u64 va)
{
        u64 va_top, va_bottom, baddr, desc;
        int level, stride, ret;

        level = wi->sl;
        stride = wi->pgshift - 3;
        baddr = wi->baddr;

        va_top = get_ia_size(wi) - 1;

        while (1) {
                u64 index, ipa;

                va_bottom = (3 - level) * stride + wi->pgshift;
                index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);

                ipa = baddr | index;

                if (wi->s2) {
                        struct kvm_s2_trans s2_trans = {};

                        ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
                        if (ret) {
                                fail_s1_walk(wr,
                                             (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
                                             true, true);
                                return ret;
                        }

                        if (!kvm_s2_trans_readable(&s2_trans)) {
                                fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
                                             true, true);

                                return -EPERM;
                        }

                        ipa = kvm_s2_trans_output(&s2_trans);
                }

                ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
                if (ret) {
                        fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level),
                                     true, false);
                        return ret;
                }

                if (wi->be)
                        desc = be64_to_cpu((__force __be64)desc);
                else
                        desc = le64_to_cpu((__force __le64)desc);

                /* Invalid descriptor */
                if (!(desc & BIT(0)))
                        goto transfault;

                /* Block mapping, check validity down the line */
                if (!(desc & BIT(1)))
                        break;

                /* Page mapping */
                if (level == 3)
                        break;

                /* Table handling */
                if (!wi->hpd) {
                        wr->APTable  |= FIELD_GET(S1_TABLE_AP, desc);
                        wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
                        wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
                }

                baddr = desc & GENMASK_ULL(47, wi->pgshift);

                /* Check for out-of-range OA */
                if (check_output_size(baddr, wi))
                        goto addrsz;

                /* Prepare for next round */
                va_top = va_bottom - 1;
                level++;
        }

        /* Block mapping, check the validity of the level */
        if (!(desc & BIT(1))) {
                bool valid_block = false;

                switch (BIT(wi->pgshift)) {
                case SZ_4K:
                        valid_block = level == 1 || level == 2;
                        break;
                case SZ_16K:
                case SZ_64K:
                        valid_block = level == 2;
                        break;
                }

                if (!valid_block)
                        goto transfault;
        }

        if (check_output_size(desc & GENMASK(47, va_bottom), wi))
                goto addrsz;

        va_bottom += contiguous_bit_shift(desc, wi, level);

        wr->failed = false;
        wr->level = level;
        wr->desc = desc;
        wr->pa = desc & GENMASK(47, va_bottom);
        wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);

        return 0;

addrsz:
        fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false);
        return -EINVAL;
transfault:
        fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false);
        return -ENOENT;
}

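/*
 * Snapshot of the translation state that the fast AT path temporarily
 * replaces (EL1 translation registers, stage-2 configuration and
 * HCR_EL2), so that it can be restored afterwards.
 */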
struct mmu_config {
        u64     ttbr0;
        u64     ttbr1;
        u64     tcr;
        u64     mair;
        u64     sctlr;
        u64     vttbr;
        u64     vtcr;
        u64     hcr;
};

static void __mmu_config_save(struct mmu_config *config)
{
        config->ttbr0   = read_sysreg_el1(SYS_TTBR0);
        config->ttbr1   = read_sysreg_el1(SYS_TTBR1);
        config->tcr     = read_sysreg_el1(SYS_TCR);
        config->mair    = read_sysreg_el1(SYS_MAIR);
        config->sctlr   = read_sysreg_el1(SYS_SCTLR);
        config->vttbr   = read_sysreg(vttbr_el2);
        config->vtcr    = read_sysreg(vtcr_el2);
        config->hcr     = read_sysreg(hcr_el2);
}

static void __mmu_config_restore(struct mmu_config *config)
{
        write_sysreg(config->hcr,       hcr_el2);

        /*
         * ARM errata 1165522 and 1530923 require TGE to be 1 before
         * we update the guest state.
         */
        asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

        write_sysreg_el1(config->ttbr0, SYS_TTBR0);
        write_sysreg_el1(config->ttbr1, SYS_TTBR1);
        write_sysreg_el1(config->tcr,   SYS_TCR);
        write_sysreg_el1(config->mair,  SYS_MAIR);
        write_sysreg_el1(config->sctlr, SYS_SCTLR);
        write_sysreg(config->vttbr,     vttbr_el2);
        write_sysreg(config->vtcr,      vtcr_el2);
}

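/*
 * Issue AT S1E1{R,W}P with PSTATE.PAN temporarily reflecting the guest's
 * PAN bit, so that the hardware translation honours the guest PAN state.
 */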
static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
        u64 host_pan;
        bool fail;

        host_pan = read_sysreg_s(SYS_PSTATE_PAN);
        write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);

        switch (op) {
        case OP_AT_S1E1RP:
                fail = __kvm_at(OP_AT_S1E1RP, vaddr);
                break;
        case OP_AT_S1E1WP:
                fail = __kvm_at(OP_AT_S1E1WP, vaddr);
                break;
        }

        write_sysreg_s(host_pan, SYS_PSTATE_PAN);

        return fail;
}

#define MEMATTR(ic, oc)         (MEMATTR_##oc << 4 | MEMATTR_##ic)
#define MEMATTR_NC              0b0100
#define MEMATTR_Wt              0b1000
#define MEMATTR_Wb              0b1100
#define MEMATTR_WbRaWa          0b1111

#define MEMATTR_IS_DEVICE(m)    (((m) & GENMASK(7, 4)) == 0)

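/*
 * Convert a stage-2 MemAttr[3:0] field into a MAIR-style 8-bit attribute
 * (only used when FWB is not in effect).
 */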
static u8 s2_memattr_to_attr(u8 memattr)
{
        memattr &= 0b1111;

        switch (memattr) {
        case 0b0000:
        case 0b0001:
        case 0b0010:
        case 0b0011:
                return memattr << 2;
        case 0b0100:
                return MEMATTR(Wb, Wb);
        case 0b0101:
                return MEMATTR(NC, NC);
        case 0b0110:
                return MEMATTR(Wt, NC);
        case 0b0111:
                return MEMATTR(Wb, NC);
        case 0b1000:
                /* Reserved, assume NC */
                return MEMATTR(NC, NC);
        case 0b1001:
                return MEMATTR(NC, Wt);
        case 0b1010:
                return MEMATTR(Wt, Wt);
        case 0b1011:
                return MEMATTR(Wb, Wt);
        case 0b1100:
                /* Reserved, assume NC */
                return MEMATTR(NC, NC);
        case 0b1101:
                return MEMATTR(NC, Wb);
        case 0b1110:
                return MEMATTR(Wt, Wb);
        case 0b1111:
                return MEMATTR(Wb, Wb);
        default:
                unreachable();
        }
}

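/*
 * Combine the S1 and S2 cacheability for one half (inner or outer) of the
 * attribute: the weakest of the two wins, with the allocation hints taken
 * from S1, as per S2CombineS1AttrHints().
 */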
static u8 combine_s1_s2_attr(u8 s1, u8 s2)
{
        bool transient;
        u8 final = 0;

        /* Upgrade transient s1 to non-transient to simplify things */
        switch (s1) {
        case 0b0001 ... 0b0011: /* Normal, Write-Through Transient */
                transient = true;
                s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
                break;
        case 0b0101 ... 0b0111: /* Normal, Write-Back Transient */
                transient = true;
                s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
                break;
        default:
                transient = false;
        }

        /* S2CombineS1AttrHints() */
        if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
            (s2 & GENMASK(3, 2)) == MEMATTR_NC)
                final = MEMATTR_NC;
        else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
                 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
                final = MEMATTR_Wt;
        else
                final = MEMATTR_Wb;

        if (final != MEMATTR_NC) {
                /* Inherit RaWa hints from S1 */
                if (transient) {
                        switch (s1 & GENMASK(3, 2)) {
                        case MEMATTR_Wt:
                                final = 0;
                                break;
                        case MEMATTR_Wb:
                                final = MEMATTR_NC;
                                break;
                        }
                }

                final |= s1 & GENMASK(1, 0);
        }

        return final;
}

#define ATTR_NSH        0b00
#define ATTR_RSV        0b01
#define ATTR_OSH        0b10
#define ATTR_ISH        0b11

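/* Compute the shareability (PAR_EL1.SH) for a given attribute and descriptor */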
static u8 compute_sh(u8 attr, u64 desc)
{
        u8 sh;

        /* Any form of device, as well as NC has SH[1:0]=0b10 */
        if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
                return ATTR_OSH;

        sh = FIELD_GET(PTE_SHARED, desc);
        if (sh == ATTR_RSV)             /* Reserved, mapped to NSH */
                sh = ATTR_NSH;

        return sh;
}

static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
        if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
                return ATTR_OSH;
        if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
                return ATTR_ISH;

        return ATTR_NSH;
}

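/*
 * Fold the stage-2 translation result into the stage-1 PAR_EL1 value,
 * combining output address, memory attributes and shareability for a
 * combined S1+S2 translation.
 */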
static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
                           struct kvm_s2_trans *tr)
{
        u8 s1_parattr, s2_memattr, final_attr;
        u64 par;

        /* If S2 has failed to translate, report the damage */
        if (tr->esr) {
                par = SYS_PAR_EL1_RES1;
                par |= SYS_PAR_EL1_F;
                par |= SYS_PAR_EL1_S;
                par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
                return par;
        }

        s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
        s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);

        if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
                if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
                        s2_memattr &= ~BIT(3);

                /* Combination of R_VRJSW and R_RHWZM */
                switch (s2_memattr) {
                case 0b0101:
                        if (MEMATTR_IS_DEVICE(s1_parattr))
                                final_attr = s1_parattr;
                        else
                                final_attr = MEMATTR(NC, NC);
                        break;
                case 0b0110:
                case 0b1110:
                        final_attr = MEMATTR(WbRaWa, WbRaWa);
                        break;
                case 0b0111:
                case 0b1111:
                        /* Preserve S1 attribute */
                        final_attr = s1_parattr;
                        break;
                case 0b0100:
                case 0b1100:
                case 0b1101:
                        /* Reserved, do something non-silly */
                        final_attr = s1_parattr;
                        break;
                default:
                        /* MemAttr[2]=0, Device from S2 */
                        final_attr = (s2_memattr & GENMASK(1,0)) << 2;
                }
        } else {
                /* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
                u8 s2_parattr = s2_memattr_to_attr(s2_memattr);

                if (MEMATTR_IS_DEVICE(s1_parattr) ||
                    MEMATTR_IS_DEVICE(s2_parattr)) {
                        final_attr = min(s1_parattr, s2_parattr);
                } else {
                        /* At this stage, this is memory vs memory */
                        final_attr  = combine_s1_s2_attr(s1_parattr & 0xf,
                                                         s2_parattr & 0xf);
                        final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
                                                         s2_parattr >> 4) << 4;
                }
        }

        if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
            !MEMATTR_IS_DEVICE(final_attr))
                final_attr = MEMATTR(NC, NC);

        par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
        par |= tr->output & GENMASK(47, 12);
        par |= FIELD_PREP(SYS_PAR_EL1_SH,
                          combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
                                     compute_sh(final_attr, tr->desc)));

        return par;
}

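/* Turn a stage-1 walk result into the corresponding PAR_EL1 value */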
static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
                          enum trans_regime regime)
{
        u64 par;

        if (wr->failed) {
                par = SYS_PAR_EL1_RES1;
                par |= SYS_PAR_EL1_F;
                par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
                par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
                par |= wr->s2 ? SYS_PAR_EL1_S : 0;
        } else if (wr->level == S1_MMU_DISABLED) {
                /* MMU off or HCR_EL2.DC == 1 */
                par  = SYS_PAR_EL1_NSE;
                par |= wr->pa & GENMASK_ULL(47, 12);

                if (regime == TR_EL10 &&
                    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
                        par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
                                          MEMATTR(WbRaWa, WbRaWa));
                        par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
                } else {
                        par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
                        par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
                }
        } else {
                u64 mair, sctlr;
                u8 sh;

                par  = SYS_PAR_EL1_NSE;

                mair = (regime == TR_EL10 ?
                        vcpu_read_sys_reg(vcpu, MAIR_EL1) :
                        vcpu_read_sys_reg(vcpu, MAIR_EL2));

                mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
                mair &= 0xff;

                sctlr = (regime == TR_EL10 ?
                         vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
                         vcpu_read_sys_reg(vcpu, SCTLR_EL2));

                /* Force NC for memory if SCTLR_ELx.C is clear */
                if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
                        mair = MEMATTR(NC, NC);

                par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
                par |= wr->pa & GENMASK_ULL(47, 12);

                sh = compute_sh(mair, wr->desc);
                par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
        }

        return par;
}

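/* Check whether FEAT_PAN3 is implemented and enabled (SCTLR_ELx.EPAN) */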
static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
        u64 sctlr;

        if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
                return false;

        if (regime == TR_EL10)
                sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
        else
                sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);

        return sctlr & SCTLR_EL1_EPAN;
}

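/*
 * Emulate the AT instruction entirely in software: walk the guest's
 * stage-1 tables, apply the direct permission model, and derive the
 * resulting PAR_EL1 value.
 */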
static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
        bool perm_fail, ur, uw, ux, pr, pw, px;
        struct s1_walk_result wr = {};
        struct s1_walk_info wi = {};
        int ret, idx;

        ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
        if (ret)
                goto compute_par;

        if (wr.level == S1_MMU_DISABLED)
                goto compute_par;

        idx = srcu_read_lock(&vcpu->kvm->srcu);

        ret = walk_s1(vcpu, &wi, &wr, vaddr);

        srcu_read_unlock(&vcpu->kvm->srcu, idx);

        if (ret)
                goto compute_par;

        /* FIXME: revisit when adding indirect permission support */
        /* AArch64.S1DirectBasePermissions() */
        if (wi.regime != TR_EL2) {
                switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr.desc)) {
                case 0b00:
                        pr = pw = true;
                        ur = uw = false;
                        break;
                case 0b01:
                        pr = pw = ur = uw = true;
                        break;
                case 0b10:
                        pr = true;
                        pw = ur = uw = false;
                        break;
                case 0b11:
                        pr = ur = true;
                        pw = uw = false;
                        break;
                }

                switch (wr.APTable) {
                case 0b00:
                        break;
                case 0b01:
                        ur = uw = false;
                        break;
                case 0b10:
                        pw = uw = false;
                        break;
                case 0b11:
                        pw = ur = uw = false;
                        break;
                }

                /* We don't use px for anything yet, but hey... */
                px = !((wr.desc & PTE_PXN) || wr.PXNTable || uw);
                ux = !((wr.desc & PTE_UXN) || wr.UXNTable);

                if (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) {
                        bool pan;

                        pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT;
                        pan &= ur || uw || (pan3_enabled(vcpu, wi.regime) && ux);
                        pw &= !pan;
                        pr &= !pan;
                }
        } else {
                ur = uw = ux = false;

                if (!(wr.desc & PTE_RDONLY)) {
                        pr = pw = true;
                } else {
                        pr = true;
                        pw = false;
                }

                if (wr.APTable & BIT(1))
                        pw = false;

                /* XN maps to UXN */
                px = !((wr.desc & PTE_UXN) || wr.UXNTable);
        }

        perm_fail = false;

        switch (op) {
        case OP_AT_S1E1RP:
        case OP_AT_S1E1R:
        case OP_AT_S1E2R:
                perm_fail = !pr;
                break;
        case OP_AT_S1E1WP:
        case OP_AT_S1E1W:
        case OP_AT_S1E2W:
                perm_fail = !pw;
                break;
        case OP_AT_S1E0R:
                perm_fail = !ur;
                break;
        case OP_AT_S1E0W:
                perm_fail = !uw;
                break;
        case OP_AT_S1E1A:
        case OP_AT_S1E2A:
                break;
        default:
                BUG();
        }

        if (perm_fail)
                fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false);

compute_par:
        return compute_par_s1(vcpu, &wr, wi.regime);
}

/*
 * Return the PAR_EL1 value as the result of a valid translation.
 *
 * If the translation is unsuccessful, the value may only contain
 * PAR_EL1.F, and cannot be taken at face value. It isn't an
 * indication of the translation having failed, only that the fast
 * path did not succeed, *unless* it indicates a S1 permission fault.
 */
static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
        struct mmu_config config;
        struct kvm_s2_mmu *mmu;
        bool fail;
        u64 par;

        par = SYS_PAR_EL1_F;

        /*
         * We've trapped, so everything is live on the CPU. As we will
         * be switching contexts behind everybody's back, disable
         * interrupts while holding the mmu lock.
         */
        guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);

        /*
         * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
         * the right one (as we trapped from vEL2). If not, save the
         * full MMU context.
         */
        if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
                goto skip_mmu_switch;

        /*
         * Obtaining the S2 MMU for a L2 is horribly racy, and we may not
         * find it (recycled by another vcpu, for example). When this
         * happens, admit defeat immediately and use the SW (slow) path.
         */
        mmu = lookup_s2_mmu(vcpu);
        if (!mmu)
                return par;

        __mmu_config_save(&config);

        write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1),    SYS_TTBR0);
        write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1),    SYS_TTBR1);
        write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1),      SYS_TCR);
        write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1),     SYS_MAIR);
        write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1),    SYS_SCTLR);
        __load_stage2(mmu, mmu->arch);

skip_mmu_switch:
        /*
         * Clear TGE, enable S2 translation, we're rolling. Use the live
         * HCR_EL2 value, as the saved config is not valid when the MMU
         * switch has been skipped.
         */
        write_sysreg((read_sysreg(hcr_el2) & ~HCR_TGE) | HCR_VM, hcr_el2);
        isb();

        switch (op) {
        case OP_AT_S1E1RP:
        case OP_AT_S1E1WP:
                fail = at_s1e1p_fast(vcpu, op, vaddr);
                break;
        case OP_AT_S1E1R:
                fail = __kvm_at(OP_AT_S1E1R, vaddr);
                break;
        case OP_AT_S1E1W:
                fail = __kvm_at(OP_AT_S1E1W, vaddr);
                break;
        case OP_AT_S1E0R:
                fail = __kvm_at(OP_AT_S1E0R, vaddr);
                break;
        case OP_AT_S1E0W:
                fail = __kvm_at(OP_AT_S1E0W, vaddr);
                break;
        case OP_AT_S1E1A:
                fail = __kvm_at(OP_AT_S1E1A, vaddr);
                break;
        default:
                WARN_ON_ONCE(1);
                fail = true;
                break;
        }

        if (!fail)
                par = read_sysreg_par();

        if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
                __mmu_config_restore(&config);

        return par;
}

static bool par_check_s1_perm_fault(u64 par)
{
        u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

        return  ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
                 !(par & SYS_PAR_EL1_S));
}

void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
        u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);

        /*
         * If PAR_EL1 reports that AT failed on a S1 permission fault, we
         * know for sure that the PTW was able to walk the S1 tables and
         * there's nothing else to do.
         *
         * If AT failed for any other reason, then we must walk the guest S1
         * to emulate the instruction.
         */
        if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
                par = handle_at_slow(vcpu, op, vaddr);

        vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

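/*
 * Emulate AT S1E2{R,W,A} for a guest hypervisor by issuing the equivalent
 * AT S1E1 instruction with HCR_EL2 temporarily adjusted, falling back to
 * the software walker if the hardware translation fails.
 */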
void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
        u64 par;

        /*
         * We've trapped, so everything is live on the CPU. As we will be
         * switching context behind everybody's back, disable interrupts...
         */
        scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
                struct kvm_s2_mmu *mmu;
                u64 val, hcr;
                bool fail;

                mmu = &vcpu->kvm->arch.mmu;

                val = hcr = read_sysreg(hcr_el2);
                val &= ~HCR_TGE;
                val |= HCR_VM;

                if (!vcpu_el2_e2h_is_set(vcpu))
                        val |= HCR_NV | HCR_NV1;

                write_sysreg(val, hcr_el2);
                isb();

                par = SYS_PAR_EL1_F;

                switch (op) {
                case OP_AT_S1E2R:
                        fail = __kvm_at(OP_AT_S1E1R, vaddr);
                        break;
                case OP_AT_S1E2W:
                        fail = __kvm_at(OP_AT_S1E1W, vaddr);
                        break;
                case OP_AT_S1E2A:
                        fail = __kvm_at(OP_AT_S1E1A, vaddr);
                        break;
                default:
                        WARN_ON_ONCE(1);
                        fail = true;
                }

                isb();

                if (!fail)
                        par = read_sysreg_par();

                write_sysreg(hcr, hcr_el2);
                isb();
        }

        /* We failed the translation, let's replay it in slow motion */
        if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
                par = handle_at_slow(vcpu, op, vaddr);

        vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

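/*
 * Emulate AT S12E{0,1}{R,W}: perform the stage-1 translation via
 * __kvm_at_s1e01(), then walk the guest's stage-2 tables and merge both
 * results into the final PAR_EL1 value.
 */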
void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
        struct kvm_s2_trans out = {};
        u64 ipa, par;
        bool write;
        int ret;

        /* Do the stage-1 translation */
        switch (op) {
        case OP_AT_S12E1R:
                op = OP_AT_S1E1R;
                write = false;
                break;
        case OP_AT_S12E1W:
                op = OP_AT_S1E1W;
                write = true;
                break;
        case OP_AT_S12E0R:
                op = OP_AT_S1E0R;
                write = false;
                break;
        case OP_AT_S12E0W:
                op = OP_AT_S1E0W;
                write = true;
                break;
        default:
                WARN_ON_ONCE(1);
                return;
        }

        __kvm_at_s1e01(vcpu, op, vaddr);
        par = vcpu_read_sys_reg(vcpu, PAR_EL1);
        if (par & SYS_PAR_EL1_F)
                return;

        /*
         * If we only have a single stage of translation (E2H=0 or
         * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
         */
        if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
            !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
                return;

        /* Do the stage-2 translation */
        ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
        out.esr = 0;
        ret = kvm_walk_nested_s2(vcpu, ipa, &out);
        if (ret < 0)
                return;

        /* Check the access permission */
        if (!out.esr &&
            ((!write && !out.readable) || (write && !out.writable)))
                out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);

        par = compute_par_s12(vcpu, par, &out);
        vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}
