Re: [Xen-devel] [PATCH for-4.12 6/8] xen/arm: Implement workaround for Cortex-A76 erratum 1165522
On Wed, 28 Nov 2018, Julien Grall wrote:
> Early versions of Cortex-A76 can end up with corrupt TLBs if they
> speculate an AT instruction while the S1/S2 system registers are in an
> inconsistent state.
>
> This can happen during guest context switch and when invalidating the
> TLBs for other than the current VMID.
>
> The workaround implemented in Xen will:
>     - Use an empty stage-2 with a reserved VMID while context switching
>       between 2 guests
>     - Use an empty stage-2 with the VMID where TLBs need to be flushed
>
> Signed-off-by: Julien Grall <julien.grall@xxxxxxx>

Thank you for doing this and for setting up a testing environment. I
have a couple of questions on the changes below.

> ---
>  docs/misc/arm/silicon-errata.txt |  1 +
>  xen/arch/arm/cpuerrata.c         |  6 ++++
>  xen/arch/arm/domain.c            |  8 +++--
>  xen/arch/arm/p2m.c               | 78 ++++++++++++++++++++++++++++++++++++++--
>  xen/include/asm-arm/cpufeature.h |  3 +-
>  xen/include/asm-arm/processor.h  |  2 ++
>  6 files changed, 93 insertions(+), 5 deletions(-)
>
> diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
> index 906bf5fd48..6cd1366f15 100644
> --- a/docs/misc/arm/silicon-errata.txt
> +++ b/docs/misc/arm/silicon-errata.txt
> @@ -48,4 +48,5 @@ stable hypervisors.
>  | ARM            | Cortex-A57      | #852523         | N/A                     |
>  | ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075    |
>  | ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220    |
> +| ARM            | Cortex-A76      | #1165522        | N/A                     |
>  | ARM            | MMU-500         | #842869         | N/A                     |
>
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index adf88e7bdc..61c64b9816 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -489,6 +489,12 @@ static const struct arm_cpu_capabilities arm_errata[] = {
>          .matches = has_ssbd_mitigation,
>      },
>  #endif
> +    {
> +        /* Cortex-A76 r0p0 - r2p0 */
> +        .desc = "ARM erratum 1165522",
> +        .capability = ARM64_WORKAROUND_AT_SPECULATE,
> +        MIDR_RANGE(MIDR_CORTEX_A76, 0, 2 << MIDR_VARIANT_SHIFT),
> +    },
>      {},
>  };
>
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 1d926dcb29..3180edd89d 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -181,8 +181,6 @@ static void ctxt_switch_to(struct vcpu *n)
>      if ( is_idle_vcpu(n) )
>          return;
>
> -    p2m_restore_state(n);
> -
>      vpidr = READ_SYSREG32(MIDR_EL1);
>      WRITE_SYSREG32(vpidr, VPIDR_EL2);
>      WRITE_SYSREG(n->arch.vmpidr, VMPIDR_EL2);
> @@ -235,6 +233,12 @@ static void ctxt_switch_to(struct vcpu *n)
>  #endif
>      isb();
>
> +    /*
> +     * ARM64_WORKAROUND_AT_SPECULATE: The P2M should be restored after
> +     * the stage-1 MMU sysregs have been restored.
> +     */
> +    p2m_restore_state(n);
> +
>      /* Control Registers */
>      WRITE_SYSREG(n->arch.cpacr, CPACR_EL1);
>
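To make sure I understand the intended ordering: the combined effect of
p2m_save_state() and this hunk should be roughly the sequence below (a
simplified sketch, not the literal Xen code; restore_stage1_sysregs()
is a made-up name standing in for the sysreg restores done in
ctxt_switch_to()):

    /* prev vCPU: p2m_save_state() parks stage-2 on the empty table */
    WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), VTTBR_EL2);
    isb();   /* a speculated AT can no longer allocate stage-2 TLB entries */

    /* next vCPU: the stage-1 context can now be switched safely */
    restore_stage1_sysregs(n);   /* TCR_EL1, TTBR0_EL1, SCTLR_EL1, ... */
    isb();   /* stage-1 sysregs synchronized before stage-2 goes live */

    /* only then install the real VTTBR of the next guest */
    WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);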
> diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
> index 844833c4c3..0facb66096 100644
> --- a/xen/arch/arm/p2m.c
> +++ b/xen/arch/arm/p2m.c
> @@ -15,6 +15,7 @@
>  #include <asm/event.h>
>  #include <asm/hardirq.h>
>  #include <asm/page.h>
> +#include <asm/alternative.h>
>
>  #define MAX_VMID_8_BIT  (1UL << 8)
>  #define MAX_VMID_16_BIT (1UL << 16)
> @@ -47,6 +48,8 @@ static const paddr_t level_masks[] =
>  static const uint8_t level_orders[] =
>      { ZEROETH_ORDER, FIRST_ORDER, SECOND_ORDER, THIRD_ORDER };
>
> +static mfn_t __read_mostly empty_root_mfn;
> +
>  static uint64_t generate_vttbr(uint16_t vmid, mfn_t root_mfn)
>  {
>      return (mfn_to_maddr(root_mfn) | ((uint64_t)vmid << 48));
> @@ -98,9 +101,25 @@ void dump_p2m_lookup(struct domain *d, paddr_t addr)
>                 P2M_ROOT_LEVEL, P2M_ROOT_PAGES);
>  }
>
> +/*
> + * p2m_save_state and p2m_restore_state works in pair to workaround
                                           ^ work
> + * ARM64_WORKAROUND_AT_SPECULATE. p2m_save_state will set-up VTTBR to
> + * point to the empty page-tables to stop allocating TLB entries.
> + */
>  void p2m_save_state(struct vcpu *p)
>  {
>      p->arch.sctlr = READ_SYSREG(SCTLR_EL1);
> +
> +    if ( cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) )
> +    {
> +        WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), VTTBR_EL2);
> +        /*
> +         * Ensure VTTBR_EL2 is correctly synchronized so we can restore
> +         * the next vCPU context without worrying about AT instruction
> +         * speculation.
> +         */
> +        isb();
> +    }
>  }

OK

>  void p2m_restore_state(struct vcpu *n)
> @@ -111,10 +130,17 @@ void p2m_restore_state(struct vcpu *n)
>      if ( is_idle_vcpu(n) )
>          return;
>
> -    WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
>      WRITE_SYSREG(n->arch.sctlr, SCTLR_EL1);
>      WRITE_SYSREG(n->arch.hcr_el2, HCR_EL2);
>
> +    /*
> +     * ARM64_WORKAROUND_AT_SPECULATE: VTTBR_EL2 should be restored after all
> +     * registers associated to EL1/EL0 translations regime have been
> +     * synchronized.
> +     */
> +    asm volatile(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_AT_SPECULATE));

Obviously you have done a lot more thinking about this than me, but I
don't fully understand the need for this barrier: this is not about
ARM64_WORKAROUND_AT_SPECULATE per se, right? Shouldn't the CPU be able
to figure out the right execution speculation path given that the
instruction ordering is correct? I guess it depends on the nature of
the hardware bug.

> +    WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
> +
>      last_vcpu_ran = &p2m->last_vcpu_ran[smp_processor_id()];
>
>      /*
> @@ -157,8 +183,23 @@ static void p2m_force_tlb_flush_sync(struct p2m_domain *p2m)
>      ovttbr = READ_SYSREG64(VTTBR_EL2);
>      if ( ovttbr != p2m->vttbr )
>      {
> +        uint64_t vttbr;
> +
>          local_irq_save(flags);
> -        WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
> +
> +        /*
> +         * ARM64_WORKAROUND_AT_SPECULATE: We need to stop AT to allocate
> +         * TLBs entries because the context is partially modified. We
> +         * only need the VMID for flushing the TLBs, so we can generate
> +         * a new VTTBR with the VMID to flush and the empty root table.
> +         */
> +        if ( !cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) )
> +            vttbr = p2m->vttbr;
> +        else
> +            vttbr = generate_vttbr(p2m->vmid, empty_root_mfn);
> +
> +        WRITE_SYSREG64(vttbr, VTTBR_EL2);

Good idea. Any reason not to use generate_vttbr(p2m->vmid,
empty_root_mfn) in the general case? There should be no downsides,
right?
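E.g. something along these lines (just a sketch):

    /*
     * Hypothetical simplification: always flush using the empty root,
     * since only the VMID encoded in VTTBR matters for the TLB flush.
     */
    WRITE_SYSREG64(generate_vttbr(p2m->vmid, empty_root_mfn), VTTBR_EL2);

The only wrinkle I can see is that empty_root_mfn is only allocated
below when the capability is set, so it would have to be allocated
unconditionally.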
>          /* Ensure VTTBR_EL2 is synchronized before flushing the TLBs */
>          isb();
>      }
> @@ -1504,6 +1545,23 @@ static uint32_t __read_mostly vtcr;
>  static void setup_virt_paging_one(void *data)
>  {
>      WRITE_SYSREG32(vtcr, VTCR_EL2);
> +
> +    /*
> +     * ARM64_WORKAROUND_AT_SPECULATE: We want to keep the TLBs free from
> +     * entries related to EL1/EL0 translation regime until a guest vCPU
> +     * is running. For that, we need to set-up VTTBR to point to an empty
> +     * page-table and turn on stage-2 translation.

I don't understand why this is needed: isn't the lack of HCR_VM (due to
your previous patch) supposed to be sufficient? How can there be
speculation without HCR_VM? Even if speculation happens without HCR_VM
set, why do we need to set it now? Isn't setting empty_root_mfn enough?

> +     * The TLB entries
> +     * associated with EL1/EL0 translation regime will also be flushed in case
> +     * an AT instruction was speculated before hand.
> +     */
> +    if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) )
> +    {
> +        WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), VTTBR_EL2);
> +        WRITE_SYSREG(READ_SYSREG(HCR_EL2) | HCR_VM, HCR_EL2);
> +        isb();
> +
> +        flush_tlb_all_local();
> +    }
>  }
>
>  void __init setup_virt_paging(void)
> @@ -1587,6 +1645,22 @@ void __init setup_virt_paging(void)
>      /* It is not allowed to concatenate a level zero root */
>      BUG_ON( P2M_ROOT_LEVEL == 0 && P2M_ROOT_ORDER > 0 );
>      vtcr = val;
> +
> +    /*
> +     * ARM64_WORKAROUND_AT_SPECULATE requires to allocate root table
> +     * with all entries zeroed.
> +     */
> +    if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) )
> +    {
> +        struct page_info *root;
> +
> +        root = p2m_allocate_root();
> +        if ( !root )
> +            panic("Unable to allocate root table for ARM64_WORKAROUND_AT_SPECULATE\n");
> +
> +        empty_root_mfn = page_to_mfn(root);
> +    }

OK

>      setup_virt_paging_one(NULL);
>      smp_call_function(setup_virt_paging_one, NULL, 1);
>  }
> diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
> index 17de928467..c2c8f3417c 100644
> --- a/xen/include/asm-arm/cpufeature.h
> +++ b/xen/include/asm-arm/cpufeature.h
> @@ -45,8 +45,9 @@
>  #define ARM_HARDEN_BRANCH_PREDICTOR 7
>  #define ARM_SSBD 8
>  #define ARM_SMCCC_1_1 9
> +#define ARM64_WORKAROUND_AT_SPECULATE 10
>
> -#define ARM_NCAPS           10
> +#define ARM_NCAPS           11
>
>  #ifndef __ASSEMBLY__
>
> diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
> index 72ddc42778..d03ec6e272 100644
> --- a/xen/include/asm-arm/processor.h
> +++ b/xen/include/asm-arm/processor.h
> @@ -52,6 +52,7 @@
>  #define ARM_CPU_PART_CORTEX_A72     0xD08
>  #define ARM_CPU_PART_CORTEX_A73     0xD09
>  #define ARM_CPU_PART_CORTEX_A75     0xD0A
> +#define ARM_CPU_PART_CORTEX_A76     0xD0B
>
>  #define MIDR_CORTEX_A12 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A12)
>  #define MIDR_CORTEX_A17 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A17)
> @@ -61,6 +62,7 @@
>  #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
>  #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
>  #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)
> +#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
>
>  /* MPIDR Multiprocessor Affinity Register */
>  #define _MPIDR_UP        (30)
> --
> 2.11.0