[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH for-4.12 6/8] xen/arm: Implement workaround for Cortex-A76 erratum 1165522



On Wed, 28 Nov 2018, Julien Grall wrote:
> Early version of Cortex-A76 can end-up with corrupt TLBs if they
> speculate an AT instruction while the S1/S2 system registers are in an
> inconsistent state.
> 
> This can happen during guest context switch and when invalidating the
> TLBs for other than the current VMID.
> 
> The workaround implemented in Xen will:
>     - Use an empty stage-2 with a reserved VMID while context switching
>     between 2 guests
>     - Use an empty stage-2 with the VMID where TLBs need to be flushed
>
> Signed-off-by: Julien Grall <julien.grall@xxxxxxx>

Thank you for doing this and for setting up a testing environment. I
have a couple of questions on a couple of changes below.


> ---
>  docs/misc/arm/silicon-errata.txt |  1 +
>  xen/arch/arm/cpuerrata.c         |  6 ++++
>  xen/arch/arm/domain.c            |  8 +++--
>  xen/arch/arm/p2m.c               | 78 
> ++++++++++++++++++++++++++++++++++++++--
>  xen/include/asm-arm/cpufeature.h |  3 +-
>  xen/include/asm-arm/processor.h  |  2 ++
>  6 files changed, 93 insertions(+), 5 deletions(-)
> 
> diff --git a/docs/misc/arm/silicon-errata.txt 
> b/docs/misc/arm/silicon-errata.txt
> index 906bf5fd48..6cd1366f15 100644
> --- a/docs/misc/arm/silicon-errata.txt
> +++ b/docs/misc/arm/silicon-errata.txt
> @@ -48,4 +48,5 @@ stable hypervisors.
>  | ARM            | Cortex-A57      | #852523         | N/A                   
>   |
>  | ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075  
>   |
>  | ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220  
>   |
> +| ARM            | Cortex-A76      | #1165522        | N/A                   
>   |
>  | ARM            | MMU-500         | #842869         | N/A                   
>   |
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index adf88e7bdc..61c64b9816 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -489,6 +489,12 @@ static const struct arm_cpu_capabilities arm_errata[] = {
>          .matches = has_ssbd_mitigation,
>      },
>  #endif
> +    {
> +        /* Cortex-A76 r0p0 - r2p0 */
> +        .desc = "ARM erratum 116522",
> +        .capability = ARM64_WORKAROUND_AT_SPECULATE,
> +        MIDR_RANGE(MIDR_CORTEX_A76, 0, 2 << MIDR_VARIANT_SHIFT),
> +    },
>      {},
>  };
>  
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 1d926dcb29..3180edd89d 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -181,8 +181,6 @@ static void ctxt_switch_to(struct vcpu *n)
>      if ( is_idle_vcpu(n) )
>          return;
>  
> -    p2m_restore_state(n);
> -
>      vpidr = READ_SYSREG32(MIDR_EL1);
>      WRITE_SYSREG32(vpidr, VPIDR_EL2);
>      WRITE_SYSREG(n->arch.vmpidr, VMPIDR_EL2);
> @@ -235,6 +233,12 @@ static void ctxt_switch_to(struct vcpu *n)
>  #endif
>      isb();
>  
> +    /*
> +     * ARM64_WORKAROUND_AT_SPECULATE: The P2M should be restored after
> +     * the stage-1 MMU sysregs have been restored.
> +     */
> +    p2m_restore_state(n);
> +
>      /* Control Registers */
>      WRITE_SYSREG(n->arch.cpacr, CPACR_EL1);
>  
> diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
> index 844833c4c3..0facb66096 100644
> --- a/xen/arch/arm/p2m.c
> +++ b/xen/arch/arm/p2m.c
> @@ -15,6 +15,7 @@
>  #include <asm/event.h>
>  #include <asm/hardirq.h>
>  #include <asm/page.h>
> +#include <asm/alternative.h>
>  
>  #define MAX_VMID_8_BIT  (1UL << 8)
>  #define MAX_VMID_16_BIT (1UL << 16)
> @@ -47,6 +48,8 @@ static const paddr_t level_masks[] =
>  static const uint8_t level_orders[] =
>      { ZEROETH_ORDER, FIRST_ORDER, SECOND_ORDER, THIRD_ORDER };
>  
> +static mfn_t __read_mostly empty_root_mfn;
> +
>  static uint64_t generate_vttbr(uint16_t vmid, mfn_t root_mfn)
>  {
>      return (mfn_to_maddr(root_mfn) | ((uint64_t)vmid << 48));
> @@ -98,9 +101,25 @@ void dump_p2m_lookup(struct domain *d, paddr_t addr)
>                   P2M_ROOT_LEVEL, P2M_ROOT_PAGES);
>  }
>  
> +/*
> + * p2m_save_state and p2m_restore_state works in pair to workaround
                                           ^ work


> + * ARM64_WORKAROUND_AT_SPECULATE. p2m_save_state will set-up VTTBR to
> + * point to the empty page-tables to stop allocating TLB entries.
> + */
>  void p2m_save_state(struct vcpu *p)
>  {
>      p->arch.sctlr = READ_SYSREG(SCTLR_EL1);
> +
> +    if ( cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) )
> +    {
> +        WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), 
> VTTBR_EL2);
> +        /*
> +         * Ensure VTTBR_EL2 is correctly synchronized so we can restore
> +         * the next vCPU context without worrying about AT instruction
> +         * speculation.
> +         */
> +        isb();
> +    }
>  }

OK


>  void p2m_restore_state(struct vcpu *n)
> @@ -111,10 +130,17 @@ void p2m_restore_state(struct vcpu *n)
>      if ( is_idle_vcpu(n) )
>          return;
>  
> -    WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
>      WRITE_SYSREG(n->arch.sctlr, SCTLR_EL1);
>      WRITE_SYSREG(n->arch.hcr_el2, HCR_EL2);
>  
> +    /*
> +     * ARM64_WORKAROUND_AT_SPECULATE: VTTBR_EL2 should be restored after all
> +     * registers associated to EL1/EL0 translations regime have been
> +     * synchronized.
> +     */
> +    asm volatile(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_AT_SPECULATE));

Obviously you have done a lot more thinking about this than me, but
I don't fully understand the need for this barrier: this is not about
ARM64_WORKAROUND_AT_SPECULATE per se, right? Shouldn't the CPU be able
to figure out the right execution speculation path given that the
instructions ordering is correct? I guess it depends on the nature of
the hardware bug.


> +    WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
> +
>      last_vcpu_ran = &p2m->last_vcpu_ran[smp_processor_id()];
>  
>      /*
> @@ -157,8 +183,23 @@ static void p2m_force_tlb_flush_sync(struct p2m_domain 
> *p2m)
>      ovttbr = READ_SYSREG64(VTTBR_EL2);
>      if ( ovttbr != p2m->vttbr )
>      {
> +        uint64_t vttbr;
> +
>          local_irq_save(flags);
> -        WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
> +
> +        /*
> +         * ARM64_WORKAROUND_AT_SPECULATE: We need to stop AT to allocate
> +         * TLBs entries because the context is partially modified. We
> +         * only need the VMID for flushing the TLBs, so we can generate
> +         * a new VTTBR with the VMID to flush and the empty root table.
> +         */
> +        if ( !cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) )
> +            vttbr = p2m->vttbr;
> +        else
> +            vttbr = generate_vttbr(p2m->vmid, empty_root_mfn);
> +
> +        WRITE_SYSREG64(vttbr, VTTBR_EL2);

Good idea, any reasons not to use generate_vttbr(p2m->vmid,
empty_root_mfn) in the general case? There should be no downsides,
right?


>          /* Ensure VTTBR_EL2 is synchronized before flushing the TLBs */
>          isb();
>      }
> @@ -1504,6 +1545,23 @@ static uint32_t __read_mostly vtcr;
>  static void setup_virt_paging_one(void *data)
>  {
>      WRITE_SYSREG32(vtcr, VTCR_EL2);
> +
> +    /*
> +     * ARM64_WORKAROUND_AT_SPECULATE: We want to keep the TLBs free from
> +     * entries related to EL1/EL0 translation regime until a guest vCPU
> +     * is running. For that, we need to set-up VTTBR to point to an empty
> +     * page-table and turn on stage-2 translation.

I don't understand why this is needed: isn't the lack of HCR_VM (due to
your previous patch) supposed to be sufficient? How can there be
speculation without HCR_VM?

Even if speculation happens without HCR_EL2, why do we need to set it
now? Isn't setting empty_root_mfn enough?


>         The TLB entries
> +     * associated with EL1/EL0 translation regime will also be flushed in 
> case
> +     * an AT instruction was speculated before hand.
> +     */
> +    if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) )
> +    {
> +        WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), 
> VTTBR_EL2);
> +        WRITE_SYSREG(READ_SYSREG(HCR_EL2) | HCR_VM, HCR_EL2);
> +        isb();
> +
> +        flush_tlb_all_local();
> +    }
>  }
>  
>  void __init setup_virt_paging(void)
> @@ -1587,6 +1645,22 @@ void __init setup_virt_paging(void)
>      /* It is not allowed to concatenate a level zero root */
>      BUG_ON( P2M_ROOT_LEVEL == 0 && P2M_ROOT_ORDER > 0 );
>      vtcr = val;
> +
> +    /*
> +     * ARM64_WORKAROUND_AT_SPECULATE requires to allocate root table
> +     * with all entries zeroed.
> +     */
> +    if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) )
> +    {
> +        struct page_info *root;
> +
> +        root = p2m_allocate_root();
> +        if ( !root )
> +            panic("Unable to allocate root table for 
> ARM64_WORKAROUND_AT_SPECULATE\n");
> +
> +        empty_root_mfn = page_to_mfn(root);
> +    }

OK


>      setup_virt_paging_one(NULL);
>      smp_call_function(setup_virt_paging_one, NULL, 1);
>  }
> diff --git a/xen/include/asm-arm/cpufeature.h 
> b/xen/include/asm-arm/cpufeature.h
> index 17de928467..c2c8f3417c 100644
> --- a/xen/include/asm-arm/cpufeature.h
> +++ b/xen/include/asm-arm/cpufeature.h
> @@ -45,8 +45,9 @@
>  #define ARM_HARDEN_BRANCH_PREDICTOR 7
>  #define ARM_SSBD 8
>  #define ARM_SMCCC_1_1 9
> +#define ARM64_WORKAROUND_AT_SPECULATE 10
>  
> -#define ARM_NCAPS           10
> +#define ARM_NCAPS           11
>  
>  #ifndef __ASSEMBLY__
>  
> diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
> index 72ddc42778..d03ec6e272 100644
> --- a/xen/include/asm-arm/processor.h
> +++ b/xen/include/asm-arm/processor.h
> @@ -52,6 +52,7 @@
>  #define ARM_CPU_PART_CORTEX_A72     0xD08
>  #define ARM_CPU_PART_CORTEX_A73     0xD09
>  #define ARM_CPU_PART_CORTEX_A75     0xD0A
> +#define ARM_CPU_PART_CORTEX_A76     0xD0B
>  
>  #define MIDR_CORTEX_A12 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, 
> ARM_CPU_PART_CORTEX_A12)
>  #define MIDR_CORTEX_A17 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, 
> ARM_CPU_PART_CORTEX_A17)
> @@ -61,6 +62,7 @@
>  #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, 
> ARM_CPU_PART_CORTEX_A72)
>  #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, 
> ARM_CPU_PART_CORTEX_A73)
>  #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, 
> ARM_CPU_PART_CORTEX_A75)
> +#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, 
> ARM_CPU_PART_CORTEX_A76)
>  
>  /* MPIDR Multiprocessor Affinity Register */
>  #define _MPIDR_UP           (30)
> -- 
> 2.11.0
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.