[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH for-4.12 6/8] xen/arm: Implement workaround for Cortex-A76 erratum 1165522



Early version of Cortex-A76 can end-up with corrupt TLBs if they
speculate an AT instruction while the S1/S2 system registers are in an
inconsistent state.

This can happen during guest context switch and when invalidating the
TLBs for other than the current VMID.

The workaround implemented in Xen will:
    - Use an empty stage-2 with a reserved VMID while context switching
    between 2 guests
    - Use an empty stage-2 with the VMID where TLBs need to be flushed

Signed-off-by: Julien Grall <julien.grall@xxxxxxx>
---
 docs/misc/arm/silicon-errata.txt |  1 +
 xen/arch/arm/cpuerrata.c         |  6 ++++
 xen/arch/arm/domain.c            |  8 +++--
 xen/arch/arm/p2m.c               | 78 ++++++++++++++++++++++++++++++++++++++--
 xen/include/asm-arm/cpufeature.h |  3 +-
 xen/include/asm-arm/processor.h  |  2 ++
 6 files changed, 93 insertions(+), 5 deletions(-)

diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
index 906bf5fd48..6cd1366f15 100644
--- a/docs/misc/arm/silicon-errata.txt
+++ b/docs/misc/arm/silicon-errata.txt
@@ -48,4 +48,5 @@ stable hypervisors.
 | ARM            | Cortex-A57      | #852523         | N/A                     
|
 | ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075    
|
 | ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220    
|
+| ARM            | Cortex-A76      | #1165522        | N/A                     
|
 | ARM            | MMU-500         | #842869         | N/A                     
|
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index adf88e7bdc..61c64b9816 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -489,6 +489,12 @@ static const struct arm_cpu_capabilities arm_errata[] = {
         .matches = has_ssbd_mitigation,
     },
 #endif
+    {
+        /* Cortex-A76 r0p0 - r2p0 */
+        .desc = "ARM erratum 116522",
+        .capability = ARM64_WORKAROUND_AT_SPECULATE,
+        MIDR_RANGE(MIDR_CORTEX_A76, 0, 2 << MIDR_VARIANT_SHIFT),
+    },
     {},
 };
 
diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 1d926dcb29..3180edd89d 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -181,8 +181,6 @@ static void ctxt_switch_to(struct vcpu *n)
     if ( is_idle_vcpu(n) )
         return;
 
-    p2m_restore_state(n);
-
     vpidr = READ_SYSREG32(MIDR_EL1);
     WRITE_SYSREG32(vpidr, VPIDR_EL2);
     WRITE_SYSREG(n->arch.vmpidr, VMPIDR_EL2);
@@ -235,6 +233,12 @@ static void ctxt_switch_to(struct vcpu *n)
 #endif
     isb();
 
+    /*
+     * ARM64_WORKAROUND_AT_SPECULATE: The P2M should be restored after
+     * the stage-1 MMU sysregs have been restored.
+     */
+    p2m_restore_state(n);
+
     /* Control Registers */
     WRITE_SYSREG(n->arch.cpacr, CPACR_EL1);
 
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index 844833c4c3..0facb66096 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -15,6 +15,7 @@
 #include <asm/event.h>
 #include <asm/hardirq.h>
 #include <asm/page.h>
+#include <asm/alternative.h>
 
 #define MAX_VMID_8_BIT  (1UL << 8)
 #define MAX_VMID_16_BIT (1UL << 16)
@@ -47,6 +48,8 @@ static const paddr_t level_masks[] =
 static const uint8_t level_orders[] =
     { ZEROETH_ORDER, FIRST_ORDER, SECOND_ORDER, THIRD_ORDER };
 
+static mfn_t __read_mostly empty_root_mfn;
+
 static uint64_t generate_vttbr(uint16_t vmid, mfn_t root_mfn)
 {
     return (mfn_to_maddr(root_mfn) | ((uint64_t)vmid << 48));
@@ -98,9 +101,25 @@ void dump_p2m_lookup(struct domain *d, paddr_t addr)
                  P2M_ROOT_LEVEL, P2M_ROOT_PAGES);
 }
 
+/*
+ * p2m_save_state and p2m_restore_state works in pair to workaround
+ * ARM64_WORKAROUND_AT_SPECULATE. p2m_save_state will set-up VTTBR to
+ * point to the empty page-tables to stop allocating TLB entries.
+ */
 void p2m_save_state(struct vcpu *p)
 {
     p->arch.sctlr = READ_SYSREG(SCTLR_EL1);
+
+    if ( cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) )
+    {
+        WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), 
VTTBR_EL2);
+        /*
+         * Ensure VTTBR_EL2 is correctly synchronized so we can restore
+         * the next vCPU context without worrying about AT instruction
+         * speculation.
+         */
+        isb();
+    }
 }
 
 void p2m_restore_state(struct vcpu *n)
@@ -111,10 +130,17 @@ void p2m_restore_state(struct vcpu *n)
     if ( is_idle_vcpu(n) )
         return;
 
-    WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
     WRITE_SYSREG(n->arch.sctlr, SCTLR_EL1);
     WRITE_SYSREG(n->arch.hcr_el2, HCR_EL2);
 
+    /*
+     * ARM64_WORKAROUND_AT_SPECULATE: VTTBR_EL2 should be restored after all
+     * registers associated to EL1/EL0 translations regime have been
+     * synchronized.
+     */
+    asm volatile(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_AT_SPECULATE));
+    WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
+
     last_vcpu_ran = &p2m->last_vcpu_ran[smp_processor_id()];
 
     /*
@@ -157,8 +183,23 @@ static void p2m_force_tlb_flush_sync(struct p2m_domain 
*p2m)
     ovttbr = READ_SYSREG64(VTTBR_EL2);
     if ( ovttbr != p2m->vttbr )
     {
+        uint64_t vttbr;
+
         local_irq_save(flags);
-        WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
+
+        /*
+         * ARM64_WORKAROUND_AT_SPECULATE: We need to stop AT to allocate
+         * TLBs entries because the context is partially modified. We
+         * only need the VMID for flushing the TLBs, so we can generate
+         * a new VTTBR with the VMID to flush and the empty root table.
+         */
+        if ( !cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) )
+            vttbr = p2m->vttbr;
+        else
+            vttbr = generate_vttbr(p2m->vmid, empty_root_mfn);
+
+        WRITE_SYSREG64(vttbr, VTTBR_EL2);
+
         /* Ensure VTTBR_EL2 is synchronized before flushing the TLBs */
         isb();
     }
@@ -1504,6 +1545,23 @@ static uint32_t __read_mostly vtcr;
 static void setup_virt_paging_one(void *data)
 {
     WRITE_SYSREG32(vtcr, VTCR_EL2);
+
+    /*
+     * ARM64_WORKAROUND_AT_SPECULATE: We want to keep the TLBs free from
+     * entries related to EL1/EL0 translation regime until a guest vCPU
+     * is running. For that, we need to set-up VTTBR to point to an empty
+     * page-table and turn on stage-2 translation. The TLB entries
+     * associated with EL1/EL0 translation regime will also be flushed in case
+     * an AT instruction was speculated before hand.
+     */
+    if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) )
+    {
+        WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), 
VTTBR_EL2);
+        WRITE_SYSREG(READ_SYSREG(HCR_EL2) | HCR_VM, HCR_EL2);
+        isb();
+
+        flush_tlb_all_local();
+    }
 }
 
 void __init setup_virt_paging(void)
@@ -1587,6 +1645,22 @@ void __init setup_virt_paging(void)
     /* It is not allowed to concatenate a level zero root */
     BUG_ON( P2M_ROOT_LEVEL == 0 && P2M_ROOT_ORDER > 0 );
     vtcr = val;
+
+    /*
+     * ARM64_WORKAROUND_AT_SPECULATE requires to allocate root table
+     * with all entries zeroed.
+     */
+    if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) )
+    {
+        struct page_info *root;
+
+        root = p2m_allocate_root();
+        if ( !root )
+            panic("Unable to allocate root table for 
ARM64_WORKAROUND_AT_SPECULATE\n");
+
+        empty_root_mfn = page_to_mfn(root);
+    }
+
     setup_virt_paging_one(NULL);
     smp_call_function(setup_virt_paging_one, NULL, 1);
 }
diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
index 17de928467..c2c8f3417c 100644
--- a/xen/include/asm-arm/cpufeature.h
+++ b/xen/include/asm-arm/cpufeature.h
@@ -45,8 +45,9 @@
 #define ARM_HARDEN_BRANCH_PREDICTOR 7
 #define ARM_SSBD 8
 #define ARM_SMCCC_1_1 9
+#define ARM64_WORKAROUND_AT_SPECULATE 10
 
-#define ARM_NCAPS           10
+#define ARM_NCAPS           11
 
 #ifndef __ASSEMBLY__
 
diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
index 72ddc42778..d03ec6e272 100644
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -52,6 +52,7 @@
 #define ARM_CPU_PART_CORTEX_A72     0xD08
 #define ARM_CPU_PART_CORTEX_A73     0xD09
 #define ARM_CPU_PART_CORTEX_A75     0xD0A
+#define ARM_CPU_PART_CORTEX_A76     0xD0B
 
 #define MIDR_CORTEX_A12 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, 
ARM_CPU_PART_CORTEX_A12)
 #define MIDR_CORTEX_A17 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, 
ARM_CPU_PART_CORTEX_A17)
@@ -61,6 +62,7 @@
 #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, 
ARM_CPU_PART_CORTEX_A72)
 #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, 
ARM_CPU_PART_CORTEX_A73)
 #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, 
ARM_CPU_PART_CORTEX_A75)
+#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, 
ARM_CPU_PART_CORTEX_A76)
 
 /* MPIDR Multiprocessor Affinity Register */
 #define _MPIDR_UP           (30)
-- 
2.11.0


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.