[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[XEN PATCH] xen/arm/p2m: perform IPA-based TLBI for arm64 when IPA is known



From: Haseeb Ashraf <haseeb.ashraf@xxxxxxxxxxx>

This commit addresses a major issue for running Xen on KVM arm64 i.e.
costly emulation of VMALLS12E1IS which becomes worse when this TLBI
is invoked too many times. There are mainly two places where this is
problematic:
(a) When vCPUs switch on a pCPU or pCPUs
(b) When domu mapped pages onto dom0, are to be unmapped, then each
    page being removed by XENMEM_remove_from_physmap has its TLBs
    invalidated by VMALLS12E1IS.

The first one is addressed by relaxing VMALLS12E1IS -> VMALLE1IS.
Each CPU have their own private TLBs, so flush between vCPU of the
same domains is required to avoid translations from vCPUx to "leak"
to the vCPUy. This can be achieved by using VMALLE1. If FEAT_nTLBPA
is present then VMALLE1 can also be avoided.

The second one is addressed by using IPA-based TLBI (IPAS2E1) in
combination with VMALLE1 whenever the IPA range is known instead of
using VMALLS12E1. Again, VMALLE1 can be avoided FEAT_nTLBPA is
present.

Suggested-by: Mohamed Mediouni <mohamed@xxxxxxxxxxxxxxxx>
Suggested-by: Julien Grall <julien@xxxxxxx>
Signed-off-by: Haseeb Ashraf <haseeb.ashraf@xxxxxxxxxxx>
---
 xen/arch/arm/include/asm/arm64/flushtlb.h | 34 ++++++++
 xen/arch/arm/include/asm/arm64/sysregs.h  |  1 +
 xen/arch/arm/include/asm/cpufeature.h     |  3 +-
 xen/arch/arm/include/asm/mmu/p2m.h        |  4 +
 xen/arch/arm/include/asm/processor.h      |  2 +
 xen/arch/arm/mmu/p2m.c                    | 96 ++++++++++++++++++++++-
 6 files changed, 136 insertions(+), 4 deletions(-)

diff --git a/xen/arch/arm/include/asm/arm64/flushtlb.h 
b/xen/arch/arm/include/asm/arm64/flushtlb.h
index 3b99c11b50..6703aab8a7 100644
--- a/xen/arch/arm/include/asm/arm64/flushtlb.h
+++ b/xen/arch/arm/include/asm/arm64/flushtlb.h
@@ -45,6 +45,27 @@ static inline void name(void)                    \
         : : : "memory");                         \
 }
 
+/*
+ * FLush TLB by IPA. This will likely be used in a loop, so the caller
+ * is responsible to use the appropriate memory barriers before/after
+ * the sequence.
+ *
+ * See above about the ARM64_WORKAROUND_REPEAT_TLBI sequence.
+ */
+#define TLB_HELPER_IPA(name, tlbop)              \
+static inline void name(paddr_t ipa)             \
+{                                                \
+    asm volatile(                                \
+        "tlbi "  # tlbop  ", %0;"                \
+        ALTERNATIVE(                             \
+            "nop; nop;",                         \
+            "dsb  ish;"                          \
+            "tlbi "  # tlbop  ", %0;",           \
+            ARM64_WORKAROUND_REPEAT_TLBI,        \
+            CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
+        : : "r" (ipa >> PAGE_SHIFT) : "memory"); \
+}
+
 /*
  * FLush TLB by VA. This will likely be used in a loop, so the caller
  * is responsible to use the appropriate memory barriers before/after
@@ -72,6 +93,18 @@ TLB_HELPER(flush_guest_tlb_local, vmalls12e1, nsh)
 /* Flush innershareable TLBs, current VMID only */
 TLB_HELPER(flush_guest_tlb, vmalls12e1is, ish)
 
+/* Flush local TLBs, current VMID, stage-1 only */
+TLB_HELPER(flush_guest_tlb_s1_local, vmalle1, nsh)
+
+/* Flush innershareable TLBs, current VMID, stage-1 only */
+TLB_HELPER(flush_guest_tlb_s1, vmalle1is, ish)
+
+/* Flush local TLBs, current VMID, stage-2 for ipa address */
+TLB_HELPER_IPA(flush_guest_tlb_one_s2_local, ipas2e1)
+
+/* Flush innershareable TLBs, current VMID, stage-2 for ipa address */
+TLB_HELPER_IPA(flush_guest_tlb_one_s2, ipas2e1is)
+
 /* Flush local TLBs, all VMIDs, non-hypervisor mode */
 TLB_HELPER(flush_all_guests_tlb_local, alle1, nsh)
 
@@ -88,6 +121,7 @@ TLB_HELPER_VA(__flush_xen_tlb_one_local, vae2)
 TLB_HELPER_VA(__flush_xen_tlb_one, vae2is)
 
 #undef TLB_HELPER
+#undef TLB_HELPER_IPA
 #undef TLB_HELPER_VA
 
 #endif /* __ASM_ARM_ARM64_FLUSHTLB_H__ */
diff --git a/xen/arch/arm/include/asm/arm64/sysregs.h 
b/xen/arch/arm/include/asm/arm64/sysregs.h
index 7440d495e4..30a7c3b9fd 100644
--- a/xen/arch/arm/include/asm/arm64/sysregs.h
+++ b/xen/arch/arm/include/asm/arm64/sysregs.h
@@ -286,6 +286,7 @@
 
 /* id_aa64mmfr1 */
 #define ID_AA64MMFR1_ECBHB_SHIFT     60
+#define ID_AA64MMFR1_NTLBPA_SHIFT    48
 #define ID_AA64MMFR1_AFP_SHIFT       44
 #define ID_AA64MMFR1_ETS_SHIFT       36
 #define ID_AA64MMFR1_TWED_SHIFT      32
diff --git a/xen/arch/arm/include/asm/cpufeature.h 
b/xen/arch/arm/include/asm/cpufeature.h
index b6df188011..21df7603fc 100644
--- a/xen/arch/arm/include/asm/cpufeature.h
+++ b/xen/arch/arm/include/asm/cpufeature.h
@@ -269,7 +269,8 @@ struct cpuinfo_arm {
             unsigned long ets:4;
             unsigned long __res1:4;
             unsigned long afp:4;
-            unsigned long __res2:12;
+            unsigned long ntlbpa:4;
+            unsigned long __res2:8;
             unsigned long ecbhb:4;
 
             /* MMFR2 */
diff --git a/xen/arch/arm/include/asm/mmu/p2m.h 
b/xen/arch/arm/include/asm/mmu/p2m.h
index 58496c0b09..fc2e08bbe8 100644
--- a/xen/arch/arm/include/asm/mmu/p2m.h
+++ b/xen/arch/arm/include/asm/mmu/p2m.h
@@ -10,6 +10,10 @@ extern unsigned int p2m_root_level;
 
 struct p2m_domain;
 void p2m_force_tlb_flush_sync(struct p2m_domain *p2m);
+#ifdef CONFIG_ARM_64
+void p2m_force_tlb_flush_range_sync(struct p2m_domain *p2m, uint64_t start_ipa,
+                                    uint64_t page_count);
+#endif
 void p2m_tlb_flush_sync(struct p2m_domain *p2m);
 
 void p2m_clear_root_pages(struct p2m_domain *p2m);
diff --git a/xen/arch/arm/include/asm/processor.h 
b/xen/arch/arm/include/asm/processor.h
index 92c8bc1a31..07f6a2ef6a 100644
--- a/xen/arch/arm/include/asm/processor.h
+++ b/xen/arch/arm/include/asm/processor.h
@@ -461,6 +461,8 @@
 #ifdef CONFIG_ARM_64
 #define MM64_VMID_8_BITS_SUPPORT    0x0
 #define MM64_VMID_16_BITS_SUPPORT   0x2
+#define MM64_NTLBPA_SUPPORT_NI      0x0
+#define MM64_NTLBPA_SUPPORT_IMP     0x1
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/xen/arch/arm/mmu/p2m.c b/xen/arch/arm/mmu/p2m.c
index 51abf3504f..28268fb67f 100644
--- a/xen/arch/arm/mmu/p2m.c
+++ b/xen/arch/arm/mmu/p2m.c
@@ -235,7 +235,12 @@ void p2m_restore_state(struct vcpu *n)
      * when running multiple vCPU of the same domain on a single pCPU.
      */
     if ( *last_vcpu_ran != INVALID_VCPU_ID && *last_vcpu_ran != n->vcpu_id )
+#ifdef CONFIG_ARM_64
+        if ( system_cpuinfo.mm64.ntlbpa != MM64_NTLBPA_SUPPORT_IMP )
+            flush_guest_tlb_s1_local();
+#else
         flush_guest_tlb_local();
+#endif
 
     *last_vcpu_ran = n->vcpu_id;
 }
@@ -293,6 +298,75 @@ void p2m_force_tlb_flush_sync(struct p2m_domain *p2m)
     p2m->need_flush = false;
 }
 
+#ifdef CONFIG_ARM_64
+/*
+ * Force a synchronous P2M TLB flush on a range of addresses.
+ *
+ * Must be called with the p2m lock held.
+ */
+void p2m_force_tlb_flush_range_sync(struct p2m_domain *p2m, uint64_t start_ipa,
+                                    uint64_t page_count)
+{
+    unsigned long flags = 0;
+    uint64_t ovttbr;
+    uint64_t ipa = start_ipa;
+    uint64_t i;
+
+    ASSERT(p2m_is_write_locked(p2m));
+
+    /*
+     * ARM only provides an instruction to flush TLBs for the current
+     * VMID. So switch to the VTTBR of a given P2M if different.
+     */
+    ovttbr = READ_SYSREG64(VTTBR_EL2);
+    if ( ovttbr != p2m->vttbr )
+    {
+        uint64_t vttbr;
+
+        local_irq_save(flags);
+
+        /*
+         * ARM64_WORKAROUND_AT_SPECULATE: We need to stop AT to allocate
+         * TLBs entries because the context is partially modified. We
+         * only need the VMID for flushing the TLBs, so we can generate
+         * a new VTTBR with the VMID to flush and the empty root table.
+         */
+        if ( !cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) )
+            vttbr = p2m->vttbr;
+        else
+            vttbr = generate_vttbr(p2m->vmid, empty_root_mfn);
+
+        WRITE_SYSREG64(vttbr, VTTBR_EL2);
+
+        /* Ensure VTTBR_EL2 is synchronized before flushing the TLBs */
+        isb();
+    }
+
+    /* Ensure prior page-tables updates have completed */
+    dsb(ishst);
+
+    /* Invalidate stage-2 TLB entries by IPA range */
+    for ( i = 0; i < page_count; i++ ) {
+        flush_guest_tlb_one_s2(ipa);
+        ipa += 1UL << PAGE_SHIFT;
+    }
+
+    if ( system_cpuinfo.mm64.ntlbpa != MM64_NTLBPA_SUPPORT_IMP )
+        flush_guest_tlb_s1_local();
+
+    /* Ensure the TLB invalidation has completed */
+    dsb(ishst);
+
+    if ( ovttbr != READ_SYSREG64(VTTBR_EL2) )
+    {
+        WRITE_SYSREG64(ovttbr, VTTBR_EL2);
+        /* Ensure VTTBR_EL2 is back in place before continuing. */
+        isb();
+        local_irq_restore(flags);
+    }
+}
+#endif
+
 void p2m_tlb_flush_sync(struct p2m_domain *p2m)
 {
     if ( p2m->need_flush )
@@ -1034,7 +1108,12 @@ static int __p2m_set_entry(struct p2m_domain *p2m,
          * For more details see (D4.7.1 in ARM DDI 0487A.j).
          */
         p2m_remove_pte(entry, p2m->clean_pte);
+#ifdef CONFIG_ARM_64
+        p2m_force_tlb_flush_range_sync(p2m, gfn_x(sgfn) << PAGE_SHIFT,
+                                       1UL << page_order);
+#else
         p2m_force_tlb_flush_sync(p2m);
+#endif
 
         p2m_write_pte(entry, split_pte, p2m->clean_pte);
 
@@ -1090,8 +1169,13 @@ static int __p2m_set_entry(struct p2m_domain *p2m,
         p2m_remove_pte(entry, p2m->clean_pte);
 
     if ( removing_mapping )
+#ifdef CONFIG_ARM_64
+        p2m_force_tlb_flush_range_sync(p2m, gfn_x(sgfn) << PAGE_SHIFT,
+                                       1UL << page_order);
+#else
         /* Flush can be deferred if the entry is removed */
         p2m->need_flush |= !!lpae_is_valid(orig_pte);
+#endif
     else
     {
         lpae_t pte = mfn_to_p2m_entry(smfn, t, a);
@@ -1102,17 +1186,23 @@ static int __p2m_set_entry(struct p2m_domain *p2m,
         /*
          * It is necessary to flush the TLB before writing the new entry
          * to keep coherency when the previous entry was valid.
-         *
-         * Although, it could be defered when only the permissions are
-         * changed (e.g in case of memaccess).
          */
         if ( lpae_is_valid(orig_pte) )
         {
+#ifdef CONFIG_ARM_64
+            p2m_force_tlb_flush_range_sync(p2m, gfn_x(sgfn) << PAGE_SHIFT,
+                                           1UL << page_order);
+#else
+        /*
+         * Although, flush could be defered when only the permissions are
+         * changed (e.g in case of memaccess).
+         */
             if ( likely(!p2m->mem_access_enabled) ||
                  P2M_CLEAR_PERM(pte) != P2M_CLEAR_PERM(orig_pte) )
                 p2m_force_tlb_flush_sync(p2m);
             else
                 p2m->need_flush = true;
+#endif
         }
         else if ( !p2m_is_valid(orig_pte) ) /* new mapping */
             p2m->stats.mappings[level]++;
-- 
2.34.1




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.