[Xen-devel] [PATCH v2 6/6] xen/x86: use PCID feature for XPTI
Avoid flushing the complete TLB when switching %cr3 for the Meltdown
mitigation by using the PCID feature, if it is available.
We use 4 PCID values for a 64-bit pv domain subject to XPTI (see the
sketch after this list):
- hypervisor active and guest in kernel mode
- guest active and in kernel mode
- hypervisor active and guest in user mode
- guest active and in user mode
The 2 hypervisor cases could possibly be merged, but for security
reasons this is left for another patch.
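As a sketch only (this mirrors the get_pv_pcid() macro introduced in
asm-x86/domain.h below; the helper name pcid_for() is hypothetical and
not part of the patch), the four cases map to PCID values like this:

    /* PCID_PV_PRIV = 1, PCID_PV_USER = 2, PCID_PV_XEN = 4 (ORed in). */
    static unsigned int pcid_for(bool guest_kernel_mode, bool xen_active)
    {
        unsigned int pcid = guest_kernel_mode ? PCID_PV_PRIV : PCID_PV_USER;

        if ( xen_active )
            pcid |= PCID_PV_XEN;    /* hypervisor-active variant */

        return pcid;
    }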
Add a pcid flag to struct pv_domain to make it possible to use PCID
without XPTI later.
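For illustration, a minimal standalone sketch (an assumed helper, not
part of the patch) of how make_cr3() below composes a PCID-tagged CR3
value: the PCID occupies CR3 bits 0-11 (X86_CR3_PCIDMASK), and with
CR4.PCIDE set, bit 63 (X86_CR3_NOFLUSH) tells the CPU not to flush the
new PCID's TLB entries on the load:

    #include <stdint.h>

    #define X86_CR3_NOFLUSH   (1ULL << 63)
    #define X86_CR3_PCIDMASK  0x0000000000000fffULL

    /* Tag a page table base address with a PCID plus the no-flush hint. */
    static uint64_t tagged_cr3(uint64_t pg_table_pa, unsigned int pcid)
    {
        return X86_CR3_NOFLUSH | (pg_table_pa & ~X86_CR3_PCIDMASK) | pcid;
    }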
Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
xen/arch/x86/cpu/mtrr/generic.c | 5 +++
xen/arch/x86/domain_page.c | 2 +-
xen/arch/x86/flushtlb.c | 74 +++++++++++++++++++++++------------------
xen/arch/x86/mm.c | 12 ++++++-
xen/arch/x86/pv/domain.c | 4 +++
xen/arch/x86/setup.c | 3 ++
xen/include/asm-x86/domain.h | 34 +++++++++++++------
xen/include/asm-x86/x86-defns.h | 1 +
8 files changed, 90 insertions(+), 45 deletions(-)
diff --git a/xen/arch/x86/cpu/mtrr/generic.c b/xen/arch/x86/cpu/mtrr/generic.c
index d705138100..84b9cd78df 100644
--- a/xen/arch/x86/cpu/mtrr/generic.c
+++ b/xen/arch/x86/cpu/mtrr/generic.c
@@ -5,6 +5,7 @@
#include <xen/mm.h>
#include <xen/stdbool.h>
#include <asm/flushtlb.h>
+#include <asm/invpcid.h>
#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
@@ -417,6 +418,8 @@ static bool prepare_set(void)
cr4 = read_cr4();
if (cr4 & X86_CR4_PGE)
write_cr4(cr4 & ~X86_CR4_PGE);
+ else if ( cpu_has_invpcid )
+ invpcid_flush_all();
else
asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
@@ -440,6 +443,8 @@ static void post_set(bool pge)
/* Reenable CR4.PGE (also flushes the TLB) */
if (pge)
write_cr4(read_cr4() | X86_CR4_PGE);
+ else if ( cpu_has_invpcid )
+ invpcid_flush_all();
else
asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
index 3432a854dd..e4b7f74f34 100644
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -51,7 +51,7 @@ static inline struct vcpu *mapcache_current_vcpu(void)
if ( (v = idle_vcpu[smp_processor_id()]) == current )
sync_local_execstate();
/* We must now be running on the idle page table. */
- ASSERT(read_cr3() == __pa(idle_pg_table));
+ ASSERT((read_cr3() & ~X86_CR3_PCIDMASK) == __pa(idle_pg_table));
}
return v;
diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index 186d9099f6..a65fad00ed 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -75,39 +75,46 @@ static void post_flush(u32 t)
static void do_flush_tlb(unsigned long cr3)
{
unsigned long cr4;
+ u32 t;
+
+ t = pre_flush();
cr4 = read_cr4();
- if ( cr4 & X86_CR4_PGE )
+
+ if ( cpu_has_invpcid )
{
- write_cr4(cr4 & ~X86_CR4_PGE);
if ( cr3 )
asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
- else
- barrier();
- write_cr4(cr4);
+ if ( !cr3 || (cr3 & X86_CR3_NOFLUSH) || (cr4 & X86_CR4_PGE) )
+ invpcid_flush_all();
}
else
{
- if ( !cr3 )
+ /* PCID not possible here, as invpcid is required for PCID. */
+ if ( cr4 & X86_CR4_PGE )
+ write_cr4(cr4 & ~X86_CR4_PGE);
+ else if ( !cr3 )
cr3 = read_cr3();
- asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+ if ( cr3 )
+ asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+ else
+ barrier();
+ if ( cr4 & X86_CR4_PGE )
+ write_cr4(cr4);
}
+
+ post_flush(t);
}
void write_cr3(unsigned long cr3)
{
unsigned long flags;
- u32 t;
/* This non-reentrant function is sometimes called in interrupt context. */
local_irq_save(flags);
- t = pre_flush();
-
do_flush_tlb(cr3);
- post_flush(t);
-
local_irq_restore(flags);
}
@@ -128,30 +135,33 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
{
if ( order == 0 )
{
- /*
- * We don't INVLPG multi-page regions because the 2M/4M/1G
- * region may not have been mapped with a superpage. Also there
- * are various errata surrounding INVLPG usage on superpages, and
- * a full flush is in any case not *that* expensive.
- */
- asm volatile ( "invlpg %0"
- : : "m" (*(const char *)(va)) : "memory" );
- }
- else
- {
- u32 t = pre_flush();
+ if ( read_cr3() & X86_CR3_PCIDMASK )
+ {
+ unsigned long addr = (unsigned long)va;
- if ( !cpu_has_invpcid )
- do_flush_tlb(0);
+ /*
+ * Flush the addresses for all potential address spaces.
+ */
+ invpcid_flush_one(PCID_PV_PRIV, addr);
+ invpcid_flush_one(PCID_PV_USER, addr);
+ invpcid_flush_one(PCID_PV_PRIV | PCID_PV_XEN, addr);
+ invpcid_flush_one(PCID_PV_USER | PCID_PV_XEN, addr);
+ }
else
+ {
/*
- * Using invpcid to flush all mappings works
- * regardless of whether PCID is enabled or not.
- * It is faster than read-modify-write CR4.
+ * We don't INVLPG multi-page regions because the 2M/4M/1G
+ * region may not have been mapped with a superpage. Also there
+ * are various errata surrounding INVLPG usage on superpages,
+ * and a full flush is in any case not *that* expensive.
*/
- invpcid_flush_all();
-
- post_flush(t);
+ asm volatile ( "invlpg %0"
+ : : "m" (*(const char *)(va)) : "memory" );
+ }
+ }
+ else
+ {
+ do_flush_tlb(0);
}
}
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 2d8366a01c..82fbbe0a10 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -506,6 +506,8 @@ void free_shared_domheap_page(struct page_info *page)
void make_cr3(struct vcpu *v, mfn_t mfn)
{
v->arch.cr3 = mfn_x(mfn) << PAGE_SHIFT;
+ if ( is_pv_vcpu(v) && v->domain->arch.pv_domain.pcid )
+ v->arch.cr3 |= X86_CR3_NOFLUSH | get_pv_pcid(v, 1);
}
void write_ptbase(struct vcpu *v)
@@ -514,7 +516,15 @@ void write_ptbase(struct vcpu *v)
{
get_cpu_info()->root_pgt_changed = true;
get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt));
- asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
+ if ( v->domain->arch.pv_domain.pcid )
+ {
+ get_cpu_info()->pv_cr3 |= X86_CR3_NOFLUSH | get_pv_pcid(v, 0);
+ write_cr3(v->arch.cr3);
+ }
+ else
+ {
+ asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
+ }
}
else
{
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 5f15c9e25b..37338b2a01 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -96,8 +96,12 @@ void xpti_domain_init(struct domain *d)
}
if ( d->arch.pv_domain.xpti )
+ {
+ d->arch.pv_domain.pcid = cpu_has_pcid && cpu_has_invpcid;
+
printk("Enabling Xen Pagetable protection (XPTI) for Domain %d\n",
d->domain_id);
+ }
}
static void noreturn continue_nonidle_domain(struct vcpu *v)
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 7c9fbfe04a..781f191e6e 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1547,6 +1547,9 @@ void __init noreturn __start_xen(unsigned long mbi_p)
if ( cpu_has_fsgsbase )
set_in_cr4(X86_CR4_FSGSBASE);
+ if ( cpu_has_invpcid && cpu_has_pcid )
+ set_in_cr4(X86_CR4_PCIDE);
+
init_speculation_mitigations();
init_idle_domain();
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 316418a6fe..a2ca03583f 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -260,8 +260,20 @@ struct pv_domain
/* XPTI active? */
bool xpti;
+
+ /* Use PCID for the different address spaces? */
+ bool pcid;
};
+/* PCID values for the address spaces: */
+#define PCID_PV_PRIV 0x0001
+#define PCID_PV_USER 0x0002
+#define PCID_PV_XEN 0x0004 /* To be ORed to above values. */
+
+#define get_pv_pcid(v, xen) \
+ (((xen) ? PCID_PV_XEN : 0) | \
+ (((v)->arch.flags & TF_kernel_mode) ? PCID_PV_PRIV : PCID_PV_USER))
+
struct monitor_write_data {
struct {
unsigned int msr : 1;
@@ -615,18 +627,18 @@ void vcpu_show_registers(const struct vcpu *);
unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
/* Convert between guest-visible and real CR4 values. */
-#define pv_guest_cr4_to_real_cr4(v) \
- (((v)->arch.pv_vcpu.ctrlreg[4] \
- | (mmu_cr4_features \
- & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP | \
- X86_CR4_SMAP | X86_CR4_OSXSAVE | \
- X86_CR4_FSGSBASE)) \
- | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)) \
- & ~(X86_CR4_DE | \
+#define pv_guest_cr4_to_real_cr4(v) \
+ (((v)->arch.pv_vcpu.ctrlreg[4] \
+ | (mmu_cr4_features \
+ & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP | \
+ X86_CR4_SMAP | X86_CR4_OSXSAVE | \
+ X86_CR4_FSGSBASE | X86_CR4_PCIDE)) \
+ | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)) \
+ & ~(X86_CR4_DE | \
((v)->domain->arch.pv_domain.xpti ? X86_CR4_PGE : 0)))
-#define real_cr4_to_pv_guest_cr4(c) \
- ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | \
- X86_CR4_OSXSAVE | X86_CR4_SMEP | \
+#define real_cr4_to_pv_guest_cr4(c) \
+ ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | \
+ X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_PCIDE | \
X86_CR4_FSGSBASE | X86_CR4_SMAP))
#define domain_max_vcpus(d) (is_hvm_domain(d) ? HVM_MAX_VCPUS : MAX_VIRT_CPUS)
diff --git a/xen/include/asm-x86/x86-defns.h b/xen/include/asm-x86/x86-defns.h
index 8598adef14..d007997f88 100644
--- a/xen/include/asm-x86/x86-defns.h
+++ b/xen/include/asm-x86/x86-defns.h
@@ -46,6 +46,7 @@
* Intel CPU flags in CR3
*/
#define X86_CR3_NOFLUSH (_AC(1, ULL) << 63)
+#define X86_CR3_PCIDMASK _AC(0x0000000000000fff, ULL) /* Mask for PCID */
/*
* Intel CPU features in CR4
--
2.13.6