[Xen-devel] [PATCH v3 7/7] xen/x86: use PCID feature
Avoid flushing the complete TLB when switching %cr3 for mitigation of
Meltdown by using the PCID feature if available.
We are using 4 PCID values for a 64-bit pv domain subject to XPTI and
2 values for the non-XPTI case:
- guest active and in kernel mode
- guest active and in user mode
- hypervisor active and guest in user mode (XPTI only)
- hypervisor active and guest in kernel mode (XPTI only)
We use PCID only if PCID _and_ INVPCID are supported. With PCID in use
we disable global pages in cr4. A command line parameter controls the
cases in which PCID is used.
As the non-XPTI case has been shown not to perform better with PCID, at
least on some machines, the default is to use PCID only for domains
subject to XPTI.
Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
V3:
- support PCID for non-XPTI case, too
- add command line parameter for controlling usage of PCID
- check PCID active by using cr4.pcide (Jan Beulich)
---
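
For illustration, here is a standalone sketch of how the resulting cr3
values are composed, using the PCID_PV_* and X86_CR3_* constants the patch
introduces further down; build_guest_cr3() and the whole program are made
up for this example and are not part of the patch:

    /* Illustrative only -- mirrors the constants this patch introduces. */
    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define X86_CR3_NOFLUSH  (1ULL << 63)  /* keep the new PCID's TLB entries */
    #define X86_CR3_PCIDMASK 0xfffULL      /* low 12 bits of cr3 select the PCID */

    #define PCID_PV_PRIV     0x0000        /* guest in kernel mode */
    #define PCID_PV_USER     0x0001        /* guest in user mode */
    #define PCID_PV_XEN      0x0002        /* ORed in for the XPTI (Xen) page tables */

    /* Made-up helper: compose a cr3 value from the root page table MFN. */
    static uint64_t build_guest_cr3(uint64_t mfn, bool kernel_mode, bool xen_pt)
    {
        uint64_t cr3 = mfn << 12;          /* PAGE_SHIFT */

        cr3 |= kernel_mode ? PCID_PV_PRIV : PCID_PV_USER;
        if ( xen_pt )
            cr3 |= PCID_PV_XEN;

        return cr3 | X86_CR3_NOFLUSH;      /* don't flush this PCID on load */
    }

    int main(void)
    {
        /* The four address spaces of an XPTI domain map to PCIDs 0..3. */
        printf("guest kernel: %#" PRIx64 "\n", build_guest_cr3(0x1234, true, false));
        printf("guest user:   %#" PRIx64 "\n", build_guest_cr3(0x1234, false, false));
        printf("xen, kernel:  %#" PRIx64 "\n", build_guest_cr3(0x1234, true, true));
        printf("xen, user:    %#" PRIx64 "\n", build_guest_cr3(0x1234, false, true));
        return 0;
    }
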
docs/misc/xen-command-line.markdown | 12 +++++++++
xen/arch/x86/debug.c | 3 ++-
xen/arch/x86/domain_page.c | 2 +-
xen/arch/x86/domctl.c | 4 +++
xen/arch/x86/flushtlb.c | 49 ++++++++++++++++++++++++++++------
xen/arch/x86/mm.c | 34 +++++++++++++++++++++---
xen/arch/x86/pv/dom0_build.c | 1 +
xen/arch/x86/pv/domain.c | 52 +++++++++++++++++++++++++++++++++++++
xen/include/asm-x86/domain.h | 14 +++++++---
xen/include/asm-x86/pv/domain.h | 2 ++
xen/include/asm-x86/x86-defns.h | 1 +
11 files changed, 158 insertions(+), 16 deletions(-)
diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index 8fc7b2ff3b..4ecf471ea9 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1450,6 +1450,18 @@ All numbers specified must be hexadecimal ones.
This option can be specified more than once (up to 8 times at present).
+### pcid (x86)
+> `= off | all | xpti | noxpti`
+
+> Default: `xpti`
+
+> Can be modified at runtime
+
+If available, control usage of the PCID feature of the processor for
+64-bit pv-domains. PCID can be used either for no domain at all, for
+all of them, only for those subject to XPTI, or only for those not
+subject to XPTI.
+
### ple\_gap
> `= <integer>`
diff --git a/xen/arch/x86/debug.c b/xen/arch/x86/debug.c
index 9159f32db4..c8079569c4 100644
--- a/xen/arch/x86/debug.c
+++ b/xen/arch/x86/debug.c
@@ -97,7 +97,8 @@ dbg_pv_va2mfn(dbgva_t vaddr, struct domain *dp, uint64_t pgd3val)
l3_pgentry_t l3e, *l3t;
l2_pgentry_t l2e, *l2t;
l1_pgentry_t l1e, *l1t;
- unsigned long cr3 = (pgd3val ? pgd3val : dp->vcpu[0]->arch.cr3);
+ unsigned long cr3 = (pgd3val ? pgd3val
+ : (dp->vcpu[0]->arch.cr3 & ~X86_CR3_NOFLUSH));
mfn_t mfn = maddr_to_mfn(cr3);
DBGP2("vaddr:%lx domid:%d cr3:%lx pgd3:%lx\n", vaddr, dp->domain_id,
diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
index b5780f201f..8073ae5282 100644
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -51,7 +51,7 @@ static inline struct vcpu *mapcache_current_vcpu(void)
if ( (v = idle_vcpu[smp_processor_id()]) == current )
sync_local_execstate();
/* We must now be running on the idle page table. */
- ASSERT(read_cr3() == __pa(idle_pg_table));
+ ASSERT((read_cr3() & ~X86_CR3_PCIDMASK) == __pa(idle_pg_table));
}
return v;
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 0704f398c7..a7c8772fa6 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -613,7 +613,11 @@ long arch_do_domctl(
ret = -EINVAL;
if ( ret == 0 )
+ {
xpti_domain_init(d);
+ pcid_domain_init(d);
+ }
+
break;
case XEN_DOMCTL_get_address_size:
diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index d4b8acc837..092ef86314 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -102,7 +102,19 @@ void write_cr3_cr4(unsigned long cr3, unsigned long cr4)
t = pre_flush();
if ( read_cr4() & X86_CR4_PGE )
+ /*
+ * X86_CR4_PGE set means PCID is inactive.
+ * We have to purge the TLB via flipping cr4.pge.
+ */
write_cr4(cr4 & ~X86_CR4_PGE);
+ else if ( cpu_has_invpcid )
+ /*
+ * If we are using PCID, purge the TLB via INVPCID, as loading cr3
+ * will affect the current PCID only.
+ * If INVPCID is not supported we don't use PCIDs, so loading cr3
+ * will purge the TLB (we are in the "global pages off" branch).
+ */
+ invpcid_flush_all();
asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
@@ -131,14 +143,35 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
{
if ( order == 0 )
{
- /*
- * We don't INVLPG multi-page regions because the 2M/4M/1G
- * region may not have been mapped with a superpage. Also there
- * are various errata surrounding INVLPG usage on superpages, and
- * a full flush is in any case not *that* expensive.
- */
- asm volatile ( "invlpg %0"
- : : "m" (*(const char *)(va)) : "memory" );
+ if ( read_cr4() & X86_CR4_PCIDE )
+ {
+ unsigned long addr = (unsigned long)va;
+
+ /*
+ * Flush the addresses for all potential address spaces.
+ * We can't check the current domain for being subject to
+ * XPTI as current might be the idle vcpu while we still have
+ * some XPTI domain TLB entries.
+ * Using invpcid is okay here, as with PCID enabled we always
+ * have global pages disabled.
+ */
+ invpcid_flush_one(PCID_PV_PRIV, addr);
+ invpcid_flush_one(PCID_PV_USER, addr);
+ if ( !cpu_has_no_xpti )
+ {
+ invpcid_flush_one(PCID_PV_PRIV | PCID_PV_XEN, addr);
+ invpcid_flush_one(PCID_PV_USER | PCID_PV_XEN, addr);
+ }
+ }
+ else
+ /*
+ * We don't INVLPG multi-page regions because the 2M/4M/1G
+ * region may not have been mapped with a superpage. Also there
+ * are various errata surrounding INVLPG usage on superpages,
+ * and a full flush is in any case not *that* expensive.
+ */
+ asm volatile ( "invlpg %0"
+ : : "m" (*(const char *)(va)) : "memory" );
}
else
do_tlb_flush();
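
The net effect in write_cr3_cr4() is the following decision, shown here as a
standalone model (flush_method() and the two booleans are illustrative only;
the real code reads cr4 and cpu_has_invpcid directly and then loads cr3):

    /*
     * Standalone model of the flush choice when switching cr3:
     *  - cr4.PGE set   -> PCID is inactive; toggling PGE flushes everything,
     *                     including global entries.
     *  - cr4.PGE clear -> either PCID is in use (global pages are off then),
     *                     so a cr3 load only affects the current PCID and
     *                     INVPCID has to flush all PCIDs; or neither PCID nor
     *                     global pages are in use and the cr3 load alone
     *                     flushes the TLB.
     */
    #include <stdbool.h>
    #include <stdio.h>

    static const char *flush_method(bool cr4_pge, bool has_invpcid)
    {
        if ( cr4_pge )
            return "toggle cr4.PGE";
        if ( has_invpcid )
            return "INVPCID (all PCIDs)";
        return "cr3 load alone";
    }

    int main(void)
    {
        printf("PGE on              : %s\n", flush_method(true, true));
        printf("PGE off, INVPCID    : %s\n", flush_method(false, true));
        printf("PGE off, no INVPCID : %s\n", flush_method(false, false));
        return 0;
    }
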
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 29071bb257..242425c075 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -497,12 +497,38 @@ void free_shared_domheap_page(struct page_info *page)
free_domheap_page(page);
}
+/*
+ * Return additional PCID specific cr3 bits.
+ *
+ * Note that X86_CR3_NOFLUSH will not be readable in cr3. Anyone consuming
+ * v->arch.cr3 should mask away X86_CR3_NOFLUSH and X86_CR3_PCIDMASK in case
+ * the value is used to address the root page table.
+ */
+static unsigned long get_pcid_bits(struct vcpu *v, bool is_xen)
+{
+ return X86_CR3_NOFLUSH | (is_xen ? PCID_PV_XEN : 0) |
+ ((v->arch.flags & TF_kernel_mode) ? PCID_PV_PRIV : PCID_PV_USER);
+}
+
void make_cr3(struct vcpu *v, mfn_t mfn)
{
+ struct domain *d = v->domain;
+
v->arch.cr3 = mfn_x(mfn) << PAGE_SHIFT;
- if ( v == current && this_cpu(root_pgt) && is_pv_vcpu(v) &&
- !is_pv_32bit_vcpu(v) )
- get_cpu_info()->root_pgt_changed = true;
+ if ( is_pv_domain(d) )
+ {
+ if ( d->arch.pv_domain.xpti && v == current )
+ {
+ struct cpu_info *cpu_info = get_cpu_info();
+
+ cpu_info->root_pgt_changed = true;
+ if ( d->arch.pv_domain.pcid )
+ cpu_info->pv_cr3 = __pa(this_cpu(root_pgt)) |
+ get_pcid_bits(v, false);
+ }
+ if ( d->arch.pv_domain.pcid )
+ v->arch.cr3 |= get_pcid_bits(v, d->arch.pv_domain.xpti);
+ }
}
void write_ptbase(struct vcpu *v)
@@ -517,6 +543,8 @@ void write_ptbase(struct vcpu *v)
{
cpu_info->root_pgt_changed = true;
cpu_info->pv_cr3 = __pa(this_cpu(root_pgt));
+ if ( new_cr4 & X86_CR4_PCIDE )
+ cpu_info->pv_cr3 |= get_pcid_bits(v, false);
write_cr3_cr4(v->arch.cr3, new_cr4);
}
else
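
As the comment above get_pcid_bits() says, consumers of v->arch.cr3 now have
to strip the control bits before using the value as an address. A standalone
sketch of that masking (cr3_to_maddr()/cr3_to_mfn() are made-up names; the
constants match the patch, PAGE_SHIFT is 12 on x86):

    /* Illustrative only -- not part of the patch. */
    #include <inttypes.h>
    #include <stdio.h>

    #define X86_CR3_NOFLUSH  (1ULL << 63)
    #define X86_CR3_PCIDMASK 0xfffULL
    #define PAGE_SHIFT       12

    static uint64_t cr3_to_maddr(uint64_t cr3)
    {
        return cr3 & ~(X86_CR3_NOFLUSH | X86_CR3_PCIDMASK);
    }

    static uint64_t cr3_to_mfn(uint64_t cr3)
    {
        /*
         * The page shift drops the 12 PCID bits anyway (which is why the
         * debug.c hunk only masks X86_CR3_NOFLUSH before maddr_to_mfn()),
         * but NOFLUSH sits in bit 63 and must always be stripped.
         */
        return cr3_to_maddr(cr3) >> PAGE_SHIFT;
    }

    int main(void)
    {
        uint64_t cr3 = (UINT64_C(0x1234) << PAGE_SHIFT) | 0x3 | X86_CR3_NOFLUSH;

        printf("maddr %#" PRIx64 ", mfn %#" PRIx64 "\n",
               cr3_to_maddr(cr3), cr3_to_mfn(cr3));
        return 0;
    }
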
diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index 77186c19bd..2af0094e95 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -709,6 +709,7 @@ int __init dom0_construct_pv(struct domain *d,
}
xpti_domain_init(d);
+ pcid_domain_init(d);
d->arch.paging.mode = 0;
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 266117e804..46c050aeeb 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -95,6 +95,58 @@ void xpti_domain_init(struct domain *d)
}
}
+static __read_mostly enum {
+ PCID_OFF,
+ PCID_ALL,
+ PCID_XPTI,
+ PCID_NOXPTI
+} opt_pcid = PCID_XPTI;
+
+static __init int parse_pcid(const char *s)
+{
+ int rc = 0;
+
+ if ( !strcmp(s, "off") )
+ opt_pcid = PCID_OFF;
+ else if ( !strcmp(s, "all") )
+ opt_pcid = PCID_ALL;
+ else if ( !strcmp(s, "xpti") )
+ opt_pcid = PCID_XPTI;
+ else if ( !strcmp(s, "noxpti") )
+ opt_pcid = PCID_NOXPTI;
+ else
+ rc = -EINVAL;
+
+ return rc;
+}
+custom_runtime_param("pcid", parse_pcid);
+
+void pcid_domain_init(struct domain *d)
+{
+ if ( !is_pv_domain(d) || is_pv_32bit_domain(d) ||
+ !cpu_has_invpcid || !cpu_has_pcid )
+ return;
+
+ switch ( opt_pcid )
+ {
+ case PCID_OFF:
+ d->arch.pv_domain.pcid = false;
+ break;
+ case PCID_ALL:
+ d->arch.pv_domain.pcid = true;
+ break;
+ case PCID_XPTI:
+ d->arch.pv_domain.pcid = d->arch.pv_domain.xpti;
+ break;
+ case PCID_NOXPTI:
+ d->arch.pv_domain.pcid = !d->arch.pv_domain.xpti;
+ break;
+ default:
+ ASSERT_UNREACHABLE();
+ break;
+ }
+}
+
static void noreturn continue_nonidle_domain(struct vcpu *v)
{
check_wakeup_from_wait();
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index ef9e9639fd..7751f9225b 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -255,6 +255,8 @@ struct pv_domain
/* XPTI active? */
bool xpti;
+ /* Use PCID feature? */
+ bool pcid;
/* map_domain_page() mapping cache. */
struct mapcache_domain mapcache;
@@ -262,6 +264,11 @@ struct pv_domain
struct cpuidmasks *cpuidmasks;
};
+/* PCID values for the address spaces of 64-bit pv domains: */
+#define PCID_PV_PRIV 0x0000 /* Used for other domains, too. */
+#define PCID_PV_USER 0x0001
+#define PCID_PV_XEN 0x0002 /* To be ORed to above values. */
+
struct monitor_write_data {
struct {
unsigned int msr : 1;
@@ -619,14 +626,15 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
| (mmu_cr4_features \
& (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP | \
X86_CR4_SMAP | X86_CR4_OSXSAVE | \
- X86_CR4_FSGSBASE)) \
- | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)) \
+ X86_CR4_FSGSBASE | X86_CR4_PCIDE)) \
+ | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0) \
+ | ((v)->domain->arch.pv_domain.pcid ? X86_CR4_PCIDE : 0)) \
& ~(X86_CR4_DE | \
((v)->domain->arch.pv_domain.xpti ? X86_CR4_PGE : 0)))
#define real_cr4_to_pv_guest_cr4(c) \
((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | \
X86_CR4_OSXSAVE | X86_CR4_SMEP | \
- X86_CR4_FSGSBASE | X86_CR4_SMAP))
+ X86_CR4_FSGSBASE | X86_CR4_SMAP | X86_CR4_PCIDE))
#define domain_max_vcpus(d) (is_hvm_domain(d) ? HVM_MAX_VCPUS : MAX_VIRT_CPUS)
diff --git a/xen/include/asm-x86/pv/domain.h b/xen/include/asm-x86/pv/domain.h
index 2213a8fb3d..8fd822ca83 100644
--- a/xen/include/asm-x86/pv/domain.h
+++ b/xen/include/asm-x86/pv/domain.h
@@ -29,6 +29,7 @@ void pv_domain_destroy(struct domain *d);
int pv_domain_initialise(struct domain *d);
void xpti_init(void);
void xpti_domain_init(struct domain *d);
+void pcid_domain_init(struct domain *d);
#else /* !CONFIG_PV */
@@ -40,6 +41,7 @@ static inline void pv_domain_destroy(struct domain *d) {}
static inline int pv_domain_initialise(struct domain *d) { return -EOPNOTSUPP; }
static inline void xpti_init(void) {}
static inline void xpti_domain_init(struct domain *d) {}
+static inline void pcid_domain_init(struct domain *d) {}
#endif /* CONFIG_PV */
void paravirt_ctxt_switch_from(struct vcpu *v);
diff --git a/xen/include/asm-x86/x86-defns.h b/xen/include/asm-x86/x86-defns.h
index ff8d66be3c..e323d3c01f 100644
--- a/xen/include/asm-x86/x86-defns.h
+++ b/xen/include/asm-x86/x86-defns.h
@@ -46,6 +46,7 @@
* Intel CPU flags in CR3
*/
#define X86_CR3_NOFLUSH (_AC(1, ULL) << 63)
+#define X86_CR3_PCIDMASK _AC(0x0fff, ULL) /* Mask for PCID */
/*
* Intel CPU features in CR4
--
2.13.6
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel