[Xen-devel] [PATCH v3 7/7] xen/x86: use PCID feature
Avoid flushing the complete TLB when switching %cr3 for mitigation of
Meltdown by using the PCID feature if available.

We are using 4 PCID values for a 64-bit pv domain subject to XPTI and
2 values for the non-XPTI case:

- guest active and in kernel mode
- guest active and in user mode
- hypervisor active and guest in user mode (XPTI only)
- hypervisor active and guest in kernel mode (XPTI only)

We use PCID only if PCID _and_ INVPCID are supported. With PCID in use
we disable global pages in cr4.

A command line parameter controls in which cases PCID is used. As the
non-XPTI case has been shown not to perform better with PCID on at
least some machines, the default is to use PCID only for domains
subject to XPTI.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
V3:
- support PCID for non-XPTI case, too
- add command line parameter for controlling usage of PCID
- check PCID active by using cr4.pcide (Jan Beulich)
---
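For review convenience, a minimal standalone C sketch (not part of the patch)
of how the four address spaces listed above map onto PCID values. It mirrors
the PCID_PV_* constants and the get_pcid_bits() helper introduced by the
patch; the pcid_bits() stand-in and the demo main() below are purely
illustrative.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Values as introduced by the patch (asm-x86/domain.h, asm-x86/x86-defns.h). */
#define PCID_PV_PRIV     0x0000ULL   /* guest kernel mode address space */
#define PCID_PV_USER     0x0001ULL   /* guest user mode address space */
#define PCID_PV_XEN      0x0002ULL   /* ORed in for the XPTI (Xen active) view */
#define X86_CR3_NOFLUSH  (1ULL << 63)

/* Illustrative stand-in for get_pcid_bits(): select the PCID for a context. */
static uint64_t pcid_bits(bool guest_kernel_mode, bool xen_active)
{
    return X86_CR3_NOFLUSH | (xen_active ? PCID_PV_XEN : 0) |
           (guest_kernel_mode ? PCID_PV_PRIV : PCID_PV_USER);
}

int main(void)
{
    /* Prints PCIDs 0..3 for the four address spaces used when XPTI is on. */
    printf("guest active, kernel mode: PCID %llu\n",
           (unsigned long long)(pcid_bits(true, false) & 0xfff));
    printf("guest active, user mode:   PCID %llu\n",
           (unsigned long long)(pcid_bits(false, false) & 0xfff));
    printf("Xen active, guest kernel:  PCID %llu\n",
           (unsigned long long)(pcid_bits(true, true) & 0xfff));
    printf("Xen active, guest user:    PCID %llu\n",
           (unsigned long long)(pcid_bits(false, true) & 0xfff));
    return 0;
}

Without XPTI only the first two values are used, matching the two-PCID case
mentioned above.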
 docs/misc/xen-command-line.markdown | 12 +++++++++
 xen/arch/x86/debug.c                |  3 ++-
 xen/arch/x86/domain_page.c          |  2 +-
 xen/arch/x86/domctl.c               |  4 +++
 xen/arch/x86/flushtlb.c             | 49 ++++++++++++++++++++++++++++------
 xen/arch/x86/mm.c                   | 34 +++++++++++++++++++++---
 xen/arch/x86/pv/dom0_build.c        |  1 +
 xen/arch/x86/pv/domain.c            | 52 +++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/domain.h        | 14 +++++++---
 xen/include/asm-x86/pv/domain.h     |  2 ++
 xen/include/asm-x86/x86-defns.h     |  1 +
 11 files changed, 158 insertions(+), 16 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index 8fc7b2ff3b..4ecf471ea9 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1450,6 +1450,18 @@ All numbers specified must be hexadecimal ones.
 
 This option can be specified more than once (up to 8 times at present).
 
+### pcid (x86)
+> `= off | all | xpti | noxpti`
+
+> Default: `xpti`
+
+> Can be modified at runtime
+
+If available, control usage of the PCID feature of the processor for
+64-bit pv-domains. PCID can be used either for no domain at all, for
+all of them, only for those subject to XPTI or for those not subject
+to XPTI.
+
 ### ple\_gap
 > `= <integer>`
 
diff --git a/xen/arch/x86/debug.c b/xen/arch/x86/debug.c
index 9159f32db4..c8079569c4 100644
--- a/xen/arch/x86/debug.c
+++ b/xen/arch/x86/debug.c
@@ -97,7 +97,8 @@ dbg_pv_va2mfn(dbgva_t vaddr, struct domain *dp, uint64_t pgd3val)
     l3_pgentry_t l3e, *l3t;
     l2_pgentry_t l2e, *l2t;
     l1_pgentry_t l1e, *l1t;
-    unsigned long cr3 = (pgd3val ? pgd3val : dp->vcpu[0]->arch.cr3);
+    unsigned long cr3 = (pgd3val ? pgd3val
+                         : (dp->vcpu[0]->arch.cr3 & ~X86_CR3_NOFLUSH));
     mfn_t mfn = maddr_to_mfn(cr3);
 
     DBGP2("vaddr:%lx domid:%d cr3:%lx pgd3:%lx\n", vaddr, dp->domain_id,
diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
index b5780f201f..8073ae5282 100644
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -51,7 +51,7 @@ static inline struct vcpu *mapcache_current_vcpu(void)
         if ( (v = idle_vcpu[smp_processor_id()]) == current )
             sync_local_execstate();
         /* We must now be running on the idle page table. */
-        ASSERT(read_cr3() == __pa(idle_pg_table));
+        ASSERT((read_cr3() & ~X86_CR3_PCIDMASK) == __pa(idle_pg_table));
     }
 
     return v;
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 0704f398c7..a7c8772fa6 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -613,7 +613,11 @@ long arch_do_domctl(
             ret = -EINVAL;
 
         if ( ret == 0 )
+        {
             xpti_domain_init(d);
+            pcid_domain_init(d);
+        }
+
         break;
 
     case XEN_DOMCTL_get_address_size:
diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index d4b8acc837..092ef86314 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -102,7 +102,19 @@ void write_cr3_cr4(unsigned long cr3, unsigned long cr4)
     t = pre_flush();
 
     if ( read_cr4() & X86_CR4_PGE )
+        /*
+         * X86_CR4_PGE set means PCID being inactive.
+         * We have to purge the TLB via flipping cr4.pge.
+         */
         write_cr4(cr4 & ~X86_CR4_PGE);
+    else if ( cpu_has_invpcid )
+        /*
+         * If we are using PCID purge the TLB via INVPCID as loading cr3
+         * will affect the current PCID only.
+         * If INVPCID is not supported we don't use PCIDs so loading cr3
+         * will purge the TLB (we are in the "global pages off" branch).
+         */
+        invpcid_flush_all();
 
     asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
 
@@ -131,14 +143,35 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
     {
         if ( order == 0 )
         {
-            /*
-             * We don't INVLPG multi-page regions because the 2M/4M/1G
-             * region may not have been mapped with a superpage. Also there
-             * are various errata surrounding INVLPG usage on superpages, and
-             * a full flush is in any case not *that* expensive.
-             */
-            asm volatile ( "invlpg %0"
-                           : : "m" (*(const char *)(va)) : "memory" );
+            if ( read_cr4() & X86_CR4_PCIDE )
+            {
+                unsigned long addr = (unsigned long)va;
+
+                /*
+                 * Flush the addresses for all potential address spaces.
+                 * We can't check the current domain for being subject to
+                 * XPTI as current might be the idle vcpu while we still have
+                 * some XPTI domain TLB entries.
+                 * Using invpcid is okay here, as with PCID enabled we always
+                 * have global pages disabled.
+                 */
+                invpcid_flush_one(PCID_PV_PRIV, addr);
+                invpcid_flush_one(PCID_PV_USER, addr);
+                if ( !cpu_has_no_xpti )
+                {
+                    invpcid_flush_one(PCID_PV_PRIV | PCID_PV_XEN, addr);
+                    invpcid_flush_one(PCID_PV_USER | PCID_PV_XEN, addr);
+                }
+            }
+            else
+                /*
+                 * We don't INVLPG multi-page regions because the 2M/4M/1G
+                 * region may not have been mapped with a superpage. Also there
+                 * are various errata surrounding INVLPG usage on superpages,
+                 * and a full flush is in any case not *that* expensive.
+                 */
+                asm volatile ( "invlpg %0"
+                               : : "m" (*(const char *)(va)) : "memory" );
         }
         else
             do_tlb_flush();
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 29071bb257..242425c075 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -497,12 +497,38 @@ void free_shared_domheap_page(struct page_info *page)
     free_domheap_page(page);
 }
 
+/*
+ * Return additional PCID specific cr3 bits.
+ *
+ * Note that X86_CR3_NOFLUSH will not be readable in cr3. Anyone consuming
+ * v->arch.cr3 should mask away X86_CR3_NOFLUSH and X86_CR3_PCIDMASK in case
+ * the value is used to address the root page table.
+ */
+static unsigned long get_pcid_bits(struct vcpu *v, bool is_xen)
+{
+    return X86_CR3_NOFLUSH | (is_xen ? PCID_PV_XEN : 0) |
+           ((v->arch.flags & TF_kernel_mode) ? PCID_PV_PRIV : PCID_PV_USER);
+}
+
 void make_cr3(struct vcpu *v, mfn_t mfn)
 {
+    struct domain *d = v->domain;
+
     v->arch.cr3 = mfn_x(mfn) << PAGE_SHIFT;
-    if ( v == current && this_cpu(root_pgt) && is_pv_vcpu(v) &&
-         !is_pv_32bit_vcpu(v) )
-        get_cpu_info()->root_pgt_changed = true;
+    if ( is_pv_domain(d) )
+    {
+        if ( d->arch.pv_domain.xpti && v == current )
+        {
+            struct cpu_info *cpu_info = get_cpu_info();
+
+            cpu_info->root_pgt_changed = true;
+            if ( d->arch.pv_domain.pcid )
+                cpu_info->pv_cr3 = __pa(this_cpu(root_pgt)) |
+                                   get_pcid_bits(v, false);
+        }
+        if ( d->arch.pv_domain.pcid )
+            v->arch.cr3 |= get_pcid_bits(v, d->arch.pv_domain.xpti);
+    }
 }
 
 void write_ptbase(struct vcpu *v)
@@ -517,6 +543,8 @@ void write_ptbase(struct vcpu *v)
     {
         cpu_info->root_pgt_changed = true;
         cpu_info->pv_cr3 = __pa(this_cpu(root_pgt));
+        if ( new_cr4 & X86_CR4_PCIDE )
+            cpu_info->pv_cr3 |= get_pcid_bits(v, false);
         write_cr3_cr4(v->arch.cr3, new_cr4);
     }
     else
diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index 77186c19bd..2af0094e95 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -709,6 +709,7 @@ int __init dom0_construct_pv(struct domain *d,
     }
 
     xpti_domain_init(d);
+    pcid_domain_init(d);
 
     d->arch.paging.mode = 0;
 
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 266117e804..46c050aeeb 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -95,6 +95,58 @@ void xpti_domain_init(struct domain *d)
     }
 }
 
+static __read_mostly enum {
+    PCID_OFF,
+    PCID_ALL,
+    PCID_XPTI,
+    PCID_NOXPTI
+} opt_pcid = PCID_XPTI;
+
+static __init int parse_pcid(const char *s)
+{
+    int rc = 0;
+
+    if ( !strcmp(s, "off") )
+        opt_pcid = PCID_OFF;
+    else if ( !strcmp(s, "all") )
+        opt_pcid = PCID_ALL;
+    else if ( !strcmp(s, "xpti") )
+        opt_pcid = PCID_XPTI;
+    else if ( !strcmp(s, "noxpti") )
+        opt_pcid = PCID_NOXPTI;
+    else
+        rc = -EINVAL;
+
+    return rc;
+}
+custom_runtime_param("pcid", parse_pcid);
+
+void pcid_domain_init(struct domain *d)
+{
+    if ( !is_pv_domain(d) || is_pv_32bit_domain(d) ||
+         !cpu_has_invpcid || !cpu_has_pcid )
+        return;
+
+    switch ( opt_pcid )
+    {
+    case PCID_OFF:
+        d->arch.pv_domain.pcid = false;
+        break;
+    case PCID_ALL:
+        d->arch.pv_domain.pcid = true;
+        break;
+    case PCID_XPTI:
+        d->arch.pv_domain.pcid = d->arch.pv_domain.xpti;
+        break;
+    case PCID_NOXPTI:
+        d->arch.pv_domain.pcid = !d->arch.pv_domain.xpti;
+        break;
+    default:
+        ASSERT_UNREACHABLE();
+        break;
+    }
+}
+
 static void noreturn continue_nonidle_domain(struct vcpu *v)
 {
     check_wakeup_from_wait();
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index ef9e9639fd..7751f9225b 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -255,6 +255,8 @@ struct pv_domain
 
     /* XPTI active? */
     bool xpti;
+    /* Use PCID feature? */
+    bool pcid;
 
     /* map_domain_page() mapping cache. */
     struct mapcache_domain mapcache;
@@ -262,6 +264,11 @@ struct pv_domain
     struct cpuidmasks *cpuidmasks;
 };
 
+/* PCID values for the address spaces of 64-bit pv domains: */
+#define PCID_PV_PRIV      0x0000    /* Used for other domains, too. */
+#define PCID_PV_USER      0x0001
+#define PCID_PV_XEN       0x0002    /* To be ORed to above values. */
+
 struct monitor_write_data {
     struct {
         unsigned int msr : 1;
@@ -619,14 +626,15 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
       | (mmu_cr4_features                                  \
          & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP |     \
             X86_CR4_SMAP | X86_CR4_OSXSAVE |               \
-            X86_CR4_FSGSBASE))                             \
-      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))        \
+            X86_CR4_FSGSBASE | X86_CR4_PCIDE))             \
+      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)         \
+      | ((v)->domain->arch.pv_domain.pcid ? X86_CR4_PCIDE : 0)) \
      & ~(X86_CR4_DE |                                      \
          ((v)->domain->arch.pv_domain.xpti ? X86_CR4_PGE : 0)))
 #define real_cr4_to_pv_guest_cr4(c)                        \
     ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD |     \
              X86_CR4_OSXSAVE | X86_CR4_SMEP |              \
-             X86_CR4_FSGSBASE | X86_CR4_SMAP))
+             X86_CR4_FSGSBASE | X86_CR4_SMAP | X86_CR4_PCIDE))
 
 #define domain_max_vcpus(d) (is_hvm_domain(d) ? HVM_MAX_VCPUS : MAX_VIRT_CPUS)
 
diff --git a/xen/include/asm-x86/pv/domain.h b/xen/include/asm-x86/pv/domain.h
index 2213a8fb3d..8fd822ca83 100644
--- a/xen/include/asm-x86/pv/domain.h
+++ b/xen/include/asm-x86/pv/domain.h
@@ -29,6 +29,7 @@ void pv_domain_destroy(struct domain *d);
 int pv_domain_initialise(struct domain *d);
 void xpti_init(void);
 void xpti_domain_init(struct domain *d);
+void pcid_domain_init(struct domain *d);
 
 #else /* !CONFIG_PV */
 
@@ -40,6 +41,7 @@ static inline void pv_domain_destroy(struct domain *d) {}
 static inline int pv_domain_initialise(struct domain *d) { return -EOPNOTSUPP; }
 static inline void xpti_init(void) {}
 static inline void xpti_domain_init(struct domain *d) {}
+static inline void pcid_domain_init(struct domain *d) {}
 
 #endif /* CONFIG_PV */
 
 void paravirt_ctxt_switch_from(struct vcpu *v);
diff --git a/xen/include/asm-x86/x86-defns.h b/xen/include/asm-x86/x86-defns.h
index ff8d66be3c..e323d3c01f 100644
--- a/xen/include/asm-x86/x86-defns.h
+++ b/xen/include/asm-x86/x86-defns.h
@@ -46,6 +46,7 @@
  * Intel CPU flags in CR3
  */
 #define X86_CR3_NOFLUSH (_AC(1, ULL) << 63)
+#define X86_CR3_PCIDMASK _AC(0x0fff, ULL) /* Mask for PCID */
 
 /*
  * Intel CPU features in CR4
-- 
2.13.6


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel