[Xen-devel] [PATCH v2 6/6] xen/x86: use PCID feature for XPTI
Avoid flushing the complete TLB when switching %cr3 for mitigation of
Meltdown by using the PCID feature if available.

We are using 4 PCID values for a 64-bit pv domain subject to XPTI:

- hypervisor active and guest in kernel mode
- guest active and in kernel mode
- hypervisor active and guest in user mode
- guest active and in user mode

The 2 hypervisor cases could possibly be merged, but for security
reasons this is left for another patch.

Add a pcid flag to struct pv_domain to make it possible to use PCID
without XPTI later.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
 xen/arch/x86/cpu/mtrr/generic.c |  5 +++
 xen/arch/x86/domain_page.c      |  2 +-
 xen/arch/x86/flushtlb.c         | 74 +++++++++++++++++++++++------------------
 xen/arch/x86/mm.c               | 12 ++++++-
 xen/arch/x86/pv/domain.c        |  4 +++
 xen/arch/x86/setup.c            |  3 ++
 xen/include/asm-x86/domain.h    | 34 +++++++++++++------
 xen/include/asm-x86/x86-defns.h |  1 +
 8 files changed, 90 insertions(+), 45 deletions(-)

diff --git a/xen/arch/x86/cpu/mtrr/generic.c b/xen/arch/x86/cpu/mtrr/generic.c
index d705138100..84b9cd78df 100644
--- a/xen/arch/x86/cpu/mtrr/generic.c
+++ b/xen/arch/x86/cpu/mtrr/generic.c
@@ -5,6 +5,7 @@
 #include <xen/mm.h>
 #include <xen/stdbool.h>
 #include <asm/flushtlb.h>
+#include <asm/invpcid.h>
 #include <asm/io.h>
 #include <asm/mtrr.h>
 #include <asm/msr.h>
@@ -417,6 +418,8 @@ static bool prepare_set(void)
 	cr4 = read_cr4();
 	if (cr4 & X86_CR4_PGE)
 		write_cr4(cr4 & ~X86_CR4_PGE);
+	else if ( cpu_has_invpcid )
+		invpcid_flush_all();
 	else
 		asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
 
@@ -440,6 +443,8 @@ static void post_set(bool pge)
 	/* Reenable CR4.PGE (also flushes the TLB) */
 	if (pge)
 		write_cr4(read_cr4() | X86_CR4_PGE);
+	else if ( cpu_has_invpcid )
+		invpcid_flush_all();
 	else
 		asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
 
diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
index 3432a854dd..e4b7f74f34 100644
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -51,7 +51,7 @@ static inline struct vcpu *mapcache_current_vcpu(void)
         if ( (v = idle_vcpu[smp_processor_id()]) == current )
             sync_local_execstate();
         /* We must now be running on the idle page table. */
-        ASSERT(read_cr3() == __pa(idle_pg_table));
+        ASSERT((read_cr3() & ~X86_CR3_PCIDMASK) == __pa(idle_pg_table));
     }
 
     return v;
diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index 186d9099f6..a65fad00ed 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -75,39 +75,46 @@ static void post_flush(u32 t)
 static void do_flush_tlb(unsigned long cr3)
 {
     unsigned long cr4;
+    u32 t;
+
+    t = pre_flush();
 
     cr4 = read_cr4();
-    if ( cr4 & X86_CR4_PGE )
+
+    if ( cpu_has_invpcid )
     {
-        write_cr4(cr4 & ~X86_CR4_PGE);
         if ( cr3 )
             asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
-        else
-            barrier();
-        write_cr4(cr4);
+        if ( !cr3 || (cr3 & X86_CR3_NOFLUSH) || (cr4 & X86_CR4_PGE) )
+            invpcid_flush_all();
     }
     else
     {
-        if ( !cr3 )
+        /* PCID not possible here, as invpcid is required for PCID. */
+        if ( cr4 & X86_CR4_PGE )
+            write_cr4(cr4 & ~X86_CR4_PGE);
+        else if ( !cr3 )
             cr3 = read_cr3();
-        asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+        if ( cr3 )
+            asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+        else
+            barrier();
+        if ( cr4 & X86_CR4_PGE )
+            write_cr4(cr4);
     }
+
+    post_flush(t);
 }
 
 void write_cr3(unsigned long cr3)
 {
     unsigned long flags;
-    u32 t;
 
     /* This non-reentrant function is sometimes called in interrupt context. */
     local_irq_save(flags);
 
-    t = pre_flush();
-
     do_flush_tlb(cr3);
 
-    post_flush(t);
-
     local_irq_restore(flags);
 }
@@ -128,30 +135,33 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
     {
         if ( order == 0 )
         {
-            /*
-             * We don't INVLPG multi-page regions because the 2M/4M/1G
-             * region may not have been mapped with a superpage. Also there
-             * are various errata surrounding INVLPG usage on superpages, and
-             * a full flush is in any case not *that* expensive.
-             */
-            asm volatile ( "invlpg %0"
-                           : : "m" (*(const char *)(va)) : "memory" );
-        }
-        else
-        {
-            u32 t = pre_flush();
+            if ( read_cr3() & X86_CR3_PCIDMASK )
+            {
+                unsigned long addr = (unsigned long)va;
 
-            if ( !cpu_has_invpcid )
-                do_flush_tlb(0);
+                /*
+                 * Flush the addresses for all potential address spaces.
+                 */
+                invpcid_flush_one(PCID_PV_PRIV, addr);
+                invpcid_flush_one(PCID_PV_USER, addr);
+                invpcid_flush_one(PCID_PV_PRIV | PCID_PV_XEN, addr);
+                invpcid_flush_one(PCID_PV_USER | PCID_PV_XEN, addr);
+            }
             else
+            {
                 /*
-                 * Using invpcid to flush all mappings works
-                 * regardless of whether PCID is enabled or not.
-                 * It is faster than read-modify-write CR4.
+                 * We don't INVLPG multi-page regions because the 2M/4M/1G
+                 * region may not have been mapped with a superpage. Also there
+                 * are various errata surrounding INVLPG usage on superpages,
+                 * and a full flush is in any case not *that* expensive.
                  */
-                invpcid_flush_all();
-
-            post_flush(t);
+                asm volatile ( "invlpg %0"
+                               : : "m" (*(const char *)(va)) : "memory" );
+            }
+        }
+        else
+        {
+            do_flush_tlb(0);
         }
     }
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 2d8366a01c..82fbbe0a10 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -506,6 +506,8 @@ void free_shared_domheap_page(struct page_info *page)
 void make_cr3(struct vcpu *v, mfn_t mfn)
 {
     v->arch.cr3 = mfn_x(mfn) << PAGE_SHIFT;
+    if ( is_pv_vcpu(v) && v->domain->arch.pv_domain.pcid )
+        v->arch.cr3 |= X86_CR3_NOFLUSH | get_pv_pcid(v, 1);
 }
 
 void write_ptbase(struct vcpu *v)
@@ -514,7 +516,15 @@ void write_ptbase(struct vcpu *v)
     {
         get_cpu_info()->root_pgt_changed = true;
         get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt));
-        asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
+        if ( v->domain->arch.pv_domain.pcid )
+        {
+            get_cpu_info()->pv_cr3 |= X86_CR3_NOFLUSH | get_pv_pcid(v, 0);
+            write_cr3(v->arch.cr3);
+        }
+        else
+        {
+            asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
+        }
     }
     else
     {
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 5f15c9e25b..37338b2a01 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -96,8 +96,12 @@ void xpti_domain_init(struct domain *d)
     }
 
     if ( d->arch.pv_domain.xpti )
+    {
+        d->arch.pv_domain.pcid = cpu_has_pcid && cpu_has_invpcid;
+
         printk("Enabling Xen Pagetable protection (XPTI) for Domain %d\n",
                d->domain_id);
+    }
 }
 
 static void noreturn continue_nonidle_domain(struct vcpu *v)
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 7c9fbfe04a..781f191e6e 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1547,6 +1547,9 @@ void __init noreturn __start_xen(unsigned long mbi_p)
     if ( cpu_has_fsgsbase )
         set_in_cr4(X86_CR4_FSGSBASE);
 
+    if ( cpu_has_invpcid && cpu_has_pcid )
+        set_in_cr4(X86_CR4_PCIDE);
+
     init_speculation_mitigations();
 
     init_idle_domain();
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 316418a6fe..a2ca03583f 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -260,8 +260,20 @@ struct pv_domain
 
     /* XPTI active? */
     bool xpti;
+
+    /* Use PCID for the different address spaces? */
+    bool pcid;
 };
 
+/* PCID values for the address spaces: */
+#define PCID_PV_PRIV      0x0001
+#define PCID_PV_USER      0x0002
+#define PCID_PV_XEN       0x0004    /* To be ORed to above values. */
+
+#define get_pv_pcid(v, xen) \
+    (((xen) ? PCID_PV_XEN : 0) | \
+     (((v)->arch.flags & TF_kernel_mode) ? PCID_PV_PRIV : PCID_PV_USER))
+
 struct monitor_write_data {
     struct {
         unsigned int msr : 1;
@@ -615,18 +627,18 @@ void vcpu_show_registers(const struct vcpu *);
 unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
 
 /* Convert between guest-visible and real CR4 values. */
-#define pv_guest_cr4_to_real_cr4(v)                         \
-    (((v)->arch.pv_vcpu.ctrlreg[4]                          \
-      | (mmu_cr4_features                                   \
-         & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP |      \
-            X86_CR4_SMAP | X86_CR4_OSXSAVE |                \
-            X86_CR4_FSGSBASE))                              \
-      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))         \
-     & ~(X86_CR4_DE |                                       \
+#define pv_guest_cr4_to_real_cr4(v)                           \
+    (((v)->arch.pv_vcpu.ctrlreg[4]                            \
+      | (mmu_cr4_features                                     \
+         & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP |        \
+            X86_CR4_SMAP | X86_CR4_OSXSAVE |                  \
+            X86_CR4_FSGSBASE | X86_CR4_PCIDE))                \
+      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))           \
+     & ~(X86_CR4_DE |                                         \
         ((v)->domain->arch.pv_domain.xpti ? X86_CR4_PGE : 0)))
-#define real_cr4_to_pv_guest_cr4(c)                         \
-    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD |      \
-             X86_CR4_OSXSAVE | X86_CR4_SMEP |               \
+#define real_cr4_to_pv_guest_cr4(c)                           \
+    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD |        \
+             X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_PCIDE | \
              X86_CR4_FSGSBASE | X86_CR4_SMAP))
 
 #define domain_max_vcpus(d) (is_hvm_domain(d) ? HVM_MAX_VCPUS : MAX_VIRT_CPUS)
diff --git a/xen/include/asm-x86/x86-defns.h b/xen/include/asm-x86/x86-defns.h
index 8598adef14..d007997f88 100644
--- a/xen/include/asm-x86/x86-defns.h
+++ b/xen/include/asm-x86/x86-defns.h
@@ -46,6 +46,7 @@
  * Intel CPU flags in CR3
  */
 #define X86_CR3_NOFLUSH (_AC(1, ULL) << 63)
+#define X86_CR3_PCIDMASK _AC(0x0000000000000fff, ULL) /* Mask for PCID */
 
 /*
  * Intel CPU features in CR4
-- 
2.13.6
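To make the PCID scheme above concrete: make_cr3() ORs one of the four
PCID values into the low 12 bits of the page-table base and sets CR3
bit 63 so that loading CR3 no longer flushes the TLB. The following
standalone C sketch is not part of the patch; the page-table addresses
are invented and this make_cr3() is a simplified stand-in for the Xen
function of the same name. It shows how the four address spaces yield
four distinct CR3 values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define X86_CR3_NOFLUSH   (1ULL << 63)
#define X86_CR3_PCIDMASK  0x0000000000000fffULL

#define PCID_PV_PRIV      0x0001   /* guest in kernel mode */
#define PCID_PV_USER      0x0002   /* guest in user mode */
#define PCID_PV_XEN       0x0004   /* ORed in while Xen is active */

/*
 * Simplified stand-in for Xen's make_cr3(): the base must be 4k
 * aligned, leaving the low 12 bits free to carry the PCID.
 */
static uint64_t make_cr3(uint64_t pt_base, bool xen_active, bool kernel_mode)
{
    uint64_t pcid = (xen_active ? PCID_PV_XEN : 0) |
                    (kernel_mode ? PCID_PV_PRIV : PCID_PV_USER);

    return pt_base | (pcid & X86_CR3_PCIDMASK) | X86_CR3_NOFLUSH;
}

int main(void)
{
    /* The four address spaces of an XPTI pv vcpu (addresses invented): */
    printf("guest, kernel mode: %#llx\n",
           (unsigned long long)make_cr3(0x23000000, false, true));
    printf("guest, user mode:   %#llx\n",
           (unsigned long long)make_cr3(0x23000000, false, false));
    printf("Xen, guest kernel:  %#llx\n",
           (unsigned long long)make_cr3(0x42000000, true, true));
    printf("Xen, guest user:    %#llx\n",
           (unsigned long long)make_cr3(0x42000000, true, false));
    return 0;
}

Because each address space owns its PCID, cached translations tagged
with another PCID simply never match, so switching between the guest
and hypervisor page tables needs no flush at all; only genuine mapping
changes do, which is why flush_area_local() above flushes a single
address in all four PCIDs.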
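Similarly, the rewritten do_flush_tlb() only falls back to
invpcid_flush_all() when an ordinary CR3 write cannot do the whole job.
A minimal sketch of that decision, assuming INVPCID is available
(need_full_flush() is a hypothetical helper, not a Xen function, and
the values in main() are illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define X86_CR3_NOFLUSH (1ULL << 63)   /* CR3 write flushes nothing */
#define X86_CR4_PGE     (1UL << 7)     /* global pages survive CR3 writes */

/*
 * A plain CR3 write leaves global (PGE) mappings intact, and a write
 * with bit 63 set flushes nothing at all; in both cases, and when no
 * new CR3 value is loaded, INVPCID must be used for a complete flush.
 */
static bool need_full_flush(uint64_t new_cr3, unsigned long cr4)
{
    return !new_cr3 || (new_cr3 & X86_CR3_NOFLUSH) || (cr4 & X86_CR4_PGE);
}

int main(void)
{
    printf("%d\n", need_full_flush(0, 0));                            /* 1 */
    printf("%d\n", need_full_flush(0x23000001 | X86_CR3_NOFLUSH, 0)); /* 1 */
    printf("%d\n", need_full_flush(0x23000000, X86_CR4_PGE));         /* 1 */
    printf("%d\n", need_full_flush(0x23000000, 0));                   /* 0 */
    return 0;
}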