[Xen-devel] Ping: [PATCH 1/2] x86: improve MSR_SHADOW_GS accesses
>>> On 06.12.17 at 17:37, wrote:
> Instead of using RDMSR/WRMSR, on fsgsbase-capable systems use a double
> SWAPGS combined with RDGSBASE/WRGSBASE. This halves execution time for
> a shadow GS update alone on my Haswell (and we have indications of
> good performance improvements by this on Skylake too), while the win is
> even higher when e.g. updating more than one base (as may and commonly
> will happen in load_segments()).
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

The previous (short) discussion has stalled, hence the ping.

Jan

> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -1326,9 +1326,12 @@ static void load_segments(struct vcpu *n
>          if ( n->arch.pv_vcpu.fs_base | (dirty_segment_mask & DIRTY_FS_BASE) )
>              wrfsbase(n->arch.pv_vcpu.fs_base);
>
> -        /* Most kernels have non-zero GS base, so don't bother testing. */
> -        /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
> -        wrmsrl(MSR_SHADOW_GS_BASE, n->arch.pv_vcpu.gs_base_kernel);
> +        /*
> +         * Most kernels have non-zero GS base, so don't bother testing.
> +         * (For old AMD hardware this is also a serialising instruction,
> +         * avoiding erratum #88.)
> +         */
> +        wrgsshadow(n->arch.pv_vcpu.gs_base_kernel);
>
>          /* This can only be non-zero if selector is NULL. */
>          if ( n->arch.pv_vcpu.gs_base_user |
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -501,7 +501,7 @@ long_mode_do_msr_read(unsigned int msr,
>          break;
>
>      case MSR_SHADOW_GS_BASE:
> -        rdmsrl(MSR_SHADOW_GS_BASE, *msr_content);
> +        *msr_content = rdgsshadow();
>          break;
>
>      case MSR_STAR:
> @@ -549,7 +549,7 @@ long_mode_do_msr_write(unsigned int msr,
>          else if ( msr == MSR_GS_BASE )
>              __vmwrite(GUEST_GS_BASE, msr_content);
>          else
> -            wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
> +            wrgsshadow(msr_content);
>
>          break;
>
> @@ -608,12 +608,12 @@ static void vmx_save_guest_msrs(struct v
>       * We cannot cache SHADOW_GS_BASE while the VCPU runs, as it can
>       * be updated at any time via SWAPGS, which we cannot trap.
>       */
> -    rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs);
> +    v->arch.hvm_vmx.shadow_gs = rdgsshadow();
>  }
>
>  static void vmx_restore_guest_msrs(struct vcpu *v)
>  {
> -    wrmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs);
> +    wrgsshadow(v->arch.hvm_vmx.shadow_gs);
>      wrmsrl(MSR_STAR, v->arch.hvm_vmx.star);
>      wrmsrl(MSR_LSTAR, v->arch.hvm_vmx.lstar);
>      wrmsrl(MSR_SYSCALL_MASK, v->arch.hvm_vmx.sfmask);
> --- a/xen/arch/x86/pv/emul-priv-op.c
> +++ b/xen/arch/x86/pv/emul-priv-op.c
> @@ -1027,7 +1027,7 @@ static int write_msr(unsigned int reg, u
>      case MSR_SHADOW_GS_BASE:
>          if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
>              break;
> -        wrmsrl(MSR_SHADOW_GS_BASE, val);
> +        wrgsshadow(val);
>          curr->arch.pv_vcpu.gs_base_user = val;
>          return X86EMUL_OKAY;
>
> --- a/xen/arch/x86/x86_64/mm.c
> +++ b/xen/arch/x86/x86_64/mm.c
> @@ -1034,7 +1034,7 @@ long do_set_segment_base(unsigned int wh
>      case SEGBASE_GS_USER:
>          if ( is_canonical_address(base) )
>          {
> -            wrmsrl(MSR_SHADOW_GS_BASE, base);
> +            wrgsshadow(base);
>              v->arch.pv_vcpu.gs_base_user = base;
>          }
>          else
> --- a/xen/arch/x86/x86_64/traps.c
> +++ b/xen/arch/x86/x86_64/traps.c
> @@ -49,7 +49,7 @@ static void read_registers(struct cpu_us
>      regs->gs = read_sreg(gs);
>      crs[5] = rdfsbase();
>      crs[6] = rdgsbase();
> -    rdmsrl(MSR_SHADOW_GS_BASE, crs[7]);
> +    crs[7] = rdgsshadow();
>  }
>
>  static void _show_registers(
> --- a/xen/include/asm-x86/msr.h
> +++ b/xen/include/asm-x86/msr.h
> @@ -8,6 +8,7 @@
>  #include <xen/types.h>
>  #include <xen/percpu.h>
>  #include <xen/errno.h>
> +#include <asm/alternative.h>
>  #include <asm/asm_defns.h>
>  #include <asm/cpufeature.h>
>
> @@ -172,6 +173,24 @@ static inline unsigned long rdgsbase(voi
>      return base;
>  }
>
> +static inline unsigned long rdgsshadow(void)
> +{
> +    unsigned long base;
> +
> +    alternative_io("mov %[msr], %%ecx\n\t"
> +                   "rdmsr\n\t"
> +                   "shl $32, %%rdx\n\t"
> +                   "or %%rdx, %[res]",
> +                   "swapgs\n\t"
> +                   ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc8\n\t" /* rdgsbase rax */
> +                   "swapgs",
> +                   X86_FEATURE_FSGSBASE,
> +                   [res] "=&a" (base),
> +                   [msr] "i" (MSR_SHADOW_GS_BASE) : "rcx", "rdx");
> +
> +    return base;
> +}
> +
>  static inline void wrfsbase(unsigned long base)
>  {
>      if ( cpu_has_fsgsbase )
> @@ -196,6 +215,19 @@ static inline void wrgsbase(unsigned lon
>          wrmsrl(MSR_GS_BASE, base);
>  }
>
> +static inline void wrgsshadow(unsigned long base)
> +{
> +    alternative_input("mov %[msr], %%ecx\n\t"
> +                      "shld $32, %[val], %%rdx\n\t"
> +                      "wrmsr",
> +                      "swapgs\n\t"
> +                      ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd8\n\t" /* wrgsbase rax */
> +                      "swapgs",
> +                      X86_FEATURE_FSGSBASE,
> +                      [val] "a" (base),
> +                      [msr] "i" (MSR_SHADOW_GS_BASE) : "rcx", "rdx");
> +}
> +
>  DECLARE_PER_CPU(u64, efer);
>  u64 read_efer(void);
>  void write_efer(u64 val);

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel
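[Editorial note, not part of the archived message.] The trick the patch relies on: SWAPGS exchanges the active GS base with MSR_SHADOW_GS_BASE, so a SWAPGS / RDGSBASE-or-WRGSBASE / SWAPGS sequence reaches the shadow value without any MSR access. The patch emits the FSGSBASE instructions as .byte sequences only to accommodate older assemblers. Below is a minimal standalone sketch of the idea, outside Xen's alternatives machinery; it assumes CPL 0, CR4.FSGSBASE set, and an assembler that accepts the RDGSBASE/WRGSBASE mnemonics, and the function names read_shadow_gs_base()/write_shadow_gs_base() are hypothetical, chosen for illustration only:

    #include <stdint.h>

    /*
     * Sketch only: must run at CPL 0 with CR4.FSGSBASE enabled. Between
     * the two SWAPGS instructions the shadow base is the live one, so
     * nothing that runs in between (e.g. an NMI handler doing its own
     * SWAPGS-based per-CPU access) may depend on the normal GS base.
     */
    static inline uint64_t read_shadow_gs_base(void)
    {
        uint64_t base;

        asm volatile ( "swapgs\n\t"      /* shadow base becomes active */
                       "rdgsbase %0\n\t" /* read it, no MSR access */
                       "swapgs"          /* swap the bases back */
                       : "=r" (base) );

        return base;
    }

    static inline void write_shadow_gs_base(uint64_t base)
    {
        asm volatile ( "swapgs\n\t"
                       "wrgsbase %0\n\t"
                       "swapgs"
                       :: "r" (base) );
    }

On pre-FSGSBASE hardware no such shortcut exists, which is why the patch keeps the RDMSR/WRMSR sequence as the non-alternative code path.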