Re: [Xen-devel] [PATCH v5] x86/AMD: Add support for AMD's OSVW feature in guests
On Mon, Feb 06, 2012 at 06:39:25PM +0100, Boris Ostrovsky wrote:
> # HG changeset patch
> # User Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
> # Date 1328549858 -3600
> # Node ID 3cf8ffd0ab883dd09f943f4d8fb50f5cc1f04cd5
> # Parent e2722b24dc0962de37215320b05d1bb7c4c42864
> x86/AMD: Add support for AMD's OSVW feature in guests.
>
> In some cases guests should not provide workarounds for errata even when the
> physical processor is affected. For example, because of erratum 400 on family
> 10h processors a Linux guest will read an MSR (resulting in a VMEXIT) before
> going idle in order to avoid getting stuck in a non-C0 state. This is not

What about fixing the Linux guest to actually not do this?

I presume you have encountered this with HVM guests - would it be possible
to use set_pm_idle_to_default(default_idle) in the HVM bootup path, say in
'xen_hvm_guest_init'?

> necessary: HLT and IO instructions are intercepted and therefore there is no
> reason for the erratum 400 workaround in the guest.
>
> This patch allows us to present a guest with certain errata as fixed,
> regardless of the state of the actual hardware.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
> Acked-by: Christoph Egger <Christoph.Egger@xxxxxxx>
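To make the mechanism concrete: an OSVW-aware guest kernel decides whether it
needs the erratum 400 idle workaround roughly as below. This is a minimal
sketch, not the actual Linux code; the MSR numbers are the architectural OSVW
registers, treating status bit 1 as erratum 400 matches the ~6ULL masking in
svm_guest_osvw_init() further down, and rdmsr64() is a fake stand-in for the
kernel's MSR-read helper so the sketch compiles and runs on its own.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MSR_AMD_OSVW_ID_LENGTH 0xc0010140u
#define MSR_AMD_OSVW_STATUS    0xc0010141u

/* Fake MSR read standing in for the kernel's rdmsrl(); the canned values
 * model a host that reports erratum 400 (status bit 1) as fixed. */
static uint64_t rdmsr64(uint32_t msr)
{
    return (msr == MSR_AMD_OSVW_ID_LENGTH) ? 3 : 0;
}

static bool need_erratum_400_workaround(void)
{
    uint64_t len = rdmsr64(MSR_AMD_OSVW_ID_LENGTH);

    /* Status bit 1 is only meaningful if the OSVW length covers it. */
    if (len >= 2)
        return rdmsr64(MSR_AMD_OSVW_STATUS) & (1ULL << 1);

    /* No usable OSVW data: conservatively assume we are affected. */
    return true;
}

int main(void)
{
    printf("E400 workaround needed: %d\n", need_erratum_400_workaround());
    return 0;
}

With the patch below, a guest sees osvw.length >= 3 and status bits 1 and 2
clear, so this check returns false and the idle-path MSR read (and its
VMEXIT) is avoided.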
> diff -r e2722b24dc09 -r 3cf8ffd0ab88 tools/libxc/xc_cpuid_x86.c
> --- a/tools/libxc/xc_cpuid_x86.c	Thu Jan 26 17:43:31 2012 +0000
> +++ b/tools/libxc/xc_cpuid_x86.c	Mon Feb 06 18:37:38 2012 +0100
> @@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy(
>                      bitmaskof(X86_FEATURE_SSE4A) |
>                      bitmaskof(X86_FEATURE_MISALIGNSSE) |
>                      bitmaskof(X86_FEATURE_3DNOWPREFETCH) |
> +                    bitmaskof(X86_FEATURE_OSVW) |
>                      bitmaskof(X86_FEATURE_XOP) |
>                      bitmaskof(X86_FEATURE_FMA4) |
>                      bitmaskof(X86_FEATURE_TBM) |
> diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/arch/x86/hvm/svm/svm.c
> --- a/xen/arch/x86/hvm/svm/svm.c	Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/arch/x86/hvm/svm/svm.c	Mon Feb 06 18:37:38 2012 +0100
> @@ -83,6 +83,10 @@ static DEFINE_PER_CPU_READ_MOSTLY(void *
>
>  static bool_t amd_erratum383_found __read_mostly;
>
> +/* OSVW bits */
> +static uint64_t osvw_length, osvw_status;
> +static DEFINE_SPINLOCK(osvw_lock);
> +
>  void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len)
>  {
>      struct vcpu *curr = current;
> @@ -902,6 +906,69 @@ static void svm_do_resume(struct vcpu *v
>      reset_stack_and_jump(svm_asm_do_resume);
>  }
>
> +static void svm_guest_osvw_init(struct vcpu *vcpu)
> +{
> +    if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
> +        return;
> +
> +    /*
> +     * Guests should see errata 400 and 415 as fixed (assuming that
> +     * HLT and IO instructions are intercepted).
> +     */
> +    vcpu->arch.hvm_svm.osvw.length = (osvw_length >= 3) ? osvw_length : 3;
> +    vcpu->arch.hvm_svm.osvw.status = osvw_status & ~(6ULL);
> +
> +    /*
> +     * By increasing VCPU's osvw.length to 3 we are telling the guest that
> +     * all osvw.status bits inside that length, including bit 0 (which is
> +     * reserved for erratum 298), are valid. However, if host processor's
> +     * osvw_len is 0 then osvw_status[0] carries no information. We need to
> +     * be conservative here and therefore we tell the guest that erratum 298
> +     * is present (because we really don't know).
> +     */
> +    if ( osvw_length == 0 && boot_cpu_data.x86 == 0x10 )
> +        vcpu->arch.hvm_svm.osvw.status |= 1;
> +}
> +
> +void svm_host_osvw_reset()
> +{
> +    spin_lock(&osvw_lock);
> +
> +    osvw_length = 64; /* One register (MSRC001_0141) worth of errata */
> +    osvw_status = 0;
> +
> +    spin_unlock(&osvw_lock);
> +}
> +
> +void svm_host_osvw_init()
> +{
> +    spin_lock(&osvw_lock);
> +
> +    /*
> +     * Get OSVW bits. If bits are not the same on different processors then
> +     * choose the worst case (i.e. if erratum is present on one processor and
> +     * not on another assume that the erratum is present everywhere).
> +     */
> +    if ( test_bit(X86_FEATURE_OSVW, &boot_cpu_data.x86_capability) )
> +    {
> +        uint64_t len, status;
> +
> +        if ( rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) ||
> +             rdmsr_safe(MSR_AMD_OSVW_STATUS, status) )
> +            len = status = 0;
> +
> +        if (len < osvw_length)
> +            osvw_length = len;
> +
> +        osvw_status |= status;
> +        osvw_status &= (1ULL << osvw_length) - 1;
> +    }
> +    else
> +        osvw_length = osvw_status = 0;
> +
> +    spin_unlock(&osvw_lock);
> +}
> +
>  static int svm_domain_initialise(struct domain *d)
>  {
>      return 0;
> @@ -930,6 +997,9 @@ static int svm_vcpu_initialise(struct vc
>      }
>
>      vpmu_initialise(v);
> +
> +    svm_guest_osvw_init(v);
> +
>      return 0;
>  }
>
> @@ -1044,6 +1114,27 @@ static void svm_init_erratum_383(struct
>      }
>  }
>
> +static int svm_handle_osvw(struct vcpu *v, uint32_t msr, uint64_t *val,
> +                           bool_t read)
> +{
> +    uint eax, ebx, ecx, edx;
> +
> +    /* Guest OSVW support */
> +    hvm_cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
> +    if ( !test_bit((X86_FEATURE_OSVW & 31), &ecx) )
> +        return -1;
> +
> +    if ( read )
> +    {
> +        if (msr == MSR_AMD_OSVW_ID_LENGTH)
> +            *val = v->arch.hvm_svm.osvw.length;
> +        else
> +            *val = v->arch.hvm_svm.osvw.status;
> +    }
> +    /* Writes are ignored */
> +
> +    return 0;
> +}
> +
>  static int svm_cpu_up(void)
>  {
>      uint64_t msr_content;
> @@ -1094,6 +1185,9 @@ static int svm_cpu_up(void)
>      }
>  #endif
>
> +    /* Initialize OSVW bits to be used by guests */
> +    svm_host_osvw_init();
> +
>      return 0;
>  }
>
> @@ -1104,6 +1198,8 @@ struct hvm_function_table * __init start
>      if ( !test_bit(X86_FEATURE_SVM, &boot_cpu_data.x86_capability) )
>          return NULL;
>
> +    svm_host_osvw_reset();
> +
>      if ( svm_cpu_up() )
>      {
>          printk("SVM: failed to initialise.\n");
> @@ -1388,6 +1484,13 @@ static int svm_msr_read_intercept(unsign
>          vpmu_do_rdmsr(msr, msr_content);
>          break;
>
> +    case MSR_AMD_OSVW_ID_LENGTH:
> +    case MSR_AMD_OSVW_STATUS:
> +        ret = svm_handle_osvw(v, msr, msr_content, 1);
> +        if ( ret < 0 )
> +            goto gpf;
> +        break;
> +
>      default:
>          ret = nsvm_rdmsr(v, msr, msr_content);
>          if ( ret < 0 )
> @@ -1512,6 +1615,13 @@ static int svm_msr_write_intercept(unsig
>          */
>          break;
>
> +    case MSR_AMD_OSVW_ID_LENGTH:
> +    case MSR_AMD_OSVW_STATUS:
> +        ret = svm_handle_osvw(v, msr, &msr_content, 0);
> +        if ( ret < 0 )
> +            goto gpf;
> +        break;
> +
>      default:
>          ret = nsvm_wrmsr(v, msr, msr_content);
>          if ( ret < 0 )
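A worked example of the worst-case merge that svm_host_osvw_init() performs
above, with made-up per-CPU values: the length takes the minimum across
processors, the status takes the bitwise OR, truncated to the merged length.
This is ordinary standalone C, illustrative only (the extra shift guard is
ours, to keep the toy well-defined when the length is still 64):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t osvw_length = 64, osvw_status = 0; /* svm_host_osvw_reset() state */
    const uint64_t cpu_len[]    = { 3, 2 };     /* hypothetical per-CPU values */
    const uint64_t cpu_status[] = { 0x1, 0x2 };

    for ( int i = 0; i < 2; i++ )
    {
        if ( cpu_len[i] < osvw_length )
            osvw_length = cpu_len[i];
        osvw_status |= cpu_status[i];
        if ( osvw_length < 64 )                 /* avoid a full-width shift */
            osvw_status &= (1ULL << osvw_length) - 1;
    }

    /* Prints len=2 status=0x3: an erratum flagged on any CPU ends up
     * flagged for all of them, and the valid length shrinks to the
     * smallest any CPU reported. */
    printf("len=%" PRIu64 " status=%#" PRIx64 "\n", osvw_length, osvw_status);
    return 0;
}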
> diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/arch/x86/microcode.c
> --- a/xen/arch/x86/microcode.c	Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/arch/x86/microcode.c	Mon Feb 06 18:37:38 2012 +0100
> @@ -218,6 +218,16 @@ int microcode_update(XEN_GUEST_HANDLE(co
>      info->error = 0;
>      info->cpu = cpumask_first(&cpu_online_map);
>
> +    if ( microcode_ops->start_update )
> +    {
> +        ret = microcode_ops->start_update();
> +        if ( ret != 0 )
> +        {
> +            xfree(info);
> +            return ret;
> +        }
> +    }
> +
>      return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
>  }
>
> diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/arch/x86/microcode_amd.c
> --- a/xen/arch/x86/microcode_amd.c	Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/arch/x86/microcode_amd.c	Mon Feb 06 18:37:38 2012 +0100
> @@ -25,6 +25,7 @@
>  #include <asm/msr.h>
>  #include <asm/processor.h>
>  #include <asm/microcode.h>
> +#include <asm/hvm/svm/svm.h>
>
>  struct equiv_cpu_entry {
>      uint32_t installed_cpu;
> @@ -71,6 +72,7 @@ struct mpbhdr {
>  /* serialize access to the physical write */
>  static DEFINE_SPINLOCK(microcode_update_lock);
>
> +/* See comment in start_update() for cases when this routine fails */
>  static int collect_cpu_info(int cpu, struct cpu_signature *csig)
>  {
>      struct cpuinfo_x86 *c = &cpu_data[cpu];
> @@ -287,7 +289,8 @@ static int cpu_request_microcode(int cpu
>      {
>          printk(KERN_ERR "microcode: error! Wrong "
>                 "microcode patch file magic\n");
> -        return -EINVAL;
> +        error = -EINVAL;
> +        goto out;
>      }
>
>      mc_amd = xmalloc(struct microcode_amd);
> @@ -295,7 +298,8 @@ static int cpu_request_microcode(int cpu
>      {
>          printk(KERN_ERR "microcode: error! "
>                 "Can not allocate memory for microcode patch\n");
> -        return -ENOMEM;
> +        error = -ENOMEM;
> +        goto out;
>      }
>
>      error = install_equiv_cpu_table(mc_amd, buf, &offset);
> @@ -303,7 +307,8 @@ static int cpu_request_microcode(int cpu
>      {
>          xfree(mc_amd);
>          printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
> -        return -EINVAL;
> +        error = -EINVAL;
> +        goto out;
>      }
>
>      mc_old = uci->mc.mc_amd;
> @@ -337,13 +342,19 @@ static int cpu_request_microcode(int cpu
>      /* On success keep the microcode patch for
>       * re-apply on resume.
>       */
> -    if (error == 1)
> +    if ( error == 1 )
>      {
>          xfree(mc_old);
> -        return 0;
> +        error = 0;
> +    }
> +    else
> +    {
> +        xfree(mc_amd);
> +        uci->mc.mc_amd = mc_old;
>      }
> -    xfree(mc_amd);
> -    uci->mc.mc_amd = mc_old;
> +
> +  out:
> +    svm_host_osvw_init();
>
>      return error;
>  }
> @@ -395,11 +406,28 @@ err1:
>      return -ENOMEM;
>  }
>
> +static int start_update(void)
> +{
> +    /*
> +     * We assume here that svm_host_osvw_init() will be called on each cpu
> +     * (from cpu_request_microcode()).
> +     *
> +     * Note that if collect_cpu_info() returns an error then
> +     * cpu_request_microcode() will not be invoked, thus leaving OSVW bits
> +     * not updated. Currently though collect_cpu_info() will not fail on
> +     * processors supporting OSVW so we will not deal with this possibility.
> +     */
> +    svm_host_osvw_reset();
> +
> +    return 0;
> +}
> +
>  static const struct microcode_ops microcode_amd_ops = {
>      .microcode_resume_match = microcode_resume_match,
>      .cpu_request_microcode = cpu_request_microcode,
>      .collect_cpu_info = collect_cpu_info,
>      .apply_microcode = apply_microcode,
> +    .start_update = start_update,
>  };
>
>  static __init int microcode_init_amd(void)
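The ordering that start_update() establishes can be shown as a runnable toy:
reset the accumulated host OSVW state once, before any CPU is touched, then
re-accumulate as each CPU finishes its update. The hook names mirror the
patch, but these are stub stand-ins, and the plain loop stands in for the
real continue_hypercall_on_cpu()/do_microcode_update() iteration:

#include <stdio.h>

/* Toy stand-ins for the hooks added by this patch. */
static void svm_host_osvw_reset(void) { puts("reset accumulated OSVW state"); }
static void svm_host_osvw_init(void)  { puts("fold in this CPU's OSVW bits"); }

static int cpu_request_microcode(int cpu)
{
    printf("update microcode on CPU%d\n", cpu);
    /* Every exit path ends here, as the out: label above ensures. */
    svm_host_osvw_init();
    return 0;
}

int main(void)
{
    svm_host_osvw_reset();            /* start_update(), before any CPU */
    for ( int cpu = 0; cpu < 4; cpu++ )
        cpu_request_microcode(cpu);   /* per CPU, re-accumulates OSVW */
    return 0;
}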
> diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/arch/x86/platform_hypercall.c
> --- a/xen/arch/x86/platform_hypercall.c	Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/arch/x86/platform_hypercall.c	Mon Feb 06 18:37:38 2012 +0100
> @@ -166,7 +166,21 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
>              break;
>
>          guest_from_compat_handle(data, op->u.microcode.data);
> +
> +        /*
> +         * alloc_vcpu() will access data which is modified during
> +         * microcode update.
> +         */
> +        while ( !spin_trylock(&vcpu_alloc_lock) )
> +            if ( hypercall_preempt_check() )
> +            {
> +                ret = hypercall_create_continuation(
> +                    __HYPERVISOR_platform_op, "h", u_xenpf_op);
> +                goto out;
> +            }
> +
>          ret = microcode_update(data, op->u.microcode.length);
> +        spin_unlock(&vcpu_alloc_lock);
>      }
>      break;
>
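The same take-the-lock-or-punt idiom appears again in the domctl path below:
rather than spinning in the hypervisor on a contended lock, the hypercall
backs out and reschedules itself as a continuation whenever preemption is
pending. Factored into a hypothetical helper it reads as follows (a sketch
against Xen's existing primitives; the patch open-codes it at each site
because the continuation arguments differ per hypercall):

/*
 * Try to take vcpu_alloc_lock without spinning indefinitely in the
 * hypervisor: if the lock is contended and preemption is pending,
 * return 0 so the caller can create a hypercall continuation and
 * retry later; return 1 once the lock is held.
 */
static int vcpu_alloc_lock_or_preempt(void)
{
    while ( !spin_trylock(&vcpu_alloc_lock) )
        if ( hypercall_preempt_check() )
            return 0;
    return 1;
}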
> diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/common/domctl.c
> --- a/xen/common/domctl.c	Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/common/domctl.c	Mon Feb 06 18:37:38 2012 +0100
> @@ -29,6 +29,7 @@
>  #include <xsm/xsm.h>
>
>  static DEFINE_SPINLOCK(domctl_lock);
> +DEFINE_SPINLOCK(vcpu_alloc_lock);
>
>  int cpumask_to_xenctl_cpumap(
>      struct xenctl_cpumap *xenctl_cpumap, const cpumask_t *cpumask)
> @@ -502,6 +503,18 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
>          /* Needed, for example, to ensure writable p.t. state is synced. */
>          domain_pause(d);
>
> +        /*
> +         * Certain operations (e.g. CPU microcode updates) modify data
> +         * which is used during VCPU allocation/initialization.
> +         */
> +        while ( !spin_trylock(&vcpu_alloc_lock) )
> +            if ( hypercall_preempt_check() )
> +            {
> +                ret = hypercall_create_continuation(
> +                    __HYPERVISOR_domctl, "h", u_domctl);
> +                goto maxvcpu_out_novcpulock;
> +            }
> +
>          /* We cannot reduce maximum VCPUs. */
>          ret = -EINVAL;
>          if ( (max < d->max_vcpus) && (d->vcpu[max] != NULL) )
> @@ -551,6 +564,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
>          ret = 0;
>
>      maxvcpu_out:
> +        spin_unlock(&vcpu_alloc_lock);
> +
> +    maxvcpu_out_novcpulock:
>          domain_unpause(d);
>          rcu_unlock_domain(d);
>      }
> diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/include/asm-x86/hvm/svm/svm.h
> --- a/xen/include/asm-x86/hvm/svm/svm.h	Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/include/asm-x86/hvm/svm/svm.h	Mon Feb 06 18:37:38 2012 +0100
> @@ -98,4 +98,7 @@ extern u32 svm_feature_flags;
>          ~TSC_RATIO_RSVD_BITS )
>  #define vcpu_tsc_ratio(v)   TSC_RATIO((v)->domain->arch.tsc_khz, cpu_khz)
>
> +extern void svm_host_osvw_reset(void);
> +extern void svm_host_osvw_init(void);
> +
>  #endif /* __ASM_X86_HVM_SVM_H__ */
> diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/include/asm-x86/hvm/svm/vmcb.h
> --- a/xen/include/asm-x86/hvm/svm/vmcb.h	Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/include/asm-x86/hvm/svm/vmcb.h	Mon Feb 06 18:37:38 2012 +0100
> @@ -516,6 +516,12 @@ struct arch_svm_struct {
>
>      /* AMD lightweight profiling MSR */
>      uint64_t guest_lwp_cfg;
> +
> +    /* OSVW MSRs */
> +    struct {
> +        u64 length;
> +        u64 status;
> +    } osvw;
>  };
>
>  struct vmcb_struct *alloc_vmcb(void);
> diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/include/asm-x86/microcode.h
> --- a/xen/include/asm-x86/microcode.h	Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/include/asm-x86/microcode.h	Mon Feb 06 18:37:38 2012 +0100
> @@ -11,6 +11,7 @@ struct microcode_ops {
>      int (*cpu_request_microcode)(int cpu, const void *buf, size_t size);
>      int (*collect_cpu_info)(int cpu, struct cpu_signature *csig);
>      int (*apply_microcode)(int cpu);
> +    int (*start_update)(void);
>  };
>
>  struct cpu_signature {
> diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/include/xen/domain.h
> --- a/xen/include/xen/domain.h	Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/include/xen/domain.h	Mon Feb 06 18:37:38 2012 +0100
> @@ -69,6 +69,7 @@ void arch_dump_domain_info(struct domain
>
>  void arch_vcpu_reset(struct vcpu *v);
>
> +extern spinlock_t vcpu_alloc_lock;
>  bool_t domctl_lock_acquire(void);
>  void domctl_lock_release(void);
>

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel