Re: [Xen-devel] [PATCH v5] x86/AMD: Add support for AMD's OSVW feature in guests



>>> On 06.02.12 at 18:39, Boris Ostrovsky <boris.ostrovsky@xxxxxxx> wrote:
> # HG changeset patch
> # User Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
> # Date 1328549858 -3600
> # Node ID 3cf8ffd0ab883dd09f943f4d8fb50f5cc1f04cd5
> # Parent  e2722b24dc0962de37215320b05d1bb7c4c42864
> x86/AMD: Add support for AMD's OSVW feature in guests.
> 
> In some cases guests should not provide workarounds for errata even when
> the physical processor is affected. For example, because of erratum 400
> on family 10h processors a Linux guest will read an MSR (resulting in a
> VMEXIT) before going to idle in order to avoid getting stuck in a non-C0
> state. This is not necessary: HLT and IO instructions are intercepted,
> and therefore there is no need for the erratum 400 workaround in the
> guest.
> 
> This patch allows us to present certain errata to a guest as fixed,
> regardless of the state of the actual hardware.
> 
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
> Acked-by: Christoph Egger <Christoph.Egger@xxxxxxx>

In the form below/attached (the integration with boot time microcode
loading has been fixed and trailing white space removed):

Acked-by: Jan Beulich <jbeulich@xxxxxxxx>

-- Jan

In some cases guests should not provide workarounds for errata even when the
physical processor is affected. For example, because of erratum 400 on family
10h processors a Linux guest will read an MSR (resulting in a VMEXIT) before
going to idle in order to avoid getting stuck in a non-C0 state. This is not
necessary: HLT and IO instructions are intercepted, and therefore there is no
need for the erratum 400 workaround in the guest.

This patch allows us to present certain errata to a guest as fixed,
regardless of the state of the actual hardware.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
Acked-by: Christoph Egger <Christoph.Egger@xxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
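
For reference, the guest-side logic that this patch caters to works as
follows: before enabling the erratum 400 workaround, an OSVW-aware OS checks
CPUID 0x80000001 ECX bit 9 and, if set, consults the OSVW MSRs; a status bit
that is clear within the valid length means the erratum is fixed, which is
what the patch arranges for bits 1 and 2. Below is a minimal sketch of such
a check, using Linux-style cpuid_ecx() and rdmsrl_safe() helpers (the
function name and the OSVW id for erratum 400 follow Linux's usage and are
not part of this patch):

#include <linux/types.h>             /* u64 */
#include <asm/msr.h>                 /* rdmsrl_safe() */
#include <asm/processor.h>           /* cpuid_ecx() */

#define MSR_AMD_OSVW_ID_LENGTH  0xc0010140
#define MSR_AMD_OSVW_STATUS     0xc0010141
#define OSVW_ID_ERRATUM_400     1    /* status bit assigned to erratum 400 */

static int erratum_400_needs_workaround(void)
{
    u64 len, status;

    /* CPUID Fn8000_0001 ECX[9] advertises OSVW support. */
    if ( !(cpuid_ecx(0x80000001) & (1U << 9)) )
        return 1;                    /* no OSVW: assume we are affected */

    /* A status bit is only meaningful if it lies within the OSVW length. */
    if ( rdmsrl_safe(MSR_AMD_OSVW_ID_LENGTH, &len) ||
         len <= OSVW_ID_ERRATUM_400 )
        return 1;

    if ( rdmsrl_safe(MSR_AMD_OSVW_STATUS, &status) )
        return 1;

    /* Bit set: erratum present; bit clear: erratum fixed. */
    return !!(status & (1ULL << OSVW_ID_ERRATUM_400));
}

With osvw.status bit 1 cleared by the hypervisor, a guest running this check
skips the workaround MSR access on its idle path and thus avoids the
pointless VMEXIT described above.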

--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy(
                     bitmaskof(X86_FEATURE_SSE4A) |
                     bitmaskof(X86_FEATURE_MISALIGNSSE) |
                     bitmaskof(X86_FEATURE_3DNOWPREFETCH) |
+                    bitmaskof(X86_FEATURE_OSVW) |
                     bitmaskof(X86_FEATURE_XOP) |
                     bitmaskof(X86_FEATURE_FMA4) |
                     bitmaskof(X86_FEATURE_TBM) |
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -83,6 +83,10 @@ static DEFINE_PER_CPU_READ_MOSTLY(void *
 
 static bool_t amd_erratum383_found __read_mostly;
 
+/* OSVW bits */
+static uint64_t osvw_length, osvw_status;
+static DEFINE_SPINLOCK(osvw_lock);
+
 void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len)
 {
     struct vcpu *curr = current;
@@ -902,6 +906,69 @@ static void svm_do_resume(struct vcpu *v
     reset_stack_and_jump(svm_asm_do_resume);
 }
 
+static void svm_guest_osvw_init(struct vcpu *vcpu)
+{
+    if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
+        return;
+
+    /*
+     * Guests should see errata 400 and 415 (status bits 1 and 2, hence
+     * the ~6 mask below) as fixed, assuming HLT and IO are intercepted.
+     */
+    vcpu->arch.hvm_svm.osvw.length = (osvw_length >= 3) ? osvw_length : 3;
+    vcpu->arch.hvm_svm.osvw.status = osvw_status & ~(6ULL);
+
+    /*
+     * By increasing the VCPU's osvw.length to 3 we are telling the guest
+     * that all osvw.status bits inside that length, including bit 0 (which
+     * is reserved for erratum 298), are valid. However, if the host
+     * processor's osvw_length is 0 then osvw_status[0] carries no
+     * information. We must be conservative here and therefore tell the
+     * guest that erratum 298 is present (because we really don't know).
+     */
+    if ( osvw_length == 0 && boot_cpu_data.x86 == 0x10 )
+        vcpu->arch.hvm_svm.osvw.status |= 1;
+}
+
+void svm_host_osvw_reset(void)
+{
+    spin_lock(&osvw_lock);
+
+    osvw_length = 64; /* One register (MSRC001_0141) worth of errata */
+    osvw_status = 0;
+
+    spin_unlock(&osvw_lock);
+}
+
+void svm_host_osvw_init(void)
+{
+    spin_lock(&osvw_lock);
+
+    /*
+     * Get OSVW bits. If the bits are not the same on different processors
+     * then choose the worst case (i.e. if an erratum is present on one
+     * processor and not on another, assume it is present everywhere).
+     */
+    if ( test_bit(X86_FEATURE_OSVW, &boot_cpu_data.x86_capability) )
+    {
+        uint64_t len, status;
+
+        if ( rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) ||
+             rdmsr_safe(MSR_AMD_OSVW_STATUS, status) )
+            len = status = 0;
+
+        if ( len < osvw_length )
+            osvw_length = len;
+
+        osvw_status |= status;
+        osvw_status &= (1ULL << osvw_length) - 1;
+    }
+    else
+        osvw_length = osvw_status = 0;
+
+    spin_unlock(&osvw_lock);
+}
+
 static int svm_domain_initialise(struct domain *d)
 {
     return 0;
@@ -930,6 +997,9 @@ static int svm_vcpu_initialise(struct vc
     }
 
     vpmu_initialise(v);
+
+    svm_guest_osvw_init(v);
+
     return 0;
 }
 
@@ -1044,6 +1114,27 @@ static void svm_init_erratum_383(struct 
     }
 }
 
+static int svm_handle_osvw(struct vcpu *v, uint32_t msr, uint64_t *val, bool_t read)
+{
+    unsigned int eax, ebx, ecx, edx;
+
+    /* Guest OSVW support */
+    hvm_cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+    if ( !test_bit((X86_FEATURE_OSVW & 31), &ecx) )
+        return -1;
+
+    if ( read )
+    {
+        if ( msr == MSR_AMD_OSVW_ID_LENGTH )
+            *val = v->arch.hvm_svm.osvw.length;
+        else
+            *val = v->arch.hvm_svm.osvw.status;
+    }
+    /* Writes are ignored */
+
+    return 0;
+}
+
 static int svm_cpu_up(void)
 {
     uint64_t msr_content;
@@ -1094,6 +1185,9 @@ static int svm_cpu_up(void)
     }
 #endif
 
+    /* Initialize OSVW bits to be used by guests */
+    svm_host_osvw_init();
+
     return 0;
 }
 
@@ -1104,6 +1198,8 @@ struct hvm_function_table * __init start
     if ( !test_bit(X86_FEATURE_SVM, &boot_cpu_data.x86_capability) )
         return NULL;
 
+    svm_host_osvw_reset();
+
     if ( svm_cpu_up() )
     {
         printk("SVM: failed to initialise.\n");
@@ -1388,6 +1484,13 @@ static int svm_msr_read_intercept(unsign
         vpmu_do_rdmsr(msr, msr_content);
         break;
 
+    case MSR_AMD_OSVW_ID_LENGTH:
+    case MSR_AMD_OSVW_STATUS:
+        ret = svm_handle_osvw(v, msr, msr_content, 1);
+        if ( ret < 0 )
+            goto gpf;
+        break;
+
     default:
         ret = nsvm_rdmsr(v, msr, msr_content);
         if ( ret < 0 )
@@ -1512,6 +1615,13 @@ static int svm_msr_write_intercept(unsig
          */
         break;
 
+    case MSR_AMD_OSVW_ID_LENGTH:
+    case MSR_AMD_OSVW_STATUS:
+        ret = svm_handle_osvw(v, msr, &msr_content, 0);
+        if ( ret < 0 )
+            goto gpf;
+        break;
+
     default:
         ret = nsvm_wrmsr(v, msr, msr_content);
         if ( ret < 0 )
--- a/xen/arch/x86/microcode.c
+++ b/xen/arch/x86/microcode.c
@@ -218,6 +218,16 @@ int microcode_update(XEN_GUEST_HANDLE(co
     info->error = 0;
     info->cpu = cpumask_first(&cpu_online_map);
 
+    if ( microcode_ops->start_update )
+    {
+        ret = microcode_ops->start_update();
+        if ( ret != 0 )
+        {
+            xfree(info);
+            return ret;
+        }
+    }
+
     return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
 }
 
@@ -240,6 +250,12 @@ static int __init microcode_init(void)
     if ( !data )
         return -ENOMEM;
 
+    if ( microcode_ops->start_update && microcode_ops->start_update() != 0 )
+    {
+        ucode_mod_map(NULL);
+        return 0;
+    }
+
     softirq_tasklet_init(&tasklet, _do_microcode_update, (unsigned long)data);
 
     for_each_online_cpu ( cpu )
--- a/xen/arch/x86/microcode_amd.c
+++ b/xen/arch/x86/microcode_amd.c
@@ -25,6 +25,7 @@
 #include <asm/msr.h>
 #include <asm/processor.h>
 #include <asm/microcode.h>
+#include <asm/hvm/svm/svm.h>
 
 struct equiv_cpu_entry {
     uint32_t installed_cpu;
@@ -71,6 +72,7 @@ struct mpbhdr {
 /* serialize access to the physical write */
 static DEFINE_SPINLOCK(microcode_update_lock);
 
+/* See comment in start_update() for cases when this routine fails */
 static int collect_cpu_info(int cpu, struct cpu_signature *csig)
 {
     struct cpuinfo_x86 *c = &cpu_data[cpu];
@@ -287,7 +289,8 @@ static int cpu_request_microcode(int cpu
     {
         printk(KERN_ERR "microcode: error! Wrong "
                "microcode patch file magic\n");
-        return -EINVAL;
+        error = -EINVAL;
+        goto out;
     }
 
     mc_amd = xmalloc(struct microcode_amd);
@@ -295,7 +298,8 @@ static int cpu_request_microcode(int cpu
     {
         printk(KERN_ERR "microcode: error! "
                "Can not allocate memory for microcode patch\n");
-        return -ENOMEM;
+        error = -ENOMEM;
+        goto out;
     }
 
     error = install_equiv_cpu_table(mc_amd, buf, &offset);
@@ -303,7 +307,8 @@ static int cpu_request_microcode(int cpu
     {
         xfree(mc_amd);
         printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
-        return -EINVAL;
+        error = -EINVAL;
+        goto out;
     }
 
     mc_old = uci->mc.mc_amd;
@@ -337,13 +342,19 @@ static int cpu_request_microcode(int cpu
     /* On success keep the microcode patch for
      * re-apply on resume.
      */
-    if (error == 1)
+    if ( error == 1 )
     {
         xfree(mc_old);
-        return 0;
+        error = 0;
+    }
+    else
+    {
+        xfree(mc_amd);
+        uci->mc.mc_amd = mc_old;
     }
-    xfree(mc_amd);
-    uci->mc.mc_amd = mc_old;
+
+  out:
+    svm_host_osvw_init();
 
     return error;
 }
@@ -395,11 +406,28 @@ err1:
     return -ENOMEM;
 }
 
+static int start_update(void)
+{
+    /*
+     * We assume here that svm_host_osvw_init() will be called on each cpu (from
+     * cpu_request_microcode()).
+     *
+     * Note that if collect_cpu_info() returns an error then
+     * cpu_request_microcode() will not be invoked, thus leaving the OSVW
+     * bits not updated. Currently, though, collect_cpu_info() will not fail
+     * on processors supporting OSVW, so we do not deal with this possibility.
+     */
+    svm_host_osvw_reset();
+
+    return 0;
+}
+
 static const struct microcode_ops microcode_amd_ops = {
     .microcode_resume_match           = microcode_resume_match,
     .cpu_request_microcode            = cpu_request_microcode,
     .collect_cpu_info                 = collect_cpu_info,
     .apply_microcode                  = apply_microcode,
+    .start_update                     = start_update,
 };
 
 static __init int microcode_init_amd(void)
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -166,7 +166,21 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
             break;
 
         guest_from_compat_handle(data, op->u.microcode.data);
+
+        /*
+         * alloc_vcpu() will access data which is modified during a
+         * microcode update.
+         */
+        while ( !spin_trylock(&vcpu_alloc_lock) )
+            if ( hypercall_preempt_check() )
+            {
+                ret = hypercall_create_continuation(
+                    __HYPERVISOR_platform_op, "h", u_xenpf_op);
+                goto out;
+            }
+
         ret = microcode_update(data, op->u.microcode.length);
+        spin_unlock(&vcpu_alloc_lock);
     }
     break;
 
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -29,6 +29,7 @@
 #include <xsm/xsm.h>
 
 static DEFINE_SPINLOCK(domctl_lock);
+DEFINE_SPINLOCK(vcpu_alloc_lock);
 
 int cpumask_to_xenctl_cpumap(
     struct xenctl_cpumap *xenctl_cpumap, const cpumask_t *cpumask)
@@ -506,6 +507,18 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         /* Needed, for example, to ensure writable p.t. state is synced. */
         domain_pause(d);
 
+        /*
+         * Certain operations (e.g. CPU microcode updates) modify data which is
+         * used during VCPU allocation/initialization.
+         */
+        while ( !spin_trylock(&vcpu_alloc_lock) )
+            if ( hypercall_preempt_check() )
+            {
+                ret = hypercall_create_continuation(
+                    __HYPERVISOR_domctl, "h", u_domctl);
+                goto maxvcpu_out_novcpulock;
+            }
+
         /* We cannot reduce maximum VCPUs. */
         ret = -EINVAL;
         if ( (max < d->max_vcpus) && (d->vcpu[max] != NULL) )
@@ -555,6 +568,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         ret = 0;
 
     maxvcpu_out:
+        spin_unlock(&vcpu_alloc_lock);
+
+    maxvcpu_out_novcpulock:
         domain_unpause(d);
         rcu_unlock_domain(d);
     }
--- a/xen/include/asm-x86/hvm/svm/svm.h
+++ b/xen/include/asm-x86/hvm/svm/svm.h
@@ -98,4 +98,7 @@ extern u32 svm_feature_flags;
                                   ~TSC_RATIO_RSVD_BITS )
 #define vcpu_tsc_ratio(v)       TSC_RATIO((v)->domain->arch.tsc_khz, cpu_khz)
 
+extern void svm_host_osvw_reset(void);
+extern void svm_host_osvw_init(void);
+
 #endif /* __ASM_X86_HVM_SVM_H__ */
--- a/xen/include/asm-x86/hvm/svm/vmcb.h
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -516,6 +516,12 @@ struct arch_svm_struct {
     
     /* AMD lightweight profiling MSR */
     uint64_t guest_lwp_cfg;
+
+    /* OSVW MSRs */
+    struct {
+        u64 length;
+        u64 status;
+    } osvw;
 };
 
 struct vmcb_struct *alloc_vmcb(void);
--- a/xen/include/asm-x86/microcode.h
+++ b/xen/include/asm-x86/microcode.h
@@ -11,6 +11,7 @@ struct microcode_ops {
     int (*cpu_request_microcode)(int cpu, const void *buf, size_t size);
     int (*collect_cpu_info)(int cpu, struct cpu_signature *csig);
     int (*apply_microcode)(int cpu);
+    int (*start_update)(void);
 };
 
 struct cpu_signature {
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -69,6 +69,7 @@ void arch_dump_domain_info(struct domain
 
 void arch_vcpu_reset(struct vcpu *v);
 
+extern spinlock_t vcpu_alloc_lock;
 bool_t domctl_lock_acquire(void);
 void domctl_lock_release(void);
 


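A note on the locking added to platform_hypercall.c and domctl.c above: a
microcode update rewrites osvw_length/osvw_status, which svm_guest_osvw_init()
reads when a VCPU is created, so both paths are serialized on the new
vcpu_alloc_lock. Rather than spinning for the potentially long duration of an
update, a preemptible hypercall bails out and lets the guest retry through a
continuation. Condensed, the pattern looks like this (the enclosing function
is hypothetical; the primitives are the Xen ones used in the patch):

long do_some_op(XEN_GUEST_HANDLE(xen_some_op_t) u_arg)  /* hypothetical */
{
    long ret;

    /*
     * Try to take the lock; if it is contended and preemption is pending,
     * restart the hypercall from scratch later instead of spinning with
     * interrupts left to service.
     */
    while ( !spin_trylock(&vcpu_alloc_lock) )
        if ( hypercall_preempt_check() )
            return hypercall_create_continuation(
                __HYPERVISOR_platform_op, "h", u_arg);

    ret = 0;  /* ... critical section touching VCPU-allocation data ... */

    spin_unlock(&vcpu_alloc_lock);
    return ret;
}
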
Attachment: AMD-OSVW-guest
Description: Binary data
