|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v4] x86/AMD: Add support for AMD's OSVW feature in guests
>>> On 01.02.12 at 17:30, Boris Ostrovsky <boris.ostrovsky@xxxxxxx> wrote:
> # HG changeset patch
> # User Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
> # Date 1328108207 -3600
> # Node ID 789bbf4f478b0e81d71240a1f1147ef66a7c8848
> # Parent e2722b24dc0962de37215320b05d1bb7c4c42864
> x86/AMD: Add support for AMD's OSVW feature in guests.
>
> In some cases guests should not provide workarounds for errata even when the
> physical processor is affected. For example, because of erratum 400 on
> family
> 10h processors a Linux guest will read an MSR (resulting in VMEXIT) before
> going to idle in order to avoid getting stuck in a non-C0 state. This is not
> necessary: HLT and IO instructions are intercepted and therefore there is no
> reason for erratum 400 workaround in the guest.
>
> This patch allows us to present a guest with certain errata as fixed,
> regardless of the state of actual hardware.
As I was about to apply this to my local tree to give it a try, I had
to realize that the microcode integration is still not correct: There
is (at least from an abstract perspective) no guarantee for
cpu_request_microcode() to be called on all CPUs, yet you want
svm_host_osvw_init() to be re-run on all of them. If you choose
not to deal with this in a formally correct way, then a code
comment should state that it is not a problem in practice, because
collect_cpu_info() won't currently fail for the CPUs of interest —
this lowers the risk of surprises when someone later touches that code.
Further, you need to serialize against onlining of CPUs while in
svm_host_osvw_reset(), and similarly protect the updating of the
globals in svm_host_osvw_init(). Probably a private lock shared
between those two functions is the best route to go here.
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
> Acked-by: Christoph Egger <Christoph.Egger@xxxxxxx>
>
> diff -r e2722b24dc09 -r 789bbf4f478b tools/libxc/xc_cpuid_x86.c
> --- a/tools/libxc/xc_cpuid_x86.c Thu Jan 26 17:43:31 2012 +0000
> +++ b/tools/libxc/xc_cpuid_x86.c Wed Feb 01 15:56:47 2012 +0100
> @@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy(
> bitmaskof(X86_FEATURE_SSE4A) |
> bitmaskof(X86_FEATURE_MISALIGNSSE) |
> bitmaskof(X86_FEATURE_3DNOWPREFETCH) |
> + bitmaskof(X86_FEATURE_OSVW) |
> bitmaskof(X86_FEATURE_XOP) |
> bitmaskof(X86_FEATURE_FMA4) |
> bitmaskof(X86_FEATURE_TBM) |
> diff -r e2722b24dc09 -r 789bbf4f478b xen/arch/x86/hvm/svm/svm.c
> --- a/xen/arch/x86/hvm/svm/svm.c Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/arch/x86/hvm/svm/svm.c Wed Feb 01 15:56:47 2012 +0100
> @@ -83,6 +83,9 @@ static DEFINE_PER_CPU_READ_MOSTLY(void *
>
> static bool_t amd_erratum383_found __read_mostly;
>
> +/* OSVW bits */
> +static uint64_t osvw_length, osvw_status;
> +
> void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len)
> {
> struct vcpu *curr = current;
> @@ -902,6 +905,61 @@ static void svm_do_resume(struct vcpu *v
> reset_stack_and_jump(svm_asm_do_resume);
> }
>
> +static void svm_guest_osvw_init(struct vcpu *vcpu)
> +{
> + if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
> + return;
> +
> + /*
> + * Guests should see errata 400 and 415 as fixed (assuming that
> + * HLT and IO instructions are intercepted).
> + */
> + vcpu->arch.hvm_svm.osvw.length = (osvw_length >= 3) ? osvw_length : 3;
> + vcpu->arch.hvm_svm.osvw.status = osvw_status & ~(6ULL);
> +
> + /*
> + * By increasing VCPU's osvw.length to 3 we are telling the guest that
> + * all osvw.status bits inside that length, including bit 0 (which is
> + * reserved for erratum 298), are valid. However, if host processor's
> + * osvw_len is 0 then osvw_status[0] carries no information. We need to
> + * be conservative here and therefore we tell the guest that erratum
> 298
> + * is present (because we really don't know).
> + */
> + if ( osvw_length == 0 && boot_cpu_data.x86 == 0x10 )
> + vcpu->arch.hvm_svm.osvw.status |= 1;
> +}
> +
> +void svm_host_osvw_reset()
> +{
> + osvw_length = 64; /* One register (MSRC001_0141) worth of errata */
> + osvw_status = 0;
> +}
> +
> +void svm_host_osvw_init()
> +{
> + /*
> + * Get OSVW bits. If bits are not the same on different processors then
> + * choose the worst case (i.e. if erratum is present on one processor
> and
> + * not on another assume that the erratum is present everywhere).
> + */
> + if ( test_bit(X86_FEATURE_OSVW, &boot_cpu_data.x86_capability) )
> + {
> + uint64_t len, status;
> +
> + if ( rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) ||
> + rdmsr_safe(MSR_AMD_OSVW_STATUS, status) )
> + len = status = 0;
> +
> + if (len < osvw_length)
> + osvw_length = len;
> +
> + osvw_status |= status;
> + osvw_status &= (1ULL << osvw_length) - 1;
> + }
> + else
> + osvw_length = osvw_status = 0;
> +}
> +
> static int svm_domain_initialise(struct domain *d)
> {
> return 0;
> @@ -930,6 +988,9 @@ static int svm_vcpu_initialise(struct vc
> }
>
> vpmu_initialise(v);
> +
> + svm_guest_osvw_init(v);
> +
> return 0;
> }
>
> @@ -1044,6 +1105,27 @@ static void svm_init_erratum_383(struct
> }
> }
>
> +static int svm_handle_osvw(struct vcpu *v, uint32_t msr, uint64_t *val,
> bool_t read)
> +{
> + uint eax, ebx, ecx, edx;
> +
> + /* Guest OSVW support */
> + hvm_cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
> + if ( !test_bit((X86_FEATURE_OSVW & 31), &ecx) )
> + return -1;
> +
> + if ( read )
> + {
> + if (msr == MSR_AMD_OSVW_ID_LENGTH)
> + *val = v->arch.hvm_svm.osvw.length;
> + else
> + *val = v->arch.hvm_svm.osvw.status;
> + }
> + /* Writes are ignored */
> +
> + return 0;
> +}
> +
> static int svm_cpu_up(void)
> {
> uint64_t msr_content;
> @@ -1094,6 +1176,9 @@ static int svm_cpu_up(void)
> }
> #endif
>
> + /* Initialize OSVW bits to be used by guests */
> + svm_host_osvw_init();
> +
> return 0;
> }
>
> @@ -1104,6 +1189,8 @@ struct hvm_function_table * __init start
> if ( !test_bit(X86_FEATURE_SVM, &boot_cpu_data.x86_capability) )
> return NULL;
>
> + svm_host_osvw_reset();
> +
> if ( svm_cpu_up() )
> {
> printk("SVM: failed to initialise.\n");
> @@ -1388,6 +1475,13 @@ static int svm_msr_read_intercept(unsign
> vpmu_do_rdmsr(msr, msr_content);
> break;
>
> + case MSR_AMD_OSVW_ID_LENGTH:
> + case MSR_AMD_OSVW_STATUS:
> + ret = svm_handle_osvw(v, msr, msr_content, 1);
> + if ( ret < 0 )
> + goto gpf;
> + break;
> +
> default:
> ret = nsvm_rdmsr(v, msr, msr_content);
> if ( ret < 0 )
> @@ -1512,6 +1606,13 @@ static int svm_msr_write_intercept(unsig
> */
> break;
>
> + case MSR_AMD_OSVW_ID_LENGTH:
> + case MSR_AMD_OSVW_STATUS:
> + ret = svm_handle_osvw(v, msr, &msr_content, 0);
> + if ( ret < 0 )
> + goto gpf;
> + break;
> +
> default:
> ret = nsvm_wrmsr(v, msr, msr_content);
> if ( ret < 0 )
> diff -r e2722b24dc09 -r 789bbf4f478b xen/arch/x86/microcode.c
> --- a/xen/arch/x86/microcode.c Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/arch/x86/microcode.c Wed Feb 01 15:56:47 2012 +0100
> @@ -218,6 +218,16 @@ int microcode_update(XEN_GUEST_HANDLE(co
> info->error = 0;
> info->cpu = cpumask_first(&cpu_online_map);
>
> + if ( microcode_ops->start_update )
> + {
> + ret = microcode_ops->start_update();
> + if ( ret != 0 )
> + {
> + xfree(info);
> + return ret;
> + }
> + }
> +
> return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
> }
>
> diff -r e2722b24dc09 -r 789bbf4f478b xen/arch/x86/microcode_amd.c
> --- a/xen/arch/x86/microcode_amd.c Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/arch/x86/microcode_amd.c Wed Feb 01 15:56:47 2012 +0100
> @@ -25,6 +25,7 @@
> #include <asm/msr.h>
> #include <asm/processor.h>
> #include <asm/microcode.h>
> +#include <asm/hvm/svm/svm.h>
>
> struct equiv_cpu_entry {
> uint32_t installed_cpu;
> @@ -287,7 +288,8 @@ static int cpu_request_microcode(int cpu
> {
> printk(KERN_ERR "microcode: error! Wrong "
> "microcode patch file magic\n");
> - return -EINVAL;
> + error = -EINVAL;
> + goto out;
> }
>
> mc_amd = xmalloc(struct microcode_amd);
> @@ -295,7 +297,8 @@ static int cpu_request_microcode(int cpu
> {
> printk(KERN_ERR "microcode: error! "
> "Can not allocate memory for microcode patch\n");
> - return -ENOMEM;
> + error = -ENOMEM;
> + goto out;
> }
>
> error = install_equiv_cpu_table(mc_amd, buf, &offset);
> @@ -303,7 +306,8 @@ static int cpu_request_microcode(int cpu
> {
> xfree(mc_amd);
> printk(KERN_ERR "microcode: installing equivalent cpu table
> failed\n");
> - return -EINVAL;
> + error = -EINVAL;
> + goto out;
> }
>
> mc_old = uci->mc.mc_amd;
> @@ -337,13 +341,19 @@ static int cpu_request_microcode(int cpu
> /* On success keep the microcode patch for
> * re-apply on resume.
> */
> - if (error == 1)
> + if ( error == 1 )
> {
> xfree(mc_old);
> - return 0;
> + error = 0;
> + }
> + else
> + {
> + xfree(mc_amd);
> + uci->mc.mc_amd = mc_old;
> }
> - xfree(mc_amd);
> - uci->mc.mc_amd = mc_old;
> +
> + out:
> + svm_host_osvw_init();
>
> return error;
> }
> @@ -395,11 +405,19 @@ err1:
> return -ENOMEM;
> }
>
> +static int start_update(void)
> +{
> + svm_host_osvw_reset();
> +
> + return 0;
> +}
> +
> static const struct microcode_ops microcode_amd_ops = {
> .microcode_resume_match = microcode_resume_match,
> .cpu_request_microcode = cpu_request_microcode,
> .collect_cpu_info = collect_cpu_info,
> .apply_microcode = apply_microcode,
> + .start_update = start_update,
> };
>
> static __init int microcode_init_amd(void)
> diff -r e2722b24dc09 -r 789bbf4f478b xen/arch/x86/platform_hypercall.c
> --- a/xen/arch/x86/platform_hypercall.c Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/arch/x86/platform_hypercall.c Wed Feb 01 15:56:47 2012 +0100
> @@ -166,7 +166,21 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
> break;
>
> guest_from_compat_handle(data, op->u.microcode.data);
> +
> + /*
> + * alloc_vpcu() will access data which is modified during
> + * microcode update
> + */
> + while ( !spin_trylock(&vcpu_alloc_lock) )
> + if ( hypercall_preempt_check() )
> + {
> + ret = hypercall_create_continuation(
> + __HYPERVISOR_platform_op, "h", u_xenpf_op);
> + goto out;
> + }
> +
> ret = microcode_update(data, op->u.microcode.length);
> + spin_unlock(&vcpu_alloc_lock);
> }
> break;
>
> diff -r e2722b24dc09 -r 789bbf4f478b xen/common/domctl.c
> --- a/xen/common/domctl.c Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/common/domctl.c Wed Feb 01 15:56:47 2012 +0100
> @@ -29,6 +29,7 @@
> #include <xsm/xsm.h>
>
> static DEFINE_SPINLOCK(domctl_lock);
> +DEFINE_SPINLOCK(vcpu_alloc_lock);
>
> int cpumask_to_xenctl_cpumap(
> struct xenctl_cpumap *xenctl_cpumap, const cpumask_t *cpumask)
> @@ -502,6 +503,18 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
> /* Needed, for example, to ensure writable p.t. state is synced. */
> domain_pause(d);
>
> + /*
> + * Certain operations (e.g. CPU microcode updates) modify data
> which is
> + * used during VCPU allocation/initialization
> + */
> + while ( !spin_trylock(&vcpu_alloc_lock) )
> + if ( hypercall_preempt_check() )
> + {
> + ret = hypercall_create_continuation(
> + __HYPERVISOR_domctl, "h", u_domctl);
> + goto maxvcpu_out_novcpulock;
> + }
> +
> /* We cannot reduce maximum VCPUs. */
> ret = -EINVAL;
> if ( (max < d->max_vcpus) && (d->vcpu[max] != NULL) )
> @@ -551,6 +564,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
> ret = 0;
>
> maxvcpu_out:
> + spin_unlock(&vcpu_alloc_lock);
> +
> + maxvcpu_out_novcpulock:
> domain_unpause(d);
> rcu_unlock_domain(d);
> }
> diff -r e2722b24dc09 -r 789bbf4f478b xen/include/asm-x86/hvm/svm/svm.h
> --- a/xen/include/asm-x86/hvm/svm/svm.h Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/include/asm-x86/hvm/svm/svm.h Wed Feb 01 15:56:47 2012 +0100
> @@ -98,4 +98,7 @@ extern u32 svm_feature_flags;
> ~TSC_RATIO_RSVD_BITS )
> #define vcpu_tsc_ratio(v) TSC_RATIO((v)->domain->arch.tsc_khz, cpu_khz)
>
> +extern void svm_host_osvw_reset(void);
> +extern void svm_host_osvw_init(void);
> +
> #endif /* __ASM_X86_HVM_SVM_H__ */
> diff -r e2722b24dc09 -r 789bbf4f478b xen/include/asm-x86/hvm/svm/vmcb.h
> --- a/xen/include/asm-x86/hvm/svm/vmcb.h Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/include/asm-x86/hvm/svm/vmcb.h Wed Feb 01 15:56:47 2012 +0100
> @@ -516,6 +516,12 @@ struct arch_svm_struct {
>
> /* AMD lightweight profiling MSR */
> uint64_t guest_lwp_cfg;
> +
> + /* OSVW MSRs */
> + struct {
> + u64 length;
> + u64 status;
> + } osvw;
> };
>
> struct vmcb_struct *alloc_vmcb(void);
> diff -r e2722b24dc09 -r 789bbf4f478b xen/include/asm-x86/microcode.h
> --- a/xen/include/asm-x86/microcode.h Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/include/asm-x86/microcode.h Wed Feb 01 15:56:47 2012 +0100
> @@ -11,6 +11,7 @@ struct microcode_ops {
> int (*cpu_request_microcode)(int cpu, const void *buf, size_t size);
> int (*collect_cpu_info)(int cpu, struct cpu_signature *csig);
> int (*apply_microcode)(int cpu);
> + int (*start_update)(void);
> };
>
> struct cpu_signature {
> diff -r e2722b24dc09 -r 789bbf4f478b xen/include/xen/domain.h
> --- a/xen/include/xen/domain.h Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/include/xen/domain.h Wed Feb 01 15:56:47 2012 +0100
> @@ -69,6 +69,7 @@ void arch_dump_domain_info(struct domain
>
> void arch_vcpu_reset(struct vcpu *v);
>
> +extern spinlock_t vcpu_alloc_lock;
> bool_t domctl_lock_acquire(void);
> void domctl_lock_release(void);
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |