Re: [Xen-devel] [PATCH v5] x86/AMD: Add support for AMD's OSVW feature in guests
>>> On 06.02.12 at 18:39, Boris Ostrovsky <boris.ostrovsky@xxxxxxx> wrote:
> # HG changeset patch
> # User Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
> # Date 1328549858 -3600
> # Node ID 3cf8ffd0ab883dd09f943f4d8fb50f5cc1f04cd5
> # Parent e2722b24dc0962de37215320b05d1bb7c4c42864
> x86/AMD: Add support for AMD's OSVW feature in guests.
>
> In some cases guests should not apply workarounds for errata even when the
> physical processor is affected. For example, because of erratum 400 on
> family 10h processors a Linux guest will read an MSR (resulting in a VMEXIT)
> before going idle in order to avoid getting stuck in a non-C0 state. This is
> not necessary: HLT and IO instructions are intercepted, and therefore there
> is no reason for the erratum 400 workaround in the guest.
>
> This patch allows us to present a guest with certain errata as fixed,
> regardless of the state of the actual hardware.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
> Acked-by: Christoph Egger <Christoph.Egger@xxxxxxx>
In the form below/attached (integration with boot-time microcode
loading fixed and trailing whitespace removed):
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
-- Jan
In some cases guests should not apply workarounds for errata even when the
physical processor is affected. For example, because of erratum 400 on family
10h processors a Linux guest will read an MSR (resulting in a VMEXIT) before
going idle in order to avoid getting stuck in a non-C0 state. This is not
necessary: HLT and IO instructions are intercepted, and therefore there is no
reason for the erratum 400 workaround in the guest.

This patch allows us to present a guest with certain errata as fixed,
regardless of the state of the actual hardware.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
Acked-by: Christoph Egger <Christoph.Egger@xxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
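
For background: OSVW (OS Visible Workaround) consists of two MSRs. MSR
C001_0140 (OSVW_ID_Length) reports how many status bits are valid, and MSR
C001_0141 (OSVW_Status) carries one bit per erratum (bit 0: erratum 298,
bit 1: erratum 400, bit 2: erratum 415). A guest kernel's boot-time check
for erratum 400 looks roughly like the sketch below (illustrative only; the
helper names are hypothetical and this is not code from the patch -- a real
guest would run this in kernel context, since RDMSR is privileged):

    #include <stdint.h>

    #define MSR_AMD_OSVW_ID_LENGTH 0xc0010140
    #define MSR_AMD_OSVW_STATUS    0xc0010141

    static inline uint64_t rdmsr64(uint32_t msr)
    {
        uint32_t lo, hi;
        __asm__ __volatile__ ( "rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr) );
        return ((uint64_t)hi << 32) | lo;
    }

    /* Returns 1 if the erratum 400 workaround is needed, 0 if fixed. */
    static int osvw_erratum_400_present(void)
    {
        uint64_t len = rdmsr64(MSR_AMD_OSVW_ID_LENGTH);

        /*
         * Status bits at or beyond the advertised length carry no
         * information; be conservative and assume the erratum is present.
         */
        if ( len <= 1 )
            return 1;

        return (rdmsr64(MSR_AMD_OSVW_STATUS) >> 1) & 1; /* bit 1: erratum 400 */
    }

With the patch applied, the virtualized OSVW_ID_Length is at least 3 and
status bits 1 and 2 are cleared, so a check like this concludes the erratum
is fixed and the guest never installs the idle-path MSR read (and its
VMEXIT) in the first place.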
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy(
                     bitmaskof(X86_FEATURE_SSE4A) |
                     bitmaskof(X86_FEATURE_MISALIGNSSE) |
                     bitmaskof(X86_FEATURE_3DNOWPREFETCH) |
+                    bitmaskof(X86_FEATURE_OSVW) |
                     bitmaskof(X86_FEATURE_XOP) |
                     bitmaskof(X86_FEATURE_FMA4) |
                     bitmaskof(X86_FEATURE_TBM) |
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -83,6 +83,10 @@ static DEFINE_PER_CPU_READ_MOSTLY(void *
 static bool_t amd_erratum383_found __read_mostly;
 
+/* OSVW bits */
+static uint64_t osvw_length, osvw_status;
+static DEFINE_SPINLOCK(osvw_lock);
+
 void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len)
 {
     struct vcpu *curr = current;
@@ -902,6 +906,69 @@ static void svm_do_resume(struct vcpu *v
     reset_stack_and_jump(svm_asm_do_resume);
 }
 
+static void svm_guest_osvw_init(struct vcpu *vcpu)
+{
+    if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
+        return;
+
+    /*
+     * Guests should see errata 400 and 415 as fixed (assuming that
+     * HLT and IO instructions are intercepted).
+     */
+    vcpu->arch.hvm_svm.osvw.length = (osvw_length >= 3) ? osvw_length : 3;
+    vcpu->arch.hvm_svm.osvw.status = osvw_status & ~(6ULL);
+
+    /*
+     * By increasing VCPU's osvw.length to 3 we are telling the guest that
+     * all osvw.status bits inside that length, including bit 0 (which is
+     * reserved for erratum 298), are valid. However, if the host processor's
+     * osvw_len is 0 then osvw_status[0] carries no information. We need to
+     * be conservative here and therefore we tell the guest that erratum 298
+     * is present (because we really don't know).
+     */
+    if ( osvw_length == 0 && boot_cpu_data.x86 == 0x10 )
+        vcpu->arch.hvm_svm.osvw.status |= 1;
+}
+
+void svm_host_osvw_reset(void)
+{
+    spin_lock(&osvw_lock);
+
+    osvw_length = 64; /* One register (MSRC001_0141) worth of errata */
+    osvw_status = 0;
+
+    spin_unlock(&osvw_lock);
+}
+
+void svm_host_osvw_init(void)
+{
+    spin_lock(&osvw_lock);
+
+    /*
+     * Get OSVW bits. If bits are not the same on different processors then
+     * choose the worst case (i.e. if erratum is present on one processor and
+     * not on another assume that the erratum is present everywhere).
+     */
+    if ( test_bit(X86_FEATURE_OSVW, &boot_cpu_data.x86_capability) )
+    {
+        uint64_t len, status;
+
+        if ( rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) ||
+             rdmsr_safe(MSR_AMD_OSVW_STATUS, status) )
+            len = status = 0;
+
+        if ( len < osvw_length )
+            osvw_length = len;
+
+        osvw_status |= status;
+        osvw_status &= (1ULL << osvw_length) - 1;
+    }
+    else
+        osvw_length = osvw_status = 0;
+
+    spin_unlock(&osvw_lock);
+}
+
 static int svm_domain_initialise(struct domain *d)
 {
     return 0;
@@ -930,6 +997,9 @@ static int svm_vcpu_initialise(struct vc
     }
 
     vpmu_initialise(v);
+
+    svm_guest_osvw_init(v);
+
     return 0;
 }
@@ -1044,6 +1114,27 @@ static void svm_init_erratum_383(struct
     }
 }
 
+static int svm_handle_osvw(struct vcpu *v, uint32_t msr, uint64_t *val, bool_t read)
+{
+    uint eax, ebx, ecx, edx;
+
+    /* Guest OSVW support */
+    hvm_cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+    if ( !test_bit((X86_FEATURE_OSVW & 31), &ecx) )
+        return -1;
+
+    if ( read )
+    {
+        if ( msr == MSR_AMD_OSVW_ID_LENGTH )
+            *val = v->arch.hvm_svm.osvw.length;
+        else
+            *val = v->arch.hvm_svm.osvw.status;
+    }
+    /* Writes are ignored */
+
+    return 0;
+}
+
 static int svm_cpu_up(void)
 {
     uint64_t msr_content;
@@ -1094,6 +1185,9 @@ static int svm_cpu_up(void)
     }
 #endif
 
+    /* Initialize OSVW bits to be used by guests */
+    svm_host_osvw_init();
+
     return 0;
 }
@@ -1104,6 +1198,8 @@ struct hvm_function_table * __init start
     if ( !test_bit(X86_FEATURE_SVM, &boot_cpu_data.x86_capability) )
         return NULL;
 
+    svm_host_osvw_reset();
+
     if ( svm_cpu_up() )
     {
         printk("SVM: failed to initialise.\n");
@@ -1388,6 +1484,13 @@ static int svm_msr_read_intercept(unsign
         vpmu_do_rdmsr(msr, msr_content);
         break;
 
+    case MSR_AMD_OSVW_ID_LENGTH:
+    case MSR_AMD_OSVW_STATUS:
+        ret = svm_handle_osvw(v, msr, msr_content, 1);
+        if ( ret < 0 )
+            goto gpf;
+        break;
+
    default:
        ret = nsvm_rdmsr(v, msr, msr_content);
        if ( ret < 0 )
@@ -1512,6 +1615,13 @@ static int svm_msr_write_intercept(unsig
          */
         break;
 
+    case MSR_AMD_OSVW_ID_LENGTH:
+    case MSR_AMD_OSVW_STATUS:
+        ret = svm_handle_osvw(v, msr, &msr_content, 0);
+        if ( ret < 0 )
+            goto gpf;
+        break;
+
    default:
        ret = nsvm_wrmsr(v, msr, msr_content);
        if ( ret < 0 )
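
To spell out the worst-case merge performed by svm_host_osvw_init() above:
the host-wide length is the minimum across all processors (a status bit is
only meaningful within the advertised length), while the status bits are
OR-ed together, so an erratum reported by any one processor is treated as
present on all of them. A standalone sketch of just that logic, with
made-up per-CPU values (not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Host-wide OSVW state, accumulated as in svm_host_osvw_init(). */
    static uint64_t osvw_length = 64;   /* one register's worth of errata */
    static uint64_t osvw_status;

    /* Fold one processor's (length, status) pair into the host-wide view. */
    static void osvw_merge(uint64_t len, uint64_t status)
    {
        if ( len < osvw_length )
            osvw_length = len;

        osvw_status |= status;
        osvw_status &= (1ULL << osvw_length) - 1;   /* drop invalid bits */
    }

    int main(void)
    {
        osvw_merge(3, 0x2);   /* CPU0: 3 valid bits, erratum 400 present */
        osvw_merge(2, 0x1);   /* CPU1: 2 valid bits, erratum 298 present */

        /* Prints length=2 status=0x3: both errata assumed present. */
        printf("length=%llu status=%#llx\n",
               (unsigned long long)osvw_length,
               (unsigned long long)osvw_status);
        return 0;
    }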
--- a/xen/arch/x86/microcode.c
+++ b/xen/arch/x86/microcode.c
@@ -218,6 +218,16 @@ int microcode_update(XEN_GUEST_HANDLE(co
     info->error = 0;
     info->cpu = cpumask_first(&cpu_online_map);
 
+    if ( microcode_ops->start_update )
+    {
+        ret = microcode_ops->start_update();
+        if ( ret != 0 )
+        {
+            xfree(info);
+            return ret;
+        }
+    }
+
     return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
 }
@@ -240,6 +250,12 @@ static int __init microcode_init(void)
     if ( !data )
         return -ENOMEM;
 
+    if ( microcode_ops->start_update && microcode_ops->start_update() != 0 )
+    {
+        ucode_mod_map(NULL);
+        return 0;
+    }
+
     softirq_tasklet_init(&tasklet, _do_microcode_update, (unsigned long)data);
 
     for_each_online_cpu ( cpu )
--- a/xen/arch/x86/microcode_amd.c
+++ b/xen/arch/x86/microcode_amd.c
@@ -25,6 +25,7 @@
 #include <asm/msr.h>
 #include <asm/processor.h>
 #include <asm/microcode.h>
+#include <asm/hvm/svm/svm.h>
 
 struct equiv_cpu_entry {
     uint32_t installed_cpu;
@@ -71,6 +72,7 @@ struct mpbhdr {
 /* serialize access to the physical write */
 static DEFINE_SPINLOCK(microcode_update_lock);
 
+/* See comment in start_update() for cases when this routine fails */
 static int collect_cpu_info(int cpu, struct cpu_signature *csig)
 {
     struct cpuinfo_x86 *c = &cpu_data[cpu];
@@ -287,7 +289,8 @@ static int cpu_request_microcode(int cpu
     {
         printk(KERN_ERR "microcode: error! Wrong "
                "microcode patch file magic\n");
-        return -EINVAL;
+        error = -EINVAL;
+        goto out;
     }
 
     mc_amd = xmalloc(struct microcode_amd);
@@ -295,7 +298,8 @@ static int cpu_request_microcode(int cpu
     {
         printk(KERN_ERR "microcode: error! "
                "Can not allocate memory for microcode patch\n");
-        return -ENOMEM;
+        error = -ENOMEM;
+        goto out;
     }
 
     error = install_equiv_cpu_table(mc_amd, buf, &offset);
@@ -303,7 +307,8 @@ static int cpu_request_microcode(int cpu
     {
         xfree(mc_amd);
         printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
-        return -EINVAL;
+        error = -EINVAL;
+        goto out;
     }
 
     mc_old = uci->mc.mc_amd;
@@ -337,13 +342,19 @@ static int cpu_request_microcode(int cpu
     /* On success keep the microcode patch for
      * re-apply on resume.
      */
-    if (error == 1)
+    if ( error == 1 )
     {
         xfree(mc_old);
-        return 0;
+        error = 0;
+    }
+    else
+    {
+        xfree(mc_amd);
+        uci->mc.mc_amd = mc_old;
     }
-    xfree(mc_amd);
-    uci->mc.mc_amd = mc_old;
+
+ out:
+    svm_host_osvw_init();
 
     return error;
 }
@@ -395,11 +406,28 @@ err1:
     return -ENOMEM;
 }
 
+static int start_update(void)
+{
+    /*
+     * We assume here that svm_host_osvw_init() will be called on each cpu
+     * (from cpu_request_microcode()).
+     *
+     * Note that if collect_cpu_info() returns an error then
+     * cpu_request_microcode() will not be invoked, thus leaving OSVW bits
+     * unmodified. Currently though collect_cpu_info() will not fail on
+     * processors supporting OSVW so we will not deal with this possibility.
+     */
+    svm_host_osvw_reset();
+
+    return 0;
+}
+
 static const struct microcode_ops microcode_amd_ops = {
     .microcode_resume_match = microcode_resume_match,
     .cpu_request_microcode = cpu_request_microcode,
     .collect_cpu_info = collect_cpu_info,
     .apply_microcode = apply_microcode,
+    .start_update = start_update,
 };
 
 static __init int microcode_init_amd(void)
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -166,7 +166,21 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
             break;
 
         guest_from_compat_handle(data, op->u.microcode.data);
+
+        /*
+         * alloc_vcpu() will access data which is modified during
+         * microcode update.
+         */
+        while ( !spin_trylock(&vcpu_alloc_lock) )
+            if ( hypercall_preempt_check() )
+            {
+                ret = hypercall_create_continuation(
+                    __HYPERVISOR_platform_op, "h", u_xenpf_op);
+                goto out;
+            }
+
         ret = microcode_update(data, op->u.microcode.length);
+        spin_unlock(&vcpu_alloc_lock);
     }
     break;
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -29,6 +29,7 @@
 #include <xsm/xsm.h>
 
 static DEFINE_SPINLOCK(domctl_lock);
+DEFINE_SPINLOCK(vcpu_alloc_lock);
 
 int cpumask_to_xenctl_cpumap(
     struct xenctl_cpumap *xenctl_cpumap, const cpumask_t *cpumask)
@@ -506,6 +507,18 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         /* Needed, for example, to ensure writable p.t. state is synced. */
         domain_pause(d);
 
+        /*
+         * Certain operations (e.g. CPU microcode updates) modify data which
+         * is used during VCPU allocation/initialization.
+         */
+        while ( !spin_trylock(&vcpu_alloc_lock) )
+            if ( hypercall_preempt_check() )
+            {
+                ret = hypercall_create_continuation(
+                    __HYPERVISOR_domctl, "h", u_domctl);
+                goto maxvcpu_out_novcpulock;
+            }
+
         /* We cannot reduce maximum VCPUs. */
         ret = -EINVAL;
         if ( (max < d->max_vcpus) && (d->vcpu[max] != NULL) )
@@ -555,6 +568,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         ret = 0;
 
     maxvcpu_out:
+        spin_unlock(&vcpu_alloc_lock);
+
+    maxvcpu_out_novcpulock:
         domain_unpause(d);
         rcu_unlock_domain(d);
     }
--- a/xen/include/asm-x86/hvm/svm/svm.h
+++ b/xen/include/asm-x86/hvm/svm/svm.h
@@ -98,4 +98,7 @@ extern u32 svm_feature_flags;
                              ~TSC_RATIO_RSVD_BITS )
 #define vcpu_tsc_ratio(v)   TSC_RATIO((v)->domain->arch.tsc_khz, cpu_khz)
 
+extern void svm_host_osvw_reset(void);
+extern void svm_host_osvw_init(void);
+
 #endif /* __ASM_X86_HVM_SVM_H__ */
--- a/xen/include/asm-x86/hvm/svm/vmcb.h
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -516,6 +516,12 @@ struct arch_svm_struct {
 
     /* AMD lightweight profiling MSR */
     uint64_t guest_lwp_cfg;
+
+    /* OSVW MSRs */
+    struct {
+        u64 length;
+        u64 status;
+    } osvw;
 };
 
 struct vmcb_struct *alloc_vmcb(void);
--- a/xen/include/asm-x86/microcode.h
+++ b/xen/include/asm-x86/microcode.h
@@ -11,6 +11,7 @@ struct microcode_ops {
     int (*cpu_request_microcode)(int cpu, const void *buf, size_t size);
     int (*collect_cpu_info)(int cpu, struct cpu_signature *csig);
     int (*apply_microcode)(int cpu);
+    int (*start_update)(void);
 };
 
 struct cpu_signature {
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -69,6 +69,7 @@ void arch_dump_domain_info(struct domain
 
 void arch_vcpu_reset(struct vcpu *v);
 
+extern spinlock_t vcpu_alloc_lock;
 bool_t domctl_lock_acquire(void);
 void domctl_lock_release(void);
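
A note on the locking added to do_platform_op() and do_domctl(): taking
vcpu_alloc_lock with a plain spin_lock() could stall one hypercall for as
long as the other holds the lock, so both paths spin on spin_trylock() and,
whenever preemption is pending, restart the whole hypercall through a
continuation. Stripped to its skeleton the pattern is (a fragment for
illustration, using the same Xen primitives as the patch; not compilable
on its own):

    while ( !spin_trylock(&vcpu_alloc_lock) )
        if ( hypercall_preempt_check() )
        {
            /*
             * Lock is contended and we have been asked to preempt:
             * have the guest re-issue the hypercall rather than
             * spinning inside the hypervisor.
             */
            ret = hypercall_create_continuation(
                __HYPERVISOR_domctl, "h", u_domctl);
            goto out;
        }

    /* ... critical section: data used by VCPU allocation is stable ... */

    spin_unlock(&vcpu_alloc_lock);

The continuation makes the retry visible to the scheduler instead of
burning CPU time inside Xen while the microcode update runs.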
Attachment: AMD-OSVW-guest