 
	
| [Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v4] x86/AMD: Add support for AMD's OSVW feature in guests
 >>> On 01.02.12 at 17:30, Boris Ostrovsky <boris.ostrovsky@xxxxxxx> wrote:
> # HG changeset patch
> # User Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
> # Date 1328108207 -3600
> # Node ID 789bbf4f478b0e81d71240a1f1147ef66a7c8848
> # Parent  e2722b24dc0962de37215320b05d1bb7c4c42864
> x86/AMD: Add support for AMD's OSVW feature in guests.
> 
> In some cases guests should not provide workarounds for errata even when the
> physical processor is affected. For example, because of erratum 400 on 
> family
> 10h processors a Linux guest will read an MSR (resulting in VMEXIT) before
> going to idle in order to avoid getting stuck in a non-C0 state. This is not
> necessary: HLT and IO instructions are intercepted and therefore there is no
> reason for erratum 400 workaround in the guest.
> 
> This patch allows us to present a guest with certain errata as fixed,
> regardless of the state of actual hardware.
As I was about to apply this to my local tree to give it a try, I had
to realize that the microcode integration is still not correct: There
is (at least from an abstract perspective) no guarantee for
cpu_request_microcode() to be called on all CPUs, yet you want
svm_host_osvw_init() to be re-run on all of them. If you choose
to not deal with this in a formally correct way, it should be stated
so in a code comment (to lower the risk of surprises when someone
touches that code) that this is not possible in practice because
collect_cpu_info() won't currently fail for CPUs of interest.
Further you need to serialize against onlining of CPUs while in
svm_host_osvw_reset(), and similarly protect the updating of the
globals in svm_host_osvw_init(). Probably a private locked shared
between those two functions is the best route to go here.
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
> Acked-by: Christoph Egger <Christoph.Egger@xxxxxxx>
> 
> diff -r e2722b24dc09 -r 789bbf4f478b tools/libxc/xc_cpuid_x86.c
> --- a/tools/libxc/xc_cpuid_x86.c      Thu Jan 26 17:43:31 2012 +0000
> +++ b/tools/libxc/xc_cpuid_x86.c      Wed Feb 01 15:56:47 2012 +0100
> @@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy(
>                      bitmaskof(X86_FEATURE_SSE4A) |
>                      bitmaskof(X86_FEATURE_MISALIGNSSE) |
>                      bitmaskof(X86_FEATURE_3DNOWPREFETCH) |
> +                    bitmaskof(X86_FEATURE_OSVW) |
>                      bitmaskof(X86_FEATURE_XOP) |
>                      bitmaskof(X86_FEATURE_FMA4) |
>                      bitmaskof(X86_FEATURE_TBM) |
> diff -r e2722b24dc09 -r 789bbf4f478b xen/arch/x86/hvm/svm/svm.c
> --- a/xen/arch/x86/hvm/svm/svm.c      Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/arch/x86/hvm/svm/svm.c      Wed Feb 01 15:56:47 2012 +0100
> @@ -83,6 +83,9 @@ static DEFINE_PER_CPU_READ_MOSTLY(void *
>  
>  static bool_t amd_erratum383_found __read_mostly;
>  
> +/* OSVW bits */
> +static uint64_t osvw_length, osvw_status;
> +
>  void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len)
>  {
>      struct vcpu *curr = current;
> @@ -902,6 +905,61 @@ static void svm_do_resume(struct vcpu *v
>      reset_stack_and_jump(svm_asm_do_resume);
>  }
>  
> +static void svm_guest_osvw_init(struct vcpu *vcpu)
> +{
> +    if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
> +        return;
> +
> +    /*
> +     * Guests should see errata 400 and 415 as fixed (assuming that
> +     * HLT and IO instructions are intercepted).
> +     */
> +    vcpu->arch.hvm_svm.osvw.length = (osvw_length >= 3) ? osvw_length : 3;
> +    vcpu->arch.hvm_svm.osvw.status = osvw_status & ~(6ULL);
> +
> +    /*
> +     * By increasing VCPU's osvw.length to 3 we are telling the guest that
> +     * all osvw.status bits inside that length, including bit 0 (which is
> +     * reserved for erratum 298), are valid. However, if host processor's
> +     * osvw_len is 0 then osvw_status[0] carries no information. We need to
> +     * be conservative here and therefore we tell the guest that erratum 
> 298
> +     * is present (because we really don't know).
> +     */
> +    if ( osvw_length == 0 && boot_cpu_data.x86 == 0x10 )
> +        vcpu->arch.hvm_svm.osvw.status |= 1;
> +}
> +
> +void svm_host_osvw_reset()
> +{
> +    osvw_length = 64; /* One register (MSRC001_0141) worth of errata */
> +    osvw_status = 0;
> +}
> +
> +void svm_host_osvw_init()
> +{
> +    /* 
> +     * Get OSVW bits. If bits are not the same on different processors then
> +     * choose the worst case (i.e. if erratum is present on one processor 
> and
> +     * not on another assume that the erratum is present everywhere).
> +     */
> +    if ( test_bit(X86_FEATURE_OSVW, &boot_cpu_data.x86_capability) )
> +    {
> +        uint64_t len, status;
> +         
> +        if ( rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) ||
> +             rdmsr_safe(MSR_AMD_OSVW_STATUS, status) )
> +            len = status = 0;
> +
> +        if (len < osvw_length)
> +            osvw_length = len;
> +
> +        osvw_status |= status;
> +        osvw_status &= (1ULL << osvw_length) - 1;
> +    }
> +    else
> +        osvw_length = osvw_status = 0;
> +}
> +
>  static int svm_domain_initialise(struct domain *d)
>  {
>      return 0;
> @@ -930,6 +988,9 @@ static int svm_vcpu_initialise(struct vc
>      }
>  
>      vpmu_initialise(v);
> +
> +    svm_guest_osvw_init(v);
> +
>      return 0;
>  }
>  
> @@ -1044,6 +1105,27 @@ static void svm_init_erratum_383(struct 
>      }
>  }
>  
> +static int svm_handle_osvw(struct vcpu *v, uint32_t msr, uint64_t *val, 
> bool_t read)
> +{
> +    uint eax, ebx, ecx, edx;
> +     
> +    /* Guest OSVW support */
> +    hvm_cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
> +    if ( !test_bit((X86_FEATURE_OSVW & 31), &ecx) )
> +        return -1;
> +
> +    if ( read )
> +    {
> +        if (msr == MSR_AMD_OSVW_ID_LENGTH)
> +            *val = v->arch.hvm_svm.osvw.length;
> +        else
> +            *val = v->arch.hvm_svm.osvw.status;
> +    } 
> +    /* Writes are ignored */
> +
> +    return 0;
> +}
> +
>  static int svm_cpu_up(void)
>  {
>      uint64_t msr_content;
> @@ -1094,6 +1176,9 @@ static int svm_cpu_up(void)
>      }
>  #endif
>  
> +    /* Initialize OSVW bits to be used by guests */
> +    svm_host_osvw_init();
> +
>      return 0;
>  }
>  
> @@ -1104,6 +1189,8 @@ struct hvm_function_table * __init start
>      if ( !test_bit(X86_FEATURE_SVM, &boot_cpu_data.x86_capability) )
>          return NULL;
>  
> +    svm_host_osvw_reset();
> +
>      if ( svm_cpu_up() )
>      {
>          printk("SVM: failed to initialise.\n");
> @@ -1388,6 +1475,13 @@ static int svm_msr_read_intercept(unsign
>          vpmu_do_rdmsr(msr, msr_content);
>          break;
>  
> +    case MSR_AMD_OSVW_ID_LENGTH:
> +    case MSR_AMD_OSVW_STATUS:
> +        ret = svm_handle_osvw(v, msr, msr_content, 1);
> +        if ( ret < 0 )
> +            goto gpf;
> +        break;
> +
>      default:
>          ret = nsvm_rdmsr(v, msr, msr_content);
>          if ( ret < 0 )
> @@ -1512,6 +1606,13 @@ static int svm_msr_write_intercept(unsig
>           */
>          break;
>  
> +    case MSR_AMD_OSVW_ID_LENGTH:
> +    case MSR_AMD_OSVW_STATUS:
> +        ret = svm_handle_osvw(v, msr, &msr_content, 0);
> +        if ( ret < 0 )
> +            goto gpf;
> +        break;
> +
>      default:
>          ret = nsvm_wrmsr(v, msr, msr_content);
>          if ( ret < 0 )
> diff -r e2722b24dc09 -r 789bbf4f478b xen/arch/x86/microcode.c
> --- a/xen/arch/x86/microcode.c        Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/arch/x86/microcode.c        Wed Feb 01 15:56:47 2012 +0100
> @@ -218,6 +218,16 @@ int microcode_update(XEN_GUEST_HANDLE(co
>      info->error = 0;
>      info->cpu = cpumask_first(&cpu_online_map);
>  
> +    if ( microcode_ops->start_update )
> +    {
> +        ret = microcode_ops->start_update();
> +        if ( ret != 0 )
> +        {
> +            xfree(info);
> +            return ret;
> +        }
> +    }
> +
>      return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
>  }
>  
> diff -r e2722b24dc09 -r 789bbf4f478b xen/arch/x86/microcode_amd.c
> --- a/xen/arch/x86/microcode_amd.c    Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/arch/x86/microcode_amd.c    Wed Feb 01 15:56:47 2012 +0100
> @@ -25,6 +25,7 @@
>  #include <asm/msr.h>
>  #include <asm/processor.h>
>  #include <asm/microcode.h>
> +#include <asm/hvm/svm/svm.h>
>  
>  struct equiv_cpu_entry {
>      uint32_t installed_cpu;
> @@ -287,7 +288,8 @@ static int cpu_request_microcode(int cpu
>      {
>          printk(KERN_ERR "microcode: error! Wrong "
>                 "microcode patch file magic\n");
> -        return -EINVAL;
> +        error = -EINVAL;
> +        goto out;
>      }
>  
>      mc_amd = xmalloc(struct microcode_amd);
> @@ -295,7 +297,8 @@ static int cpu_request_microcode(int cpu
>      {
>          printk(KERN_ERR "microcode: error! "
>                 "Can not allocate memory for microcode patch\n");
> -        return -ENOMEM;
> +        error = -ENOMEM;
> +        goto out;
>      }
>  
>      error = install_equiv_cpu_table(mc_amd, buf, &offset);
> @@ -303,7 +306,8 @@ static int cpu_request_microcode(int cpu
>      {
>          xfree(mc_amd);
>          printk(KERN_ERR "microcode: installing equivalent cpu table 
> failed\n");
> -        return -EINVAL;
> +        error = -EINVAL;
> +        goto out;
>      }
>  
>      mc_old = uci->mc.mc_amd;
> @@ -337,13 +341,19 @@ static int cpu_request_microcode(int cpu
>      /* On success keep the microcode patch for
>       * re-apply on resume.
>       */
> -    if (error == 1)
> +    if ( error == 1 )
>      {
>          xfree(mc_old);
> -        return 0;
> +        error = 0;
> +    } 
> +    else 
> +    {
> +        xfree(mc_amd);
> +        uci->mc.mc_amd = mc_old;
>      }
> -    xfree(mc_amd);
> -    uci->mc.mc_amd = mc_old;
> +
> +  out:
> +    svm_host_osvw_init();
>  
>      return error;
>  }
> @@ -395,11 +405,19 @@ err1:
>      return -ENOMEM;
>  }
>  
> +static int start_update(void) 
> +{
> +    svm_host_osvw_reset();
> +
> +    return 0;
> +}
> +
>  static const struct microcode_ops microcode_amd_ops = {
>      .microcode_resume_match           = microcode_resume_match,
>      .cpu_request_microcode            = cpu_request_microcode,
>      .collect_cpu_info                 = collect_cpu_info,
>      .apply_microcode                  = apply_microcode,
> +    .start_update                     = start_update,
>  };
>  
>  static __init int microcode_init_amd(void)
> diff -r e2722b24dc09 -r 789bbf4f478b xen/arch/x86/platform_hypercall.c
> --- a/xen/arch/x86/platform_hypercall.c       Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/arch/x86/platform_hypercall.c       Wed Feb 01 15:56:47 2012 +0100
> @@ -166,7 +166,21 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
>              break;
>  
>          guest_from_compat_handle(data, op->u.microcode.data);
> +
> +        /* 
> +         * alloc_vpcu() will access data which is modified during
> +         * microcode update
> +         */
> +        while ( !spin_trylock(&vcpu_alloc_lock) )
> +            if ( hypercall_preempt_check() )
> +            {
> +                ret = hypercall_create_continuation(
> +                    __HYPERVISOR_platform_op, "h", u_xenpf_op);
> +                goto out;
> +            }
> +
>          ret = microcode_update(data, op->u.microcode.length);
> +        spin_unlock(&vcpu_alloc_lock);
>      }
>      break;
>  
> diff -r e2722b24dc09 -r 789bbf4f478b xen/common/domctl.c
> --- a/xen/common/domctl.c     Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/common/domctl.c     Wed Feb 01 15:56:47 2012 +0100
> @@ -29,6 +29,7 @@
>  #include <xsm/xsm.h>
>  
>  static DEFINE_SPINLOCK(domctl_lock);
> +DEFINE_SPINLOCK(vcpu_alloc_lock);
>  
>  int cpumask_to_xenctl_cpumap(
>      struct xenctl_cpumap *xenctl_cpumap, const cpumask_t *cpumask)
> @@ -502,6 +503,18 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
>          /* Needed, for example, to ensure writable p.t. state is synced. */
>          domain_pause(d);
>  
> +        /* 
> +         * Certain operations (e.g. CPU microcode updates) modify data 
> which is
> +         * used during VCPU allocation/initialization
> +         */
> +        while ( !spin_trylock(&vcpu_alloc_lock) )
> +            if ( hypercall_preempt_check() )
> +            {
> +                ret =  hypercall_create_continuation(
> +                    __HYPERVISOR_domctl, "h", u_domctl);
> +                goto maxvcpu_out_novcpulock;
> +            }
> +
>          /* We cannot reduce maximum VCPUs. */
>          ret = -EINVAL;
>          if ( (max < d->max_vcpus) && (d->vcpu[max] != NULL) )
> @@ -551,6 +564,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
>          ret = 0;
>  
>      maxvcpu_out:
> +        spin_unlock(&vcpu_alloc_lock);
> +
> +    maxvcpu_out_novcpulock:
>          domain_unpause(d);
>          rcu_unlock_domain(d);
>      }
> diff -r e2722b24dc09 -r 789bbf4f478b xen/include/asm-x86/hvm/svm/svm.h
> --- a/xen/include/asm-x86/hvm/svm/svm.h       Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/include/asm-x86/hvm/svm/svm.h       Wed Feb 01 15:56:47 2012 +0100
> @@ -98,4 +98,7 @@ extern u32 svm_feature_flags;
>                                    ~TSC_RATIO_RSVD_BITS )
>  #define vcpu_tsc_ratio(v)       TSC_RATIO((v)->domain->arch.tsc_khz, cpu_khz)
>  
> +extern void svm_host_osvw_reset(void);
> +extern void svm_host_osvw_init(void);
> +
>  #endif /* __ASM_X86_HVM_SVM_H__ */
> diff -r e2722b24dc09 -r 789bbf4f478b xen/include/asm-x86/hvm/svm/vmcb.h
> --- a/xen/include/asm-x86/hvm/svm/vmcb.h      Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/include/asm-x86/hvm/svm/vmcb.h      Wed Feb 01 15:56:47 2012 +0100
> @@ -516,6 +516,12 @@ struct arch_svm_struct {
>      
>      /* AMD lightweight profiling MSR */
>      uint64_t guest_lwp_cfg;
> +
> +    /* OSVW MSRs */
> +    struct {
> +        u64 length;
> +        u64 status;
> +    } osvw;
>  };
>  
>  struct vmcb_struct *alloc_vmcb(void);
> diff -r e2722b24dc09 -r 789bbf4f478b xen/include/asm-x86/microcode.h
> --- a/xen/include/asm-x86/microcode.h Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/include/asm-x86/microcode.h Wed Feb 01 15:56:47 2012 +0100
> @@ -11,6 +11,7 @@ struct microcode_ops {
>      int (*cpu_request_microcode)(int cpu, const void *buf, size_t size);
>      int (*collect_cpu_info)(int cpu, struct cpu_signature *csig);
>      int (*apply_microcode)(int cpu);
> +    int (*start_update)(void);
>  };
>  
>  struct cpu_signature {
> diff -r e2722b24dc09 -r 789bbf4f478b xen/include/xen/domain.h
> --- a/xen/include/xen/domain.h        Thu Jan 26 17:43:31 2012 +0000
> +++ b/xen/include/xen/domain.h        Wed Feb 01 15:56:47 2012 +0100
> @@ -69,6 +69,7 @@ void arch_dump_domain_info(struct domain
>  
>  void arch_vcpu_reset(struct vcpu *v);
>  
> +extern spinlock_t vcpu_alloc_lock;
>  bool_t domctl_lock_acquire(void);
>  void domctl_lock_release(void);
>  
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
 
 
 | 
|  | Lists.xenproject.org is hosted with RackSpace, monitoring our |