[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH for 4.5] x86/viridian: Freeze time reference counter when domain is paused
> -----Original Message----- > From: Andrew Cooper > Sent: 13 October 2014 11:14 > To: Paul Durrant; xen-devel@xxxxxxxxxxxxxxxxxxxx > Cc: Keir (Xen.org); Jan Beulich > Subject: Re: [PATCH for 4.5] x86/viridian: Freeze time reference counter > when domain is paused > > On 10/10/14 10:07, Paul Durrant wrote: > > In XenServer system test it has become apparent that versions of Windows > > that make use of the time reference counter enlightenment cannot cope > with > > large jumps forward in the value read from the MSR. Specifically, > > suspending a very large domain took approx. 45 minutes to complete and > > when the domain was resumed it was discovered that the WMI (Windows > > Management Instrumentation) service had hung. > > It is possibly worth noting in the commit message that the OSes idea of > the suspend time happens at the SHEDOP_suspend time, while the VMs real > suspend time is after all the memory is complete, and the toolstack > calls DOMCTL_gethvmcontext. > > For a large VM (or slow network/storage), this is a delta of 45 minutes. > Ok. > > > > This patch adds code to freeze the value of the time reference counter > > on domain pause and 'thaw' it on domain unpause, but only if the domain is > > not shutting down. The absolute value of the counter is then saved in the > > viridian domain context record. This prevents the guest OS from > experiencing > > large jumps in the value of the MSR and has been shown to reliably fix > > the problem with WMI. > > > > Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx> > > Cc: Keir Fraser <keir@xxxxxxx> > > Cc: Jan Beulich <jbeulich@xxxxxxxx> > > Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> > > --- > > xen/arch/x86/hvm/viridian.c | 62 +++++++++++++++++++++++++-- > ----- > > xen/common/domain.c | 41 +++++++++++++++------ > > xen/include/asm-x86/hvm/hvm.h | 9 ++++- > > xen/include/asm-x86/hvm/viridian.h | 27 ++++++++++++++ > > xen/include/public/arch-x86/hvm/save.h | 1 + > > 5 files changed, 116 insertions(+), 24 deletions(-) > > > > diff --git a/xen/arch/x86/hvm/viridian.c b/xen/arch/x86/hvm/viridian.c > > index 6726168..7d55363 100644 > > --- a/xen/arch/x86/hvm/viridian.c > > +++ b/xen/arch/x86/hvm/viridian.c > > @@ -289,6 +289,39 @@ int wrmsr_viridian_regs(uint32_t idx, uint64_t val) > > return 1; > > } > > > > +static int64_t raw_trc_val(struct domain *d) > > +{ > > + uint64_t tsc; > > + struct time_scale tsc_to_ns; > > + > > + tsc = hvm_get_guest_tsc(pt_global_vcpu_target(d)); > > + > > + /* convert tsc to count of 100ns periods */ > > + set_time_scale(&tsc_to_ns, d->arch.tsc_khz * 1000ul); > > + return scale_delta(tsc, &tsc_to_ns) / 100ul; > > +} > > + > > +void viridian_time_ref_count_freeze(struct domain *d) > > +{ > > + struct viridian_time_ref_count *trc; > > + > > + trc = &d->arch.hvm_domain.viridian.time_ref_count; > > + > > + if ( test_and_clear_bit(_TRC_running, &trc->flags) ) > > + trc->val = raw_trc_val(d) + trc->off; > > +} > > + > > +void viridian_time_ref_count_thaw(struct domain *d) > > +{ > > + struct viridian_time_ref_count *trc; > > + > > + trc = &d->arch.hvm_domain.viridian.time_ref_count; > > + > > + if ( !d->is_shutting_down && > > + !test_and_set_bit(_TRC_running, &trc->flags) ) > > + trc->off = (int64_t)trc->val - raw_trc_val(d); > > +} > > + > > int rdmsr_viridian_regs(uint32_t idx, uint64_t *val) > > { > > struct vcpu *v = current; > > @@ -348,18 +381,19 @@ int rdmsr_viridian_regs(uint32_t idx, uint64_t > *val) > > > > case VIRIDIAN_MSR_TIME_REF_COUNT: > > { > > - uint64_t tsc; > > - struct time_scale tsc_to_ns; > > + struct viridian_time_ref_count *trc; > > + > > + trc = &d->arch.hvm_domain.viridian.time_ref_count; > > > > if ( !(viridian_feature_mask(d) & HVMPV_time_ref_count) ) > > return 0; > > > > - perfc_incr(mshv_rdmsr_time_ref_count); > > - tsc = hvm_get_guest_tsc(pt_global_vcpu_target(d)); > > + if ( !test_and_set_bit(_TRC_accessed, &trc->flags) ) > > + printk(XENLOG_G_INFO "d%d: VIRIDIAN MSR_TIME_REF_COUNT: > accessed\n", > > + d->domain_id); > > > > - /* convert tsc to count of 100ns periods */ > > - set_time_scale(&tsc_to_ns, d->arch.tsc_khz * 1000ul); > > - *val = scale_delta(tsc, &tsc_to_ns) / 100ul; > > + perfc_incr(mshv_rdmsr_time_ref_count); > > + *val = raw_trc_val(d) + trc->off; > > break; > > } > > > > @@ -450,9 +484,10 @@ static int viridian_save_domain_ctxt(struct domain > *d, hvm_domain_context_t *h) > > if ( !is_viridian_domain(d) ) > > return 0; > > > > - ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw; > > - ctxt.guest_os_id = d->arch.hvm_domain.viridian.guest_os_id.raw; > > - > > + ctxt.time_ref_count = d- > >arch.hvm_domain.viridian.time_ref_count.val; > > + ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw; > > + ctxt.guest_os_id = d->arch.hvm_domain.viridian.guest_os_id.raw; > > + > > return (hvm_save_entry(VIRIDIAN_DOMAIN, 0, h, &ctxt) != 0); > > } > > > > @@ -460,11 +495,12 @@ static int viridian_load_domain_ctxt(struct > domain *d, hvm_domain_context_t *h) > > { > > struct hvm_viridian_domain_context ctxt; > > > > - if ( hvm_load_entry(VIRIDIAN_DOMAIN, h, &ctxt) != 0 ) > > + if ( hvm_load_entry_zeroextend(VIRIDIAN_DOMAIN, h, &ctxt) != 0 ) > > return -EINVAL; > > > > - d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa; > > - d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id; > > + d->arch.hvm_domain.viridian.time_ref_count.val = > ctxt.time_ref_count; > > + d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa; > > + d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id; > > > > return 0; > > } > > diff --git a/xen/common/domain.c b/xen/common/domain.c > > index 190998c..af5dc82 100644 > > --- a/xen/common/domain.c > > +++ b/xen/common/domain.c > > @@ -43,6 +43,10 @@ > > #include <xen/trace.h> > > #include <xen/tmem.h> > > > > +#ifdef CONFIG_X86 > > +#include <asm/hvm/viridian.h> > > +#endif > > + > > /* Linux config option: propageted to domain0 */ > > /* xen_processor_pmbits: xen control Cx, Px, ... */ > > unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX; > > @@ -706,6 +710,11 @@ void domain_shutdown(struct domain *d, u8 > reason) > > v->paused_for_shutdown = 1; > > } > > > > +#ifdef CONFIG_X86 > > + if ( has_viridian_time_ref_count(d) ) > > + viridian_time_ref_count_freeze(d); > > +#endif > > + > > __domain_finalise_shutdown(d); > > > > spin_unlock(&d->shutdown_lock); > > @@ -925,32 +934,44 @@ int vcpu_unpause_by_systemcontroller(struct > vcpu *v) > > return 0; > > } > > > > -void domain_pause(struct domain *d) > > +void do_domain_pause(struct domain *d, bool_t nosync) > > { > > struct vcpu *v; > > > > - ASSERT(d != current->domain); > > - > > atomic_inc(&d->pause_count); > > > > for_each_vcpu( d, v ) > > - vcpu_sleep_sync(v); > > + if ( nosync ) > > + vcpu_sleep_nosync(v); > > + else > > + vcpu_sleep_sync(v); > > + > > +#ifdef CONFIG_X86 > > + if ( has_viridian_time_ref_count(d) ) > > + viridian_time_ref_count_freeze(d); > > +#endif > > } > > > > -void domain_pause_nosync(struct domain *d) > > +void domain_pause(struct domain *d) > > { > > - struct vcpu *v; > > - > > - atomic_inc(&d->pause_count); > > + ASSERT(d != current->domain); > > + do_domain_pause(d, 0); > > +} > > > > - for_each_vcpu( d, v ) > > - vcpu_sleep_nosync(v); > > +void domain_pause_nosync(struct domain *d) > > +{ > > + do_domain_pause(d, 1); > > } > > All this fiddling with a boolean nosync parameter might be neater done > in the same style as I did with __domain_pause_by_systemcontroller(), by > passing a function pointer. > Yeah - probably would be neater. I was in two minds about that. > > > > void domain_unpause(struct domain *d) > > { > > struct vcpu *v; > > > > +#ifdef CONFIG_X86 > > + if ( has_viridian_time_ref_count(d) ) > > + viridian_time_ref_count_thaw(d); > > +#endif > > + > > if ( atomic_dec_and_test(&d->pause_count) ) > > for_each_vcpu( d, v ) > > vcpu_wake(v); > > diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm- > x86/hvm/hvm.h > > index 0d94c48..2dda908 100644 > > --- a/xen/include/asm-x86/hvm/hvm.h > > +++ b/xen/include/asm-x86/hvm/hvm.h > > @@ -340,12 +340,19 @@ static inline unsigned long > hvm_get_shadow_gs_base(struct vcpu *v) > > return hvm_funcs.get_shadow_gs_base(v); > > } > > > > + > > +#define has_hvm_params(d) \ > > + ((d)->arch.hvm_domain.params != NULL) > > + > > #define viridian_feature_mask(d) \ > > - ((d)->arch.hvm_domain.params[HVM_PARAM_VIRIDIAN]) > > + (has_hvm_params(d) && (d)- > >arch.hvm_domain.params[HVM_PARAM_VIRIDIAN]) > > The result of this expression is a boolean, not the viridian feature mask. > Dammit. > Furthermore, I don't think that has_hvm_params() is useful. Any HVM > domain will have params, and without an HVM check, looking into > d->arch.hvm_domain is invalid. > Not true, which is why I made the change. domain_pause() is called on an HVM domain before the array is allocated. Paul > ~Andrew > > > > > #define is_viridian_domain(d) \ > > (is_hvm_domain(d) && (viridian_feature_mask(d) & > HVMPV_base_freq)) > > > > +#define has_viridian_time_ref_count(d) \ > > + (is_viridian_domain(d) && (viridian_feature_mask(d) & > HVMPV_time_ref_count)) > > + > > void hvm_hypervisor_cpuid_leaf(uint32_t sub_idx, > > uint32_t *eax, uint32_t *ebx, > > uint32_t *ecx, uint32_t *edx); > > diff --git a/xen/include/asm-x86/hvm/viridian.h b/xen/include/asm- > x86/hvm/viridian.h > > index 496da33..4cab2e8 100644 > > --- a/xen/include/asm-x86/hvm/viridian.h > > +++ b/xen/include/asm-x86/hvm/viridian.h > > @@ -48,10 +48,24 @@ union viridian_hypercall_gpa > > } fields; > > }; > > > > +struct viridian_time_ref_count > > +{ > > + unsigned long flags; > > + > > +#define _TRC_accessed 0 > > +#define TRC_accessed (1 << _TRC_accessed) > > +#define _TRC_running 1 > > +#define TRC_running (1 << _TRC_running) > > + > > + uint64_t val; > > + int64_t off; > > +}; > > + > > struct viridian_domain > > { > > union viridian_guest_os_id guest_os_id; > > union viridian_hypercall_gpa hypercall_gpa; > > + struct viridian_time_ref_count time_ref_count; > > }; > > > > int > > @@ -75,4 +89,17 @@ rdmsr_viridian_regs( > > int > > viridian_hypercall(struct cpu_user_regs *regs); > > > > +void viridian_time_ref_count_freeze(struct domain *d); > > +void viridian_time_ref_count_thaw(struct domain *d); > > + > > #endif /* __ASM_X86_HVM_VIRIDIAN_H__ */ > > + > > +/* > > + * Local variables: > > + * mode: C > > + * c-file-style: "BSD" > > + * c-basic-offset: 4 > > + * tab-width: 4 > > + * indent-tabs-mode: nil > > + * End: > > + */ > > diff --git a/xen/include/public/arch-x86/hvm/save.h > b/xen/include/public/arch-x86/hvm/save.h > > index 16d85a3..88aab7e 100644 > > --- a/xen/include/public/arch-x86/hvm/save.h > > +++ b/xen/include/public/arch-x86/hvm/save.h > > @@ -568,6 +568,7 @@ struct hvm_hw_cpu_xsave { > > struct hvm_viridian_domain_context { > > uint64_t hypercall_gpa; > > uint64_t guest_os_id; > > + uint64_t time_ref_count; > > }; > > > > DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct > hvm_viridian_domain_context); > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |