[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v3] x86/viridian: Add Partition Reference Time enlightenment
On Mon, Oct 13, 2014 at 03:27:39PM +0100, Paul Durrant wrote: > The presence of the partition reference time enlightenment persuades newer > versions of Windows to prefer the TSC as their primary time source. Hence, > if rdtsc is not being emulated and is invariant then many vmexits (for > alternative time sources such as the HPET or reference counter MSR) can > be avoided. > > The implementation is not yet complete as no attempt is made to prevent > emulation of rdtsc if the enlightenment is active and guest and host > TSC frequencies differ. To do that requires invasive changes in the core > x86 time code and hence a lot more testing. > > This patch avoids the issue by disabling the enlightenment if rdtsc is > being emulated, causing Windows to choose another time source. This is > safe, but may cause a big variation in performance of guests migrated > between hosts of differing TSC frequency. Thus the enlightenment is not > enabled in the default set, but may be enabled to improve guest performance > where such migrations are not a concern. > > See section 15.4 of the Microsoft Hypervisor Top Level Functional > Specification v4.0a for details. > > Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx> > Cc: Keir Fraser <keir@xxxxxxx> > Cc: Jan Beulich <jbeulich@xxxxxxxx> > Cc: Ian Campbell <ian.campbell@xxxxxxxxxx> > Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx> > Cc: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx> > Cc: Christoph Egger <chegger@xxxxxxxxx> > --- > v3: > - Prevent TscSequence increment loop from being exposed to a running > domain. > > v2: > - Addressed comments from Jan: > - Leave enlightenment out of default set and add a comment explaining > why. > - De-dup code surrounding adjustment of sequence number on resume. > - Remove accidental code deletion. > > docs/man/xl.cfg.pod.5 | 6 ++ > tools/libxl/libxl_dom.c | 3 + > tools/libxl/libxl_types.idl | 1 + > xen/arch/x86/hvm/viridian.c | 116 > +++++++++++++++++++++++++++++++- > xen/include/asm-x86/hvm/viridian.h | 35 ++++++++++ > xen/include/public/arch-x86/hvm/save.h | 11 +++ > xen/include/public/hvm/params.h | 7 +- > 7 files changed, 177 insertions(+), 2 deletions(-) > > diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5 > index 8bba21c..925adbb 100644 > --- a/docs/man/xl.cfg.pod.5 > +++ b/docs/man/xl.cfg.pod.5 > @@ -1163,6 +1163,12 @@ This group incorporates Partition Time Reference > Counter MSR. This > enlightenment can improve performance of Windows 8 and Windows > Server 2012 onwards. > > +=item B<reference_tsc> > + > +This set incorporates the Partition Reference TSC MSR. This > +enlightenment can improve performance of Windows 7 and Windows > +Server 2008 R2 onwards. > + > =item B<defaults> > > This is a special value that enables the default set of groups, which > diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c > index bd21841..d04fc0d 100644 > --- a/tools/libxl/libxl_dom.c > +++ b/tools/libxl/libxl_dom.c > @@ -262,6 +262,9 @@ static int hvm_set_viridian_features(libxl__gc *gc, > uint32_t domid, > if (libxl_bitmap_test(&enlightenments, > LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT)) > mask |= HVMPV_time_ref_count; > > + if (libxl_bitmap_test(&enlightenments, > LIBXL_VIRIDIAN_ENLIGHTENMENT_REFERENCE_TSC)) > + mask |= HVMPV_reference_tsc; > + > if (mask != 0 && > xc_hvm_param_set(CTX->xch, > domid, > diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl > index 90d152f..da75a40 100644 > --- a/tools/libxl/libxl_types.idl > +++ b/tools/libxl/libxl_types.idl > @@ -183,6 +183,7 @@ libxl_viridian_enlightenment = > Enumeration("viridian_enlightenment", [ > (0, "base"), > (1, "freq"), > (2, "time_ref_count"), > + (3, "reference_tsc"), > ]) > > # > diff --git a/xen/arch/x86/hvm/viridian.c b/xen/arch/x86/hvm/viridian.c > index 6726168..528bb8b 100644 > --- a/xen/arch/x86/hvm/viridian.c > +++ b/xen/arch/x86/hvm/viridian.c > @@ -21,6 +21,7 @@ > #define VIRIDIAN_MSR_HYPERCALL 0x40000001 > #define VIRIDIAN_MSR_VP_INDEX 0x40000002 > #define VIRIDIAN_MSR_TIME_REF_COUNT 0x40000020 > +#define VIRIDIAN_MSR_REFERENCE_TSC 0x40000021 > #define VIRIDIAN_MSR_TSC_FREQUENCY 0x40000022 > #define VIRIDIAN_MSR_APIC_FREQUENCY 0x40000023 > #define VIRIDIAN_MSR_EOI 0x40000070 > @@ -40,6 +41,7 @@ > #define CPUID3A_MSR_APIC_ACCESS (1 << 4) > #define CPUID3A_MSR_HYPERCALL (1 << 5) > #define CPUID3A_MSR_VP_INDEX (1 << 6) > +#define CPUID3A_MSR_REFERENCE_TSC (1 << 9) > #define CPUID3A_MSR_FREQ (1 << 11) > > /* Viridian CPUID 4000004, Implementation Recommendations. */ > @@ -95,6 +97,8 @@ int cpuid_viridian_leaves(unsigned int leaf, unsigned int > *eax, > *eax |= CPUID3A_MSR_FREQ; > if ( viridian_feature_mask(d) & HVMPV_time_ref_count ) > *eax |= CPUID3A_MSR_TIME_REF_COUNT; > + if ( viridian_feature_mask(d) & HVMPV_reference_tsc ) > + *eax |= CPUID3A_MSR_REFERENCE_TSC; > break; > case 4: > /* Recommended hypercall usage. */ > @@ -155,6 +159,17 @@ static void dump_apic_assist(const struct vcpu *v) > v, aa->fields.enabled, (unsigned long)aa->fields.pfn); > } > > +static void dump_reference_tsc(const struct domain *d) > +{ > + const union viridian_reference_tsc *rt; > + > + rt = &d->arch.hvm_domain.viridian.reference_tsc; > + > + printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: enabled: %x pfn: > %lx\n", > + d->domain_id, > + rt->fields.enabled, (unsigned long)rt->fields.pfn); > +} > + > static void enable_hypercall_page(struct domain *d) > { > unsigned long gmfn = > d->arch.hvm_domain.viridian.hypercall_gpa.fields.pfn; > @@ -224,6 +239,81 @@ static void initialize_apic_assist(struct vcpu *v) > put_page_and_type(page); > } > > +static void update_reference_tsc(struct domain *d, bool_t initialize) > +{ > + unsigned long gmfn = > d->arch.hvm_domain.viridian.reference_tsc.fields.pfn; > + struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC); > + HV_REFERENCE_TSC_PAGE *p; > + > + if ( !page || !get_page_type(page, PGT_writable_page) ) > + { > + if ( page ) > + put_page(page); > + gdprintk(XENLOG_WARNING, "Bad GMFN %lx (MFN %lx)\n", gmfn, > + page ? page_to_mfn(page) : INVALID_MFN); > + return; > + } > + > + p = __map_domain_page(page); > + > + if ( initialize ) > + clear_page(p); > + > + /* > + * This enlightenment must be disabled is the host TSC is not invariant. > + * However it is also disabled if vtsc is true (which means rdtsc is > being > + * emulated). This generally happens when guest TSC freq and host TSC > freq > + * don't match. The TscScale value could be adjusted to cope with this, > + * allowing vtsc to be turned off, but support for this is not yet > present > + * in the hypervisor. Thus is it is possible that migrating a Windows VM > + * between hosts of differing TSC frequencies may result in large > + * differences in guest performance. > + */ > + if ( !host_tsc_is_safe() || d->arch.vtsc ) > + { > + /* > + * The specification states that valid values of TscSequence range > + * from 0 to 0xFFFFFFFE. The value 0xFFFFFFFF is used to indicate > + * this mechanism is no longer a reliable source of time and that > + * the VM should fall back to a different source. > + * > + * Server 2012 (6.2 kernel) and 2012 R2 (6.3 kernel) actually violate > + * the spec. and rely on a value of 0 to indicate that this > + * enlightenment should no longer be used. These two kernel > + * versions are currently the only ones to make use of this > + * enlightenment, so just use 0 here. > + */ > + p->TscSequence = 0; > + > + printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: invalidated\n", > + d->domain_id); > + return; > + } > + > + /* > + * The guest will calculate reference time according to the following > + * formula: > + * > + * ReferenceTime = ((RDTSC() * TscScale) >> 64) + TscOffset > + * > + * Windows uses a 100ns tick, so we need a scale which is cpu > + * ticks per 100ns shifted left by 64. > + */ > + p->TscScale = ((10000ul << 32) / d->arch.tsc_khz) << 32; > + > + if ( initialize ) > + p->TscSequence = 1; > + else /* guest is paused */ > + do { > + p->TscSequence++; > + } while ( p->TscSequence == 0xFFFFFFFF || > + p->TscSequence == 0 ); /* Avoid both 'invalid' values */ Why not just something like: p->TscSequence++; /* Avoid both 'invalid' values */ if ( p->TscSequence == 0xFFFFFFFF || p->TscSequence == 0 ) p->TscSequence = 1; If there's something wrong with this approach and you prefer your 'initialize' check, perhaps add some more { } since you have a multi-statement else. --msw > + unmap_domain_page(p); > + > + put_page_and_type(page); > +} > + > int wrmsr_viridian_regs(uint32_t idx, uint64_t val) > { > struct vcpu *v = current; > @@ -282,6 +372,17 @@ int wrmsr_viridian_regs(uint32_t idx, uint64_t val) > initialize_apic_assist(v); > break; > > + case VIRIDIAN_MSR_REFERENCE_TSC: > + if ( !(viridian_feature_mask(d) & HVMPV_reference_tsc) ) > + return 0; > + > + perfc_incr(mshv_wrmsr_tsc_msr); > + d->arch.hvm_domain.viridian.reference_tsc.raw = val; > + dump_reference_tsc(d); > + if ( d->arch.hvm_domain.viridian.reference_tsc.fields.enabled ) > + update_reference_tsc(d, 1); > + break; > + > default: > return 0; > } > @@ -346,6 +447,14 @@ int rdmsr_viridian_regs(uint32_t idx, uint64_t *val) > *val = v->arch.hvm_vcpu.viridian.apic_assist.raw; > break; > > + case VIRIDIAN_MSR_REFERENCE_TSC: > + if ( !(viridian_feature_mask(d) & HVMPV_reference_tsc) ) > + return 0; > + > + perfc_incr(mshv_rdmsr_tsc_msr); > + *val = d->arch.hvm_domain.viridian.reference_tsc.raw; > + break; > + > case VIRIDIAN_MSR_TIME_REF_COUNT: > { > uint64_t tsc; > @@ -452,6 +561,7 @@ static int viridian_save_domain_ctxt(struct domain *d, > hvm_domain_context_t *h) > > ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw; > ctxt.guest_os_id = d->arch.hvm_domain.viridian.guest_os_id.raw; > + ctxt.reference_tsc = d->arch.hvm_domain.viridian.reference_tsc.raw; > > return (hvm_save_entry(VIRIDIAN_DOMAIN, 0, h, &ctxt) != 0); > } > @@ -460,11 +570,15 @@ static int viridian_load_domain_ctxt(struct domain *d, > hvm_domain_context_t *h) > { > struct hvm_viridian_domain_context ctxt; > > - if ( hvm_load_entry(VIRIDIAN_DOMAIN, h, &ctxt) != 0 ) > + if ( hvm_load_entry_zeroextend(VIRIDIAN_DOMAIN, h, &ctxt) != 0 ) > return -EINVAL; > > d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa; > d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id; > + d->arch.hvm_domain.viridian.reference_tsc.raw = ctxt.reference_tsc; > + > + if ( d->arch.hvm_domain.viridian.reference_tsc.fields.enabled ) > + update_reference_tsc(d, 0); > > return 0; > } > diff --git a/xen/include/asm-x86/hvm/viridian.h > b/xen/include/asm-x86/hvm/viridian.h > index 496da33..35a22f5 100644 > --- a/xen/include/asm-x86/hvm/viridian.h > +++ b/xen/include/asm-x86/hvm/viridian.h > @@ -48,10 +48,35 @@ union viridian_hypercall_gpa > } fields; > }; > > +union viridian_reference_tsc > +{ > + uint64_t raw; > + struct > + { > + uint64_t enabled:1; > + uint64_t reserved_preserved:11; > + uint64_t pfn:48; > + } fields; > +}; > + > +/* > + * Type defintion as in Microsoft Hypervisor Top-Level Functional > + * Specification v4.0a, section 15.4.2. > + */ > +typedef struct _HV_REFERENCE_TSC_PAGE > +{ > + uint32_t TscSequence; > + uint32_t Reserved1; > + uint64_t TscScale; > + int64_t TscOffset; > + uint64_t Reserved2[509]; > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > + > struct viridian_domain > { > union viridian_guest_os_id guest_os_id; > union viridian_hypercall_gpa hypercall_gpa; > + union viridian_reference_tsc reference_tsc; > }; > > int > @@ -76,3 +101,13 @@ int > viridian_hypercall(struct cpu_user_regs *regs); > > #endif /* __ASM_X86_HVM_VIRIDIAN_H__ */ > + > +/* > + * Local variables: > + * mode: C > + * c-file-style: "BSD" > + * c-basic-offset: 4 > + * tab-width: 4 > + * indent-tabs-mode: nil > + * End: > + */ > diff --git a/xen/include/public/arch-x86/hvm/save.h > b/xen/include/public/arch-x86/hvm/save.h > index 16d85a3..006924b 100644 > --- a/xen/include/public/arch-x86/hvm/save.h > +++ b/xen/include/public/arch-x86/hvm/save.h > @@ -568,6 +568,7 @@ struct hvm_hw_cpu_xsave { > struct hvm_viridian_domain_context { > uint64_t hypercall_gpa; > uint64_t guest_os_id; > + uint64_t reference_tsc; > }; > > DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct > hvm_viridian_domain_context); > @@ -616,3 +617,13 @@ struct hvm_msr { > #define HVM_SAVE_CODE_MAX 20 > > #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */ > + > +/* > + * Local variables: > + * mode: C > + * c-file-style: "BSD" > + * c-basic-offset: 4 > + * tab-width: 4 > + * indent-tabs-mode: nil > + * End: > + */ > diff --git a/xen/include/public/hvm/params.h b/xen/include/public/hvm/params.h > index 3c51072..a2d43bc 100644 > --- a/xen/include/public/hvm/params.h > +++ b/xen/include/public/hvm/params.h > @@ -92,10 +92,15 @@ > #define _HVMPV_time_ref_count 2 > #define HVMPV_time_ref_count (1 << _HVMPV_time_ref_count) > > +/* Enable Reference TSC Page (HV_X64_MSR_REFERENCE_TSC) */ > +#define _HVMPV_reference_tsc 3 > +#define HVMPV_reference_tsc (1 << _HVMPV_reference_tsc) > + > #define HVMPV_feature_mask \ > (HVMPV_base_freq | \ > HVMPV_no_freq | \ > - HVMPV_time_ref_count) > + HVMPV_time_ref_count | \ > + HVMPV_reference_tsc) > > #endif > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |