[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v4] x86/viridian: Add Partition Reference Time enlightenment
The presence of the partition reference time enlightenment persuades newer versions of Windows to prefer the TSC as their primary time source. Hence, if rdtsc is not being emulated and is invariant then many vmexits (for alternative time sources such as the HPET or reference counter MSR) can be avoided. The implementation is not yet complete as no attempt is made to prevent emulation of rdtsc if the enlightenment is active and guest and host TSC frequencies differ. To do that requires invasive changes in the core x86 time code and hence a lot more testing. This patch avoids the issue by disabling the enlightenment if rdtsc is being emulated, causing Windows to choose another time source. This is safe, but may cause a big variation in performance of guests migrated between hosts of differing TSC frequency. Thus the enlightenment is not enabled in the default set, but may be enabled to improve guest performance where such migrations are not a concern. See section 15.4 of the Microsoft Hypervisor Top Level Functional Specification v4.0a for details. Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx> Cc: Keir Fraser <keir@xxxxxxx> Cc: Jan Beulich <jbeulich@xxxxxxxx> Cc: Ian Campbell <ian.campbell@xxxxxxxxxx> Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx> Cc: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx> Cc: Christoph Egger <chegger@xxxxxxxxx> --- v4: - Get rid of loop, as suggested by Matt Wilson v3: - Prevent TscSequence increment loop from being exposed to a running domain. v2: - Addressed comments from Jan: - Leave enlightenment out of default set and add a comment explaining why. - De-dup code surrounding adjustment of sequence number on resume. - Remove accidental code deletion. docs/man/xl.cfg.pod.5 | 6 ++ tools/libxl/libxl_dom.c | 3 + tools/libxl/libxl_types.idl | 1 + xen/arch/x86/hvm/viridian.c | 113 +++++++++++++++++++++++++++++++- xen/include/asm-x86/hvm/viridian.h | 35 ++++++++++ xen/include/public/arch-x86/hvm/save.h | 11 ++++ xen/include/public/hvm/params.h | 7 +- 7 files changed, 174 insertions(+), 2 deletions(-) diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5 index 8bba21c..925adbb 100644 --- a/docs/man/xl.cfg.pod.5 +++ b/docs/man/xl.cfg.pod.5 @@ -1163,6 +1163,12 @@ This group incorporates Partition Time Reference Counter MSR. This enlightenment can improve performance of Windows 8 and Windows Server 2012 onwards. +=item B<reference_tsc> + +This set incorporates the Partition Reference TSC MSR. This +enlightenment can improve performance of Windows 7 and Windows +Server 2008 R2 onwards. + =item B<defaults> This is a special value that enables the default set of groups, which diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c index bd21841..d04fc0d 100644 --- a/tools/libxl/libxl_dom.c +++ b/tools/libxl/libxl_dom.c @@ -262,6 +262,9 @@ static int hvm_set_viridian_features(libxl__gc *gc, uint32_t domid, if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT)) mask |= HVMPV_time_ref_count; + if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_REFERENCE_TSC)) + mask |= HVMPV_reference_tsc; + if (mask != 0 && xc_hvm_param_set(CTX->xch, domid, diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl index 90d152f..da75a40 100644 --- a/tools/libxl/libxl_types.idl +++ b/tools/libxl/libxl_types.idl @@ -183,6 +183,7 @@ libxl_viridian_enlightenment = Enumeration("viridian_enlightenment", [ (0, "base"), (1, "freq"), (2, "time_ref_count"), + (3, "reference_tsc"), ]) # diff --git a/xen/arch/x86/hvm/viridian.c b/xen/arch/x86/hvm/viridian.c index 6726168..5008121 100644 --- a/xen/arch/x86/hvm/viridian.c +++ b/xen/arch/x86/hvm/viridian.c @@ -21,6 +21,7 @@ #define VIRIDIAN_MSR_HYPERCALL 0x40000001 #define VIRIDIAN_MSR_VP_INDEX 0x40000002 #define VIRIDIAN_MSR_TIME_REF_COUNT 0x40000020 +#define VIRIDIAN_MSR_REFERENCE_TSC 0x40000021 #define VIRIDIAN_MSR_TSC_FREQUENCY 0x40000022 #define VIRIDIAN_MSR_APIC_FREQUENCY 0x40000023 #define VIRIDIAN_MSR_EOI 0x40000070 @@ -40,6 +41,7 @@ #define CPUID3A_MSR_APIC_ACCESS (1 << 4) #define CPUID3A_MSR_HYPERCALL (1 << 5) #define CPUID3A_MSR_VP_INDEX (1 << 6) +#define CPUID3A_MSR_REFERENCE_TSC (1 << 9) #define CPUID3A_MSR_FREQ (1 << 11) /* Viridian CPUID 4000004, Implementation Recommendations. */ @@ -95,6 +97,8 @@ int cpuid_viridian_leaves(unsigned int leaf, unsigned int *eax, *eax |= CPUID3A_MSR_FREQ; if ( viridian_feature_mask(d) & HVMPV_time_ref_count ) *eax |= CPUID3A_MSR_TIME_REF_COUNT; + if ( viridian_feature_mask(d) & HVMPV_reference_tsc ) + *eax |= CPUID3A_MSR_REFERENCE_TSC; break; case 4: /* Recommended hypercall usage. */ @@ -155,6 +159,17 @@ static void dump_apic_assist(const struct vcpu *v) v, aa->fields.enabled, (unsigned long)aa->fields.pfn); } +static void dump_reference_tsc(const struct domain *d) +{ + const union viridian_reference_tsc *rt; + + rt = &d->arch.hvm_domain.viridian.reference_tsc; + + printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: enabled: %x pfn: %lx\n", + d->domain_id, + rt->fields.enabled, (unsigned long)rt->fields.pfn); +} + static void enable_hypercall_page(struct domain *d) { unsigned long gmfn = d->arch.hvm_domain.viridian.hypercall_gpa.fields.pfn; @@ -224,6 +239,78 @@ static void initialize_apic_assist(struct vcpu *v) put_page_and_type(page); } +static void update_reference_tsc(struct domain *d, bool_t initialize) +{ + unsigned long gmfn = d->arch.hvm_domain.viridian.reference_tsc.fields.pfn; + struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC); + HV_REFERENCE_TSC_PAGE *p; + + if ( !page || !get_page_type(page, PGT_writable_page) ) + { + if ( page ) + put_page(page); + gdprintk(XENLOG_WARNING, "Bad GMFN %lx (MFN %lx)\n", gmfn, + page ? page_to_mfn(page) : INVALID_MFN); + return; + } + + p = __map_domain_page(page); + + if ( initialize ) + clear_page(p); + + /* + * This enlightenment must be disabled is the host TSC is not invariant. + * However it is also disabled if vtsc is true (which means rdtsc is being + * emulated). This generally happens when guest TSC freq and host TSC freq + * don't match. The TscScale value could be adjusted to cope with this, + * allowing vtsc to be turned off, but support for this is not yet present + * in the hypervisor. Thus is it is possible that migrating a Windows VM + * between hosts of differing TSC frequencies may result in large + * differences in guest performance. + */ + if ( !host_tsc_is_safe() || d->arch.vtsc ) + { + /* + * The specification states that valid values of TscSequence range + * from 0 to 0xFFFFFFFE. The value 0xFFFFFFFF is used to indicate + * this mechanism is no longer a reliable source of time and that + * the VM should fall back to a different source. + * + * Server 2012 (6.2 kernel) and 2012 R2 (6.3 kernel) actually violate + * the spec. and rely on a value of 0 to indicate that this + * enlightenment should no longer be used. These two kernel + * versions are currently the only ones to make use of this + * enlightenment, so just use 0 here. + */ + p->TscSequence = 0; + + printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: invalidated\n", + d->domain_id); + return; + } + + /* + * The guest will calculate reference time according to the following + * formula: + * + * ReferenceTime = ((RDTSC() * TscScale) >> 64) + TscOffset + * + * Windows uses a 100ns tick, so we need a scale which is cpu + * ticks per 100ns shifted left by 64. + */ + p->TscScale = ((10000ul << 32) / d->arch.tsc_khz) << 32; + + p->TscSequence++; + if ( p->TscSequence == 0xFFFFFFFF || + p->TscSequence == 0 ) /* Avoid both 'invalid' values */ + p->TscSequence = 1; + + unmap_domain_page(p); + + put_page_and_type(page); +} + int wrmsr_viridian_regs(uint32_t idx, uint64_t val) { struct vcpu *v = current; @@ -282,6 +369,17 @@ int wrmsr_viridian_regs(uint32_t idx, uint64_t val) initialize_apic_assist(v); break; + case VIRIDIAN_MSR_REFERENCE_TSC: + if ( !(viridian_feature_mask(d) & HVMPV_reference_tsc) ) + return 0; + + perfc_incr(mshv_wrmsr_tsc_msr); + d->arch.hvm_domain.viridian.reference_tsc.raw = val; + dump_reference_tsc(d); + if ( d->arch.hvm_domain.viridian.reference_tsc.fields.enabled ) + update_reference_tsc(d, 1); + break; + default: return 0; } @@ -346,6 +444,14 @@ int rdmsr_viridian_regs(uint32_t idx, uint64_t *val) *val = v->arch.hvm_vcpu.viridian.apic_assist.raw; break; + case VIRIDIAN_MSR_REFERENCE_TSC: + if ( !(viridian_feature_mask(d) & HVMPV_reference_tsc) ) + return 0; + + perfc_incr(mshv_rdmsr_tsc_msr); + *val = d->arch.hvm_domain.viridian.reference_tsc.raw; + break; + case VIRIDIAN_MSR_TIME_REF_COUNT: { uint64_t tsc; @@ -452,6 +558,7 @@ static int viridian_save_domain_ctxt(struct domain *d, hvm_domain_context_t *h) ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw; ctxt.guest_os_id = d->arch.hvm_domain.viridian.guest_os_id.raw; + ctxt.reference_tsc = d->arch.hvm_domain.viridian.reference_tsc.raw; return (hvm_save_entry(VIRIDIAN_DOMAIN, 0, h, &ctxt) != 0); } @@ -460,11 +567,15 @@ static int viridian_load_domain_ctxt(struct domain *d, hvm_domain_context_t *h) { struct hvm_viridian_domain_context ctxt; - if ( hvm_load_entry(VIRIDIAN_DOMAIN, h, &ctxt) != 0 ) + if ( hvm_load_entry_zeroextend(VIRIDIAN_DOMAIN, h, &ctxt) != 0 ) return -EINVAL; d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa; d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id; + d->arch.hvm_domain.viridian.reference_tsc.raw = ctxt.reference_tsc; + + if ( d->arch.hvm_domain.viridian.reference_tsc.fields.enabled ) + update_reference_tsc(d, 0); return 0; } diff --git a/xen/include/asm-x86/hvm/viridian.h b/xen/include/asm-x86/hvm/viridian.h index 496da33..35a22f5 100644 --- a/xen/include/asm-x86/hvm/viridian.h +++ b/xen/include/asm-x86/hvm/viridian.h @@ -48,10 +48,35 @@ union viridian_hypercall_gpa } fields; }; +union viridian_reference_tsc +{ + uint64_t raw; + struct + { + uint64_t enabled:1; + uint64_t reserved_preserved:11; + uint64_t pfn:48; + } fields; +}; + +/* + * Type defintion as in Microsoft Hypervisor Top-Level Functional + * Specification v4.0a, section 15.4.2. + */ +typedef struct _HV_REFERENCE_TSC_PAGE +{ + uint32_t TscSequence; + uint32_t Reserved1; + uint64_t TscScale; + int64_t TscOffset; + uint64_t Reserved2[509]; +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; + struct viridian_domain { union viridian_guest_os_id guest_os_id; union viridian_hypercall_gpa hypercall_gpa; + union viridian_reference_tsc reference_tsc; }; int @@ -76,3 +101,13 @@ int viridian_hypercall(struct cpu_user_regs *regs); #endif /* __ASM_X86_HVM_VIRIDIAN_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/include/public/arch-x86/hvm/save.h b/xen/include/public/arch-x86/hvm/save.h index 16d85a3..006924b 100644 --- a/xen/include/public/arch-x86/hvm/save.h +++ b/xen/include/public/arch-x86/hvm/save.h @@ -568,6 +568,7 @@ struct hvm_hw_cpu_xsave { struct hvm_viridian_domain_context { uint64_t hypercall_gpa; uint64_t guest_os_id; + uint64_t reference_tsc; }; DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct hvm_viridian_domain_context); @@ -616,3 +617,13 @@ struct hvm_msr { #define HVM_SAVE_CODE_MAX 20 #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/include/public/hvm/params.h b/xen/include/public/hvm/params.h index 3c51072..a2d43bc 100644 --- a/xen/include/public/hvm/params.h +++ b/xen/include/public/hvm/params.h @@ -92,10 +92,15 @@ #define _HVMPV_time_ref_count 2 #define HVMPV_time_ref_count (1 << _HVMPV_time_ref_count) +/* Enable Reference TSC Page (HV_X64_MSR_REFERENCE_TSC) */ +#define _HVMPV_reference_tsc 3 +#define HVMPV_reference_tsc (1 << _HVMPV_reference_tsc) + #define HVMPV_feature_mask \ (HVMPV_base_freq | \ HVMPV_no_freq | \ - HVMPV_time_ref_count) + HVMPV_time_ref_count | \ + HVMPV_reference_tsc) #endif -- 1.7.10.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |