[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen master] x86/viridian: flush remote tlbs by hypercall



commit b38d426ad09b3e142f474c1dadfd6d6903664a48
Author:     Paul Durrant <paul.durrant@xxxxxxxxxx>
AuthorDate: Thu Nov 26 15:48:41 2015 +0100
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu Nov 26 15:48:41 2015 +0100

    x86/viridian: flush remote tlbs by hypercall
    
    The Microsoft Hypervisor Top Level Functional Spec. (section 3.4) defines
    two bits in CPUID leaf 0x40000004:EAX for the hypervisor to recommend
    whether or not to issue a hypercall for local or remote TLB flush.
    
    Whilst it's doubtful whether using a hypercall for local TLB flush would
    be any more efficient than a specific INVLPG VMEXIT, a remote TLB flush
    may well be more efficiently done. This is because the alternative
    mechanism is to IPI all the vCPUs in question which (in the absence of
    APIC virtualisation) will require emulation and scheduling of the vCPUs
    only to have them immediately VMEXIT for local TLB flush.
    
    This patch therefore adds a viridian option which, if selected, enables
    the hypercall for remote TLB flush and implements it using ASID
    invalidation for targetted vCPUs followed by an IPI only to the set of
    CPUs that happened to be running a targetted vCPU (which may be the empty
    set). The flush may be more severe than requested since the hypercall can
    request flush only for a specific address space (CR3) but Xen neither
    keeps a mapping of ASID to guest CR3 nor allows invalidation of a specific
    ASID, but on a host with contended CPUs performance is still likely to
    be better than a more specific flush using IPIs.
    
    The implementation of the patch introduces per-vCPU viridian_init() and
    viridian_deinit() functions to allow a scratch cpumask to be allocated.
    This avoids needing to put this potentially large data structure on stack
    during hypercall processing. It also modifies the hypercall input and
    output bit-fields to allow a check for the 'fast' calling convention,
    and a white-space fix in the definition of HVMPV_feature_mask (to remove
    hard tabs).
    
    Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
    Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Wei Liu <wei.liu2@xxxxxxxxxx>
---
 docs/man/xl.cfg.pod.5              |    6 ++
 tools/libxl/libxl_dom.c            |    3 +
 tools/libxl/libxl_types.idl        |    1 +
 xen/arch/x86/hvm/hvm.c             |   12 ++++
 xen/arch/x86/hvm/viridian.c        |  123 ++++++++++++++++++++++++++++++++----
 xen/include/asm-x86/hvm/viridian.h |    4 +
 xen/include/asm-x86/perfc_defn.h   |    1 +
 xen/include/public/hvm/params.h    |   13 +++-
 8 files changed, 146 insertions(+), 17 deletions(-)

diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
index 2aca8dd..3b695bd 100644
--- a/docs/man/xl.cfg.pod.5
+++ b/docs/man/xl.cfg.pod.5
@@ -1466,6 +1466,12 @@ This set incorporates the Partition Reference TSC MSR. 
This
 enlightenment can improve performance of Windows 7 and Windows
 Server 2008 R2 onwards.
 
+=item B<hcall_remote_tlb_flush>
+
+This set incorporates use of hypercalls for remote TLB flushing.
+This enlightenment may improve performance of Windows guests running
+on hosts with higher levels of (physical) CPU contention.
+
 =item B<defaults>
 
 This is a special value that enables the default set of groups, which
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 44d481b..009ca9c 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -251,6 +251,9 @@ static int hvm_set_viridian_features(libxl__gc *gc, 
uint32_t domid,
     if (libxl_bitmap_test(&enlightenments, 
LIBXL_VIRIDIAN_ENLIGHTENMENT_REFERENCE_TSC))
         mask |= HVMPV_reference_tsc;
 
+    if (libxl_bitmap_test(&enlightenments, 
LIBXL_VIRIDIAN_ENLIGHTENMENT_HCALL_REMOTE_TLB_FLUSH))
+        mask |= HVMPV_hcall_remote_tlb_flush;
+
     if (mask != 0 &&
         xc_hvm_param_set(CTX->xch,
                          domid,
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 6808f2b..cf3730f 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -219,6 +219,7 @@ libxl_viridian_enlightenment = 
Enumeration("viridian_enlightenment", [
     (1, "freq"),
     (2, "time_ref_count"),
     (3, "reference_tsc"),
+    (4, "hcall_remote_tlb_flush"),
     ])
 
 libxl_hdtype = Enumeration("hdtype", [
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 6c2b512..db0aeba 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2457,6 +2457,13 @@ int hvm_vcpu_initialise(struct vcpu *v)
     if ( rc != 0 )
         goto fail6;
 
+    if ( is_viridian_domain(d) )
+    {
+        rc = viridian_vcpu_init(v);
+        if ( rc != 0 )
+            goto fail7;
+    }
+
     if ( v->vcpu_id == 0 )
     {
         /* NB. All these really belong in hvm_domain_initialise(). */
@@ -2473,6 +2480,8 @@ int hvm_vcpu_initialise(struct vcpu *v)
 
     return 0;
 
+ fail7:
+    hvm_all_ioreq_servers_remove_vcpu(v->domain, v);
  fail6:
     nestedhvm_vcpu_destroy(v);
  fail5:
@@ -2489,6 +2498,9 @@ int hvm_vcpu_initialise(struct vcpu *v)
 
 void hvm_vcpu_destroy(struct vcpu *v)
 {
+    if ( is_viridian_domain(v->domain) )
+        viridian_vcpu_deinit(v);
+
     hvm_all_ioreq_servers_remove_vcpu(v->domain, v);
 
     if ( hvm_altp2m_supported() )
diff --git a/xen/arch/x86/hvm/viridian.c b/xen/arch/x86/hvm/viridian.c
index 2f22783..df6f26d 100644
--- a/xen/arch/x86/hvm/viridian.c
+++ b/xen/arch/x86/hvm/viridian.c
@@ -33,9 +33,15 @@
 /* Viridian Hypercall Status Codes. */
 #define HV_STATUS_SUCCESS                       0x0000
 #define HV_STATUS_INVALID_HYPERCALL_CODE        0x0002
+#define HV_STATUS_INVALID_PARAMETER             0x0005
 
-/* Viridian Hypercall Codes and Parameters. */
-#define HvNotifyLongSpinWait    8
+/* Viridian Hypercall Codes. */
+#define HvFlushVirtualAddressSpace 2
+#define HvFlushVirtualAddressList  3
+#define HvNotifyLongSpinWait       8
+
+/* Viridian Hypercall Flags. */
+#define HV_FLUSH_ALL_PROCESSORS 1
 
 /* Viridian CPUID 4000003, Viridian MSR availability. */
 #define CPUID3A_MSR_TIME_REF_COUNT (1 << 1)
@@ -46,8 +52,9 @@
 #define CPUID3A_MSR_FREQ           (1 << 11)
 
 /* Viridian CPUID 4000004, Implementation Recommendations. */
-#define CPUID4A_MSR_BASED_APIC  (1 << 3)
-#define CPUID4A_RELAX_TIMER_INT (1 << 5)
+#define CPUID4A_HCALL_REMOTE_TLB_FLUSH (1 << 2)
+#define CPUID4A_MSR_BASED_APIC         (1 << 3)
+#define CPUID4A_RELAX_TIMER_INT        (1 << 5)
 
 /* Viridian CPUID 4000006, Implementation HW features detected and in use. */
 #define CPUID6A_APIC_OVERLAY    (1 << 0)
@@ -107,6 +114,8 @@ int cpuid_viridian_leaves(unsigned int leaf, unsigned int 
*eax,
              (d->arch.hvm_domain.viridian.guest_os_id.fields.os < 4) )
             break;
         *eax = CPUID4A_RELAX_TIMER_INT;
+        if ( viridian_feature_mask(d) & HVMPV_hcall_remote_tlb_flush )
+            *eax |= CPUID4A_HCALL_REMOTE_TLB_FLUSH;
         if ( !cpu_has_vmx_apic_reg_virt )
             *eax |= CPUID4A_MSR_BASED_APIC;
         *ebx = 2047; /* long spin count */
@@ -512,9 +521,22 @@ int rdmsr_viridian_regs(uint32_t idx, uint64_t *val)
     return 1;
 }
 
+int viridian_vcpu_init(struct vcpu *v)
+{
+    return alloc_cpumask_var(&v->arch.hvm_vcpu.viridian.flush_cpumask) ?
+           0 : -ENOMEM;
+}
+
+void viridian_vcpu_deinit(struct vcpu *v)
+{
+    free_cpumask_var(v->arch.hvm_vcpu.viridian.flush_cpumask);
+}
+
 int viridian_hypercall(struct cpu_user_regs *regs)
 {
-    int mode = hvm_guest_x86_mode(current);
+    struct vcpu *curr = current;
+    struct domain *currd = curr->domain;
+    int mode = hvm_guest_x86_mode(curr);
     unsigned long input_params_gpa, output_params_gpa;
     uint16_t status = HV_STATUS_SUCCESS;
 
@@ -522,11 +544,12 @@ int viridian_hypercall(struct cpu_user_regs *regs)
         uint64_t raw;
         struct {
             uint16_t call_code;
-            uint16_t rsvd1;
-            unsigned rep_count:12;
-            unsigned rsvd2:4;
-            unsigned rep_start:12;
-            unsigned rsvd3:4;
+            uint16_t fast:1;
+            uint16_t rsvd1:15;
+            uint16_t rep_count:12;
+            uint16_t rsvd2:4;
+            uint16_t rep_start:12;
+            uint16_t rsvd3:4;
         };
     } input;
 
@@ -535,12 +558,12 @@ int viridian_hypercall(struct cpu_user_regs *regs)
         struct {
             uint16_t result;
             uint16_t rsvd1;
-            unsigned rep_complete:12;
-            unsigned rsvd2:20;
+            uint32_t rep_complete:12;
+            uint32_t rsvd2:20;
         };
     } output = { 0 };
 
-    ASSERT(is_viridian_domain(current->domain));
+    ASSERT(is_viridian_domain(currd));
 
     switch ( mode )
     {
@@ -561,10 +584,84 @@ int viridian_hypercall(struct cpu_user_regs *regs)
     switch ( input.call_code )
     {
     case HvNotifyLongSpinWait:
+        /*
+         * See Microsoft Hypervisor Top Level Spec. section 18.5.1.
+         */
         perfc_incr(mshv_call_long_wait);
         do_sched_op(SCHEDOP_yield, guest_handle_from_ptr(NULL, void));
         status = HV_STATUS_SUCCESS;
         break;
+
+    case HvFlushVirtualAddressSpace:
+    case HvFlushVirtualAddressList:
+    {
+        cpumask_t *pcpu_mask;
+        struct vcpu *v;
+        struct {
+            uint64_t address_space;
+            uint64_t flags;
+            uint64_t vcpu_mask;
+        } input_params;
+
+        /*
+         * See Microsoft Hypervisor Top Level Spec. sections 12.4.2
+         * and 12.4.3.
+         */
+        perfc_incr(mshv_flush);
+
+        /* These hypercalls should never use the fast-call convention. */
+        status = HV_STATUS_INVALID_PARAMETER;
+        if ( input.fast )
+            break;
+
+        /* Get input parameters. */
+        if ( hvm_copy_from_guest_phys(&input_params, input_params_gpa,
+                                      sizeof(input_params)) != HVMCOPY_okay )
+            break;
+
+        /*
+         * It is not clear from the spec. if we are supposed to
+         * include current virtual CPU in the set or not in this case,
+         * so err on the safe side.
+         */
+        if ( input_params.flags & HV_FLUSH_ALL_PROCESSORS )
+            input_params.vcpu_mask = ~0ul;
+
+        pcpu_mask = curr->arch.hvm_vcpu.viridian.flush_cpumask;
+        cpumask_clear(pcpu_mask);
+
+        /*
+         * For each specified virtual CPU flush all ASIDs to invalidate
+         * TLB entries the next time it is scheduled and then, if it
+         * is currently running, add its physical CPU to a mask of
+         * those which need to be interrupted to force a flush.
+         */
+        for_each_vcpu ( currd, v )
+        {
+            if ( v->vcpu_id >= (sizeof(input_params.vcpu_mask) * 8) )
+                break;
+
+            if ( !(input_params.vcpu_mask & (1ul << v->vcpu_id)) )
+                continue;
+
+            hvm_asid_flush_vcpu(v);
+            if ( v->is_running )
+                __cpumask_set_cpu(v->processor, pcpu_mask);
+        }
+
+        /*
+         * Since ASIDs have now been flushed it just remains to
+         * force any CPUs currently running target vCPUs out of non-
+         * root mode. It's possible that re-scheduling has taken place
+         * so we may unnecessarily IPI some CPUs.
+         */
+        if ( !cpumask_empty(pcpu_mask) )
+            flush_tlb_mask(pcpu_mask);
+
+        status = HV_STATUS_SUCCESS;
+        break;
+    }
+
     default:
         status = HV_STATUS_INVALID_HYPERCALL_CODE;
         break;
diff --git a/xen/include/asm-x86/hvm/viridian.h 
b/xen/include/asm-x86/hvm/viridian.h
index c4319d7..2eec85e 100644
--- a/xen/include/asm-x86/hvm/viridian.h
+++ b/xen/include/asm-x86/hvm/viridian.h
@@ -22,6 +22,7 @@ union viridian_apic_assist
 struct viridian_vcpu
 {
     union viridian_apic_assist apic_assist;
+    cpumask_var_t flush_cpumask;
 };
 
 union viridian_guest_os_id
@@ -117,6 +118,9 @@ viridian_hypercall(struct cpu_user_regs *regs);
 void viridian_time_ref_count_freeze(struct domain *d);
 void viridian_time_ref_count_thaw(struct domain *d);
 
+int viridian_vcpu_init(struct vcpu *v);
+void viridian_vcpu_deinit(struct vcpu *v);
+
 #endif /* __ASM_X86_HVM_VIRIDIAN_H__ */
 
 /*
diff --git a/xen/include/asm-x86/perfc_defn.h b/xen/include/asm-x86/perfc_defn.h
index 9ef092e..aac9331 100644
--- a/xen/include/asm-x86/perfc_defn.h
+++ b/xen/include/asm-x86/perfc_defn.h
@@ -115,6 +115,7 @@ PERFCOUNTER(mshv_call_sw_addr_space,    "MS Hv Switch 
Address Space")
 PERFCOUNTER(mshv_call_flush_tlb_list,   "MS Hv Flush TLB list")
 PERFCOUNTER(mshv_call_flush_tlb_all,    "MS Hv Flush TLB all")
 PERFCOUNTER(mshv_call_long_wait,        "MS Hv Notify long wait")
+PERFCOUNTER(mshv_call_flush,            "MS Hv Flush TLB")
 PERFCOUNTER(mshv_rdmsr_osid,            "MS Hv rdmsr Guest OS ID")
 PERFCOUNTER(mshv_rdmsr_hc_page,         "MS Hv rdmsr hypercall page")
 PERFCOUNTER(mshv_rdmsr_vp_index,        "MS Hv rdmsr vp index")
diff --git a/xen/include/public/hvm/params.h b/xen/include/public/hvm/params.h
index 356dfd3..b437444 100644
--- a/xen/include/public/hvm/params.h
+++ b/xen/include/public/hvm/params.h
@@ -98,11 +98,16 @@
 #define _HVMPV_reference_tsc 3
 #define HVMPV_reference_tsc  (1 << _HVMPV_reference_tsc)
 
+/* Use Hypercall for remote TLB flush */
+#define _HVMPV_hcall_remote_tlb_flush 4
+#define HVMPV_hcall_remote_tlb_flush (1 << _HVMPV_hcall_remote_tlb_flush)
+
 #define HVMPV_feature_mask \
-       (HVMPV_base_freq | \
-        HVMPV_no_freq | \
-        HVMPV_time_ref_count | \
-        HVMPV_reference_tsc)
+        (HVMPV_base_freq | \
+         HVMPV_no_freq | \
+         HVMPV_time_ref_count | \
+         HVMPV_reference_tsc | \
+         HVMPV_hcall_remote_tlb_flush)
 
 #endif
 
--
generated by git-patchbot for /home/xen/git/xen.git#master

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.