[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3] viridian: fix the HvFlushVirtualAddress/List hypercall implementation



The current code uses hvm_asid_flush_vcpu() but this is insufficient for
a guest running in shadow mode, which results in guest crashes early in
boot if the 'hcall_remote_tlb_flush' is enabled.

This patch, instead of open coding a new flush algorithm, adapts the one
already used by the HVMOP_flush_tlbs Xen hypercall. The implementation is
modified to allow TLB flushing a subset of a domain's vCPUs. A callback
function determines whether or not a vCPU requires flushing. This mechanism
was chosen because, while it is the case that the currently implemented
viridian hypercalls specify a vCPU mask, there are newer variants which
specify a sparse HV_VP_SET and thus use of a callback will avoid needing to
expose details of this outside of the viridian subsystem if and when those
newer variants are implemented.

NOTE: Use of the common flush function requires that the hypercalls are
      restartable and so, with this patch applied, viridian_hypercall()
      can now return HVM_HCALL_preempted. This is safe as no modification
      to struct cpu_user_regs is done before the return.

Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
---
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
Cc: "Roger Pau Monné" <roger.pau@xxxxxxxxxx>

v3:
 - Don't use cpumask_scratch as it is not safe
 - Re-introduce flush_cpumask, but restrict scope

v2:
 - Use cpumask_scratch
 - Make sure local CPU flush isn't missed
---
 xen/arch/x86/hvm/hvm.c               | 48 +++++++++++++++++++++-------
 xen/arch/x86/hvm/viridian/viridian.c | 45 ++++++++------------------
 xen/include/asm-x86/hvm/hvm.h        |  2 ++
 3 files changed, 53 insertions(+), 42 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 410623d437..9a9566c7ab 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3964,26 +3964,28 @@ static void hvm_s3_resume(struct domain *d)
     }
 }
 
-static int hvmop_flush_tlb_all(void)
+bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
+                        void *ctxt)
 {
+    static DEFINE_PER_CPU(cpumask_t, flush_cpumask);
+    cpumask_t *mask = &this_cpu(flush_cpumask);
     struct domain *d = current->domain;
     struct vcpu *v;
 
-    if ( !is_hvm_domain(d) )
-        return -EINVAL;
-
     /* Avoid deadlock if more than one vcpu tries this at the same time. */
     if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
-        return -ERESTART;
+        return false;
 
     /* Pause all other vcpus. */
     for_each_vcpu ( d, v )
-        if ( v != current )
+        if ( v != current && flush_vcpu(ctxt, v) )
             vcpu_pause_nosync(v);
 
+    cpumask_clear(mask);
+
     /* Now that all VCPUs are signalled to deschedule, we wait... */
     for_each_vcpu ( d, v )
-        if ( v != current )
+        if ( v != current && flush_vcpu(ctxt, v) )
             while ( !vcpu_runnable(v) && v->is_running )
                 cpu_relax();
 
@@ -3992,17 +3994,41 @@ static int hvmop_flush_tlb_all(void)
 
     /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
     for_each_vcpu ( d, v )
+    {
+        unsigned int cpu;
+
+        if ( !flush_vcpu(ctxt, v) )
+            continue;
+
         paging_update_cr3(v, false);
 
-    /* Flush all dirty TLBs. */
-    flush_tlb_mask(d->dirty_cpumask);
+        cpu = read_atomic(&v->dirty_cpu);
+        if ( is_vcpu_dirty_cpu(cpu) )
+            __cpumask_set_cpu(cpu, mask);
+    }
+
+    /* Flush TLBs on all CPUs with dirty vcpu state. */
+    flush_tlb_mask(mask);
 
     /* Done. */
     for_each_vcpu ( d, v )
-        if ( v != current )
+        if ( v != current && flush_vcpu(ctxt, v) )
             vcpu_unpause(v);
 
-    return 0;
+    return true;
+}
+
+static bool always_flush(void *ctxt, struct vcpu *v)
+{
+    return true;
+}
+
+static int hvmop_flush_tlb_all(void)
+{
+    if ( !is_hvm_domain(current->domain) )
+        return -EINVAL;
+
+    return hvm_flush_vcpu_tlb(always_flush, NULL) ? 0 : -ERESTART;
 }
 
 static int hvmop_set_evtchn_upcall_vector(
diff --git a/xen/arch/x86/hvm/viridian/viridian.c 
b/xen/arch/x86/hvm/viridian/viridian.c
index c78b2918d9..425af56856 100644
--- a/xen/arch/x86/hvm/viridian/viridian.c
+++ b/xen/arch/x86/hvm/viridian/viridian.c
@@ -430,7 +430,16 @@ void viridian_domain_deinit(struct domain *d)
         viridian_vcpu_deinit(v);
 }
 
-static DEFINE_PER_CPU(cpumask_t, ipi_cpumask);
+/*
+ * Windows should not issue the hypercalls requiring this callback in the
+ * case where vcpu_id would exceed the size of the mask.
+ */
+static bool need_flush(void *ctxt, struct vcpu *v)
+{
+    uint64_t vcpu_mask = *(uint64_t *)ctxt;
+
+    return vcpu_mask & (1ul << v->vcpu_id);
+}
 
 int viridian_hypercall(struct cpu_user_regs *regs)
 {
@@ -494,8 +503,6 @@ int viridian_hypercall(struct cpu_user_regs *regs)
     case HvFlushVirtualAddressSpace:
     case HvFlushVirtualAddressList:
     {
-        cpumask_t *pcpu_mask;
-        struct vcpu *v;
         struct {
             uint64_t address_space;
             uint64_t flags;
@@ -521,36 +528,12 @@ int viridian_hypercall(struct cpu_user_regs *regs)
         if ( input_params.flags & HV_FLUSH_ALL_PROCESSORS )
             input_params.vcpu_mask = ~0ul;
 
-        pcpu_mask = &this_cpu(ipi_cpumask);
-        cpumask_clear(pcpu_mask);
-
-        /*
-         * For each specified virtual CPU flush all ASIDs to invalidate
-         * TLB entries the next time it is scheduled and then, if it
-         * is currently running, add its physical CPU to a mask of
-         * those which need to be interrupted to force a flush.
-         */
-        for_each_vcpu ( currd, v )
-        {
-            if ( v->vcpu_id >= (sizeof(input_params.vcpu_mask) * 8) )
-                break;
-
-            if ( !(input_params.vcpu_mask & (1ul << v->vcpu_id)) )
-                continue;
-
-            hvm_asid_flush_vcpu(v);
-            if ( v != curr && v->is_running )
-                __cpumask_set_cpu(v->processor, pcpu_mask);
-        }
-
         /*
-         * Since ASIDs have now been flushed it just remains to
-         * force any CPUs currently running target vCPUs out of non-
-         * root mode. It's possible that re-scheduling has taken place
-         * so we may unnecessarily IPI some CPUs.
+         * A false return means that another vcpu is currently trying
+         * a similar operation, so back off.
          */
-        if ( !cpumask_empty(pcpu_mask) )
-            smp_send_event_check_mask(pcpu_mask);
+        if ( !hvm_flush_vcpu_tlb(need_flush, &input_params.vcpu_mask) )
+            return HVM_HCALL_preempted;
 
         output.rep_complete = input.rep_count;
 
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 0a10b51554..53ffebb2c5 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -338,6 +338,8 @@ const char *hvm_efer_valid(const struct vcpu *v, uint64_t 
value,
                            signed int cr0_pg);
 unsigned long hvm_cr4_guest_valid_bits(const struct domain *d, bool restore);
 
+bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
+                        void *ctxt);
 
 #ifdef CONFIG_HVM
 
-- 
2.20.1.2.gb21ebb671


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.