Xen project Mailing List

Re: [Xen-devel] [PATCH] x86: don't drop guest visible state updates when 64-bit PV guest is in user mode

To: Jan Beulich <JBeulich@xxxxxxxx>, xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxxx>

From: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

Date: Tue, 21 Jan 2014 16:55:57 +0000

Cc: Keir Fraser <keir@xxxxxxx>

Delivery-date: Tue, 21 Jan 2014 16:56:42 +0000

List-id: Xen developer discussion <xen-devel.lists.xen.org>

On 21/01/2014 15:33, Jan Beulich wrote:

Since 64-bit PV uses separate kernel and user mode page tables, kernel
addresses (as usually provided via VCPUOP_register_runstate_memory_area
and possibly via VCPUOP_register_vcpu_time_memory_area) aren't
necessarily accessible when the respective updating occurs. Add logic
for toggle_guest_mode() to take care of this (if necessary) the next
time the vCPU switches to kernel mode.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1323,10 +1323,10 @@ static void paravirt_ctxt_switch_to(stru
 }
 
 /* Update per-VCPU guest runstate shared memory area (if registered). */
-static void update_runstate_area(struct vcpu *v)
+bool_t update_runstate_area(const struct vcpu *v)

Can you adjust the comment to indicate what the return value means. The logic is quite opaque, but I believe it is "true if the runstate has been suitably updated".

 {
     if ( guest_handle_is_null(runstate_guest(v)) )
-        return;
+        return 1;
 
     if ( has_32bit_shinfo(v->domain) )
     {
@@ -1334,10 +1334,18 @@ static void update_runstate_area(struct 
 
         XLAT_vcpu_runstate_info(&info, &v->runstate);
         __copy_to_guest(v->runstate_guest.compat, &info, 1);
-        return;
+        return 1;
     }
 
-    __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+    return __copy_to_guest(runstate_guest(v), &v->runstate, 1) !=
+           sizeof(v->runstate);
+}
+
+static void _update_runstate_area(struct vcpu *v)
+{
+    if ( !update_runstate_area(v) && is_pv_vcpu(v) &&
+         !(v->arch.flags & TF_kernel_mode) )
+        v->arch.pv_vcpu.need_update_runstate_area = 1;
 }
 
 static inline int need_full_gdt(struct vcpu *v)
@@ -1443,8 +1451,8 @@ void context_switch(struct vcpu *prev, s
         flush_tlb_mask(&dirty_mask);
     }
 
-    if (prev != next)
-        update_runstate_area(prev);
+    if ( prev != next )
+        _update_runstate_area(prev);
 
     if ( is_hvm_vcpu(prev) )
     {
@@ -1497,8 +1505,8 @@ void context_switch(struct vcpu *prev, s
 
     context_saved(prev);
 
-    if (prev != next)
-        update_runstate_area(next);
+    if ( prev != next )
+        _update_runstate_area(next);
 
     /* Ensure that the vcpu has an up-to-date time base. */
     update_vcpu_system_time(next);
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -740,7 +740,6 @@ static void __update_vcpu_system_time(st
 {
     struct cpu_time       *t;
     struct vcpu_time_info *u, _u;
-    XEN_GUEST_HANDLE(vcpu_time_info_t) user_u;
     struct domain *d = v->domain;
     s_time_t tsc_stamp = 0;
 
@@ -805,19 +804,31 @@ static void __update_vcpu_system_time(st
     /* 3. Update guest kernel version. */
     u->version = version_update_end(u->version);
 
-    user_u = v->arch.time_info_guest;
-    if ( !guest_handle_is_null(user_u) )
-    {
-        /* 1. Update userspace version. */
-        __copy_field_to_guest(user_u, &_u, version);
-        wmb();
-        /* 2. Update all other userspavce fields. */
-        __copy_to_guest(user_u, &_u, 1);
-        wmb();
-        /* 3. Update userspace version. */
-        _u.version = version_update_end(_u.version);
-        __copy_field_to_guest(user_u, &_u, version);
-    }
+    if ( !update_secondary_system_time(v, &_u) && is_pv_domain(d) &&
+         !is_pv_32bit_domain(d) && !(v->arch.flags & TF_kernel_mode) )
+        v->arch.pv_vcpu.pending_system_time = _u;
+}
+
+bool_t update_secondary_system_time(const struct vcpu *v,
+                                    struct vcpu_time_info *u)
+{
+    XEN_GUEST_HANDLE(vcpu_time_info_t) user_u = v->arch.time_info_guest;
+
+    if ( guest_handle_is_null(user_u) )
+        return 1;
+
+    /* 1. Update userspace version. */
+    if ( __copy_field_to_guest(user_u, u, version) == sizeof(u->version) )
+        return 0;
+    wmb();
+    /* 2. Update all other userspace fields. */
+    __copy_to_guest(user_u, u, 1);
+    wmb();
+    /* 3. Update userspace version. */
+    u->version = version_update_end(u->version);
+    __copy_field_to_guest(user_u, u, version);
+
+    return 1;
 }
 
 void update_vcpu_system_time(struct vcpu *v)
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -266,6 +266,18 @@ void toggle_guest_mode(struct vcpu *v)
 #else
     write_ptbase(v);
 #endif
+
+    if ( !(v->arch.flags & TF_kernel_mode) )
+        return;
+
+    if ( v->arch.pv_vcpu.need_update_runstate_area &&
+         update_runstate_area(v) )
+        v->arch.pv_vcpu.need_update_runstate_area = 0;
+
+    if ( v->arch.pv_vcpu.pending_system_time.version &&
+         update_secondary_system_time(v,
+                                      &v->arch.pv_vcpu.pending_system_time) )
+        v->arch.pv_vcpu.pending_system_time.version = 0;

What would happen now if a guest kernel loads a faulting address for its runstate info (or edits its pagetables so a previously good address now faults)? It appears as if we could retry writing to the same bad address each time we try to toggle into kernel mode.

Given that we know we are running on the correct set of pagetables, I think both of these pending variable can be unconditionally cleared, whether or not update{runstate_area,secondary_system_time} succeed or fail.

~Andrew

 }
 
 unsigned long do_iret(void)
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -373,6 +373,10 @@ struct pv_vcpu
     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;
     spinlock_t shadow_ldt_lock;
+
+    /* Deferred VA-based update state. */
+    bool_t need_update_runstate_area;
+    struct vcpu_time_info pending_system_time;
 };
 
 struct arch_vcpu
@@ -445,6 +449,10 @@ struct arch_vcpu
 #define hvm_vmx         hvm_vcpu.u.vmx
 #define hvm_svm         hvm_vcpu.u.svm
 
+bool_t update_runstate_area(const struct vcpu *);
+bool_t update_secondary_system_time(const struct vcpu *,
+                                    struct vcpu_time_info *);
+
 void vcpu_show_execution_state(struct vcpu *);
 void vcpu_show_registers(const struct vcpu *);

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

_______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.