On 21/01/2014 15:33, Jan Beulich wrote:
Since 64-bit PV uses separate kernel and user mode page tables, kernel
addresses (as usually provided via VCPUOP_register_runstate_memory_area
and possibly via VCPUOP_register_vcpu_time_memory_area) aren't
necessarily accessible when the respective updating occurs. Add logic
for toggle_guest_mode() to take care of this (if necessary) the next
time the vCPU switches to kernel mode.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1323,10 +1323,10 @@ static void paravirt_ctxt_switch_to(stru
}
/* Update per-VCPU guest runstate shared memory area (if registered). */
-static void update_runstate_area(struct vcpu *v)
+bool_t update_runstate_area(const struct vcpu *v)
Can you adjust the comment to indicate what the return value means?
The logic is quite opaque, but I believe it is "true if the runstate
has been suitably updated".
{
if ( guest_handle_is_null(runstate_guest(v)) )
- return;
+ return 1;
if ( has_32bit_shinfo(v->domain) )
{
@@ -1334,10 +1334,18 @@ static void update_runstate_area(struct
XLAT_vcpu_runstate_info(&info, &v->runstate);
__copy_to_guest(v->runstate_guest.compat, &info, 1);
- return;
+ return 1;
}
- __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+ return __copy_to_guest(runstate_guest(v), &v->runstate, 1) !=
+ sizeof(v->runstate);
+}
+
+static void _update_runstate_area(struct vcpu *v)
+{
+ if ( !update_runstate_area(v) && is_pv_vcpu(v) &&
+ !(v->arch.flags & TF_kernel_mode) )
+ v->arch.pv_vcpu.need_update_runstate_area = 1;
}
static inline int need_full_gdt(struct vcpu *v)
@@ -1443,8 +1451,8 @@ void context_switch(struct vcpu *prev, s
flush_tlb_mask(&dirty_mask);
}
- if (prev != next)
- update_runstate_area(prev);
+ if ( prev != next )
+ _update_runstate_area(prev);
if ( is_hvm_vcpu(prev) )
{
@@ -1497,8 +1505,8 @@ void context_switch(struct vcpu *prev, s
context_saved(prev);
- if (prev != next)
- update_runstate_area(next);
+ if ( prev != next )
+ _update_runstate_area(next);
/* Ensure that the vcpu has an up-to-date time base. */
update_vcpu_system_time(next);
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -740,7 +740,6 @@ static void __update_vcpu_system_time(st
{
struct cpu_time *t;
struct vcpu_time_info *u, _u;
- XEN_GUEST_HANDLE(vcpu_time_info_t) user_u;
struct domain *d = v->domain;
s_time_t tsc_stamp = 0;
@@ -805,19 +804,31 @@ static void __update_vcpu_system_time(st
/* 3. Update guest kernel version. */
u->version = version_update_end(u->version);
- user_u = v->arch.time_info_guest;
- if ( !guest_handle_is_null(user_u) )
- {
- /* 1. Update userspace version. */
- __copy_field_to_guest(user_u, &_u, version);
- wmb();
- /* 2. Update all other userspavce fields. */
- __copy_to_guest(user_u, &_u, 1);
- wmb();
- /* 3. Update userspace version. */
- _u.version = version_update_end(_u.version);
- __copy_field_to_guest(user_u, &_u, version);
- }
+ if ( !update_secondary_system_time(v, &_u) && is_pv_domain(d) &&
+ !is_pv_32bit_domain(d) && !(v->arch.flags & TF_kernel_mode) )
+ v->arch.pv_vcpu.pending_system_time = _u;
+}
+
+bool_t update_secondary_system_time(const struct vcpu *v,
+ struct vcpu_time_info *u)
+{
+ XEN_GUEST_HANDLE(vcpu_time_info_t) user_u = v->arch.time_info_guest;
+
+ if ( guest_handle_is_null(user_u) )
+ return 1;
+
+ /* 1. Update userspace version. */
+ if ( __copy_field_to_guest(user_u, u, version) == sizeof(u->version) )
+ return 0;
+ wmb();
+ /* 2. Update all other userspace fields. */
+ __copy_to_guest(user_u, u, 1);
+ wmb();
+ /* 3. Update userspace version. */
+ u->version = version_update_end(u->version);
+ __copy_field_to_guest(user_u, u, version);
+
+ return 1;
}
void update_vcpu_system_time(struct vcpu *v)
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -266,6 +266,18 @@ void toggle_guest_mode(struct vcpu *v)
#else
write_ptbase(v);
#endif
+
+ if ( !(v->arch.flags & TF_kernel_mode) )
+ return;
+
+ if ( v->arch.pv_vcpu.need_update_runstate_area &&
+ update_runstate_area(v) )
+ v->arch.pv_vcpu.need_update_runstate_area = 0;
+
+ if ( v->arch.pv_vcpu.pending_system_time.version &&
+ update_secondary_system_time(v,
+ &v->arch.pv_vcpu.pending_system_time) )
+ v->arch.pv_vcpu.pending_system_time.version = 0;
What would happen now if a guest kernel loads a faulting address for
its runstate info (or edits its pagetables so a previously good
address now faults)? It appears as if we could retry writing to the
same bad address each time we try to toggle into kernel mode.
Given that we know we are running on the correct set of pagetables,
I think both of these pending variables can be unconditionally
cleared, whether update_{runstate_area,secondary_system_time}
succeed or fail.
~Andrew
}
unsigned long do_iret(void)
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -373,6 +373,10 @@ struct pv_vcpu
/* Current LDT details. */
unsigned long shadow_ldt_mapcnt;
spinlock_t shadow_ldt_lock;
+
+ /* Deferred VA-based update state. */
+ bool_t need_update_runstate_area;
+ struct vcpu_time_info pending_system_time;
};
struct arch_vcpu
@@ -445,6 +449,10 @@ struct arch_vcpu
#define hvm_vmx hvm_vcpu.u.vmx
#define hvm_svm hvm_vcpu.u.svm
+bool_t update_runstate_area(const struct vcpu *);
+bool_t update_secondary_system_time(const struct vcpu *,
+ struct vcpu_time_info *);
+
void vcpu_show_execution_state(struct vcpu *);
void vcpu_show_registers(const struct vcpu *);
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|