[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 1/1] x86/HVM: Use fixed TSC value when saving or restoring domain



When a domain is saved each VCPU's TSC value needs to be preserved. To get it we
use hvm_get_guest_tsc(). This routine (either itself or via get_s_time() which
it may call) calculates VCPU's TSC based on current host's TSC value (by doing a
rdtscll()). Since this is performed for each VCPU separately we end up with
un-synchronized TSCs.

Similarly, during a restore each VCPU is assigned its TSC based on host's 
current
tick, causing virtual TSCs to diverge further.

With this, we can easily get into situation where a guest may see time going
backwards.

Instead of reading new TSC value for each VCPU when saving/restoring it we 
should
use the same value across all VCPUs.

(As part of the patch, update rdtscll's definition so that its variables don't
clash with outer code)

Reported-by: Philippe Coquard <philippe.coquard@xxxxxxxx>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>
---
 xen/arch/x86/hvm/hvm.c        |   22 +++++++++++++++-------
 xen/arch/x86/hvm/save.c       |   16 ++++++++++++++++
 xen/arch/x86/hvm/svm/svm.c    |   13 ++++++++-----
 xen/arch/x86/hvm/vmx/vmx.c    |    6 +++---
 xen/arch/x86/hvm/vmx/vvmx.c   |    4 ++--
 xen/arch/x86/hvm/vpt.c        |   16 ++++++++++------
 xen/arch/x86/time.c           |   12 ++++++++++--
 xen/common/hvm/save.c         |    5 +++++
 xen/include/asm-x86/domain.h  |    2 ++
 xen/include/asm-x86/hvm/hvm.h |   11 +++++++----
 xen/include/asm-x86/msr.h     |    6 +++---
 xen/include/xen/hvm/save.h    |    2 ++
 xen/include/xen/time.h        |    1 +
 13 files changed, 84 insertions(+), 32 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 5e89cf5..3711377 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -255,16 +255,20 @@ int hvm_set_guest_pat(struct vcpu *v, u64 guest_pat)
     return 1;
 }
 
-void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
+void hvm_set_guest_tsc_fixed(struct vcpu *v, u64 guest_tsc, u64 at_tsc)
 {
     uint64_t tsc;
     uint64_t delta_tsc;
 
     if ( v->domain->arch.vtsc )
     {
-        tsc = hvm_get_guest_time(v);
+        tsc = hvm_get_guest_time_fixed(v, at_tsc);
         tsc = gtime_to_gtsc(v->domain, tsc);
     }
+    else if ( at_tsc )
+    {
+        tsc = at_tsc;
+    }
     else
     {
         rdtscll(tsc);
@@ -275,27 +279,31 @@ void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
                           - v->arch.hvm_vcpu.cache_tsc_offset;
     v->arch.hvm_vcpu.cache_tsc_offset = delta_tsc;
 
-    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, at_tsc);
 }
 
 void hvm_set_guest_tsc_adjust(struct vcpu *v, u64 tsc_adjust)
 {
     v->arch.hvm_vcpu.cache_tsc_offset += tsc_adjust
                             - v->arch.hvm_vcpu.msr_tsc_adjust;
-    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
     v->arch.hvm_vcpu.msr_tsc_adjust = tsc_adjust;
 }
 
-u64 hvm_get_guest_tsc(struct vcpu *v)
+u64 hvm_get_guest_tsc_fixed(struct vcpu *v, uint64_t at_tsc)
 {
     uint64_t tsc;
 
     if ( v->domain->arch.vtsc )
     {
-        tsc = hvm_get_guest_time(v);
+        tsc = hvm_get_guest_time_fixed(v, at_tsc);
         tsc = gtime_to_gtsc(v->domain, tsc);
         v->domain->arch.vtsc_kerncount++;
     }
+    else if ( at_tsc )
+    {
+        tsc = at_tsc;
+    }
     else
     {
         rdtscll(tsc);
@@ -3848,7 +3856,7 @@ void hvm_vcpu_reset_state(struct vcpu *v, uint16_t cs, 
uint16_t ip)
     /* Sync AP's TSC with BSP's. */
     v->arch.hvm_vcpu.cache_tsc_offset =
         v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
-    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
 
     v->arch.hvm_vcpu.msr_tsc_adjust = 0;
 
diff --git a/xen/arch/x86/hvm/save.c b/xen/arch/x86/hvm/save.c
index 066fdb2..309a1fd 100644
--- a/xen/arch/x86/hvm/save.c
+++ b/xen/arch/x86/hvm/save.c
@@ -34,6 +34,14 @@ void arch_hvm_save(struct domain *d, struct hvm_save_header 
*hdr)
 
     /* Save guest's preferred TSC. */
     hdr->gtsc_khz = d->arch.tsc_khz;
+
+    /* Time when saving started */
+    rdtscll(d->arch.chkpt_tsc);
+}
+
+void arch_hvm_save_done(struct domain *d)
+{
+    d->arch.chkpt_tsc = 0;
 }
 
 int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
@@ -67,12 +75,20 @@ int arch_hvm_load(struct domain *d, struct hvm_save_header 
*hdr)
     if ( d->arch.vtsc )
         hvm_set_rdtsc_exiting(d, 1);
 
+    /* Time when restore started  */
+    rdtscll(d->arch.chkpt_tsc);
+
     /* VGA state is not saved/restored, so we nobble the cache. */
     d->arch.hvm_domain.stdvga.cache = 0;
 
     return 0;
 }
 
+void arch_hvm_load_done(struct domain *d)
+{
+    d->arch.chkpt_tsc = 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 4fd5376..1ec11ba 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -318,7 +318,7 @@ static void svm_save_cpu_state(struct vcpu *v, struct 
hvm_hw_cpu *data)
     data->msr_efer         = v->arch.hvm_vcpu.guest_efer;
     data->msr_flags        = -1ULL;
 
-    data->tsc = hvm_get_guest_tsc(v);
+    data->tsc = hvm_get_guest_tsc_fixed(v, v->domain->arch.chkpt_tsc);
 }
 
 
@@ -334,7 +334,7 @@ static void svm_load_cpu_state(struct vcpu *v, struct 
hvm_hw_cpu *data)
     v->arch.hvm_vcpu.guest_efer = data->msr_efer;
     svm_update_guest_efer(v);
 
-    hvm_set_guest_tsc(v, data->tsc);
+    hvm_set_guest_tsc_fixed(v, data->tsc, v->domain->arch.chkpt_tsc);
 }
 
 static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
@@ -680,7 +680,7 @@ static uint64_t svm_get_tsc_offset(uint64_t host_tsc, 
uint64_t guest_tsc,
     return guest_tsc - offset;
 }
 
-static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
+static void svm_set_tsc_offset(struct vcpu *v, u64 offset, u64 at_tsc)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     struct vmcb_struct *n1vmcb, *n2vmcb;
@@ -688,11 +688,14 @@ static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
     struct domain *d = v->domain;
     uint64_t host_tsc, guest_tsc;
 
-    guest_tsc = hvm_get_guest_tsc(v);
+    guest_tsc = hvm_get_guest_tsc_fixed(v, at_tsc);
 
     /* Re-adjust the offset value when TSC_RATIO is available */
     if ( cpu_has_tsc_ratio && d->arch.vtsc ) {
-        rdtscll(host_tsc);
+        if ( at_tsc )
+            host_tsc = at_tsc;
+        else
+            rdtscll(host_tsc);
         offset = svm_get_tsc_offset(host_tsc, guest_tsc, vcpu_tsc_ratio(v));
     }
 
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 77ce167..c6e7ba4 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -540,7 +540,7 @@ static void vmx_save_cpu_state(struct vcpu *v, struct 
hvm_hw_cpu *data)
     data->msr_star         = guest_state->msrs[VMX_INDEX_MSR_STAR];
     data->msr_syscall_mask = guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK];
 
-    data->tsc = hvm_get_guest_tsc(v);
+    data->tsc = hvm_get_guest_tsc_fixed(v, v->domain->arch.chkpt_tsc);
 }
 
 static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
@@ -556,7 +556,7 @@ static void vmx_load_cpu_state(struct vcpu *v, struct 
hvm_hw_cpu *data)
     v->arch.hvm_vmx.cstar     = data->msr_cstar;
     v->arch.hvm_vmx.shadow_gs = data->shadow_gs;
 
-    hvm_set_guest_tsc(v, data->tsc);
+    hvm_set_guest_tsc_fixed(v, data->tsc, v->domain->arch.chkpt_tsc);
 }
 
 
@@ -1052,7 +1052,7 @@ static void vmx_handle_cd(struct vcpu *v, unsigned long 
value)
     }
 }
 
-static void vmx_set_tsc_offset(struct vcpu *v, u64 offset)
+static void vmx_set_tsc_offset(struct vcpu *v, u64 offset, u64 at_tsc)
 {
     vmx_vmcs_enter(v);
 
diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index 40167d6..e263376 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1058,7 +1058,7 @@ static void load_shadow_guest_state(struct vcpu *v)
     if ( control & VM_ENTRY_LOAD_PERF_GLOBAL_CTRL )
         hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL, __get_vvmcs(vvmcs, 
GUEST_PERF_GLOBAL_CTRL));
 
-    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
 
     vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmentry_fields), vmentry_fields);
 
@@ -1259,7 +1259,7 @@ static void load_vvmcs_host_state(struct vcpu *v)
     if ( control & VM_EXIT_LOAD_PERF_GLOBAL_CTRL )
         hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL, __get_vvmcs(vvmcs, 
HOST_PERF_GLOBAL_CTRL));
 
-    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
 
     __set_vvmcs(vvmcs, VM_ENTRY_INTR_INFO, 0);
 }
diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c
index f7af688..38541cf 100644
--- a/xen/arch/x86/hvm/vpt.c
+++ b/xen/arch/x86/hvm/vpt.c
@@ -36,7 +36,7 @@ void hvm_init_guest_time(struct domain *d)
     pl->last_guest_time = 0;
 }
 
-u64 hvm_get_guest_time(struct vcpu *v)
+u64 hvm_get_guest_time_fixed(struct vcpu *v, u64 at_tsc)
 {
     struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time;
     u64 now;
@@ -45,11 +45,15 @@ u64 hvm_get_guest_time(struct vcpu *v)
     ASSERT(is_hvm_vcpu(v));
 
     spin_lock(&pl->pl_time_lock);
-    now = get_s_time() + pl->stime_offset;
-    if ( (int64_t)(now - pl->last_guest_time) > 0 )
-        pl->last_guest_time = now;
-    else
-        now = ++pl->last_guest_time;
+    now = get_s_time_fixed(at_tsc) + pl->stime_offset;
+
+    if ( !at_tsc )
+    {
+        if ( (int64_t)(now - pl->last_guest_time) > 0 )
+            pl->last_guest_time = now;
+        else
+            now = ++pl->last_guest_time;
+    }
     spin_unlock(&pl->pl_time_lock);
 
     return now + v->arch.hvm_vcpu.stime_offset;
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
index f904af2..b0bd388 100644
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -715,19 +715,27 @@ static unsigned long get_cmos_time(void)
  * System Time
  ***************************************************************************/
 
-s_time_t get_s_time(void)
+s_time_t get_s_time_fixed(u64 at_tsc)
 {
     struct cpu_time *t = &this_cpu(cpu_time);
     u64 tsc, delta;
     s_time_t now;
 
-    rdtscll(tsc);
+    if ( at_tsc )
+        tsc = at_tsc;
+    else
+        rdtscll(tsc);
     delta = tsc - t->local_tsc_stamp;
     now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
 
     return now;
 }
 
+s_time_t get_s_time()
+{
+    return get_s_time_fixed(0);
+}
+
 uint64_t tsc_ticks2ns(uint64_t ticks)
 {
     struct cpu_time *t = &this_cpu(cpu_time);
diff --git a/xen/common/hvm/save.c b/xen/common/hvm/save.c
index 6c16399..7db68af 100644
--- a/xen/common/hvm/save.c
+++ b/xen/common/hvm/save.c
@@ -186,6 +186,8 @@ int hvm_save(struct domain *d, hvm_domain_context_t *h)
         }
     }
 
+    arch_hvm_save_done(d);
+
     /* Save an end-of-file marker */
     if ( hvm_save_entry(END, 0, h, &end) != 0 )
     {
@@ -236,7 +238,10 @@ int hvm_load(struct domain *d, hvm_domain_context_t *h)
         /* Read the typecode of the next entry  and check for the end-marker */
         desc = (struct hvm_save_descriptor *)(&h->data[h->cur]);
         if ( desc->typecode == 0 )
+        {
+            arch_hvm_load_done(d);
             return 0; 
+        }
         
         /* Find the handler for this entry */
         if ( (desc->typecode > HVM_SAVE_CODE_MAX) ||
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 4ff89f0..7274fc1 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -308,6 +308,8 @@ struct arch_domain
                                 (possibly other cases in the future */
     uint64_t vtsc_kerncount; /* for hvm, counts all vtsc */
     uint64_t vtsc_usercount; /* not used for hvm */
+    uint64_t chkpt_tsc;      /* TSC value that VCPUs use to calculate their
+                                tsc_offset value. Used during save/restore */
 
     /* Pseudophysical e820 map (XENMEM_memory_map).  */
     spinlock_t e820_lock;
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index dcc3483..31043b2 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -137,7 +137,7 @@ struct hvm_function_table {
     int  (*get_guest_pat)(struct vcpu *v, u64 *);
     int  (*set_guest_pat)(struct vcpu *v, u64);
 
-    void (*set_tsc_offset)(struct vcpu *v, u64 offset);
+    void (*set_tsc_offset)(struct vcpu *v, u64 offset, u64 at_tsc);
 
     void (*inject_trap)(struct hvm_trap *trap);
 
@@ -232,12 +232,15 @@ bool_t hvm_send_assist_req(struct vcpu *v);
 void hvm_get_guest_pat(struct vcpu *v, u64 *guest_pat);
 int hvm_set_guest_pat(struct vcpu *v, u64 guest_pat);
 
-void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc);
-u64 hvm_get_guest_tsc(struct vcpu *v);
+void hvm_set_guest_tsc_fixed(struct vcpu *v, u64 guest_tsc, u64 at_tsc);
+#define hvm_set_guest_tsc(v, t) hvm_set_guest_tsc_fixed(v, t, 0)
+u64 hvm_get_guest_tsc_fixed(struct vcpu *v, u64 at_tsc);
+#define hvm_get_guest_tsc(v) hvm_get_guest_tsc_fixed(v, 0)
 
 void hvm_init_guest_time(struct domain *d);
 void hvm_set_guest_time(struct vcpu *v, u64 guest_time);
-u64 hvm_get_guest_time(struct vcpu *v);
+u64 hvm_get_guest_time_fixed(struct vcpu *v, u64 at_tsc);
+#define hvm_get_guest_time(v) hvm_get_guest_time_fixed(v, 0)
 
 int vmsi_deliver(
     struct domain *d, int vector,
diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h
index 61f579a..52cae4b 100644
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -78,9 +78,9 @@ static inline int wrmsr_safe(unsigned int msr, uint64_t val)
      __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
 
 #define rdtscll(val) do { \
-     unsigned int a,d; \
-     asm volatile("rdtsc" : "=a" (a), "=d" (d)); \
-     (val) = ((unsigned long)a) | (((unsigned long)d)<<32); \
+     unsigned int _eax, _edx; \
+     asm volatile("rdtsc" : "=a" (_eax), "=d" (_edx)); \
+     (val) = ((unsigned long)_eax) | (((unsigned long)_edx)<<32); \
 } while(0)
 
 #define __write_tsc(val) wrmsrl(MSR_IA32_TSC, val)
diff --git a/xen/include/xen/hvm/save.h b/xen/include/xen/hvm/save.h
index ae6f0bb..70522a9 100644
--- a/xen/include/xen/hvm/save.h
+++ b/xen/include/xen/hvm/save.h
@@ -133,6 +133,8 @@ int hvm_load(struct domain *d, hvm_domain_context_t *h);
 /* Arch-specific definitions. */
 struct hvm_save_header;
 void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr);
+void arch_hvm_save_done(struct domain *d);
 int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr);
+void arch_hvm_load_done(struct domain *d);
 
 #endif /* __XEN_HVM_SAVE_H__ */
diff --git a/xen/include/xen/time.h b/xen/include/xen/time.h
index 2703454..709501f 100644
--- a/xen/include/xen/time.h
+++ b/xen/include/xen/time.h
@@ -32,6 +32,7 @@ struct vcpu;
 typedef s64 s_time_t;
 #define PRI_stime PRId64
 
+s_time_t get_s_time_fixed(u64 at_tick);
 s_time_t get_s_time(void);
 unsigned long get_localtime(struct domain *d);
 uint64_t get_localtime_us(struct domain *d);
-- 
1.7.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.