[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 5/6] x86/time: implement PVCLOCK_TSC_STABLE_BIT



This patch proposes relying on host TSC synchronization and
passthrough to the guest, when running on a TSC-safe platform. On
time_calibration we retrieve the platform time in ns and the counter
read by the clocksource that was used to compute system time. We
introduce a new rendezvous function which doesn't require
synchronization between master and slave CPUs and just reads the
calibration_rendezvous struct and writes the stime and stamp
to the cpu_calibration struct to be used later on. We can guarantee that,
on a platform with a constant and reliable TSC, the time read on
vCPU B right after vCPU A is bigger, independently of the vCPU calibration
error. Since pvclock time infos are monotonic as seen by any vCPU, set
PVCLOCK_TSC_STABLE_BIT, which then enables usage of VDSO on Linux.
IIUC, this is similar to how it's implemented on KVM.

This also changes clocksource=tsc initialization to be used only when CPU
hotplug isn't meant to be performed on the host, which is the case when max
cpus (nr_cpu_ids) and num_present_cpus() are the same.  This is because a newly
hotplugged CPU may not satisfy the condition of having all TSCs synchronized -
so when the tsc clocksource is being used we allow offlining CPUs but not
onlining any of them back. I should note that I've yet to see time going
backwards in a long running test in the past few days (on a dual socket
machine), plus a few other tests I did on older platforms.

Signed-off-by: Joao Martins <joao.m.martins@xxxxxxxxxx>
---
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

Changes since v2:
 - Add XEN_ prefix to pvclock flags.
 - Adapt time_calibration_rendezvous_tail to handle the case of setting master
 tsc/stime and use it for the nop_rendezvous.
 - Removed hotplug CPU option that was added in v1
 - Prevent online of CPUs when clocksource is tsc.
 - Remove use_tsc_stable_bit, since clocksource is only used to seed
 values. So instead we test if hotplug is possible, and prevent clocksource=tsc
 to be used.
 - Remove 1st paragraph of commit message since the behaviour described
   no longer applies since b64438c.

Changes since v1:
 - Change approach to skip std_rendezvous by introducing a
   nop_rendezvous
 - Change commit message reflecting the change above.
 - Use TSC_STABLE_BIT only if cpu hotplug isn't possible.
 - Add command line option to override it if no cpu hotplug is
 intended.
---
 xen/arch/x86/platform_hypercall.c |  3 +-
 xen/arch/x86/time.c               | 59 +++++++++++++++++++++++++++++++++++----
 xen/include/asm-x86/time.h        |  1 +
 3 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
index 780f22d..edef334 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -631,7 +631,8 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PARAM(xen_platform_op_t) u_xenpf_op)
         if ( ret )
             break;
 
-        if ( cpu >= nr_cpu_ids || !cpu_present(cpu) )
+        if ( cpu >= nr_cpu_ids || !cpu_present(cpu) ||
+             host_tsc_is_clocksource() )
         {
             ret = -EINVAL;
             break;
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
index 57c1b47..81db255 100644
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -480,6 +480,13 @@ uint64_t ns_to_acpi_pm_tick(uint64_t ns)
 
 static s64 __init init_tsctimer(struct platform_timesource *pts)
 {
+    if ( nr_cpu_ids != num_present_cpus() ) /* more CPU ids than present CPUs */
+    {
+        printk(XENLOG_INFO "TSC: CPU Hotplug intended, "
+                           "not using TSC as clocksource\n");
+        return 0;
+    }
+
     return pts->frequency;
 }
 
@@ -955,6 +962,8 @@ static void __update_vcpu_system_time(struct vcpu *v, int force)
 
     _u.tsc_timestamp = tsc_stamp;
     _u.system_time   = t->stamp.local_stime;
+    if ( host_tsc_is_clocksource() )
+        _u.flags    |= XEN_PVCLOCK_TSC_STABLE_BIT;
 
     if ( is_hvm_domain(d) )
         _u.tsc_timestamp += v->arch.hvm_vcpu.cache_tsc_offset;
@@ -1328,12 +1337,22 @@ struct calibration_rendezvous {
 };
 
 static void
-time_calibration_rendezvous_tail(const struct calibration_rendezvous *r)
+time_calibration_rendezvous_tail(const struct calibration_rendezvous *r,
+                                 bool_t master_tsc)
 {
     struct cpu_time_stamp *c = &this_cpu(cpu_calibration);
 
-    c->local_tsc    = rdtsc_ordered();
-    c->local_stime  = get_s_time_fixed(c->local_tsc);
+    if ( master_tsc )
+    {
+        c->local_tsc    = r->master_tsc_stamp;
+        c->local_stime  = r->master_stime;
+    }
+    else
+    {
+        c->local_tsc    = rdtsc_ordered();
+        c->local_stime  = get_s_time_fixed(c->local_tsc);
+    }
+
     c->master_stime = r->master_stime;
 
     raise_softirq(TIME_CALIBRATE_SOFTIRQ);
@@ -1386,7 +1405,7 @@ static void time_calibration_tsc_rendezvous(void *_r)
         }
     }
 
-    time_calibration_rendezvous_tail(r);
+    time_calibration_rendezvous_tail(r, false);
 }
 
 /* Ordinary rendezvous function which does not modify TSC values. */
@@ -1411,7 +1430,18 @@ static void time_calibration_std_rendezvous(void *_r)
         smp_rmb(); /* receive signal /then/ read r->master_stime */
     }
 
-    time_calibration_rendezvous_tail(r);
+    time_calibration_rendezvous_tail(r, false);
+}
+
+/*
+ * Rendezvous function used when clocksource is TSC and
+ * no CPU hotplug will be performed.
+ */
+static void time_calibration_nop_rendezvous(void *rv)
+{
+    const struct calibration_rendezvous *r = rv;
+
+    time_calibration_rendezvous_tail(r, true);
 }
 
 static void (*time_calibration_rendezvous_fn)(void *) =
@@ -1423,6 +1453,13 @@ static void time_calibration(void *unused)
         .semaphore = ATOMIC_INIT(0)
     };
 
+    if ( host_tsc_is_clocksource() )
+    {
+        local_irq_disable();
+        r.master_stime = read_platform_stime(&r.master_tsc_stamp);
+        local_irq_enable();
+    }
+
     cpumask_copy(&r.cpu_calibration_map, &cpu_online_map);
 
     /* @wait=1 because we must wait for all cpus before freeing @r. */
@@ -1586,6 +1623,13 @@ static int __init verify_tsc_reliability(void)
             printk(XENLOG_INFO "Switched to Platform timer %s TSC\n",
                    freq_string(plt_src.frequency));
 
+            /*
+             * We won't do CPU Hotplug and TSC clocksource is being used which
+             * means we have a reliable TSC, plus we don't sync with any other
+             * clocksource so no need for rendezvous.
+             */
+            time_calibration_rendezvous_fn = time_calibration_nop_rendezvous;
+
             init_timer(&calibration_timer, time_calibration, NULL, 0);
             set_timer(&calibration_timer, NOW() + EPOCH);
         }
@@ -1885,6 +1929,11 @@ void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int rdtscp)
              (d->arch.tsc_mode == TSC_MODE_PVRDTSCP) ? d->arch.incarnation : 0;
 }
 
+bool_t host_tsc_is_clocksource(void)
+{
+    return plt_src.read_counter == read_tsc;
+}
+
 int host_tsc_is_safe(void)
 {
     return boot_cpu_has(X86_FEATURE_TSC_RELIABLE);
diff --git a/xen/include/asm-x86/time.h b/xen/include/asm-x86/time.h
index 971883a..bc3debc 100644
--- a/xen/include/asm-x86/time.h
+++ b/xen/include/asm-x86/time.h
@@ -69,6 +69,7 @@ void tsc_get_info(struct domain *d, uint32_t *tsc_mode, uint64_t *elapsed_nsec,
 
 void force_update_vcpu_system_time(struct vcpu *v);
 
+bool_t host_tsc_is_clocksource(void);
 int host_tsc_is_safe(void);
 void cpuid_time_leaf(uint32_t sub_idx, uint32_t *eax, uint32_t *ebx,
                      uint32_t *ecx, uint32_t *edx);
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.