[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] New VCPUOP_get_runstate_info hypercall. Returns information about the current



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 2303fb4682e7cd4feb330fd2aec69672facb4ec6
# Parent  a9f3abcc41499b7be971412d66c08d0e9740ff66
New VCPUOP_get_runstate_info hypercall. Returns information about the current
run state of a VCPU (running, runnable, blocked, etc.) and the total time
spent in each state since the VCPU was created.

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>

diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c     Sat Feb 25 11:27:53 2006
+++ b/xen/common/dom0_ops.c     Sat Feb 25 16:58:37 2006
@@ -46,6 +46,7 @@
     struct vcpu   *v;
     u64 cpu_time = 0;
     int flags = DOMFLAGS_BLOCKED;
+    struct vcpu_runstate_info runstate;
     
     info->domain = d->domain_id;
     info->nr_online_vcpus = 0;
@@ -55,7 +56,8 @@
      * - domain is marked as running if any of its vcpus is running
      */
     for_each_vcpu ( d, v ) {
-        cpu_time += v->cpu_time;
+        vcpu_runstate_get(v, &runstate);
+        cpu_time += runstate.time[RUNSTATE_running];
         info->max_vcpu_id = v->vcpu_id;
         if ( !test_bit(_VCPUF_down, &v->vcpu_flags) )
         {
@@ -497,6 +499,7 @@
     { 
         struct domain *d;
         struct vcpu   *v;
+        struct vcpu_runstate_info runstate;
 
         ret = -ESRCH;
         if ( (d = find_domain_by_id(op->u.getvcpuinfo.domain)) == NULL )
@@ -510,10 +513,12 @@
         if ( (v = d->vcpu[op->u.getvcpuinfo.vcpu]) == NULL )
             goto getvcpuinfo_out;
 
+        vcpu_runstate_get(v, &runstate);
+
         op->u.getvcpuinfo.online   = !test_bit(_VCPUF_down, &v->vcpu_flags);
         op->u.getvcpuinfo.blocked  = test_bit(_VCPUF_blocked, &v->vcpu_flags);
         op->u.getvcpuinfo.running  = test_bit(_VCPUF_running, &v->vcpu_flags);
-        op->u.getvcpuinfo.cpu_time = v->cpu_time;
+        op->u.getvcpuinfo.cpu_time = runstate.time[RUNSTATE_running];
         op->u.getvcpuinfo.cpu      = v->processor;
         op->u.getvcpuinfo.cpumap   = 0;
         memcpy(&op->u.getvcpuinfo.cpumap,
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/domain.c
--- a/xen/common/domain.c       Sat Feb 25 11:27:53 2006
+++ b/xen/common/domain.c       Sat Feb 25 16:58:37 2006
@@ -451,6 +451,19 @@
     case VCPUOP_is_up:
         rc = !test_bit(_VCPUF_down, &v->vcpu_flags);
         break;
+
+    case VCPUOP_get_runstate_info:
+    {
+        struct vcpu_runstate_info runstate;
+        vcpu_runstate_get(v, &runstate);
+        if ( copy_to_user(arg, &runstate, sizeof(runstate)) )
+            rc = -EFAULT;
+        break;
+    }
+
+    default:
+        rc = -ENOSYS;
+        break;
     }
 
     return rc;
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/keyhandler.c
--- a/xen/common/keyhandler.c   Sat Feb 25 11:27:53 2006
+++ b/xen/common/keyhandler.c   Sat Feb 25 16:58:37 2006
@@ -169,8 +169,6 @@
 }
 
 extern void dump_runq(unsigned char key);
-extern void print_sched_histo(unsigned char key);
-extern void reset_sched_histo(unsigned char key);
 #ifndef NDEBUG
 extern void audit_domains_key(unsigned char key);
 #endif
@@ -206,10 +204,6 @@
         'd', dump_registers, "dump registers"); 
     register_keyhandler(
         'h', show_handlers, "show this message");
-    register_keyhandler(
-        'l', print_sched_histo, "print sched latency histogram");
-    register_keyhandler(
-        'L', reset_sched_histo, "reset sched latency histogram");
     register_keyhandler(
         'q', dump_domains, "dump domain (and guest debug) info");
     register_keyhandler(
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/sched_bvt.c
--- a/xen/common/sched_bvt.c    Sat Feb 25 11:27:53 2006
+++ b/xen/common/sched_bvt.c    Sat Feb 25 16:58:37 2006
@@ -132,13 +132,13 @@
     vcpu_schedule_unlock_irq(v);
 }
 
-static inline u32 calc_avt(struct vcpu *d, s_time_t now)
+static inline u32 calc_avt(struct vcpu *v, s_time_t now)
 {
     u32 ranfor, mcus;
-    struct bvt_dom_info *inf = BVT_INFO(d->domain);
-    struct bvt_vcpu_info *einf = EBVT_INFO(d);
-    
-    ranfor = (u32)(now - d->lastschd);
+    struct bvt_dom_info *inf = BVT_INFO(v->domain);
+    struct bvt_vcpu_info *einf = EBVT_INFO(v);
+    
+    ranfor = (u32)(now - v->runstate.state_entry_time);
     mcus = (ranfor + MCU - 1)/MCU;
 
     return einf->avt + mcus * inf->mcu_advance;
@@ -262,7 +262,7 @@
     curr_evt = calc_evt(curr, calc_avt(curr, now));
     /* Calculate the time the current domain would run assuming
        the second smallest evt is of the newly woken domain */
-    r_time = curr->lastschd +
+    r_time = curr->runstate.state_entry_time +
         ((einf->evt - curr_evt) / BVT_INFO(curr->domain)->mcu_advance) +
         ctx_allow;
 
@@ -558,7 +558,6 @@
         printk("%3d: %u has=%c ", loop++, v->domain->domain_id,
                test_bit(_VCPUF_running, &v->vcpu_flags) ? 'T':'F');
         bvt_dump_runq_el(v);
-        printk("c=0x%X%08X\n", (u32)(v->cpu_time>>32), (u32)v->cpu_time);
         printk("         l: %p n: %p  p: %p\n",
                &vcpu_inf->run_list, vcpu_inf->run_list.next,
                vcpu_inf->run_list.prev);
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c   Sat Feb 25 11:27:53 2006
+++ b/xen/common/sched_sedf.c   Sat Feb 25 16:58:37 2006
@@ -1408,18 +1408,14 @@
 {
     printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
            test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
-    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64
+    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu"
            " sc=%i xtr(%s)=%"PRIu64" ew=%hu",
            EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
-           EDOM_INFO(d)->weight, d->cpu_time,
+           EDOM_INFO(d)->weight,
            EDOM_INFO(d)->score[EXTRA_UTIL_Q],
            (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
            EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
     
-    if ( d->cpu_time != 0 )
-        printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
-               / d->cpu_time);
-
 #ifdef SEDF_STATS
     if ( EDOM_INFO(d)->block_time_tot != 0 )
         printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/schedule.c
--- a/xen/common/schedule.c     Sat Feb 25 11:27:53 2006
+++ b/xen/common/schedule.c     Sat Feb 25 16:58:37 2006
@@ -36,14 +36,6 @@
 static char opt_sched[10] = "sedf";
 string_param("sched", opt_sched);
 
-/*#define WAKE_HISTO*/
-/*#define BLOCKTIME_HISTO*/
-#if defined(WAKE_HISTO)
-#define BUCKETS 31
-#elif defined(BLOCKTIME_HISTO)
-#define BUCKETS 200
-#endif
-
 #define TIME_SLOP      (s32)MICROSECS(50)     /* allow time to slip a bit */
 
 /* Various timer handlers. */
@@ -73,6 +65,36 @@
 /* Per-CPU periodic timer sends an event to the currently-executing domain. */
 static struct timer t_timer[NR_CPUS]; 
 
+static inline void vcpu_runstate_change(
+    struct vcpu *v, int new_state, s_time_t new_entry_time)
+{
+    ASSERT(v->runstate.state != new_state); /* must be a real transition */
+    ASSERT(spin_is_locked(&schedule_data[v->processor].schedule_lock));
+
+    v->runstate.time[v->runstate.state] += /* charge time to the old state */
+        new_entry_time - v->runstate.state_entry_time;
+    v->runstate.state_entry_time = new_entry_time;
+    v->runstate.state = new_state;
+}
+
+void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
+{
+    if ( likely(v == current) )
+    {
+        /* Fast lock-free path: snapshot, then add time in current state. */
+        memcpy(runstate, &v->runstate, sizeof(*runstate));
+        ASSERT(runstate->state == RUNSTATE_running);
+        runstate->time[RUNSTATE_running] += NOW() - runstate->state_entry_time;
+    }
+    else
+    {
+        vcpu_schedule_lock_irq(v);
+        memcpy(runstate, &v->runstate, sizeof(*runstate));
+        runstate->time[runstate->state] += NOW() - runstate->state_entry_time;
+        vcpu_schedule_unlock_irq(v);
+    }
+}
+
 struct domain *alloc_domain(void)
 {
     struct domain *d;
@@ -119,6 +141,9 @@
     v->cpu_affinity = is_idle_domain(d) ?
         cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
 
+    v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
+    v->runstate.state_entry_time = NOW();
+
     if ( (vcpu_id != 0) && !is_idle_domain(d) )
         set_bit(_VCPUF_down, &v->vcpu_flags);
 
@@ -165,8 +190,15 @@
     unsigned long flags;
 
     vcpu_schedule_lock_irqsave(v, flags);
+
     if ( likely(!vcpu_runnable(v)) )
+    {
+        if ( v->runstate.state == RUNSTATE_runnable )
+            vcpu_runstate_change(v, RUNSTATE_offline, NOW());
+
         SCHED_OP(sleep, v);
+    }
+
     vcpu_schedule_unlock_irqrestore(v, flags);
 
     TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
@@ -187,11 +219,19 @@
     unsigned long flags;
 
     vcpu_schedule_lock_irqsave(v, flags);
+
     if ( likely(vcpu_runnable(v)) )
     {
+        if ( v->runstate.state >= RUNSTATE_blocked )
+            vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
         SCHED_OP(wake, v);
-        v->wokenup = NOW();
-    }
+    }
+    else if ( !test_bit(_VCPUF_blocked, &v->vcpu_flags) )
+    {
+        if ( v->runstate.state == RUNSTATE_blocked )
+            vcpu_runstate_change(v, RUNSTATE_offline, NOW());
+    }
+
     vcpu_schedule_unlock_irqrestore(v, flags);
 
     TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
@@ -376,8 +416,6 @@
 
     stop_timer(&schedule_data[cpu].s_timer);
     
-    prev->cpu_time += now - prev->lastschd;
-
     /* get policy-specific decision on scheduling... */
     next_slice = ops.do_schedule(now);
 
@@ -386,8 +424,6 @@
 
     schedule_data[cpu].curr = next;
     
-    next->lastschd = now;
-
     set_timer(&schedule_data[cpu].s_timer, now + r_time);
 
     if ( unlikely(prev == next) )
@@ -397,38 +433,23 @@
     }
 
     TRACE_2D(TRC_SCHED_SWITCH_INFPREV,
-             prev->domain->domain_id, now - prev->lastschd);
+             prev->domain->domain_id,
+             now - prev->runstate.state_entry_time);
     TRACE_3D(TRC_SCHED_SWITCH_INFNEXT,
-             next->domain->domain_id, now - next->wokenup, r_time);
-
-    /*
-     * Logic of wokenup field in domain struct:
-     * Used to calculate "waiting time", which is the time that a domain
-     * spends being "runnable", but not actually running. wokenup is set
-     * set whenever a domain wakes from sleeping. However, if wokenup is not
-     * also set here then a preempted runnable domain will get a screwed up
-     * "waiting time" value next time it is scheduled.
-     */
-    prev->wokenup = now;
-
-#if defined(WAKE_HISTO)
-    if ( !is_idle_vcpu(next) && next->wokenup )
-    {
-        ulong diff = (ulong)(now - next->wokenup);
-        diff /= (ulong)MILLISECS(1);
-        if (diff <= BUCKETS-2)  schedule_data[cpu].hist[diff]++;
-        else                    schedule_data[cpu].hist[BUCKETS-1]++;
-    }
-    next->wokenup = (s_time_t)0;
-#elif defined(BLOCKTIME_HISTO)
-    prev->lastdeschd = now;
-    if ( !is_idle_vcpu(next) )
-    {
-        ulong diff = (ulong)((now - next->lastdeschd) / MILLISECS(10));
-        if (diff <= BUCKETS-2)  schedule_data[cpu].hist[diff]++;
-        else                    schedule_data[cpu].hist[BUCKETS-1]++;
-    }
-#endif
+             next->domain->domain_id,
+             (next->runstate.state == RUNSTATE_runnable) ?
+             (now - next->runstate.state_entry_time) : 0,
+             r_time);
+
+    ASSERT(prev->runstate.state == RUNSTATE_running);
+    vcpu_runstate_change(
+        prev,
+        (test_bit(_VCPUF_blocked, &prev->vcpu_flags) ? RUNSTATE_blocked :
+         (vcpu_runnable(prev) ? RUNSTATE_runnable : RUNSTATE_offline)),
+        now);
+
+    ASSERT(next->runstate.state != RUNSTATE_running);
+    vcpu_runstate_change(next, RUNSTATE_running, now);
 
     ASSERT(!test_bit(_VCPUF_running, &next->vcpu_flags));
     set_bit(_VCPUF_running, &next->vcpu_flags);
@@ -567,47 +588,6 @@
 
     local_irq_restore(flags);
 }
-
-#if defined(WAKE_HISTO) || defined(BLOCKTIME_HISTO)
-
-void print_sched_histo(unsigned char key)
-{
-    int i, j, k;
-    for_each_online_cpu ( k )
-    {
-        j = 0;
-        printf ("CPU[%02d]: scheduler latency histogram (ms:[count])\n", k);
-        for ( i = 0; i < BUCKETS; i++ )
-        {
-            if ( schedule_data[k].hist[i] != 0 )
-            {
-                if ( i < BUCKETS-1 )
-                    printk("%2d:[%7u]    ", i, schedule_data[k].hist[i]);
-                else
-                    printk(" >:[%7u]    ", schedule_data[k].hist[i]);
-                if ( !(++j % 5) )
-                    printk("\n");
-            }
-        }
-        printk("\n");
-    }
-      
-}
-
-void reset_sched_histo(unsigned char key)
-{
-    int i, j;
-    for ( j = 0; j < NR_CPUS; j++ )
-        for ( i=0; i < BUCKETS; i++ ) 
-            schedule_data[j].hist[i] = 0;
-}
-
-#else
-
-void print_sched_histo(unsigned char key) { }
-void reset_sched_histo(unsigned char key) { }
-
-#endif
 
 /*
  * Local variables:
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/include/public/vcpu.h
--- a/xen/include/public/vcpu.h Sat Feb 25 11:27:53 2006
+++ b/xen/include/public/vcpu.h Sat Feb 25 16:58:37 2006
@@ -51,6 +51,40 @@
 /* Returns 1 if the given VCPU is up. */
 #define VCPUOP_is_up                3
 
+/*
+ * Return information about the state and running time of a VCPU.
+ * @extra_arg == pointer to vcpu_runstate_info structure.
+ */
+#define VCPUOP_get_runstate_info    4
+typedef struct vcpu_runstate_info {
+    /* VCPU's current state (RUNSTATE_*). */
+    int      state;
+    /* When was current state entered (system time, ns)? */
+    uint64_t state_entry_time;
+    /*
+     * Time spent in each RUNSTATE_* (ns). The sum of these times is
+     * guaranteed not to drift from system time.
+     */
+    uint64_t time[4];
+} vcpu_runstate_info_t;
+
+/* VCPU is currently running on a physical CPU. */
+#define RUNSTATE_running  0
+
+/* VCPU is runnable, but not currently scheduled on any physical CPU. */
+#define RUNSTATE_runnable 1
+
+/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */
+#define RUNSTATE_blocked  2
+
+/*
+ * VCPU is not runnable, but it is not blocked.
+ * This is a 'catch all' state for things like hotplug and pauses by the
+ * system administrator (or for critical sections in the hypervisor).
+ * RUNSTATE_blocked dominates this state (it is the preferred state).
+ */
+#define RUNSTATE_offline  3
+
 #endif /* __XEN_PUBLIC_VCPU_H__ */
 
 /*
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Sat Feb 25 11:27:53 2006
+++ b/xen/include/xen/sched-if.h        Sat Feb 25 16:58:37 2006
@@ -8,9 +8,6 @@
 #ifndef __XEN_SCHED_IF_H__
 #define __XEN_SCHED_IF_H__
 
-#define BUCKETS  10
-/*300*/
-
 struct schedule_data {
     spinlock_t          schedule_lock;  /* spinlock protecting curr        */
     struct vcpu        *curr;           /* current task                    */
@@ -18,9 +15,6 @@
     void               *sched_priv;
     struct timer        s_timer;        /* scheduling timer                */
     unsigned long       tick;           /* current periodic 'tick'         */
-#ifdef BUCKETS
-    u32                 hist[BUCKETS];  /* for scheduler latency histogram */
-#endif
 } __cacheline_aligned;
 
 extern struct schedule_data schedule_data[];
diff -r a9f3abcc4149 -r 2303fb4682e7 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Sat Feb 25 11:27:53 2006
+++ b/xen/include/xen/sched.h   Sat Feb 25 16:58:37 2006
@@ -8,6 +8,7 @@
 #include <xen/smp.h>
 #include <public/xen.h>
 #include <public/dom0_ops.h>
+#include <public/vcpu.h>
 #include <xen/time.h>
 #include <xen/timer.h>
 #include <xen/grant_table.h>
@@ -63,14 +64,12 @@
 
     struct vcpu     *next_in_list;
 
-    struct timer  timer;         /* one-shot timer for timeout values */
+    struct timer     timer;         /* one-shot timer for timeout values */
     unsigned long    sleep_tick;    /* tick at which this vcpu started sleep */
 
-    s_time_t         lastschd;      /* time this domain was last scheduled */
-    s_time_t         lastdeschd;    /* time this domain was last descheduled */
-    s_time_t         cpu_time;      /* total CPU time received till now */
-    s_time_t         wokenup;       /* time domain got woken up */
     void            *sched_priv;    /* scheduler-specific data */
+
+    struct vcpu_runstate_info runstate;
 
     unsigned long    vcpu_flags;
 
@@ -397,7 +396,6 @@
 #define _DOMF_debugging        4
 #define DOMF_debugging         (1UL<<_DOMF_debugging)
 
-
 static inline int vcpu_runnable(struct vcpu *v)
 {
     return ( (atomic_read(&v->pausecnt) == 0) &&
@@ -415,6 +413,8 @@
 
 int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
 
+void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
+
 static inline void vcpu_unblock(struct vcpu *v)
 {
     if ( test_and_clear_bit(_VCPUF_blocked, &v->vcpu_flags) )

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.