[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH][RESUBMIT] don't schedule unplugged vcpus



* Ryan Harper <ryanh@xxxxxxxxxx> [2005-06-06 16:04]:
> This patch extends the CONFIG_HOTPLUG_CPU behavior down into the
> hypervisor.  Currently when a CPU in Linux is moved offline,
> 
> echo 0 > /sys/devices/system/cpu/cpuX/online
> 
> the offline cpu yields its slice back to the hypervisor.  This patch
> adds two SCHEDOPS (vcpu_down/vcpu_up) which set/clear a new VCPU flag,
> VCPU_down.  The domain_runnable() check now looks at this flag and
> subsequently the vcpu is not scheduled when VCPU_down is set.
> 
> The patch was built and tested against 20050606 nightly snapshot.
> Testing requires DOMU with CONFIG_SMP and CONFIG_HOTPLUG_CPU.  Please
> apply.

I've added in changes to DOM0_GETDOMINFO and DOM0_GETVCPUCONTEXT
hypercalls.  dominfo now creates a vcpu_online_map bitmap which marks
whether vcpus are up or down.  I didn't want to clobber either the
total number of vcpus allocated to a domain (n_vcpu) or the vcpu_to_cpu
mapping, since both are still valid whether the vcpu is being scheduled
or not.

I modified vcpucontext to return the context of the first vcpu, starting
from the requested one, that is not down.  If the requested vcpu is down,
it will return the context of the next vcpu that is up, or -ESRCH if no
vcpu past the requested vcpu is valid.

I modified xm list -v to display an ONLINE column which indicates
the online status of each vcpu in a domain.

Please apply.

--
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@xxxxxxxxxx


diffstat output:
 linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c       |    7 +
 linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c       |    4 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h |   31 +++++++
 tools/libxc/xc.h                                             |    1 
 tools/libxc/xc_domain.c                                      |    1 
 tools/python/xen/lowlevel/xc/xc.c                            |    7 +
 tools/python/xen/xend/XendDomainInfo.py                      |    2 
 tools/python/xen/xm/main.py                                  |    6 -
 xen/common/dom0_ops.c                                        |   17 +++
 xen/common/schedule.c                                        |   48 +++++++++++
 xen/include/public/dom0_ops.h                                |    3 
 xen/include/public/xen.h                                     |    3 
 xen/include/xen/sched.h                                      |    5 -
 13 files changed, 127 insertions(+), 8 deletions(-)

Signed-off-by: Ryan Harper <ryanh@xxxxxxxxxx>
---
diff -urN b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c 
c/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c
--- b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c    2005-06-07 
22:09:53.000000000 -0500
+++ c/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c    2005-06-08 
12:57:58.658741356 -0500
@@ -154,8 +154,13 @@
                                cpu_clear(cpu, cpu_idle_map);
                        rmb();
 
-                       if (cpu_is_offline(cpu))
+                       if (cpu_is_offline(cpu)) {
+#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
+            /* Tell hypervisor not to schedule dead vcpus */
+            HYPERVISOR_vcpu_down(cpu);
+#endif
                                play_dead();
+         }
 
                        irq_stat[cpu].idle_timestamp = jiffies;
                        xen_idle();
diff -urN b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c 
c/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c
--- b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c    2005-06-07 
22:10:00.000000000 -0500
+++ c/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c    2005-06-08 
12:57:58.663740663 -0500
@@ -1380,6 +1380,10 @@
        }
 
 #ifdef CONFIG_HOTPLUG_CPU
+#ifdef CONFIG_XEN
+   /* Tell hypervisor to bring vcpu up */
+   HYPERVISOR_vcpu_up(cpu);
+#endif
        /* Already up, and in cpu_quiescent now? */
        if (cpu_isset(cpu, smp_commenced_mask)) {
                cpu_enable(cpu);
diff -urN b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h 
c/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h      
2005-06-07 22:10:03.000000000 -0500
+++ c/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h      
2005-06-08 12:57:58.664740524 -0500
@@ -517,4 +517,35 @@
     return ret;
 }
 
+static inline int
+HYPERVISOR_vcpu_down(
+    int vcpu)
+{
+    int ret;
+    unsigned long ign1;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1)
+       : "0" (__HYPERVISOR_sched_op),
+         "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift))
+        : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_vcpu_up(
+    int vcpu)
+{
+    int ret;
+    unsigned long ign1;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1)
+       : "0" (__HYPERVISOR_sched_op),
+         "1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift))
+        : "memory" );
+
+    return ret;
+}
 #endif /* __HYPERCALL_H__ */
diff -urN b/tools/libxc/xc.h c/tools/libxc/xc.h
--- b/tools/libxc/xc.h  2005-06-07 22:09:56.000000000 -0500
+++ c/tools/libxc/xc.h  2005-06-08 12:57:58.000000000 -0500
@@ -118,6 +118,7 @@
     u64           cpu_time;
     unsigned long max_memkb;
     unsigned int  vcpus;
+    u32           vcpu_online_map;
     s32           vcpu_to_cpu[MAX_VIRT_CPUS];
     cpumap_t      cpumap[MAX_VIRT_CPUS];
 } xc_dominfo_t;
diff -urN b/tools/libxc/xc_domain.c c/tools/libxc/xc_domain.c
--- b/tools/libxc/xc_domain.c   2005-06-07 22:09:59.000000000 -0500
+++ c/tools/libxc/xc_domain.c   2005-06-08 12:57:58.000000000 -0500
@@ -106,6 +106,7 @@
         info->shared_info_frame = op.u.getdomaininfo.shared_info_frame;
         info->cpu_time = op.u.getdomaininfo.cpu_time;
         info->vcpus = op.u.getdomaininfo.n_vcpu;
+        info->vcpu_online_map = op.u.getdomaininfo.vcpu_online_map;
         memcpy(&info->vcpu_to_cpu, &op.u.getdomaininfo.vcpu_to_cpu, 
                sizeof(info->vcpu_to_cpu));
         memcpy(&info->cpumap, &op.u.getdomaininfo.cpumap, 
diff -urN b/tools/python/xen/lowlevel/xc/xc.c 
c/tools/python/xen/lowlevel/xc/xc.c
--- b/tools/python/xen/lowlevel/xc/xc.c 2005-06-07 22:09:59.000000000 -0500
+++ c/tools/python/xen/lowlevel/xc/xc.c 2005-06-08 14:40:13.739556324 -0500
@@ -199,7 +199,7 @@
                                      PyObject *kwds)
 {
     XcObject *xc = (XcObject *)self;
-    PyObject *list, *vcpu_list, *cpumap_list, *info_dict;
+    PyObject *list, *vcpu_list, *online_list, *cpumap_list, *info_dict;
 
     u32 first_dom = 0;
     int max_doms = 1024, nr_doms, i, j;
@@ -221,11 +221,15 @@
     {
         vcpu_list = PyList_New(MAX_VIRT_CPUS);
         cpumap_list = PyList_New(MAX_VIRT_CPUS);
+        online_list = PyList_New(MAX_VIRT_CPUS);
         for ( j = 0; j < MAX_VIRT_CPUS; j++ ) {
             PyList_SetItem( vcpu_list, j, 
                             Py_BuildValue("i", info[i].vcpu_to_cpu[j]));
             PyList_SetItem( cpumap_list, j, 
                             Py_BuildValue("i", info[i].cpumap[j]));
+            PyList_SetItem( online_list, j, 
+                            Py_BuildValue("i", 
+                               ((1<<j) & info[i].vcpu_online_map)>>j));
         }
                  
         info_dict = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:i,s:i"
@@ -244,6 +248,7 @@
                                   "shutdown_reason", info[i].shutdown_reason);
         PyDict_SetItemString( info_dict, "vcpu_to_cpu", vcpu_list );
         PyDict_SetItemString( info_dict, "cpumap", cpumap_list );
+        PyDict_SetItemString( info_dict, "vcpu_online_map", online_list );
         PyList_SetItem( list, i, info_dict);
  
     }
diff -urN b/tools/python/xen/xend/XendDomainInfo.py 
c/tools/python/xen/xend/XendDomainInfo.py
--- b/tools/python/xen/xend/XendDomainInfo.py   2005-06-07 22:09:59.000000000 
-0500
+++ c/tools/python/xen/xend/XendDomainInfo.py   2005-06-08 12:57:58.000000000 
-0500
@@ -393,6 +393,8 @@
             sxpr.append(['cpu_time', self.info['cpu_time']/1e9])    
             sxpr.append(['vcpus', self.info['vcpus']])
             sxpr.append(['cpumap', self.info['cpumap']])
+            sxpr.append(['vcpu_online_map', ''.join(map(lambda x: str(x),
+                        self.info['vcpu_online_map'][0:self.info['vcpus']]))])
             sxpr.append(['vcpu_to_cpu', ''.join(map(lambda x: str(x),
                         self.info['vcpu_to_cpu'][0:self.info['vcpus']]))])
             
diff -urN b/tools/python/xen/xm/main.py c/tools/python/xen/xm/main.py
--- b/tools/python/xen/xm/main.py       2005-06-07 22:09:58.000000000 -0500
+++ c/tools/python/xen/xm/main.py       2005-06-08 15:10:18.268757315 -0500
@@ -403,10 +403,11 @@
                    % d)
 
     def show_vcpus(self, doms):
-        print 'Name              Id  VCPU  CPU  CPUMAP'
+        print 'Name              Id  VCPU  CPU  ONLINE  CPUMAP'
         for dom in doms:
             info = server.xend_domain(dom)
             vcpu_to_cpu = sxp.child_value(info, 'vcpu_to_cpu', 
'?').replace('-','')
+            vcpu_online_map = sxp.child_value(info, 'vcpu_online_map', '?')
             cpumap = sxp.child_value(info, 'cpumap', [])
             mask = ((int(sxp.child_value(info, 'vcpus', '0')))**2) - 1
             count = 0
@@ -416,9 +417,10 @@
                 d['dom']    = int(sxp.child_value(info, 'id', '-1'))
                 d['vcpu']   = int(count)
                 d['cpu']    = int(cpu)
+                d['online']  = int(vcpu_online_map[count])
                 d['cpumap'] = int(cpumap[count])&mask
                 count = count + 1
-                print ("%(name)-16s %(dom)3d  %(vcpu)4d  %(cpu)3d  
0x%(cpumap)x" % d)
+                print ("%(name)-16s %(dom)3d  %(vcpu)4d  %(cpu)3d  %(online)6d 
 0x%(cpumap)x" % d)
 
     def long_list(self, doms):
         for dom in doms:
diff -urN b/xen/common/dom0_ops.c c/xen/common/dom0_ops.c
--- b/xen/common/dom0_ops.c     2005-06-07 22:09:59.000000000 -0500
+++ c/xen/common/dom0_ops.c     2005-06-08 14:09:38.000000000 -0500
@@ -304,6 +304,7 @@
         struct vcpu   *v;
         u64 cpu_time = 0;
         int vcpu_count = 0;
+        u32 vcpu_online_map = 0;
         int flags = DOMFLAGS_PAUSED | DOMFLAGS_BLOCKED;
 
         read_lock(&domlist_lock);
@@ -346,11 +347,15 @@
                 flags |= DOMFLAGS_RUNNING;
             if ( v->cpu_time > cpu_time )
                 cpu_time += v->cpu_time;
+            /* mark which vcpus are not down */
+            if ( !(test_bit(_VCPUF_down, &v->vcpu_flags)) )
+                vcpu_online_map |= (1<<v->vcpu_id);
             vcpu_count++;
         }
 
         op->u.getdomaininfo.cpu_time = cpu_time;
         op->u.getdomaininfo.n_vcpu = vcpu_count;
+        op->u.getdomaininfo.vcpu_online_map = vcpu_online_map;
 
         op->u.getdomaininfo.flags = flags |
             ((d->domain_flags & DOMF_dying)    ? DOMFLAGS_DYING    : 0) |
@@ -373,7 +378,8 @@
     { 
         struct vcpu_guest_context *c;
         struct domain             *d;
-        struct vcpu               *v;
+        struct vcpu               *v = NULL;
+        int i;
 
         d = find_domain_by_id(op->u.getvcpucontext.domain);
         if ( d == NULL )
@@ -388,8 +394,15 @@
             put_domain(d);
             break;
         }
+
+        /* find first valid vcpu starting from request. */
+        for ( i=op->u.getvcpucontext.vcpu; i<MAX_VIRT_CPUS; i++ ) 
+        {
+            v = d->vcpu[i];
+            if ( v != NULL && !(test_bit(_VCPUF_down, &v->vcpu_flags)) )
+                break;
+        }
         
-        v = d->vcpu[op->u.getvcpucontext.vcpu];
         if ( v == NULL )
         {
             ret = -ESRCH;
diff -urN b/xen/common/schedule.c c/xen/common/schedule.c
--- b/xen/common/schedule.c     2005-06-07 22:10:02.000000000 -0500
+++ c/xen/common/schedule.c     2005-06-08 12:57:58.000000000 -0500
@@ -261,6 +261,44 @@
     return 0;
 }
 
+/* Mark target vcpu as non-runnable so it is not scheduled */
+static long do_vcpu_down(int vcpu)
+{
+    struct vcpu *target;
+    
+    if (vcpu > MAX_VIRT_CPUS)
+        return -EINVAL;
+
+    target = current->domain->vcpu[vcpu];
+    /* DEBUG
+     * printk("DOM%d VCPU%d going down\n",
+     *     target->domain->domain_id, target->vcpu_id);
+     */
+    set_bit(_VCPUF_down, &target->vcpu_flags);
+
+    return 0;
+}
+
+/* Mark target vcpu as runnable and wake it */
+static long do_vcpu_up(int vcpu)
+{
+    struct vcpu *target;
+   
+    if (vcpu > MAX_VIRT_CPUS)
+        return -EINVAL;
+
+    target = current->domain->vcpu[vcpu];
+    /* DEBUG
+     * printk("DOM%d VCPU%d coming up\n", 
+     *     target->domain->domain_id, target->vcpu_id);
+     */
+    clear_bit(_VCPUF_down, &target->vcpu_flags);
+    /* wake vcpu */
+    domain_wake(target);
+
+    return 0;
+}
+
 /*
  * Demultiplex scheduler-related hypercalls.
  */
@@ -290,6 +328,16 @@
         domain_shutdown((u8)(op >> SCHEDOP_reasonshift));
         break;
     }
+    case SCHEDOP_vcpu_down:
+    {
+        ret = do_vcpu_down((int)(op >> SCHEDOP_vcpushift));
+        break;
+    }
+    case SCHEDOP_vcpu_up:
+    {
+        ret = do_vcpu_up((int)(op >> SCHEDOP_vcpushift));
+        break;
+    }
 
     default:
         ret = -ENOSYS;
diff -urN b/xen/include/public/dom0_ops.h c/xen/include/public/dom0_ops.h
--- b/xen/include/public/dom0_ops.h     2005-06-07 22:10:00.000000000 -0500
+++ c/xen/include/public/dom0_ops.h     2005-06-08 12:57:58.728731646 -0500
@@ -85,7 +85,8 @@
     memory_t max_pages;
     memory_t shared_info_frame;       /* MFN of shared_info struct */
     u64      cpu_time;
-    u32      n_vcpu;
+    u32      n_vcpu;                      /* number of vcpus allocated */
+    u32      vcpu_online_map;             /* bitmap of online vcpus */
     s32      vcpu_to_cpu[MAX_VIRT_CPUS];  /* current mapping   */
     cpumap_t cpumap[MAX_VIRT_CPUS];       /* allowable mapping */
 } dom0_getdomaininfo_t;
diff -urN b/xen/include/public/xen.h c/xen/include/public/xen.h
--- b/xen/include/public/xen.h  2005-06-07 22:10:00.000000000 -0500
+++ c/xen/include/public/xen.h  2005-06-08 12:57:58.729731508 -0500
@@ -200,8 +200,11 @@
 #define SCHEDOP_yield           0   /* Give up the CPU voluntarily.       */
 #define SCHEDOP_block           1   /* Block until an event is received.  */
 #define SCHEDOP_shutdown        2   /* Stop executing this domain.        */
+#define SCHEDOP_vcpu_down       3   /* make target VCPU not-runnable.     */
+#define SCHEDOP_vcpu_up         4   /* make target VCPU runnable.         */
 #define SCHEDOP_cmdmask       255   /* 8-bit command. */
 #define SCHEDOP_reasonshift     8   /* 8-bit reason code. (SCHEDOP_shutdown) */
+#define SCHEDOP_vcpushift       8   /* 8-bit VCPU target. (SCHEDOP_up|down) */
 
 /*
  * Reason codes for SCHEDOP_shutdown. These may be interpreted by control 
diff -urN b/xen/include/xen/sched.h c/xen/include/xen/sched.h
--- b/xen/include/xen/sched.h   2005-06-07 22:09:54.000000000 -0500
+++ c/xen/include/xen/sched.h   2005-06-08 12:57:58.000000000 -0500
@@ -346,6 +346,9 @@
  /* Initialization completed. */
 #define _VCPUF_initialised     8
 #define VCPUF_initialised      (1UL<<_VCPUF_initialised)
+ /* VCPU is not-runnable */
+#define _VCPUF_down            9
+#define VCPUF_down             (1UL<<_VCPUF_down)
 
 /*
  * Per-domain flags (domain_flags).
@@ -375,7 +378,7 @@
 static inline int domain_runnable(struct vcpu *v)
 {
     return ( (atomic_read(&v->pausecnt) == 0) &&
-             !(v->vcpu_flags & (VCPUF_blocked|VCPUF_ctrl_pause)) &&
+             !(v->vcpu_flags & (VCPUF_blocked|VCPUF_ctrl_pause|VCPUF_down)) &&
              !(v->domain->domain_flags & (DOMF_shutdown|DOMF_shuttingdown)) );
 }
 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.