[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH][RESUBMIT] don't schedule unplugged vcpus
* Ryan Harper <ryanh@xxxxxxxxxx> [2005-06-06 16:04]: > This patch extends the CONFIG_HOTPLUG_CPU behavior down into the > hypervisor. Currently when a CPU in Linux is moved offline, > > echo 0 > /sys/devices/system/cpu/cpuX/online > > the offline cpu yields its slice back to the hypervisor. This patch > adds two SCHEDOPS (vcpu_down/vcpu_up) which set/clear a new VCPU flag, > VCPU_down. The domain_runnable() check now looks at this flag and > subsequently the vcpu is not scheduled when VCPU_down is set. > > The patch was built and tested against 20050606 nightly snapshot. > Testing requires DOMU with CONFIG_SMP and CONFIG_HOTPLUG_CPU. Please > apply. I've added in changes to DOM0_GETDOMINFO and DOM0_GETVCPUCONTEXT hypercalls. dominfo now creates a vcpu_online_map bitmap which marks whether vcpus are up or down. I didn't want to clobber either the total number of vcpus allocated to a domain (n_vcpu) or the vcpu_to_cpu mapping since both are still valid whether the vcpu is being scheduled or not. I modified vcpucontext to give the context for the first vcpu that is not down. If the requested vcpu is down, it will return the context of the next vcpu that is up, or -ESRCH if no vcpu past the requested vcpu is valid. I modified xm list -v to display an ONLINE column which indicates the online status of each vcpu in a domain. Please apply. 
-- Ryan Harper Software Engineer; Linux Technology Center IBM Corp., Austin, Tx (512) 838-9253 T/L: 678-9253 ryanh@xxxxxxxxxx diffstat output: linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c | 7 + linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c | 4 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h | 31 +++++++ tools/libxc/xc.h | 1 tools/libxc/xc_domain.c | 1 tools/python/xen/lowlevel/xc/xc.c | 7 + tools/python/xen/xend/XendDomainInfo.py | 2 tools/python/xen/xm/main.py | 6 - xen/common/dom0_ops.c | 17 +++ xen/common/schedule.c | 48 +++++++++++ xen/include/public/dom0_ops.h | 3 xen/include/public/xen.h | 3 xen/include/xen/sched.h | 5 - 13 files changed, 127 insertions(+), 8 deletions(-) Signed-off-by: Ryan Harper <ryanh@xxxxxxxxxx> --- diff -urN b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c c/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c --- b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c 2005-06-07 22:09:53.000000000 -0500 +++ c/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c 2005-06-08 12:57:58.658741356 -0500 @@ -154,8 +154,13 @@ cpu_clear(cpu, cpu_idle_map); rmb(); - if (cpu_is_offline(cpu)) + if (cpu_is_offline(cpu)) { +#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU) + /* Tell hypervisor not to schedule dead vcpus */ + HYPERVISOR_vcpu_down(cpu); +#endif play_dead(); + } irq_stat[cpu].idle_timestamp = jiffies; xen_idle(); diff -urN b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c c/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c --- b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c 2005-06-07 22:10:00.000000000 -0500 +++ c/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c 2005-06-08 12:57:58.663740663 -0500 @@ -1380,6 +1380,10 @@ } #ifdef CONFIG_HOTPLUG_CPU +#ifdef CONFIG_XEN + /* Tell hypervisor to bring vcpu up */ + HYPERVISOR_vcpu_up(cpu); +#endif /* Already up, and in cpu_quiescent now? 
*/ if (cpu_isset(cpu, smp_commenced_mask)) { cpu_enable(cpu); diff -urN b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h c/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h --- b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h 2005-06-07 22:10:03.000000000 -0500 +++ c/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h 2005-06-08 12:57:58.664740524 -0500 @@ -517,4 +517,35 @@ return ret; } +static inline int +HYPERVISOR_vcpu_down( + int vcpu) +{ + int ret; + unsigned long ign1; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1) + : "0" (__HYPERVISOR_sched_op), + "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift)) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_vcpu_up( + int vcpu) +{ + int ret; + unsigned long ign1; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1) + : "0" (__HYPERVISOR_sched_op), + "1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift)) + : "memory" ); + + return ret; +} #endif /* __HYPERCALL_H__ */ diff -urN b/tools/libxc/xc.h c/tools/libxc/xc.h --- b/tools/libxc/xc.h 2005-06-07 22:09:56.000000000 -0500 +++ c/tools/libxc/xc.h 2005-06-08 12:57:58.000000000 -0500 @@ -118,6 +118,7 @@ u64 cpu_time; unsigned long max_memkb; unsigned int vcpus; + u32 vcpu_online_map; s32 vcpu_to_cpu[MAX_VIRT_CPUS]; cpumap_t cpumap[MAX_VIRT_CPUS]; } xc_dominfo_t; diff -urN b/tools/libxc/xc_domain.c c/tools/libxc/xc_domain.c --- b/tools/libxc/xc_domain.c 2005-06-07 22:09:59.000000000 -0500 +++ c/tools/libxc/xc_domain.c 2005-06-08 12:57:58.000000000 -0500 @@ -106,6 +106,7 @@ info->shared_info_frame = op.u.getdomaininfo.shared_info_frame; info->cpu_time = op.u.getdomaininfo.cpu_time; info->vcpus = op.u.getdomaininfo.n_vcpu; + info->vcpu_online_map = op.u.getdomaininfo.vcpu_online_map; memcpy(&info->vcpu_to_cpu, &op.u.getdomaininfo.vcpu_to_cpu, sizeof(info->vcpu_to_cpu)); memcpy(&info->cpumap, &op.u.getdomaininfo.cpumap, diff -urN b/tools/python/xen/lowlevel/xc/xc.c 
c/tools/python/xen/lowlevel/xc/xc.c --- b/tools/python/xen/lowlevel/xc/xc.c 2005-06-07 22:09:59.000000000 -0500 +++ c/tools/python/xen/lowlevel/xc/xc.c 2005-06-08 14:40:13.739556324 -0500 @@ -199,7 +199,7 @@ PyObject *kwds) { XcObject *xc = (XcObject *)self; - PyObject *list, *vcpu_list, *cpumap_list, *info_dict; + PyObject *list, *vcpu_list, *online_list, *cpumap_list, *info_dict; u32 first_dom = 0; int max_doms = 1024, nr_doms, i, j; @@ -221,11 +221,15 @@ { vcpu_list = PyList_New(MAX_VIRT_CPUS); cpumap_list = PyList_New(MAX_VIRT_CPUS); + online_list = PyList_New(MAX_VIRT_CPUS); for ( j = 0; j < MAX_VIRT_CPUS; j++ ) { PyList_SetItem( vcpu_list, j, Py_BuildValue("i", info[i].vcpu_to_cpu[j])); PyList_SetItem( cpumap_list, j, Py_BuildValue("i", info[i].cpumap[j])); + PyList_SetItem( online_list, j, + Py_BuildValue("i", + ((1<<j) & info[i].vcpu_online_map)>>j)); } info_dict = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:i,s:i" @@ -244,6 +248,7 @@ "shutdown_reason", info[i].shutdown_reason); PyDict_SetItemString( info_dict, "vcpu_to_cpu", vcpu_list ); PyDict_SetItemString( info_dict, "cpumap", cpumap_list ); + PyDict_SetItemString( info_dict, "vcpu_online_map", online_list ); PyList_SetItem( list, i, info_dict); } diff -urN b/tools/python/xen/xend/XendDomainInfo.py c/tools/python/xen/xend/XendDomainInfo.py --- b/tools/python/xen/xend/XendDomainInfo.py 2005-06-07 22:09:59.000000000 -0500 +++ c/tools/python/xen/xend/XendDomainInfo.py 2005-06-08 12:57:58.000000000 -0500 @@ -393,6 +393,8 @@ sxpr.append(['cpu_time', self.info['cpu_time']/1e9]) sxpr.append(['vcpus', self.info['vcpus']]) sxpr.append(['cpumap', self.info['cpumap']]) + sxpr.append(['vcpu_online_map', ''.join(map(lambda x: str(x), + self.info['vcpu_online_map'][0:self.info['vcpus']]))]) sxpr.append(['vcpu_to_cpu', ''.join(map(lambda x: str(x), self.info['vcpu_to_cpu'][0:self.info['vcpus']]))]) diff -urN b/tools/python/xen/xm/main.py c/tools/python/xen/xm/main.py --- b/tools/python/xen/xm/main.py 2005-06-07 
22:09:58.000000000 -0500 +++ c/tools/python/xen/xm/main.py 2005-06-08 15:10:18.268757315 -0500 @@ -403,10 +403,11 @@ % d) def show_vcpus(self, doms): - print 'Name Id VCPU CPU CPUMAP' + print 'Name Id VCPU CPU ONLINE CPUMAP' for dom in doms: info = server.xend_domain(dom) vcpu_to_cpu = sxp.child_value(info, 'vcpu_to_cpu', '?').replace('-','') + vcpu_online_map = sxp.child_value(info, 'vcpu_online_map', '?') cpumap = sxp.child_value(info, 'cpumap', []) mask = ((int(sxp.child_value(info, 'vcpus', '0')))**2) - 1 count = 0 @@ -416,9 +417,10 @@ d['dom'] = int(sxp.child_value(info, 'id', '-1')) d['vcpu'] = int(count) d['cpu'] = int(cpu) + d['online'] = int(vcpu_online_map[count]) d['cpumap'] = int(cpumap[count])&mask count = count + 1 - print ("%(name)-16s %(dom)3d %(vcpu)4d %(cpu)3d 0x%(cpumap)x" % d) + print ("%(name)-16s %(dom)3d %(vcpu)4d %(cpu)3d %(online)6d 0x%(cpumap)x" % d) def long_list(self, doms): for dom in doms: diff -urN b/xen/common/dom0_ops.c c/xen/common/dom0_ops.c --- b/xen/common/dom0_ops.c 2005-06-07 22:09:59.000000000 -0500 +++ c/xen/common/dom0_ops.c 2005-06-08 14:09:38.000000000 -0500 @@ -304,6 +304,7 @@ struct vcpu *v; u64 cpu_time = 0; int vcpu_count = 0; + u32 vcpu_online_map = 0; int flags = DOMFLAGS_PAUSED | DOMFLAGS_BLOCKED; read_lock(&domlist_lock); @@ -346,11 +347,15 @@ flags |= DOMFLAGS_RUNNING; if ( v->cpu_time > cpu_time ) cpu_time += v->cpu_time; + /* mark which vcpus are not down */ + if ( !(test_bit(_VCPUF_down, &v->vcpu_flags)) ) + vcpu_online_map |= (1<<v->vcpu_id); vcpu_count++; } op->u.getdomaininfo.cpu_time = cpu_time; op->u.getdomaininfo.n_vcpu = vcpu_count; + op->u.getdomaininfo.vcpu_online_map = vcpu_online_map; op->u.getdomaininfo.flags = flags | ((d->domain_flags & DOMF_dying) ? 
DOMFLAGS_DYING : 0) | @@ -373,7 +378,8 @@ { struct vcpu_guest_context *c; struct domain *d; - struct vcpu *v; + struct vcpu *v = NULL; + int i; d = find_domain_by_id(op->u.getvcpucontext.domain); if ( d == NULL ) @@ -388,8 +394,15 @@ put_domain(d); break; } + + /* find first valid vcpu starting from request. */ + for ( i=op->u.getvcpucontext.vcpu; i<MAX_VIRT_CPUS; i++ ) + { + v = d->vcpu[i]; + if ( v != NULL && !(test_bit(_VCPUF_down, &v->vcpu_flags)) ) + break; + } - v = d->vcpu[op->u.getvcpucontext.vcpu]; if ( v == NULL ) { ret = -ESRCH; diff -urN b/xen/common/schedule.c c/xen/common/schedule.c --- b/xen/common/schedule.c 2005-06-07 22:10:02.000000000 -0500 +++ c/xen/common/schedule.c 2005-06-08 12:57:58.000000000 -0500 @@ -261,6 +261,44 @@ return 0; } +/* Mark target vcpu as non-runnable so it is not scheduled */ +static long do_vcpu_down(int vcpu) +{ + struct vcpu *target; + + if (vcpu > MAX_VIRT_CPUS) + return -EINVAL; + + target = current->domain->vcpu[vcpu]; + /* DEBUG + * printk("DOM%d VCPU%d going down\n", + * target->domain->domain_id, target->vcpu_id); + */ + set_bit(_VCPUF_down, &target->vcpu_flags); + + return 0; +} + +/* Mark target vcpu as runnable and wake it */ +static long do_vcpu_up(int vcpu) +{ + struct vcpu *target; + + if (vcpu > MAX_VIRT_CPUS) + return -EINVAL; + + target = current->domain->vcpu[vcpu]; + /* DEBUG + * printk("DOM%d VCPU%d coming up\n", + * target->domain->domain_id, target->vcpu_id); + */ + clear_bit(_VCPUF_down, &target->vcpu_flags); + /* wake vcpu */ + domain_wake(target); + + return 0; +} + /* * Demultiplex scheduler-related hypercalls. 
*/ @@ -290,6 +328,16 @@ domain_shutdown((u8)(op >> SCHEDOP_reasonshift)); break; } + case SCHEDOP_vcpu_down: + { + ret = do_vcpu_down((int)(op >> SCHEDOP_vcpushift)); + break; + } + case SCHEDOP_vcpu_up: + { + ret = do_vcpu_up((int)(op >> SCHEDOP_vcpushift)); + break; + } default: ret = -ENOSYS; diff -urN b/xen/include/public/dom0_ops.h c/xen/include/public/dom0_ops.h --- b/xen/include/public/dom0_ops.h 2005-06-07 22:10:00.000000000 -0500 +++ c/xen/include/public/dom0_ops.h 2005-06-08 12:57:58.728731646 -0500 @@ -85,7 +85,8 @@ memory_t max_pages; memory_t shared_info_frame; /* MFN of shared_info struct */ u64 cpu_time; - u32 n_vcpu; + u32 n_vcpu; /* number of vcpus allocated */ + u32 vcpu_online_map; /* bitmap of online vcpus */ s32 vcpu_to_cpu[MAX_VIRT_CPUS]; /* current mapping */ cpumap_t cpumap[MAX_VIRT_CPUS]; /* allowable mapping */ } dom0_getdomaininfo_t; diff -urN b/xen/include/public/xen.h c/xen/include/public/xen.h --- b/xen/include/public/xen.h 2005-06-07 22:10:00.000000000 -0500 +++ c/xen/include/public/xen.h 2005-06-08 12:57:58.729731508 -0500 @@ -200,8 +200,11 @@ #define SCHEDOP_yield 0 /* Give up the CPU voluntarily. */ #define SCHEDOP_block 1 /* Block until an event is received. */ #define SCHEDOP_shutdown 2 /* Stop executing this domain. */ +#define SCHEDOP_vcpu_down 3 /* make target VCPU not-runnable. */ +#define SCHEDOP_vcpu_up 4 /* make target VCPU runnable. */ #define SCHEDOP_cmdmask 255 /* 8-bit command. */ #define SCHEDOP_reasonshift 8 /* 8-bit reason code. (SCHEDOP_shutdown) */ +#define SCHEDOP_vcpushift 8 /* 8-bit VCPU target. (SCHEDOP_up|down) */ /* * Reason codes for SCHEDOP_shutdown. These may be interpreted by control diff -urN b/xen/include/xen/sched.h c/xen/include/xen/sched.h --- b/xen/include/xen/sched.h 2005-06-07 22:09:54.000000000 -0500 +++ c/xen/include/xen/sched.h 2005-06-08 12:57:58.000000000 -0500 @@ -346,6 +346,9 @@ /* Initialization completed. 
*/ #define _VCPUF_initialised 8 #define VCPUF_initialised (1UL<<_VCPUF_initialised) + /* VCPU is not-runnable */ +#define _VCPUF_down 9 +#define VCPUF_down (1UL<<_VCPUF_down) /* * Per-domain flags (domain_flags). @@ -375,7 +378,7 @@ static inline int domain_runnable(struct vcpu *v) { return ( (atomic_read(&v->pausecnt) == 0) && - !(v->vcpu_flags & (VCPUF_blocked|VCPUF_ctrl_pause)) && + !(v->vcpu_flags & (VCPUF_blocked|VCPUF_ctrl_pause|VCPUF_down)) && !(v->domain->domain_flags & (DOMF_shutdown|DOMF_shuttingdown)) ); } _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |