[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [Patch 1/6] xen: cpupool support - hypervisor support of cpupools


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
  • From: Juergen Gross <juergen.gross@xxxxxxxxxxxxxx>
  • Date: Fri, 17 Apr 2009 11:53:56 +0200
  • Delivery-date: Fri, 17 Apr 2009 02:56:26 -0700
  • Domainkey-signature: s=s1536a; d=ts.fujitsu.com; c=nofws; q=dns; h=X-SBRSScore:X-IronPort-AV:Received:X-IronPort-AV: Received:Received:Message-ID:Date:From:Organization: User-Agent:MIME-Version:To:Subject:X-Enigmail-Version: Content-Type; b=t3WwjMSh2l9diu2v0OapHIDGATMF07rlFYtYYb8loojF6KqkAy1QJbnW X4xamnSSkuWN0eAl8nI7xSmBRaxDfZJ4qOvV++sg8eh24n37hPGBsNO5W LxJkC91XwgF6W4i6bPo9EEtSXZsPzW5a+kZykL8dQTIkOdkBsNU6LhHyf Vs1NyzOkH90m8HgtEMMkF1qHNiI1DxEw+oHnKGg2RfjYN/eytIXecKGUx xQpyPNlgyVblAg4f9CStHxQjfTwJC;
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>

Signed-off-by: juergen.gross@xxxxxxxxxxxxxx

-- 
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6                       Telephone: +49 (0) 89 636 47950
Fujitsu Technology Solutions               e-mail: juergen.gross@xxxxxxxxxxxxxx
Otto-Hahn-Ring 6                        Internet: ts.fujitsu.com
D-81739 Muenchen                 Company details: ts.fujitsu.com/imprint.html
diff -r 655dc3bc1d8e xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/arch/x86/acpi/cpu_idle.c      Thu Apr 16 15:04:13 2009 +0200
@@ -198,7 +198,7 @@ static void acpi_processor_idle(void)
 
     cpufreq_dbs_timer_suspend();
 
-    sched_tick_suspend();
+    sched_tick_suspend(smp_processor_id());
     /*
      * sched_tick_suspend may raise TIMER_SOFTIRQ by __stop_timer,
      * which will break the later assumption of no sofirq pending,
@@ -216,7 +216,7 @@ static void acpi_processor_idle(void)
     if ( softirq_pending(smp_processor_id()) )
     {
         local_irq_enable();
-        sched_tick_resume();
+        sched_tick_resume(smp_processor_id());
         cpufreq_dbs_timer_resume();
         return;
     }
@@ -237,7 +237,7 @@ static void acpi_processor_idle(void)
             pm_idle_save();
         else
             acpi_safe_halt();
-        sched_tick_resume();
+        sched_tick_resume(smp_processor_id());
         cpufreq_dbs_timer_resume();
         return;
     }
@@ -345,7 +345,7 @@ static void acpi_processor_idle(void)
 
     default:
         local_irq_enable();
-        sched_tick_resume();
+        sched_tick_resume(smp_processor_id());
         cpufreq_dbs_timer_resume();
         return;
     }
@@ -357,7 +357,7 @@ static void acpi_processor_idle(void)
         cx->time += sleep_ticks;
     }
 
-    sched_tick_resume();
+    sched_tick_resume(smp_processor_id());
     cpufreq_dbs_timer_resume();
 
     if ( cpuidle_current_governor->reflect )
diff -r 655dc3bc1d8e xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/arch/x86/domain.c     Thu Apr 09 11:58:17 2009 +0200
@@ -1412,7 +1412,13 @@ struct migrate_info {
     void (*saved_schedule_tail)(struct vcpu *);
     cpumask_t saved_affinity;
     unsigned int nest;
+    int borrowed;
 };
+
+long continue_hypercall_on_cpu_dummy(void *data)
+{
+    return 0;
+}
 
 static void continue_hypercall_on_cpu_helper(struct vcpu *v)
 {
@@ -1420,8 +1426,16 @@ static void continue_hypercall_on_cpu_he
     struct migrate_info *info = v->arch.continue_info;
     cpumask_t mask = info->saved_affinity;
     void (*saved_schedule_tail)(struct vcpu *) = info->saved_schedule_tail;
+    int cpu = -1;
 
     regs->eax = info->func(info->data);
+
+    if ( (info->nest == 0) && info->borrowed &&
+         ((cpu = cpupool_return_cpu(v->domain->cpupool)) >= 0) )
+    {
+        continue_hypercall_on_cpu(cpu, continue_hypercall_on_cpu_dummy,
+            info->data);
+    }
 
     if ( info->nest-- == 0 )
     {
@@ -1440,27 +1454,32 @@ int continue_hypercall_on_cpu(int cpu, l
     struct migrate_info *info;
     cpumask_t mask = cpumask_of_cpu(cpu);
     int rc;
+    int borrowed = 0;
 
     if ( cpu == smp_processor_id() )
         return func(data);
 
+    borrowed = cpupool_borrow_cpu(v->domain->cpupool, cpu);
+
     info = v->arch.continue_info;
     if ( info == NULL )
     {
         info = xmalloc(struct migrate_info);
+        rc = -ENOMEM;
         if ( info == NULL )
-            return -ENOMEM;
+            goto out;
 
         rc = vcpu_lock_affinity(v, &mask);
         if ( rc )
         {
             xfree(info);
-            return rc;
+            goto out;
         }
 
         info->saved_schedule_tail = v->arch.schedule_tail;
         info->saved_affinity = mask;
         info->nest = 0;
+        info->borrowed = 0;
 
         v->arch.schedule_tail = continue_hypercall_on_cpu_helper;
         v->arch.continue_info = info;
@@ -1470,16 +1489,22 @@ int continue_hypercall_on_cpu(int cpu, l
         BUG_ON(info->nest != 0);
         rc = vcpu_locked_change_affinity(v, &mask);
         if ( rc )
-            return rc;
+            goto out;
         info->nest++;
     }
 
+    info->borrowed += borrowed;
     info->func = func;
     info->data = data;
 
     /* Dummy return value will be overwritten by new schedule_tail. */
     BUG_ON(!test_bit(SCHEDULE_SOFTIRQ, &softirq_pending(smp_processor_id())));
     return 0;
+
+out:
+    if ( borrowed )
+        cpupool_return_cpu(v->domain->cpupool);
+    return rc;
 }
 
 #define next_arg(fmt, args) ({                                              \
diff -r 655dc3bc1d8e xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/arch/x86/domain_build.c       Thu Apr 09 11:58:46 2009 +0200
@@ -9,6 +9,7 @@
 #include <xen/lib.h>
 #include <xen/ctype.h>
 #include <xen/sched.h>
+#include <xen/sched-if.h>
 #include <xen/smp.h>
 #include <xen/delay.h>
 #include <xen/event.h>
@@ -706,13 +707,13 @@ int __init construct_dom0(
         shared_info(d, vcpu_info[i].evtchn_upcall_mask) = 1;
 
     if ( opt_dom0_max_vcpus == 0 )
-        opt_dom0_max_vcpus = num_online_cpus();
+        opt_dom0_max_vcpus = num_cpupool_cpus(cpupool0);
     if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS )
         opt_dom0_max_vcpus = MAX_VIRT_CPUS;
     printk("Dom0 has maximum %u VCPUs\n", opt_dom0_max_vcpus);
 
     for ( i = 1; i < opt_dom0_max_vcpus; i++ )
-        (void)alloc_vcpu(d, i, i % num_online_cpus());
+        (void)alloc_vcpu(d, i, i % num_cpupool_cpus(cpupool0));
 
     /* Set up CR3 value for write_ptbase */
     if ( paging_mode_enabled(d) )
diff -r 655dc3bc1d8e xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/arch/x86/mm.c Thu Apr 09 12:00:02 2009 +0200
@@ -212,7 +212,7 @@ void __init arch_init_memory(void)
      * Any Xen-heap pages that we will allow to be mapped will have
      * their domain field set to dom_xen.
      */
-    dom_xen = domain_create(DOMID_XEN, DOMCRF_dummy, 0);
+    dom_xen = domain_create(DOMID_XEN, CPUPOOLID_NONE, DOMCRF_dummy, 0);
     BUG_ON(dom_xen == NULL);
 
     /*
@@ -220,7 +220,7 @@ void __init arch_init_memory(void)
      * This domain owns I/O pages that are within the range of the page_info
      * array. Mappings occur at the priv of the caller.
      */
-    dom_io = domain_create(DOMID_IO, DOMCRF_dummy, 0);
+    dom_io = domain_create(DOMID_IO, CPUPOOLID_NONE, DOMCRF_dummy, 0);
     BUG_ON(dom_io == NULL);
 
     /* First 1MB of RAM is historically marked as I/O. */
diff -r 655dc3bc1d8e xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/arch/x86/setup.c      Thu Apr 16 08:20:11 2009 +0200
@@ -2,6 +2,7 @@
 #include <xen/init.h>
 #include <xen/lib.h>
 #include <xen/sched.h>
+#include <xen/sched-if.h>
 #include <xen/domain.h>
 #include <xen/serial.h>
 #include <xen/softirq.h>
@@ -232,7 +233,7 @@ static void __init init_idle_domain(void
     /* Domain creation requires that scheduler structures are initialised. */
     scheduler_init();
 
-    idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0);
+    idle_domain = domain_create(IDLE_DOMAIN_ID, CPUPOOLID_NONE, 0, 0);
     if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) )
         BUG();
 
@@ -995,8 +996,12 @@ void __init __start_xen(unsigned long mb
     if ( !tboot_protect_mem_regions() )
         panic("Could not protect TXT memory regions\n");
 
+    /* Create initial cpupool 0. */
+    cpupool0 = cpupool_create(0, NULL);
+    if ( (cpupool0 == NULL) || cpupool0_cpu_assign(cpupool0) )
+        panic("Error creating cpupool 0\n");
     /* Create initial domain 0. */
-    dom0 = domain_create(0, DOMCRF_s3_integrity, DOM0_SSIDREF);
+    dom0 = domain_create(0, 0, DOMCRF_s3_integrity, DOM0_SSIDREF);
     if ( (dom0 == NULL) || (alloc_vcpu(dom0, 0, 0) == NULL) )
         panic("Error creating domain 0\n");
 
diff -r 655dc3bc1d8e xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/arch/x86/smpboot.c    Thu Apr 09 12:04:14 2009 +0200
@@ -1265,7 +1265,7 @@ int __cpu_disable(void)
        /* It's now safe to remove this processor from the online map */
        cpu_clear(cpu, cpu_online_map);
 
-       cpu_disable_scheduler();
+       cpu_disable_scheduler(cpu, 0);
 
        return 0;
 }
@@ -1299,7 +1299,7 @@ int cpu_down(unsigned int cpu)
        int err = 0;
 
        spin_lock(&cpu_add_remove_lock);
-       if (num_online_cpus() == 1) {
+       if (cpupool_cpu_remove(cpu)) {
                err = -EBUSY;
                goto out;
        }
@@ -1451,6 +1451,7 @@ int __devinit __cpu_up(unsigned int cpu)
                process_pending_timers();
        }
 
+       cpupool_cpu_add(cpu);
        cpufreq_add_cpu(cpu);
        return 0;
 }
diff -r 655dc3bc1d8e xen/common/Makefile
--- a/xen/common/Makefile       Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/common/Makefile       Thu Apr 09 12:04:41 2009 +0200
@@ -1,4 +1,5 @@ obj-y += bitmap.o
 obj-y += bitmap.o
+obj-y += cpupool.o
 obj-y += domctl.o
 obj-y += domain.o
 obj-y += event_channel.o
diff -r 655dc3bc1d8e xen/common/domain.c
--- a/xen/common/domain.c       Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/common/domain.c       Thu Apr 09 13:45:33 2009 +0200
@@ -187,7 +187,7 @@ struct vcpu *alloc_idle_vcpu(unsigned in
         return v;
 
     d = (vcpu_id == 0) ?
-        domain_create(IDLE_DOMAIN_ID, 0, 0) :
+        domain_create(IDLE_DOMAIN_ID, CPUPOOLID_NONE, 0, 0) :
         idle_vcpu[cpu_id - vcpu_id]->domain;
     BUG_ON(d == NULL);
 
@@ -198,7 +198,7 @@ struct vcpu *alloc_idle_vcpu(unsigned in
 }
 
 struct domain *domain_create(
-    domid_t domid, unsigned int domcr_flags, ssidref_t ssidref)
+    domid_t domid, int poolid, unsigned int domcr_flags, ssidref_t ssidref)
 {
     struct domain *d, **pd;
     enum { INIT_xsm = 1u<<0, INIT_rangeset = 1u<<1, INIT_evtchn = 1u<<2,
@@ -259,6 +259,9 @@ struct domain *domain_create(
     d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
     d->irq_caps   = rangeset_new(d, "Interrupts", 0);
     if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
+        goto fail;
+
+    if ( cpupool_add_domain(d, poolid) != 0 )
         goto fail;
 
     if ( sched_init_domain(d) != 0 )
@@ -564,6 +567,8 @@ static void complete_domain_destroy(stru
 
     sched_destroy_domain(d);
 
+    cpupool_rm_domain(d);
+
     /* Free page used by xen oprofile buffer. */
     free_xenoprof_pages(d);
 
diff -r 655dc3bc1d8e xen/common/domctl.c
--- a/xen/common/domctl.c       Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/common/domctl.c       Thu Apr 16 08:20:11 2009 +0200
@@ -11,6 +11,7 @@
 #include <xen/lib.h>
 #include <xen/mm.h>
 #include <xen/sched.h>
+#include <xen/sched-if.h>
 #include <xen/domain.h>
 #include <xen/event.h>
 #include <xen/domain_page.h>
@@ -138,15 +139,18 @@ void getdomaininfo(struct domain *d, str
     info->max_pages         = d->max_pages;
     info->shared_info_frame = mfn_to_gmfn(d, __pa(d->shared_info)>>PAGE_SHIFT);
 
+    info->cpupool = d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE;
+
     memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
 }
 
-static unsigned int default_vcpu0_location(void)
+static unsigned int default_vcpu0_location(struct domain *dom)
 {
     struct domain *d;
     struct vcpu   *v;
     unsigned int   i, cpu, nr_cpus, *cnt;
     cpumask_t      cpu_exclude_map;
+    cpumask_t      online;
 
     /* Do an initial CPU placement. Pick the least-populated CPU. */
     nr_cpus = last_cpu(cpu_possible_map) + 1;
@@ -171,7 +175,8 @@ static unsigned int default_vcpu0_locati
     if ( cpus_weight(cpu_sibling_map[0]) > 1 )
         cpu = next_cpu(cpu, cpu_sibling_map[0]);
     cpu_exclude_map = cpu_sibling_map[0];
-    for_each_online_cpu ( i )
+    online = (dom->cpupool == NULL) ? cpu_online_map : dom->cpupool->cpu_valid;
+    for_each_cpu_mask(i, online)
     {
         if ( cpu_isset(i, cpu_exclude_map) )
             continue;
@@ -366,12 +371,13 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         domid_t        dom;
         static domid_t rover = 0;
         unsigned int domcr_flags;
+        int            pool = 0;
 
         ret = -EINVAL;
         if ( supervisor_mode_kernel ||
              (op->u.createdomain.flags &
              ~(XEN_DOMCTL_CDF_hvm_guest | XEN_DOMCTL_CDF_hap |
-               XEN_DOMCTL_CDF_s3_integrity)) )
+               XEN_DOMCTL_CDF_s3_integrity | XEN_DOMCTL_CDF_pool)) )
             break;
 
         dom = op->domain;
@@ -405,9 +411,11 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
             domcr_flags |= DOMCRF_hap;
         if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_s3_integrity )
             domcr_flags |= DOMCRF_s3_integrity;
+        if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_pool )
+            pool = op->u.createdomain.cpupool;
 
         ret = -ENOMEM;
-        d = domain_create(dom, domcr_flags, op->u.createdomain.ssidref);
+        d = domain_create(dom, pool, domcr_flags, op->u.createdomain.ssidref);
         if ( d == NULL )
             break;
 
@@ -426,6 +434,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
     {
         struct domain *d;
         unsigned int i, max = op->u.max_vcpus.max, cpu;
+        cpumask_t online;
 
         ret = -ESRCH;
         if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
@@ -455,14 +464,15 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
             goto maxvcpu_out;
 
         ret = -ENOMEM;
+        online = (d->cpupool == NULL) ? cpu_online_map : d->cpupool->cpu_valid;
         for ( i = 0; i < max; i++ )
         {
             if ( d->vcpu[i] != NULL )
                 continue;
 
             cpu = (i == 0) ?
-                default_vcpu0_location() :
-                cycle_cpu(d->vcpu[i-1]->processor, cpu_online_map);
+                default_vcpu0_location(d) :
+                cycle_cpu(d->vcpu[i-1]->processor, online);
 
             if ( alloc_vcpu(d, i, cpu) == NULL )
                 goto maxvcpu_out;
@@ -890,6 +900,14 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
     }
     break;
 
+    case XEN_DOMCTL_cpupool_op:
+    {
+        ret = cpupool_do_domctl(op);
+        if ( (ret == 0) && copy_to_guest(u_domctl, op, 1) )
+            ret = -EFAULT;
+    }
+    break;
+
     default:
         ret = arch_do_domctl(op, u_domctl);
         break;
diff -r 655dc3bc1d8e xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/common/sched_credit.c Thu Apr 16 09:41:15 2009 +0200
@@ -69,11 +69,15 @@
 /*
  * Useful macros
  */
+#define CSCHED_PRIV(_ops)   \
+    ((struct csched_private *)((_ops)->sched_data))
 #define CSCHED_PCPU(_c)     \
     ((struct csched_pcpu *)per_cpu(schedule_data, _c).sched_priv)
 #define CSCHED_VCPU(_vcpu)  ((struct csched_vcpu *) (_vcpu)->sched_priv)
 #define CSCHED_DOM(_dom)    ((struct csched_dom *) (_dom)->sched_priv)
 #define RUNQ(_cpu)          (&(CSCHED_PCPU(_cpu)->runq))
+#define CSCHED_CPUONLINE(_pool)    \
+    (((_pool) == NULL) ? cpupool_free_cpus : (_pool)->cpu_valid)
 
 
 /*
@@ -157,10 +161,12 @@ struct csched_private {
     struct timer  master_ticker;
     unsigned int master;
     cpumask_t idlers;
+    cpumask_t cpus;
     uint32_t weight;
     uint32_t credit;
     int credit_balance;
     uint32_t runq_sort;
+    int ticker_active;
 };
 
 
@@ -168,8 +174,10 @@ struct csched_private {
  * Global variables
  */
 static struct csched_private csched_priv;
+static struct csched_private *csched_priv0 = NULL;
 
 static void csched_tick(void *_cpu);
+static void csched_acct(void *dummy);
 
 static inline int
 __vcpu_on_runq(struct csched_vcpu *svc)
@@ -214,6 +222,7 @@ __runq_tickle(unsigned int cpu, struct c
 {
     struct csched_vcpu * const cur =
         CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
+    struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
     cpumask_t mask;
 
     ASSERT(cur);
@@ -240,14 +249,14 @@ __runq_tickle(unsigned int cpu, struct c
      */
     if ( cur->pri > CSCHED_PRI_IDLE )
     {
-        if ( cpus_empty(csched_priv.idlers) )
+        if ( cpus_empty(prv->idlers) )
         {
             CSCHED_STAT_CRANK(tickle_idlers_none);
         }
         else
         {
             CSCHED_STAT_CRANK(tickle_idlers_some);
-            cpus_or(mask, mask, csched_priv.idlers);
+            cpus_or(mask, mask, prv->idlers);
             cpus_and(mask, mask, new->vcpu->cpu_affinity);
         }
     }
@@ -257,38 +266,78 @@ __runq_tickle(unsigned int cpu, struct c
         cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
 }
 
-static int
-csched_pcpu_init(int cpu)
+static void
+csched_free_pdata(struct scheduler *ops, void *pcpu, int cpu)
+{
+    struct csched_private *prv = CSCHED_PRIV(ops);
+    struct csched_pcpu *spc = pcpu;
+    unsigned long flags;
+
+    if ( spc == NULL )
+        return;
+
+    spin_lock_irqsave(&prv->lock, flags);
+
+    prv->credit -= CSCHED_CREDITS_PER_ACCT;
+    prv->ncpus--;
+    cpu_clear(cpu, prv->idlers);
+    cpu_clear(cpu, prv->cpus);
+    if ( (prv->master == cpu) && (prv->ncpus > 0) )
+    {
+        prv->master = first_cpu(prv->cpus);
+        migrate_timer(&prv->master_ticker, prv->master);
+    }
+    kill_timer(&spc->ticker);
+    if ( prv->ncpus == 0 )
+        kill_timer(&prv->master_ticker);
+
+    spin_unlock_irqrestore(&prv->lock, flags);
+
+    xfree(spc);
+}
+
+static void *
+csched_alloc_pdata(struct scheduler *ops, int cpu)
 {
     struct csched_pcpu *spc;
+    struct csched_private *prv = CSCHED_PRIV(ops);
     unsigned long flags;
 
     /* Allocate per-PCPU info */
     spc = xmalloc(struct csched_pcpu);
     if ( spc == NULL )
-        return -1;
-
-    spin_lock_irqsave(&csched_priv.lock, flags);
+        return NULL;
+
+    spin_lock_irqsave(&prv->lock, flags);
 
     /* Initialize/update system-wide config */
-    csched_priv.credit += CSCHED_CREDITS_PER_ACCT;
-    if ( csched_priv.ncpus <= cpu )
-        csched_priv.ncpus = cpu + 1;
-    if ( csched_priv.master >= csched_priv.ncpus )
-        csched_priv.master = cpu;
+    prv->credit += CSCHED_CREDITS_PER_ACCT;
+    prv->ncpus++;
+    cpu_set(cpu, prv->cpus);
+    if ( (prv->ncpus == 1) && (prv != csched_priv0) )
+    {
+        prv->master = cpu;
+        init_timer( &prv->master_ticker, csched_acct, prv, cpu);
+        prv->ticker_active = 2;
+    }
 
     init_timer(&spc->ticker, csched_tick, (void *)(unsigned long)cpu, cpu);
+
+    if ( prv == csched_priv0 )
+        prv->master = first_cpu(prv->cpus);
+
     INIT_LIST_HEAD(&spc->runq);
-    spc->runq_sort_last = csched_priv.runq_sort;
-    per_cpu(schedule_data, cpu).sched_priv = spc;
+    spc->runq_sort_last = prv->runq_sort;
+    if ( per_cpu(schedule_data, cpu).sched_priv == NULL )
+        per_cpu(schedule_data, cpu).sched_priv = spc;
 
     /* Start off idling... */
     BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr));
-    cpu_set(cpu, csched_priv.idlers);
-
-    spin_unlock_irqrestore(&csched_priv.lock, flags);
-
-    return 0;
+    cpu_set(cpu, prv->idlers);
+
+    spin_unlock_irqrestore(&prv->lock, flags);
+
+    return spc;
 }
 
 #ifndef NDEBUG
@@ -361,17 +410,19 @@ __csched_vcpu_is_migrateable(struct vcpu
 }
 
 static int
-csched_cpu_pick(struct vcpu *vc)
+csched_cpu_pick(struct scheduler *ops, struct vcpu *vc)
 {
     cpumask_t cpus;
     cpumask_t idlers;
+    cpumask_t online;
     int cpu;
 
     /*
      * Pick from online CPUs in VCPU's affinity mask, giving a
      * preference to its current processor if it's in there.
      */
-    cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
+    online = CSCHED_CPUONLINE(vc->domain->cpupool);
+    cpus_and(cpus, online, vc->cpu_affinity);
     cpu = cpu_isset(vc->processor, cpus)
             ? vc->processor
             : cycle_cpu(vc->processor, cpus);
@@ -389,7 +440,7 @@ csched_cpu_pick(struct vcpu *vc)
      * like run two VCPUs on co-hyperthreads while there are idle cores
      * or sockets.
      */
-    idlers = csched_priv.idlers;
+    idlers = CSCHED_PRIV(ops)->idlers;
     cpu_set(cpu, idlers);
     cpus_and(cpus, cpus, idlers);
     cpu_clear(cpu, cpus);
@@ -433,12 +484,12 @@ csched_cpu_pick(struct vcpu *vc)
 }
 
 static inline void
-__csched_vcpu_acct_start(struct csched_vcpu *svc)
+__csched_vcpu_acct_start(struct csched_private *prv, struct csched_vcpu *svc)
 {
     struct csched_dom * const sdom = svc->sdom;
     unsigned long flags;
 
-    spin_lock_irqsave(&csched_priv.lock, flags);
+    spin_lock_irqsave(&(prv->lock), flags);
 
     if ( list_empty(&svc->active_vcpu_elem) )
     {
@@ -449,16 +500,17 @@ __csched_vcpu_acct_start(struct csched_v
         list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
         if ( list_empty(&sdom->active_sdom_elem) )
         {
-            list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
-            csched_priv.weight += sdom->weight;
-        }
-    }
-
-    spin_unlock_irqrestore(&csched_priv.lock, flags);
+            list_add(&sdom->active_sdom_elem, &(prv->active_sdom));
+            prv->weight += sdom->weight;
+        }
+    }
+
+    spin_unlock_irqrestore(&(prv->lock), flags);
 }
 
 static inline void
-__csched_vcpu_acct_stop_locked(struct csched_vcpu *svc)
+__csched_vcpu_acct_stop_locked(struct csched_private *prv,
+    struct csched_vcpu *svc)
 {
     struct csched_dom * const sdom = svc->sdom;
 
@@ -471,16 +523,17 @@ __csched_vcpu_acct_stop_locked(struct cs
     list_del_init(&svc->active_vcpu_elem);
     if ( list_empty(&sdom->active_vcpu) )
     {
-        BUG_ON( csched_priv.weight < sdom->weight );
+        BUG_ON( prv->weight < sdom->weight );
         list_del_init(&sdom->active_sdom_elem);
-        csched_priv.weight -= sdom->weight;
+        prv->weight -= sdom->weight;
     }
 }
 
 static void
-csched_vcpu_acct(unsigned int cpu)
+csched_vcpu_acct(struct csched_private *prv, unsigned int cpu)
 {
     struct csched_vcpu * const svc = CSCHED_VCPU(current);
+    struct scheduler *ops = per_cpu(scheduler, cpu);
 
     ASSERT( current->processor == cpu );
     ASSERT( svc->sdom != NULL );
@@ -508,9 +561,9 @@ csched_vcpu_acct(unsigned int cpu)
      */
     if ( list_empty(&svc->active_vcpu_elem) )
     {
-        __csched_vcpu_acct_start(svc);
-    }
-    else if ( csched_cpu_pick(current) != cpu )
+        __csched_vcpu_acct_start(prv, svc);
+    }
+    else if ( csched_cpu_pick(ops, current) != cpu )
     {
         CSCHED_VCPU_STAT_CRANK(svc, migrate_r);
         CSCHED_STAT_CRANK(migrate_running);
@@ -519,34 +572,54 @@ csched_vcpu_acct(unsigned int cpu)
     }
 }
 
-static int
-csched_vcpu_init(struct vcpu *vc)
-{
-    struct domain * const dom = vc->domain;
-    struct csched_dom *sdom = CSCHED_DOM(dom);
+static void *
+csched_alloc_vdata(struct scheduler *ops, struct vcpu *vc)
+{
     struct csched_vcpu *svc;
-
-    CSCHED_STAT_CRANK(vcpu_init);
 
     /* Allocate per-VCPU info */
     svc = xmalloc(struct csched_vcpu);
     if ( svc == NULL )
-        return -1;
+        return NULL;
 
     INIT_LIST_HEAD(&svc->runq_elem);
     INIT_LIST_HEAD(&svc->active_vcpu_elem);
-    svc->sdom = sdom;
+    svc->sdom = CSCHED_DOM(vc->domain);
     svc->vcpu = vc;
     atomic_set(&svc->credit, 0);
     svc->flags = 0U;
-    svc->pri = is_idle_domain(dom) ? CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
+    svc->pri = is_idle_domain(vc->domain) ?
+        CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
     CSCHED_VCPU_STATS_RESET(svc);
+    return svc;
+}
+
+static void
+csched_vcpu_insert(struct scheduler *ops, struct vcpu *vc)
+{
+    struct csched_vcpu *svc = vc->sched_priv;
+
+    if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
+        __runq_insert(vc->processor, svc);
+}
+
+static int
+csched_vcpu_init(struct scheduler *ops, struct vcpu *vc)
+{
+    struct csched_vcpu *svc;
+
+    CSCHED_STAT_CRANK(vcpu_init);
+
+    svc = csched_alloc_vdata(ops, vc);
+    if ( svc == NULL )
+        return -1;
+
     vc->sched_priv = svc;
 
     /* Allocate per-PCPU info */
     if ( unlikely(!CSCHED_PCPU(vc->processor)) )
     {
-        if ( csched_pcpu_init(vc->processor) != 0 )
+        if ( csched_alloc_pdata(ops, vc->processor) == NULL )
             return -1;
     }
 
@@ -555,29 +628,41 @@ csched_vcpu_init(struct vcpu *vc)
 }
 
 static void
-csched_vcpu_destroy(struct vcpu *vc)
+csched_free_vdata(struct scheduler *ops, void *priv)
+{
+    struct csched_private *prv = CSCHED_PRIV(ops);
+    struct csched_vcpu *svc = priv;
+    unsigned long flags;
+
+    if ( __vcpu_on_runq(svc) )
+        __runq_remove(svc);
+
+    spin_lock_irqsave(&(prv->lock), flags);
+
+    if ( !list_empty(&svc->active_vcpu_elem) )
+        __csched_vcpu_acct_stop_locked(prv, svc);
+
+    spin_unlock_irqrestore(&(prv->lock), flags);
+
+    xfree(svc);
+}
+
+static void
+csched_vcpu_destroy(struct scheduler *ops, struct vcpu *vc)
 {
     struct csched_vcpu * const svc = CSCHED_VCPU(vc);
     struct csched_dom * const sdom = svc->sdom;
-    unsigned long flags;
 
     CSCHED_STAT_CRANK(vcpu_destroy);
 
     BUG_ON( sdom == NULL );
     BUG_ON( !list_empty(&svc->runq_elem) );
 
-    spin_lock_irqsave(&csched_priv.lock, flags);
-
-    if ( !list_empty(&svc->active_vcpu_elem) )
-        __csched_vcpu_acct_stop_locked(svc);
-
-    spin_unlock_irqrestore(&csched_priv.lock, flags);
-
-    xfree(svc);
+    csched_free_vdata(ops, svc);
 }
 
 static void
-csched_vcpu_sleep(struct vcpu *vc)
+csched_vcpu_sleep(struct scheduler *ops, struct vcpu *vc)
 {
     struct csched_vcpu * const svc = CSCHED_VCPU(vc);
 
@@ -592,7 +677,7 @@ csched_vcpu_sleep(struct vcpu *vc)
 }
 
 static void
-csched_vcpu_wake(struct vcpu *vc)
+csched_vcpu_wake(struct scheduler *ops, struct vcpu *vc)
 {
     struct csched_vcpu * const svc = CSCHED_VCPU(vc);
     const unsigned int cpu = vc->processor;
@@ -648,10 +733,11 @@ csched_vcpu_wake(struct vcpu *vc)
 
 static int
 csched_dom_cntl(
-    struct domain *d,
+    struct scheduler *ops, struct domain *d,
     struct xen_domctl_scheduler_op *op)
 {
     struct csched_dom * const sdom = CSCHED_DOM(d);
+    struct csched_private *prv = CSCHED_PRIV(ops);
     unsigned long flags;
 
     if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
@@ -663,14 +749,14 @@ csched_dom_cntl(
     {
         ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
 
-        spin_lock_irqsave(&csched_priv.lock, flags);
+        spin_lock_irqsave(&(prv->lock), flags);
 
         if ( op->u.credit.weight != 0 )
         {
             if ( !list_empty(&sdom->active_sdom_elem) )
             {
-                csched_priv.weight -= sdom->weight;
-                csched_priv.weight += op->u.credit.weight;
+                prv->weight -= sdom->weight;
+                prv->weight += op->u.credit.weight;
             }
             sdom->weight = op->u.credit.weight;
         }
@@ -678,14 +764,14 @@ csched_dom_cntl(
         if ( op->u.credit.cap != (uint16_t)~0U )
             sdom->cap = op->u.credit.cap;
 
-        spin_unlock_irqrestore(&csched_priv.lock, flags);
+        spin_unlock_irqrestore(&(prv->lock), flags);
     }
 
     return 0;
 }
 
 static int
-csched_dom_init(struct domain *dom)
+csched_dom_init(struct scheduler *ops, struct domain *dom)
 {
     struct csched_dom *sdom;
 
@@ -711,7 +797,7 @@ csched_dom_init(struct domain *dom)
 }
 
 static void
-csched_dom_destroy(struct domain *dom)
+csched_dom_destroy(struct scheduler *ops, struct domain *dom)
 {
     CSCHED_STAT_CRANK(dom_destroy);
     xfree(CSCHED_DOM(dom));
@@ -725,7 +811,7 @@ csched_dom_destroy(struct domain *dom)
  * remember the last UNDER to make the move up operation O(1).
  */
 static void
-csched_runq_sort(unsigned int cpu)
+csched_runq_sort(struct csched_private *prv, unsigned int cpu)
 {
     struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
     struct list_head *runq, *elem, *next, *last_under;
@@ -733,7 +819,7 @@ csched_runq_sort(unsigned int cpu)
     unsigned long flags;
     int sort_epoch;
 
-    sort_epoch = csched_priv.runq_sort;
+    sort_epoch = prv->runq_sort;
     if ( sort_epoch == spc->runq_sort_last )
         return;
 
@@ -768,8 +854,9 @@ csched_runq_sort(unsigned int cpu)
 }
 
 static void
-csched_acct(void* dummy)
-{
+csched_acct(void *dummy)
+{
+    struct csched_private *prv = dummy;
     unsigned long flags;
     struct list_head *iter_vcpu, *next_vcpu;
     struct list_head *iter_sdom, *next_sdom;
@@ -786,22 +873,22 @@ csched_acct(void* dummy)
     int credit;
 
 
-    spin_lock_irqsave(&csched_priv.lock, flags);
-
-    weight_total = csched_priv.weight;
-    credit_total = csched_priv.credit;
+    spin_lock_irqsave(&(prv->lock), flags);
+
+    weight_total = prv->weight;
+    credit_total = prv->credit;
 
     /* Converge balance towards 0 when it drops negative */
-    if ( csched_priv.credit_balance < 0 )
-    {
-        credit_total -= csched_priv.credit_balance;
+    if ( prv->credit_balance < 0 )
+    {
+        credit_total -= prv->credit_balance;
         CSCHED_STAT_CRANK(acct_balance);
     }
 
     if ( unlikely(weight_total == 0) )
     {
-        csched_priv.credit_balance = 0;
-        spin_unlock_irqrestore(&csched_priv.lock, flags);
+        prv->credit_balance = 0;
+        spin_unlock_irqrestore(&(prv->lock), flags);
         CSCHED_STAT_CRANK(acct_no_work);
         goto out;
     }
@@ -813,7 +900,7 @@ csched_acct(void* dummy)
     credit_xtra = 0;
     credit_cap = 0U;
 
-    list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
+    list_for_each_safe( iter_sdom, next_sdom, &(prv->active_sdom) )
     {
         sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
 
@@ -833,9 +920,9 @@ csched_acct(void* dummy)
          * only when the system-wide credit balance is negative.
          */
         credit_peak = sdom->active_vcpu_count * CSCHED_CREDITS_PER_ACCT;
-        if ( csched_priv.credit_balance < 0 )
-        {
-            credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) +
+        if ( prv->credit_balance < 0 )
+        {
+            credit_peak += ( ( -prv->credit_balance * sdom->weight) +
                              (weight_total - 1)
                            ) / weight_total;
         }
@@ -877,7 +964,7 @@ csched_acct(void* dummy)
                  */
                 CSCHED_STAT_CRANK(acct_reorder);
                 list_del(&sdom->active_sdom_elem);
-                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+                list_add(&sdom->active_sdom_elem, &(prv->active_sdom));
             }
 
             credit_fair = credit_peak;
@@ -943,7 +1030,7 @@ csched_acct(void* dummy)
                 /* Upper bound on credits means VCPU stops earning */
                 if ( credit > CSCHED_CREDITS_PER_TSLICE )
                 {
-                    __csched_vcpu_acct_stop_locked(svc);
+                    __csched_vcpu_acct_stop_locked(prv, svc);
                     credit = 0;
                     atomic_set(&svc->credit, credit);
                 }
@@ -955,15 +1042,15 @@ csched_acct(void* dummy)
         }
     }
 
-    csched_priv.credit_balance = credit_balance;
-
-    spin_unlock_irqrestore(&csched_priv.lock, flags);
+    prv->credit_balance = credit_balance;
+
+    spin_unlock_irqrestore(&(prv->lock), flags);
 
     /* Inform each CPU that its runq needs to be sorted */
-    csched_priv.runq_sort++;
+    prv->runq_sort++;
 
 out:
-    set_timer( &csched_priv.master_ticker, NOW() +
+    set_timer( &(prv->master_ticker), NOW() +
             MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT );
 }
 
@@ -972,6 +1059,7 @@ csched_tick(void *_cpu)
 {
     unsigned int cpu = (unsigned long)_cpu;
     struct csched_pcpu *spc = CSCHED_PCPU(cpu);
+    struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
 
     spc->tick++;
 
@@ -979,7 +1067,7 @@ csched_tick(void *_cpu)
      * Accounting for running VCPU
      */
     if ( !is_idle_vcpu(current) )
-        csched_vcpu_acct(cpu);
+        csched_vcpu_acct(prv, cpu);
 
     /*
      * Check if runq needs to be sorted
@@ -988,7 +1076,7 @@ csched_tick(void *_cpu)
      * modified priorities. This is a special O(n) sort and runs at most
      * once per accounting period (currently 30 milliseconds).
      */
-    csched_runq_sort(cpu);
+    csched_runq_sort(prv, cpu);
 
     set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
 }
@@ -1040,10 +1128,12 @@ csched_runq_steal(int peer_cpu, int cpu,
 }
 
 static struct csched_vcpu *
-csched_load_balance(int cpu, struct csched_vcpu *snext)
+csched_load_balance(struct csched_private *prv, int cpu,
+    struct csched_vcpu *snext)
 {
     struct csched_vcpu *speer;
     cpumask_t workers;
+    cpumask_t online;
     int peer_cpu;
 
     BUG_ON( cpu != snext->vcpu->processor );
@@ -1063,7 +1153,8 @@ csched_load_balance(int cpu, struct csch
      * Peek at non-idling CPUs in the system, starting with our
      * immediate neighbour.
      */
-    cpus_andnot(workers, cpu_online_map, csched_priv.idlers);
+    online = CSCHED_CPUONLINE(per_cpu(cpupool, cpu));
+    cpus_andnot(workers, online, prv->idlers);
     cpu_clear(cpu, workers);
     peer_cpu = cpu;
 
@@ -1105,16 +1196,39 @@ csched_load_balance(int cpu, struct csch
  * fast for the common case.
  */
 static struct task_slice
-csched_schedule(s_time_t now)
+csched_schedule(struct scheduler *ops, s_time_t now)
 {
     const int cpu = smp_processor_id();
     struct list_head * const runq = RUNQ(cpu);
     struct csched_vcpu * const scurr = CSCHED_VCPU(current);
+    struct csched_private *prv = CSCHED_PRIV(ops);
     struct csched_vcpu *snext;
     struct task_slice ret;
 
     CSCHED_STAT_CRANK(schedule);
     CSCHED_VCPU_CHECK(current);
+
+    if ( unlikely(!cpu_isset(cpu, CSCHED_CPUONLINE(per_cpu(cpupool, cpu)))) )
+    {
+        struct list_head * iter;
+
+        snext = scurr;
+        if (is_idle_vcpu(current))
+            goto out;
+
+        if ( vcpu_runnable(current) )
+            __runq_insert(cpu, scurr);
+
+        list_for_each(iter, runq)
+        {
+            snext = __runq_elem(iter);
+            if ( snext->pri == CSCHED_PRI_IDLE )
+                break;
+        }
+        BUG_ON( snext->pri != CSCHED_PRI_IDLE );
+        __runq_remove(snext);
+        goto out;
+    }
 
     /*
      * Select next runnable local VCPU (ie top of local runq)
@@ -1137,20 +1251,21 @@ csched_schedule(s_time_t now)
     if ( snext->pri > CSCHED_PRI_TS_OVER )
         __runq_remove(snext);
     else
-        snext = csched_load_balance(cpu, snext);
-
+        snext = csched_load_balance(prv, cpu, snext);
+
+out:
     /*
      * Update idlers mask if necessary. When we're idling, other CPUs
      * will tickle us when they get extra work.
      */
     if ( snext->pri == CSCHED_PRI_IDLE )
     {
-        if ( !cpu_isset(cpu, csched_priv.idlers) )
-            cpu_set(cpu, csched_priv.idlers);
-    }
-    else if ( cpu_isset(cpu, csched_priv.idlers) )
-    {
-        cpu_clear(cpu, csched_priv.idlers);
+        if ( !cpu_isset(cpu, prv->idlers) )
+            cpu_set(cpu, prv->idlers);
+    }
+    else if ( cpu_isset(cpu, prv->idlers) )
+    {
+        cpu_clear(cpu, prv->idlers);
     }
 
     /*
@@ -1194,7 +1309,7 @@ csched_dump_vcpu(struct csched_vcpu *svc
 }
 
 static void
-csched_dump_pcpu(int cpu)
+csched_dump_pcpu(struct scheduler *ops, int cpu)
 {
     struct list_head *runq, *iter;
     struct csched_pcpu *spc;
@@ -1231,9 +1346,10 @@ csched_dump_pcpu(int cpu)
 }
 
 static void
-csched_dump(void)
+csched_dump(struct scheduler *ops)
 {
     struct list_head *iter_sdom, *iter_svc;
+    struct csched_private *prv = CSCHED_PRIV(ops);
     int loop;
     char idlers_buf[100];
 
@@ -1250,12 +1366,12 @@ csched_dump(void)
            "\tticks per tslice   = %d\n"
            "\tticks per acct     = %d\n"
            "\tmigration delay    = %uus\n",
-           csched_priv.ncpus,
-           csched_priv.master,
-           csched_priv.credit,
-           csched_priv.credit_balance,
-           csched_priv.weight,
-           csched_priv.runq_sort,
+           prv->ncpus,
+           prv->master,
+           prv->credit,
+           prv->credit_balance,
+           prv->weight,
+           prv->runq_sort,
            CSCHED_DEFAULT_WEIGHT,
            CSCHED_MSECS_PER_TICK,
            CSCHED_CREDITS_PER_TICK,
@@ -1263,12 +1379,12 @@ csched_dump(void)
            CSCHED_TICKS_PER_ACCT,
            vcpu_migration_delay);
 
-    cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), csched_priv.idlers);
+    cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), prv->idlers);
     printk("idlers: %s\n", idlers_buf);
 
     printk("active vcpus:\n");
     loop = 0;
-    list_for_each( iter_sdom, &csched_priv.active_sdom )
+    list_for_each( iter_sdom, &(prv->active_sdom) )
     {
         struct csched_dom *sdom;
         sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
@@ -1284,18 +1400,29 @@ csched_dump(void)
     }
 }
 
-static void
-csched_init(void)
-{
-    spin_lock_init(&csched_priv.lock);
-    INIT_LIST_HEAD(&csched_priv.active_sdom);
-    csched_priv.ncpus = 0;
-    csched_priv.master = UINT_MAX;
-    cpus_clear(csched_priv.idlers);
-    csched_priv.weight = 0U;
-    csched_priv.credit = 0U;
-    csched_priv.credit_balance = 0;
-    csched_priv.runq_sort = 0U;
+static int
+csched_init(struct scheduler *ops)
+{
+    struct csched_private *prv;
+
+    prv = xmalloc(struct csched_private);
+    if ( prv == NULL )
+        return 1;
+    if (csched_priv0 == NULL)
+        csched_priv0 = prv;
+    ops->sched_data = prv;
+    spin_lock_init(&(prv->lock));
+    INIT_LIST_HEAD(&(prv->active_sdom));
+    prv->ncpus = 0;
+    prv->master = UINT_MAX;
+    cpus_clear(prv->idlers);
+    prv->weight = 0U;
+    prv->credit = 0U;
+    prv->credit_balance = 0;
+    prv->runq_sort = 0U;
+    prv->ticker_active = (csched_priv0 == prv) ? 0 : 1;
+
+    return 0;
 }
 
 /* Tickers cannot be kicked until SMP subsystem is alive. */
@@ -1305,8 +1432,10 @@ static __init int csched_start_tickers(v
     unsigned int cpu;
 
     /* Is the credit scheduler initialised? */
-    if ( csched_priv.ncpus == 0 )
+    if ( (csched_priv0 == NULL) || (csched_priv0->ncpus == 0) )
         return 0;
+
+    csched_priv0->ticker_active = 1;
 
     for_each_online_cpu ( cpu )
     {
@@ -1314,45 +1443,70 @@ static __init int csched_start_tickers(v
         set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
     }
 
-    init_timer( &csched_priv.master_ticker, csched_acct, NULL,
-                    csched_priv.master);
-
-    set_timer( &csched_priv.master_ticker, NOW() +
+    init_timer( &(csched_priv0->master_ticker), csched_acct, csched_priv0,
+                    csched_priv0->master);
+
+    set_timer( &(csched_priv0->master_ticker), NOW() +
             MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT );
 
     return 0;
 }
 __initcall(csched_start_tickers);
 
-static void csched_tick_suspend(void)
+static void
+csched_deinit(struct scheduler *ops)
+{
+    struct csched_private *prv;
+
+    prv = CSCHED_PRIV(ops);
+    if ( prv != NULL )
+        xfree(prv);
+}
+
+static void csched_tick_suspend(struct scheduler *ops, unsigned int cpu)
 {
     struct csched_pcpu *spc;
 
-    spc = CSCHED_PCPU(smp_processor_id());
+    spc = CSCHED_PCPU(cpu);
 
     stop_timer(&spc->ticker);
 }
 
-static void csched_tick_resume(void)
+static void csched_tick_resume(struct scheduler *ops, unsigned int cpu)
 {
     struct csched_pcpu *spc;
     uint64_t now = NOW();
-
-    spc = CSCHED_PCPU(smp_processor_id());
+    struct csched_private *prv;
+
+    prv = CSCHED_PRIV(ops);
+    if ( !prv->ticker_active )
+        return;
+
+    spc = CSCHED_PCPU(cpu);
 
     set_timer(&spc->ticker, now + MILLISECS(CSCHED_MSECS_PER_TICK)
             - now % MILLISECS(CSCHED_MSECS_PER_TICK) );
+
+    if ( (prv->ticker_active == 2) && (prv->master == cpu) )
+    {
+        set_timer( &prv->master_ticker, now +
+            MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT -
+            now % (MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT));
+        prv->ticker_active = 1;
+    }
 }
 
 struct scheduler sched_credit_def = {
     .name           = "SMP Credit Scheduler",
     .opt_name       = "credit",
     .sched_id       = XEN_SCHEDULER_CREDIT,
+    .sched_data     = &csched_priv,
 
     .init_domain    = csched_dom_init,
     .destroy_domain = csched_dom_destroy,
 
     .init_vcpu      = csched_vcpu_init,
+    .insert_vcpu    = csched_vcpu_insert,
     .destroy_vcpu   = csched_vcpu_destroy,
 
     .sleep          = csched_vcpu_sleep,
@@ -1366,6 +1520,11 @@ struct scheduler sched_credit_def = {
     .dump_cpu_state = csched_dump_pcpu,
     .dump_settings  = csched_dump,
     .init           = csched_init,
+    .deinit         = csched_deinit,
+    .alloc_vdata    = csched_alloc_vdata,
+    .free_vdata     = csched_free_vdata,
+    .alloc_pdata    = csched_alloc_pdata,
+    .free_pdata     = csched_free_pdata,
 
     .tick_suspend   = csched_tick_suspend,
     .tick_resume    = csched_tick_resume,
diff -r 655dc3bc1d8e xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c   Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/common/sched_sedf.c   Thu Apr 09 14:54:22 2009 +0200
@@ -20,6 +20,9 @@
         if ( (_f) <= SEDFLEVEL )                \
             printk(_a );                        \
     } while ( 0 )
+
+#define SEDF_CPUONLINE(_pool)                                             \
+    (((_pool) == NULL) ? cpupool_free_cpus : (_pool)->cpu_valid)
 
 #ifndef NDEBUG
 #define SEDF_STATS
@@ -132,7 +135,7 @@ struct sedf_cpu_info {
 #define sedf_runnable(edom)  (!(EDOM_INFO(edom)->status & SEDF_ASLEEP))
 
 
-static void sedf_dump_cpu_state(int i);
+static void sedf_dump_cpu_state(struct scheduler *ops, int i);
 
 static inline int extraq_on(struct vcpu *d, int i)
 {
@@ -329,30 +332,17 @@ static inline void __add_to_runqueue_sor
 }
 
 
-static int sedf_init_vcpu(struct vcpu *v)
+static void *sedf_alloc_vdata(struct scheduler *ops, struct vcpu *v)
 {
     struct sedf_vcpu_info *inf;
 
-    if ( (v->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
-        return -1;
-    memset(v->sched_priv, 0, sizeof(struct sedf_vcpu_info));
-
-    inf = EDOM_INFO(v);
+    inf = xmalloc(struct sedf_vcpu_info);
+    if ( inf == NULL )
+        return NULL;
+
+    memset(inf, 0, sizeof(struct sedf_vcpu_info));
     inf->vcpu = v;
- 
-    /* Allocate per-CPU context if this is the first domain to be added. */
-    if ( unlikely(per_cpu(schedule_data, v->processor).sched_priv == NULL) )
-    {
-        per_cpu(schedule_data, v->processor).sched_priv = 
-            xmalloc(struct sedf_cpu_info);
-        BUG_ON(per_cpu(schedule_data, v->processor).sched_priv == NULL);
-        memset(CPU_INFO(v->processor), 0, sizeof(*CPU_INFO(v->processor)));
-        INIT_LIST_HEAD(WAITQ(v->processor));
-        INIT_LIST_HEAD(RUNQ(v->processor));
-        INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_PEN_Q));
-        INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_UTIL_Q));
-    }
-       
+
     /* Every VCPU gets an equal share of extratime by default. */
     inf->deadl_abs   = 0;
     inf->latency     = 0;
@@ -383,19 +373,69 @@ static int sedf_init_vcpu(struct vcpu *v
     }
     else
     {
-        EDOM_INFO(v)->deadl_abs = 0;
-        EDOM_INFO(v)->status &= ~SEDF_ASLEEP;
-    }
-
+        inf->deadl_abs = 0;
+        inf->status &= ~SEDF_ASLEEP;
+    }
+
+    return inf;
+}
+
+static void *
+sedf_alloc_pdata(struct scheduler *ops, int cpu)
+{
+    struct sedf_cpu_info *spc;
+
+    spc = xmalloc(struct sedf_cpu_info);
+    BUG_ON(spc == NULL);
+    memset(spc, 0, sizeof(*spc));
+    INIT_LIST_HEAD(&spc->waitq);
+    INIT_LIST_HEAD(&spc->runnableq);
+    INIT_LIST_HEAD(&spc->extraq[EXTRA_PEN_Q]);
+    INIT_LIST_HEAD(&spc->extraq[EXTRA_UTIL_Q]);
+
+    return (void *)spc;
+}
+
+static void
+sedf_free_pdata(struct scheduler *ops, void *spc, int cpu)
+{
+    if ( spc == NULL )
+        return;
+
+    xfree(spc);
+}
+
+static int sedf_init_vcpu(struct scheduler *ops, struct vcpu *v)
+{
+    struct sedf_vcpu_info *inf;
+
+    /* Allocate per-CPU context if this is the first domain to be added. */
+    if ( unlikely(per_cpu(schedule_data, v->processor).sched_priv == NULL) )
+    {
+        per_cpu(schedule_data, v->processor).sched_priv = 
+            sedf_alloc_pdata(ops, v->processor);
+    }
+       
+    inf = sedf_alloc_vdata(ops, v);
+    if ( inf == NULL )
+        return -1;
+
+    v->sched_priv = inf;
+ 
     return 0;
 }
 
-static void sedf_destroy_vcpu(struct vcpu *v)
-{
-    xfree(v->sched_priv);
-}
-
-static int sedf_init_domain(struct domain *d)
+static void sedf_free_vdata(struct scheduler *ops, void *priv)
+{
+    xfree(priv);
+}
+
+static void sedf_destroy_vcpu(struct scheduler *ops, struct vcpu *v)
+{
+    sedf_free_vdata(ops, v->sched_priv);
+}
+
+static int sedf_init_domain(struct scheduler *ops, struct domain *d)
 {
     d->sched_priv = xmalloc(struct sedf_dom_info);
     if ( d->sched_priv == NULL )
@@ -406,16 +446,18 @@ static int sedf_init_domain(struct domai
     return 0;
 }
 
-static void sedf_destroy_domain(struct domain *d)
+static void sedf_destroy_domain(struct scheduler *ops, struct domain *d)
 {
     xfree(d->sched_priv);
 }
 
-static int sedf_pick_cpu(struct vcpu *v)
+static int sedf_pick_cpu(struct scheduler *ops, struct vcpu *v)
 {
     cpumask_t online_affinity;
-
-    cpus_and(online_affinity, v->cpu_affinity, cpu_online_map);
+    cpumask_t online;
+
+    online = SEDF_CPUONLINE(v->domain->cpupool);
+    cpus_and(online_affinity, v->cpu_affinity, online);
     return first_cpu(online_affinity);
 }
 
@@ -751,7 +793,7 @@ static struct task_slice sedf_do_extra_s
    -timeslice for the current period used up
    -domain on waitqueue has started it's period
    -and various others ;) in general: determine which domain to run next*/
-static struct task_slice sedf_do_schedule(s_time_t now)
+static struct task_slice sedf_do_schedule(struct scheduler *ops, s_time_t now)
 {
     int                   cpu      = smp_processor_id();
     struct list_head     *runq     = RUNQ(cpu);
@@ -786,6 +828,13 @@ static struct task_slice sedf_do_schedul
     }
  check_waitq:
     update_queues(now, runq, waitq);
+
+    if ( unlikely(!cpu_isset(cpu, SEDF_CPUONLINE(per_cpu(cpupool, cpu)))) )
+    {
+        ret.task = IDLETASK(cpu);
+        ret.time = SECONDS(1);
+        goto sched_done;
+    }
  
     /*now simply pick the first domain from the runqueue, which has the
       earliest deadline, because the list is sorted*/
@@ -848,7 +897,7 @@ static struct task_slice sedf_do_schedul
 }
 
 
-static void sedf_sleep(struct vcpu *d)
+static void sedf_sleep(struct scheduler *ops, struct vcpu *d)
 {
     PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",
           d->domain->domain_id, d->vcpu_id);
@@ -1067,7 +1116,7 @@ static inline int should_switch(struct v
     return 1;
 }
 
-static void sedf_wake(struct vcpu *d)
+static void sedf_wake(struct scheduler *ops, struct vcpu *d)
 {
     s_time_t              now = NOW();
     struct sedf_vcpu_info* inf = EDOM_INFO(d);
@@ -1220,8 +1269,8 @@ static void sedf_dump_domain(struct vcpu
 }
 
 
-/* dumps all domains on hte specified cpu */
-static void sedf_dump_cpu_state(int i)
+/* dumps all domains on the specified cpu */
+static void sedf_dump_cpu_state(struct scheduler *ops, int i)
 {
     struct list_head      *list, *queue, *tmp;
     struct sedf_vcpu_info *d_inf;
@@ -1294,7 +1343,7 @@ static void sedf_dump_cpu_state(int i)
 
 
 /* Adjusts periods and slices of the domains accordingly to their weights. */
-static int sedf_adjust_weights(struct xen_domctl_scheduler_op *cmd)
+static int sedf_adjust_weights(struct cpupool *c, struct xen_domctl_scheduler_op *cmd)
 {
     struct vcpu *p;
     struct domain      *d;
@@ -1315,6 +1364,8 @@ static int sedf_adjust_weights(struct xe
     rcu_read_lock(&domlist_read_lock);
     for_each_domain( d )
     {
+        if ( c != d->cpupool )
+           continue;
         for_each_vcpu( d, p )
         {
             if ( EDOM_INFO(p)->weight )
@@ -1366,7 +1417,7 @@ static int sedf_adjust_weights(struct xe
 
 
 /* set or fetch domain scheduling parameters */
-static int sedf_adjust(struct domain *p, struct xen_domctl_scheduler_op *op)
+static int sedf_adjust(struct scheduler *ops, struct domain *p, struct xen_domctl_scheduler_op *op)
 {
     struct vcpu *v;
     int rc;
@@ -1425,7 +1476,7 @@ static int sedf_adjust(struct domain *p,
             }
         }
 
-        rc = sedf_adjust_weights(op);
+        rc = sedf_adjust_weights(p->cpupool, op);
         if ( rc )
             return rc;
 
@@ -1463,6 +1514,11 @@ struct scheduler sched_sedf_def = {
 
     .init_vcpu      = sedf_init_vcpu,
     .destroy_vcpu   = sedf_destroy_vcpu,
+
+    .alloc_vdata    = sedf_alloc_vdata,
+    .free_vdata     = sedf_free_vdata,
+    .alloc_pdata    = sedf_alloc_pdata,
+    .free_pdata     = sedf_free_pdata,
 
     .do_schedule    = sedf_do_schedule,
     .pick_cpu       = sedf_pick_cpu,
diff -r 655dc3bc1d8e xen/common/schedule.c
--- a/xen/common/schedule.c     Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/common/schedule.c     Thu Apr 16 09:18:40 2009 +0200
@@ -55,6 +55,7 @@ static void poll_timer_fn(void *data);
 
 /* This is global for now so that private implementations can reach it */
 DEFINE_PER_CPU(struct schedule_data, schedule_data);
+DEFINE_PER_CPU(struct scheduler *, scheduler);
 
 extern struct scheduler sched_sedf_def;
 extern struct scheduler sched_credit_def;
@@ -66,9 +67,15 @@ static struct scheduler *schedulers[] = 
 
 static struct scheduler ops;
 
-#define SCHED_OP(fn, ...)                                 \
-         (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ )      \
-          : (typeof(ops.fn(__VA_ARGS__)))0 )
+#define SCHED_OP(opsptr, fn, ...)                                          \
+         (( (opsptr)->fn != NULL ) ? (opsptr)->fn(opsptr, ##__VA_ARGS__ )  \
+          : (typeof((opsptr)->fn(opsptr, ##__VA_ARGS__)))0 )
+
+#define DOM2OP(_d)    (((_d)->cpupool == NULL) ? &ops : &((_d)->cpupool->sched))
+#define VCPU2OP(_v)   (DOM2OP((_v)->domain))
+#define VCPU2ONLINE(_v)                                                    \
+         (((_v)->domain->cpupool == NULL) ? cpu_online_map                 \
+         : (_v)->domain->cpupool->cpu_valid)
 
 static inline void trace_runstate_change(struct vcpu *v, int new_state)
 {
@@ -182,7 +189,13 @@ int sched_init_vcpu(struct vcpu *v, unsi
 
     TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
 
-    return SCHED_OP(init_vcpu, v);
+    if ( SCHED_OP(DOM2OP(d), init_vcpu, v) != 0 )
+        return 1;
+
+    if ( is_idle_domain(d) )
+        per_cpu(schedule_data, v->processor).sched_idlevpriv = v->sched_priv;
+
+    return 0;
 }
 
 void sched_destroy_vcpu(struct vcpu *v)
@@ -190,17 +203,47 @@ void sched_destroy_vcpu(struct vcpu *v)
     kill_timer(&v->periodic_timer);
     kill_timer(&v->singleshot_timer);
     kill_timer(&v->poll_timer);
-    SCHED_OP(destroy_vcpu, v);
+    SCHED_OP(VCPU2OP(v), destroy_vcpu, v);
+}
+
+void sched_move_domain(struct domain *d, struct cpupool *c)
+{
+    struct vcpu *v;
+    unsigned int new_p;
+
+    domain_pause(d);
+
+    new_p = first_cpu(c->cpu_valid);
+    for_each_vcpu ( d, v )
+    {
+        migrate_timer(&v->periodic_timer, new_p);
+        migrate_timer(&v->singleshot_timer, new_p);
+        migrate_timer(&v->poll_timer, new_p);
+
+        SCHED_OP(VCPU2OP(v), destroy_vcpu, v);
+
+        cpus_setall(v->cpu_affinity);
+        v->processor = new_p;
+        SCHED_OP(&(c->sched), init_vcpu, v);
+
+        new_p = next_cpu(new_p, c->cpu_valid);
+        if ( new_p == NR_CPUS )
+            new_p = first_cpu(c->cpu_valid);
+    }
+
+    d->cpupool = c;
+
+    domain_unpause(d);
 }
 
 int sched_init_domain(struct domain *d)
 {
-    return SCHED_OP(init_domain, d);
+    return SCHED_OP(DOM2OP(d), init_domain, d);
 }
 
 void sched_destroy_domain(struct domain *d)
 {
-    SCHED_OP(destroy_domain, d);
+    SCHED_OP(DOM2OP(d), destroy_domain, d);
 }
 
 void vcpu_sleep_nosync(struct vcpu *v)
@@ -214,7 +257,7 @@ void vcpu_sleep_nosync(struct vcpu *v)
         if ( v->runstate.state == RUNSTATE_runnable )
             vcpu_runstate_change(v, RUNSTATE_offline, NOW());
 
-        SCHED_OP(sleep, v);
+        SCHED_OP(VCPU2OP(v), sleep, v);
     }
 
     vcpu_schedule_unlock_irqrestore(v, flags);
@@ -242,7 +285,7 @@ void vcpu_wake(struct vcpu *v)
     {
         if ( v->runstate.state >= RUNSTATE_blocked )
             vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
-        SCHED_OP(wake, v);
+        SCHED_OP(VCPU2OP(v), wake, v);
     }
     else if ( !test_bit(_VPF_blocked, &v->pause_flags) )
     {
@@ -297,7 +340,7 @@ static void vcpu_migrate(struct vcpu *v)
 
     /* Switch to new CPU, then unlock old CPU. */
     old_cpu = v->processor;
-    v->processor = SCHED_OP(pick_cpu, v);
+    v->processor = SCHED_OP(VCPU2OP(v), pick_cpu, v);
     spin_unlock_irqrestore(
         &per_cpu(schedule_data, old_cpu).schedule_lock, flags);
 
@@ -326,22 +369,32 @@ void vcpu_force_reschedule(struct vcpu *
 }
 
 /*
- * This function is used by cpu_hotplug code from stop_machine context.
- * Hence we can avoid needing to take the 
+ * This function is used by cpu_hotplug code from stop_machine context
+ * and from cpupools to switch schedulers on a cpu.
  */
-void cpu_disable_scheduler(void)
+int cpu_disable_scheduler(unsigned int cpu, int lock)
 {
     struct domain *d;
     struct vcpu *v;
-    unsigned int cpu = smp_processor_id();
+    struct cpupool *c;
+    int    ret = 0;
+
+    c = per_cpu(cpupool, cpu);
+    if ( c == NULL )
+        return ret;
 
     for_each_domain ( d )
     {
+        if ( (d->cpupool != c) || c->pool_paused )
+            continue;
+
         for_each_vcpu ( d, v )
         {
             if ( is_idle_vcpu(v) )
                 continue;
 
+            if ( lock != 0 )
+                vcpu_schedule_lock_irq(v);
             if ( (cpus_weight(v->cpu_affinity) == 1) &&
                  cpu_isset(cpu, v->cpu_affinity) )
             {
@@ -351,29 +404,49 @@ void cpu_disable_scheduler(void)
             }
 
             /*
-             * Migrate single-shot timers to CPU0. A new cpu will automatically
-             * be chosen when the timer is next re-set.
+             * Migrate single-shot timers to other cpu of same pool. A new cpu
+             * will automatically be chosen when the timer is next re-set.
              */
             if ( v->singleshot_timer.cpu == cpu )
-                migrate_timer(&v->singleshot_timer, 0);
+            {
+                int cpu_mig;
+
+                cpu_mig = first_cpu(c->cpu_valid);
+                if (cpu_mig == cpu)
+                    cpu_mig = next_cpu(cpu_mig, c->cpu_valid);
+                migrate_timer(&v->singleshot_timer, cpu_mig);
+            }
 
             if ( v->processor == cpu )
             {
                 set_bit(_VPF_migrating, &v->pause_flags);
+                if ( lock != 0 )
+                    vcpu_schedule_unlock_irq(v);
                 vcpu_sleep_nosync(v);
                 vcpu_migrate(v);
             }
+            else if ( lock != 0 )
+                vcpu_schedule_unlock_irq(v);
+            /*
+             * A vcpu active in the hypervisor will not be migratable.
+             * The caller should try again after releasing and reacquiring
+             * all locks.
+             */
+            if ( v->processor == cpu )
+                ret = -EAGAIN;
         }
     }
+    return ret;
 }
 
 static int __vcpu_set_affinity(
     struct vcpu *v, cpumask_t *affinity,
     bool_t old_lock_status, bool_t new_lock_status)
 {
-    cpumask_t online_affinity, old_affinity;
-
-    cpus_and(online_affinity, *affinity, cpu_online_map);
+    cpumask_t online, online_affinity, old_affinity;
+
+    online = VCPU2ONLINE(v);
+    cpus_and(online_affinity, *affinity, online);
     if ( cpus_empty(online_affinity) )
         return -EINVAL;
 
@@ -424,12 +497,13 @@ int vcpu_locked_change_affinity(struct v
 
 void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity)
 {
-    cpumask_t online_affinity;
+    cpumask_t online, online_affinity;
 
     /* Do not fail if no CPU in old affinity mask is online. */
-    cpus_and(online_affinity, *affinity, cpu_online_map);
+    online = VCPU2ONLINE(v);
+    cpus_and(online_affinity, *affinity, online);
     if ( cpus_empty(online_affinity) )
-        *affinity = cpu_online_map;
+        *affinity = VCPU2ONLINE(v);
 
     if ( __vcpu_set_affinity(v, affinity, 1, 0) != 0 )
         BUG();
@@ -721,7 +795,7 @@ long sched_adjust(struct domain *d, stru
     struct vcpu *v;
     long ret;
     
-    if ( (op->sched_id != ops.sched_id) ||
+    if ( (op->sched_id != DOM2OP(d)->sched_id) ||
          ((op->cmd != XEN_DOMCTL_SCHEDOP_putinfo) &&
           (op->cmd != XEN_DOMCTL_SCHEDOP_getinfo)) )
         return -EINVAL;
@@ -748,7 +822,7 @@ long sched_adjust(struct domain *d, stru
     if ( d == current->domain )
         vcpu_schedule_lock_irq(current);
 
-    if ( (ret = SCHED_OP(adjust, d, op)) == 0 )
+    if ( (ret = SCHED_OP(DOM2OP(d), adjust, d, op)) == 0 )
         TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);
 
     if ( d == current->domain )
@@ -796,6 +870,7 @@ static void schedule(void)
 {
     struct vcpu          *prev = current, *next = NULL;
     s_time_t              now = NOW();
+    struct scheduler     *sched = this_cpu(scheduler);
     struct schedule_data *sd;
     struct task_slice     next_slice;
 
@@ -811,7 +886,7 @@ static void schedule(void)
     stop_timer(&sd->s_timer);
     
     /* get policy-specific decision on scheduling... */
-    next_slice = ops.do_schedule(now);
+    next_slice = sched->do_schedule(sched, now);
 
     next = next_slice.task;
 
@@ -911,18 +986,25 @@ static void poll_timer_fn(void *data)
         vcpu_unblock(v);
 }
 
+/* Get scheduler by id */
+struct scheduler *scheduler_get_by_id(unsigned int id)
+{
+    int i;
+
+    for ( i = 0; schedulers[i] != NULL; i++ )
+    {
+        if ( schedulers[i]->sched_id == id )
+            return schedulers[i];
+    }
+    return NULL;
+}
+
 /* Initialise the data structures. */
 void __init scheduler_init(void)
 {
     int i;
 
     open_softirq(SCHEDULE_SOFTIRQ, schedule);
-
-    for_each_cpu ( i )
-    {
-        spin_lock_init(&per_cpu(schedule_data, i).schedule_lock);
-        init_timer(&per_cpu(schedule_data, i).s_timer, s_timer_fn, NULL, i);
-    }
 
     for ( i = 0; schedulers[i] != NULL; i++ )
     {
@@ -934,43 +1016,121 @@ void __init scheduler_init(void)
     if ( schedulers[i] == NULL )
         printk("Could not find scheduler: %s\n", opt_sched);
 
+    for_each_cpu ( i )
+    {
+        per_cpu(scheduler, i) = &ops;
+        spin_lock_init(&per_cpu(schedule_data, i).schedule_lock);
+        init_timer(&per_cpu(schedule_data, i).s_timer, s_timer_fn, NULL, i);
+    }
+
     printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
-    SCHED_OP(init);
-}
-
-void dump_runq(unsigned char key)
-{
-    s_time_t      now = NOW();
-    int           i;
+    if ( SCHED_OP(&ops, init) )
+        panic("scheduler returned error on init\n");
+}
+
+/* switch scheduler on cpu */
+void schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
+{
     unsigned long flags;
-
-    local_irq_save(flags);
-
-    printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
-    SCHED_OP(dump_settings);
-    printk("sched_smt_power_savings: %s\n",
-            sched_smt_power_savings? "enabled":"disabled");
-    printk("NOW=0x%08X%08X\n",  (u32)(now>>32), (u32)now);
-
-    for_each_online_cpu ( i )
+    struct vcpu *v;
+    void *vpriv = NULL;
+    void *ppriv;
+    void *ppriv_old;
+    struct scheduler *old_ops;
+    struct scheduler *new_ops;
+
+    old_ops = per_cpu(scheduler, cpu);
+    new_ops = (c == NULL) ? &ops : &(c->sched);
+    v = per_cpu(schedule_data, cpu).idle;
+    ppriv = SCHED_OP(new_ops, alloc_pdata, cpu);
+    if ( c != NULL )
+        vpriv = SCHED_OP(new_ops, alloc_vdata, v);
+
+    spin_lock_irqsave(&per_cpu(schedule_data, cpu).schedule_lock, flags);
+
+    if ( c == NULL )
+    {
+        vpriv = v->sched_priv;
+        v->sched_priv = per_cpu(schedule_data, cpu).sched_idlevpriv;
+    }
+    else
+    {
+        v->sched_priv = vpriv;
+        vpriv = NULL;
+    }
+    SCHED_OP(old_ops, tick_suspend, cpu);
+    per_cpu(scheduler, cpu) = new_ops;
+    ppriv_old = per_cpu(schedule_data, cpu).sched_priv;
+    per_cpu(schedule_data, cpu).sched_priv = ppriv;
+    SCHED_OP(new_ops, tick_resume, cpu);
+    SCHED_OP(new_ops, insert_vcpu, v);
+
+    spin_unlock_irqrestore(&per_cpu(schedule_data, cpu).schedule_lock, flags);
+
+    if ( vpriv != NULL )
+        SCHED_OP(old_ops, free_vdata, vpriv);
+    SCHED_OP(old_ops, free_pdata, ppriv_old, cpu);
+}
+
+/* init scheduler global data */
+int schedule_init_global(char *name, struct scheduler *sched)
+{
+    int i;
+    struct scheduler *data;
+
+    data = &ops;
+    for ( i = 0; (schedulers[i] != NULL) && (name != NULL) ; i++ )
+    {
+        if ( strcmp(schedulers[i]->opt_name, name) == 0 )
+        {
+            data = schedulers[i];
+            break;
+        }
+    }
+    memcpy(sched, data, sizeof(*sched));
+    return SCHED_OP(sched, init);
+}
+
+/* deinitialize scheduler global data */
+void schedule_deinit_global(struct scheduler *sched)
+{
+    SCHED_OP(sched, deinit);
+}
+
+void schedule_dump(struct cpupool *c)
+{
+    int               i;
+    struct scheduler *sched;
+    cpumask_t         cpus;
+
+    sched = (c == NULL) ? &ops : &(c->sched);
+    cpus = (c == NULL) ? cpupool_free_cpus : c->cpu_valid;
+    printk("Scheduler: %s (%s)\n", sched->name, sched->opt_name);
+    SCHED_OP(sched, dump_settings);
+
+    for_each_cpu_mask (i, cpus)
     {
         spin_lock(&per_cpu(schedule_data, i).schedule_lock);
         printk("CPU[%02d] ", i);
-        SCHED_OP(dump_cpu_state, i);
+        SCHED_OP(sched, dump_cpu_state, i);
         spin_unlock(&per_cpu(schedule_data, i).schedule_lock);
     }
-
-    local_irq_restore(flags);
-}
-
-void sched_tick_suspend(void)
-{
-    SCHED_OP(tick_suspend);
-}
-
-void sched_tick_resume(void)
-{
-    SCHED_OP(tick_resume);
+}
+
+void sched_tick_suspend(unsigned int cpu)
+{
+    struct scheduler *sched;
+
+    sched = per_cpu(scheduler, cpu);
+    SCHED_OP(sched, tick_suspend, cpu);
+}
+
+void sched_tick_resume(unsigned int cpu)
+{
+    struct scheduler *sched;
+
+    sched = per_cpu(scheduler, cpu);
+    SCHED_OP(sched, tick_resume, cpu);
 }
 
 #ifdef CONFIG_COMPAT
diff -r 655dc3bc1d8e xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/include/public/domctl.h       Thu Apr 09 11:47:18 2009 +0200
@@ -59,7 +59,11 @@ struct xen_domctl_createdomain {
  /* Should domain memory integrity be verifed by tboot during Sx? */
 #define _XEN_DOMCTL_CDF_s3_integrity  2
 #define XEN_DOMCTL_CDF_s3_integrity   (1U<<_XEN_DOMCTL_CDF_s3_integrity)
+ /* cpupool is specified (0 otherwise) */
+#define _XEN_DOMCTL_CDF_pool          3
+#define XEN_DOMCTL_CDF_pool           (1U<<_XEN_DOMCTL_CDF_pool)
     uint32_t flags;
+    uint32_t cpupool;
 };
 typedef struct xen_domctl_createdomain xen_domctl_createdomain_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t);
@@ -109,6 +113,7 @@ struct xen_domctl_getdomaininfo {
     uint32_t max_vcpu_id;        /* Maximum VCPUID in use by this domain. */
     uint32_t ssidref;
     xen_domain_handle_t handle;
+    uint32_t cpupool;
 };
 typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t);
@@ -645,6 +650,30 @@ typedef struct xen_domctl_hvmcontext_par
     XEN_GUEST_HANDLE_64(uint8) buffer;  /* OUT: buffer to write record into */
 } xen_domctl_hvmcontext_partial_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t);
+
+/*
+ * Move domain to specified cpupool.
+ */
+#define XEN_DOMCTL_cpupool_op  56
+#define XEN_DOMCTL_CPUPOOL_OP_CREATE                1  /* C */
+#define XEN_DOMCTL_CPUPOOL_OP_DESTROY               2  /* D */
+#define XEN_DOMCTL_CPUPOOL_OP_INFO                  3  /* I */
+#define XEN_DOMCTL_CPUPOOL_OP_ADDCPU                4  /* A */
+#define XEN_DOMCTL_CPUPOOL_OP_RMCPU                 5  /* R */
+#define XEN_DOMCTL_CPUPOOL_OP_MOVEDOMAIN            6  /* M */
+#define XEN_DOMCTL_CPUPOOL_OP_FREEINFO              7  /* F */
+#define XEN_DOMCTL_CPUPOOL_PAR_ANY     0xFFFFFFFF
+struct xen_domctl_cpupool_op {
+    uint32_t op;          /* IN */
+    uint32_t cpupool_id;  /* IN: CDIARM OUT: CI */
+    uint32_t sched_id;    /* IN: C      OUT: I  */
+    uint32_t domid;       /* IN: M              */
+    uint32_t cpu;         /* IN: AR             */
+    uint32_t n_dom;       /*            OUT: I  */
+    struct xenctl_cpumap cpumap; /*     OUT: IF */
+};
+typedef struct xen_domctl_cpupool_op xen_domctl_cpupool_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpupool_op_t);
 
 
 struct xen_domctl {
@@ -688,6 +717,7 @@ struct xen_domctl {
         struct xen_domctl_set_target        set_target;
         struct xen_domctl_subscribe         subscribe;
         struct xen_domctl_debug_op          debug_op;
+        struct xen_domctl_cpupool_op        cpupool_op;
 #if defined(__i386__) || defined(__x86_64__)
         struct xen_domctl_cpuid             cpuid;
 #endif
diff -r 655dc3bc1d8e xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/include/xen/sched-if.h        Thu Apr 16 09:16:18 2009 +0200
@@ -10,15 +10,24 @@
 
 #include <xen/percpu.h>
 
+/* A global pointer to the initial cpupool (POOL0). */
+extern struct cpupool *cpupool0;
+
+/* cpus currently in no cpupool */
+extern cpumask_t cpupool_free_cpus;
+
 struct schedule_data {
     spinlock_t          schedule_lock;  /* spinlock protecting curr        */
     struct vcpu        *curr;           /* current task                    */
     struct vcpu        *idle;           /* idle task for this cpu          */
     void               *sched_priv;
+    void               *sched_idlevpriv; /* scheduler data of idle vcpu     */
     struct timer        s_timer;        /* scheduling timer                */
 } __cacheline_aligned;
 
 DECLARE_PER_CPU(struct schedule_data, schedule_data);
+DECLARE_PER_CPU(struct scheduler *, scheduler);
+DECLARE_PER_CPU(struct cpupool *, cpupool);
 
 static inline void vcpu_schedule_lock(struct vcpu *v)
 {
@@ -58,28 +67,50 @@ struct scheduler {
     char *name;             /* full name for this scheduler      */
     char *opt_name;         /* option name for this scheduler    */
     unsigned int sched_id;  /* ID for this scheduler             */
+    void *sched_data;       /* global data pointer               */
 
-    void         (*init)           (void);
+    int          (*init)           (struct scheduler *);
+    void         (*deinit)         (struct scheduler *);
 
-    int          (*init_domain)    (struct domain *);
-    void         (*destroy_domain) (struct domain *);
+    void         (*free_vdata)     (struct scheduler *, void *);
+    void *       (*alloc_vdata)    (struct scheduler *, struct vcpu *);
+    void         (*free_pdata)     (struct scheduler *, void *, int);
+    void *       (*alloc_pdata)    (struct scheduler *, int);
 
-    int          (*init_vcpu)      (struct vcpu *);
-    void         (*destroy_vcpu)   (struct vcpu *);
+    int          (*init_domain)    (struct scheduler *, struct domain *);
+    void         (*destroy_domain) (struct scheduler *, struct domain *);
 
-    void         (*sleep)          (struct vcpu *);
-    void         (*wake)           (struct vcpu *);
+    int          (*init_vcpu)      (struct scheduler *, struct vcpu *);
+    void         (*insert_vcpu)    (struct scheduler *, struct vcpu *);
+    void         (*destroy_vcpu)   (struct scheduler *, struct vcpu *);
 
-    struct task_slice (*do_schedule) (s_time_t);
+    void         (*sleep)          (struct scheduler *, struct vcpu *);
+    void         (*wake)           (struct scheduler *, struct vcpu *);
 
-    int          (*pick_cpu)       (struct vcpu *);
-    int          (*adjust)         (struct domain *,
+    struct task_slice (*do_schedule) (struct scheduler *, s_time_t);
+
+    int          (*pick_cpu)       (struct scheduler *, struct vcpu *);
+    int          (*adjust)         (struct scheduler *, struct domain *,
                                     struct xen_domctl_scheduler_op *);
-    void         (*dump_settings)  (void);
-    void         (*dump_cpu_state) (int);
+    void         (*dump_settings)  (struct scheduler *);
+    void         (*dump_cpu_state) (struct scheduler *, int);
 
-    void         (*tick_suspend)    (void);
-    void         (*tick_resume)     (void);
+    void         (*tick_suspend)   (struct scheduler *, unsigned int);
+    void         (*tick_resume)    (struct scheduler *, unsigned int);
 };
 
+struct cpupool
+{
+    int              cpupool_id;
+    cpumask_t        cpu_valid;      /* all cpus assigned to pool */
+    cpumask_t        cpus_borrowed;  /* cpus borrowed or lent */
+    struct cpupool   *next;
+    unsigned int     n_dom;
+    int              cpu_in_transit; /* used for adding/removing cpus */
+    bool_t           pool_paused;
+    struct scheduler sched;
+};
+
+struct scheduler *scheduler_get_by_id(unsigned int id);
+
 #endif /* __XEN_SCHED_IF_H__ */
diff -r 655dc3bc1d8e xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/include/xen/sched.h   Thu Apr 16 09:14:00 2009 +0200
@@ -182,6 +182,7 @@ struct domain
 
     /* Scheduling. */
     void            *sched_priv;    /* scheduler-specific data */
+    struct cpupool  *cpupool;
 
     struct domain   *next_in_list;
     struct domain   *next_in_hashbucket;
@@ -341,7 +342,7 @@ static inline struct domain *get_current
 }
 
 struct domain *domain_create(
-    domid_t domid, unsigned int domcr_flags, ssidref_t ssidref);
+    domid_t domid, int poolid, unsigned int domcr_flags, ssidref_t ssidref);
  /* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */
 #define _DOMCRF_hvm           0
 #define DOMCRF_hvm            (1U<<_DOMCRF_hvm)
@@ -426,10 +427,11 @@ void sched_destroy_vcpu(struct vcpu *v);
 void sched_destroy_vcpu(struct vcpu *v);
 int  sched_init_domain(struct domain *d);
 void sched_destroy_domain(struct domain *d);
+void sched_move_domain(struct domain *d, struct cpupool *c);
 long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
 int  sched_id(void);
-void sched_tick_suspend(void);
-void sched_tick_resume(void);
+void sched_tick_suspend(unsigned int cpu);
+void sched_tick_resume(unsigned int cpu);
 void vcpu_wake(struct vcpu *d);
 void vcpu_sleep_nosync(struct vcpu *d);
 void vcpu_sleep_sync(struct vcpu *d);
@@ -533,8 +535,13 @@ void domain_unpause_by_systemcontroller(
 void domain_unpause_by_systemcontroller(struct domain *d);
 void cpu_init(void);
 
+struct scheduler;
+
+int schedule_init_global(char *name, struct scheduler *sched);
+void schedule_deinit_global(struct scheduler *sched);
+void schedule_cpu_switch(unsigned int cpu, struct cpupool *c);
 void vcpu_force_reschedule(struct vcpu *v);
-void cpu_disable_scheduler(void);
+int cpu_disable_scheduler(unsigned int cpu, int lock);
 int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
 int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
 int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity);
@@ -560,6 +567,21 @@ extern enum cpufreq_controller {
 extern enum cpufreq_controller {
     FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
 } cpufreq_controller;
+
+#define CPUPOOLID_NONE    -1
+
+struct cpupool *cpupool_create(int poolid, char *sched);
+int cpupool_destroy(struct cpupool *c);
+int cpupool0_cpu_assign(struct cpupool *c);
+int cpupool_assign_ncpu(struct cpupool *c, int ncpu);
+void cpupool_cpu_add(unsigned int cpu);
+int cpupool_cpu_remove(unsigned int cpu);
+int cpupool_borrow_cpu(struct cpupool *c, unsigned int cpu);
+int cpupool_return_cpu(struct cpupool *c);
+int cpupool_add_domain(struct domain *d, int poolid);
+void cpupool_rm_domain(struct domain *d);
+int cpupool_do_domctl(struct xen_domctl *op);
+#define num_cpupool_cpus(c) (cpus_weight((c)->cpu_valid))
 
 #endif /* __SCHED_H__ */
 
diff -r 655dc3bc1d8e xen/common/cpupool.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/common/cpupool.c      Fri Apr 17 11:01:51 2009 +0200
@@ -0,0 +1,698 @@
+/******************************************************************************
+ * cpupool.c
+ * 
+ * Generic cpupool-handling functions.
+ *
+ * (C) 2009, Juergen Gross, Fujitsu Technology Solutions
+ */
+
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <xen/cpumask.h>
+#include <xen/percpu.h>
+#include <xen/sched.h>
+#include <xen/sched-if.h>
+
+#define PRINTD(args...)    ((void)0)  /* change to printk(args) for debug output */
+
+#define for_each_cpupool(ptr)    \
+    for ((ptr) = &cpupool_list; *(ptr) != NULL; (ptr) = &((*(ptr))->next))
+
+struct cpupool *cpupool0;
+cpumask_t cpupool_free_cpus;
+cpumask_t cpupool_free_cpus_borrowed;
+
+static struct cpupool *cpupool_list;     /* linked list, sorted by poolid */
+
+static int cpupool0_max_cpus;
+integer_param("pool0_max_cpus", cpupool0_max_cpus);
+
+static DEFINE_SPINLOCK(cpupool_lock);
+
+DEFINE_PER_CPU(struct cpupool *, cpupool);
+
+static struct cpupool *alloc_cpupool_struct(void)
+{
+    return xmalloc(struct cpupool);
+}
+
+static void free_cpupool_struct(struct cpupool *c)
+{
+    xfree(c);
+}
+
+/*
+ * find a cpupool by its id. to be called with cpupool lock held,
+ * returns NULL if not found.
+ */
+static struct cpupool *cpupool_find_by_id(int id, int exact)
+{
+    struct cpupool **q;
+
+    for_each_cpupool(q)
+    {
+        if ( (*q)->cpupool_id == id )
+            return *q;
+        if ( (*q)->cpupool_id > id )
+            break;
+    }
+    return exact ? NULL : *q;
+}
+
+/*
+ * create a new cpupool with specified poolid
+ * returns pointer to new cpupool structure if okay, NULL else
+ * possible failures:
+ * - no memory
+ * - poolid already used
+ * - unknown scheduler
+ */
+struct cpupool *cpupool_create(int poolid, char *sched)
+{
+    struct cpupool *c;
+    struct cpupool **q;
+    int last = 0;
+
+    if ( (c = alloc_cpupool_struct()) == NULL )
+        return NULL;
+    memset(c, 0, sizeof(*c));
+
+    PRINTD("cpupool_create(%d,%s)\n", poolid, sched);
+    spin_lock(&cpupool_lock);
+    for_each_cpupool(q)
+    {
+        last = (*q)->cpupool_id;
+        if ( (poolid != CPUPOOLID_NONE) && (last >= poolid) )
+            break;
+    }
+    if ( *q != NULL )
+    {
+        if ( (*q)->cpupool_id == poolid )
+        {
+            spin_unlock(&cpupool_lock);
+            free_cpupool_struct(c);
+            return NULL;
+        }
+        c->next = *q;
+    }
+    *q = c;
+    c->cpupool_id = (poolid == CPUPOOLID_NONE) ? (last + 1) : poolid;
+    c->cpu_in_transit = -1;
+    if ( schedule_init_global(sched, &(c->sched)) )
+    {
+        spin_unlock(&cpupool_lock);
+        cpupool_destroy(c);
+        return NULL;
+    }
+    spin_unlock(&cpupool_lock);
+
+    printk("Created cpupool %d with scheduler %s (%s)\n", c->cpupool_id,
+        c->sched.name, c->sched.opt_name);
+
+    return c;
+}
+
+/*
+ * destroys the given cpupool
+ * returns 0 on success, 1 else
+ * possible failures:
+ * - pool still in use
+ * - cpus still assigned to pool
+ * - pool not in list
+ */
+int cpupool_destroy(struct cpupool *c)
+{
+    struct cpupool **q;
+
+    spin_lock(&cpupool_lock);
+    for_each_cpupool(q)
+        if ( *q == c )
+            break;
+    if ( (*q != c) || (c->n_dom != 0) || cpus_weight(c->cpu_valid) )
+    {
+        spin_unlock(&cpupool_lock);
+        return 1;
+    }
+    *q = c->next;
+    spin_unlock(&cpupool_lock);
+    PRINTD("cpupool_destroy(%d)\n", c->cpupool_id);
+    schedule_deinit_global(&(c->sched));
+    free_cpupool_struct(c);
+    return 0;
+}
+
+/*
+ * assign a specific cpu to a cpupool
+ */
+static void cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
+{
+    PRINTD("cpupool_assign_cpu(%d,%d)\n", c->cpupool_id, cpu);
+    per_cpu(cpupool, cpu) = c;
+    schedule_cpu_switch(cpu, c);
+    cpu_clear(cpu, cpupool_free_cpus);
+    cpu_set(cpu, c->cpu_valid);
+    PRINTD("cpupool_assign_cpu(%d,%d) ready\n", c->cpupool_id, cpu);
+}
+
+/*
+ * assign free physical cpus to a cpupool
+ * cpus assigned are unused cpus with lowest possible ids
+ * returns the number of cpus assigned
+ */
+int cpupool_assign_ncpu(struct cpupool *c, int ncpu)
+{
+    int i;
+    int n;
+
+    n = 0;
+    spin_lock(&cpupool_lock);
+    for_each_cpu_mask(i, cpupool_free_cpus)
+    {
+        cpupool_assign_cpu_locked(c, i);
+        n++;
+        if ( n == ncpu )
+            break;
+    }
+    spin_unlock(&cpupool_lock);
+    PRINTD("cpupool_assign_ncpu(%d,%d) rc %d\n", c->cpupool_id, ncpu, n);
+    return n;
+}
+
+static void cpupool_unassign_cpu_locked_1(struct cpupool *c, unsigned int cpu)
+{
+    PRINTD("cpupool_unassign_cpu(%d,%d)\n", c->cpupool_id, cpu);
+    c->cpu_in_transit = cpu;
+}
+
+static int cpupool_unassign_cpu_locked_2(struct cpupool *c)
+{
+    uint64_t to = NOW() + MILLISECS(100);
+    int cpu = c->cpu_in_transit;
+    int ret;
+
+    cpu_clear(cpu, c->cpu_valid);
+    while ( ((ret = cpu_disable_scheduler(cpu, 1)) != 0) && (NOW() < to) );
+    if ( ret )
+    {
+        cpu_set(cpu, c->cpu_valid);
+        c->cpu_in_transit = -1;
+    }
+    else
+    {
+        c->cpu_in_transit = -1;
+        cpu_set(cpu, cpupool_free_cpus);
+        schedule_cpu_switch(cpu, NULL);
+        per_cpu(cpupool, cpu) = NULL;
+    }
+    PRINTD("cpupool_unassign_cpu(%d,%d) ret %d\n", c->cpupool_id, cpu, ret);
+    return ret;
+}
+
+static long cpupool_unassign_cpu_helper(void *info)
+{
+    struct cpupool *c = (struct cpupool *)info;
+    long ret;
+
+    ret = cpupool_unassign_cpu_locked_2(c);
+    spin_unlock(&cpupool_lock);
+    return ret;
+}
+
+static int cpupool_unassign_cpu_locked(struct cpupool *c, unsigned int cpu)
+{
+    cpupool_unassign_cpu_locked_1(c, cpu);
+    return cpupool_unassign_cpu_locked_2(c);
+}
+
+/*
+ * unassign a specific cpu from a cpupool
+ * possible failures:
+ * - last cpu and still domains in cpupool
+ */
+int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu)
+{
+    int work_cpu;
+
+    spin_lock(&cpupool_lock);
+    if ( !cpu_isset(cpu, c->cpu_valid) )
+    {
+        spin_unlock(&cpupool_lock);
+        return 0;
+    }
+    if ( (c->n_dom > 0) && (cpus_weight(c->cpu_valid) == 1) )
+    {
+        spin_unlock(&cpupool_lock);
+        return -EBUSY;
+    }
+    cpupool_unassign_cpu_locked_1(c, cpu);
+    work_cpu = smp_processor_id();
+    if ( work_cpu == cpu )
+    {
+        work_cpu = first_cpu(cpupool0->cpu_valid);
+        if ( work_cpu == cpu )
+            work_cpu = next_cpu(cpu, cpupool0->cpu_valid);
+    }
+    return continue_hypercall_on_cpu(work_cpu, cpupool_unassign_cpu_helper, c);
+}
+
+/*
+ * borrow cpu from another cpupool
+ * cpu might be free or already in the correct pool
+ * if cpu is taken from other pool, all domains in this pool will be paused
+ * rc == 0 if not borrowed, 1 if borrowed
+ */
+int cpupool_borrow_cpu(struct cpupool *c, unsigned int cpu)
+{
+    struct cpupool **q;
+    struct domain *d;
+
+    if ( cpu_isset(cpu, c->cpu_valid) )
+        return 0;
+
+    spin_lock(&cpupool_lock);
+
+    if ( cpu_isset(cpu, cpupool_free_cpus) )
+    {
+        cpupool_assign_cpu_locked(c, cpu);
+        cpu_set(cpu, c->cpus_borrowed);
+        cpu_set(cpu, cpupool_free_cpus_borrowed);
+        spin_unlock(&cpupool_lock);
+        return 1;
+    }
+
+    for_each_cpupool(q)
+    {
+        if ( cpu_isset(cpu, (*q)->cpu_valid) )
+            break;
+    }
+    BUG_ON(*q == NULL);
+    if ( (*q)->pool_paused++ == 0 )
+    {
+        for_each_domain(d)
+        {
+            if ( d->cpupool == *q )
+                domain_pause(d);
+        }
+    }
+    /* unassigning cpu can't fail as all domains in pool should be paused */
+    cpupool_unassign_cpu_locked(*q, cpu);
+    cpupool_assign_cpu_locked(c, cpu);
+    cpu_set(cpu, c->cpus_borrowed);
+    cpu_set(cpu, (*q)->cpus_borrowed);
+
+    spin_unlock(&cpupool_lock);
+    return 1;
+}
+
+/*
+ * return cpu after borrowing it before
+ * a cpu borrowed via cpupool_borrow_cpu before is returned to its former
+ * pool
+ * returns a cpu to continue on, -1 if all okay
+ */
+int cpupool_return_cpu(struct cpupool *c)
+{
+    int cpu = -1;
+    cpumask_t mask;
+    struct cpupool **q;
+    struct domain *d;
+
+    spin_lock(&cpupool_lock);
+    if ( cpus_weight(c->cpus_borrowed) == 0 )
+        goto out;
+
+    if ( cpu_isset(smp_processor_id(), c->cpus_borrowed) )
+    {
+        cpus_andnot(mask, c->cpu_valid, c->cpus_borrowed);
+        cpu = first_cpu(mask);
+        BUG_ON(cpu == NR_CPUS);
+        goto out;
+    }
+
+    for_each_cpu_mask(cpu, c->cpus_borrowed)
+    {
+        BUG_ON(!cpu_isset(cpu, c->cpu_valid));
+        if ( cpu_isset(cpu, cpupool_free_cpus_borrowed) )
+        {
+            cpu_clear(cpu, cpupool_free_cpus_borrowed);
+            cpu_clear(cpu, c->cpus_borrowed);
+            if ( !cpupool_unassign_cpu_locked(c, cpu) )
+                continue;
+            /* could not move all vcpus, try again */
+            cpu_set(cpu, cpupool_free_cpus_borrowed);
+            cpu_set(cpu, c->cpus_borrowed);
+            goto out;
+        }
+        for_each_cpupool(q)
+        {
+            if ( (*q != c) && cpu_isset(cpu, (*q)->cpus_borrowed) )
+                break;
+        }
+        BUG_ON(*q == NULL);
+        BUG_ON(!(*q)->pool_paused);
+        cpu_clear(cpu, (*q)->cpus_borrowed);
+        cpu_clear(cpu, c->cpus_borrowed);
+        if ( cpupool_unassign_cpu_locked(c, cpu) )
+        {
+            cpu_set(cpu, (*q)->cpus_borrowed);
+            cpu_set(cpu, c->cpus_borrowed);
+            goto out;
+        }
+        cpupool_assign_cpu_locked(*q, cpu);
+        if ( (*q)->pool_paused == 1 )
+        {
+            for_each_domain(d)
+            {
+                if ( d->cpupool == *q )
+                    domain_unpause(d);
+            }
+        }
+        (*q)->pool_paused--;
+    }
+    cpu = -1;
+
+out:
+    spin_unlock(&cpupool_lock);
+    return cpu;
+}
+
+/*
+ * assign cpus to the default cpupool
+ * default are all cpus, fewer cpus may be specified as boot parameter
+ * possible failures:
+ * - no cpu assigned
+ */
+int __init cpupool0_cpu_assign(struct cpupool *c)
+{
+    if ( (cpupool0_max_cpus == 0) || (cpupool0_max_cpus > num_online_cpus()) )
+        cpupool0_max_cpus = num_online_cpus();
+    if ( !cpupool_assign_ncpu(cpupool0, cpupool0_max_cpus) )
+        return 1;
+    return 0;
+}
+
+/*
+ * add a new domain to a cpupool
+ * possible failures:
+ * - pool does not exist
+ * - pool is paused
+ * - no cpu assigned to pool
+ */
+int cpupool_add_domain(struct domain *d, int poolid)
+{
+    struct cpupool *c;
+    int rc = 1;
+
+    if ( poolid == CPUPOOLID_NONE )
+        return 0;
+    spin_lock(&cpupool_lock);
+    c = cpupool_find_by_id(poolid, 1);
+    if ( (c != NULL) && !c->pool_paused && cpus_weight(c->cpu_valid) )
+    {
+        c->n_dom++;
+        d->cpupool = c;
+        PRINTD("cpupool_add_domain(%d,%d) n_dom %d\n", d->domain_id, poolid,
+            c->n_dom);
+        rc = 0;
+    }
+    spin_unlock(&cpupool_lock);
+    return rc;
+}
+
+/*
+ * remove a domain from a cpupool
+ */
+void cpupool_rm_domain(struct domain *d)
+{
+    if ( d->cpupool == NULL )
+        return;
+    spin_lock(&cpupool_lock);
+    d->cpupool->n_dom--;
+    PRINTD("cpupool_rm_domain(%d,%d) n_dom %d\n", d->domain_id,
+        d->cpupool->cpupool_id, d->cpupool->n_dom);
+    d->cpupool = NULL;
+    spin_unlock(&cpupool_lock);
+    return;
+}
+
+/*
+ * called to add a new cpu to pool admin
+ * we add a hotplugged cpu to the cpupool0 to be able to add it to dom0
+ */
+void cpupool_cpu_add(unsigned int cpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+    if ( cpupool0 == NULL )
+        return;
+    spin_lock(&cpupool_lock);
+    cpu_set(cpu, cpupool_free_cpus);
+    cpupool_assign_cpu_locked(cpupool0, cpu);
+    spin_unlock(&cpupool_lock);
+#endif
+    return;
+}
+
+/* called to remove a cpu from pool admin
+ * possible failures:
+ * - cpu is last one in a pool with domains in it
+ * - pool is paused
+ */
+int cpupool_cpu_remove(unsigned int cpu)
+{
+    int rc = 0;
+#ifdef CONFIG_HOTPLUG_CPU
+    struct cpupool **q;
+
+    spin_lock(&cpupool_lock);
+    if ( cpu_isset(cpu, cpupool_free_cpus) )
+    {
+        cpu_clear(cpu, cpupool_free_cpus);
+        goto out;
+    }
+    for_each_cpupool(q)
+        if ( cpu_isset(cpu, (*q)->cpu_valid) )
+            break;
+    if ( *q == NULL )
+        goto out;
+    if ( (((*q)->n_dom == 0) || (cpus_weight((*q)->cpu_valid) > 1)) &&
+         !(*q)->pool_paused )
+    {
+        cpu_clear(cpu, (*q)->cpu_valid);
+        schedule_cpu_switch(cpu, NULL);
+        per_cpu(cpupool, cpu) = NULL;
+    }
+    else
+        rc = 1;
+out:
+    spin_unlock(&cpupool_lock);
+#endif
+    return rc;
+}
+
+/*
+ * do cpupool related domctl operations
+ */
+int cpupool_do_domctl(struct xen_domctl *op)
+{
+    int ret;
+    struct cpupool *c;
+
+    switch ( op->u.cpupool_op.op )
+    {
+
+    case XEN_DOMCTL_CPUPOOL_OP_CREATE:
+    {
+        int poolid;
+        struct scheduler *sched;
+
+        poolid = (op->u.cpupool_op.cpupool_id == XEN_DOMCTL_CPUPOOL_PAR_ANY) ?
+            CPUPOOLID_NONE: op->u.cpupool_op.cpupool_id;
+        sched = scheduler_get_by_id(op->u.cpupool_op.sched_id);
+        ret = -ENOENT;
+        if ( sched == NULL )
+            break;
+        ret = 0;
+        c = cpupool_create(poolid, sched->opt_name);
+        if ( c == NULL )
+            ret = -EINVAL;
+        else
+            op->u.cpupool_op.cpupool_id = c->cpupool_id;
+    }
+    break;
+
+    case XEN_DOMCTL_CPUPOOL_OP_DESTROY:
+    {
+        spin_lock(&cpupool_lock);
+        c = cpupool_find_by_id(op->u.cpupool_op.cpupool_id, 1);
+        spin_unlock(&cpupool_lock);
+       ret = -ENOENT;
+       if ( c == NULL )
+            break;
+        ret = (cpupool_destroy(c) != 0) ? -EBUSY : 0;
+    }
+    break;
+
+    case XEN_DOMCTL_CPUPOOL_OP_INFO:
+    {
+        spin_lock(&cpupool_lock);
+        c = cpupool_find_by_id(op->u.cpupool_op.cpupool_id, 0);
+        spin_unlock(&cpupool_lock);
+       ret = -ENOENT;
+       if ( c == NULL )
+            break;
+        op->u.cpupool_op.cpupool_id = c->cpupool_id;
+        op->u.cpupool_op.sched_id = c->sched.sched_id;
+        op->u.cpupool_op.n_dom = c->n_dom;
+       cpumask_to_xenctl_cpumap(&(op->u.cpupool_op.cpumap), &(c->cpu_valid));
+        ret = 0;
+    }
+    break;
+
+    case XEN_DOMCTL_CPUPOOL_OP_ADDCPU:
+    {
+        unsigned cpu;
+
+        cpu = op->u.cpupool_op.cpu;
+        spin_lock(&cpupool_lock);
+        if ( cpu == XEN_DOMCTL_CPUPOOL_PAR_ANY )
+            cpu = first_cpu(cpupool_free_cpus);
+        ret = -EINVAL;
+        if ( cpu >= NR_CPUS )
+            goto addcpu_out;
+        ret = -EBUSY;
+        if ( !cpu_isset(cpu, cpupool_free_cpus) )
+            goto addcpu_out;
+        c = cpupool_find_by_id(op->u.cpupool_op.cpupool_id, 0);
+       ret = -ENOENT;
+        if ( c == NULL )
+            goto addcpu_out;
+        cpupool_assign_cpu_locked(c, cpu);
+        ret = 0;
+addcpu_out:
+        spin_unlock(&cpupool_lock);
+    }
+    break;
+
+    case XEN_DOMCTL_CPUPOOL_OP_RMCPU:
+    {
+        unsigned cpu;
+
+        spin_lock(&cpupool_lock);
+        c = cpupool_find_by_id(op->u.cpupool_op.cpupool_id, 0);
+        spin_unlock(&cpupool_lock);
+       ret = -ENOENT;
+       if ( c == NULL )
+            break;
+        cpu = op->u.cpupool_op.cpu;
+        if ( cpu == XEN_DOMCTL_CPUPOOL_PAR_ANY )
+            cpu = last_cpu(c->cpu_valid);
+        ret = -EINVAL;
+        if ( cpu >= NR_CPUS )
+            break;
+        /* caution: cpupool_unassign_cpu uses continue_hypercall_on_cpu and
+         * will continue after the local return
+         */
+        ret = cpupool_unassign_cpu(c, cpu);
+    }
+    break;
+
+    case XEN_DOMCTL_CPUPOOL_OP_MOVEDOMAIN:
+    {
+        struct domain *d;
+
+        ret = -EINVAL;
+        if ( op->u.cpupool_op.domid == 0 )
+            break;
+        ret = -ESRCH;
+        d = rcu_lock_domain_by_id(op->u.cpupool_op.domid);
+        if ( d == NULL )
+            break;
+        if ( d->cpupool == NULL )
+        {
+            ret = -EINVAL;
+            rcu_unlock_domain(d);
+            break;
+        }
+        ret = -ENOENT;
+        spin_lock(&cpupool_lock);
+        c = cpupool_find_by_id(op->u.cpupool_op.cpupool_id, 1);
+        if ( (c != NULL) && cpus_weight(c->cpu_valid) && !c->pool_paused )
+        {
+            PRINTD("cpupool move_domain(%d)->%d\n", d->domain_id,
+                c->cpupool_id);
+            d->cpupool->n_dom--;
+            PRINTD("cpupool move_domain(%d), %d.n_dom=%d\n", d->domain_id,
+                d->cpupool->cpupool_id, d->cpupool->n_dom);
+            sched_move_domain(d, c);
+            c->n_dom++;
+            PRINTD("cpupool move_domain(%d), %d.n_dom=%d\n", d->domain_id,
+                c->cpupool_id, c->n_dom);
+            PRINTD("cpupool move_domain(%d)->%d ready\n", d->domain_id,
+                c->cpupool_id);
+            ret = 0;
+        }
+        spin_unlock(&cpupool_lock);
+        rcu_unlock_domain(d);
+    }
+    break;
+
+    case XEN_DOMCTL_CPUPOOL_OP_FREEINFO:
+    {
+        cpumask_to_xenctl_cpumap(&(op->u.cpupool_op.cpumap),
+            &cpupool_free_cpus);
+        ret = 0;
+    }
+    break;
+
+    default:
+        ret = -ENOSYS;
+
+    }
+
+    return ret;
+}
+
+void schedule_dump(struct cpupool *c);
+
+void dump_runq(unsigned char key)
+{
+    unsigned long    flags;
+    s_time_t         now = NOW();
+    struct cpupool **c;
+
+    spin_lock(&cpupool_lock);
+    local_irq_save(flags);
+
+    printk("NOW=0x%08X%08X\n",  (u32)(now>>32), (u32)now);
+
+    printk("Idle cpupool:\n");
+    schedule_dump(NULL);
+
+    for_each_cpupool(c)
+    {
+        printk("Cpupool %d:\n", (*c)->cpupool_id);
+        schedule_dump(*c);
+    }
+
+    local_irq_restore(flags);
+    spin_unlock(&cpupool_lock);
+}
+
+static int __init cpupool_init(void)
+{
+    cpupool_free_cpus = cpu_online_map;
+    cpus_clear(cpupool_free_cpus_borrowed);
+    cpupool_list = NULL;
+    return 0;
+}
+__initcall(cpupool_init);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.