
[Xen-changelog] [xen-unstable] [XEN] Separate domain creation from vcpu creation.



# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Node ID 462d6e4cb29a620685f7c382a2372edcc99e2e4a
# Parent  222b492cc0635e9f6ebf2a60da2b8398821e4c5f
[XEN] Separate domain creation from vcpu creation.
Creating a domain no longer creates vcpu0 -- vcpus, including
vcpu0, are now allocated separately via alloc_vcpu() once the
domain itself exists.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 xen/arch/ia64/xen/xensetup.c |   10 ++--
 xen/arch/x86/domain.c        |    2 
 xen/arch/x86/setup.c         |    9 ++-
 xen/arch/x86/traps.c         |    7 +--
 xen/common/dom0_ops.c        |  100 ++++++++++++++++++++++---------------------
 xen/common/domain.c          |   98 +++++++++++++++++++++++++-----------------
 xen/common/event_channel.c   |    7 ++-
 xen/common/sched_sedf.c      |    2 
 xen/drivers/char/console.c   |    2 
 xen/include/xen/sched.h      |   25 +++++++---
 10 files changed, 154 insertions(+), 108 deletions(-)
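
With this change, domain_create() returns a domain with no vcpus, so every
caller must pair it with explicit alloc_vcpu() calls. A minimal sketch of the
new pattern, mirroring the x86/ia64 setup paths and the DOM0_MAX_VCPUS handler
below (the later construction and initialisation steps are elided):

    struct domain *d;

    /* Step 1: create the domain shell -- no vcpus exist at this point. */
    d = domain_create(0);

    /* Step 2: allocate vcpu0 explicitly (vcpu_id 0, initial cpu 0). */
    if ( (d == NULL) || (alloc_vcpu(d, 0, 0) == NULL) )
        panic("Error creating domain 0\n");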

diff -r 222b492cc063 -r 462d6e4cb29a xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c      Wed Jul 05 11:31:33 2006 +0100
+++ b/xen/arch/ia64/xen/xensetup.c      Wed Jul 05 14:27:27 2006 +0100
@@ -425,8 +425,9 @@ void start_kernel(void)
 
     scheduler_init();
     idle_vcpu[0] = (struct vcpu*) ia64_r13;
-    idle_domain = domain_create(IDLE_DOMAIN_ID, 0);
-    BUG_ON(idle_domain == NULL);
+    idle_domain = domain_create(IDLE_DOMAIN_ID);
+    if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) )
+        BUG();
 
     late_setup_arch(&cmdline);
     alloc_dom_xen_and_dom_io();
@@ -503,9 +504,8 @@ printk("num_online_cpus=%d, max_cpus=%d\
     }
 
     /* Create initial domain 0. */
-    dom0 = domain_create(0, 0);
-
-    if ( dom0 == NULL )
+    dom0 = domain_create(0);
+    if ( (dom0 == NULL) || (alloc_vcpu(dom0, 0, 0) == NULL) )
         panic("Error creating domain 0\n");
 
     set_bit(_DOMF_privileged, &dom0->domain_flags);
diff -r 222b492cc063 -r 462d6e4cb29a xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Wed Jul 05 11:31:33 2006 +0100
+++ b/xen/arch/x86/domain.c     Wed Jul 05 14:27:27 2006 +0100
@@ -951,7 +951,7 @@ void domain_relinquish_resources(struct 
         }
     }
 
-    if ( hvm_guest(d->vcpu[0]) )
+    if ( d->vcpu[0] && hvm_guest(d->vcpu[0]) )
         hvm_relinquish_guest_resources(d);
 
     shadow_mode_disable(d);
diff -r 222b492cc063 -r 462d6e4cb29a xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Wed Jul 05 11:31:33 2006 +0100
+++ b/xen/arch/x86/setup.c      Wed Jul 05 14:27:27 2006 +0100
@@ -439,8 +439,9 @@ void __init __start_xen(multiboot_info_t
 
     scheduler_init();
 
-    idle_domain = domain_create(IDLE_DOMAIN_ID, 0);
-    BUG_ON(idle_domain == NULL);
+    idle_domain = domain_create(IDLE_DOMAIN_ID);
+    if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) )
+        BUG();
 
     set_current(idle_domain->vcpu[0]);
     this_cpu(curr_vcpu) = idle_domain->vcpu[0];
@@ -537,8 +538,8 @@ void __init __start_xen(multiboot_info_t
     acm_init(&initrdidx, mbi, initial_images_start);
 
     /* Create initial domain 0. */
-    dom0 = domain_create(0, 0);
-    if ( dom0 == NULL )
+    dom0 = domain_create(0);
+    if ( (dom0 == NULL) || (alloc_vcpu(dom0, 0, 0) == NULL) )
         panic("Error creating domain 0\n");
 
     set_bit(_DOMF_privileged, &dom0->domain_flags);
diff -r 222b492cc063 -r 462d6e4cb29a xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Wed Jul 05 11:31:33 2006 +0100
+++ b/xen/arch/x86/traps.c      Wed Jul 05 14:27:27 2006 +0100
@@ -1397,13 +1397,14 @@ static void nmi_dom0_report(unsigned int
 static void nmi_dom0_report(unsigned int reason_idx)
 {
     struct domain *d;
-
-    if ( (d = dom0) == NULL )
+    struct vcpu   *v;
+
+    if ( ((d = dom0) == NULL) || ((v = d->vcpu[0]) == NULL) )
         return;
 
     set_bit(reason_idx, &d->shared_info->arch.nmi_reason);
 
-    if ( test_and_set_bit(_VCPUF_nmi_pending, &d->vcpu[0]->vcpu_flags) )
+    if ( test_and_set_bit(_VCPUF_nmi_pending, &v->vcpu_flags) )
         raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */
 }
 
diff -r 222b492cc063 -r 462d6e4cb29a xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c     Wed Jul 05 11:31:33 2006 +0100
+++ b/xen/common/dom0_ops.c     Wed Jul 05 14:27:27 2006 +0100
@@ -90,6 +90,44 @@ static void getdomaininfo(struct domain 
     memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
 }
 
+static unsigned int default_vcpu0_location(void)
+{
+    struct domain *d;
+    struct vcpu   *v;
+    unsigned int   i, cpu, cnt[NR_CPUS] = { 0 };
+    cpumask_t      cpu_exclude_map;
+
+    /* Do an initial CPU placement. Pick the least-populated CPU. */
+    read_lock(&domlist_lock);
+    for_each_domain ( d )
+        for_each_vcpu ( d, v )
+        if ( !test_bit(_VCPUF_down, &v->vcpu_flags) )
+            cnt[v->processor]++;
+    read_unlock(&domlist_lock);
+
+    /*
+     * If we're on a HT system, we only auto-allocate to a non-primary HT. We 
+     * favour high numbered CPUs in the event of a tie.
+     */
+    cpu = first_cpu(cpu_sibling_map[0]);
+    if ( cpus_weight(cpu_sibling_map[0]) > 1 )
+        cpu = next_cpu(cpu, cpu_sibling_map[0]);
+    cpu_exclude_map = cpu_sibling_map[0];
+    for_each_online_cpu ( i )
+    {
+        if ( cpu_isset(i, cpu_exclude_map) )
+            continue;
+        if ( (i == first_cpu(cpu_sibling_map[i])) &&
+             (cpus_weight(cpu_sibling_map[i]) > 1) )
+            continue;
+        cpus_or(cpu_exclude_map, cpu_exclude_map, cpu_sibling_map[i]);
+        if ( cnt[i] <= cnt[cpu] )
+            cpu = i;
+    }
+
+    return cpu;
+}
+
 long do_dom0_op(XEN_GUEST_HANDLE(dom0_op_t) u_dom0_op)
 {
     long ret = 0;
@@ -150,7 +188,7 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op
         if ( d != NULL )
         {
             ret = -EINVAL;
-            if ( (d != current->domain) && 
+            if ( (d != current->domain) && (d->vcpu[0] != NULL) &&
                  test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) )
             {
                 domain_unpause_by_systemcontroller(d);
@@ -164,11 +202,7 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op
     case DOM0_CREATEDOMAIN:
     {
         struct domain *d;
-        unsigned int   pro;
         domid_t        dom;
-        struct vcpu   *v;
-        unsigned int   i, cnt[NR_CPUS] = { 0 };
-        cpumask_t      cpu_exclude_map;
         static domid_t rover = 0;
 
         /*
@@ -202,36 +236,8 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op
             rover = dom;
         }
 
-        /* Do an initial CPU placement. Pick the least-populated CPU. */
-        read_lock(&domlist_lock);
-        for_each_domain ( d )
-            for_each_vcpu ( d, v )
-                if ( !test_bit(_VCPUF_down, &v->vcpu_flags) )
-                    cnt[v->processor]++;
-        read_unlock(&domlist_lock);
-        
-        /*
-         * If we're on a HT system, we only auto-allocate to a non-primary HT.
-         * We favour high numbered CPUs in the event of a tie.
-         */
-        pro = first_cpu(cpu_sibling_map[0]);
-        if ( cpus_weight(cpu_sibling_map[0]) > 1 )
-            pro = next_cpu(pro, cpu_sibling_map[0]);
-        cpu_exclude_map = cpu_sibling_map[0];
-        for_each_online_cpu ( i )
-        {
-            if ( cpu_isset(i, cpu_exclude_map) )
-                continue;
-            if ( (i == first_cpu(cpu_sibling_map[i])) &&
-                 (cpus_weight(cpu_sibling_map[i]) > 1) )
-                continue;
-            cpus_or(cpu_exclude_map, cpu_exclude_map, cpu_sibling_map[i]);
-            if ( cnt[i] <= cnt[pro] )
-                pro = i;
-        }
-
         ret = -ENOMEM;
-        if ( (d = domain_create(dom, pro)) == NULL )
+        if ( (d = domain_create(dom)) == NULL )
             break;
 
         memcpy(d->handle, op->u.createdomain.handle,
@@ -258,14 +264,8 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op
         if ( (d = find_domain_by_id(op->u.max_vcpus.domain)) == NULL )
             break;
 
-        /*
-         * Can only create new VCPUs while the domain is not fully constructed
-         * (and hence not runnable). Xen needs auditing for races before
-         * removing this check.
-         */
-        ret = -EINVAL;
-        if ( test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) )
-            goto maxvcpu_out;
+        /* Needed, for example, to ensure writable p.t. state is synced. */
+        domain_pause(d);
 
         /* We cannot reduce maximum VCPUs. */
         ret = -EINVAL;
@@ -275,17 +275,21 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op
         ret = -ENOMEM;
         for ( i = 0; i < max; i++ )
         {
-            if ( d->vcpu[i] == NULL )
-            {
-                cpu = (d->vcpu[i-1]->processor + 1) % num_online_cpus();
-                if ( alloc_vcpu(d, i, cpu) == NULL )
-                    goto maxvcpu_out;
-            }
+            if ( d->vcpu[i] != NULL )
+                continue;
+
+            cpu = (i == 0) ?
+                default_vcpu0_location() :
+                (d->vcpu[i-1]->processor + 1) % num_online_cpus();
+
+            if ( alloc_vcpu(d, i, cpu) == NULL )
+                goto maxvcpu_out;
         }
 
         ret = 0;
 
     maxvcpu_out:
+        domain_unpause(d);
         put_domain(d);
     }
     break;
diff -r 222b492cc063 -r 462d6e4cb29a xen/common/domain.c
--- a/xen/common/domain.c       Wed Jul 05 11:31:33 2006 +0100
+++ b/xen/common/domain.c       Wed Jul 05 14:27:27 2006 +0100
@@ -46,6 +46,7 @@ struct domain *alloc_domain(domid_t domi
     atomic_set(&d->refcnt, 1);
     spin_lock_init(&d->big_lock);
     spin_lock_init(&d->page_alloc_lock);
+    spin_lock_init(&d->pause_lock);
     INIT_LIST_HEAD(&d->page_list);
     INIT_LIST_HEAD(&d->xenpage_list);
 
@@ -81,8 +82,8 @@ struct vcpu *alloc_vcpu(
     v->domain = d;
     v->vcpu_id = vcpu_id;
     v->processor = cpu_id;
-    atomic_set(&v->pausecnt, 0);
     v->vcpu_info = &d->shared_info->vcpu_info[vcpu_id];
+    spin_lock_init(&v->pause_lock);
 
     v->cpu_affinity = is_idle_domain(d) ?
         cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
@@ -110,30 +111,22 @@ struct vcpu *alloc_idle_vcpu(unsigned in
 {
     struct domain *d;
     struct vcpu *v;
-    unsigned int vcpu_id;
-
-    if ((vcpu_id = cpu_id % MAX_VIRT_CPUS) == 0)
-    {
-        d = domain_create(IDLE_DOMAIN_ID, cpu_id);
-        BUG_ON(d == NULL);
-        v = d->vcpu[0];
-    }
-    else
-    {
-        d = idle_vcpu[cpu_id - vcpu_id]->domain;
-        BUG_ON(d == NULL);
-        v = alloc_vcpu(d, vcpu_id, cpu_id);
-    }
-
+    unsigned int vcpu_id = cpu_id % MAX_VIRT_CPUS;
+
+    d = (vcpu_id == 0) ?
+        domain_create(IDLE_DOMAIN_ID) :
+        idle_vcpu[cpu_id - vcpu_id]->domain;
+    BUG_ON(d == NULL);
+
+    v = alloc_vcpu(d, vcpu_id, cpu_id);
     idle_vcpu[cpu_id] = v;
 
     return v;
 }
 
-struct domain *domain_create(domid_t domid, unsigned int cpu)
+struct domain *domain_create(domid_t domid)
 {
     struct domain *d, **pd;
-    struct vcpu *v;
 
     if ( (d = alloc_domain(domid)) == NULL )
         return NULL;
@@ -152,13 +145,10 @@ struct domain *domain_create(domid_t dom
     if ( arch_domain_create(d) != 0 )
         goto fail3;
 
-    if ( (v = alloc_vcpu(d, 0, cpu)) == NULL )
-        goto fail4;
-
     d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
     d->irq_caps   = rangeset_new(d, "Interrupts", 0);
     if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
-        goto fail4; /* NB. alloc_vcpu() is undone in free_domain() */
+        goto fail4;
 
     if ( !is_idle_domain(d) )
     {
@@ -327,11 +317,12 @@ void domain_shutdown(struct domain *d, u
     d->shutdown_code = reason;
 
     /* Put every vcpu to sleep, but don't wait (avoids inter-vcpu deadlock). */
+    spin_lock(&d->pause_lock);
+    d->pause_count++;
+    set_bit(_DOMF_paused, &d->domain_flags);
+    spin_unlock(&d->pause_lock);
     for_each_vcpu ( d, v )
-    {
-        atomic_inc(&v->pausecnt);
         vcpu_sleep_nosync(v);
-    }
 
     get_knownalive_domain(d);
     domain_shuttingdown[smp_processor_id()] = d;
@@ -398,34 +389,65 @@ void domain_destroy(struct domain *d)
 
 void vcpu_pause(struct vcpu *v)
 {
-    BUG_ON(v == current);
-    atomic_inc(&v->pausecnt);
+    ASSERT(v != current);
+
+    spin_lock(&v->pause_lock);
+    if ( v->pause_count++ == 0 )
+        set_bit(_VCPUF_paused, &v->vcpu_flags);
+    spin_unlock(&v->pause_lock);
+
     vcpu_sleep_sync(v);
 }
 
+void vcpu_unpause(struct vcpu *v)
+{
+    int wake;
+
+    ASSERT(v != current);
+
+    spin_lock(&v->pause_lock);
+    wake = (--v->pause_count == 0);
+    if ( wake )
+        clear_bit(_VCPUF_paused, &v->vcpu_flags);
+    spin_unlock(&v->pause_lock);
+
+    if ( wake )
+        vcpu_wake(v);
+}
+
 void domain_pause(struct domain *d)
 {
     struct vcpu *v;
 
+    ASSERT(d != current->domain);
+
+    spin_lock(&d->pause_lock);
+    if ( d->pause_count++ == 0 )
+        set_bit(_DOMF_paused, &d->domain_flags);
+    spin_unlock(&d->pause_lock);
+
     for_each_vcpu( d, v )
-        vcpu_pause(v);
+        vcpu_sleep_sync(v);
 
     sync_pagetable_state(d);
 }
 
-void vcpu_unpause(struct vcpu *v)
-{
-    BUG_ON(v == current);
-    if ( atomic_dec_and_test(&v->pausecnt) )
-        vcpu_wake(v);
-}
-
 void domain_unpause(struct domain *d)
 {
     struct vcpu *v;
-
-    for_each_vcpu( d, v )
-        vcpu_unpause(v);
+    int wake;
+
+    ASSERT(d != current->domain);
+
+    spin_lock(&d->pause_lock);
+    wake = (--d->pause_count == 0);
+    if ( wake )
+        clear_bit(_DOMF_paused, &d->domain_flags);
+    spin_unlock(&d->pause_lock);
+
+    if ( wake )
+        for_each_vcpu( d, v )
+            vcpu_wake(v);
 }
 
 void domain_pause_by_systemcontroller(struct domain *d)
diff -r 222b492cc063 -r 462d6e4cb29a xen/common/event_channel.c
--- a/xen/common/event_channel.c        Wed Jul 05 11:31:33 2006 +0100
+++ b/xen/common/event_channel.c        Wed Jul 05 14:27:27 2006 +0100
@@ -525,11 +525,16 @@ void send_guest_global_virq(struct domai
 void send_guest_global_virq(struct domain *d, int virq)
 {
     int port;
+    struct vcpu *v;
     struct evtchn *chn;
 
     ASSERT(virq_is_global(virq));
 
-    port = d->vcpu[0]->virq_to_evtchn[virq];
+    v = d->vcpu[0];
+    if ( unlikely(v == NULL) )
+        return;
+
+    port = v->virq_to_evtchn[virq];
     if ( unlikely(port == 0) )
         return;
 
diff -r 222b492cc063 -r 462d6e4cb29a xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c   Wed Jul 05 11:31:33 2006 +0100
+++ b/xen/common/sched_sedf.c   Wed Jul 05 14:27:27 2006 +0100
@@ -1429,6 +1429,8 @@ static int sedf_adjdom(struct domain *p,
     }
     else if ( cmd->direction == SCHED_INFO_GET )
     {
+        if ( p->vcpu[0] == NULL )
+            return -EINVAL;
         cmd->u.sedf.period    = EDOM_INFO(p->vcpu[0])->period;
         cmd->u.sedf.slice     = EDOM_INFO(p->vcpu[0])->slice;
         cmd->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status & EXTRA_AWARE;
diff -r 222b492cc063 -r 462d6e4cb29a xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Wed Jul 05 11:31:33 2006 +0100
+++ b/xen/drivers/char/console.c        Wed Jul 05 14:27:27 2006 +0100
@@ -279,7 +279,7 @@ static void switch_serial_input(void)
 {
     static char *input_str[2] = { "DOM0", "Xen" };
     xen_rx = !xen_rx;
-    if ( SWITCH_CODE != 0 )
+    if ( (SWITCH_CODE != 0) && (dom0 != NULL) )
     {
         printk("*** Serial input -> %s "
                "(type 'CTRL-%c' three times to switch input to %s).\n",
diff -r 222b492cc063 -r 462d6e4cb29a xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Wed Jul 05 11:31:33 2006 +0100
+++ b/xen/include/xen/sched.h   Wed Jul 05 14:27:27 2006 +0100
@@ -78,9 +78,10 @@ struct vcpu
 
     unsigned long    vcpu_flags;
 
+    spinlock_t       pause_lock;
+    unsigned int     pause_count;
+
     u16              virq_to_evtchn[NR_VIRQS];
-
-    atomic_t         pausecnt;
 
     /* Bitmask of CPUs on which this VCPU may run. */
     cpumask_t        cpu_affinity;
@@ -141,6 +142,10 @@ struct domain
     struct rangeset *irq_caps;
 
     unsigned long    domain_flags;
+
+    spinlock_t       pause_lock;
+    unsigned int     pause_count;
+
     unsigned long    vm_assist;
 
     atomic_t         refcnt;
@@ -220,8 +225,7 @@ static inline void get_knownalive_domain
     ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED));
 }
 
-extern struct domain *domain_create(
-    domid_t domid, unsigned int cpu);
+extern struct domain *domain_create(domid_t domid);
 extern int construct_dom0(
     struct domain *d,
     unsigned long image_start, unsigned long image_len, 
@@ -368,6 +372,9 @@ extern struct domain *domain_list;
  /* VCPU is polling a set of event channels (SCHEDOP_poll). */
 #define _VCPUF_polling         10
 #define VCPUF_polling          (1UL<<_VCPUF_polling)
+ /* VCPU is paused by the hypervisor? */
+#define _VCPUF_paused          11
+#define VCPUF_paused           (1UL<<_VCPUF_paused)
 
 /*
  * Per-domain flags (domain_flags).
@@ -390,12 +397,16 @@ extern struct domain *domain_list;
  /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
 #define _DOMF_polling          5
 #define DOMF_polling           (1UL<<_DOMF_polling)
+ /* Domain is paused by the hypervisor? */
+#define _DOMF_paused           6
+#define DOMF_paused            (1UL<<_DOMF_paused)
 
 static inline int vcpu_runnable(struct vcpu *v)
 {
-    return ( (atomic_read(&v->pausecnt) == 0) &&
-             !(v->vcpu_flags & (VCPUF_blocked|VCPUF_down)) &&
-             !(v->domain->domain_flags & (DOMF_shutdown|DOMF_ctrl_pause)) );
+    return ( !(v->vcpu_flags &
+               (VCPUF_blocked|VCPUF_down|VCPUF_paused)) &&
+             !(v->domain->domain_flags &
+               (DOMF_shutdown|DOMF_ctrl_pause|DOMF_paused)) );
 }
 
 void vcpu_pause(struct vcpu *v);
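
The per-vcpu atomic pausecnt is replaced by lock-protected pause counts plus
_VCPUF_paused/_DOMF_paused flag bits, which vcpu_runnable() now checks. Pauses
therefore nest: only the 0->1 transition sets the flag, and only the final
unpause clears it and wakes the vcpus. A usage sketch, assuming a caller that
must quiesce a domain around some state change (as DOM0_MAX_VCPUS now does):

    domain_pause(d);      /* 0->1 sets _DOMF_paused and sleeps all vcpus  */
    /* ... operate on d while none of its vcpus can run ... */
    domain_unpause(d);    /* 1->0 clears _DOMF_paused and wakes the vcpus */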
