
[Xen-changelog] [xen-unstable] x86: Support CPU hotplug offline.



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1221141982 -3600
# Node ID 4ffc70556000869d3c301452a99e4e524dd54b07
# Parent  fba8dca321c2b99842af6624f24afb77c472184b
x86: Support CPU hotplug offline.

Signed-off-by: Shan Haitao <haitao.shan@xxxxxxxxx>
Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
---
 xen/arch/x86/irq.c        |   23 +++++++++++++++++++++--
 xen/arch/x86/smpboot.c    |   40 +++++++++-------------------------------
 xen/common/sched_credit.c |    5 +++++
 xen/common/schedule.c     |   42 ++++++++++++++++++++++++++++++++++++++++++
 xen/include/xen/sched.h   |    1 +
 5 files changed, 78 insertions(+), 33 deletions(-)
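
In outline: __cpu_disable() previously refused to proceed unless the dying
CPU was already running its idle vcpu, which limited offlining to the
S3/suspend path. This patch lifts that restriction by teaching fixup_irqs()
to scrub guest interrupt state off the dying CPU and by adding
cpu_disable_scheduler() to evacuate vcpus and timers. A condensed sketch of
the resulting cpu_down() path follows; the names are taken from the hunks
below, but the stop_machine plumbing, locking and error handling are
elided, so treat it as illustrative rather than verbatim source:

    /* Illustrative sketch only -- condensed from the hunks below.
     * num_online_cpus() is stubbed so the sketch stands alone. */
    #include <stdio.h>
    #include <errno.h>

    static unsigned int num_online_cpus(void) { return 4; }  /* stub */

    static int cpu_down(unsigned int cpu)
    {
        if (num_online_cpus() == 1)
            return -EBUSY;          /* never offline the last CPU */
        if (cpu == 0)
            return -EINVAL;         /* the BSP stays online */

        /* stop_machine then runs take_cpu_down() -> __cpu_disable()
         * on the dying CPU, which in order:
         *   1. disables interrupts and clears the local APIC;
         *   2. redirects IRQs and drains pending EOIs (fixup_irqs());
         *   3. removes the CPU from cpu_online_map;
         *   4. calls cpu_disable_scheduler() to evacuate vcpus/timers. */
        return 0;
    }

    int main(void)
    {
        printf("cpu_down(0) -> %d\n", cpu_down(0));   /* rejected: BSP */
        printf("cpu_down(2) -> %d\n", cpu_down(2));   /* would proceed */
        return 0;
    }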

diff -r fba8dca321c2 -r 4ffc70556000 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Thu Sep 11 11:58:08 2008 +0100
+++ b/xen/arch/x86/irq.c        Thu Sep 11 15:06:22 2008 +0100
@@ -737,9 +737,12 @@ __initcall(setup_dump_irqs);
 
 void fixup_irqs(cpumask_t map)
 {
-    unsigned int irq;
+    unsigned int irq, sp;
     static int warned;
-
+    irq_guest_action_t *action;
+    struct pending_eoi *peoi;
+
+    /* Direct all future interrupts away from this CPU. */
     for ( irq = 0; irq < NR_IRQS; irq++ )
     {
         cpumask_t mask;
@@ -758,8 +761,24 @@ void fixup_irqs(cpumask_t map)
             printk("Cannot set affinity for irq %i\n", irq);
     }
 
+    /* Service any interrupts that beat us in the re-direction race. */
     local_irq_enable();
     mdelay(1);
     local_irq_disable();
+
+    /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
+    for ( irq = 0; irq < NR_IRQS; irq++ )
+    {
+        if ( !(irq_desc[irq].status & IRQ_GUEST) )
+            continue;
+        action = (irq_guest_action_t *)irq_desc[irq].action;
+        cpu_clear(smp_processor_id(), action->cpu_eoi_map);
+    }
+
+    /* Flush the interrupt EOI stack. */
+    peoi = this_cpu(pending_eoi);
+    for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
+        peoi[sp].ready = 1;
+    flush_ready_eoi(NULL);
 }
 #endif
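
The new tail of fixup_irqs() does two things once fresh interrupts have
been pointed elsewhere (note the enable/mdelay(1)/disable window above,
which services any interrupt that won the redirection race): it drops the
dying CPU from every guest interrupt's cpu_eoi_map, so no later EOI work is
directed at it, and it force-drains the per-CPU pending-EOI stack by
marking every entry ready and flushing. A toy model of that drain; the
struct layout, the stack variable and the flush loop here are simplified
stand-ins for Xen's real per-CPU bookkeeping, not its definitions:

    /* Toy model of the pending-EOI drain (simplified stand-in types). */
    #include <stdio.h>

    #define NR_ENTRIES 8

    struct pending_eoi { int vector; int ready; };

    static struct pending_eoi peoi[NR_ENTRIES];
    static unsigned int peoi_sp;               /* current stack depth */

    static void flush_ready_eoi(void)
    {
        /* Pop and acknowledge entries from the top while they are ready. */
        while (peoi_sp > 0 && peoi[peoi_sp - 1].ready) {
            peoi_sp--;
            printf("EOI vector 0x%x\n", peoi[peoi_sp].vector);
        }
    }

    int main(void)
    {
        /* Two interrupts were acked but not yet EOIed on this CPU. */
        peoi[peoi_sp++] = (struct pending_eoi){ .vector = 0x31, .ready = 0 };
        peoi[peoi_sp++] = (struct pending_eoi){ .vector = 0x39, .ready = 0 };

        /* What the patch does on the dying CPU: mark every entry ready,
         * then flush, so the stack is empty before the CPU goes offline. */
        for (unsigned int sp = 0; sp < peoi_sp; sp++)
            peoi[sp].ready = 1;
        flush_ready_eoi();
        return 0;
    }
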
diff -r fba8dca321c2 -r 4ffc70556000 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Thu Sep 11 11:58:08 2008 +0100
+++ b/xen/arch/x86/smpboot.c    Thu Sep 11 15:06:22 2008 +0100
@@ -1225,15 +1225,6 @@ int __cpu_disable(void)
        if (cpu == 0)
                return -EBUSY;
 
-       /*
-        * Only S3 is using this path, and thus idle vcpus are running on all
-        * APs when we are called. To support full cpu hotplug, other 
-        * notification mechanisms should be introduced (e.g., migrate vcpus
-        * off this physical cpu before rendezvous point).
-        */
-       if (!is_idle_vcpu(current))
-               return -EINVAL;
-
        local_irq_disable();
        clear_local_APIC();
        /* Allow any queued timer interrupts to get serviced */
@@ -1249,6 +1240,9 @@ int __cpu_disable(void)
        fixup_irqs(map);
        /* It's now safe to remove this processor from the online map */
        cpu_clear(cpu, cpu_online_map);
+
+       cpu_disable_scheduler();
+
        return 0;
 }
 
@@ -1275,28 +1269,6 @@ static int take_cpu_down(void *unused)
     return __cpu_disable();
 }
 
-/* 
- * XXX: One important thing missed here is to migrate vcpus
- * from dead cpu to other online ones and then put whole
- * system into a stop state. It assures a safe environment
- * for a cpu hotplug/remove at normal running state.
- *
- * However for xen PM case, at this point:
- *     -> All other domains should be notified with PM event,
- *        and then in following states:
- *             * Suspend state, or
- *             * Paused state, which is a force step to all
- *               domains if they do nothing to suspend
- *     -> All vcpus of dom0 (except vcpu0) have already been
- *        hot removed
- * with the net effect that all other cpus only have idle vcpu
- * running. In this special case, we can avoid vcpu migration
- * then and system can be considered in a stop state.
- *
- * So current cpu hotplug is a special version for PM specific
- * usage, and need more effort later for full cpu hotplug.
- * (ktian1)
- */
 int cpu_down(unsigned int cpu)
 {
        int err = 0;
@@ -1304,6 +1276,12 @@ int cpu_down(unsigned int cpu)
        spin_lock(&cpu_add_remove_lock);
        if (num_online_cpus() == 1) {
                err = -EBUSY;
+               goto out;
+       }
+
+       /* Cannot offline the BSP. */
+       if (cpu == 0) {
+               err = -EINVAL;
                goto out;
        }
 
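The long XXX comment deleted above described exactly the limitation this
patch removes: hotplug was only safe from the PM path because nothing
migrated vcpus off the dying CPU. That migration is now done by the
cpu_disable_scheduler() call added to __cpu_disable(), so both the comment
and the is_idle_vcpu(current) bail-out can go. The new cpu == 0 guard makes
explicit that the boot processor cannot be offlined.
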
diff -r fba8dca321c2 -r 4ffc70556000 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Thu Sep 11 11:58:08 2008 +0100
+++ b/xen/common/sched_credit.c Thu Sep 11 15:06:22 2008 +0100
@@ -1107,6 +1107,10 @@ csched_load_balance(int cpu, struct csch
 
     BUG_ON( cpu != snext->vcpu->processor );
 
+    /* If this CPU is going offline we shouldn't steal work. */
+    if ( unlikely(!cpu_online(cpu)) )
+        goto out;
+
     if ( snext->pri == CSCHED_PRI_IDLE )
         CSCHED_STAT_CRANK(load_balance_idle);
     else if ( snext->pri == CSCHED_PRI_TS_OVER )
@@ -1149,6 +1153,7 @@ csched_load_balance(int cpu, struct csch
             return speer;
     }
 
+ out:
     /* Failed to find more important work elsewhere... */
     __runq_remove(snext);
     return snext;
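
This guard relies on ordering in __cpu_disable(): the dying CPU is cleared
from cpu_online_map before cpu_disable_scheduler() forces the final
reschedule, so the last pass through csched_load_balance() on that CPU sees
!cpu_online(cpu) and skips work stealing. Without it, the balancer could
pull a runnable vcpu onto a CPU that is about to disappear.
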
diff -r fba8dca321c2 -r 4ffc70556000 xen/common/schedule.c
--- a/xen/common/schedule.c     Thu Sep 11 11:58:08 2008 +0100
+++ b/xen/common/schedule.c     Thu Sep 11 15:06:22 2008 +0100
@@ -288,6 +288,48 @@ void vcpu_force_reschedule(struct vcpu *
     }
 }
 
+/*
+ * This function is used by cpu_hotplug code from stop_machine context.
+ * Hence we can avoid needing to take the schedule_lock.
+ */
+void cpu_disable_scheduler(void)
+{
+    struct domain *d;
+    struct vcpu *v;
+    unsigned int cpu = smp_processor_id();
+
+    for_each_domain ( d )
+    {
+        for_each_vcpu ( d, v )
+        {
+            if ( is_idle_vcpu(v) )
+                continue;
+
+            if ( (cpus_weight(v->cpu_affinity) == 1) &&
+                 cpu_isset(cpu, v->cpu_affinity) )
+            {
+                printk("Breaking vcpu affinity for domain %d vcpu %d\n",
+                        v->domain->domain_id, v->vcpu_id);
+                cpus_setall(v->cpu_affinity);
+            }
+
+            /*
+             * Migrate single-shot timers to CPU0. A new cpu will automatically
+             * be chosen when the timer is next re-set.
+             */
+            if ( v->singleshot_timer.cpu == cpu )
+                migrate_timer(&v->singleshot_timer, 0);
+
+            if ( v->processor == cpu )
+            {
+                set_bit(_VPF_migrating, &v->pause_flags);
+                vcpu_sleep_nosync(v);
+                vcpu_migrate(v);
+            }
+        }
+    }
+}
+
 static int __vcpu_set_affinity(
     struct vcpu *v, cpumask_t *affinity,
     bool_t old_lock_status, bool_t new_lock_status)
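
Per vcpu, cpu_disable_scheduler() does three things: it widens a hard
affinity that names only the dying CPU, it migrates the vcpu's single-shot
timer to CPU0 (a fresh CPU is picked when the timer is next set), and it
forces any vcpu currently resident on the dying CPU through vcpu_migrate().
A toy model of the affinity-widening rule, with a plain bitmask standing in
for cpumask_t; break_affinity() is a hypothetical helper, not Xen's API:

    /* Toy model of the affinity-widening rule (bitmask stand-in). */
    #include <stdio.h>

    #define NR_CPUS 4

    struct vcpu { unsigned int affinity; };      /* one bit per CPU */

    static void break_affinity(struct vcpu *v, unsigned int dying_cpu)
    {
        /* "cpus_weight() == 1 and the only set bit is the dying CPU" */
        if (v->affinity == (1u << dying_cpu)) {
            printf("Breaking vcpu affinity (was 0x%x)\n", v->affinity);
            v->affinity = (1u << NR_CPUS) - 1;   /* cpus_setall() */
        }
    }

    int main(void)
    {
        struct vcpu v = { .affinity = 1u << 2 }; /* pinned to CPU2 only */
        break_affinity(&v, 2);                   /* CPU2 is going offline */
        printf("affinity now 0x%x\n", v.affinity);
        return 0;
    }
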
diff -r fba8dca321c2 -r 4ffc70556000 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Thu Sep 11 11:58:08 2008 +0100
+++ b/xen/include/xen/sched.h   Thu Sep 11 15:06:22 2008 +0100
@@ -524,6 +524,7 @@ void cpu_init(void);
 void cpu_init(void);
 
 void vcpu_force_reschedule(struct vcpu *v);
+void cpu_disable_scheduler(void);
 int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
 int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
 void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);
