[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Patch to allow vcpu hotplugging in domU



ChangeSet 1.1402.1.1, 2005/05/03 15:41:52+01:00, cl349@xxxxxxxxxxxxxxxxxxxx

        Patch to allow vcpu hotplugging in domU
        Signed-off-by: Ryan Harper <ryanh@xxxxxxxxxx>
        Signed-off-by: Christian Limpach <Christian.Limpach@xxxxxxxxxxxx>
        
        From: Ryan Harper
          Two config changes are required to use this change:
            1) CONFIG_SMP=y
            2) CONFIG_HOTPLUG_CPU=y
        
          I've tested unplugging/plugging cpus in domU via the sysfs interface
          that the patch provides.
        
          hungerforce:~# grep processor /proc/cpuinfo
          processor       : 0
          processor       : 1
          processor       : 2
          processor       : 3
          hungerforce:~# cd /sys/devices/system/cpu/
          hungerforce:/sys/devices/system/cpu# echo 0 > cpu3/online
          hungerforce:/sys/devices/system/cpu# grep processor /proc/cpuinfo
          processor       : 0
          processor       : 1
          processor       : 2
          hungerforce:/sys/devices/system/cpu# echo 1 > cpu3/online
          hungerforce:/sys/devices/system/cpu# grep processor /proc/cpuinfo
          processor       : 0
          processor       : 1
          processor       : 2
          processor       : 3
        
          It seems that all processors besides cpu0 can be removed.  I've not done
          any investigation nor optimization of the hotplug patch.  I attempted to
          unplug cpus in dom0, but this resulted in Xen rebooting.
        
        
        Makefile, Kconfig, smpboot.c, smp.c, process.c, irq.c:
          Fix hotplug cpu support.
        Makefile:
          Setup reach-over build of topology.o.
        i386-cpu-hotplug-updated-for-mm.patch:
          Add i386 hotplug patch from:
          ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.11-rc5/2.6.11-rc5-mm1/broken-out/i386-cpu-hotplug-updated-for-mm.patch
        i386-cpu-hotplug-updated-for-mm.patch, Makefile:
          new file
        traps.c, smpboot.c, smp.c, process.c, irq.c, Kconfig:
          Merge changes from hotplug patch.



 linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig               |   10 
 linux-2.6.11-xen-sparse/arch/xen/i386/Makefile              |    1 
 linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c          |   65 +
 linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c      |   34 +
 linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c          |   24 
 linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c      |  101 ++-
 linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c        |    8 
 linux-2.6.11-xen-sparse/arch/xen/i386/mach-default/Makefile |   12 
 patches/linux-2.6.11/i386-cpu-hotplug-updated-for-mm.patch  |  398 ++++++++++++
 9 files changed, 627 insertions(+), 26 deletions(-)


diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig b/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig     2005-05-10 11:04:53 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig     2005-05-10 11:04:53 -04:00
@@ -668,6 +668,16 @@
        depends on (X86_VISWS || SMP) && !X86_VOYAGER
        default n
 
+config HOTPLUG_CPU
+       bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
+       depends on SMP && HOTPLUG && EXPERIMENTAL
+       ---help---
+         Say Y here to experiment with turning CPUs off and on.  CPUs
+         can be controlled through /sys/devices/system/cpu.
+
+         Say N.
+
+
 if XEN_PHYSDEV_ACCESS
 
 menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile b/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile    2005-05-10 11:04:53 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile    2005-05-10 11:04:53 -04:00
@@ -72,6 +72,7 @@
 libs-y                                         += arch/i386/lib/
 core-y                                 += arch/xen/i386/kernel/ \
                                           arch/xen/i386/mm/ \
+                                          arch/xen/i386/mach-default/ \
                                           arch/i386/crypto/
 # \
 #                                         arch/xen/$(mcore-y)/
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c        2005-05-10 11:04:53 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c        2005-05-10 11:04:53 -04:00
@@ -15,6 +15,9 @@
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
 
 #ifndef CONFIG_X86_LOCAL_APIC
 /*
@@ -207,9 +210,8 @@
 
        if (i == 0) {
                seq_printf(p, "           ");
-               for (j=0; j<NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "CPU%d       ",j);
+               for_each_cpu(j)
+                       seq_printf(p, "CPU%d       ",j);
                seq_putc(p, '\n');
        }
 
@@ -222,9 +224,8 @@
 #ifndef CONFIG_SMP
                seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+               for_each_cpu(j)
+                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
                seq_printf(p, " %14s", irq_desc[i].handler->typename);
                seq_printf(p, "  %s", action->name);
@@ -237,16 +238,13 @@
                spin_unlock_irqrestore(&irq_desc[i].lock, flags);
        } else if (i == NR_IRQS) {
                seq_printf(p, "NMI: ");
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ", nmi_count(j));
+               for_each_cpu(j)
+                       seq_printf(p, "%10u ", nmi_count(j));
                seq_putc(p, '\n');
 #ifdef CONFIG_X86_LOCAL_APIC
                seq_printf(p, "LOC: ");
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ",
-                                       irq_stat[j].apic_timer_irqs);
+               for_each_cpu(j)
+                       seq_printf(p, "%10u ", irq_stat[j].apic_timer_irqs);
                seq_putc(p, '\n');
 #endif
                seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
@@ -256,3 +254,44 @@
        }
        return 0;
 }
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+void fixup_irqs(cpumask_t map)
+{
+       unsigned int irq;
+       static int warned;
+
+       for (irq = 0; irq < NR_IRQS; irq++) {
+               cpumask_t mask;
+               if (irq == 2)
+                       continue;
+
+               cpus_and(mask, irq_affinity[irq], map);
+               if (any_online_cpu(mask) == NR_CPUS) {
+                       printk("Breaking affinity for irq %i\n", irq);
+                       mask = map;
+               }
+               if (irq_desc[irq].handler->set_affinity)
+                       irq_desc[irq].handler->set_affinity(irq, mask);
+               else if (irq_desc[irq].action && !(warned++))
+                       printk("Cannot set affinity for irq %i\n", irq);
+       }
+
+#if 0
+       barrier();
+       /* Ingo Molnar says: "after the IO-APIC masks have been redirected
+          [note the nop - the interrupt-enable boundary on x86 is two
+          instructions from sti] - to flush out pending hardirqs and
+          IPIs. After this point nothing is supposed to reach this CPU." */
+       __asm__ __volatile__("sti; nop; cli");
+       barrier();
+#else
+       /* That doesn't seem sufficient.  Give it 1ms. */
+       local_irq_enable();
+       mdelay(1);
+       local_irq_disable();
+#endif
+}
+#endif
+
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c    2005-05-10 11:04:53 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c    2005-05-10 11:04:53 -04:00
@@ -13,6 +13,7 @@
 
 #include <stdarg.h>
 
+#include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/fs.h>
@@ -54,6 +55,9 @@
 #include <linux/irq.h>
 #include <linux/err.h>
 
+#include <asm/tlbflush.h>
+#include <asm/cpu.h>
+
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
 int hlt_counter;
@@ -112,6 +116,33 @@
        }
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+#include <asm/nmi.h>
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+       /* Ack it */
+       __get_cpu_var(cpu_state) = CPU_DEAD;
+
+       /* We shouldn't have to disable interrupts while dead, but
+        * some interrupts just don't seem to go away, and this makes
+        * it "work" for testing purposes. */
+       /* Death loop */
+       while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
+               HYPERVISOR_yield();
+
+       local_irq_disable();
+       __flush_tlb_all();
+       cpu_set(smp_processor_id(), cpu_online_map);
+       local_irq_enable();
+}
+#else
+static inline void play_dead(void)
+{
+       BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
 /*
  * The idle thread. There's no useful work to be
  * done, so just try to conserve power and have a
@@ -129,6 +160,9 @@
                        if (cpu_isset(cpu, cpu_idle_map))
                                cpu_clear(cpu, cpu_idle_map);
                        rmb();
+
+                       if (cpu_is_offline(cpu))
+                               play_dead();
 
                        irq_stat[cpu].idle_timestamp = jiffies;
                        xen_idle();
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c        2005-05-10 11:04:53 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c        2005-05-10 11:04:53 -04:00
@@ -19,6 +19,7 @@
 #include <linux/mc146818rtc.h>
 #include <linux/cache.h>
 #include <linux/interrupt.h>
+#include <linux/cpu.h>
 
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
@@ -185,6 +186,7 @@
        unsigned int cpu;
 
        local_irq_save(flags);
+       WARN_ON(cpus_addr(mask)[0] & ~cpus_addr(cpu_online_map)[0]);
 
        for (cpu = 0; cpu < NR_CPUS; ++cpu) {
                if (cpu_isset(cpu, mask)) {
@@ -320,21 +322,21 @@
 static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
                                                unsigned long va)
 {
-       cpumask_t tmp;
        /*
         * A couple of (to be removed) sanity checks:
         *
-        * - we do not send IPIs to not-yet booted CPUs.
         * - current CPU must not be in mask
         * - mask must exist :)
         */
        BUG_ON(cpus_empty(cpumask));
-
-       cpus_and(tmp, cpumask, cpu_online_map);
-       BUG_ON(!cpus_equal(cpumask, tmp));
        BUG_ON(cpu_isset(smp_processor_id(), cpumask));
        BUG_ON(!mm);
 
+       /* If a CPU which we ran on has gone down, OK. */
+       cpus_and(cpumask, cpumask, cpu_online_map);
+       if (cpus_empty(cpumask))
+               return;
+
        /*
         * i'm not happy about this global shared spinlock in the
         * MM hot path, but we'll see how contended it is.
@@ -465,6 +467,7 @@
  */
 void smp_send_reschedule(int cpu)
 {
+       WARN_ON(cpu_is_offline(cpu));

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.