[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] Rendezvous selected cpus


  • To: "Tian, Kevin" <kevin.tian@xxxxxxxxx>, <xen-devel@xxxxxxxxxxxxxxxxxxx>
  • From: Keir Fraser <Keir.Fraser@xxxxxxxxxxxx>
  • Date: Sun, 03 Feb 2008 17:44:24 +0000
  • Delivery-date: Sun, 03 Feb 2008 09:44:23 -0800
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>
  • Thread-index: Achlcy9d/rIcXbNERJmDq57BBZNC3QBGTqlD
  • Thread-topic: [Xen-devel] [PATCH] Rendezvous selected cpus

This is a good thing to have, and my main comments are stylistic. Firstly, I
think the name and interface change compared with Linux's stop_machine
mechanism is gratuitous. I would prefer us to have the same interface (and
also probably use the same or similar names throughout the implementation,
where appropriate). I don't think the more flexible interface expressed here
is useful -- certainly finding an application for rendezvousing a set of
CPUs and running a function on a subset of those, with optional irq disable,
is a bit of a brain bender. :-) So, stick to stop_machine interface,
semantics, and naming....

Also, I'd prefer this to be implemented in common/stop_machine.c if at all
possible. It's not really x86 specific. Certainly I do not want it in smp.c,
as that file is full enough already of random cruft with no other home.

Oh, also I think you are missing local_irq_disable() on the CPU that calls
on_rendezvous_cpus(). Like the Linux implementation you should probably do
it at the same time you signal other CPUs to do so.

Apart from that it's a good idea, and I'll look more closely at how you tie
it in to CPU hotplug when you resubmit it.

 Thanks,
 Keir

On 2/2/08 08:11, "Tian, Kevin" <kevin.tian@xxxxxxxxx> wrote:

> Rendezvous selected cpus in softirq
> 
> This is similar to stop_machine_run stub from Linux, to pull
> selected cpus in rendezvous point and then do some batch work
> under a safe environment. Current one usage is from S3 path,
> where individual cpu is pulled down with related online
> footprints being cleared. It's dangerous to have other cpus
> checking clobbered data structure in the middle, such as
> cpu_online_map, cpu_sibling_map, etc.
> 
> Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>
> 
> diff -r 667d1a73d2ca xen/arch/x86/domain.c
> --- a/xen/arch/x86/domain.c Sat Feb 02 11:06:02 2008 +0800
> +++ b/xen/arch/x86/domain.c Sat Feb 02 14:11:45 2008 +0800
> @@ -82,7 +82,6 @@ static void default_idle(void)
>  
>  static void play_dead(void)
>  {
> -    __cpu_disable();
>      /* This must be done before dead CPU ack */
>      cpu_exit_clear();
>      hvm_cpu_down();
> diff -r 667d1a73d2ca xen/arch/x86/smp.c
> --- a/xen/arch/x86/smp.c Sat Feb 02 11:06:02 2008 +0800
> +++ b/xen/arch/x86/smp.c Sat Feb 02 14:11:30 2008 +0800
> @@ -22,6 +22,7 @@
>  #include <asm/ipi.h>
>  #include <asm/hvm/support.h>
>  #include <mach_apic.h>
> +#include <xen/softirq.h>
>  
>  /*
>   * Some notes on x86 processor bugs affecting SMP operation:
> @@ -374,3 +375,151 @@ fastcall void smp_call_function_interrup
>  
>      irq_exit();
>  }
> +
> +enum rendezvous_action {
> +    RENDEZVOUS_START,
> +    RENDEZVOUS_DISABLE_IRQ,
> +    RENDEZVOUS_INVOKE,
> +    RENDEZVOUS_EXIT,
> +};
> +
> +struct rendezvous_data {
> +    void (*fn) (void *);
> +    void *data;
> +    cpumask_t selected;
> +    cpumask_t fn_mask;
> +    atomic_t  done;
> +    enum rendezvous_action action;
> +};
> +
> +static struct rendezvous_data *rdz_data;
> +static spinlock_t rendezvous_lock = SPIN_LOCK_UNLOCKED;
> +static void rendezvous_set_action(enum rendezvous_action action, int
> cpus)
> +{
> +    atomic_set(&rdz_data->done, 0);
> +    smp_wmb();
> +    rdz_data->action = action;
> +    while ( atomic_read(&rdz_data->done) != cpus )
> +        cpu_relax();
> +}
> +
> +/* Rendezvous selected cpus in softirq context and call (*fn) set in
> fn_mask */ 
> +int on_rendezvous_cpus (
> +    cpumask_t selected,
> +    cpumask_t fn_mask,
> +    void (*fn) (void *),
> +    void *data,
> +    int disable_irq)
> +{
> +    struct rendezvous_data rdz;
> +    unsigned int nr_cpus, nr_fn_cpus, self, cpu, cur =
> smp_processor_id();
> +
> +    ASSERT(local_irq_is_enabled());
> +    ASSERT(cpus_subset(fn_mask, selected));
> +
> +    if ( cpus_weight(fn_mask) && !fn )
> +        return -1;
> +
> +    /* current cpu conducts the rendezvous process */
> +    cpu_clear(cur, selected);
> +    self = cpu_test_and_clear(cur, fn_mask);
> +    nr_cpus = cpus_weight(selected);
> +    nr_fn_cpus = cpus_weight(fn_mask);
> +
> +    if ( nr_cpus == 0 )
> +    {
> +        if ( self )
> +            (*fn)(data);
> +        return 0;
> +    }
> +
> +    rdz.fn = fn;
> +    rdz.data = data;
> +    rdz.selected = selected;
> +    rdz.fn_mask = fn_mask;
> +    atomic_set(&rdz.done, 0);
> +    rdz.action = RENDEZVOUS_START;
> +
> +    /* Note: We shouldn't spin on lock when it's held by others since
> others
> +     * is expecting this cpus to enter softirq context. Or else
> deadlock
> +     * is caused.
> +     */
> +    if ( !spin_trylock(&rendezvous_lock) )
> +        return -1;
> +
> +    rdz_data = &rdz;
> +    smp_wmb();
> +
> +    for_each_cpu_mask(cpu, selected)
> +        cpu_raise_softirq(cpu, RENDEZVOUS_SOFTIRQ);
> +
> +    /* Wait all concerned cpu to enter rendezvous point */
> +    while ( atomic_read(&rdz_data->done) != nr_cpus )
> +        cpu_relax();
> +
> +    if ( disable_irq )
> +        rendezvous_set_action(RENDEZVOUS_DISABLE_IRQ, nr_cpus);
> +
> +    if ( self )
> +        (*fn)(data);
> +
> +    /* Wait cpus to finish work if applicable */
> +    if ( nr_fn_cpus != 0 )
> +        rendezvous_set_action(RENDEZVOUS_INVOKE, nr_fn_cpus);
> +
> +    rendezvous_set_action(RENDEZVOUS_EXIT, nr_cpus);
> +    spin_unlock(&rendezvous_lock);
> +    return 0;
> +}
> +
> +static void rendezvous_softirq(void)
> +{
> +    int irq_disabled = 0;
> +    int invoked = 0;
> +    int required;
> +
> +    ASSERT(cpu_isset(smp_processor_id(), rdz_data->selected));
> +
> +    required = cpu_isset(smp_processor_id(), rdz_data->fn_mask);
> +    smp_mb();
> +    atomic_inc(&rdz_data->done);
> +
> +    while ( rdz_data->action != RENDEZVOUS_EXIT )
> +    {
> +        switch ( rdz_data->action )
> +        {
> +        case RENDEZVOUS_DISABLE_IRQ:
> +            if ( !irq_disabled )
> +            {
> +                local_irq_disable();
> +                irq_disabled = 1;
> +                smp_mb();
> +                atomic_inc(&rdz_data->done);
> +            }
> +            break;
> +        case RENDEZVOUS_INVOKE:
> +            if ( required && !invoked )
> +            {
> +                rdz_data->fn(rdz_data->data);
> +                invoked = 1;
> +                smp_mb();
> +                atomic_inc(&rdz_data->done);
> +            }
> +            break;
> +        default:
> +            break;
> +        }
> +
> +        cpu_relax();
> +    }
> +
> +    smp_mb();
> +    atomic_inc(&rdz_data->done);
> +}
> +
> +static int __init cpu_rendezvous_init(void)
> +{
> +    open_softirq(RENDEZVOUS_SOFTIRQ, rendezvous_softirq);
> +    return 0;
> +}
> +__initcall(cpu_rendezvous_init);
> diff -r 667d1a73d2ca xen/arch/x86/smpboot.c
> --- a/xen/arch/x86/smpboot.c Sat Feb 02 11:06:02 2008 +0800
> +++ b/xen/arch/x86/smpboot.c Sat Feb 02 14:11:30 2008 +0800
> @@ -1192,7 +1192,7 @@ remove_siblinginfo(int cpu)
>  }
>  
>  extern void fixup_irqs(cpumask_t map);
> -int __cpu_disable(void)
> +void __cpu_disable(void *data)
>  {
> cpumask_t map = cpu_online_map;
> int cpu = smp_processor_id();
> @@ -1206,7 +1206,15 @@ int __cpu_disable(void)
> * Especially so if we're not using an IOAPIC -zwane
> */
> if (cpu == 0)
> -  return -EBUSY;
> +  return;
> +
> + /* By far only S3 is using this path, and thus only idle vcpus
> +  * are running on all APs when it's called. To support full
> +  * cpu hotplug, other notification mechanisms should be
> introduced
> +  * like to migrate vcpus out of this one before rendezvous point
> +  */
> + if (!is_idle_vcpu(current))
> +  return;
>  
> local_irq_disable();
> clear_local_APIC();
> @@ -1223,7 +1231,6 @@ int __cpu_disable(void)
> fixup_irqs(map);
> /* It's now safe to remove this processor from the online map */
> cpu_clear(cpu, cpu_online_map);
> - return 0;
>  }
>  
>  void __cpu_die(unsigned int cpu)
> @@ -1269,7 +1276,8 @@ int cpu_down(unsigned int cpu)
>  int cpu_down(unsigned int cpu)
>  {
> int err = 0;
> - cpumask_t mask;
> + cpumask_t rmask = cpu_online_map;
> + cpumask_t mask = CPU_MASK_NONE;
>  
> spin_lock(&cpu_add_remove_lock);
> if (num_online_cpus() == 1) {
> @@ -1283,11 +1291,9 @@ int cpu_down(unsigned int cpu)
> }
>  
> printk("Prepare to bring CPU%d down...\n", cpu);
> - /* Send notification to remote idle vcpu */
> - cpus_clear(mask);
> - cpu_set(cpu, mask);
> - per_cpu(cpu_state, cpu) = CPU_DYING;
> - smp_send_event_check_mask(mask);
> + cpu_clear(smp_processor_id(), rmask); /* all except self */
> + cpu_set(cpu, mask); /* cpu to die */
> + on_rendezvous_cpus(rmask, mask, __cpu_disable, NULL, 1);
>  
> __cpu_die(cpu);
>  
> @@ -1364,9 +1370,8 @@ void enable_nonboot_cpus(void)
> cpus_clear(frozen_cpus);
>  }
>  #else /* ... !CONFIG_HOTPLUG_CPU */
> -int __cpu_disable(void)
> -{
> - return -ENOSYS;
> +void __cpu_disable(void *data)
> +{
>  }
>  
>  void __cpu_die(unsigned int cpu)
> diff -r 667d1a73d2ca xen/include/asm-x86/smp.h
> --- a/xen/include/asm-x86/smp.h Sat Feb 02 11:06:02 2008 +0800
> +++ b/xen/include/asm-x86/smp.h Sat Feb 02 14:11:30 2008 +0800
> @@ -51,12 +51,11 @@ extern u8 x86_cpu_to_apicid[];
>  
>  /* State of each CPU. */
>  #define CPU_ONLINE 0x0002 /* CPU is up */
> -#define CPU_DYING 0x0003 /* CPU is requested to die */
>  #define CPU_DEAD 0x0004 /* CPU is dead */
>  DECLARE_PER_CPU(int, cpu_state);
>  
>  #ifdef CONFIG_HOTPLUG_CPU
> -#define cpu_is_offline(cpu) unlikely(per_cpu(cpu_state,cpu) ==
> CPU_DYING)
> +#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
>  extern int cpu_down(unsigned int cpu);
>  extern int cpu_up(unsigned int cpu);
>  extern void cpu_exit_clear(void);
> @@ -102,8 +101,10 @@ static __inline int logical_smp_processo
>  
>  #endif
>  
> -extern int __cpu_disable(void);
> +extern void __cpu_disable(void *data);
>  extern void __cpu_die(unsigned int cpu);
> +extern int on_rendezvous_cpus(cpumask_t selected, cpumask_t fn_mask,
> +    void (*fn) (void *), void *data, int disable_irq);
>  #endif /* !__ASSEMBLY__ */
>  
>  #else /* CONFIG_SMP */
> diff -r 667d1a73d2ca xen/include/xen/softirq.h
> --- a/xen/include/xen/softirq.h Sat Feb 02 11:06:02 2008 +0800
> +++ b/xen/include/xen/softirq.h Sat Feb 02 14:12:06 2008 +0800
> @@ -10,8 +10,9 @@
>  #define PAGE_SCRUB_SOFTIRQ                5
>  #define TRACE_SOFTIRQ                     6
>  #define RCU_SOFTIRQ                       7
> +#define RENDEZVOUS_SOFTIRQ                8
>  
> -#define NR_COMMON_SOFTIRQS                8
> +#define NR_COMMON_SOFTIRQS                9
>  
>  #include <asm/softirq.h>
>  
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.