|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v1 2/3] xen/smp: Rewrite on_selected_cpus() to be lockless
On 01.04.2026 18:35, Ross Lagerwall wrote:
> --- a/xen/common/smp.c
> +++ b/xen/common/smp.c
> @@ -24,13 +24,15 @@
> /*
> * Structure and data for smp_call_function()/on_selected_cpus().
> */
> -static DEFINE_SPINLOCK(call_lock);
> -static struct call_data_struct {
> +struct call_data_struct {
> void (*func) (void *info);
> void *info;
> int wait;
> - cpumask_t selected;
> -} call_data;
> + cpumask_t selected __cacheline_aligned;
> +};
> +
> +DEFINE_PER_CPU(struct call_data_struct, call_data);
> +static cpumask_t tasks;
Only first pass feedback for now (I still need to go over all of this more
thoroughly).
Having cpumask_t variables anywhere (not just on the stack, where they're
particularly problematic) isn't very nice. Can this become cpumask_var_t?
(We really also need to deal with the one in smp_call_function(), for
example.)
> @@ -50,55 +52,84 @@ void on_selected_cpus(
> void *info,
> int wait)
> {
> + struct call_data_struct *data;
> + unsigned int cpu = smp_processor_id();
> +
> ASSERT(local_irq_is_enabled());
> ASSERT(cpumask_subset(selected, &cpu_online_map));
>
> - spin_lock(&call_lock);
> + if ( cpumask_empty(selected) )
> + return;
> +
> + data = &this_cpu(call_data);
>
> - cpumask_copy(&call_data.selected, selected);
> + if ( !data->wait )
> + {
> + /* Wait for any previous async call to complete */
> + while ( !cpumask_empty(&data->selected) )
> + cpu_relax();
> +
> + cpumask_clear_cpu(cpu, &tasks);
Since you set this bit again almost immediately, the above can only be to
make sure that ...
> + }
>
> - if ( cpumask_empty(&call_data.selected) )
> - goto out;
> + data->func = func;
> + data->info = info;
> + data->wait = wait;
... these updates and ...
> - call_data.func = func;
> - call_data.info = info;
> - call_data.wait = wait;
> + smp_wmb();
>
> - smp_send_call_function_mask(&call_data.selected);
> + cpumask_copy(&data->selected, selected);
... and this copying happen with the bit clear. Don't you need another
barrier then, though (between cpumask_clear_cpu() and the writes)?
Further isn't the barrier you add coming too early? While the bit in
tasks is clear, nobody's going to look at ->selected. Doesn't the
barrier need to live here, to isolate from ...
> - while ( !cpumask_empty(&call_data.selected) )
> - cpu_relax();
> + cpumask_set_cpu(cpu, &tasks);
... this?
> -out:
> - spin_unlock(&call_lock);
> + smp_send_call_function_mask(&data->selected);
> +
> + if ( wait )
> + {
> + while ( !cpumask_empty(&data->selected) )
> + cpu_relax();
> +
> + cpumask_clear_cpu(cpu, &tasks);
> + }
> }
>
> void smp_call_function_interrupt(void)
> {
> - void (*func)(void *info) = call_data.func;
> - void *info = call_data.info;
> unsigned int cpu = smp_processor_id();
> -
> - if ( !cpumask_test_cpu(cpu, &call_data.selected) )
> - return;
> + unsigned int i;
> + struct call_data_struct *data;
> + void (*func)(void *info);
> + void *info;
Please move into the loop's scope whatever can be moved there.
> irq_enter();
>
> - if ( unlikely(!func) )
> - {
> - cpumask_clear_cpu(cpu, &call_data.selected);
> - }
> - else if ( call_data.wait )
> - {
> - (*func)(info);
> - smp_mb();
> - cpumask_clear_cpu(cpu, &call_data.selected);
> - }
> - else
> + for_each_cpu ( i, &tasks )
> {
> - smp_mb();
> - cpumask_clear_cpu(cpu, &call_data.selected);
> - (*func)(info);
> + data = &per_cpu(call_data, i);
> +
> + if ( !cpumask_test_cpu(cpu, &data->selected) )
> + continue;
> +
> + smp_rmb();
This barrier looks as if it also needs to move (up).
Jan
> + func = data->func;
> + info = data->info;
> +
> + if ( unlikely(!func) )
> + {
> + cpumask_clear_cpu(cpu, &data->selected);
> + }
> + else if ( data->wait )
> + {
> + (*func)(info);
> + smp_mb();
> + cpumask_clear_cpu(cpu, &data->selected);
> + }
> + else
> + {
> + smp_mb();
> + cpumask_clear_cpu(cpu, &data->selected);
> + (*func)(info);
> + }
> }
>
> irq_exit();
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |