Re: [Xen-devel] [PATCH 2/2] x86/x2apic: properly implement cluster mode
On 08/11/2012 15:03, "Jan Beulich" <JBeulich@xxxxxxxx> wrote:
> So far, cluster mode was just an alternative implementation of
> physical mode: Allowing only single CPU interrupt targets, and sending
> IPIs to each target CPU separately. Take advantage of what cluster
> mode really can do in that regard.
What does it allow? Multicast within certain constraints? I know it's not
part of our coding style, but some comments would be nice. ;)
-- Keir
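
For context, what cluster mode buys here: in x2APIC mode the 32-bit logical destination is split into a cluster ID (bits 31:16) and a 16-bit bitmap of CPUs within that cluster (bits 15:0), so a single ICR write in logical destination mode can reach up to 16 CPUs at once, provided they all sit in the same cluster. A minimal standalone sketch of how such a destination is composed (illustrative only, made-up helper name, not the Xen code):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* x2APIC logical ID as the hardware derives it from the x2APIC ID:
     * bits 31:16 = cluster (ID >> 4), bits 15:0 = 1 << (ID & 0xf). */
    static uint32_t x2apic_logical_id(uint32_t x2apic_id)
    {
        return ((x2apic_id >> 4) << 16) | (1u << (x2apic_id & 0xf));
    }

    int main(void)
    {
        /* CPUs with x2APIC IDs 17 and 19 share cluster 1, so their
         * destination bits can simply be OR-ed together ... */
        uint32_t dest = x2apic_logical_id(17) | x2apic_logical_id(19);

        /* ... and one 64-bit ICR write (destination in bits 63:32, fixed
         * delivery, logical mode, vector in the low byte) reaches both. */
        uint64_t icr = ((uint64_t)dest << 32) | /* delivery/mode bits | */ 0x20;

        printf("dest=%08" PRIx32 " icr=%016" PRIx64 "\n", dest, icr);
        return 0;
    }

This OR-ing of per-CPU logical APIC IDs within one cluster is what the new cpu_mask_to_apicid_x2apic_cluster() and send_IPI_mask_x2apic_cluster() in the patch below rely on.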
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
>
> --- a/xen/arch/x86/genapic/x2apic.c
> +++ b/xen/arch/x86/genapic/x2apic.c
> @@ -19,6 +19,7 @@
>
> #include <xen/config.h>
> #include <xen/init.h>
> +#include <xen/cpu.h>
> #include <xen/cpumask.h>
> #include <asm/apicdef.h>
> #include <asm/genapic.h>
> @@ -33,6 +34,14 @@ static bool_t __initdata x2apic_phys; /*
> boolean_param("x2apic_phys", x2apic_phys);
>
> static DEFINE_PER_CPU_READ_MOSTLY(u32, cpu_2_logical_apicid);
> +static DEFINE_PER_CPU_READ_MOSTLY(cpumask_t *, cluster_cpus);
> +static cpumask_t *cluster_cpus_spare;
> +static DEFINE_PER_CPU(cpumask_var_t, scratch_mask);
> +
> +static inline u32 x2apic_cluster(unsigned int cpu)
> +{
> + return per_cpu(cpu_2_logical_apicid, cpu) >> 16;
> +}
>
> static void init_apic_ldr_x2apic_phys(void)
> {
> @@ -40,20 +49,53 @@ static void init_apic_ldr_x2apic_phys(vo
>
> static void init_apic_ldr_x2apic_cluster(void)
> {
> - this_cpu(cpu_2_logical_apicid) = apic_read(APIC_LDR);
> + unsigned int cpu, this_cpu = smp_processor_id();
> +
> + per_cpu(cpu_2_logical_apicid, this_cpu) = apic_read(APIC_LDR);
> +
> + if ( per_cpu(cluster_cpus, this_cpu) )
> + {
> + ASSERT(cpumask_test_cpu(this_cpu, per_cpu(cluster_cpus, this_cpu)));
> + return;
> + }
> +
> + per_cpu(cluster_cpus, this_cpu) = cluster_cpus_spare;
> + for_each_online_cpu ( cpu )
> + {
> + if (this_cpu == cpu || x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
> + continue;
> + per_cpu(cluster_cpus, this_cpu) = per_cpu(cluster_cpus, cpu);
> + break;
> + }
> + if ( per_cpu(cluster_cpus, this_cpu) == cluster_cpus_spare )
> + cluster_cpus_spare = NULL;
> +
> + cpumask_set_cpu(this_cpu, per_cpu(cluster_cpus, this_cpu));
> }
>
> static void __init clustered_apic_check_x2apic(void)
> {
> }
>
> +static const cpumask_t *vector_allocation_cpumask_x2apic_cluster(int cpu)
> +{
> + return per_cpu(cluster_cpus, cpu);
> +}
> +
> static unsigned int cpu_mask_to_apicid_x2apic_cluster(const cpumask_t *cpumask)
> {
> - return per_cpu(cpu_2_logical_apicid, cpumask_first(cpumask));
> + unsigned int cpu = cpumask_first(cpumask);
> + unsigned int dest = per_cpu(cpu_2_logical_apicid, cpu);
> + const cpumask_t *cluster_cpus = per_cpu(cluster_cpus, cpu);
> +
> + for_each_cpu ( cpu, cluster_cpus )
> + if ( cpumask_test_cpu(cpu, cpumask) )
> + dest |= per_cpu(cpu_2_logical_apicid, cpu);
> +
> + return dest;
> }
>
> -static void __send_IPI_mask_x2apic(
> - const cpumask_t *cpumask, int vector, unsigned int dest_mode)
> +static void send_IPI_mask_x2apic_phys(const cpumask_t *cpumask, int vector)
> {
> unsigned int cpu;
> unsigned long flags;
> @@ -77,23 +119,48 @@ static void __send_IPI_mask_x2apic(
> {
> if ( !cpu_online(cpu) || (cpu == smp_processor_id()) )
> continue;
> - msr_content = (dest_mode == APIC_DEST_PHYSICAL)
> - ? cpu_physical_id(cpu) : per_cpu(cpu_2_logical_apicid, cpu);
> - msr_content = (msr_content << 32) | APIC_DM_FIXED | dest_mode | vector;
> + msr_content = cpu_physical_id(cpu);
> + msr_content = (msr_content << 32) | APIC_DM_FIXED |
> + APIC_DEST_PHYSICAL | vector;
> apic_wrmsr(APIC_ICR, msr_content);
> }
>
> local_irq_restore(flags);
> }
>
> -static void send_IPI_mask_x2apic_phys(const cpumask_t *cpumask, int vector)
> -{
> - __send_IPI_mask_x2apic(cpumask, vector, APIC_DEST_PHYSICAL);
> -}
> -
> static void send_IPI_mask_x2apic_cluster(const cpumask_t *cpumask, int vector)
> {
> - __send_IPI_mask_x2apic(cpumask, vector, APIC_DEST_LOGICAL);
> + unsigned int cpu = smp_processor_id();
> + cpumask_t *ipimask = per_cpu(scratch_mask, cpu);
> + const cpumask_t *cluster_cpus;
> + unsigned long flags;
> +
> + mb(); /* See above for an explanation. */
> +
> + local_irq_save(flags);
> +
> + cpumask_andnot(ipimask, &cpu_online_map, cpumask_of(cpu));
> +
> + for ( cpumask_and(ipimask, cpumask, ipimask); !cpumask_empty(ipimask);
> + cpumask_andnot(ipimask, ipimask, cluster_cpus) )
> + {
> + uint64_t msr_content = 0;
> +
> + cluster_cpus = per_cpu(cluster_cpus, cpumask_first(ipimask));
> + for_each_cpu ( cpu, cluster_cpus )
> + {
> + if ( !cpumask_test_cpu(cpu, ipimask) )
> + continue;
> + msr_content |= per_cpu(cpu_2_logical_apicid, cpu);
> + }
> +
> + BUG_ON(!msr_content);
> + msr_content = (msr_content << 32) | APIC_DM_FIXED |
> + APIC_DEST_LOGICAL | vector;
> + apic_wrmsr(APIC_ICR, msr_content);
> + }
> +
> + local_irq_restore(flags);
> }
>
> static const struct genapic apic_x2apic_phys = {
> @@ -116,15 +183,60 @@ static const struct genapic apic_x2apic_
> .init_apic_ldr = init_apic_ldr_x2apic_cluster,
> .clustered_apic_check = clustered_apic_check_x2apic,
> .target_cpus = target_cpus_all,
> - .vector_allocation_cpumask = vector_allocation_cpumask_phys,
> + .vector_allocation_cpumask = vector_allocation_cpumask_x2apic_cluster,
> .cpu_mask_to_apicid = cpu_mask_to_apicid_x2apic_cluster,
> .send_IPI_mask = send_IPI_mask_x2apic_cluster,
> .send_IPI_self = send_IPI_self_x2apic
> };
>
> +static int update_clusterinfo(
> + struct notifier_block *nfb, unsigned long action, void *hcpu)
> +{
> + unsigned int cpu = (unsigned long)hcpu;
> + int err = 0;
> +
> + switch (action) {
> + case CPU_UP_PREPARE:
> + per_cpu(cpu_2_logical_apicid, cpu) = BAD_APICID;
> + if ( !cluster_cpus_spare )
> + cluster_cpus_spare = xzalloc(cpumask_t);
> + if ( !cluster_cpus_spare ||
> + !alloc_cpumask_var(&per_cpu(scratch_mask, cpu)) )
> + err = -ENOMEM;
> + break;
> + case CPU_UP_CANCELED:
> + case CPU_DEAD:
> + if ( per_cpu(cluster_cpus, cpu) )
> + {
> + cpumask_clear_cpu(cpu, per_cpu(cluster_cpus, cpu));
> + if ( cpumask_empty(per_cpu(cluster_cpus, cpu)) )
> + xfree(per_cpu(cluster_cpus, cpu));
> + }
> + free_cpumask_var(per_cpu(scratch_mask, cpu));
> + break;
> + }
> +
> + return !err ? NOTIFY_DONE : notifier_from_errno(err);
> +}
> +
> +static struct notifier_block x2apic_cpu_nfb = {
> + .notifier_call = update_clusterinfo
> +};
> +
> const struct genapic *__init apic_x2apic_probe(void)
> {
> - return x2apic_phys ? &apic_x2apic_phys : &apic_x2apic_cluster;
> + if ( x2apic_phys )
> + return &apic_x2apic_phys;
> +
> + if ( !this_cpu(cluster_cpus) )
> + {
> + update_clusterinfo(NULL, CPU_UP_PREPARE,
> + (void *)(long)smp_processor_id());
> + init_apic_ldr_x2apic_cluster();
> + register_cpu_notifier(&x2apic_cpu_nfb);
> + }
> +
> + return &apic_x2apic_cluster;
> }
>
> void __init check_x2apic_preenabled(void)
>
>
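
Taken as a whole, the new cluster send path above boils down to: intersect the requested mask with the online CPUs, then repeatedly take the cluster of the first remaining CPU, OR together the logical IDs of every requested CPU in that cluster, issue a single ICR write for the lot, and strip that cluster from the work mask. A toy, self-contained restatement of that batching idea (plain bitmasks and invented helpers rather than Xen's cpumask API):

    #include <stdint.h>
    #include <stdio.h>

    #define NR_CPUS 32

    /* Pretend each CPU's x2APIC ID equals its CPU number. */
    static uint32_t logical_id(unsigned int cpu)
    {
        return ((cpu >> 4) << 16) | (1u << (cpu & 0xf));
    }

    static uint32_t cluster(unsigned int cpu)
    {
        return logical_id(cpu) >> 16;
    }

    static void send_ipi_mask(uint32_t cpumask, uint8_t vector)
    {
        while ( cpumask )
        {
            unsigned int cpu, first = 0;
            uint32_t dest = 0;

            while ( !(cpumask & (1u << first)) )
                first++;

            /* Batch every still-pending CPU that shares "first"'s cluster. */
            for ( cpu = first; cpu < NR_CPUS; cpu++ )
                if ( (cpumask & (1u << cpu)) && cluster(cpu) == cluster(first) )
                {
                    dest |= logical_id(cpu);
                    cpumask &= ~(1u << cpu);
                }

            /* One ICR write per cluster instead of one per target CPU. */
            printf("ICR <- dest %#010x, vector %#04x\n", dest, vector);
        }
    }

    int main(void)
    {
        send_ipi_mask((1u << 3) | (1u << 5) | (1u << 18) | (1u << 20), 0x20);
        return 0;
    }

With that example mask the four CPUs span two clusters, so two ICR writes go out where the previous per-CPU loop would have issued four.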
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel