x86: avoid flush IPI when possible Since CLFLUSH, other than WBINVD, is a cache coherency domain wide flush, there's no need to IPI other CPUs if this is the only flushing being requested. (As a secondary change, move a local variable into the scope where it's actually needed.) Signed-off-by: Jan Beulich --- v2: Adjust the meaning of flush_area_local()'s return value and prefix the function with a respective comment. --- a/xen/arch/x86/flushtlb.c +++ b/xen/arch/x86/flushtlb.c @@ -91,9 +91,13 @@ void write_cr3(unsigned long cr3) local_irq_restore(flags); } -void flush_area_local(const void *va, unsigned int flags) +/* + * The return value of this function is the passed in "flags" argument with + * bits cleared that have been fully (i.e. system-wide) taken care of, i.e. + * namely not requiring any further action on remote CPUs. + */ +unsigned int flush_area_local(const void *va, unsigned int flags) { - const struct cpuinfo_x86 *c = ¤t_cpu_data; unsigned int order = (flags - 1) & FLUSH_ORDER_MASK; unsigned long irqfl; @@ -130,6 +134,7 @@ void flush_area_local(const void *va, un if ( flags & FLUSH_CACHE ) { + const struct cpuinfo_x86 *c = ¤t_cpu_data; unsigned long i, sz = 0; if ( order < (BITS_PER_LONG - PAGE_SHIFT) ) @@ -146,6 +151,7 @@ void flush_area_local(const void *va, un "data16 clflush %0", /* clflushopt */ X86_FEATURE_CLFLUSHOPT, "m" (((const char *)va)[i])); + flags &= ~FLUSH_CACHE; } else { @@ -154,4 +160,6 @@ void flush_area_local(const void *va, un } local_irq_restore(irqfl); + + return flags; } --- a/xen/arch/x86/smp.c +++ b/xen/arch/x86/smp.c @@ -205,26 +205,30 @@ static unsigned int flush_flags; void invalidate_interrupt(struct cpu_user_regs *regs) { + unsigned int flags = flush_flags; ack_APIC_irq(); perfc_incr(ipis); - if ( !__sync_local_execstate() || - (flush_flags & (FLUSH_TLB_GLOBAL | FLUSH_CACHE)) ) - flush_area_local(flush_va, flush_flags); + if ( __sync_local_execstate() ) + flags &= ~FLUSH_TLB; + flush_area_local(flush_va, flags); cpumask_clear_cpu(smp_processor_id(), &flush_cpumask); } void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags) { + unsigned int cpu = smp_processor_id(); + ASSERT(local_irq_is_enabled()); - if ( cpumask_test_cpu(smp_processor_id(), mask) ) - flush_area_local(va, flags); + if ( cpumask_test_cpu(cpu, mask) ) + flags = flush_area_local(va, flags); - if ( !cpumask_subset(mask, cpumask_of(smp_processor_id())) ) + if ( (flags & ~FLUSH_ORDER_MASK) && + !cpumask_subset(mask, cpumask_of(cpu)) ) { spin_lock(&flush_lock); cpumask_and(&flush_cpumask, mask, &cpu_online_map); - cpumask_clear_cpu(smp_processor_id(), &flush_cpumask); + cpumask_clear_cpu(cpu, &flush_cpumask); flush_va = va; flush_flags = flags; send_IPI_mask(&flush_cpumask, INVALIDATE_TLB_VECTOR); --- a/xen/include/asm-x86/flushtlb.h +++ b/xen/include/asm-x86/flushtlb.h @@ -87,7 +87,7 @@ void write_cr3(unsigned long cr3); #define FLUSH_CACHE 0x400 /* Flush local TLBs/caches. */ -void flush_area_local(const void *va, unsigned int flags); +unsigned int flush_area_local(const void *va, unsigned int flags); #define flush_local(flags) flush_area_local(NULL, flags) /* Flush specified CPUs' TLBs/caches */