Rather than passing cpumasks by value in all cases (which is problematic for large NR_CPUS configurations), pass them 'by reference' (i.e. through a pointer to a const cpumask). On x86 this changes send_IPI_mask() to always only send IPIs to remote CPUs (meaning any caller needing to handle the current CPU as well has to do so on its own). IA64 changes compile-tested only. Signed-off-by: Jan Beulich --- 2009-05-19.orig/xen/arch/ia64/linux-xen/smp.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/ia64/linux-xen/smp.c 2009-05-25 08:54:09.000000000 +0200 @@ -57,19 +57,18 @@ //#if CONFIG_SMP || IA64 #if CONFIG_SMP //Huh? This seems to be used on ia64 even if !CONFIG_SMP -void smp_send_event_check_mask(cpumask_t mask) +void smp_send_event_check_mask(const cpumask_t *mask) { int cpu; /* Not for me. */ - cpu_clear(smp_processor_id(), mask); - if (cpus_empty(mask)) + if (cpus_subset(*mask, *cpumask_of(smp_processor_id()))) return; //printf("smp_send_event_check_mask called\n"); for (cpu = 0; cpu < NR_CPUS; ++cpu) - if (cpu_isset(cpu, mask)) + if (cpu_isset(cpu, *mask) && cpu != smp_processor_id()) platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); } #endif @@ -438,11 +437,11 @@ EXPORT_SYMBOL(smp_call_function); #ifdef XEN int -on_selected_cpus(cpumask_t selected, void (*func) (void *info), void *info, - int retry, int wait) +on_selected_cpus(const cpumask_t *selected, void (*func) (void *info), + void *info, int retry, int wait) { struct call_data_struct data; - unsigned int cpu, nr_cpus = cpus_weight(selected); + unsigned int cpu, nr_cpus = cpus_weight(*selected); ASSERT(local_irq_is_enabled()); @@ -460,7 +459,7 @@ on_selected_cpus(cpumask_t selected, voi call_data = &data; wmb(); - for_each_cpu_mask(cpu, selected) + for_each_cpu_mask(cpu, *selected) send_IPI_single(cpu, IPI_CALL_FUNC); while (atomic_read(wait ? 
&data.finished : &data.started) != nr_cpus) --- 2009-05-19.orig/xen/arch/ia64/linux-xen/sn/kernel/sn2_smp.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/ia64/linux-xen/sn/kernel/sn2_smp.c 2009-05-25 08:37:09.000000000 +0200 @@ -239,7 +239,7 @@ sn2_global_tlb_purge(unsigned long start flush_data.start = start; flush_data.end = end; flush_data.nbits = nbits; - on_selected_cpus(selected_cpus, sn_flush_ptcga_cpu, + on_selected_cpus(&selected_cpus, sn_flush_ptcga_cpu, &flush_data, 1, 1); } spin_unlock(&sn2_ptcg_lock2); --- 2009-05-19.orig/xen/arch/ia64/xen/mm.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/ia64/xen/mm.c 2009-05-25 08:37:09.000000000 +0200 @@ -3182,7 +3182,7 @@ int get_page_type(struct page_info *page if ( unlikely(!cpus_empty(mask)) ) { perfc_incr(need_flush_tlb_flush); - flush_tlb_mask(mask); + flush_tlb_mask(&mask); } /* We lose existing type, back pointer, and validity. */ --- 2009-05-19.orig/xen/arch/ia64/xen/vhpt.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/ia64/xen/vhpt.c 2009-05-25 08:37:09.000000000 +0200 @@ -548,22 +548,21 @@ void flush_tlb_for_log_dirty(struct doma cpus_clear (d->domain_dirty_cpumask); } -void flush_tlb_mask(cpumask_t mask) +void flush_tlb_mask(const cpumask_t *mask) { int cpu; cpu = smp_processor_id(); - if (cpu_isset (cpu, mask)) { - cpu_clear(cpu, mask); + if (cpu_isset(cpu, *mask)) flush_tlb_vhpt_all (NULL); - } - if (cpus_empty(mask)) + if (cpus_subset(*mask, *cpumask_of(cpu))) return; - for_each_cpu_mask (cpu, mask) - smp_call_function_single - (cpu, (void (*)(void *))flush_tlb_vhpt_all, NULL, 1, 1); + for_each_cpu_mask (cpu, *mask) + if (cpu != smp_processor_id()) + smp_call_function_single + (cpu, (void (*)(void *))flush_tlb_vhpt_all, NULL, 1, 1); } #ifdef PERF_COUNTERS --- 2009-05-19.orig/xen/arch/x86/acpi/cpufreq/cpufreq.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/acpi/cpufreq/cpufreq.c 2009-05-25 08:37:09.000000000 +0200 @@ -186,7 +186,7 @@ static 
void drv_read(struct drv_cmd *cmd if (likely(cpu_isset(smp_processor_id(), cmd->mask))) do_drv_read((void *)cmd); else - on_selected_cpus( cmd->mask, do_drv_read, (void *)cmd, 0, 1); + on_selected_cpus(&cmd->mask, do_drv_read, (void *)cmd, 0, 1); } static void drv_write(struct drv_cmd *cmd) @@ -195,7 +195,7 @@ static void drv_write(struct drv_cmd *cm cpu_isset(smp_processor_id(), cmd->mask)) do_drv_write((void *)cmd); else - on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0); + on_selected_cpus(&cmd->mask, do_drv_write, (void *)cmd, 0, 0); } static u32 get_cur_val(cpumask_t mask) @@ -274,7 +274,6 @@ static unsigned int get_measured_perf(un struct cpufreq_policy *policy; struct perf_pair readin, cur, *saved; unsigned int perf_percent; - cpumask_t cpumask; unsigned int retval; if (!cpu_online(cpu)) @@ -303,8 +302,7 @@ static unsigned int get_measured_perf(un if (cpu == smp_processor_id()) { read_measured_perf_ctrs((void *)&readin); } else { - cpumask = cpumask_of_cpu(cpu); - on_selected_cpus(cpumask, read_measured_perf_ctrs, + on_selected_cpus(cpumask_of(cpu), read_measured_perf_ctrs, (void *)&readin, 0, 1); } --- 2009-05-19.orig/xen/arch/x86/acpi/cpufreq/powernow.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/acpi/cpufreq/powernow.c 2009-05-25 08:37:09.000000000 +0200 @@ -121,7 +121,7 @@ static int powernow_cpufreq_target(struc cmd.val = next_perf_state; - on_selected_cpus( cmd.mask, transition_pstate, (void *) &cmd, 0, 0); + on_selected_cpus(&cmd.mask, transition_pstate, (void *) &cmd, 0, 0); perf->state = next_perf_state; policy->cur = freqs.new; --- 2009-05-19.orig/xen/arch/x86/cpu/mcheck/mce.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/cpu/mcheck/mce.c 2009-05-25 08:37:09.000000000 +0200 @@ -1205,8 +1205,8 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u add_taint(TAINT_ERROR_INJECT); - on_selected_cpus(cpumask_of_cpu(target), - x86_mc_msrinject, mc_msrinject, 1, 1); + on_selected_cpus(cpumask_of(target), 
x86_mc_msrinject, + mc_msrinject, 1, 1); break; @@ -1225,8 +1225,8 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u add_taint(TAINT_ERROR_INJECT); - on_selected_cpus(cpumask_of_cpu(target), x86_mc_mceinject, - mc_mceinject, 1, 1); + on_selected_cpus(cpumask_of(target), x86_mc_mceinject, + mc_mceinject, 1, 1); break; default: --- 2009-05-19.orig/xen/arch/x86/cpu/mtrr/main.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/cpu/mtrr/main.c 2009-05-25 08:37:09.000000000 +0200 @@ -688,7 +688,7 @@ void mtrr_save_state(void) if (cpu == 0) mtrr_save_fixed_ranges(NULL); else - on_selected_cpus(cpumask_of_cpu(0), mtrr_save_fixed_ranges, NULL, 1, 1); + on_selected_cpus(cpumask_of(0), mtrr_save_fixed_ranges, NULL, 1, 1); put_cpu(); } --- 2009-05-19.orig/xen/arch/x86/crash.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/crash.c 2009-05-25 08:37:09.000000000 +0200 @@ -58,10 +58,8 @@ static int crash_nmi_callback(struct cpu */ static void smp_send_nmi_allbutself(void) { - cpumask_t allbutself = cpu_online_map; - cpu_clear(smp_processor_id(), allbutself); - if ( !cpus_empty(allbutself) ) - send_IPI_mask(allbutself, APIC_DM_NMI); + if ( !cpus_subset(cpu_online_map, *cpumask_of(smp_processor_id())) ) + send_IPI_mask(&cpu_online_map, APIC_DM_NMI); } static void nmi_shootdown_cpus(void) --- 2009-05-19.orig/xen/arch/x86/domain.c 2009-05-19 16:40:37.000000000 +0200 +++ 2009-05-19/xen/arch/x86/domain.c 2009-05-25 08:37:09.000000000 +0200 @@ -1316,7 +1316,7 @@ void context_switch(struct vcpu *prev, s if ( unlikely(!cpu_isset(cpu, dirty_mask) && !cpus_empty(dirty_mask)) ) { /* Other cpus call __sync_lazy_execstate from flush ipi handler. */ - flush_tlb_mask(dirty_mask); + flush_tlb_mask(&dirty_mask); } if ( is_hvm_vcpu(prev) && !list_empty(&prev->arch.hvm_vcpu.tm_list) ) @@ -1410,7 +1410,7 @@ void sync_vcpu_execstate(struct vcpu *v) (void)__sync_lazy_execstate(); /* Other cpus call __sync_lazy_execstate from flush ipi handler. 
*/ - flush_tlb_mask(v->vcpu_dirty_cpumask); + flush_tlb_mask(&v->vcpu_dirty_cpumask); } struct migrate_info { --- 2009-05-19.orig/xen/arch/x86/genapic/x2apic.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/genapic/x2apic.c 2009-05-25 08:37:09.000000000 +0200 @@ -56,7 +56,7 @@ unsigned int cpu_mask_to_apicid_x2apic(c return cpu_physical_id(first_cpu(cpumask)); } -void send_IPI_mask_x2apic(cpumask_t cpumask, int vector) +void send_IPI_mask_x2apic(const cpumask_t *cpumask, int vector) { unsigned int cpu, cfg; unsigned long flags; @@ -76,8 +76,9 @@ void send_IPI_mask_x2apic(cpumask_t cpum local_irq_save(flags); cfg = APIC_DM_FIXED | 0 /* no shorthand */ | APIC_DEST_PHYSICAL | vector; - for_each_cpu_mask ( cpu, cpumask ) - apic_wrmsr(APIC_ICR, cfg, cpu_physical_id(cpu)); + for_each_cpu_mask ( cpu, *cpumask ) + if ( cpu != smp_processor_id() ) + apic_wrmsr(APIC_ICR, cfg, cpu_physical_id(cpu)); local_irq_restore(flags); } --- 2009-05-19.orig/xen/arch/x86/hpet.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/hpet.c 2009-05-25 08:37:09.000000000 +0200 @@ -617,7 +617,7 @@ void hpet_disable_legacy_broadcast(void) spin_unlock_irq(&legacy_hpet_event.lock); - smp_send_event_check_mask(cpu_online_map); + smp_send_event_check_mask(&cpu_online_map); } void hpet_broadcast_enter(void) --- 2009-05-19.orig/xen/arch/x86/hvm/hvm.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/hvm/hvm.c 2009-05-25 08:37:09.000000000 +0200 @@ -2408,7 +2408,7 @@ static int hvmop_flush_tlb_all(void) paging_update_cr3(v); /* Flush all dirty TLBs. */ - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(&d->domain_dirty_cpumask); /* Done. 
*/ for_each_vcpu ( d, v ) --- 2009-05-19.orig/xen/arch/x86/hvm/vmx/vmcs.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/hvm/vmx/vmcs.c 2009-05-25 08:37:09.000000000 +0200 @@ -264,7 +264,7 @@ static void vmx_clear_vmcs(struct vcpu * int cpu = v->arch.hvm_vmx.active_cpu; if ( cpu != -1 ) - on_selected_cpus(cpumask_of_cpu(cpu), __vmx_clear_vmcs, v, 1, 1); + on_selected_cpus(cpumask_of(cpu), __vmx_clear_vmcs, v, 1, 1); } static void vmx_load_vmcs(struct vcpu *v) @@ -899,7 +899,7 @@ void vmx_do_resume(struct vcpu *v) { int cpu = v->arch.hvm_vmx.active_cpu; if ( cpu != -1 ) - on_selected_cpus(cpumask_of_cpu(cpu), wbinvd_ipi, NULL, 1, 1); + on_selected_cpus(cpumask_of(cpu), wbinvd_ipi, NULL, 1, 1); } vmx_clear_vmcs(v); --- 2009-05-19.orig/xen/arch/x86/hvm/vmx/vmx.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/hvm/vmx/vmx.c 2009-05-25 08:37:09.000000000 +0200 @@ -2163,7 +2163,7 @@ static void ept_handle_violation(unsigne { paging_mark_dirty(d, mfn_x(mfn)); p2m_change_type(d, gfn, p2m_ram_logdirty, p2m_ram_rw); - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(&d->domain_dirty_cpumask); } return; } --- 2009-05-19.orig/xen/arch/x86/irq.c 2009-05-20 11:46:56.000000000 +0200 +++ 2009-05-19/xen/arch/x86/irq.c 2009-05-25 08:37:09.000000000 +0200 @@ -516,7 +516,7 @@ static void __pirq_guest_eoi(struct doma } if ( !cpus_empty(cpu_eoi_map) ) - on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 0); + on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 1, 0); } int pirq_guest_eoi(struct domain *d, int irq) @@ -755,7 +755,7 @@ static irq_guest_action_t *__pirq_guest_ { cpu_eoi_map = action->cpu_eoi_map; spin_unlock_irq(&desc->lock); - on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 0); + on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 1, 0); spin_lock_irq(&desc->lock); } break; @@ -793,7 +793,7 @@ static irq_guest_action_t *__pirq_guest_ { BUG_ON(action->ack_type != ACKTYPE_EOI); spin_unlock_irq(&desc->lock); - 
on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 1); + on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 1, 1); spin_lock_irq(&desc->lock); } --- 2009-05-19.orig/xen/arch/x86/machine_kexec.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/machine_kexec.c 2009-05-25 08:37:09.000000000 +0200 @@ -91,7 +91,6 @@ static void __machine_reboot_kexec(void void machine_reboot_kexec(xen_kexec_image_t *image) { int reboot_cpu_id; - cpumask_t reboot_cpu; reboot_cpu_id = 0; @@ -100,9 +99,8 @@ void machine_reboot_kexec(xen_kexec_imag if ( reboot_cpu_id != smp_processor_id() ) { - cpus_clear(reboot_cpu); - cpu_set(reboot_cpu_id, reboot_cpu); - on_selected_cpus(reboot_cpu, __machine_reboot_kexec, image, 1, 0); + on_selected_cpus(cpumask_of(reboot_cpu_id), __machine_reboot_kexec, + image, 1, 0); for (;;) ; /* nothing */ } --- 2009-05-19.orig/xen/arch/x86/mm.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/mm.c 2009-05-25 08:37:09.000000000 +0200 @@ -510,7 +510,7 @@ static void invalidate_shadow_ldt(struct /* Rid TLBs of stale mappings (guest mappings and shadow mappings). */ if ( flush ) - flush_tlb_mask(v->vcpu_dirty_cpumask); + flush_tlb_mask(&v->vcpu_dirty_cpumask); out: spin_unlock(&v->arch.shadow_ldt_lock); @@ -1235,7 +1235,7 @@ static void pae_flush_pgd( paging_update_cr3(v); cpus_or(m, m, v->vcpu_dirty_cpumask); } - flush_tlb_mask(m); + flush_tlb_mask(&m); } /* If below 4GB then the pgdir is not shadowed in low memory. */ @@ -1260,7 +1260,7 @@ static void pae_flush_pgd( spin_unlock(&cache->lock); } - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(&d->domain_dirty_cpumask); } #else # define pae_flush_pgd(mfn, idx, nl3e) ((void)0) @@ -2275,7 +2275,7 @@ static int __get_page_type(struct page_i ((nx & PGT_type_mask) == PGT_writable_page)) ) { perfc_incr(need_flush_tlb_flush); - flush_tlb_mask(mask); + flush_tlb_mask(&mask); } /* We lose existing type and validity. 
*/ @@ -2476,7 +2476,7 @@ static void process_deferred_ops(void) if ( deferred_ops & (DOP_FLUSH_ALL_TLBS|DOP_FLUSH_TLB) ) { if ( deferred_ops & DOP_FLUSH_ALL_TLBS ) - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(&d->domain_dirty_cpumask); else flush_tlb_local(); } @@ -2811,9 +2811,9 @@ int do_mmuext_op( } pmask = vcpumask_to_pcpumask(d, vmask); if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI ) - flush_tlb_mask(pmask); + flush_tlb_mask(&pmask); else - flush_tlb_one_mask(pmask, op.arg1.linear_addr); + flush_tlb_one_mask(&pmask, op.arg1.linear_addr); break; } @@ -2822,7 +2822,7 @@ int do_mmuext_op( break; case MMUEXT_INVLPG_ALL: - flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr); + flush_tlb_one_mask(&d->domain_dirty_cpumask, op.arg1.linear_addr); break; case MMUEXT_FLUSH_CACHE: @@ -3639,7 +3639,7 @@ int do_update_va_mapping(unsigned long v pmask = vcpumask_to_pcpumask(d, vmask); if ( cpu_isset(smp_processor_id(), pmask) ) this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB; - flush_tlb_mask(pmask); + flush_tlb_mask(&pmask); break; } break; @@ -3657,7 +3657,7 @@ int do_update_va_mapping(unsigned long v flush_tlb_one_local(va); break; case UVMF_ALL: - flush_tlb_one_mask(d->domain_dirty_cpumask, va); + flush_tlb_one_mask(&d->domain_dirty_cpumask, va); break; default: if ( unlikely(!is_pv_32on64_domain(d) ? @@ -3667,7 +3667,7 @@ int do_update_va_mapping(unsigned long v pmask = vcpumask_to_pcpumask(d, vmask); if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB ) cpu_clear(smp_processor_id(), pmask); - flush_tlb_one_mask(pmask, va); + flush_tlb_one_mask(&pmask, va); break; } break; --- 2009-05-19.orig/xen/arch/x86/mm/hap/hap.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/mm/hap/hap.c 2009-05-25 08:37:09.000000000 +0200 @@ -64,7 +64,7 @@ int hap_enable_log_dirty(struct domain * /* set l1e entries of P2M table to be read-only. 
*/ p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty); - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(&d->domain_dirty_cpumask); return 0; } @@ -83,7 +83,7 @@ void hap_clean_dirty_bitmap(struct domai { /* set l1e entries of P2M table to be read-only. */ p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty); - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(&d->domain_dirty_cpumask); } /************************************************/ @@ -643,7 +643,7 @@ hap_write_p2m_entry(struct vcpu *v, unsi safe_write_pte(p, new); if ( (old_flags & _PAGE_PRESENT) && (level == 1 || (level == 2 && (old_flags & _PAGE_PSE))) ) - flush_tlb_mask(v->domain->domain_dirty_cpumask); + flush_tlb_mask(&v->domain->domain_dirty_cpumask); #if CONFIG_PAGING_LEVELS == 3 /* install P2M in monitor table for PAE Xen */ --- 2009-05-19.orig/xen/arch/x86/mm/shadow/common.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/mm/shadow/common.c 2009-05-25 08:37:09.000000000 +0200 @@ -695,7 +695,7 @@ static int oos_remove_write_access(struc } if ( ftlb ) - flush_tlb_mask(v->domain->domain_dirty_cpumask); + flush_tlb_mask(&v->domain->domain_dirty_cpumask); return 0; } @@ -1145,7 +1145,7 @@ sh_validate_guest_pt_write(struct vcpu * rc = sh_validate_guest_entry(v, gmfn, entry, size); if ( rc & SHADOW_SET_FLUSH ) /* Need to flush TLBs to pick up shadow PT changes */ - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(&d->domain_dirty_cpumask); if ( rc & SHADOW_SET_ERROR ) { /* This page is probably not a pagetable any more: tear it out of the @@ -1393,7 +1393,7 @@ static void _shadow_prealloc( /* See if that freed up enough space */ if ( space_is_available(d, order, count) ) { - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(&d->domain_dirty_cpumask); return; } } @@ -1447,7 +1447,7 @@ static void shadow_blow_tables(struct do pagetable_get_mfn(v->arch.shadow_table[i])); /* Make sure everyone sees the unshadowings */ - 
flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(&d->domain_dirty_cpumask); } void shadow_blow_tables_per_domain(struct domain *d) @@ -1554,7 +1554,7 @@ mfn_t shadow_alloc(struct domain *d, if ( unlikely(!cpus_empty(mask)) ) { perfc_incr(shadow_alloc_tlbflush); - flush_tlb_mask(mask); + flush_tlb_mask(&mask); } /* Now safe to clear the page for reuse */ p = sh_map_domain_page(page_to_mfn(sp+i)); @@ -2803,7 +2803,7 @@ void sh_remove_shadows(struct vcpu *v, m /* Need to flush TLBs now, so that linear maps are safe next time we * take a fault. */ - flush_tlb_mask(v->domain->domain_dirty_cpumask); + flush_tlb_mask(&v->domain->domain_dirty_cpumask); if ( do_locking ) shadow_unlock(v->domain); } @@ -3435,7 +3435,7 @@ shadow_write_p2m_entry(struct vcpu *v, u { sh_remove_all_shadows_and_parents(v, mfn); if ( sh_remove_all_mappings(v, mfn) ) - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(&d->domain_dirty_cpumask); } } @@ -3474,7 +3474,7 @@ shadow_write_p2m_entry(struct vcpu *v, u } omfn = _mfn(mfn_x(omfn) + 1); } - flush_tlb_mask(flushmask); + flush_tlb_mask(&flushmask); if ( npte ) unmap_domain_page(npte); @@ -3752,7 +3752,7 @@ int shadow_track_dirty_vram(struct domai } } if ( flush_tlb ) - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(&d->domain_dirty_cpumask); goto out; out_sl1ma: --- 2009-05-19.orig/xen/arch/x86/mm/shadow/multi.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/mm/shadow/multi.c 2009-05-25 08:37:09.000000000 +0200 @@ -3146,7 +3146,7 @@ static int sh_page_fault(struct vcpu *v, */ perfc_incr(shadow_rm_write_flush_tlb); atomic_inc(&d->arch.paging.shadow.gtable_dirty_version); - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(&d->domain_dirty_cpumask); } #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) @@ -4135,7 +4135,7 @@ sh_update_cr3(struct vcpu *v, int do_loc * (old) shadow linear maps in the writeable mapping heuristics. 
*/ #if GUEST_PAGING_LEVELS == 2 if ( sh_remove_write_access(v, gmfn, 2, 0) != 0 ) - flush_tlb_mask(v->domain->domain_dirty_cpumask); + flush_tlb_mask(&v->domain->domain_dirty_cpumask); sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow); #elif GUEST_PAGING_LEVELS == 3 /* PAE guests have four shadow_table entries, based on the @@ -4158,7 +4158,7 @@ sh_update_cr3(struct vcpu *v, int do_loc } } if ( flush ) - flush_tlb_mask(v->domain->domain_dirty_cpumask); + flush_tlb_mask(&v->domain->domain_dirty_cpumask); /* Now install the new shadows. */ for ( i = 0; i < 4; i++ ) { @@ -4179,7 +4179,7 @@ sh_update_cr3(struct vcpu *v, int do_loc } #elif GUEST_PAGING_LEVELS == 4 if ( sh_remove_write_access(v, gmfn, 4, 0) != 0 ) - flush_tlb_mask(v->domain->domain_dirty_cpumask); + flush_tlb_mask(&v->domain->domain_dirty_cpumask); sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow); #else #error This should never happen --- 2009-05-19.orig/xen/arch/x86/shutdown.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/shutdown.c 2009-05-25 08:37:09.000000000 +0200 @@ -310,7 +310,7 @@ void machine_restart(unsigned int delay_ if ( get_apic_id() != boot_cpu_physical_apicid ) { /* Send IPI to the boot CPU (logical cpu 0). */ - on_selected_cpus(cpumask_of_cpu(0), __machine_restart, + on_selected_cpus(cpumask_of(0), __machine_restart, &delay_millisecs, 1, 0); for ( ; ; ) halt(); --- 2009-05-19.orig/xen/arch/x86/smp.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/smp.c 2009-05-25 08:37:09.000000000 +0200 @@ -84,12 +84,14 @@ void apic_wait_icr_idle(void) cpu_relax(); } -void send_IPI_mask_flat(cpumask_t cpumask, int vector) +void send_IPI_mask_flat(const cpumask_t *cpumask, int vector) { - unsigned long mask = cpus_addr(cpumask)[0]; + unsigned long mask = cpus_addr(*cpumask)[0]; unsigned long cfg; unsigned long flags; + mask &= ~(1UL << smp_processor_id()); + /* An IPI with no target generates a send accept error from P5/P6 APICs. 
*/ WARN_ON(mask == 0); @@ -119,15 +121,18 @@ void send_IPI_mask_flat(cpumask_t cpumas local_irq_restore(flags); } -void send_IPI_mask_phys(cpumask_t mask, int vector) +void send_IPI_mask_phys(const cpumask_t *mask, int vector) { unsigned long cfg, flags; unsigned int query_cpu; local_irq_save(flags); - for_each_cpu_mask ( query_cpu, mask ) + for_each_cpu_mask ( query_cpu, *mask ) { + if ( query_cpu == smp_processor_id() ) + continue; + /* * Wait for idle. */ @@ -170,20 +175,17 @@ fastcall void smp_invalidate_interrupt(v irq_exit(); } -void flush_area_mask(cpumask_t mask, const void *va, unsigned int flags) +void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags) { ASSERT(local_irq_is_enabled()); - if ( cpu_isset(smp_processor_id(), mask) ) - { + if ( cpu_isset(smp_processor_id(), *mask) ) flush_area_local(va, flags); - cpu_clear(smp_processor_id(), mask); - } - if ( !cpus_empty(mask) ) + if ( !cpus_subset(*mask, *cpumask_of(smp_processor_id())) ) { spin_lock(&flush_lock); - flush_cpumask = mask; + cpus_andnot(flush_cpumask, *mask, *cpumask_of(smp_processor_id())); flush_va = va; flush_flags = flags; send_IPI_mask(mask, INVALIDATE_TLB_VECTOR); @@ -201,17 +203,16 @@ void new_tlbflush_clock_period(void) /* Flush everyone else. We definitely flushed just before entry. */ allbutself = cpu_online_map; cpu_clear(smp_processor_id(), allbutself); - flush_mask(allbutself, FLUSH_TLB); + flush_mask(&allbutself, FLUSH_TLB); /* No need for atomicity: we are the only possible updater. 
*/ ASSERT(tlbflush_clock == 0); tlbflush_clock++; } -void smp_send_event_check_mask(cpumask_t mask) +void smp_send_event_check_mask(const cpumask_t *mask) { - cpu_clear(smp_processor_id(), mask); - if ( !cpus_empty(mask) ) + if ( !cpus_subset(*mask, *cpumask_of(smp_processor_id())) ) send_IPI_mask(mask, EVENT_CHECK_VECTOR); } @@ -225,7 +226,7 @@ struct call_data_struct { int wait; atomic_t started; atomic_t finished; - cpumask_t selected; + const cpumask_t *selected; }; static DEFINE_SPINLOCK(call_lock); @@ -239,18 +240,18 @@ int smp_call_function( { cpumask_t allbutself = cpu_online_map; cpu_clear(smp_processor_id(), allbutself); - return on_selected_cpus(allbutself, func, info, retry, wait); + return on_selected_cpus(&allbutself, func, info, retry, wait); } int on_selected_cpus( - cpumask_t selected, + const cpumask_t *selected, void (*func) (void *info), void *info, int retry, int wait) { struct call_data_struct data; - unsigned int nr_cpus = cpus_weight(selected); + unsigned int nr_cpus = cpus_weight(*selected); ASSERT(local_irq_is_enabled()); @@ -258,7 +259,7 @@ int on_selected_cpus( if ( unlikely(!cpu_has_apic) ) { ASSERT(num_online_cpus() == 1); - if ( cpu_isset(0, selected) ) + if ( cpu_isset(0, *selected) ) { local_irq_disable(); func(info); @@ -279,9 +280,18 @@ int on_selected_cpus( spin_lock(&call_lock); - call_data = &data; + if ( !cpus_subset(*selected, *cpumask_of(smp_processor_id())) ) + { + call_data = &data; - send_IPI_mask(selected, CALL_FUNCTION_VECTOR); + send_IPI_mask(selected, CALL_FUNCTION_VECTOR); + } + + if ( cpu_isset(smp_processor_id(), *selected) ) + { + func(info); + --nr_cpus; + } while ( atomic_read(wait ? 
&data.finished : &data.started) != nr_cpus ) cpu_relax(); @@ -349,7 +359,7 @@ fastcall void smp_call_function_interrup ack_APIC_irq(); perfc_incr(ipis); - if ( !cpu_isset(smp_processor_id(), call_data->selected) ) + if ( !cpu_isset(smp_processor_id(), *call_data->selected) ) return; irq_enter(); --- 2009-05-19.orig/xen/arch/x86/time.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/arch/x86/time.c 2009-05-25 08:37:09.000000000 +0200 @@ -1189,7 +1189,7 @@ static void time_calibration(void *unuse }; /* @wait=1 because we must wait for all cpus before freeing @r. */ - on_selected_cpus(r.cpu_calibration_map, + on_selected_cpus(&r.cpu_calibration_map, opt_consistent_tscs ? time_calibration_tsc_rendezvous : time_calibration_std_rendezvous, --- 2009-05-19.orig/xen/common/Makefile 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/common/Makefile 2009-05-25 08:37:09.000000000 +0200 @@ -1,4 +1,5 @@ obj-y += bitmap.o +obj-y += cpu.o obj-y += domctl.o obj-y += domain.o obj-y += event_channel.o --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ 2009-05-19/xen/common/cpu.c 2009-05-25 08:37:09.000000000 +0200 @@ -0,0 +1,26 @@ +#include <xen/config.h> +#include <xen/cpumask.h> + +/* + * cpu_bit_bitmap[] is a special, "compressed" data structure that + * represents all NR_CPUS bits binary values of 1<<nr. + * + * It is used by cpumask_of() to get a constant address to a CPU + * mask value that has a single bit set only. + */ + +/* cpu_bit_bitmap[0] is empty - so we can back into it */ +#define MASK_DECLARE_1(x) [x+1][0] = 1UL << (x) +#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1) +#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2) +#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4) + +const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = { + + MASK_DECLARE_8(0), MASK_DECLARE_8(8), + MASK_DECLARE_8(16), MASK_DECLARE_8(24), +#if BITS_PER_LONG > 32 + MASK_DECLARE_8(32), MASK_DECLARE_8(40), + MASK_DECLARE_8(48), MASK_DECLARE_8(56), +#endif +}; --- 2009-05-19.orig/xen/common/grant_table.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/common/grant_table.c 2009-05-25 08:37:09.000000000 +0200 @@ -715,7 +715,7 @@ gnttab_unmap_grant_ref( goto fault; } - flush_tlb_mask(current->domain->domain_dirty_cpumask); + flush_tlb_mask(&current->domain->domain_dirty_cpumask); for ( i = 0; i < partial_done; i++ ) __gnttab_unmap_common_complete(&(common[i])); @@ -727,7 +727,7 @@ gnttab_unmap_grant_ref( return 0; fault: - flush_tlb_mask(current->domain->domain_dirty_cpumask); + flush_tlb_mask(&current->domain->domain_dirty_cpumask); for ( i = 0; i < partial_done;
i++ ) __gnttab_unmap_common_complete(&(common[i])); @@ -774,7 +774,7 @@ gnttab_unmap_and_replace( goto fault; } - flush_tlb_mask(current->domain->domain_dirty_cpumask); + flush_tlb_mask(&current->domain->domain_dirty_cpumask); for ( i = 0; i < partial_done; i++ ) __gnttab_unmap_common_complete(&(common[i])); @@ -786,7 +786,7 @@ gnttab_unmap_and_replace( return 0; fault: - flush_tlb_mask(current->domain->domain_dirty_cpumask); + flush_tlb_mask(&current->domain->domain_dirty_cpumask); for ( i = 0; i < partial_done; i++ ) __gnttab_unmap_common_complete(&(common[i])); @@ -1123,7 +1123,7 @@ gnttab_transfer( #ifndef __ia64__ /* IA64 implicitly replaces the old page in steal_page(). */ guest_physmap_remove_page(d, gop.mfn, mfn, 0); #endif - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(&d->domain_dirty_cpumask); /* Find the target domain. */ if ( unlikely((e = rcu_lock_domain_by_id(gop.domid)) == NULL) ) --- 2009-05-19.orig/xen/common/keyhandler.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/common/keyhandler.c 2009-05-25 08:37:09.000000000 +0200 @@ -119,7 +119,7 @@ static void dump_registers(unsigned char if ( cpu == smp_processor_id() ) continue; printk("\n*** Dumping CPU%d host state: ***\n", cpu); - on_selected_cpus(cpumask_of_cpu(cpu), __dump_execstate, NULL, 1, 1); + on_selected_cpus(cpumask_of(cpu), __dump_execstate, NULL, 1, 1); } printk("\n"); --- 2009-05-19.orig/xen/common/page_alloc.c 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/common/page_alloc.c 2009-05-25 08:37:09.000000000 +0200 @@ -422,7 +422,7 @@ static struct page_info *alloc_heap_page if ( unlikely(!cpus_empty(mask)) ) { perfc_incr(need_flush_tlb_flush); - flush_tlb_mask(mask); + flush_tlb_mask(&mask); } return pg; --- 2009-05-19.orig/xen/include/asm-ia64/tlbflush.h 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/include/asm-ia64/tlbflush.h 2009-05-25 08:37:09.000000000 +0200 @@ -39,7 +39,7 @@ void domain_flush_tlb_vhpt(struct domain void flush_tlb_for_log_dirty(struct domain
*d); /* Flush v-tlb on cpus set in mask for current domain. */ -void flush_tlb_mask(cpumask_t mask); +void flush_tlb_mask(const cpumask_t *mask); /* Flush local machine TLB. */ void local_flush_tlb_all (void); --- 2009-05-19.orig/xen/include/asm-x86/flushtlb.h 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/include/asm-x86/flushtlb.h 2009-05-25 08:37:09.000000000 +0200 @@ -90,12 +90,12 @@ void flush_area_local(const void *va, un #define flush_local(flags) flush_area_local(NULL, flags) /* Flush specified CPUs' TLBs/caches */ -void flush_area_mask(cpumask_t, const void *va, unsigned int flags); +void flush_area_mask(const cpumask_t *, const void *va, unsigned int flags); #define flush_mask(mask, flags) flush_area_mask(mask, NULL, flags) /* Flush all CPUs' TLBs/caches */ -#define flush_area_all(va, flags) flush_area_mask(cpu_online_map, va, flags) -#define flush_all(flags) flush_mask(cpu_online_map, flags) +#define flush_area_all(va, flags) flush_area_mask(&cpu_online_map, va, flags) +#define flush_all(flags) flush_mask(&cpu_online_map, flags) /* Flush local TLBs */ #define flush_tlb_local() \ @@ -111,8 +111,8 @@ void flush_area_mask(cpumask_t, const vo /* Flush all CPUs' TLBs */ #define flush_tlb_all() \ - flush_tlb_mask(cpu_online_map) + flush_tlb_mask(&cpu_online_map) #define flush_tlb_one_all(v) \ - flush_tlb_one_mask(cpu_online_map, v) + flush_tlb_one_mask(&cpu_online_map, v) #endif /* __FLUSHTLB_H__ */ --- 2009-05-19.orig/xen/include/asm-x86/genapic.h 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/include/asm-x86/genapic.h 2009-05-25 08:37:09.000000000 +0200 @@ -35,7 +35,7 @@ struct genapic { void (*clustered_apic_check)(void); cpumask_t (*target_cpus)(void); unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); - void (*send_IPI_mask)(cpumask_t mask, int vector); + void (*send_IPI_mask)(const cpumask_t *mask, int vector); }; #define APICFUNC(x) .x = x @@ -52,7 +52,7 @@ void init_apic_ldr_flat(void); void clustered_apic_check_flat(void); 
cpumask_t target_cpus_flat(void); unsigned int cpu_mask_to_apicid_flat(cpumask_t cpumask); -void send_IPI_mask_flat(cpumask_t mask, int vector); +void send_IPI_mask_flat(const cpumask_t *mask, int vector); #define GENAPIC_FLAT \ .int_delivery_mode = dest_LowestPrio, \ .int_dest_mode = 1 /* logical delivery */, \ @@ -66,7 +66,7 @@ void init_apic_ldr_x2apic(void); void clustered_apic_check_x2apic(void); cpumask_t target_cpus_x2apic(void); unsigned int cpu_mask_to_apicid_x2apic(cpumask_t cpumask); -void send_IPI_mask_x2apic(cpumask_t mask, int vector); +void send_IPI_mask_x2apic(const cpumask_t *mask, int vector); #define GENAPIC_X2APIC \ .int_delivery_mode = dest_Fixed, \ .int_dest_mode = 0 /* physical delivery */, \ @@ -80,7 +80,7 @@ void init_apic_ldr_phys(void); void clustered_apic_check_phys(void); cpumask_t target_cpus_phys(void); unsigned int cpu_mask_to_apicid_phys(cpumask_t cpumask); -void send_IPI_mask_phys(cpumask_t mask, int vector); +void send_IPI_mask_phys(const cpumask_t *mask, int vector); #define GENAPIC_PHYS \ .int_delivery_mode = dest_Fixed, \ .int_dest_mode = 0 /* physical delivery */, \ --- 2009-05-19.orig/xen/include/xen/cpumask.h 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/include/xen/cpumask.h 2009-05-25 08:37:09.000000000 +0200 @@ -80,7 +80,6 @@ #include typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; -extern cpumask_t _unused_cpumask_arg_; #define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) static inline void __cpu_set(int cpu, volatile cpumask_t *dstp) @@ -244,17 +243,23 @@ static inline int __cycle_cpu(int n, con return nxt; } -#define cpumask_of_cpu(cpu) \ -({ \ - typeof(_unused_cpumask_arg_) m; \ - if (sizeof(m) == sizeof(unsigned long)) { \ - m.bits[0] = 1UL<<(cpu); \ - } else { \ - cpus_clear(m); \ - cpu_set((cpu), m); \ - } \ - m; \ -}) +/* + * Special-case data structure for "single bit set only" constant CPU masks. 
+ * + * We pre-generate all the 64 (or 32) possible bit positions, with enough + * padding to the left and the right, and return the constant pointer + * appropriately offset. + */ +extern const unsigned long + cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; + +static inline const cpumask_t *cpumask_of(unsigned int cpu) +{ + const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; + return (const cpumask_t *)(p - cpu / BITS_PER_LONG); +} + +#define cpumask_of_cpu(cpu) (*cpumask_of(cpu)) #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) --- 2009-05-19.orig/xen/include/xen/smp.h 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/include/xen/smp.h 2009-05-25 08:37:09.000000000 +0200 @@ -9,9 +9,9 @@ */ extern void smp_send_stop(void); -extern void smp_send_event_check_mask(cpumask_t mask); +extern void smp_send_event_check_mask(const cpumask_t *mask); #define smp_send_event_check_cpu(cpu) \ - smp_send_event_check_mask(cpumask_of_cpu(cpu)) + smp_send_event_check_mask(cpumask_of(cpu)) /* * Prepare machine for booting other CPUs. @@ -41,7 +41,7 @@ extern int smp_call_function( * Call a function on a selection of processors */ extern int on_selected_cpus( - cpumask_t selected, + const cpumask_t *selected, void (*func) (void *info), void *info, int retry, @@ -62,7 +62,7 @@ static inline int on_each_cpu( int retry, int wait) { - return on_selected_cpus(cpu_online_map, func, info, retry, wait); + return on_selected_cpus(&cpu_online_map, func, info, retry, wait); } #define smp_processor_id() raw_smp_processor_id() --- 2009-05-19.orig/xen/include/xen/softirq.h 2009-05-20 15:49:29.000000000 +0200 +++ 2009-05-19/xen/include/xen/softirq.h 2009-05-25 08:37:09.000000000 +0200 @@ -39,7 +39,7 @@ static inline void cpumask_raise_softirq cpu_clear(cpu, mask); } - smp_send_event_check_mask(mask); + smp_send_event_check_mask(&mask); } static inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr)