[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH v2] x86/hvm: Remove callback from paging->flush_tlb() hook
TLB flushing is a hotpath, and function pointer calls are expensive (especially under retpoline) for what amounts to an identity transform on the data. Just pass the vcpu_bitmap directly. As we use NULL for all rather than none, introduce a flush_vcpu() helper to avoid the risk of logical errors from open-coding the expression. This also means the viridian callers can avoid writing an all-ones bitmap for the flushing logic to consume. No functional change. Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx> --- CC: Jan Beulich <JBeulich@xxxxxxxx> CC: Roger Pau Monné <roger.pau@xxxxxxxxxx> CC: Wei Liu <wl@xxxxxxx> CC: Paul Durrant <paul@xxxxxxx> v2: * Also short-circuit the hvcall_flush_ex() HV_GENERIC_SET_ALL path. The result even compiles smaller: $ ../scripts/bloat-o-meter xen-syms-before xen-syms-after add/remove: 0/2 grow/shrink: 0/3 up/down: 0/-125 (-125) Function old new delta flush_tlb 207 205 -2 always_flush 6 - -6 need_flush 9 - -9 do_hvm_op 2434 2418 -16 shadow_flush_tlb 628 536 -92 but this is very much not the point of the patch. It would be rather more efficient in this case and probably others for for_each_vcpu() to iterate over d->vcpu[] than follow the v->next_vcpu pointer chain (better locality of access), and also because then the vCPU id is the loop induction variable, not a value read from memory. 
--- xen/arch/x86/hvm/hvm.c | 7 +------ xen/arch/x86/hvm/viridian/viridian.c | 34 +++++++++++----------------------- xen/arch/x86/mm/hap/hap.c | 11 ++++++++--- xen/arch/x86/mm/shadow/common.c | 18 +++++++++++------- xen/arch/x86/mm/shadow/private.h | 3 +-- xen/include/asm-x86/paging.h | 11 ++++------- 6 files changed, 36 insertions(+), 48 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index eee365711d63..31e9474db093 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -4047,17 +4047,12 @@ static void hvm_s3_resume(struct domain *d) } } -static bool always_flush(void *ctxt, struct vcpu *v) -{ - return true; -} - static int hvmop_flush_tlb_all(void) { if ( !is_hvm_domain(current->domain) ) return -EINVAL; - return paging_flush_tlb(always_flush, NULL) ? 0 : -ERESTART; + return paging_flush_tlb(NULL) ? 0 : -ERESTART; } static int hvmop_set_evtchn_upcall_vector( diff --git a/xen/arch/x86/hvm/viridian/viridian.c b/xen/arch/x86/hvm/viridian/viridian.c index b906f7b86a74..51d50e194e6d 100644 --- a/xen/arch/x86/hvm/viridian/viridian.c +++ b/xen/arch/x86/hvm/viridian/viridian.c @@ -574,13 +574,6 @@ static void vpmask_fill(struct hypercall_vpmask *vpmask) bitmap_fill(vpmask->mask, HVM_MAX_VCPUS); } -static bool vpmask_test(const struct hypercall_vpmask *vpmask, - unsigned int vp) -{ - ASSERT(vp < HVM_MAX_VCPUS); - return test_bit(vp, vpmask->mask); -} - static unsigned int vpmask_first(const struct hypercall_vpmask *vpmask) { return find_first_bit(vpmask->mask, HVM_MAX_VCPUS); @@ -669,17 +662,6 @@ static uint16_t hv_vpset_to_vpmask(const struct hv_vpset *set, #undef NR_VPS_PER_BANK } -/* - * Windows should not issue the hypercalls requiring this callback in the - * case where vcpu_id would exceed the size of the mask. 
- */ -static bool need_flush(void *ctxt, struct vcpu *v) -{ - struct hypercall_vpmask *vpmask = ctxt; - - return vpmask_test(vpmask, v->vcpu_id); -} - union hypercall_input { uint64_t raw; struct { @@ -714,6 +696,7 @@ static int hvcall_flush(const union hypercall_input *input, uint64_t flags; uint64_t vcpu_mask; } input_params; + unsigned long *vcpu_bitmap; /* These hypercalls should never use the fast-call convention. */ if ( input->fast ) @@ -730,18 +713,19 @@ static int hvcall_flush(const union hypercall_input *input, * so err on the safe side. */ if ( input_params.flags & HV_FLUSH_ALL_PROCESSORS ) - vpmask_fill(vpmask); + vcpu_bitmap = NULL; else { vpmask_empty(vpmask); vpmask_set(vpmask, 0, input_params.vcpu_mask); + vcpu_bitmap = vpmask->mask; } /* * A false return means that another vcpu is currently trying * a similar operation, so back off. */ - if ( !paging_flush_tlb(need_flush, vpmask) ) + if ( !paging_flush_tlb(vcpu_bitmap) ) return -ERESTART; output->rep_complete = input->rep_count; @@ -760,6 +744,7 @@ static int hvcall_flush_ex(const union hypercall_input *input, uint64_t flags; struct hv_vpset set; } input_params; + unsigned long *vcpu_bitmap; /* These hypercalls should never use the fast-call convention. */ if ( input->fast ) @@ -770,8 +755,9 @@ static int hvcall_flush_ex(const union hypercall_input *input, sizeof(input_params)) != HVMTRANS_okay ) return -EINVAL; - if ( input_params.flags & HV_FLUSH_ALL_PROCESSORS ) - vpmask_fill(vpmask); + if ( input_params.flags & HV_FLUSH_ALL_PROCESSORS || + input_params.set.format == HV_GENERIC_SET_ALL ) + vcpu_bitmap = NULL; else { union hypercall_vpset *vpset = &this_cpu(hypercall_vpset); @@ -807,13 +793,15 @@ static int hvcall_flush_ex(const union hypercall_input *input, rc = hv_vpset_to_vpmask(set, vpmask); if ( rc ) return rc; + + vcpu_bitmap = vpmask->mask; } /* * A false return means that another vcpu is currently trying * a similar operation, so back off. 
*/ - if ( !paging_flush_tlb(need_flush, vpmask) ) + if ( !paging_flush_tlb(vcpu_bitmap) ) return -ERESTART; output->rep_complete = input->rep_count; diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c index 5b269ef8b3bb..de4b13565ab4 100644 --- a/xen/arch/x86/mm/hap/hap.c +++ b/xen/arch/x86/mm/hap/hap.c @@ -696,8 +696,13 @@ static void hap_update_cr3(struct vcpu *v, int do_locking, bool noflush) hvm_update_guest_cr3(v, noflush); } -static bool flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v), - void *ctxt) +static bool flush_vcpu(const struct vcpu *v, const unsigned long *vcpu_bitmap) +{ + return !vcpu_bitmap || test_bit(v->vcpu_id, vcpu_bitmap); +} + +/* Flush TLB of selected vCPUs. NULL for all. */ +static bool flush_tlb(const unsigned long *vcpu_bitmap) { static DEFINE_PER_CPU(cpumask_t, flush_cpumask); cpumask_t *mask = &this_cpu(flush_cpumask); @@ -712,7 +717,7 @@ static bool flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v), { unsigned int cpu; - if ( !flush_vcpu(ctxt, v) ) + if ( !flush_vcpu(v, vcpu_bitmap) ) continue; hvm_asid_flush_vcpu(v); diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c index 8c1b041f7135..de09ef5cae58 100644 --- a/xen/arch/x86/mm/shadow/common.c +++ b/xen/arch/x86/mm/shadow/common.c @@ -3069,9 +3069,13 @@ static void sh_clean_dirty_bitmap(struct domain *d) } -/* Fluhs TLB of selected vCPUs. */ -bool shadow_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v), - void *ctxt) +static bool flush_vcpu(const struct vcpu *v, const unsigned long *vcpu_bitmap) +{ + return !vcpu_bitmap || test_bit(v->vcpu_id, vcpu_bitmap); +} + +/* Flush TLB of selected vCPUs. NULL for all. */ +bool shadow_flush_tlb(const unsigned long *vcpu_bitmap) { static DEFINE_PER_CPU(cpumask_t, flush_cpumask); cpumask_t *mask = &this_cpu(flush_cpumask); @@ -3084,12 +3088,12 @@ bool shadow_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v), /* Pause all other vcpus. 
*/ for_each_vcpu ( d, v ) - if ( v != current && flush_vcpu(ctxt, v) ) + if ( v != current && flush_vcpu(v, vcpu_bitmap) ) vcpu_pause_nosync(v); /* Now that all VCPUs are signalled to deschedule, we wait... */ for_each_vcpu ( d, v ) - if ( v != current && flush_vcpu(ctxt, v) ) + if ( v != current && flush_vcpu(v, vcpu_bitmap) ) while ( !vcpu_runnable(v) && v->is_running ) cpu_relax(); @@ -3103,7 +3107,7 @@ bool shadow_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v), { unsigned int cpu; - if ( !flush_vcpu(ctxt, v) ) + if ( !flush_vcpu(v, vcpu_bitmap) ) continue; paging_update_cr3(v, false); @@ -3118,7 +3122,7 @@ bool shadow_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v), /* Done. */ for_each_vcpu ( d, v ) - if ( v != current && flush_vcpu(ctxt, v) ) + if ( v != current && flush_vcpu(v, vcpu_bitmap) ) vcpu_unpause(v); return true; diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h index 35efb1b984fb..e4db8d32546a 100644 --- a/xen/arch/x86/mm/shadow/private.h +++ b/xen/arch/x86/mm/shadow/private.h @@ -922,8 +922,7 @@ static inline int sh_check_page_has_no_refs(struct page_info *page) } /* Flush the TLB of the selected vCPUs. 
*/ -bool shadow_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v), - void *ctxt); +bool shadow_flush_tlb(const unsigned long *vcpu_bitmap); #endif /* _XEN_SHADOW_PRIVATE_H */ diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h index 996c2cd0383f..308f1115dde9 100644 --- a/xen/include/asm-x86/paging.h +++ b/xen/include/asm-x86/paging.h @@ -141,9 +141,7 @@ struct paging_mode { void (*update_cr3 )(struct vcpu *v, int do_locking, bool noflush); void (*update_paging_modes )(struct vcpu *v); - bool (*flush_tlb )(bool (*flush_vcpu)(void *ctxt, - struct vcpu *v), - void *ctxt); + bool (*flush_tlb )(const unsigned long *vcpu_bitmap); unsigned int guest_levels; @@ -417,11 +415,10 @@ static always_inline unsigned int paging_max_paddr_bits(const struct domain *d) return bits; } -static inline bool paging_flush_tlb(bool (*flush_vcpu)(void *ctxt, - struct vcpu *v), - void *ctxt) +/* Flush selected vCPUs TLBs. NULL for all. */ +static inline bool paging_flush_tlb(const unsigned long *vcpu_bitmap) { - return paging_get_hostmode(current)->flush_tlb(flush_vcpu, ctxt); + return paging_get_hostmode(current)->flush_tlb(vcpu_bitmap); } #endif /* XEN_PAGING_H */ -- 2.11.0
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |