Re: [Xen-devel] [PATCH v2 3/3] x86/hyperv: L0 assisted TLB flush
On Fri, Feb 14, 2020 at 03:42:17PM +0100, Roger Pau Monné wrote:
[...]
> >  #endif /* __XEN_HYPERV_PRIVIATE_H__ */
> > diff --git a/xen/arch/x86/guest/hyperv/tlb.c b/xen/arch/x86/guest/hyperv/tlb.c
> > index 48f527229e..f68e14f151 100644
> > --- a/xen/arch/x86/guest/hyperv/tlb.c
> > +++ b/xen/arch/x86/guest/hyperv/tlb.c
> > @@ -19,15 +19,185 @@
> >   * Copyright (c) 2020 Microsoft.
> >   */
> > 
> > +#include <xen/cpu.h>
> >  #include <xen/cpumask.h>
> >  #include <xen/errno.h>
> > 
> > +#include <asm/guest/hyperv.h>
> > +#include <asm/guest/hyperv-hcall.h>
> > +#include <asm/guest/hyperv-tlfs.h>
> > +
> >  #include "private.h"
> > 
> > +/*
> > + * It is possible to encode up to 4096 pages using the lower 12 bits
> > + * in an element of gva_list
> > + */
> > +#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
> > +
> > +static unsigned int fill_gva_list(uint64_t *gva_list, const void *va,
> > +                                  unsigned int order)
> > +{
> > +    unsigned long start = (unsigned long)va;
> > +    unsigned long end = start + (PAGE_SIZE << order) - 1;
> > +    unsigned int n = 0;
> > +
> > +    do {
> > +        unsigned long remain = end - start;
> > +
> > +        gva_list[n] = start & PAGE_MASK;
> > +
> > +        /*
> > +         * Use lower 12 bits to encode the number of additional pages
> > +         * to flush
> > +         */
> > +        if ( remain >= HV_TLB_FLUSH_UNIT )
> > +        {
> > +            gva_list[n] |= ~PAGE_MASK;
> > +            start += HV_TLB_FLUSH_UNIT;
> > +        }
> > +        else if ( remain )
> 
> remain is always going to be > 0, since the loop condition is end >
> start, and hence this can be a plain else.

Ack.

> > +        {
> > +            gva_list[n] |= (remain - 1) >> PAGE_SHIFT;
> > +            start = end;
> > +        }
> > +
> > +        n++;
> > +    } while ( start < end );
> > +
> > +    return n;
> > +}
> > +
> > +static uint64_t flush_tlb_ex(const cpumask_t *mask, const void *va,
> > +                             unsigned int flags)
> > +{
> > +    struct hv_tlb_flush_ex *flush = this_cpu(hv_input_page);
> > +    int nr_banks;
> > +    unsigned int max_gvas, order = flags & FLUSH_ORDER_MASK;
> > +    uint64_t ret;
> > +
> > +    if ( !flush || local_irq_is_enabled() )
> > +    {
> > +        ASSERT_UNREACHABLE();
> > +        return ~0ULL;
> > +    }
> > +
> > +    if ( !(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) )
> > +        return ~0ULL;
> > +
> > +    flush->address_space = 0;
> > +    flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
> > +    if ( !(flags & FLUSH_TLB_GLOBAL) )
> > +        flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
> > +
> > +    nr_banks = cpumask_to_vpset(&flush->hv_vp_set, mask);
> > +    if ( nr_banks < 0 )
> > +        return ~0ULL;
> 
> It would be nice to propagate the error code from cpumask_to_vpset,
> but since the function can also return HyperV error codes this doesn't
> make much sense.
> 
> > +
> > +    max_gvas =
> > +        (PAGE_SIZE - sizeof(*flush) - nr_banks *
> > +         sizeof(flush->hv_vp_set.bank_contents[0])) /
> > +        sizeof(uint64_t);       /* gva is represented as uint64_t */
> > +
> > +    /*
> > +     * Flush the entire address space if va is NULL or if there is not
> > +     * enough space for gva_list.
> > +     */
> > +    if ( !va || (PAGE_SIZE << order) / HV_TLB_FLUSH_UNIT > max_gvas )
> > +        ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 0,
> > +                                  nr_banks, virt_to_maddr(flush), 0);
> 
> You could just return hv_do_rep_hypercall(...); here, which will avoid
> the else branch below and the indentation.

Ack.
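With the early return, the tail of flush_tlb_ex() would read roughly like
this (untested sketch, arguments unchanged from the hunk above):

    /* Flush the entire address space and propagate the hypercall status. */
    if ( !va || (PAGE_SIZE << order) / HV_TLB_FLUSH_UNIT > max_gvas )
        return hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 0,
                                   nr_banks, virt_to_maddr(flush), 0);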
> > +    else
> > +    {
> > +        uint64_t *gva_list =
> > +            (uint64_t *)flush + sizeof(*flush) / sizeof(uint64_t) + nr_banks;
> > +        unsigned int gvas = fill_gva_list(gva_list, va, order);
> > +
> > +        BUILD_BUG_ON(sizeof(*flush) % sizeof(uint64_t));
> > +
> > +        ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
> > +                                  gvas, nr_banks, virt_to_maddr(flush), 0);
> > +    }
> > +
> > +    return ret;
> > +}
> > +
> >  int hyperv_flush_tlb(const cpumask_t *mask, const void *va,
> >                       unsigned int flags)
> >  {
> > -    return -EOPNOTSUPP;
> > +    unsigned long irq_flags;
> > +    struct hv_tlb_flush *flush = this_cpu(hv_input_page);
> > +    unsigned int max_gvas, order = flags & FLUSH_ORDER_MASK;
> > +    uint64_t ret;
> > +
> > +    ASSERT(flush);
> > +    ASSERT(!cpumask_empty(mask));
> 
> I would also turn this into an if ( ... ) { ASSERT; return -EFOO; }

Ack.

> > +
> > +    local_irq_save(irq_flags);
> > +
> > +    flush->address_space = 0;
> > +    flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
> > +    flush->processor_mask = 0;
> > +    if ( !(flags & FLUSH_TLB_GLOBAL) )
> > +        flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
> > +
> > +    if ( cpumask_equal(mask, &cpu_online_map) )
> > +        flush->flags |= HV_FLUSH_ALL_PROCESSORS;
> > +    else
> > +    {
> > +        unsigned int cpu;
> > +
> > +        /*
> > +         * Normally VP indices are in ascending order and match Xen's
> > +         * idea of CPU ids. Check the last index to see if VP index is
> > +         * >= 64. If so, we can skip setting up parameters for
> > +         * non-applicable hypercalls without looking further.
> > +         */
> > +        if ( hv_vp_index(cpumask_last(mask)) >= 64 )
> > +            goto do_ex_hypercall;
> > +
> > +        for_each_cpu ( cpu, mask )
> > +        {
> > +            uint32_t vpid = hv_vp_index(cpu);
> 
> This should be unsigned int now.

Good catch.

> > +
> > +            if ( vpid > ms_hyperv.max_vp_index )
> > +            {
> > +                local_irq_restore(irq_flags);
> > +                return -ENXIO;
> > +            }
> > +
> > +            if ( vpid >= 64 )
> > +                goto do_ex_hypercall;
> > +
> > +            __set_bit(vpid, &flush->processor_mask);
> > +        }
> 
> Would it make sense to abstract this as cpumask_to_processor_mask,
> since you are adding cpumask_to_vpset below?

There is only one usage so far, so I don't think it is necessary.

> > +    }
> > +
> > +    max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
> 
> You could init this at declaration, and make it const static since the
> value can be calculated at compile time AFAICT. Or create a define
> with it (HV_TLB_FLUSH_MAX_GVAS?). There's no need to store it on the
> stack.

I can introduce a define, but the name you suggested is too generic. If
there is a variable sized header, the calculation is going to be
different.

> > +
> > +    /*
> > +     * Flush the entire address space if va is NULL or if there is not
> > +     * enough space for gva_list.
> > +     */
> > +    if ( !va || (PAGE_SIZE << order) / HV_TLB_FLUSH_UNIT > max_gvas )
> > +        ret = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
> > +                              virt_to_maddr(flush), 0);
> > +    else
> > +    {
> > +        unsigned int gvas = fill_gva_list(flush->gva_list, va, order);
> 
> No need for the gvas variable, you can just call fill_gva_list at
> hv_do_rep_hypercall.

Sure.
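I.e. something like this (untested sketch; only the gvas variable goes
away, the call is otherwise the same as in the hunk below):

    ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
                              fill_gva_list(flush->gva_list, va, order), 0,
                              virt_to_maddr(flush), 0);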
> > +
> > +        ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST, gvas, 0,
> > +                                  virt_to_maddr(flush), 0);
> > +    }
> > +
> > +    goto done;
> > +
> > + do_ex_hypercall:
> > +    ret = flush_tlb_ex(mask, va, flags);
> > +
> > + done:
> > +    local_irq_restore(irq_flags);
> > +
> > +    return ret & HV_HYPERCALL_RESULT_MASK ? -ENXIO : 0;
> >  }
> > 
> >  /*
> > diff --git a/xen/arch/x86/guest/hyperv/util.c b/xen/arch/x86/guest/hyperv/util.c
> > new file mode 100644
> > index 0000000000..e092593746
> > --- /dev/null
> > +++ b/xen/arch/x86/guest/hyperv/util.c
> > @@ -0,0 +1,74 @@
> > +/******************************************************************************
> > + * arch/x86/guest/hyperv/util.c
> > + *
> > + * Hyper-V utility functions
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation; either version 2 of the License, or
> > + * (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; If not, see <http://www.gnu.org/licenses/>.
> > + *
> > + * Copyright (c) 2020 Microsoft.
> > + */
> > +
> > +#include <xen/cpu.h>
> > +#include <xen/cpumask.h>
> > +#include <xen/errno.h>
> > +
> > +#include <asm/guest/hyperv.h>
> > +#include <asm/guest/hyperv-tlfs.h>
> > +
> > +#include "private.h"
> > +
> > +int cpumask_to_vpset(struct hv_vpset *vpset,
> > +                     const cpumask_t *mask)
> > +{
> > +    int nr = 1;
> > +    unsigned int cpu, vcpu_bank, vcpu_offset;
> > +    unsigned int max_banks = ms_hyperv.max_vp_index / 64;
> > +
> > +    /* Up to 64 banks can be represented by valid_bank_mask */
> > +    if ( max_banks >= 64 )
> > +        return -E2BIG;
> > +
> > +    /* Clear all banks to avoid flushing unwanted CPUs */
> > +    for ( vcpu_bank = 0; vcpu_bank <= max_banks; vcpu_bank++ )
> 
> I think this is off by one and should be vcpu_bank < max_banks? Or
> else you are clearing one extra bank.

If max_vp_index were the maximum VP index, max_banks would be inclusive
(for example, if max_vp_index is 63, 63/64 = 0), so the code would be
correct in that case.

However, when I did another round of self-review, I discovered that the
value stored in max_vp_index is actually "the maximum number of virtual
processors supported", so it isn't really the maximum index. This means
I will need to adjust the places where max_vp_index is used. Here,
max_banks is not inclusive anymore. And an earlier place

+            if ( vpid > ms_hyperv.max_vp_index )
+            {

also requires fixing.
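To make that concrete, the fix I have in mind is roughly (untested
sketch; the loop body was trimmed above, and I am assuming it clears
vpset->bank_contents, as the comment says):

    /* Clear all banks to avoid flushing unwanted CPUs */
    for ( vcpu_bank = 0; vcpu_bank < max_banks; vcpu_bank++ )
        vpset->bank_contents[vcpu_bank] = 0;

and the range check in hyperv_flush_tlb() becomes

    if ( vpid >= ms_hyperv.max_vp_index )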
I will also see about changing max_{l,v}p_index to more sensible names
or adding some comments to their definitions.

Wei.

> Thanks, Roger.