|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] RFC: unmap_page_range optimisation (avoiding emulation faults during VM migration)
Hi, When profiling a VM migration I noticed that on the sender side a significant (~45%) time is spent in PV Dom0 Linux taking an emulation fault in unmap_page_range [1]: The call comes from zap_pte_range (pgtable_64.h, inlined): ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); This has 2 implementations: native and a generic one where pte_clear is implemented using set_pte_at with a Xen pvop. As a proof of concept I’ve deleted the native implementation [2], which makes it fall back to the generic implementation [3]. This is not necessarily safe on SMP (it reads and clears as 2 separate steps), but shows that a paravirt op is faster than emulation. To fix this we may need to partially revert https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4cdd9c8931767e1c56a51a1078d33a8c340f4405 To test this more easily I’ve written a small test program that maps/unmaps pages from a domain in Dom0 [4]. Before: 3.26932 +- 0.00503 seconds time elapsed ( +- 0.15% ) After: 0.75642 +- 0.00202 seconds time elapsed ( +- 0.27% ) It is more than 4x faster to use the paravirt ops than trapping and emulating. From a functional point of view the above commit is correct, Xen doesn’t need a dedicated PV operation: trap and emulation works. But from a performance point of view I’d say that Xen does need it. This is a hot-path during migration, and it’d be worthwhile to optimise it. Just deleting the native implementation is probably not the solution, since we also want a value returned, and the existing PV operation is void. It probably needs a new PV operation (re)introduced with the desired semantics? Best regards, --Edwin [1]: https://cdn.jsdelivr.net/gh/edwintorok/xen@pmustack-coverletter/docs/tmp/migrate-send.svg?x=950.6&y=2197 [2]: diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 690c0307afed..ab6318bb5676 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1097,13 +1097,9 @@ extern int ptep_test_and_clear_young(struct vm_area_struct *vma, extern int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - pte_t pte = native_ptep_get_and_clear(ptep); - return pte; -} +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long address, + pte_t *ptep); #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, [3]: #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, pte_t *ptep) { pte_t pte = *ptep; pte_clear(mm, address, ptep); return pte; } #endif static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { set_pte_at(mm, addr, ptep, __pte(0)); } [4]: $ cat >main.c <<EOF #include <assert.h> #include <stdio.h> #include <stdlib.h> #include <sys/mman.h> #include <xenctrl.h> #include <xenforeignmemory.h> int main(int argc, char *argv[]) { if (argc != 3) { fprintf(stderr, "Usage: %s <domid> <batchsize>\n", argv[0]); return 1; } uint32_t domid = atoi(argv[1]); uint32_t batch_size = atoi(argv[2]); if (!domid || !batch_size) { fprintf(stderr, "Invalid arguments, expected 2 integers"); return 2; } xenforeignmemory_handle *handle = xenforeignmemory_open(NULL, 0); if (!handle) return 3; xc_interface *xc = xc_interface_open(NULL, NULL, 0); if (!xc) return 4; xen_pfn_t nr_pfns; if (xc_domain_nr_gpfns(xc, domid, &nr_pfns) < 0) return 5; unsigned mappings_size = nr_pfns / batch_size + 1; void **mappings = calloc(mappings_size, sizeof(void *)); if (!mappings) { perror("calloc"); return 6; } for (xen_pfn_t i = 0; i < nr_pfns; i += batch_size) { xen_pfn_t arr[batch_size]; int err[batch_size]; for (unsigned j = 0; j < batch_size; j++) arr[j] = i + j; unsigned long idx = i / batch_size; assert(idx < mappings_size); mappings[idx] = xenforeignmemory_map( handle, domid, PROT_READ, sizeof(arr) / sizeof(arr[0]), arr, err); if (!mappings[idx]) return 7; } for (xen_pfn_t i = 0; i < nr_pfns; i += batch_size) { unsigned long idx = i / batch_size; assert(idx < mappings_size); if (xenforeignmemory_unmap(handle, mappings[idx], batch_size)) return 8; } free(mappings); xc_interface_close(xc); xenforeignmemory_close(handle); return EXIT_SUCCESS; } EOF $ cat >meson.build <<EOF project('xfm_scale', 'c', default_options:['c_std=gnu11']) xfm = dependency('xenforeignmemory') xc = dependency('xencontrol’) executable('xfm_scale', 'main.c', dependencies: [xfm, xc]) EOF
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |