|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v2 02/70] xen/sort: Switch to an extern inline implementation
Hi Andrew,
> On 14 Feb 2022, at 12:50, Andrew Cooper <andrew.cooper3@xxxxxxxxxx> wrote:
>
> There are exactly 3 callers of sort() in the hypervisor. Callbacks in a tight
> loop like this are problematic for performance, especially with Spectre v2
> protections, which is why extern inline is used commonly by libraries.
>
> Both ARM callers pass in NULL for the swap function, and while this might seem
> like an attractive option at first, it causes generic_swap() to be used, which
> forced a byte-wise copy. Provide real swap functions so the compiler can
> optimise properly, which is very important for ARM downstreams where
> milliseconds until the system is up matters.
>
> No functional change.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
Just one comment fix after, with it fixed for the arm part:
Reviewed-by: Bertrand Marquis <bertrand.marquis@xxxxxxx>
> ---
> CC: Jan Beulich <JBeulich@xxxxxxxx>
> CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> CC: Wei Liu <wl@xxxxxxx>
> CC: Stefano Stabellini <sstabellini@xxxxxxxxxx>
> CC: Julien Grall <julien@xxxxxxx>
> CC: Volodymyr Babchuk <Volodymyr_Babchuk@xxxxxxxx>
> CC: Bertrand Marquis <bertrand.marquis@xxxxxxx>
>
> v2:
> * Adjust commit message
> ---
> xen/arch/arm/bootfdt.c | 9 +++++-
> xen/arch/arm/io.c | 9 +++++-
> xen/include/xen/sort.h | 55 +++++++++++++++++++++++++++++++++-
> xen/lib/sort.c | 80 ++------------------------------------------------
> 4 files changed, 72 insertions(+), 81 deletions(-)
>
> diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c
> index afaa0e249b71..e318ef960386 100644
> --- a/xen/arch/arm/bootfdt.c
> +++ b/xen/arch/arm/bootfdt.c
> @@ -448,6 +448,13 @@ static int __init cmp_memory_node(const void *key, const
> void *elem)
> return 0;
> }
>
> +static void __init swap_memory_node(void *_a, void *_b, size_t size)
> +{
> + struct membank *a = _a, *b = _b;
> +
> + SWAP(*a, *b);
> +}
> +
> /**
> * boot_fdt_info - initialize bootinfo from a DTB
> * @fdt: flattened device tree binary
> @@ -472,7 +479,7 @@ size_t __init boot_fdt_info(const void *fdt, paddr_t
> paddr)
> * the banks sorted in ascending order. So sort them through.
> */
> sort(bootinfo.mem.bank, bootinfo.mem.nr_banks, sizeof(struct membank),
> - cmp_memory_node, NULL);
> + cmp_memory_node, swap_memory_node);
>
> early_print_info();
>
> diff --git a/xen/arch/arm/io.c b/xen/arch/arm/io.c
> index 729287e37c59..1a066f9ae502 100644
> --- a/xen/arch/arm/io.c
> +++ b/xen/arch/arm/io.c
> @@ -80,6 +80,13 @@ static int cmp_mmio_handler(const void *key, const void
> *elem)
> return 0;
> }
>
> +static void swap_mmio_handler(void *_a, void *_b, size_t size)
> +{
> + struct mmio_handler *a = _a, *b = _b;
> +
> + SWAP(*a, *b);
> +}
> +
> static const struct mmio_handler *find_mmio_handler(struct domain *d,
> paddr_t gpa)
> {
> @@ -170,7 +177,7 @@ void register_mmio_handler(struct domain *d,
>
> /* Sort mmio handlers in ascending order based on base address */
> sort(vmmio->handlers, vmmio->num_entries, sizeof(struct mmio_handler),
> - cmp_mmio_handler, NULL);
> + cmp_mmio_handler, swap_mmio_handler);
>
> write_unlock(&vmmio->lock);
> }
> diff --git a/xen/include/xen/sort.h b/xen/include/xen/sort.h
> index a403652948e7..01479ea44606 100644
> --- a/xen/include/xen/sort.h
> +++ b/xen/include/xen/sort.h
> @@ -3,8 +3,61 @@
>
> #include <xen/types.h>
>
> +/*
> + * sort - sort an array of elements
> + * @base: pointer to data to sort
> + * @num: number of elements
> + * @size: size of each element
> + * @cmp: pointer to comparison function
> + * @swap: pointer to swap function or NULL
The function is not accepting anymore to have NULL as parameter.
The comment should be fixed here.
Bertrand
> + *
> + * This function does a heapsort on the given array. You may provide a
> + * swap function optimized to your element type.
> + *
> + * Sorting time is O(n log n) both on average and worst-case. While
> + * qsort is about 20% faster on average, it suffers from exploitable
> + * O(n*n) worst-case behavior and extra memory requirements that make
> + * it less suitable for kernel use.
> + */
> +#ifndef SORT_IMPLEMENTATION
> +extern gnu_inline
> +#endif
> void sort(void *base, size_t num, size_t size,
> int (*cmp)(const void *, const void *),
> - void (*swap)(void *, void *, size_t));
> + void (*swap)(void *, void *, size_t))
> +{
> + /* pre-scale counters for performance */
> + size_t i = (num / 2) * size, n = num * size, c, r;
> +
> + /* heapify */
> + while ( i > 0 )
> + {
> + for ( r = i -= size; r * 2 + size < n; r = c )
> + {
> + c = r * 2 + size;
> + if ( (c < n - size) && (cmp(base + c, base + c + size) < 0) )
> + c += size;
> + if ( cmp(base + r, base + c) >= 0 )
> + break;
> + swap(base + r, base + c, size);
> + }
> + }
> +
> + /* sort */
> + for ( i = n; i > 0; )
> + {
> + i -= size;
> + swap(base, base + i, size);
> + for ( r = 0; r * 2 + size < i; r = c )
> + {
> + c = r * 2 + size;
> + if ( (c < i - size) && (cmp(base + c, base + c + size) < 0) )
> + c += size;
> + if ( cmp(base + r, base + c) >= 0 )
> + break;
> + swap(base + r, base + c, size);
> + }
> + }
> +}
>
> #endif /* __XEN_SORT_H__ */
> diff --git a/xen/lib/sort.c b/xen/lib/sort.c
> index 35ce0d7abdec..b7e78cc0e8d2 100644
> --- a/xen/lib/sort.c
> +++ b/xen/lib/sort.c
> @@ -4,81 +4,5 @@
> * Jan 23 2005 Matt Mackall <mpm@xxxxxxxxxxx>
> */
>
> -#include <xen/types.h>
> -
> -static void u32_swap(void *a, void *b, size_t size)
> -{
> - uint32_t t = *(uint32_t *)a;
> -
> - *(uint32_t *)a = *(uint32_t *)b;
> - *(uint32_t *)b = t;
> -}
> -
> -static void generic_swap(void *a, void *b, size_t size)
> -{
> - char t;
> -
> - do {
> - t = *(char *)a;
> - *(char *)a++ = *(char *)b;
> - *(char *)b++ = t;
> - } while ( --size > 0 );
> -}
> -
> -/*
> - * sort - sort an array of elements
> - * @base: pointer to data to sort
> - * @num: number of elements
> - * @size: size of each element
> - * @cmp: pointer to comparison function
> - * @swap: pointer to swap function or NULL
> - *
> - * This function does a heapsort on the given array. You may provide a
> - * swap function optimized to your element type.
> - *
> - * Sorting time is O(n log n) both on average and worst-case. While
> - * qsort is about 20% faster on average, it suffers from exploitable
> - * O(n*n) worst-case behavior and extra memory requirements that make
> - * it less suitable for kernel use.
> - */
> -
> -void sort(void *base, size_t num, size_t size,
> - int (*cmp)(const void *, const void *),
> - void (*swap)(void *, void *, size_t size))
> -{
> - /* pre-scale counters for performance */
> - size_t i = (num / 2) * size, n = num * size, c, r;
> -
> - if ( !swap )
> - swap = (size == 4 ? u32_swap : generic_swap);
> -
> - /* heapify */
> - while ( i > 0 )
> - {
> - for ( r = i -= size; r * 2 + size < n; r = c )
> - {
> - c = r * 2 + size;
> - if ( (c < n - size) && (cmp(base + c, base + c + size) < 0) )
> - c += size;
> - if ( cmp(base + r, base + c) >= 0 )
> - break;
> - swap(base + r, base + c, size);
> - }
> - }
> -
> - /* sort */
> - for ( i = n; i > 0; )
> - {
> - i -= size;
> - swap(base, base + i, size);
> - for ( r = 0; r * 2 + size < i; r = c )
> - {
> - c = r * 2 + size;
> - if ( (c < i - size) && (cmp(base + c, base + c + size) < 0) )
> - c += size;
> - if ( cmp(base + r, base + c) >= 0 )
> - break;
> - swap(base + r, base + c, size);
> - }
> - }
> -}
> +#define SORT_IMPLEMENTATION
> +#include <xen/sort.h>
> --
> 2.11.0
>
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |