
Re: [PATCH v3 6/9] xen/common: add cache coloring allocator for domains


  • To: Carlo Nonato <carlo.nonato@xxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Thu, 10 Nov 2022 17:47:06 +0100
  • Cc: marco.solieri@xxxxxxxxxx, andrea.bastoni@xxxxxxxxxxxxxxx, lucmiccio@xxxxxxxxx, Stefano Stabellini <sstabellini@xxxxxxxxxx>, Julien Grall <julien@xxxxxxx>, Bertrand Marquis <bertrand.marquis@xxxxxxx>, Volodymyr Babchuk <Volodymyr_Babchuk@xxxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, George Dunlap <george.dunlap@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Marco Solieri <marco.solieri@xxxxxxxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxxx
  • Delivery-date: Thu, 10 Nov 2022 16:47:20 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On 22.10.2022 17:51, Carlo Nonato wrote:
> --- a/xen/arch/arm/p2m.c
> +++ b/xen/arch/arm/p2m.c
> @@ -661,7 +661,12 @@ static int p2m_create_table(struct p2m_domain *p2m, lpae_t *entry)
>  
>      ASSERT(!p2m_is_valid(*entry));
>  
> -    page = alloc_domheap_page(NULL, 0);
> +    /* If cache coloring is enabled, p2m tables are allocated using the domain
> +     * coloring configuration to prevent cache interference. */
> +    if ( IS_ENABLED(CONFIG_CACHE_COLORING) )
> +        page = alloc_domheap_page(p2m->domain, MEMF_no_refcount);

Are you sure you don't mean MEMF_no_owner (which implies MEMF_no_refcount)
here? And then ...

> +    else
> +        page = alloc_domheap_page(NULL, 0);

... is it really necessary to keep the two cases separate?
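I.e., assuming MEMF_no_owner is indeed what's wanted here (the domain
then merely guiding placement, without the page getting assigned to it),
presumably just (untested):

    page = alloc_domheap_page(p2m->domain, MEMF_no_owner);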

Also nit: Comment style.

> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -150,6 +150,9 @@
>  #define p2m_pod_offline_or_broken_hit(pg) 0
>  #define p2m_pod_offline_or_broken_replace(pg) BUG_ON(pg != NULL)
>  #endif
> +#ifdef CONFIG_HAS_CACHE_COLORING
> +#include <asm/coloring.h>
> +#endif
>  
>  #ifndef PGC_static
>  #define PGC_static 0
> @@ -231,6 +234,14 @@ static bool __read_mostly scrub_debug;
>  #define scrub_debug    false
>  #endif
>  
> +/* Memory required for buddy allocator to work with colored one */
> +#ifdef CONFIG_BUDDY_ALLOCATOR_SIZE
> +static unsigned long __initdata buddy_alloc_size =
> +    CONFIG_BUDDY_ALLOCATOR_SIZE << 20;
> +#else
> +    static unsigned long __initdata buddy_alloc_size = 0;

Nit: Bogus indentation. I wonder anyway whether it wouldn't better
be

static unsigned long __initdata buddy_alloc_size =
#ifdef CONFIG_BUDDY_ALLOCATOR_SIZE
    CONFIG_BUDDY_ALLOCATOR_SIZE << 20;
#else
    0;
#endif

or

static unsigned long __initdata buddy_alloc_size
#ifdef CONFIG_BUDDY_ALLOCATOR_SIZE
    = CONFIG_BUDDY_ALLOCATOR_SIZE << 20
#endif
    ;

> +static void free_color_heap_page(struct page_info *pg)
> +{
> +    struct page_info *pos;
> +    unsigned int color = page_to_color(pg);
> +    colored_pages_t *head = color_heap(color);
> +
> +    spin_lock(&heap_lock);
> +
> +    pg->count_info = PGC_state_free | PGC_colored;
> +    page_set_owner(pg, NULL);
> +    free_colored_pages[color]++;
> +
> +    page_list_for_each( pos, head )
> +    {
> +        if ( page_to_maddr(pos) < page_to_maddr(pg) )
> +            break;
> +    }

I continue to view such loops as problematic. With them in place I don't
think this feature can move to being (security) supported, so I think this
and similar places want annotating with a FIXME or alike comment.
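E.g. (exact wording of course up for discussion):

    /*
     * FIXME: O(n) insertion to keep the list sorted by address; this
     * wants replacing before the feature can be security supported.
     */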

> +    page_list_add_next(pg, pos, head);
> +
> +    spin_unlock(&heap_lock);
> +}
> +
> +static struct page_info *alloc_color_heap_page(unsigned int memflags,
> +                                               const unsigned int *colors,
> +                                               unsigned int num_colors)
> +{
> +    struct page_info *pg = NULL;
> +    unsigned int i, color;
> +    bool need_tlbflush = false;
> +    uint32_t tlbflush_timestamp = 0;
> +
> +    spin_lock(&heap_lock);
> +
> +    for ( i = 0; i < num_colors; i++ )
> +    {
> +        struct page_info *tmp;
> +
> +        if ( page_list_empty(color_heap(colors[i])) )
> +            continue;
> +
> +        tmp = page_list_first(color_heap(colors[i]));
> +        if ( !pg || page_to_maddr(tmp) > page_to_maddr(pg) )
> +            pg = tmp;
> +    }
> +
> +    if ( !pg )
> +    {
> +        spin_unlock(&heap_lock);
> +        return NULL;
> +    }
> +
> +    pg->count_info = PGC_state_inuse | PGC_colored;
> +
> +    if ( !(memflags & MEMF_no_tlbflush) )
> +        accumulate_tlbflush(&need_tlbflush, pg, &tlbflush_timestamp);
> +
> +    init_free_page_fields(pg);
> +    flush_page_to_ram(mfn_x(page_to_mfn(pg)),
> +                      !(memflags & MEMF_no_icache_flush));
> +
> +    color = page_to_color(pg);

You don't really need to retrieve the color here, do you? You could as
well latch it in the loop above.
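I.e. something along the lines of (untested):

    tmp = page_list_first(color_heap(colors[i]));
    if ( !pg || page_to_maddr(tmp) > page_to_maddr(pg) )
    {
        pg = tmp;
        color = colors[i];
    }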

> +static void dump_color_heap(void)
> +{
> +    unsigned int color;
> +
> +    printk("Dumping coloring heap info\n");
> +    for ( color = 0; color < get_max_colors(); color++ )
> +        printk("Color heap[%u]: %lu pages\n", color, free_colored_pages[color]);
> +}
> +
> +integer_param("buddy-alloc-size", buddy_alloc_size);

This would preferably live next to the variable it controls, e.g. (taking
the earlier comment into account)

static unsigned long __initdata buddy_alloc_size =
#ifdef CONFIG_CACHE_COLORING
    CONFIG_BUDDY_ALLOCATOR_SIZE << 20;
integer_param("buddy-alloc-size", buddy_alloc_size);
#else
    0;
#endif

(Assuming buddy_alloc_size is indeed used anywhere outside any #ifdef
CONFIG_CACHE_COLORING in the first place.)

> @@ -1926,24 +2106,49 @@ static unsigned long avail_heap_pages(
>  void __init end_boot_allocator(void)
>  {
>      unsigned int i;
> +    unsigned long buddy_pages;
>  
> -    /* Pages that are free now go to the domain sub-allocator. */
> -    for ( i = 0; i < nr_bootmem_regions; i++ )
> +    buddy_pages = PFN_DOWN(buddy_alloc_size);

Any reason this can't be the initializer of the variable?
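I.e. simply

    unsigned long buddy_pages = PFN_DOWN(buddy_alloc_size);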

> +    if ( !IS_ENABLED(CONFIG_CACHE_COLORING) )
>      {
> -        struct bootmem_region *r = &bootmem_region_list[i];
> -        if ( (r->s < r->e) &&
> -             (phys_to_nid(pfn_to_paddr(r->s)) == cpu_to_node(0)) )
> +        /* Pages that are free now go to the domain sub-allocator. */
> +        for ( i = 0; i < nr_bootmem_regions; i++ )
>          {
> -            init_heap_pages(mfn_to_page(_mfn(r->s)), r->e - r->s);
> -            r->e = r->s;
> -            break;
> +            struct bootmem_region *r = &bootmem_region_list[i];
> +            if ( (r->s < r->e) &&

Even if you're only re-indenting the original code (which personally I'd
prefer were avoided), please add the missing blank line between
declaration and statement here.

> +                (phys_to_nid(pfn_to_paddr(r->s)) == cpu_to_node(0)) )
> +            {
> +                init_heap_pages(mfn_to_page(_mfn(r->s)), r->e - r->s);
> +                r->e = r->s;
> +                break;
> +            }
>          }
>      }
> +
>      for ( i = nr_bootmem_regions; i-- > 0; )
>      {
> -        struct bootmem_region *r = &bootmem_region_list[i];
> +        struct bootmem_region *r;
> +
> +        if ( IS_ENABLED(CONFIG_CACHE_COLORING) )
> +            r = &bootmem_region_list[nr_bootmem_regions - i - 1];

If you want to handle things low-to-high, why don't you alter the
earlier loop rather than skipping (and re-indenting) it? However,
considering that in alloc_color_heap_page() you prefer pages at
higher addresses, I continue to find it odd that here you want to
process low address pages first.
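I.e., if low-to-high really is intended, something like (untested, and
only to illustrate the direction)

    for ( i = 0; i < nr_bootmem_regions; i++ )
    {
        struct bootmem_region *r = &bootmem_region_list[i];

        /* ... existing handling of the region ... */
    }

would avoid the index recomputation altogether.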

> +        else
> +            r = &bootmem_region_list[i];
> +
> +        if ( buddy_pages && (r->s < r->e) )
> +        {
> +            unsigned long pages = MIN(r->e - r->s, buddy_pages);
> +            init_heap_pages(mfn_to_page(_mfn(r->s)), pages);

Nit: Blank line between declaration(s) and statement(s) please. Also:
Any reason the type-safe min() cannot be used here?
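I.e. (with the blank line also added)

    unsigned long pages = min(r->e - r->s, buddy_pages);

    init_heap_pages(mfn_to_page(_mfn(r->s)), pages);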

> --- a/xen/include/xen/mm.h
> +++ b/xen/include/xen/mm.h
> @@ -297,6 +297,37 @@ page_list_add_tail(struct page_info *page, struct page_list_head *head)
>      }
>      head->tail = page;
>  }
> +static inline void
> +_page_list_add(struct page_info *new, struct page_info *prev,
> +               struct page_info *next)
> +{
> +    new->list.prev = page_to_pdx(prev);
> +     new->list.next = page_to_pdx(next);
> +     prev->list.next = page_to_pdx(new);
> +     next->list.prev = page_to_pdx(new);

Nit: Several hard tabs here, and ...

> +}
> +static inline void
> +page_list_add_next(struct page_info *new, struct page_info *prev,
> +                   struct page_list_head *head)
> +{
> +     struct page_info *next = page_list_next(prev, head);

... one more here (and at least one more further down).

Afaict you're passing a NULL "pos" in here from free_color_heap_page()
if the list was previously empty and page lists aren't simply "normal"
(xen/list.h) lists. I don't consider it valid to call page_list_next()
with a NULL first argument, even if it looks as if this would work
right now as long as the list is empty (but I think we'd see a NULL
prev here also if all other pages looked at by free_color_heap_page()
are at higher addresses). So perhaps ...

> +    if ( !next )
> +        page_list_add_tail(new, head);
> +    else
> +        _page_list_add(new, prev, next);

    if ( !prev )
        page_list_add_tail(new, head);
    else
        _page_list_add(new, prev, page_list_next(prev, head));

?

> +}
> +static inline void
> +page_list_add_prev(struct page_info *new, struct page_info *next,
> +                   struct page_list_head *head)
> +{
> +     struct page_info *prev = page_list_prev(next, head);
> +
> +    if ( !prev )
> +        page_list_add(new, head);
> +    else
> +        _page_list_add(new, prev, next);
> +}

This function doesn't look to be used anywhere.

Jan