
Re: [PATCH v6 10/15] xen: add cache coloring allocator for domains


  • To: Carlo Nonato <carlo.nonato@xxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Thu, 1 Feb 2024 16:53:10 +0100
  • Cc: andrea.bastoni@xxxxxxxxxxxxxxx, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, George Dunlap <george.dunlap@xxxxxxxxxx>, Julien Grall <julien@xxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Bertrand Marquis <bertrand.marquis@xxxxxxx>, Michal Orzel <michal.orzel@xxxxxxx>, Volodymyr Babchuk <Volodymyr_Babchuk@xxxxxxxx>, Marco Solieri <marco.solieri@xxxxxxxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxxx
  • Delivery-date: Thu, 01 Feb 2024 15:53:24 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On 29.01.2024 18:18, Carlo Nonato wrote:
> @@ -157,7 +158,11 @@
>  #define PGC_static 0
>  #endif
>  
> -#define PGC_preserved (PGC_extra | PGC_static)
> +#ifndef PGC_colored
> +#define PGC_colored 0
> +#endif
> +
> +#define PGC_preserved (PGC_extra | PGC_static | PGC_colored)
>  
>  #ifndef PGT_TYPE_INFO_INITIALIZER
>  #define PGT_TYPE_INFO_INITIALIZER 0
> @@ -1504,6 +1509,7 @@ static void free_heap_pages(
>              if ( !mfn_valid(page_to_mfn(predecessor)) ||
>                   !page_state_is(predecessor, free) ||
>                   (predecessor->count_info & PGC_static) ||
> +                 (predecessor->count_info & PGC_colored) ||

How about a 2nd #define (e.g. PGC_no_buddy_merge) to use here and ...

>                   (PFN_ORDER(predecessor) != order) ||
>                   (page_to_nid(predecessor) != node) )
>                  break;
> @@ -1528,6 +1534,7 @@ static void free_heap_pages(
>              if ( !mfn_valid(page_to_mfn(successor)) ||
>                   !page_state_is(successor, free) ||
>                   (successor->count_info & PGC_static) ||
> +                 (successor->count_info & PGC_colored) ||

... here? That'll then also avoid me commenting that I don't see why
these two PGC_* checks aren't folded.
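
Roughly what I have in mind (just a sketch; the name is of course open
for debate):

  #define PGC_no_buddy_merge (PGC_static | PGC_colored)

with the two checks then collapsing into a single

  (predecessor->count_info & PGC_no_buddy_merge) ||

and likewise for the successor below.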

> @@ -1943,6 +1950,161 @@ static unsigned long avail_heap_pages(
>      return free_pages;
>  }
>  
> +/*************************
> + * COLORED SIDE-ALLOCATOR
> + *
> + * Pages are grouped by LLC color in lists which are globally referred to as the
> + * color heap. Lists are populated in end_boot_allocator().
> + * After initialization there will be N lists where N is the number of
> + * available colors on the platform.
> + */
> +static struct page_list_head *__ro_after_init _color_heap;
> +static unsigned long *__ro_after_init free_colored_pages;
> +
> +/* Memory required for buddy allocator to work with colored one */
> +#ifdef CONFIG_LLC_COLORING
> +static unsigned long __initdata buddy_alloc_size =
> +    MB(CONFIG_BUDDY_ALLOCATOR_SIZE);
> +size_param("buddy-alloc-size", buddy_alloc_size);
> +
> +#define domain_num_llc_colors(d) (d)->num_llc_colors
> +#define domain_llc_color(d, i)   (d)->llc_colors[(i)]

Nit: Unnecessary parentheses inside the square brackets.
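
I.e. simply

  #define domain_llc_color(d, i)   (d)->llc_colors[i]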

> +#else
> +static unsigned long __initdata buddy_alloc_size;
> +
> +#define domain_num_llc_colors(d) 0
> +#define domain_llc_color(d, i)   0
> +#endif
> +
> +#define color_heap(color) (&_color_heap[color])

Wouldn't this better live next to _color_heap's definition?

> +static void free_color_heap_page(struct page_info *pg, bool need_scrub)
> +{
> +    unsigned int color = page_to_llc_color(pg);
> +    struct page_list_head *head = color_heap(color);
> +
> +    spin_lock(&heap_lock);
> +
> +    mark_page_free(pg, page_to_mfn(pg));
> +
> +    if ( need_scrub )
> +    {
> +        pg->count_info |= PGC_need_scrub;
> +        poison_one_page(pg);
> +    }
> +
> +    pg->count_info |= PGC_colored;

The page transiently losing PGC_colored is not an issue then (presumably
because of holding the heap lock)? Did you consider having mark_page_free()
also use PGC_preserved?
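
I.e. (untested sketch) have it retain the preserved bits when transitioning
the page to the free state, along the lines of

  pg->count_info = (pg->count_info & PGC_preserved) | PGC_state_free;

which (if PGC_colored is guaranteed to still be set on entry here) would
then also allow dropping the explicit re-setting of the flag above.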

> +    free_colored_pages[color]++;
> +    page_list_add(pg, head);
> +
> +    spin_unlock(&heap_lock);
> +}
> +
> +static struct page_info *alloc_color_heap_page(unsigned int memflags,
> +                                               const struct domain *d)
> +{
> +    struct page_info *pg = NULL;
> +    unsigned int i, color = 0;
> +    unsigned long max = 0;
> +    bool need_tlbflush = false;
> +    uint32_t tlbflush_timestamp = 0;
> +    bool scrub = !(memflags & MEMF_no_scrub);
> +
> +    spin_lock(&heap_lock);
> +
> +    for ( i = 0; i < domain_num_llc_colors(d); i++ )
> +    {
> +        unsigned long free = free_colored_pages[domain_llc_color(d, i)];
> +
> +        if ( free > max )
> +        {
> +            color = domain_llc_color(d, i);
> +            pg = page_list_first(color_heap(color));
> +            max = free;
> +        }
> +    }

The approach is likely fine at least initially, but: By going from where
the most free pages are, you're not necessarily evenly distributing a
domain's pages over the colors it may use, unless the domain uses its
set of colors exclusively.
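
(Just to illustrate what I mean, not necessarily a request to change this
right away: a rotating per-domain start index, say a hypothetical
d->next_color_idx, i.e. something along the lines of

  for ( i = 0; i < domain_num_llc_colors(d); i++ )
  {
      unsigned int idx = (d->next_color_idx + i) % domain_num_llc_colors(d);

      color = domain_llc_color(d, idx);
      pg = page_list_first(color_heap(color));
      if ( pg )
          break;
  }

with next_color_idx then advanced past the index actually used, would
spread a domain's pages evenly over its colors, independent of how full
the individual free lists are.)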

> +    if ( !pg )
> +    {
> +        spin_unlock(&heap_lock);
> +        return NULL;
> +    }
> +
> +    pg->count_info = PGC_state_inuse | PGC_colored |
> +                     (pg->count_info & PGC_need_scrub);

Isn't PGC_colored already set on the page? Together with ...

> +    free_colored_pages[color]--;
> +    page_list_del(pg, color_heap(color));
> +
> +    if ( !(memflags & MEMF_no_tlbflush) )
> +        accumulate_tlbflush(&need_tlbflush, pg, &tlbflush_timestamp);
> +
> +    init_free_page_fields(pg);
> +
> +    spin_unlock(&heap_lock);
> +
> +    if ( test_and_clear_bit(_PGC_need_scrub, &pg->count_info) && scrub )

... this, can't the above be simplified?
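
Just as a sketch of what I mean (untested):

  pg->count_info &= ~PGC_state;
  pg->count_info |= PGC_state_inuse;

i.e. merely transition the state, relying on PGC_colored (and a possibly
set PGC_need_scrub) already being there, with the latter then dealt with
by the test_and_clear_bit() below anyway.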

> +        scrub_one_page(pg);
> +    else if ( scrub )
> +        check_one_page(pg);
> +
> +    if ( need_tlbflush )
> +        filtered_flush_tlb_mask(tlbflush_timestamp);
> +
> +    flush_page_to_ram(mfn_x(page_to_mfn(pg)),
> +                      !(memflags & MEMF_no_icache_flush));
> +
> +    return pg;
> +}
> +
> +static void __init init_color_heap_pages(struct page_info *pg,
> +                                         unsigned long nr_pages)
> +{
> +    unsigned int i;
> +    bool need_scrub = opt_bootscrub == BOOTSCRUB_IDLE;
> +
> +    if ( buddy_alloc_size )
> +    {
> +        unsigned long buddy_pages = min(PFN_DOWN(buddy_alloc_size), nr_pages);
> +
> +        init_heap_pages(pg, buddy_pages);
> +        nr_pages -= buddy_pages;
> +        buddy_alloc_size -= buddy_pages << PAGE_SHIFT;
> +        pg += buddy_pages;
> +    }
> +
> +    if ( !_color_heap )
> +    {
> +        unsigned int max_nr_colors = get_max_nr_llc_colors();
> +
> +        _color_heap = xmalloc_array(struct page_list_head, max_nr_colors);
> +        free_colored_pages = xzalloc_array(unsigned long, max_nr_colors);
> +        if ( !_color_heap || !free_colored_pages )
> +            panic("Can't allocate colored heap. Buddy reserved size is too low");
> +
> +        for ( i = 0; i < max_nr_colors; i++ )
> +            INIT_PAGE_LIST_HEAD(color_heap(i));
> +    }
> +
> +    if ( nr_pages )
> +        printk(XENLOG_DEBUG
> +               "Init color heap with %lu pages, start MFN: %"PRI_mfn"\n",
> +               nr_pages, mfn_x(page_to_mfn(pg)));

This message can be issued more than once. Is that really desirable /
necessary?

> @@ -2458,7 +2626,14 @@ struct page_info *alloc_domheap_pages(
>      if ( memflags & MEMF_no_owner )
>          memflags |= MEMF_no_refcount;
>  
> -    if ( !dma_bitsize )
> +    /* Only domains are supported for coloring */
> +    if ( d && llc_coloring_enabled )
> +    {
> +        /* Colored allocation must be done on 0 order */
> +        if ( order || (pg = alloc_color_heap_page(memflags, d)) == NULL )
> +            return NULL;
> +    }

I think I had asked before: What about MEMF_node() or MEMF_bits()
having been used by the caller?

Jan