[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v3 9/9] xen/ppc: mm-radix: Allocate all paging structures at runtime


  • To: Shawn Anastasio <sanastasio@xxxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Mon, 25 Mar 2024 16:39:09 +0100
  • Autocrypt: addr=jbeulich@xxxxxxxx; keydata= xsDiBFk3nEQRBADAEaSw6zC/EJkiwGPXbWtPxl2xCdSoeepS07jW8UgcHNurfHvUzogEq5xk hu507c3BarVjyWCJOylMNR98Yd8VqD9UfmX0Hb8/BrA+Hl6/DB/eqGptrf4BSRwcZQM32aZK 7Pj2XbGWIUrZrd70x1eAP9QE3P79Y2oLrsCgbZJfEwCgvz9JjGmQqQkRiTVzlZVCJYcyGGsD /0tbFCzD2h20ahe8rC1gbb3K3qk+LpBtvjBu1RY9drYk0NymiGbJWZgab6t1jM7sk2vuf0Py O9Hf9XBmK0uE9IgMaiCpc32XV9oASz6UJebwkX+zF2jG5I1BfnO9g7KlotcA/v5ClMjgo6Gl MDY4HxoSRu3i1cqqSDtVlt+AOVBJBACrZcnHAUSuCXBPy0jOlBhxPqRWv6ND4c9PH1xjQ3NP nxJuMBS8rnNg22uyfAgmBKNLpLgAGVRMZGaGoJObGf72s6TeIqKJo/LtggAS9qAUiuKVnygo 3wjfkS9A3DRO+SpU7JqWdsveeIQyeyEJ/8PTowmSQLakF+3fote9ybzd880fSmFuIEJldWxp Y2ggPGpiZXVsaWNoQHN1c2UuY29tPsJgBBMRAgAgBQJZN5xEAhsDBgsJCAcDAgQVAggDBBYC AwECHgECF4AACgkQoDSui/t3IH4J+wCfQ5jHdEjCRHj23O/5ttg9r9OIruwAn3103WUITZee e7Sbg12UgcQ5lv7SzsFNBFk3nEQQCACCuTjCjFOUdi5Nm244F+78kLghRcin/awv+IrTcIWF hUpSs1Y91iQQ7KItirz5uwCPlwejSJDQJLIS+QtJHaXDXeV6NI0Uef1hP20+y8qydDiVkv6l IreXjTb7DvksRgJNvCkWtYnlS3mYvQ9NzS9PhyALWbXnH6sIJd2O9lKS1Mrfq+y0IXCP10eS FFGg+Av3IQeFatkJAyju0PPthyTqxSI4lZYuJVPknzgaeuJv/2NccrPvmeDg6Coe7ZIeQ8Yj t0ARxu2xytAkkLCel1Lz1WLmwLstV30g80nkgZf/wr+/BXJW/oIvRlonUkxv+IbBM3dX2OV8 AmRv1ySWPTP7AAMFB/9PQK/VtlNUJvg8GXj9ootzrteGfVZVVT4XBJkfwBcpC/XcPzldjv+3 HYudvpdNK3lLujXeA5fLOH+Z/G9WBc5pFVSMocI71I8bT8lIAzreg0WvkWg5V2WZsUMlnDL9 mpwIGFhlbM3gfDMs7MPMu8YQRFVdUvtSpaAs8OFfGQ0ia3LGZcjA6Ik2+xcqscEJzNH+qh8V m5jjp28yZgaqTaRbg3M/+MTbMpicpZuqF4rnB0AQD12/3BNWDR6bmh+EkYSMcEIpQmBM51qM EKYTQGybRCjpnKHGOxG0rfFY1085mBDZCH5Kx0cl0HVJuQKC+dV2ZY5AqjcKwAxpE75MLFkr wkkEGBECAAkFAlk3nEQCGwwACgkQoDSui/t3IH7nnwCfcJWUDUFKdCsBH/E5d+0ZnMQi+G0A nAuWpQkjM1ASeQwSHEeAWPgskBQL
  • Cc: tpearson@xxxxxxxxxxxxxxxxxxxxx, xen-devel@xxxxxxxxxxxxxxxxxxxx
  • Delivery-date: Mon, 25 Mar 2024 15:39:15 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On 14.03.2024 23:15, Shawn Anastasio wrote:
> --- a/xen/arch/ppc/mm-radix.c
> +++ b/xen/arch/ppc/mm-radix.c
> @@ -21,69 +21,101 @@ void enable_mmu(void);
>  #define radix_dprintk(...)
>  #endif
> 
> -#define INITIAL_LVL1_PD_COUNT      1
> -#define INITIAL_LVL2_LVL3_PD_COUNT 2
> -#define INITIAL_LVL4_PT_COUNT      256
> -
> -static size_t __initdata initial_lvl1_pd_pool_used;
> -static struct lvl1_pd initial_lvl1_pd_pool[INITIAL_LVL1_PD_COUNT];
> -
> -static size_t __initdata initial_lvl2_lvl3_pd_pool_used;
> -static struct lvl2_pd initial_lvl2_lvl3_pd_pool[INITIAL_LVL2_LVL3_PD_COUNT];
> -
> -static size_t __initdata initial_lvl4_pt_pool_used;
> -static struct lvl4_pt initial_lvl4_pt_pool[INITIAL_LVL4_PT_COUNT];
> -
> -/* Only reserve minimum Partition and Process tables  */
>  #define PATB_SIZE_LOG2 16 /* Only supported partition table size on POWER9 */
>  #define PATB_SIZE      (1UL << PATB_SIZE_LOG2)
> -#define PRTB_SIZE_LOG2 12
> +#define PRTB_SIZE_LOG2 24 /* Maximum process table size on POWER9 */
>  #define PRTB_SIZE      (1UL << PRTB_SIZE_LOG2)
> 
> -static struct patb_entry
> -    __aligned(PATB_SIZE) initial_patb[PATB_SIZE / sizeof(struct patb_entry)];
> +static struct patb_entry *initial_patb;
> +static struct prtb_entry *initial_prtb;
> 
> -static struct prtb_entry
> -    __aligned(PRTB_SIZE) initial_prtb[PRTB_SIZE / sizeof(struct prtb_entry)];
> +static mfn_t __initdata min_alloc_mfn = {-1};
> +static mfn_t __initdata max_alloc_mfn = {0};
> 
> -static __init struct lvl1_pd *lvl1_pd_pool_alloc(void)
> +/*
> + * A thin wrapper for alloc_boot_pages that keeps track of the maximum and
> + * minimum mfns that have been allocated. This information is used by
> + * setup_initial_mapping to include the allocated pages in the initial
> + * page mapping.
> + */
> +static mfn_t __init initial_page_alloc(unsigned long nr_pfns,
> +                                       unsigned long pfn_align)
>  {
> -    if ( initial_lvl1_pd_pool_used >= INITIAL_LVL1_PD_COUNT )
> -    {
> -        early_printk("Ran out of space for LVL1 PD!\n");
> -        die();
> -    }
> +    mfn_t mfn_first, mfn_last;
> 
> -    return &initial_lvl1_pd_pool[initial_lvl1_pd_pool_used++];
> -}
> +    mfn_first = alloc_boot_pages(nr_pfns, pfn_align);
> +    mfn_last = _mfn(mfn_x(mfn_first) + nr_pfns - 1);

Please can you use mfn_add() here?

> -static __init struct lvl2_pd *lvl2_pd_pool_alloc(void)
> -{
> -    if ( initial_lvl2_lvl3_pd_pool_used >= INITIAL_LVL2_LVL3_PD_COUNT )
> -    {
> -        early_printk("Ran out of space for LVL2/3 PD!\n");
> -        die();
> -    }
> +    min_alloc_mfn = _mfn(min(mfn_x(min_alloc_mfn), mfn_x(mfn_first)));
> +    max_alloc_mfn = _mfn(max(mfn_x(max_alloc_mfn), mfn_x(mfn_last)));

Together with the comment ahead of the function - is there some kind of
assumption here that this range won't span almost all of system memory?
E.g. expecting allocations to be almost contiguous? If so, I wonder how
reliable this is, and whether using a rangeset wouldn't be better here.

> @@ -105,81 +138,47 @@ static void __init setup_initial_mapping(struct lvl1_pd 
> *lvl1,
>          die();
>      }
> 
> +    /* Identity map Xen itself */
>      for ( page_addr = map_start; page_addr < map_end; page_addr += PAGE_SIZE 
> )
>      {
> -        struct lvl2_pd *lvl2;
> -        struct lvl3_pd *lvl3;
> -        struct lvl4_pt *lvl4;
> -        pde_t *pde;
> -        pte_t *pte;
> -
> -        /* Allocate LVL 2 PD if necessary */
> -        pde = pt_entry(lvl1, page_addr);
> -        if ( !pde_is_valid(*pde) )
> -        {
> -            lvl2 = lvl2_pd_pool_alloc();
> -            *pde = paddr_to_pde(__pa(lvl2), PDE_VALID,
> -                                XEN_PT_ENTRIES_LOG2_LVL_2);
> -        }
> -        else
> -            lvl2 = __va(pde_to_paddr(*pde));
> +        unsigned long flags;
> 
> -        /* Allocate LVL 3 PD if necessary */
> -        pde = pt_entry(lvl2, page_addr);
> -        if ( !pde_is_valid(*pde) )
> +        if ( is_kernel_text(page_addr) || is_kernel_inittext(page_addr) )
>          {
> -            lvl3 = lvl3_pd_pool_alloc();
> -            *pde = paddr_to_pde(__pa(lvl3), PDE_VALID,
> -                                XEN_PT_ENTRIES_LOG2_LVL_3);
> +            radix_dprintk("%016lx being marked as TEXT (RX)\n", page_addr);
> +            flags = PTE_XEN_RX;
>          }
> -        else
> -            lvl3 = __va(pde_to_paddr(*pde));
> -
> -        /* Allocate LVL 4 PT if necessary */
> -        pde = pt_entry(lvl3, page_addr);
> -        if ( !pde_is_valid(*pde) )
> +        else if ( is_kernel_rodata(page_addr) )
>          {
> -            lvl4 = lvl4_pt_pool_alloc();
> -            *pde = paddr_to_pde(__pa(lvl4), PDE_VALID,
> -                                XEN_PT_ENTRIES_LOG2_LVL_4);
> +            radix_dprintk("%016lx being marked as RODATA (RO)\n", page_addr);
> +            flags = PTE_XEN_RO;
>          }
>          else
> -            lvl4 = __va(pde_to_paddr(*pde));
> -
> -        /* Finally, create PTE in LVL 4 PT */
> -        pte = pt_entry(lvl4, page_addr);
> -        if ( !pte_is_valid(*pte) )
>          {
> -            unsigned long paddr = (page_addr - map_start) + phys_base;
> -            unsigned long flags;
> -
> -            radix_dprintk("%016lx being mapped to %016lx\n", paddr, 
> page_addr);
> -            if ( is_kernel_text(page_addr) || is_kernel_inittext(page_addr) )
> -            {
> -                radix_dprintk("%016lx being marked as TEXT (RX)\n", 
> page_addr);
> -                flags = PTE_XEN_RX;
> -            }
> -            else if ( is_kernel_rodata(page_addr) )
> -            {
> -                radix_dprintk("%016lx being marked as RODATA (RO)\n", 
> page_addr);
> -                flags = PTE_XEN_RO;
> -            }
> -            else
> -            {
> -                radix_dprintk("%016lx being marked as DEFAULT (RW)\n", 
> page_addr);
> -                flags = PTE_XEN_RW;
> -            }
> -
> -            *pte = paddr_to_pte(paddr, flags);
> -            radix_dprintk("%016lx is the result of PTE map\n",
> -                paddr_to_pte(paddr, flags).pte);
> -        }
> -        else
> -        {
> -            early_printk("BUG: Tried to create PTE for already-mapped 
> page!");
> -            die();
> +            radix_dprintk("%016lx being marked as DEFAULT (RW)\n", 
> page_addr);
> +            flags = PTE_XEN_RW;
>          }
> +
> +        map_page_initial(lvl1, page_addr, (page_addr - map_start) + 
> phys_base, flags);
> +    }
> +
> +    previous_max_alloc_mfn = max_alloc_mfn;
> +
> +    /*
> +     * Identity map all pages we've allocated for paging structures. This act
> +     * itself will allocate more pages, so continue until we've mapped from
> +     * `max_alloc_mfn` down to `min_alloc_mfn`. This assumes that the heap 
> grows
> +     * downwards, which matches the behavior of alloc_boot_pages.
> +     */
> +    for ( page_addr = (vaddr_t)__va(mfn_to_maddr(max_alloc_mfn));
> +          mfn_to_maddr(min_alloc_mfn) <= __pa(page_addr);
> +          page_addr -= PAGE_SIZE)
> +    {
> +        map_page_initial(lvl1, page_addr, __pa(page_addr), PTE_XEN_RW);
>      }
> +
> +    if ( mfn_x(previous_max_alloc_mfn) != mfn_x(max_alloc_mfn) )
> +        panic("Early page heap unexpectedly grew upwards\n");
>  }

Oh, yet another assumption on allocator behavior.

Jan



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.