
Re: [for 4.22 v5 18/18] xen/riscv: introduce metadata table to store P2M type


  • To: Oleksii Kurochko <oleksii.kurochko@xxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Wed, 12 Nov 2025 12:49:35 +0100
  • Cc: Alistair Francis <alistair.francis@xxxxxxx>, Bob Eshleman <bobbyeshleman@xxxxxxxxx>, Connor Davis <connojdavis@xxxxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Anthony PERARD <anthony.perard@xxxxxxxxxx>, Michal Orzel <michal.orzel@xxxxxxx>, Julien Grall <julien@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxxx
  • Delivery-date: Wed, 12 Nov 2025 11:49:46 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On 20.10.2025 17:58, Oleksii Kurochko wrote:
> --- a/xen/arch/riscv/p2m.c
> +++ b/xen/arch/riscv/p2m.c
> @@ -20,6 +20,16 @@
>  
>  #define P2M_SUPPORTED_LEVEL_MAPPING 2
>  
> +/*
> + * P2M PTE context is used only when a PTE's P2M type is p2m_ext_storage.
> + * In this case, the P2M type is stored separately in the metadata page.
> + */
> +struct p2m_pte_ctx {
> +    struct page_info *pt_page;   /* Page table page containing the PTE. */
> +    unsigned int index;          /* Index of the PTE within that page. */
> +    unsigned int level;          /* Paging level at which the PTE resides. */
> +};
> +
>  unsigned char __ro_after_init gstage_mode;
>  unsigned int __ro_after_init gstage_root_level;
>  
> @@ -363,24 +373,89 @@ static struct page_info *p2m_alloc_page(struct p2m_domain *p2m)
>      return pg;
>  }
>  
> -static int p2m_set_type(pte_t *pte, p2m_type_t t)
> +/*
> + * `pte` – PTE entry for which the type `t` will be stored.
> + *
> + * If `t` is `p2m_ext_storage`, both `ctx` and `p2m` must be provided;
> + * otherwise, only p2m may be NULL.
> + */
> +static void p2m_set_type(pte_t *pte, const p2m_type_t t,
> +                         struct p2m_pte_ctx *ctx,
> +                         struct p2m_domain *p2m)
>  {
> -    int rc = 0;
> +    struct page_info **md_pg;
> +    pte_t *metadata = NULL;

I'm not convinced it is a good idea to re-use pte_t for this purpose. If you
used a separate type, and if you then defined that as a bitfield with only a
few bits dedicated to the type, future changes (additions) may be quite a bit
easier.
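Just as a sketch of what I mean (name and field widths purely illustrative,
assuming 8 bits are enough for p2m_type_t); metadata[] would then be an array
of this type rather than of pte_t:

    struct p2m_md_entry {
        uint32_t type     : 8;  /* p2m_type_t of the corresponding PTE */
        uint32_t reserved : 24; /* room for future per-entry additions */
    };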

> -    if ( t > p2m_first_external )
> -        panic("unimplemeted\n");
> -    else
> +    ASSERT(p2m);
> +
> +     /* Be sure that an index correspondent to page level is passed. */
> +    ASSERT(ctx && ctx->index < P2M_PAGETABLE_ENTRIES(ctx->level));
> +
> +    /*
> +     * For the root page table (16 KB in size), we need to select the correct
> +     * metadata table, since allocations are 4 KB each. In total, there are
> +     * 4 tables of 4 KB each.
> +     * For none-root page table index of ->pt_page[] will be always 0 as
> +     * index won't be higher then 511. ASSERT() above verifies that.
> +     */
> +    md_pg = &ctx->pt_page[ctx->index / PAGETABLE_ENTRIES].v.md.pg;
> +
> +    if ( !*md_pg && (t >= p2m_first_external) )
> +    {
> +        BUG_ON(ctx->level > P2M_SUPPORTED_LEVEL_MAPPING);
> +
> +        if ( ctx->level <= P2M_SUPPORTED_LEVEL_MAPPING )
> +        {
> +            struct domain *d = p2m->domain;

This is (if at all) needed only ...

> +            *md_pg = p2m_alloc_page(p2m);
> +            if ( !*md_pg )
> +            {

... in this more narrow scope.

> +                printk("%s: can't allocate extra memory for dom%d\n",
> +                       __func__, d->domain_id);

The logging text isn't specific enough for my taste. For ordinary printk()s I'd
also recommend against the use of __func__ (that's fine for dprintk()).

Also please use %pd in such cases.
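I.e. something along the lines of (message text and log level just an example,
to the author's taste):

    printk(XENLOG_G_ERR
           "%pd: failed to allocate P2M type metadata page\n", d);
    domain_crash(d);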

> +                domain_crash(d);
> +
> +                return;
> +            }
> +        }
> +    }
> +
> +    if ( *md_pg )
> +        metadata = __map_domain_page(*md_pg);
> +
> +    if ( t < p2m_first_external )
> +    {
>          pte->pte |= MASK_INSR(t, P2M_TYPE_PTE_BITS_MASK);
>  
> -    return rc;
> +        if ( metadata )
> +            metadata[ctx->index].pte = p2m_invalid;

Shouldn't this be accompanied with a BUILD_BUG_ON(p2m_invalid), as otherwise
p2m_alloc_page()'s clearing of the page won't have the intended effect?
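I.e. something like (placement up to you):

    /* A freshly cleared metadata page is "all p2m_invalid" only if that's 0. */
    BUILD_BUG_ON(p2m_invalid);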

> +    }
> +    else
> +    {
> +        pte->pte |= MASK_INSR(p2m_ext_storage, P2M_TYPE_PTE_BITS_MASK);
> +
> +        metadata[ctx->index].pte = t;

If you set t to p2m_ext_storage here, the pte->pte updating could be moved ...

> +    }

... here, covering both cases. Overall this may then be easier as

    if ( t >= p2m_first_external )
        metadata[ctx->index].pte = t;
    else if ( metadata )
        metadata[ctx->index].pte = p2m_invalid;

    pte->pte |= MASK_INSR(t, P2M_TYPE_PTE_BITS_MASK);

That then raises the question of whether it couldn't still be the real type
that's stored in metadata[] even for t < p2m_first_external. That would
further reduce the conditionals.
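I.e., taking that one step further (a sketch only, assuming it's fine to
mirror the "internal" types into metadata[] as well; metadata can't be NULL
for external types here, as a failed allocation above already crashed the
domain and returned):

    if ( metadata )
        metadata[ctx->index].pte = t;

    pte->pte |= MASK_INSR(t < p2m_first_external ? t : p2m_ext_storage,
                          P2M_TYPE_PTE_BITS_MASK);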

> +    unmap_domain_page(metadata);
>  }
>  
> -static p2m_type_t p2m_get_type(const pte_t pte)
> +/*
> + * `pte` -> PTE entry that stores the PTE's type.
> + *
> + * If the PTE's type is `p2m_ext_storage`, `ctx` should be provided;
> + * otherwise it could be NULL.
> + */
> +static p2m_type_t p2m_get_type(const pte_t pte, const struct p2m_pte_ctx *ctx)
>  {
>      p2m_type_t type = MASK_EXTR(pte.pte, P2M_TYPE_PTE_BITS_MASK);
>  
>      if ( type == p2m_ext_storage )
> -        panic("unimplemented\n");
> +    {
> +        const pte_t *md = __map_domain_page(ctx->pt_page->v.md.pg);
> +        type = md[ctx->index].pte;
> +        unmap_domain_page(md);

Nit (style): Blank line please between declaration(s) and statement(s).

> @@ -470,7 +545,15 @@ static void p2m_set_permission(pte_t *e, p2m_type_t t)
>      }
>  }
>  
> -static pte_t p2m_pte_from_mfn(mfn_t mfn, p2m_type_t t, bool is_table)
> +/*
> + * If p2m_pte_from_mfn() is called with p2m_pte_ctx = NULL and p2m = NULL,
> + * it means the function is working with a page table for which the `t`
> + * should not be applicable. Otherwise, the function is handling a leaf PTE
> + * for which `t` is applicable.
> + */
> +static pte_t p2m_pte_from_mfn(mfn_t mfn, p2m_type_t t,
> +                              struct p2m_pte_ctx *p2m_pte_ctx,
> +                              struct p2m_domain *p2m)
>  {
>      pte_t e = (pte_t) { PTE_VALID };
>  
> @@ -478,7 +561,7 @@ static pte_t p2m_pte_from_mfn(mfn_t mfn, p2m_type_t t, bool is_table)
>  
>      ASSERT(!(mfn_to_maddr(mfn) & ~PADDR_MASK) || mfn_eq(mfn, INVALID_MFN));
>  
> -    if ( !is_table )
> +    if ( p2m_pte_ctx && p2m )
>      {

Maybe better

    if ( p2m_pte_ctx )
    {
        ASSERT(p2m);
        ...

(if you really think the 2nd check is needed)?

> @@ -506,12 +589,19 @@ static pte_t p2m_pte_from_mfn(mfn_t mfn, p2m_type_t t, bool is_table)
>  /* Generate table entry with correct attributes. */
>  static pte_t page_to_p2m_table(const struct page_info *page)
>  {
> -    /*
> -     * p2m_invalid will be ignored inside p2m_pte_from_mfn() as is_table is
> -     * set to true and p2m_type_t shouldn't be applied for PTEs which
> -     * describe an intermidiate table.
> -     */
> -    return p2m_pte_from_mfn(page_to_mfn(page), p2m_invalid, true);
> +    return p2m_pte_from_mfn(page_to_mfn(page), p2m_invalid, NULL, NULL);
> +}

How come the comment is dropped? If you deem it unnecessary, why was it added
earlier in this same series?

> +static void p2m_free_page(struct p2m_domain *p2m, struct page_info *pg);
> +
> +/*
> + * Free page table's page and metadata page linked to page table's page.
> + */
> +static void p2m_free_table(struct p2m_domain *p2m, struct page_info *tbl_pg)
> +{
> +    if ( tbl_pg->v.md.pg )
> +        p2m_free_page(p2m, tbl_pg->v.md.pg);

To play safe, maybe better also clear tbl_pg->v.md.pg?
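I.e.:

    if ( tbl_pg->v.md.pg )
    {
        p2m_free_page(p2m, tbl_pg->v.md.pg);
        tbl_pg->v.md.pg = NULL;  /* avoid leaving a dangling pointer behind */
    }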

> @@ -749,6 +849,10 @@ static bool p2m_split_superpage(struct p2m_domain *p2m, pte_t *entry,
>      unsigned int next_level = level - 1;
>      unsigned int level_order = P2M_LEVEL_ORDER(next_level);
>  
> +    struct p2m_pte_ctx p2m_pte_ctx;

I think this would better be one variable instance per scope where it's needed,
and then using an initializer. Or else ...

> +    /* Init with p2m_invalid just to make compiler happy. */
> +    p2m_type_t old_type = p2m_invalid;
> +
>      /*
>       * This should only be called with target != level and the entry is
>       * a superpage.
> @@ -770,6 +874,19 @@ static bool p2m_split_superpage(struct p2m_domain *p2m, pte_t *entry,
>  
>      table = __map_domain_page(page);
>  
> +    if ( MASK_EXTR(entry->pte, P2M_TYPE_PTE_BITS_MASK) == p2m_ext_storage )
> +    {
> +        p2m_pte_ctx.pt_page = tbl_pg;
> +        p2m_pte_ctx.index = offsets[level];
> +        /*
> +         * It doesn't really matter what is a value for a level as
> +         * p2m_get_type() doesn't need it, so it is initialized just in case.
> +         */
> +        p2m_pte_ctx.level = level;
> +
> +        old_type = p2m_get_type(*entry, &p2m_pte_ctx);
> +    }
> +
>      for ( i = 0; i < P2M_PAGETABLE_ENTRIES(next_level); i++ )
>      {
>          pte_t *new_entry = table + i;
> @@ -781,6 +898,15 @@ static bool p2m_split_superpage(struct p2m_domain *p2m, pte_t *entry,
>          pte = *entry;
>          pte_set_mfn(&pte, mfn_add(mfn, i << level_order));
>  
> +        if ( MASK_EXTR(pte.pte, P2M_TYPE_PTE_BITS_MASK) == p2m_ext_storage )
> +        {
> +            p2m_pte_ctx.pt_page = page;
> +            p2m_pte_ctx.index = i;
> +            p2m_pte_ctx.level = next_level;

... why are the loop-invariant fields not filled ahead of the loop here?
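I.e. either hoist the invariant assignments ahead of the loop, or, tying in
with my earlier remark, give this scope its own variable with an initializer,
e.g. (sketch, body elided):

        if ( MASK_EXTR(pte.pte, P2M_TYPE_PTE_BITS_MASK) == p2m_ext_storage )
        {
            struct p2m_pte_ctx p2m_pte_ctx = {
                .pt_page = page,        /* loop-invariant */
                .index = i,
                .level = next_level,    /* loop-invariant */
            };

            ...
        }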

> @@ -927,7 +1061,13 @@ static int p2m_set_entry(struct p2m_domain *p2m,
>          p2m_clean_pte(entry, p2m->clean_dcache);
>      else
>      {
> -        pte_t pte = p2m_pte_from_mfn(mfn, t, false);
> +        struct p2m_pte_ctx tmp_ctx = {
> +            .pt_page = mfn_to_page(domain_page_map_to_mfn(table)),
> +            .index =  offsets[level],

Nit: Stray blank.

> @@ -1153,7 +1301,14 @@ static mfn_t p2m_get_entry(struct p2m_domain *p2m, gfn_t gfn,
>      if ( pte_is_valid(entry) )
>      {
>          if ( t )
> -            *t = p2m_get_type(entry);
> +        {
> +            struct p2m_pte_ctx p2m_pte_ctx = {
> +                .pt_page = mfn_to_page(domain_page_map_to_mfn(table)),
> +                .index = offsets[level],
> +            };

.level not being set here?
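I.e. presumably (sketch):

            struct p2m_pte_ctx p2m_pte_ctx = {
                .pt_page = mfn_to_page(domain_page_map_to_mfn(table)),
                .index = offsets[level],
                .level = level,    /* the field missing above */
            };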

Jan



 

