[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2 1/3] xen/riscv: introduce setup_initial_pages


  • To: Oleksii Kurochko <oleksii.kurochko@xxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Mon, 20 Mar 2023 17:41:37 +0100
  • Cc: Julien Grall <julien@xxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>, Gianluca Guida <gianluca@xxxxxxxxxxxx>, Bob Eshleman <bobbyeshleman@xxxxxxxxx>, Alistair Francis <alistair.francis@xxxxxxx>, Connor Davis <connojdavis@xxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxxx
  • Delivery-date: Mon, 20 Mar 2023 16:42:03 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On 16.03.2023 17:43, Oleksii Kurochko wrote:
> --- /dev/null
> +++ b/xen/arch/riscv/include/asm/mm.h
> @@ -0,0 +1,8 @@
> +#ifndef _ASM_RISCV_MM_H
> +#define _ASM_RISCV_MM_H
> +
> +void setup_initial_pagetables(void);
> +
> +extern void enable_mmu(void);

Nit: At least within a single header you probably want to be consistent
about the use of "extern". Personally I think it would better be omitted
from function declarations.
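
E.g. (merely a sketch of what I have in mind, with "extern" dropped from the
function declarations):

    #ifndef _ASM_RISCV_MM_H
    #define _ASM_RISCV_MM_H

    void setup_initial_pagetables(void);
    void enable_mmu(void);

    #endif /* _ASM_RISCV_MM_H */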

> --- /dev/null
> +++ b/xen/arch/riscv/include/asm/page.h
> @@ -0,0 +1,67 @@
> +#ifndef _ASM_RISCV_PAGE_H
> +#define _ASM_RISCV_PAGE_H
> +
> +#include <xen/const.h>
> +#include <xen/types.h>
> +
> +#define PAGE_ENTRIES                (1 << PAGETABLE_ORDER)
> +#define VPN_MASK                    ((unsigned long)(PAGE_ENTRIES - 1))
> +
> +#define PAGE_ORDER                  (12)

DYM PAGE_SHIFT here, as used elsewhere in Xen?

Also are you aware of page-bits.h, where I think some of these constants
should go?
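
Purely as an illustration (names/values here are just a sketch, not a
requirement), I'd expect something like

    /* asm/page-bits.h */
    #define PAGE_SHIFT  12

with xen/page-size.h (which this patch already includes) then supplying
PAGE_SIZE based on PAGE_SHIFT.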

> +#ifdef CONFIG_RISCV_64
> +#define PAGETABLE_ORDER             (9)
> +#else /* CONFIG_RISCV_32 */
> +#define PAGETABLE_ORDER             (10)
> +#endif
> +
> +#define LEVEL_ORDER(lvl)            (lvl * PAGETABLE_ORDER)
> +#define LEVEL_SHIFT(lvl)            (LEVEL_ORDER(lvl) + PAGE_ORDER)
> +#define LEVEL_SIZE(lvl)             (_AT(paddr_t, 1) << LEVEL_SHIFT(lvl))
> +
> +#define XEN_PT_LEVEL_SHIFT(lvl)     LEVEL_SHIFT(lvl)
> +#define XEN_PT_LEVEL_ORDER(lvl)     LEVEL_ORDER(lvl)
> +#define XEN_PT_LEVEL_SIZE(lvl)      LEVEL_SIZE(lvl)

Mind me asking what these are good for? Doesn't one set of macros
suffice?

> +#define XEN_PT_LEVEL_MAP_MASK(lvl)  (~(XEN_PT_LEVEL_SIZE(lvl) - 1))
> +#define XEN_PT_LEVEL_MASK(lvl)      (VPN_MASK << XEN_PT_LEVEL_SHIFT(lvl))
> +
> +#define PTE_SHIFT                   10

What does this describe? According to its single use here it may
simply require a better name.

> +#define PTE_VALID                   BIT(0, UL)
> +#define PTE_READABLE                BIT(1, UL)
> +#define PTE_WRITABLE                BIT(2, UL)
> +#define PTE_EXECUTABLE              BIT(3, UL)
> +#define PTE_USER                    BIT(4, UL)
> +#define PTE_GLOBAL                  BIT(5, UL)
> +#define PTE_ACCESSED                BIT(6, UL)
> +#define PTE_DIRTY                   BIT(7, UL)
> +#define PTE_RSW                     (BIT(8, UL) | BIT(9, UL))
> +
> +#define PTE_LEAF_DEFAULT            (PTE_VALID | PTE_READABLE | PTE_EXECUTABLE)
> +#define PTE_TABLE                   (PTE_VALID)
> +
> +/* Calculate the offsets into the pagetables for a given VA */
> +#define pt_linear_offset(lvl, va)   ((va) >> XEN_PT_LEVEL_SHIFT(lvl))
> +
> +#define pt_index(lvl, va)   pt_linear_offset(lvl, (va) & XEN_PT_LEVEL_MASK(lvl))
> +
> +/* Page Table entry */
> +typedef struct {
> +    uint64_t pte;
> +} pte_t;

Not having read the respective spec (yet), I'm wondering whether this really
looks the same for RV32 as well (despite the different PAGETABLE_ORDER).
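
For reference, Sv32 uses 32-bit PTEs, so if RV32 is meant to be supported this
may need to end up as something like (again just a sketch)

    /* PTE width differs between RV32 (Sv32) and RV64 (Sv39 and up). */
    #ifdef CONFIG_RISCV_64
    typedef struct { uint64_t pte; } pte_t;
    #else
    typedef struct { uint32_t pte; } pte_t;
    #endif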

> --- /dev/null
> +++ b/xen/arch/riscv/mm.c
> @@ -0,0 +1,121 @@
> +#include <xen/compiler.h>
> +#include <xen/init.h>
> +#include <xen/kernel.h>
> +#include <xen/lib.h>
> +#include <xen/page-size.h>
> +
> +#include <asm/boot-info.h>
> +#include <asm/config.h>
> +#include <asm/csr.h>
> +#include <asm/mm.h>
> +#include <asm/page.h>
> +#include <asm/traps.h>
> +
> +/*
> + * xen_second_pagetable is indexed with the VPN[2] page table entry field
> + * xen_first_pagetable is accessed from the VPN[1] page table entry field
> + * xen_zeroeth_pagetable is accessed from the VPN[0] page table entry field
> + */
> +pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
> +    xen_second_pagetable[PAGE_ENTRIES];
> +static pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
> +    xen_first_pagetable[PAGE_ENTRIES];
> +static pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
> +    xen_zeroeth_pagetable[PAGE_ENTRIES];

I would assume Andrew's comment on the earlier version also extended to
the names used here (and then also to various local variables or function
parameters further down).

> +extern unsigned long __init_begin[];
> +extern unsigned long __init_end[];
> +extern unsigned char cpu0_boot_stack[STACK_SIZE];
> +
> +static void __init
> +setup_initial_mapping(pte_t *second, pte_t *first, pte_t *zeroeth,
> +                      unsigned long map_start,
> +                      unsigned long map_end,
> +                      unsigned long pa_start,
> +                      unsigned long flags)
> +{
> +    unsigned long page_addr;
> +
> +    // /* align start addresses */
> +    // map_start &= XEN_PT_LEVEL_MAP_MASK(0);
> +    // pa_start &= XEN_PT_LEVEL_MAP_MASK(0);

It's not clear what this is about, but in any event the comment is malformed.

> +    page_addr = map_start;
> +    while ( page_addr < map_end )
> +    {
> +        unsigned long index2 = pt_index(2, page_addr);
> +        unsigned long index1 = pt_index(1, page_addr);
> +        unsigned long index0 = pt_index(0, page_addr);
> +
> +        /* Setup level2 table */
> +        second[index2] = paddr_to_pte((unsigned long)first);
> +        second[index2].pte |= PTE_TABLE;
> +
> +        /* Setup level1 table */
> +        first[index1] = paddr_to_pte((unsigned long)zeroeth);
> +        first[index1].pte |= PTE_TABLE;

Would it make sense to have paddr_to_pte() take the attributes right away
as a 2nd argument?
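
I.e. something along the lines of (only a sketch of the interface I'm
thinking of, re-using the constants from this patch)

    static inline pte_t paddr_to_pte(paddr_t paddr, unsigned long flags)
    {
        /* Place the PPN in its field and OR in the attribute bits. */
        return (pte_t){ .pte = ((paddr >> PAGE_ORDER) << PTE_SHIFT) | flags };
    }

which would then allow e.g.

    second[index2] = paddr_to_pte((unsigned long)first, PTE_TABLE);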

> +
> +

Nit: No double blank lines please.

> +        /* Setup level0 table */
> +        if ( !pte_is_valid(&zeroeth[index0]) )
> +        {
> +            /* Update level0 table */
> +            zeroeth[index0] = paddr_to_pte((page_addr - map_start) + pa_start);
> +            zeroeth[index0].pte |= flags;
> +        }
> +
> +        /* Point to next page */
> +        page_addr += XEN_PT_LEVEL_SIZE(0);
> +    }
> +}
> +
> +/*
> + * setup_initial_pagetables:
> + *
> + * Build the page tables for Xen that map the following:
> + *   load addresses to linker addresses
> + */
> +void __init setup_initial_pagetables(void)
> +{
> +    pte_t *second;
> +    pte_t *first;
> +    pte_t *zeroeth;
> +
> +    unsigned long load_addr_start   = boot_info.load_start;
> +    unsigned long load_addr_end     = boot_info.load_end;
> +    unsigned long linker_addr_start = boot_info.linker_start;
> +    unsigned long linker_addr_end   = boot_info.linker_end;
> +
> +    BUG_ON(load_addr_start % (PAGE_ENTRIES * PAGE_SIZE) != 0);
> +    if (load_addr_start != linker_addr_start)

Nit: Style (missing blanks).

> +        BUG_ON((linker_addr_end > load_addr_start && load_addr_end > linker_addr_start));
> +
> +    /* Get the addresses where the page tables were loaded */
> +    second  = (pte_t *)(&xen_second_pagetable);
> +    first   = (pte_t *)(&xen_first_pagetable);
> +    zeroeth = (pte_t *)(&xen_zeroeth_pagetable);

Please avoid casts whenever possible.

> @@ -32,3 +33,67 @@ ENTRY(start)
>          add     sp, sp, t0
>  
>          tail    start_xen
> +
> +ENTRY(enable_mmu)
> +        /* Calculate physical offset between linker and load addresses */
> +        la      t0, boot_info
> +        REG_L   t1, BI_LINKER_START(t0)
> +        REG_L   t2, BI_LOAD_START(t0)
> +        sub     t1, t1, t2
> +
> +        /*
> +         * Calculate and update a linker time address of the .L_mmu_is_enabled
> +         * label and update CSR_STVEC with it.
> +         * MMU is configured in a way where linker addresses are mapped
> +         * on load addresses so case when linker addresses are not equal to
> +         * load addresses, and thereby, after MMU is enabled, it will cause
> +         * an exception and jump to linker time addresses
> +         */
> +        la      t3, .L_mmu_is_enabled
> +        add     t3, t3, t1
> +        csrw    CSR_STVEC, t3
> +
> +        /* Calculate a value for SATP register */
> +        li      t5, SATP_MODE_SV39
> +        li      t6, SATP_MODE_SHIFT
> +        sll     t5, t5, t6
> +
> +        la      t4, xen_second_pagetable
> +        srl     t4, t4, PAGE_SHIFT
> +        or      t4, t4, t5
> +        sfence.vma
> +        csrw    CSR_SATP, t4
> +
> +        .align 2
> +.L_mmu_is_enabled:
> +        /*
> +         * Stack should be re-inited as:
> +         * 1. Right now an address of the stack is relative to load time
> +         *    addresses what will cause an issue in case of load start address
> +         *    isn't equal to linker start address.
> +         * 2. Addresses in stack are all load time relative which can be an
> +         *    issue in case when load start address isn't equal to linker
> +         *    start address.
> +         */
> +        la      sp, cpu0_boot_stack
> +        li      t0, STACK_SIZE
> +        add     sp, sp, t0
> +
> +        /*
> +         * Re-init an address of exception handler as it was overwritten with
> +         * the address of the .L_mmu_is_enabled label.
> +         * Before jump to trap_init save return address of enable_mmu() to
> +         * know where we should back after enable_mmu() will be finished.
> +         */
> +        mv      s0, ra

Don't you need to preserve s0 for your caller?

> +        lla     t0, trap_init

Any reason for lla here when elsewhere above you use la? And aren't ...

> +        jalr    ra, t0

... these two together "call" anyway?

> +        /*
> +         * Re-calculate the return address of enable_mmu() function for case
> +         * when linker start address isn't equal to load start address
> +         */
> +        add     s0, s0, t1
> +        mv      ra, s0

"add ra, s0, t1"?

But then - can't t1 be clobbered by trap_init()?

> --- a/xen/arch/riscv/xen.lds.S
> +++ b/xen/arch/riscv/xen.lds.S
> @@ -179,3 +179,5 @@ SECTIONS
>  
>  ASSERT(!SIZEOF(.got),      ".got non-empty")
>  ASSERT(!SIZEOF(.got.plt),  ".got.plt non-empty")
> +
> +ASSERT(_end - _start <= MB(2), "Xen too large for early-boot assumptions")

Again the question whether this is also applicable to RV32.

Jan
