Re: [PATCH v3 1/3] xen/riscv: introduce setup_initial_pages
On Tue, 2023-03-28 at 17:34 +0200, Jan Beulich wrote:
> On 27.03.2023 19:17, Oleksii Kurochko wrote:
> > --- a/xen/arch/riscv/include/asm/config.h
> > +++ b/xen/arch/riscv/include/asm/config.h
> > @@ -39,12 +39,25 @@
> >  name:
> >  #endif
> >
> > -#define XEN_VIRT_START  _AT(UL, 0x80200000)
> > +#define ADDRESS_SPACE_END  (_AC(-1, UL))
> > +#define SZ_1G              0x40000000
>
> No need for this - please use GB(1) (see xen/config.h) in its stead.
I'll use GB(1). Thanks.

>
> > +#ifdef CONFIG_RISCV_64
> > +#define XEN_VIRT_START (ADDRESS_SPACE_END - SZ_1G + 1)
>
> I first thought the " + 1" would be rendering the address misaligned.
> May I suggest (ADDRESS_SPACE_END + 1 - SZ_1G) to help avoiding this
> impression?
Sure. I will update it in the next version of the patch series.

>
> > +#else
> > +#error "RV32 isn't supported"asm
>
> Stray "asm" at the end of the line.
My fault...

>
> > --- /dev/null
> > +++ b/xen/arch/riscv/include/asm/page.h
> > @@ -0,0 +1,62 @@
> > +#ifndef _ASM_RISCV_PAGE_H
> > +#define _ASM_RISCV_PAGE_H
> > +
> > +#include <xen/const.h>
> > +#include <xen/types.h>
> > +
> > +#define VPN_MASK                    ((unsigned long)(PAGETABLE_ENTRIES - 1))
> > +
> > +#define XEN_PT_LEVEL_ORDER(lvl)     ((lvl) * PAGETABLE_ORDER)
> > +#define XEN_PT_LEVEL_SHIFT(lvl)     (XEN_PT_LEVEL_ORDER(lvl) + PAGE_SHIFT)
> > +#define XEN_PT_LEVEL_SIZE(lvl)      (_AT(paddr_t, 1) << XEN_PT_LEVEL_SHIFT(lvl))
> > +#define XEN_PT_LEVEL_MAP_MASK(lvl)  (~(XEN_PT_LEVEL_SIZE(lvl) - 1))
> > +#define XEN_PT_LEVEL_MASK(lvl)      (VPN_MASK << XEN_PT_LEVEL_SHIFT(lvl))
> > +
> > +#define PTE_VALID                   BIT(0, UL)
> > +#define PTE_READABLE                BIT(1, UL)
> > +#define PTE_WRITABLE                BIT(2, UL)
> > +#define PTE_EXECUTABLE              BIT(3, UL)
> > +#define PTE_USER                    BIT(4, UL)
> > +#define PTE_GLOBAL                  BIT(5, UL)
> > +#define PTE_ACCESSED                BIT(6, UL)
> > +#define PTE_DIRTY                   BIT(7, UL)
> > +#define PTE_RSW                     (BIT(8, UL) | BIT(9, UL))
> > +
> > +#define PTE_LEAF_DEFAULT            (PTE_VALID | PTE_READABLE | PTE_WRITABLE)
> > +#define PTE_TABLE                   (PTE_VALID)
> > +
> > +/* Calculate the offsets into the pagetables for a given VA */
> > +#define pt_linear_offset(lvl, va)   ((va) >> XEN_PT_LEVEL_SHIFT(lvl))
> > +
> > +#define pt_index(lvl, va)   pt_linear_offset(lvl, (va) & XEN_PT_LEVEL_MASK(lvl))
> > +
> > +#ifdef CONFIG_RISCV_64
> > +typedef uint64_t pte_t;
> > +#else
> > +typedef uint32_t pte_t;
> > +#endif
>
> Are you sure you don't want to encapsulate these in a struct, for type
> safety purposes?
Yeah, it makes sense.

>
> > +#define addr_to_pte(x) (((x) >> PTE_PPN_SHIFT) << PAGE_SHIFT)
>
> This looking like the inverse of ...
>
> > +/* Shift the VPN[x] or PPN[x] fields of a virtual or physical address
> > + * to become the shifted PPN[x] fields of a page table entry */
> > +#define addr_to_ppn(x) (((x) >> PAGE_SHIFT) << PTE_PPN_SHIFT)
>
> ... this, does that one want to be called "ppn_to_addr()"? Otherwise I
> can't see how name and operation fit together.
ppn_to_addr() will be better, so it will be updated in the next version
of the patch series.

>
> > +static inline pte_t paddr_to_pte(const unsigned long paddr,
>
> paddr_t?
It looks like I used paddr_t before but got a recommendation to change
it to 'ul'. I will double-check.
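For illustration, the struct-wrapped pte_t together with properly typed
conversion helpers might look roughly like the sketch below (only a sketch,
not the final patch: the helper names follow the ones already in the series,
and PTE_PPN_SHIFT / PAGE_SHIFT / paddr_t are assumed to be defined as
elsewhere in this header):

    /* Sketch: type-safe PTE wrapper, as suggested in the review. */
    typedef struct {
    #ifdef CONFIG_RISCV_64
        uint64_t pte;
    #else
        uint32_t pte;
    #endif
    } pte_t;

    static inline pte_t paddr_to_pte(paddr_t paddr, unsigned long permissions)
    {
        /* Place the PPN field and OR in the permission bits. */
        return (pte_t){ .pte = ((paddr >> PAGE_SHIFT) << PTE_PPN_SHIFT) |
                               permissions };
    }

    static inline paddr_t pte_to_paddr(pte_t pte)
    {
        /* Inverse of paddr_to_pte(): extract the PPN and shift it back. */
        return (paddr_t)((pte.pte >> PTE_PPN_SHIFT) << PAGE_SHIFT);
    }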
>
> > +                                 const unsigned long permissions)
> > +{
> > +    pte_t tmp = (pte_t)addr_to_ppn(paddr);
> > +    return tmp | permissions;
> > +}
> > +
> > +static inline pte_t pte_to_paddr(const unsigned long paddr)
>
> A function of this name wants to take pte_t as parameter and return
> paddr_t (note the type safety remark higher up, as it becomes relevant
> right here). It also hardly can be implemented ...
>
> > +{
> > +    return (pte_t)addr_to_pte(paddr);
>
> ... in terms of a function/macro with effectively the opposite name.
Thanks. I'll update that.

>
> > --- /dev/null
> > +++ b/xen/arch/riscv/mm.c
> > @@ -0,0 +1,277 @@
> > +#include <xen/init.h>
> > +#include <xen/kernel.h>
> > +
> > +#include <asm/early_printk.h>
> > +#include <asm/config.h>
> > +#include <asm/csr.h>
> > +#include <asm/mm.h>
> > +#include <asm/page.h>
> > +#include <asm/processor.h>
> > +
> > +#define PGTBL_INITIAL_COUNT 8
>
> What does this magic number derive from? (A comment may help.)
It can now be 4 tables, as that is enough to map 2MB. 8 was used while I
experimented with bigger Xen binary sizes and verified that the logic of
working with the page tables is correct.

>
> > +#define PGTBL_ENTRY_AMOUNT  (PAGE_SIZE/sizeof(pte_t))
>
> Nit: Style (blanks around binary operators).
Thanks.

>
> > +struct mmu_desc {
> > +    unsigned long num_levels;
> > +    uint32_t pgtbl_count;
>
> Nit: Style (as before please avoid fixed width types when their use
> isn't really warranted; afaics unsigned int will do fine here and
> elsewhere below).
I will change it, but I am not sure I fully understand what the issue
with uint32_t is.

>
> > +    pte_t *next_pgtbl;
> > +    pte_t *pgtbl_base;
> > +};
> > +
> > +extern unsigned long __bss_start[];
> > +extern unsigned long __bss_end[];
>
> Why are these being added here? They aren't being used afaics.
I missed removing them. They were used in the previous version of the
patch.

>
> > +extern unsigned char cpu0_boot_stack[STACK_SIZE];
> > +
> > +#define PHYS_OFFSET ((unsigned long)_start - XEN_VIRT_START)
> > +#define LOAD_TO_LINK(addr) ((addr) - PHYS_OFFSET)
> > +#define LINK_TO_LOAD(addr) ((addr) + PHYS_OFFSET)
> > +
> > +pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
> > +stage1_pgtbl_root[PGTBL_ENTRY_AMOUNT];
> > +
> > +pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
> > +stage1_pgtbl_nonroot[PGTBL_INITIAL_COUNT * PGTBL_ENTRY_AMOUNT];
> > +
> > +#define HANDLE_PGTBL()                                              \
> > +    index = pt_index(curr_lvl_num, page_addr);                      \
>
> Nit: Style (stray double blanks).
Sure. Thanks.

>
> > +    if ( pte_is_valid(pgtbl[index]) ) {                             \
>
> Nit: Style (brace placement throughout the macro - in really small
> macros the way you have it may be okay as an exception, but this macro
> isn't small).
It looks like I confused the if/else brace placement with the Linux
kernel style... Anyway, it will be updated.

>
> > +        /* Find L{ 0-3 } table */                                   \
> > +        pgtbl = (pte_t *)pte_to_paddr(pgtbl[index]);                \
> > +    } else {                                                        \
> > +        /* Allocate new L{0-3} page table */                        \
> > +        if ( mmu_desc->pgtbl_count == PGTBL_INITIAL_COUNT ) {       \
> > +            early_printk("(XEN) No initial table available\n");     \
> > +            /* panic(), BUG() or ASSERT() aren't ready now. */      \
> > +            die();                                                  \
> > +        }                                                           \
> > +        mmu_desc->pgtbl_count++;                                    \
> > +        pgtbl[index] = paddr_to_pte((unsigned long)mmu_desc->next_pgtbl, \
> > +                                    PTE_VALID);                     \
> > +        pgtbl = mmu_desc->next_pgtbl;                               \
> > +        mmu_desc->next_pgtbl += PGTBL_ENTRY_AMOUNT;                 \
> > +    }
> > +
> > +static void __init setup_initial_mapping(struct mmu_desc *mmu_desc,
> > +    unsigned long map_start,
> > +    unsigned long map_end,
> > +    unsigned long pa_start,
> > +    unsigned long permission)
>
> Nit: Style (indentation, and it feels like I commented on this
> before).
You did comment on it. I will double-check, because I thought I had
fixed it.

>
> > +{
> > +    uint32_t index;
> > +    pte_t *pgtbl;
> > +    unsigned long page_addr;
> > +    unsigned int curr_lvl_num;
> > +    pte_t pte_to_be_written;
> > +    unsigned long paddr;
> > +
> > +    if ( (unsigned long)_start % XEN_PT_LEVEL_SIZE(0) ) {
>
> Nit: Style (brace placement).
Thanks. I'll update the brace placement.

>
> > +        early_printk("(XEN) Xen should be loaded at 4k boundary\n");
> > +        die();
> > +    }
> > +
> > +    if ( map_start & ~XEN_PT_LEVEL_MAP_MASK(0) ||
> > +        pa_start & ~XEN_PT_LEVEL_MAP_MASK(0) ) {
>
> Nit: Style (indentation and brace placement; more below).
Thanks. I'll update the brace placement.

>
> > +        early_printk("(XEN) map and pa start addresses should be aligned\n");
> > +        /* panic(), BUG() or ASSERT() aren't ready now. */
> > +        die();
> > +    }
> > +
> > +    page_addr = map_start;
> > +    while ( page_addr < map_end ) {
> > +        pgtbl = mmu_desc->pgtbl_base;
> > +
> > +        switch (mmu_desc->num_levels)
> > +        {
> > +        case 4: /* Level 3 */
> > +            curr_lvl_num = 3;
> > +            HANDLE_PGTBL();
> > +        case 3: /* Level 2 */
> > +            curr_lvl_num = 2;
> > +            HANDLE_PGTBL();
> > +        case 2: /* Level 1 */
> > +            curr_lvl_num = 1;
> > +            HANDLE_PGTBL();
>
> Am I getting it right that curr_lvl_num solely exists to pass a
> parameter to the macro? If so, why doesn't the macro take an
> appropriate parameter instead?
curr_lvl_num exists only to pass the current page table level to the
macro. I'll add a parameter to the macro. Thanks.

>
> > +        case 1: /* Level 0 */
> > +            index = pt_index(0, page_addr);
> > +            paddr = (page_addr - map_start) + pa_start;
> > +            pte_to_be_written = paddr_to_pte(paddr, permission);
> > +
> > +            if ( !pte_is_valid(pgtbl[index]) )
> > +                pgtbl[index] = pte_to_be_written;
> > +            else {
> > +                /*
> > +                 * get an adresses of current pte and that one to
> > +                 * be written without permission flags
> > +                 */
> > +                unsigned long curr_pte =
> > +                    (unsigned long)&pgtbl[index] & (PTE_PPN_SHIFT - 1);
> > +                pte_to_be_written &= (PTE_PPN_SHIFT - 1);
> > +
> > +                if ( curr_pte != pte_to_be_written ) {
> > +                    early_printk("PTE that is intended to write isn't the \
> > +                                  same that the once are overwriting\n");
>
> Iirc there are compilers which warn about line continuation characters
> inside a string literal. Since adjacent string literals are
> concatenated by the compiler anyway, you'll find elsewhere we have the
> equivalent of
>
>     early_printk("PTE that is intended to write isn't the"
>                  " same that the once are overwriting\n");
>
> This will also avoid an excessive number of blanks in the middle of
> the string.
Agreed. That will be much better.

>
> > +                    /* panic(), <asm/bug.h> aren't ready now. */
> > +                    die();
> > +                }
> > +            }
> > +        }
> > +
> > +        /* Point to next page */
> > +        page_addr += XEN_PT_LEVEL_SIZE(0);
> > +    }
> > +}
> > +
> > +static void __init calc_pgtbl_lvls_num(struct mmu_desc *mmu_desc)
> > +{
> > +    unsigned long satp_mode = RV_STAGE1_MODE;
> > +
> > +    /* Number of page table levels */
> > +    switch (satp_mode) {
>
> Nit: Style (missing blanks and brace placement).
I think that after this patch I'll remember the brace placement rules.
Thank you for reminding me of that :)

>
> > +    case SATP_MODE_SV32:
> > +        mmu_desc->num_levels = 2;
> > +        break;
> > +    case SATP_MODE_SV39:
> > +        mmu_desc->num_levels = 3;
> > +        break;
> > +    case SATP_MODE_SV48:
> > +        mmu_desc->num_levels = 4;
> > +        break;
> > +    default:
> > +        early_printk("(XEN) Unsupported SATP_MODE\n");
> > +        while (1);
>
> die() (until you have panic()) like above?
It should be die(). Thanks.

>
> > +    }
> > +}
> > +
> > +static void __init setup_ptes_permission(struct mmu_desc *mmu_desc)
> > +{
> > +    pte_t *pgtbl;
> > +    uint32_t index;
> > +    uint32_t permissions = PTE_VALID;
> > +    unsigned long start = LOAD_TO_LINK((unsigned long)_start);
> > +    unsigned long end = LOAD_TO_LINK((unsigned long)_end);
> > +
> > +    while ( start < end )
> > +    {
> > +        pgtbl = (pte_t *) mmu_desc->pgtbl_base;
>
> Nit: Style (extra blank after closing parenthesis). But afaict you
> don't need a cast here in the first place; as said before, please
> avoid casts whenever possible.
Right, the cast makes no sense here. Thanks.

>
> > +        switch (mmu_desc->num_levels)
>
> Nit: Style (Missing blanks).
>
> > +        {
> > +        case 4: /* find Level 3 page table*/
> > +            index = pt_index(3, start);
> > +            pgtbl = (pte_t *)pte_to_paddr(pgtbl[index]);
> > +        case 3: /* find level 2 page table */
> > +            index = pt_index(2, start);
> > +            pgtbl = (pte_t *)pte_to_paddr(pgtbl[index]);
> > +        case 2: /* find level 1 page table */
> > +            index = pt_index(1, start);
> > +            pgtbl = (pte_t *)pte_to_paddr(pgtbl[index]);
> > +        case 1: /* find level 0 page table */
> > +            index = pt_index(0, start);
> > +
> > +            if ( is_kernel_text(LINK_TO_LOAD(start)) ||
> > +                 is_kernel_inittext(LINK_TO_LOAD(start)) )
> > +                permissions |= PTE_EXECUTABLE | PTE_READABLE;
> > +
> > +            if ( is_kernel_rodata(LINK_TO_LOAD(start)) )
> > +                permissions |= PTE_READABLE;
> > +
> > +            pgtbl[index] |= permissions;
>
> setup_initial_mapping() has installed R/W mappings. Hence ORing in
> PTE_READABLE is meaningless here, and neither .text nor .rodata
> will end up write-protected. The question is ...
>
> > +        }
> > +
> > +        start += PAGE_SIZE;
> > +    }
> > +}
> > +
> > +/*
> > + * setup_initial_pagetables:
> > + *
> > + * Build the page tables for Xen that map the following:
> > + * 1. Calculate page table's level numbers.
> > + * 2. Init mmu description structure.
> > + * 3. Check that linker addresses range doesn't overlap
> > + *    with load addresses range
> > + * 4. Map all linker addresses and load addresses ( it shouldn't
> > + *    be 1:1 mapped and will be 1:1 mapped only in case if
> > + *    linker address is equal to load address ) with
> > + *    RW permissions by default.
> > + * 5. Setup proper PTE permissions for each section.
> > + */
> > +void __init setup_initial_pagetables(void)
> > +{
> > +    struct mmu_desc mmu_desc = { 0, 0, NULL, 0 };
> > +
> > +    /*
> > +     * Access to _{stard, end } is always PC-relative
> > +     * thereby when access them we will get load adresses
> > +     * of start and end of Xen
> > +     * To get linker addresses LOAD_TO_LINK() is required
> > +     * to use
> > +     */
> > +    unsigned long load_start = (unsigned long)_start;
> > +    unsigned long load_end = (unsigned long)_end;
> > +    unsigned long linker_start = LOAD_TO_LINK(load_start);
> > +    unsigned long linker_end = LOAD_TO_LINK(load_end);
> > +
> > +    if ( (linker_start != load_start) &&
> > +         (linker_start <= load_end) && (load_start <= linker_end) ) {
> > +        early_printk("(XEN) linker and load address ranges overlap\n");
> > +        die();
> > +    }
> > +
> > +    calc_pgtbl_lvls_num(&mmu_desc);
> > +
> > +    mmu_desc.pgtbl_base = stage1_pgtbl_root;
> > +    mmu_desc.next_pgtbl = stage1_pgtbl_nonroot;
> > +
> > +    setup_initial_mapping(&mmu_desc,
> > +                          linker_start,
> > +                          linker_end,
> > +                          load_start,
> > +                          PTE_LEAF_DEFAULT);
> > +
> > +    setup_ptes_permission(&mmu_desc);
>
> ...: Why does this require a 2nd pass / function in the first place?
Probably I misunderstood Julien; setup_ptes_permission() can likely be
done in setup_initial_mapping(), but here is the reply:
https://lore.kernel.org/xen-devel/79e83610-5980-d9b5-7994-6b0cb2b9049a@xxxxxxx/

>
> > +}
> > +
> > +void __init enable_mmu()
> > +{
> > +    /*
> > +     * Calculate a linker time address of the mmu_is_enabled
> > +     * label and update CSR_STVEC with it.
> > +     * MMU is configured in a way where linker addresses are mapped
> > +     * on load addresses so in a case when linker addresses are not equal
> > +     * to load addresses, after MMU is enabled, it will cause
> > +     * an exception and jump to linker time addresses.
> > +     * Otherwise if load addresses are equal to linker addresses the code
> > +     * after mmu_is_enabled label will be executed without exception.
> > +     */
> > +    csr_write(CSR_STVEC, LOAD_TO_LINK((unsigned long)&&mmu_is_enabled));
>
> That looks like a pretty unusual way of moving from physical to
> virtual addresses. But if it works ...
>
> > +    /* Ensure page table writes precede loading the SATP */
> > +    asm volatile("sfence.vma");
> > +
> > +    /* Enable the MMU and load the new pagetable for Xen */
> > +    csr_write(CSR_SATP,
> > +              ((unsigned long)stage1_pgtbl_root >> PAGE_SHIFT) | RV_STAGE1_MODE << SATP_MODE_SHIFT);
>
> Nit: Style (excessively long line).
Sure. I will update it.

>
> > +    asm volatile(".align 2");
> > +mmu_is_enabled:
>
> Nit: Style (labels want indenting by at least one blank).
Thanks. I will take it into account.

>
> > @@ -26,3 +27,13 @@ void __init noreturn start_xen(unsigned long bootcpu_id,
> >
> >      unreachable();
> > }
> > +
> > +void __init noreturn cont_after_mmu_is_enabled(void)
>
> To prevent undue link-time optimization such a function also wants to
> be noinline.
Sure. I missed that.

~ Oleksii
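For reference, the follow-ups agreed above for the HANDLE_PGTBL() macro and
for cont_after_mmu_is_enabled() might end up looking roughly like the sketch
below in the next version (only an illustration derived from the quoted code;
the final naming and style will follow the review comments):

    /*
     * Sketch: pass the page-table level directly to the macro so that the
     * curr_lvl_num local variable in setup_initial_mapping() can be dropped.
     */
    #define HANDLE_PGTBL(curr_lvl_num)                                       \
        index = pt_index(curr_lvl_num, page_addr);                           \
        if ( pte_is_valid(pgtbl[index]) )                                     \
        {                                                                     \
            /* Find L{ 0-3 } table */                                         \
            pgtbl = (pte_t *)pte_to_paddr(pgtbl[index]);                      \
        }                                                                     \
        else                                                                  \
        {                                                                     \
            /* Allocate new L{0-3} page table */                              \
            if ( mmu_desc->pgtbl_count == PGTBL_INITIAL_COUNT )               \
            {                                                                 \
                early_printk("(XEN) No initial table available\n");           \
                /* panic(), BUG() or ASSERT() aren't ready now. */            \
                die();                                                        \
            }                                                                 \
            mmu_desc->pgtbl_count++;                                          \
            pgtbl[index] = paddr_to_pte((unsigned long)mmu_desc->next_pgtbl,  \
                                        PTE_VALID);                           \
            pgtbl = mmu_desc->next_pgtbl;                                     \
            mmu_desc->next_pgtbl += PGTBL_ENTRY_AMOUNT;                       \
        }

    /* ... so the switch in setup_initial_mapping() no longer needs
     * curr_lvl_num: */
        case 4: /* Level 3 */
            HANDLE_PGTBL(3);
        case 3: /* Level 2 */
            HANDLE_PGTBL(2);
        case 2: /* Level 1 */
            HANDLE_PGTBL(1);

    /* Sketch: noinline added per the last comment above. */
    void __init noreturn noinline cont_after_mmu_is_enabled(void);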