[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v3 1/3] xen/riscv: introduce setup_initial_pages
On 27.03.2023 19:17, Oleksii Kurochko wrote: > --- a/xen/arch/riscv/include/asm/config.h > +++ b/xen/arch/riscv/include/asm/config.h > @@ -39,12 +39,25 @@ > name: > #endif > > -#define XEN_VIRT_START _AT(UL, 0x80200000) > +#define ADDRESS_SPACE_END (_AC(-1, UL)) > +#define SZ_1G 0x40000000 No need for this - please use GB(1) (see xen/config.h) in its stead. > +#ifdef CONFIG_RISCV_64 > +#define XEN_VIRT_START (ADDRESS_SPACE_END - SZ_1G + 1) I first thought the " + 1" would be rendering the address misaligned. May I suggest (ADDRESS_SPACE_END + 1 - SZ_1G) to help avoiding this impression? > +#else > +#error "RV32 isn't supported"asm Stray "asm" at the end of the line. > --- /dev/null > +++ b/xen/arch/riscv/include/asm/page.h > @@ -0,0 +1,62 @@ > +#ifndef _ASM_RISCV_PAGE_H > +#define _ASM_RISCV_PAGE_H > + > +#include <xen/const.h> > +#include <xen/types.h> > + > +#define VPN_MASK ((unsigned long)(PAGETABLE_ENTRIES - 1)) > + > +#define XEN_PT_LEVEL_ORDER(lvl) ((lvl) * PAGETABLE_ORDER) > +#define XEN_PT_LEVEL_SHIFT(lvl) (XEN_PT_LEVEL_ORDER(lvl) + PAGE_SHIFT) > +#define XEN_PT_LEVEL_SIZE(lvl) (_AT(paddr_t, 1) << > XEN_PT_LEVEL_SHIFT(lvl)) > +#define XEN_PT_LEVEL_MAP_MASK(lvl) (~(XEN_PT_LEVEL_SIZE(lvl) - 1)) > +#define XEN_PT_LEVEL_MASK(lvl) (VPN_MASK << XEN_PT_LEVEL_SHIFT(lvl)) > + > +#define PTE_VALID BIT(0, UL) > +#define PTE_READABLE BIT(1, UL) > +#define PTE_WRITABLE BIT(2, UL) > +#define PTE_EXECUTABLE BIT(3, UL) > +#define PTE_USER BIT(4, UL) > +#define PTE_GLOBAL BIT(5, UL) > +#define PTE_ACCESSED BIT(6, UL) > +#define PTE_DIRTY BIT(7, UL) > +#define PTE_RSW (BIT(8, UL) | BIT(9, UL)) > + > +#define PTE_LEAF_DEFAULT (PTE_VALID | PTE_READABLE | PTE_WRITABLE) > +#define PTE_TABLE (PTE_VALID) > + > +/* Calculate the offsets into the pagetables for a given VA */ > +#define pt_linear_offset(lvl, va) ((va) >> XEN_PT_LEVEL_SHIFT(lvl)) > + > +#define pt_index(lvl, va) pt_linear_offset(lvl, (va) & > XEN_PT_LEVEL_MASK(lvl)) > + > +#ifdef CONFIG_RISCV_64 > +typedef uint64_t 
pte_t; > +#else > +typedef uint32_t pte_t; > +#endif Are you sure you don't want to encapsulate these in a struct, for type safety purposes? > +#define addr_to_pte(x) (((x) >> PTE_PPN_SHIFT) << PAGE_SHIFT) This looking like the inverse of ... > +/* Shift the VPN[x] or PPN[x] fields of a virtual or physical address > + * to become the shifted PPN[x] fields of a page table entry */ > +#define addr_to_ppn(x) (((x) >> PAGE_SHIFT) << PTE_PPN_SHIFT) ... this, does that one want to be called "ppn_to_addr()"? Otherwise I can't see how the name and the operation fit together. > +static inline pte_t paddr_to_pte(const unsigned long paddr, paddr_t? > + const unsigned long permissions) > +{ > + pte_t tmp = (pte_t)addr_to_ppn(paddr); > + return tmp | permissions; > +} > + > +static inline pte_t pte_to_paddr(const unsigned long paddr) A function of this name wants to take pte_t as parameter and return paddr_t (note the type safety remark higher up, as it becomes relevant right here). It also hardly can be implemented ... > +{ > + return (pte_t)addr_to_pte(paddr); ... in terms of a function/macro with effectively the opposite name. > --- /dev/null > +++ b/xen/arch/riscv/mm.c > @@ -0,0 +1,277 @@ > +#include <xen/init.h> > +#include <xen/kernel.h> > + > +#include <asm/early_printk.h> > +#include <asm/config.h> > +#include <asm/csr.h> > +#include <asm/mm.h> > +#include <asm/page.h> > +#include <asm/processor.h> > + > +#define PGTBL_INITIAL_COUNT 8 What does this magic number derive from? (A comment may help.) > +#define PGTBL_ENTRY_AMOUNT (PAGE_SIZE/sizeof(pte_t)) Nit: Style (blanks around binary operators). > +struct mmu_desc { > + unsigned long num_levels; > + uint32_t pgtbl_count; Nit: Style (as before please avoid fixed width types when their use isn't really warranted; afaics unsigned int will do fine here and elsewhere below). 
> + pte_t *next_pgtbl; > + pte_t *pgtbl_base; > +}; > + > +extern unsigned long __bss_start[]; > +extern unsigned long __bss_end[]; Why are these being added here? They aren't being used afaics. > +extern unsigned char cpu0_boot_stack[STACK_SIZE]; > + > +#define PHYS_OFFSET ((unsigned long)_start - XEN_VIRT_START) > +#define LOAD_TO_LINK(addr) ((addr) - PHYS_OFFSET) > +#define LINK_TO_LOAD(addr) ((addr) + PHYS_OFFSET) > + > +pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE) > +stage1_pgtbl_root[PGTBL_ENTRY_AMOUNT]; > + > +pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE) > +stage1_pgtbl_nonroot[PGTBL_INITIAL_COUNT * PGTBL_ENTRY_AMOUNT]; > + > +#define HANDLE_PGTBL() \ > + index = pt_index(curr_lvl_num, page_addr); \ Nit: Style (stray double blanks). > + if ( pte_is_valid(pgtbl[index]) ) { \ Nit: Style (brace placement throughout the macro - in really small macros the way you have it may be okay as an exception, but this macro isn't small). > + /* Find L{ 0-3 } table */ \ > + pgtbl = (pte_t *)pte_to_paddr(pgtbl[index]); \ > + } else { \ > + /* Allocate new L{0-3} page table */ \ > + if ( mmu_desc->pgtbl_count == PGTBL_INITIAL_COUNT ) { \ > + early_printk("(XEN) No initial table available\n"); \ > + /* panic(), BUG() or ASSERT() aren't ready now. */ \ > + die(); \ > + } \ > + mmu_desc->pgtbl_count++; \ > + pgtbl[index] = paddr_to_pte((unsigned long)mmu_desc->next_pgtbl, \ > + PTE_VALID); \ > + pgtbl = mmu_desc->next_pgtbl; \ > + mmu_desc->next_pgtbl += PGTBL_ENTRY_AMOUNT; \ > + } > + > +static void __init setup_initial_mapping(struct mmu_desc *mmu_desc, > + unsigned long map_start, > + unsigned long map_end, > + unsigned long pa_start, > + unsigned long permission) Nit: Style (indentation, and it feels like I commented on this before). 
> +{ > + uint32_t index; > + pte_t *pgtbl; > + unsigned long page_addr; > + unsigned int curr_lvl_num; > + pte_t pte_to_be_written; > + unsigned long paddr; > + > + if ( (unsigned long)_start % XEN_PT_LEVEL_SIZE(0) ) { Nit: Style (brace placement). > + early_printk("(XEN) Xen should be loaded at 4k boundary\n"); > + die(); > + } > + > + if ( map_start & ~XEN_PT_LEVEL_MAP_MASK(0) || > + pa_start & ~XEN_PT_LEVEL_MAP_MASK(0) ) { Nit: Style (indentation and brace placement; more below). > + early_printk("(XEN) map and pa start addresses should be aligned\n"); > + /* panic(), BUG() or ASSERT() aren't ready now. */ > + die(); > + } > + > + page_addr = map_start; > + while ( page_addr < map_end ) { > + pgtbl = mmu_desc->pgtbl_base; > + > + switch (mmu_desc->num_levels) > + { > + case 4: /* Level 3 */ > + curr_lvl_num = 3; > + HANDLE_PGTBL(); > + case 3: /* Level 2 */ > + curr_lvl_num = 2; > + HANDLE_PGTBL(); > + case 2: /* Level 1 */ > + curr_lvl_num = 1; > + HANDLE_PGTBL(); Am I getting it right that curr_lvl_num solely exists to pass a parameter to the macro? If so, why doesn't the macro take an appropriate parameter instead? > + case 1: /* Level 0 */ > + index = pt_index(0, page_addr); > + paddr = (page_addr - map_start) + pa_start; > + pte_to_be_written = paddr_to_pte(paddr, permission); > + > + if ( !pte_is_valid(pgtbl[index]) ) > + pgtbl[index] = pte_to_be_written; > + else { > + /* > + * get an adresses of current pte and that one to > + * be written without permission flags > + */ > + unsigned long curr_pte = > + (unsigned long)&pgtbl[index] & (PTE_PPN_SHIFT - 1); > + pte_to_be_written &= (PTE_PPN_SHIFT - 1); > + > + if ( curr_pte != pte_to_be_written ) { > + early_printk("PTE that is intended to write isn't the \ > + same that the once are overwriting\n"); Iirc there are compilers which warn about line continuation characters inside a string literal. 
Since adjacent string literals are concatenated by the compiler anyway, you'll find elsewhere we have the equivalent of early_printk("PTE that is intended to write isn't the" " same that the once are overwriting\n"); This will also avoid an excessive number of blanks in the middle of the string. > + /* panic(), <asm/bug.h> aren't ready now. */ > + die(); > + } > + } > + } > + > + /* Point to next page */ > + page_addr += XEN_PT_LEVEL_SIZE(0); > + } > +} > + > +static void __init calc_pgtbl_lvls_num(struct mmu_desc *mmu_desc) > +{ > + unsigned long satp_mode = RV_STAGE1_MODE; > + > + /* Number of page table levels */ > + switch (satp_mode) { Nit: Style (missing blanks and brace placement). > + case SATP_MODE_SV32: > + mmu_desc->num_levels = 2; > + break; > + case SATP_MODE_SV39: > + mmu_desc->num_levels = 3; > + break; > + case SATP_MODE_SV48: > + mmu_desc->num_levels = 4; > + break; > + default: > + early_printk("(XEN) Unsupported SATP_MODE\n"); > + while (1); die() (until you have panic()) like above? > + } > +} > + > +static void __init setup_ptes_permission(struct mmu_desc *mmu_desc) > +{ > + pte_t *pgtbl; > + uint32_t index; > + uint32_t permissions = PTE_VALID; > + unsigned long start = LOAD_TO_LINK((unsigned long)_start); > + unsigned long end = LOAD_TO_LINK((unsigned long)_end); > + > + while ( start < end ) > + { > + pgtbl = (pte_t *) mmu_desc->pgtbl_base; Nit: Style (extra blank after closing parenthesis). But afaict you don't need a cast here in the first place; as said before, please avoid casts whenever possible. > + switch (mmu_desc->num_levels) Nit: Style (Missing blanks). 
> + { > + case 4: /* find Level 3 page table*/ > + index = pt_index(3, start); > + pgtbl = (pte_t *)pte_to_paddr(pgtbl[index]); > + case 3: /* find level 2 page table */ > + index = pt_index(2, start); > + pgtbl = (pte_t *)pte_to_paddr(pgtbl[index]); > + case 2: /* find level 1 page table */ > + index = pt_index(1, start); > + pgtbl = (pte_t *)pte_to_paddr(pgtbl[index]); > + case 1: /* find level 0 page table */ > + index = pt_index(0, start); > + > + if ( is_kernel_text(LINK_TO_LOAD(start)) || > + is_kernel_inittext(LINK_TO_LOAD(start)) ) > + permissions |= PTE_EXECUTABLE | PTE_READABLE; > + > + if ( is_kernel_rodata(LINK_TO_LOAD(start)) ) > + permissions |= PTE_READABLE; > + > + pgtbl[index] |= permissions; setup_initial_mapping() has installed R/W mappings. Hence ORing in PTE_READABLE is meaningless here, and neither .text nor .rodata will end up write-protected. The question is ... > + } > + > + start += PAGE_SIZE; > + } > +} > + > +/* > + * setup_initial_pagetables: > + * > + * Build the page tables for Xen that map the following: > + * 1. Calculate page table's level numbers. > + * 2. Init mmu description structure. > + * 3. Check that linker addresses range doesn't overlap > + * with load addresses range > + * 4. Map all linker addresses and load addresses ( it shouldn't > + * be 1:1 mapped and will be 1:1 mapped only in case if > + * linker address is equal to load address ) with > + * RW permissions by default. > + * 5. Setup proper PTE permissions for each section. 
> + */ > +void __init setup_initial_pagetables(void) > +{ > + struct mmu_desc mmu_desc = { 0, 0, NULL, 0 }; > + > + /* > + * Access to _{stard, end } is always PC-relative > + * thereby when access them we will get load adresses > + * of start and end of Xen > + * To get linker addresses LOAD_TO_LINK() is required > + * to use > + */ > + unsigned long load_start = (unsigned long)_start; > + unsigned long load_end = (unsigned long)_end; > + unsigned long linker_start = LOAD_TO_LINK(load_start); > + unsigned long linker_end = LOAD_TO_LINK(load_end); > + > + if ( (linker_start != load_start) && > + (linker_start <= load_end) && (load_start <= linker_end) ) { > + early_printk("(XEN) linker and load address ranges overlap\n"); > + die(); > + } > + > + calc_pgtbl_lvls_num(&mmu_desc); > + > + mmu_desc.pgtbl_base = stage1_pgtbl_root; > + mmu_desc.next_pgtbl = stage1_pgtbl_nonroot; > + > + setup_initial_mapping(&mmu_desc, > + linker_start, > + linker_end, > + load_start, > + PTE_LEAF_DEFAULT); > + > + setup_ptes_permission(&mmu_desc); ...: Why does this require a 2nd pass / function in the first place? > +} > + > +void __init enable_mmu() > +{ > + /* > + * Calculate a linker time address of the mmu_is_enabled > + * label and update CSR_STVEC with it. > + * MMU is configured in a way where linker addresses are mapped > + * on load addresses so in a case when linker addresses are not equal > + * to load addresses, after MMU is enabled, it will cause > + * an exception and jump to linker time addresses. > + * Otherwise if load addresses are equal to linker addresses the code > + * after mmu_is_enabled label will be executed without exception. > + */ > + csr_write(CSR_STVEC, LOAD_TO_LINK((unsigned long)&&mmu_is_enabled)); That looks like a pretty unusual way of moving from physical to virtual addresses. But if it works ... 
> + /* Ensure page table writes precede loading the SATP */ > + asm volatile("sfence.vma"); > + > + /* Enable the MMU and load the new pagetable for Xen */ > + csr_write(CSR_SATP, > + ((unsigned long)stage1_pgtbl_root >> PAGE_SHIFT) | > RV_STAGE1_MODE << SATP_MODE_SHIFT); Nit: Style (excessively long line). > + asm volatile(".align 2"); > +mmu_is_enabled: Nit: Style (labels want indenting by at least one blank). > @@ -26,3 +27,13 @@ void __init noreturn start_xen(unsigned long bootcpu_id, > > unreachable(); > } > + > +void __init noreturn cont_after_mmu_is_enabled(void) To prevent undue link-time optimization such a function also wants to be noinline. Jan
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |