Re: [PATCH v6 2/4] xen/riscv: introduce setup_initial_pages
On 03.05.2023 18:31, Oleksii Kurochko wrote:
> --- a/xen/arch/riscv/include/asm/config.h
> +++ b/xen/arch/riscv/include/asm/config.h
> @@ -70,12 +70,23 @@
>  name:
>  #endif
>  
> -#define XEN_VIRT_START _AT(UL, 0x80200000)
> +#ifdef CONFIG_RISCV_64
> +#define XEN_VIRT_START 0xFFFFFFFFC0000000 /* (_AC(-1, UL) + 1 - GB(1)) */
> +#else
> +#error "RV32 isn't supported"
> +#endif
>  
>  #define SMP_CACHE_BYTES (1 << 6)
>  
>  #define STACK_SIZE PAGE_SIZE
>  
> +#ifdef CONFIG_RISCV_64
> +#define CONFIG_PAGING_LEVELS 3
> +#define RV_STAGE1_MODE SATP_MODE_SV39
> +#else

#define CONFIG_PAGING_LEVELS 2

(or else I would think ...

> +#define RV_STAGE1_MODE SATP_MODE_SV32

... this and hence the entire "#else" should also be omitted)

> --- /dev/null
> +++ b/xen/arch/riscv/include/asm/current.h
> @@ -0,0 +1,10 @@
> +#ifndef __ASM_CURRENT_H
> +#define __ASM_CURRENT_H
> +
> +#define switch_stack_and_jump(stack, fn) \
> +    asm volatile (                       \
> +        "mv sp, %0 \n"                   \
> +        "j " #fn :: "r" (stack) :        \
> +    )

Nit: Style. Our normal way of writing this would be

#define switch_stack_and_jump(stack, fn) \
    asm volatile (                       \
        "mv sp, %0\n"                    \
        "j " #fn :: "r" (stack) )

i.e. unnecessary colon omitted, no trailing blank on any generated assembly line, and closing paren not placed on its own line (only closing curly braces would normally live on their own lines).

However, as to the 3rd colon: Can you really get away here without a memory clobber (i.e. the construct being a full compiler barrier)?

Further I think you want to make the use of "fn" visible to the compiler, by using an "X" constraint just like Arm does.

Finally I think you want to add unreachable(), like both Arm and x86 have it. Plus the "noreturn" on relevant functions.
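Taken together (memory clobber, "X" constraint, and unreachable()) that might end up along the lines of the below; just a sketch modelled on Arm's variant, not tested:

#define switch_stack_and_jump(stack, fn)                    \
    do {                                                    \
        /* Switch to the new stack and branch to fn; the    \
         * "X" input makes the use of fn visible to the     \
         * compiler, "memory" makes this a full barrier. */ \
        asm volatile (                                      \
            "mv sp, %0\n"                                   \
            "j " #fn :: "r" (stack), "X" (fn) : "memory" ); \
        unreachable();                                      \
    } while ( false )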
> --- /dev/null
> +++ b/xen/arch/riscv/include/asm/page.h
> @@ -0,0 +1,62 @@
> +#ifndef _ASM_RISCV_PAGE_H
> +#define _ASM_RISCV_PAGE_H
> +
> +#include <xen/const.h>
> +#include <xen/types.h>
> +
> +#define VPN_MASK ((unsigned long)(PAGETABLE_ENTRIES - 1))
> +
> +#define XEN_PT_LEVEL_ORDER(lvl) ((lvl) * PAGETABLE_ORDER)
> +#define XEN_PT_LEVEL_SHIFT(lvl) (XEN_PT_LEVEL_ORDER(lvl) + PAGE_SHIFT)
> +#define XEN_PT_LEVEL_SIZE(lvl) (_AT(paddr_t, 1) << XEN_PT_LEVEL_SHIFT(lvl))
> +#define XEN_PT_LEVEL_MAP_MASK(lvl) (~(XEN_PT_LEVEL_SIZE(lvl) - 1))
> +#define XEN_PT_LEVEL_MASK(lvl) (VPN_MASK << XEN_PT_LEVEL_SHIFT(lvl))
> +
> +#define PTE_VALID BIT(0, UL)
> +#define PTE_READABLE BIT(1, UL)
> +#define PTE_WRITABLE BIT(2, UL)
> +#define PTE_EXECUTABLE BIT(3, UL)
> +#define PTE_USER BIT(4, UL)
> +#define PTE_GLOBAL BIT(5, UL)
> +#define PTE_ACCESSED BIT(6, UL)
> +#define PTE_DIRTY BIT(7, UL)
> +#define PTE_RSW (BIT(8, UL) | BIT(9, UL))
> +
> +#define PTE_LEAF_DEFAULT (PTE_VALID | PTE_READABLE | PTE_WRITABLE)
> +#define PTE_TABLE (PTE_VALID)
> +
> +/* Calculate the offsets into the pagetables for a given VA */
> +#define pt_linear_offset(lvl, va) ((va) >> XEN_PT_LEVEL_SHIFT(lvl))
> +
> +#define pt_index(lvl, va) pt_linear_offset(lvl, (va) & XEN_PT_LEVEL_MASK(lvl))

Maybe better

#define pt_index(lvl, va) (pt_linear_offset(lvl, va) & VPN_MASK)

as the involved constant will be easier to use for the compiler?

> +/* Page Table entry */
> +typedef struct {
> +#ifdef CONFIG_RISCV_64
> +    uint64_t pte;
> +#else
> +    uint32_t pte;
> +#endif
> +} pte_t;
> +
> +#define pte_to_addr(x) (((paddr_t)(x) >> PTE_PPN_SHIFT) << PAGE_SHIFT)
> +
> +#define addr_to_pte(x) (((x) >> PAGE_SHIFT) << PTE_PPN_SHIFT)

Are these two macros useful on their own? I ask because I still consider them somewhat misnamed, as they don't produce / consume a PTE (but the raw value). Yet generally you want to avoid any code operating on raw values, using pte_t instead. IOW I would hope to be able to convince you to ...

> +static inline pte_t paddr_to_pte(paddr_t paddr,
> +                                 unsigned int permissions)
> +{
> +    return (pte_t) { .pte = addr_to_pte(paddr) | permissions };
> +}
> +
> +static inline paddr_t pte_to_paddr(pte_t pte)
> +{
> +    return pte_to_addr(pte.pte);
> +}

... expand them in the two inline functions and then drop the macros.
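I.e., purely mechanically expanding the two macros in place (for illustration only, using PTE_PPN_SHIFT as in the patch):

static inline pte_t paddr_to_pte(paddr_t paddr,
                                 unsigned int permissions)
{
    /* Shift the PPN into place and OR in the permission bits. */
    return (pte_t) { .pte = ((paddr >> PAGE_SHIFT) << PTE_PPN_SHIFT) |
                            permissions };
}

static inline paddr_t pte_to_paddr(pte_t pte)
{
    /* Widen to paddr_t first, as the raw field may be narrower. */
    return ((paddr_t)pte.pte >> PTE_PPN_SHIFT) << PAGE_SHIFT;
}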
> --- /dev/null
> +++ b/xen/arch/riscv/mm.c
> @@ -0,0 +1,315 @@
> +#include <xen/compiler.h>
> +#include <xen/init.h>
> +#include <xen/kernel.h>
> +#include <xen/pfn.h>
> +
> +#include <asm/early_printk.h>
> +#include <asm/config.h>

No inclusion of asm/config.h (or xen/config.h) in any new code please. For quite some time xen/config.h has been forcefully included everywhere by the build system.

> +/*
> + * It is expected that Xen won't be more then 2 MB.
> + * The check in xen.lds.S guarantees that.
> + * At least 3 page (in case of Sv39 )
> + * tables are needed to cover 2 MB.

I guess this reads a little better as "At least 3 page tables (in case of Sv39) ..." or "At least 3 (in case of Sv39) page tables ..."

Also could I talk you into using the full 80 columns instead of wrapping early in the middle of a sentence?

> One for each page level
> + * table with PAGE_SIZE = 4 Kb
> + *
> + * One L0 page table can cover 2 MB
> + * (512 entries of one page table * PAGE_SIZE).
> + *
> + */
> +#define PGTBL_INITIAL_COUNT ((CONFIG_PAGING_LEVELS - 1) + 1)

Hmm, did you lose the part of the comment explaining the "+ 1"? Or did the need for that actually go away (and you should drop it here)?

> +#define PGTBL_ENTRY_AMOUNT (PAGE_SIZE / sizeof(pte_t))

Isn't this PAGETABLE_ENTRIES (and the constant hence unnecessary)?

> +static void __init setup_initial_mapping(struct mmu_desc *mmu_desc,
> +                                         unsigned long map_start,
> +                                         unsigned long map_end,
> +                                         unsigned long pa_start,
> +                                         unsigned int permissions)

What use is the last parameter, when the sole caller passes PTE_LEAF_DEFAULT (i.e. a build-time constant)? Then ...

> +{
> +    unsigned int index;
> +    pte_t *pgtbl;
> +    unsigned long page_addr;
> +    pte_t pte_to_be_written;
> +    unsigned long paddr;
> +    unsigned int tmp_permissions;

... this variable (which would better be of more narrow scope anyway, like perhaps several more of the above) could also have its tmp_ prefix dropped afaict.

> +    if ( (unsigned long)_start % XEN_PT_LEVEL_SIZE(0) )
> +    {
> +        early_printk("(XEN) Xen should be loaded at 4k boundary\n");
> +        die();
> +    }
> +
> +    if ( map_start & ~XEN_PT_LEVEL_MAP_MASK(0) ||
> +         pa_start & ~XEN_PT_LEVEL_MAP_MASK(0) )
> +    {
> +        early_printk("(XEN) map and pa start addresses should be aligned\n");
> +        /* panic(), BUG() or ASSERT() aren't ready now. */
> +        die();
> +    }
> +
> +    for ( page_addr = map_start; page_addr < map_end; page_addr += XEN_PT_LEVEL_SIZE(0) )

Nit: Style (line looks to be too long now).

    for ( page_addr = map_start;
          page_addr < map_end;
          page_addr += XEN_PT_LEVEL_SIZE(0) )

is the way we would typically wrap it, but

    for ( page_addr = map_start; page_addr < map_end;
          page_addr += XEN_PT_LEVEL_SIZE(0) )

would of course also be okay if you preferred that.

> +    {
> +        pgtbl = mmu_desc->pgtbl_base;
> +
> +        switch ( mmu_desc->num_levels )
> +        {
> +        case 4: /* Level 3 */
> +            HANDLE_PGTBL(3);
> +        case 3: /* Level 2 */
> +            HANDLE_PGTBL(2);
> +        case 2: /* Level 1 */
> +            HANDLE_PGTBL(1);
> +        case 1: /* Level 0 */
> +            index = pt_index(0, page_addr);
> +            paddr = (page_addr - map_start) + pa_start;
> +
> +            tmp_permissions = permissions;
> +
> +            if ( is_kernel_text(LINK_TO_LOAD(page_addr)) ||
> +                    is_kernel_inittext(LINK_TO_LOAD(page_addr)) )

Nit: Style (and I'm pretty sure I did comment on this kind of too deep indentation already).

> +                tmp_permissions =
> +                    PTE_EXECUTABLE | PTE_READABLE | PTE_VALID;
> +
> +            if ( is_kernel_rodata(LINK_TO_LOAD(page_addr)) )
> +                tmp_permissions = PTE_READABLE | PTE_VALID;
> +
> +            pte_to_be_written = paddr_to_pte(paddr, tmp_permissions);
> +
> +            if ( !pte_is_valid(pgtbl[index]) )
> +                pgtbl[index] = pte_to_be_written;
> +            else
> +            {
> +                /*
> +                * get an adresses of current pte and that one to
> +                * be written
> +                */

Nit: Style (one missing blank each in the last three lines, and comment text wants to start with a capital letter).

> +                unsigned long curr_pte =
> +                    pgtbl[index].pte & ~(PTE_DIRTY | PTE_ACCESSED);

While technically "unsigned long" is okay to use here (afaict), I'd still recommend ...

> +                pte_to_be_written.pte &= ~(PTE_DIRTY | PTE_ACCESSED);
> +
> +                if ( curr_pte != pte_to_be_written.pte )

... doing everything in terms of pte_t:

    pte_t curr_pte = pgtbl[index];

    curr_pte.pte &= ~(PTE_DIRTY | PTE_ACCESSED);
    pte_to_be_written.pte &= ~(PTE_DIRTY | PTE_ACCESSED);

    if ( curr_pte.pte != pte_to_be_written.pte )

or perhaps even simply

    if ( (pgtbl[index].pte ^ pte_to_be_written.pte) &
         ~(PTE_DIRTY | PTE_ACCESSED) )

> +                {
> +                    early_printk("PTE overridden has occurred\n");
> +                    /* panic(), <asm/bug.h> aren't ready now. */
> +                    die();
> +                }
> +            }
> +        }
> +    }
> +    #undef HANDLE_PGTBL

Nit: Such an #undef, even if technically okay either way, would imo better live in the same scope (and have the same indentation) as the corresponding #define. Since your #define is ahead of the function, it would ...

> +}

... then live here.

> +static void __init calc_pgtbl_lvls_num(struct  mmu_desc *mmu_desc)

Nit: Double blank after "struct".

> +{
> +    unsigned long satp_mode = RV_STAGE1_MODE;
> +
> +    /* Number of page table levels */
> +    switch (satp_mode)
> +    {
> +    case SATP_MODE_SV32:
> +        mmu_desc->num_levels = 2;
> +        break;
> +    case SATP_MODE_SV39:
> +        mmu_desc->num_levels = 3;
> +        break;
> +    case SATP_MODE_SV48:
> +        mmu_desc->num_levels = 4;
> +        break;
> +    default:
> +        early_printk("(XEN) Unsupported SATP_MODE\n");
> +        die();
> +    }
> +}

Do you really need this function anymore? Isn't it now simply

    mmu_desc.num_levels = CONFIG_PAGING_LEVELS;

in the caller? Which could then even be the variable's initializer there?
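For illustration, with a designated initializer that would simply become (a sketch; the remaining fields of struct mmu_desc are then zero-initialized implicitly):

    /* num_levels is fixed at build time by the chosen SATP mode. */
    struct mmu_desc mmu_desc = { .num_levels = CONFIG_PAGING_LEVELS };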
> +static bool __init check_pgtbl_mode_support(struct mmu_desc *mmu_desc,
> +                                            unsigned long load_start,
> +                                            unsigned long satp_mode)
> +{
> +    bool is_mode_supported = false;
> +    unsigned int index;
> +    unsigned int page_table_level = (mmu_desc->num_levels - 1);
> +    unsigned level_map_mask = XEN_PT_LEVEL_MAP_MASK(page_table_level);
> +
> +    unsigned long aligned_load_start = load_start & level_map_mask;
> +    unsigned long aligned_page_size = XEN_PT_LEVEL_SIZE(page_table_level);
> +    unsigned long xen_size = (unsigned long)(_end - start);
> +
> +    if ( (load_start + xen_size) > (aligned_load_start + aligned_page_size) )
> +    {
> +        early_printk("please place Xen to be in range of PAGE_SIZE "
> +                     "where PAGE_SIZE is XEN_PT_LEVEL_SIZE( {L3 | L2 | L1} ) "
> +                     "depending on expected SATP_MODE \n"
> +                     "XEN_PT_LEVEL_SIZE is defined in <asm/page.h>\n");
> +        die();
> +    }
> +
> +    index = pt_index(page_table_level, aligned_load_start);
> +    stage1_pgtbl_root[index] = paddr_to_pte(aligned_load_start,
> +                                            PTE_LEAF_DEFAULT | PTE_EXECUTABLE);
> +
> +    asm volatile ( "sfence.vma" );
> +    csr_write(CSR_SATP,
> +              PFN_DOWN((unsigned long)stage1_pgtbl_root) |
> +              satp_mode << SATP_MODE_SHIFT);
> +
> +    if ( (csr_read(CSR_SATP) >> SATP_MODE_SHIFT) == satp_mode )
> +        is_mode_supported = true;

Just as a remark: While plain shift is kind of okay here, because the field is the top-most one in the register, generally you will want to get used to MASK_EXTR() (and MASK_INSR()) in cases like this one.

> +    csr_write(CSR_SATP, 0);
> +
> +    /* Clean MMU root page table */
> +    stage1_pgtbl_root[index] = paddr_to_pte(0x0, 0x0);
> +
> +    asm volatile ( "sfence.vma" );

Doesn't this want to move between the SATP write and the clearing of the root table slot? Also here and elsewhere - don't these asm()s need memory clobbers? And anyway - could I talk you into introducing an inline wrapper (e.g. named sfence_vma()) so all uses end up consistent?

> +void __init setup_initial_pagetables(void)
> +{
> +    struct mmu_desc mmu_desc = { 0, 0, NULL, NULL };
> +
> +    /*
> +     * Access to _stard, _end is always PC-relative

Nit: Typo-ed symbol name. Also ...

> +     * thereby when access them we will get load adresses
> +     * of start and end of Xen
> +     * To get linker addresses LOAD_TO_LINK() is required
> +     * to use
> +     */

see the earlier line wrapping remark again. Finally in multi-sentence comments full stops are required.

> +    unsigned long load_start = (unsigned long)_start;
> +    unsigned long load_end = (unsigned long)_end;
> +    unsigned long linker_start = LOAD_TO_LINK(load_start);
> +    unsigned long linker_end = LOAD_TO_LINK(load_end);
> +
> +    if ( (linker_start != load_start) &&
> +         (linker_start <= load_end) && (load_start <= linker_end) )
> +    {
> +        early_printk("(XEN) linker and load address ranges overlap\n");
> +        die();
> +    }
> +
> +    calc_pgtbl_lvls_num(&mmu_desc);
> +
> +    if ( !check_pgtbl_mode_support(&mmu_desc, load_start, RV_STAGE1_MODE) )

It is again questionable here whether passing a constant to a function at its sole call site is really useful.
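Such a wrapper could be as simple as the below (a sketch, with the memory clobber asked for above included so the construct also acts as a compiler barrier):

static inline void sfence_vma(void)
{
    /* Order page table updates against subsequent translations. */
    asm volatile ( "sfence.vma" ::: "memory" );
}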
> +void __init noinline enable_mmu()
> +{
> +    /*
> +     * Calculate a linker time address of the mmu_is_enabled
> +     * label and update CSR_STVEC with it.
> +     * MMU is configured in a way where linker addresses are mapped
> +     * on load addresses so in a case when linker addresses are not equal
> +     * to load addresses, after MMU is enabled, it will cause
> +     * an exception and jump to linker time addresses.
> +     * Otherwise if load addresses are equal to linker addresses the code
> +     * after mmu_is_enabled label will be executed without exception.
> +     */
> +    csr_write(CSR_STVEC, LOAD_TO_LINK((unsigned long)&&mmu_is_enabled));
> +
> +    /* Ensure page table writes precede loading the SATP */
> +    asm volatile ( "sfence.vma" );
> +
> +    /* Enable the MMU and load the new pagetable for Xen */
> +    csr_write(CSR_SATP,
> +              PFN_DOWN((unsigned long)stage1_pgtbl_root) |
> +              RV_STAGE1_MODE << SATP_MODE_SHIFT);
> +
> +    asm volatile ( ".align 2" );

May I suggest to avoid .align, and to prefer .balign and .p2align instead? This helps people coming from different architectures, as which of the two .align resolves to is arch-dependent.

> --- a/xen/arch/riscv/riscv64/head.S
> +++ b/xen/arch/riscv/riscv64/head.S
> @@ -1,4 +1,5 @@
>  #include <asm/asm.h>
> +#include <asm/asm-offsets.h>
>  #include <asm/riscv_encoding.h>
>  
>  .section .text.header, "ax", %progbits

How does a need for this arise all of a sudden, without other changes to this file? Is this maybe a leftover which wants dropping?

> --- a/xen/arch/riscv/xen.lds.S
> +++ b/xen/arch/riscv/xen.lds.S
> @@ -137,6 +137,7 @@ SECTIONS
>      __init_end = .;
>  
>      .bss : {                     /* BSS */
> +        . = ALIGN(POINTER_ALIGN);
>          __bss_start = .;
>          *(.bss.stack_aligned)
>          . = ALIGN(PAGE_SIZE);

Doesn't this want to be a separate change?

Jan