[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v2 1/3] xen/arm: Add 4-level page table for stage 2 translation
On Tue, 2014-05-27 at 12:16 +0530, vijay.kilari@xxxxxxxxx wrote: > From: Vijaya Kumar K <Vijaya.Kumar@xxxxxxxxxxxxxxxxxx> > > To support 48-bit Physical Address support, add 4-level > page tables for stage 2 translation. > With this patch stage 1 and stage 2 translation at EL2 are > with 4-levels > > Configure TCR_EL2.IPS and VTCR_EL2.PS based on platform > supported PA range at runtime > > Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@xxxxxxxxxxxxxxxxxx> > --- > xen/arch/arm/arm64/head.S | 14 ++-- > xen/arch/arm/mm.c | 18 +++--- > xen/arch/arm/p2m.c | 136 > +++++++++++++++++++++++++++++++++------ > xen/include/asm-arm/p2m.h | 5 +- > xen/include/asm-arm/page.h | 16 +++-- > xen/include/asm-arm/processor.h | 102 ++++++++++++++++++++++++++++- > 6 files changed, 248 insertions(+), 43 deletions(-) > > diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S > index 2a13527..8396268 100644 > --- a/xen/arch/arm/arm64/head.S > +++ b/xen/arch/arm/arm64/head.S > @@ -224,13 +224,13 @@ skip_bss: > ldr x0, =MAIRVAL > msr mair_el2, x0 > > - /* Set up the HTCR: > - * PASize -- 40 bits / 1TB > - * Top byte is used > - * PT walks use Inner-Shareable accesses, > - * PT walks are write-back, write-allocate in both cache levels, > - * Full 64-bit address space goes through this table. 
*/ > - ldr x0, =0x80823500 > + mrs x1, ID_AA64MMFR0_EL1 > + > + /* Set up the HTCR */ > + ldr x0, =TCR_VAL_BASE > + > + /* Set TCR_EL2.IPS based on ID_AA64MMFR0_EL1.PARange */ > + bfi x0, x1, #16, #3 > msr tcr_el2, x0 > > /* Set up the SCTLR_EL2: > diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c > index eac228c..04e3182 100644 > --- a/xen/arch/arm/mm.c > +++ b/xen/arch/arm/mm.c > @@ -377,17 +377,17 @@ void __init arch_init_memory(void) > void __cpuinit setup_virt_paging(void) > { > /* Setup Stage 2 address translation */ > - /* SH0=11 (Inner-shareable) > - * ORGN0=IRGN0=01 (Normal memory, Write-Back Write-Allocate Cacheable) > - * SL0=01 (Level-1) > - * ARVv7: T0SZ=(1)1000 = -8 (32-(-8) = 40 bit physical addresses) > - * ARMv8: T0SZ=01 1000 = 24 (64-24 = 40 bit physical addresses) > - * PS=010 == 40 bits > - */ > #ifdef CONFIG_ARM_32 > - WRITE_SYSREG32(0x80003558, VTCR_EL2); > + WRITE_SYSREG32(VTCR_VAL_BASE, VTCR_EL2); > #else > - WRITE_SYSREG32(0x80023558, VTCR_EL2); > + /* Update IPA 48 bit and PA 48 bit */ > + if ( current_cpu_data.mm64.pa_range == VTCR_PS_48BIT_VAL ) What about values other than 48 or 40 bit? Why is the content of ID_AA64MMFR0_EL1 being compared to a #define relating to VTCR? Can't mm64.pa_range be transformed directly into the right value for VTCR without needing to jump through these hoops (like what you do for tcr_el2)? 
> + WRITE_SYSREG32(VTCR_VAL_BASE | VTCR_TOSZ_48BIT | VTCR_PS_48BIT, > + VTCR_EL2); > + else > + /* default to IPA 48 bit and PA 40 bit */ > + WRITE_SYSREG32(VTCR_VAL_BASE | VTCR_TOSZ_40BIT | VTCR_PS_40BIT, > + VTCR_EL2); > #endif > isb(); > } > diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c > index 603c097..045c003 100644 > --- a/xen/arch/arm/p2m.c > +++ b/xen/arch/arm/p2m.c > @@ -10,29 +10,42 @@ > #include <asm/hardirq.h> > #include <asm/page.h> > > +#ifdef CONFIG_ARM_64 > +/* Zeroeth level is of 1 page size */ > +#define P2M_ROOT_ORDER 0 > +#else > /* First level P2M is 2 consecutive pages */ > -#define P2M_FIRST_ORDER 1 > -#define P2M_FIRST_ENTRIES (LPAE_ENTRIES<<P2M_FIRST_ORDER) > +#define P2M_ROOT_ORDER 1 > +#endif > +#define P2M_FIRST_ENTRIES (LPAE_ENTRIES << P2M_ROOT_ORDER) > > void dump_p2m_lookup(struct domain *d, paddr_t addr) > { > struct p2m_domain *p2m = &d->arch.p2m; > - lpae_t *first; > + lpae_t *lookup; > > printk("dom%d IPA 0x%"PRIpaddr"\n", d->domain_id, addr); > > - if ( first_linear_offset(addr) > LPAE_ENTRIES ) > +#ifdef CONFIG_ARM_64 > + if ( zeroeth_linear_offset(addr) > P2M_FIRST_ENTRIES ) > + { > + printk("Beyond number of support entries at zeroeth level\n"); "supported". But actually I think this would be a programming error and therefore could be an assert or BUG_ON. 
> + return; > + } > +#else > + if ( first_linear_offset(addr) > P2M_FIRST_ENTRIES ) > { > printk("Cannot dump addresses in second of first level pages...\n"); > return; > } > +#endif > > @@ -107,6 +135,9 @@ paddr_t p2m_lookup(struct domain *d, paddr_t paddr, > p2m_type_t *t) > { > struct p2m_domain *p2m = &d->arch.p2m; > lpae_t pte, *first = NULL, *second = NULL, *third = NULL; > +#ifdef CONFIG_ARM_64 > + lpae_t *zeroeth = NULL; > +#endif > paddr_t maddr = INVALID_PADDR; > p2m_type_t _t; > > @@ -117,9 +148,26 @@ paddr_t p2m_lookup(struct domain *d, paddr_t paddr, > p2m_type_t *t) > > spin_lock(&p2m->lock); > > +#ifdef CONFIG_ARM_64 > + zeroeth = p2m_map_zeroeth(p2m, paddr); Naming this function p2m_map_root might allow you to get rid of some of the ifdefs. > + if ( !zeroeth ) > + goto err; > + > + pte = zeroeth[zeroeth_table_offset(paddr)]; > + /* Zeroeth level does not support block translation > + * so pte.p2m.table should be always set. ASSERT/BUG_ON? > @@ -541,13 +639,15 @@ int p2m_alloc_table(struct domain *d) > clear_page(p); > unmap_domain_page(p); > > +#ifdef CONFIG_ARM_32 Since you've defined it and used it for the allocation this should be based on P2M_ROOT_ORDER I think. > p = __map_domain_page(page + 1); > clear_page(p); > unmap_domain_page(p); > +#endif > > - p2m->first_level = page; > + p2m->root_level = page; You could profitably have done this rename in a precursor patch, which I think would have reduced the size of this one to a more manageable one. Never mind now though. > @@ -110,8 +114,8 @@ typedef struct __packed { > unsigned long ng:1; /* Not-Global */ > > /* The base address must be appropriately aligned for Block entries */ > - unsigned long base:28; /* Base address of block or next table */ > - unsigned long sbz:12; /* Must be zero */ > + unsigned long base:36; /* Base address of block or next table */ > + unsigned long sbz:4; /* Must be zero */ Alignment has gotten undone here. 
> > /* These seven bits are only used in Block entries and are ignored > * in Table entries. */ > diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h > index 5978b8a..23c2f66 100644 > --- a/xen/include/asm-arm/processor.h > +++ b/xen/include/asm-arm/processor.h > @@ -31,6 +31,95 @@ > #define MPIDR_AFFINITY_LEVEL(mpidr, level) \ > ((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK) > > +/* > + * VTCR register configuration for stage 2 translation > + */ > +#define VTCR_T0SZ_SHIFT 0 > +#define VTCR_TOSZ_40BIT (24 << VTCR_T0SZ_SHIFT) > +#define VTCR_TOSZ_48BIT (16 << VTCR_T0SZ_SHIFT) > + > +#define VTCR_SL0_SHIFT 6 > +#define VTCR_SL0_0 (0x2 << VTCR_SL0_SHIFT) > +#define VTCR_SL0_1 (0x1 << VTCR_SL0_SHIFT) > +#define VTCR_SL0_2 (0x0 << VTCR_SL0_SHIFT) > + > +#define VTCR_IRGN0_SHIFT 8 > +#define VTCR_IRGN0_NC (0x0 << VTCR_IRGN0_SHIFT) > +#define VTCR_IRGN0_WBWA (0x1 << VTCR_IRGN0_SHIFT) > +#define VTCR_IRGN0_WT (0x2 << VTCR_IRGN0_SHIFT) > +#define VTCR_IRGN0_WB (0x3 << VTCR_IRGN0_SHIFT) > + > +#define VTCR_ORGN0_SHIFT 10 > +#define VTCR_ORGN0_NC (0x0 << VTCR_ORGN0_SHIFT) > +#define VTCR_ORGN0_WBWA (0x1 << VTCR_ORGN0_SHIFT) > +#define VTCR_ORGN0_WT (0x2 << VTCR_ORGN0_SHIFT) > +#define VTCR_ORGN0_WB (0x3 << VTCR_ORGN0_SHIFT) > + > +#define VTCR_SH0_SHIFT 12 > +#define VTCR_SH0_NS (0x0 << VTCR_SH0_SHIFT) > +#define VTCR_SH0_OS (0x2 << VTCR_SH0_SHIFT) > +#define VTCR_SH0_IS (0x3 << VTCR_SH0_SHIFT) > + > +#define VTCR_TG0_SHIFT 14 > +#define VTCR_TG0_4K (0x0 << VTCR_TG0_SHIFT) > +#define VTCR_TG0_64K (0x1 << VTCR_TG0_SHIFT) > + > +#define VTCR_PS_SHIFT 16 > +#define VTCR_PS_32BIT (0x0 << VTCR_PS_SHIFT) > +#define VTCR_PS_40BIT (0x2 << VTCR_PS_SHIFT) > +#define VTCR_PS_48BIT (0x5 << VTCR_PS_SHIFT) > +#define VTCR_PS_48BIT_VAL 0x5 This spurious _VAL is interesting... 
> + > +#ifdef CONFIG_ARM_64 > +/* > + * SL0=10 => Level-0 initial look up level > + * SH0=11 => Inner-shareable > + * ORGN0=IRGN0=01 => Normal memory, Write-Back Write-Allocate Cacheable > + * TG0=00 => 4K page granular size > + */ > +#define VTCR_VAL_BASE ((VTCR_SL0_0) | \ > + (VTCR_IRGN0_WBWA) | \ > + (VTCR_ORGN0_WBWA) | \ > + (VTCR_SH0_OS) | \ > + (VTCR_TG0_4K)) > +#else > +/* > + * T0SZ=(1)1000 => -8 (32-(-8) = 40 bit IPA) > + * SL0=01 => Level-1 initial look up level > + * SH0=11 => Inner-shareable > + * ORGN0=IRGN0=01 => Normal memory, Write-Back Write-Allocate Cacheable > + * TG0=00 => 4K page granular size > + * PS=010 => 40 bits > + * 40 bit IPA and 32 bit PA > + */ > +#define VTCR_VAL_BASE ((VTCR_TOSZ_40BIT) | \ I didn't see you actually patching arm32/head.S to use this. > + (VTCR_SL0_1) | \ > + (VTCR_IRGN0_WBWA) | \ > + (VTCR_ORGN0_WBWA) | \ > + (VTCR_SH0_OS) | \ > + (VTCR_TG0_4K) | \ > + (VTCR_PS_32BIT)) > +#endif > + > +/* TCR register configuration for Xen Stage 1 translation*/ > + > +#define TCR_TBI_SHIFT 20 > +#define TCR_TBI_USE_TBYTE (0x0 << TCR_TBI_SHIFT) 0x0? Did you mean 0x1? Isn't this 64-bit specific? > + > +#ifdef CONFIG_ARM_64 > +/* > + * 48 bit Hypervisor - VA to 40 bit PA This is far less information than was in the head.S comment which you removed. > + * if platform supports 48 bit PA update runtime in head.S I think you should omit VTCR_PS_* here altogether and require that it is unconditionally set appropriately in head.S (which seems to be what you have implemented anyway). > + */ > +#define TCR_VAL_BASE ((VTCR_TOSZ_48BIT) | \ > + (VTCR_IRGN0_WBWA) | \ > + (VTCR_ORGN0_WBWA) | \ > + (VTCR_SH0_OS) | \ > + (VTCR_TG0_4K) | \ > + (VTCR_PS_40BIT) | \ > + (TCR_TBI_USE_TBYTE)) No 32-bit equivalent? Ian. _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |