[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 10/10] xen: arm: Use a direct mapping of RAM on arm64
On Tue, 18 Jun 2013, Ian Campbell wrote: > We have plenty of virtual address space so we can avoid needing to map and > unmap pages all the time. > > A totally arbitrarily chosen 32GB frame table leads to support for 5TB of RAM. > I haven't tested with anything near that amount of RAM though. There is plenty > of room to expand further when that becomes necessary. > > Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx> > --- > xen/arch/arm/mm.c | 155 > ++++++++++++++++++++++++++++++++++-------- > xen/arch/arm/setup.c | 92 +++++++++++++++++++++++++ > xen/include/asm-arm/config.h | 96 +++++++++++++++++++------- > xen/include/asm-arm/mm.h | 16 ++++ > 4 files changed, 307 insertions(+), 52 deletions(-) > > diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c > index 5ad687f..35ef56d 100644 > --- a/xen/arch/arm/mm.c > +++ b/xen/arch/arm/mm.c > @@ -37,6 +37,7 @@ > #include <public/memory.h> > #include <xen/sched.h> > #include <xen/vmap.h> > +#include <asm/early_printk.h> > #include <xsm/xsm.h> > #include <xen/pfn.h> > > @@ -48,6 +49,14 @@ struct domain *dom_xen, *dom_io, *dom_cow; > lpae_t boot_pgtable[LPAE_ENTRIES] __attribute__((__aligned__(4096))); > #ifdef CONFIG_ARM_64 > lpae_t boot_first[LPAE_ENTRIES] __attribute__((__aligned__(4096))); > +/* The first page of the first level mapping of the xenheap. The > + * subsequent xenheap first level pages are dynamically allocated, but > + * we need this one to bootstrap ourselves. */ Why? Can't we dynamically allocate the first page of the first level mapping too? > +lpae_t xenheap_first_first[LPAE_ENTRIES] __attribute__((__aligned__(4096))); > +/* The zeroeth level slot which uses xenheap_first_first. Used because > + * setup_xenheap_mappings otherwise relies on mfn_to_virt which isn't > + * valid for a non-xenheap mapping. */ > +static __initdata int xenheap_first_first_slot = -1; > #endif > > /* > @@ -57,6 +66,9 @@ lpae_t boot_first[LPAE_ENTRIES] > __attribute__((__aligned__(4096))); > * xen_second, xen_fixmap and xen_xenmap are shared between all PCPUs. > */ > > +#ifdef CONFIG_ARM_64 > +#define THIS_CPU_PGTABLE boot_pgtable > +#else > /* Per-CPU pagetable pages */ > /* xen_pgtable == root of the trie (zeroeth level on 64-bit, first on > 32-bit) */ > static DEFINE_PER_CPU(lpae_t *, xen_pgtable); > @@ -65,6 +77,7 @@ static DEFINE_PER_CPU(lpae_t *, xen_pgtable); > * the second level pagetables which mapp the domheap region > * DOMHEAP_VIRT_START...DOMHEAP_VIRT_END in 2MB chunks. */ > static DEFINE_PER_CPU(lpae_t *, xen_dommap); > +#endif > > /* Common pagetable leaves */ > /* Second level page tables. > @@ -73,10 +86,16 @@ static DEFINE_PER_CPU(lpae_t *, xen_dommap); > * addresses from 0 to 0x7fffffff. Offsets into it are calculated > * with second_linear_offset(), not second_table_offset(). > * > - * Addresses 0x80000000 to 0xffffffff are covered by the per-cpu > - * xen_domheap mappings described above. However we allocate 4 pages > + * On 32bit addresses 0x80000000 to 0xffffffff are covered by the > + * per-cpu xen_domheap mappings described above. We allocate 4 pages > * here for use in the boot page tables and the second two pages > * become the boot CPUs xen_dommap pages. > + * > + * On 64bit addresses 0x80000000 to 0xffffffff are unused. However we > + * allocate 4 pages here for use while relocating Xen, which currently > + * expects a second level page to exist for all addresses in the first > + * 4GB. We need to keep these extra mappings in place for seconary CPU > + * bring up too. For now we just leave them forever. > */ > lpae_t xen_second[LPAE_ENTRIES*4] __attribute__((__aligned__(4096*4))); > /* First level page table used for fixmap */ > @@ -92,7 +111,7 @@ uint64_t boot_ttbr; > static paddr_t phys_offset; > > /* Limits of the Xen heap */ > -unsigned long xenheap_mfn_start __read_mostly; > +unsigned long xenheap_mfn_start __read_mostly = ~0UL; > unsigned long xenheap_mfn_end __read_mostly; > unsigned long xenheap_virt_end __read_mostly; > > @@ -112,7 +131,9 @@ static inline void > check_memory_layout_alignment_constraints(void) { > BUILD_BUG_ON(BOOT_MISC_VIRT_START & ~SECOND_MASK); > /* 1GB aligned regions */ > BUILD_BUG_ON(XENHEAP_VIRT_START & ~FIRST_MASK); > +#ifdef CONFIG_DOMAIN_PAGE > BUILD_BUG_ON(DOMHEAP_VIRT_START & ~FIRST_MASK); > +#endif > } > > void dump_pt_walk(lpae_t *first, paddr_t addr) > @@ -180,6 +201,7 @@ void clear_fixmap(unsigned map) > flush_xen_data_tlb_range_va(FIXMAP_ADDR(map), PAGE_SIZE); > } > > +#ifdef CONFIG_DOMAIN_PAGE > void *map_domain_page_global(unsigned long mfn) > { > return vmap(&mfn, 1); > > @@ -284,6 +306,7 @@ unsigned long domain_page_map_to_mfn(const void *va) > > return map[slot].pt.base + offset; > } > +#endif these are exported functions, shouldn't we implement another version of them for the non-CONFIG_DOMAIN_PAGE case? > void __init arch_init_memory(void) > { > @@ -431,6 +454,7 @@ void __init setup_pagetables(unsigned long > boot_phys_offset, paddr_t xen_paddr) > /* Flush everything after setting WXN bit. */ > flush_xen_text_tlb(); > > +#ifdef CONFIG_ARM_32 > per_cpu(xen_pgtable, 0) = boot_pgtable; > per_cpu(xen_dommap, 0) = xen_second + > second_linear_offset(DOMHEAP_VIRT_START); > @@ -440,43 +464,31 @@ void __init setup_pagetables(unsigned long > boot_phys_offset, paddr_t xen_paddr) > memset(this_cpu(xen_dommap), 0, DOMHEAP_SECOND_PAGES*PAGE_SIZE); > flush_xen_dcache_va_range(this_cpu(xen_dommap), > DOMHEAP_SECOND_PAGES*PAGE_SIZE); > +#endif > } > > int init_secondary_pagetables(int cpu) > { > - lpae_t *root, *first, *domheap, pte; > - int i; > - > - root = alloc_xenheap_page(); > #ifdef CONFIG_ARM_64 > - first = alloc_xenheap_page(); > + /* All CPUs share a single page table on 64 bit */ > + return 0; I would just ifdef the entire function > #else > - first = root; /* root == first level on 32-bit 3-level trie */ > -#endif > + lpae_t *first, *domheap, pte; > + int i; > + > + first = alloc_xenheap_page(); /* root == first level on 32-bit 3-level > trie */ > domheap = > alloc_xenheap_pages(get_order_from_pages(DOMHEAP_SECOND_PAGES), 0); > > - if ( root == NULL || domheap == NULL || first == NULL ) > + if ( domheap == NULL || first == NULL ) > { > printk("Not enough free memory for secondary CPU%d pagetables\n", > cpu); > free_xenheap_pages(domheap, > get_order_from_pages(DOMHEAP_SECOND_PAGES)); > -#ifdef CONFIG_ARM_64 > free_xenheap_page(first); > -#endif > - free_xenheap_page(root); > return -ENOMEM; > } > > /* Initialise root pagetable from root of boot tables */ > - memcpy(root, boot_pgtable, PAGE_SIZE); > - > -#ifdef CONFIG_ARM_64 > - /* Initialise first pagetable from first level of boot tables, and > - * hook into the new root. */ > - memcpy(first, boot_first, PAGE_SIZE); > - pte = mfn_to_xen_entry(virt_to_mfn(first)); > - pte.pt.table = 1; > - write_pte(root, pte); > -#endif > + memcpy(first, boot_pgtable, PAGE_SIZE); > > /* Ensure the domheap has no stray mappings */ > memset(domheap, 0, DOMHEAP_SECOND_PAGES*PAGE_SIZE); > @@ -490,16 +502,14 @@ int init_secondary_pagetables(int cpu) > > write_pte(&first[first_table_offset(DOMHEAP_VIRT_START+i*FIRST_SIZE)], pte); > } > > - flush_xen_dcache_va_range(root, PAGE_SIZE); > -#ifdef CONFIG_ARM_64 > flush_xen_dcache_va_range(first, PAGE_SIZE); > -#endif > flush_xen_dcache_va_range(domheap, DOMHEAP_SECOND_PAGES*PAGE_SIZE); > > - per_cpu(xen_pgtable, cpu) = root; > + per_cpu(xen_pgtable, cpu) = first; > per_cpu(xen_dommap, cpu) = domheap; > > return 0; > +#endif > } > > /* MMU setup for secondary CPUS (which already have paging enabled) */ > @@ -544,6 +554,7 @@ static void __init create_32mb_mappings(lpae_t *second, > flush_xen_data_tlb(); > } > > +#ifdef CONFIG_ARM_32 > /* Set up the xenheap: up to 1GB of contiguous, always-mapped memory. */ > void __init setup_xenheap_mappings(unsigned long base_mfn, > unsigned long nr_mfns) > @@ -556,19 +567,107 @@ void __init setup_xenheap_mappings(unsigned long > base_mfn, > xenheap_mfn_end = base_mfn + nr_mfns; > } > > +#else /* CONFIG_ARM_64 */ > + > +void __init setup_xenheap_mappings(unsigned long base_mfn, > + unsigned long nr_mfns) > +{ > + lpae_t *first, pte; > + unsigned long offset, end_mfn; > + vaddr_t vaddr; > + > + /* First call sets the xenheap physical offset. */ > + if ( xenheap_mfn_start == ~0UL ) > + xenheap_mfn_start = base_mfn; > + > + if ( base_mfn < xenheap_mfn_start ) > + early_panic("cannot add xenheap mapping at %lx below heap start > %lx\n", > + base_mfn, xenheap_mfn_start); > + > + end_mfn = base_mfn + nr_mfns; > + > + /* Align to previous 1GB boundary */ > + base_mfn &= ~FIRST_MASK; > + > + offset = base_mfn - xenheap_mfn_start; > + vaddr = DIRECTMAP_VIRT_START + offset*PAGE_SIZE; > + > + while ( base_mfn < end_mfn ) > + { > + int slot = zeroeth_table_offset(vaddr); > + lpae_t *p = &boot_pgtable[slot]; > + > + if ( p->pt.valid ) > + { > + /* mfn_to_virt is not valid on the 1st 1st mfn, since it > + * is not within the xenheap. */ > + first = slot == xenheap_first_first_slot ? > + xenheap_first_first : mfn_to_virt(p->pt.base); > + } > + else if ( xenheap_first_first_slot == -1) > + { > + /* Use xenheap_first_first to bootstrap the mappings */ > + first = xenheap_first_first; > + > + pte = pte_of_xenaddr((vaddr_t)xenheap_first_first); > + pte.pt.table = 1; > + write_pte(p, pte); > + > + xenheap_first_first_slot = slot; I take that at this point alloc_boot_pages wouldn't work? > + } > + else > + { > + unsigned long first_mfn = alloc_boot_pages(1, 1); > + pte = mfn_to_xen_entry(first_mfn); > + pte.pt.table = 1; > + write_pte(p, pte); > + first = mfn_to_virt(first_mfn); > + } > + > + pte = mfn_to_xen_entry(base_mfn); > + //pte.pt.contig = /* XXX need to think about the logic */ Two different comment styles in the same line. Setting the contig bit here means that we assume that it's likely that the first 16 adjacent entries in this table point to a contiguous output address range. I think that should be the case, unless the memory banks are noncontiguous. > + write_pte(&first[first_table_offset(vaddr)], pte); > + > + base_mfn += FIRST_SIZE>>PAGE_SHIFT; > + vaddr += FIRST_SIZE; > + } > + > + flush_xen_data_tlb(); > +} > +#endif > + > /* Map a frame table to cover physical addresses ps through pe */ > void __init setup_frametable_mappings(paddr_t ps, paddr_t pe) > { > unsigned long nr_pages = (pe - ps) >> PAGE_SHIFT; > unsigned long frametable_size = nr_pages * sizeof(struct page_info); > unsigned long base_mfn; > +#ifdef CONFIG_ARM_64 > + lpae_t *second, pte; > + unsigned long nr_second, second_base; > + int i; > +#endif > > frametable_base_mfn = ps >> PAGE_SHIFT; > > /* Round up to 32M boundary */ > frametable_size = (frametable_size + 0x1ffffff) & ~0x1ffffff; > base_mfn = alloc_boot_pages(frametable_size >> PAGE_SHIFT, 32<<(20-12)); > + > +#ifdef CONFIG_ARM_64 > + nr_second = frametable_size >> SECOND_SHIFT; > + second_base = alloc_boot_pages(nr_second, 1); > + second = mfn_to_virt(second_base); > + for ( i = 0; i < nr_second; i++ ) > + { > + pte = mfn_to_xen_entry(second_base + i); > + pte.pt.table = 1; > + write_pte(&boot_first[first_table_offset(FRAMETABLE_VIRT_START)+i], > pte); > + } > + create_32mb_mappings(second, 0, base_mfn, frametable_size >> PAGE_SHIFT); > +#else > create_32mb_mappings(xen_second, FRAMETABLE_VIRT_START, base_mfn, > frametable_size >> PAGE_SHIFT); > +#endif Is it possible to unify the arm32 and arm64 code paths here? Being able to handle a frametable that spans multiple second level pagetable pages shouldn't be a problem for arm32. > memset(&frame_table[0], 0, nr_pages * sizeof(struct page_info)); > memset(&frame_table[nr_pages], -1, > diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c > index da2a734..3efa063 100644 > --- a/xen/arch/arm/setup.c > +++ b/xen/arch/arm/setup.c > +#else /* CONFIG_ARM_64 */ > +static void __init setup_mm(unsigned long dtb_paddr, size_t dtb_size) > +{ > + paddr_t ram_start = ~0; > + paddr_t ram_end = 0; > + int bank; > + unsigned long xenheap_pages = 0; > + unsigned long dtb_pages; > + > + /* > + * TODO: only using the first RAM bank for now. The heaps and the > + * frame table assume RAM is physically contiguous. > + */ > + total_pages = 0; > + for ( bank = 0 ; bank < early_info.mem.nr_banks; bank++ ) > + { > + paddr_t bank_start = early_info.mem.bank[bank].start; > + paddr_t bank_size = early_info.mem.bank[bank].size; > + paddr_t bank_end = bank_start + bank_size; > + unsigned long bank_pages = bank_size >> PAGE_SHIFT; > + paddr_t s, e; > + > + total_pages += bank_pages; > + > + if ( bank_start < ram_start ) > + ram_start = bank_start; > + if ( bank_end > ram_end ) > + ram_end = bank_end; > + > + /* > + * Locate the xenheap using these constraints: > + * > + * - must be 32 MiB aligned > + * - must not include Xen itself or the boot modules > + * - must be at most 1 GiB > + * - must be at least 128M > + * > + * We try to allocate the largest xenheap possible within these > + * constraints. > + */ this comment is not correct on ARM64, is it? > + xenheap_pages += (bank_size >> PAGE_SHIFT); > + > + /* XXX we assume that the ram regions are ordered */ > + s = bank_start; > + while ( s < bank_end ) > + { > + paddr_t n = bank_end; > + > + e = next_module(s, &n); > + > + if ( e == ~(paddr_t)0 ) > + { > + e = n = bank_end; > + } > + > + setup_xenheap_mappings(s>>PAGE_SHIFT, (e-s)>>PAGE_SHIFT); > + > + xenheap_mfn_end = e; > + > + init_boot_pages(s, e); > + s = n; > + } > + } > + > + xenheap_virt_end = XENHEAP_VIRT_START + ram_end - ram_start; > + xenheap_mfn_start = ram_start >> PAGE_SHIFT; > + xenheap_mfn_end = ram_end >> PAGE_SHIFT; > + xenheap_max_mfn(xenheap_mfn_end); > + > + /* > + * Need enough mapped pages for copying the DTB. > + * > + * TODO: The DTB (and other payloads) are assumed to be towards > + * the start of RAM. > + */ > + dtb_pages = (dtb_size + PAGE_SIZE-1) >> PAGE_SHIFT; > + > + /* > + * Copy the DTB. > + * > + * TODO: handle other payloads too. > + */ > + device_tree_flattened = mfn_to_virt(alloc_boot_pages(dtb_pages, 1)); > + copy_from_paddr(device_tree_flattened, dtb_paddr, dtb_size, BUFFERABLE); > + > + setup_frametable_mappings(ram_start, ram_end); > + max_page = PFN_DOWN(ram_end); > + > + end_boot_allocator(); > +} > +#endif > > size_t __read_mostly cacheline_bytes; > > diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h > index fb9e93c..259d4c6 100644 > --- a/xen/include/asm-arm/config.h > +++ b/xen/include/asm-arm/config.h > @@ -35,9 +35,6 @@ > > #define CONFIG_SMP 1 > > -#define CONFIG_DOMAIN_PAGE 1 > -#define CONFIG_SEPARATE_XENHEAP 1 > - > #define CONFIG_VIDEO 1 > > #define OPT_CONSOLE_STR "dtuart" > @@ -76,38 +73,89 @@ > #include <xen/const.h> > > /* > - * Memory layout: > - * 0 - 2M Unmapped > - * 2M - 4M Xen text, data, bss > - * 4M - 6M Fixmap: special-purpose 4K mapping slots > - * 6M - 8M Early boot misc (see below) > - * > - * 32M - 128M Frametable: 24 bytes per page for 16GB of RAM > - * 256M - 1G VMAP: ioremap and early_ioremap use this virtual address > - * space > - * > - * 1G - 2G Xenheap: always-mapped memory > - * 2G - 4G Domheap: on-demand-mapped > + * Common ARM32 and ARM64 layout: > + * 0 - 2M Unmapped > + * 2M - 4M Xen text, data, bss > + * 4M - 6M Fixmap: special-purpose 4K mapping slots > + * 6M - 8M Early boot misc (see below) > * > * The early boot misc area is used: > * - in head.S for the DTB for device_tree_early_init(). > * - in setup_pagetables() when relocating Xen. > + * > + * ARM32 layout: > + * 0 - 8M <COMMON> > + * > + * 32M - 128M Frametable: 24 bytes per page for 16GB of RAM > + * 256M - 1G VMAP: ioremap and early_ioremap use this virtual address > + * space > + * > + * 1G - 2G Xenheap: always-mapped memory > + * 2G - 4G Domheap: on-demand-mapped > + * > + * ARM64 layout: > + * 0x0000000000000000 - 0x0000007fffffffff (512GB, L0 slot [0]) > + * 0 - 8M <COMMON> > + * > + * 1G - 2G VMAP: ioremap and early_ioremap > + * > + * 32G - 64G Frametable: 24 bytes per page for 5.3TB of RAM > + * > + * 0x0000008000000000 - 0x00007fffffffffff (127.5TB, L0 slots [1..255]) > + * Unused > + * > + * 0x0000800000000000 - 0x000084ffffffffff (5TB, L0 slots [256..265]) > + * 1:1 mapping of RAM > + * > + * 0x0000850000000000 - 0x0000ffffffffffff (123TB, L0 slots [266..511]) > + * Unused > */ > > -#define XEN_VIRT_START _AC(0x00200000,UL) > -#define FIXMAP_ADDR(n) (_AC(0x00400000,UL) + (n) * PAGE_SIZE) > -#define BOOT_MISC_VIRT_START _AC(0x00600000,UL) > -#define FRAMETABLE_VIRT_START _AC(0x02000000,UL) > -#define VMAP_VIRT_START _AC(0x10000000,UL) > -#define XENHEAP_VIRT_START _AC(0x40000000,UL) > -#define DOMHEAP_VIRT_START _AC(0x80000000,UL) > -#define DOMHEAP_VIRT_END _AC(0xffffffff,UL) > +#define XEN_VIRT_START _AT(vaddr_t,0x00200000) > +#define FIXMAP_ADDR(n) (_AT(vaddr_t,0x00400000) + (n) * PAGE_SIZE) > +#define BOOT_MISC_VIRT_START _AT(vaddr_t,0x00600000) > > -#define VMAP_VIRT_END XENHEAP_VIRT_START > #define HYPERVISOR_VIRT_START XEN_VIRT_START > > +#ifdef CONFIG_ARM_32 > + > +#define CONFIG_DOMAIN_PAGE 1 > +#define CONFIG_SEPARATE_XENHEAP 1 > + > +#define FRAMETABLE_VIRT_START _AT(vaddr_t,0x02000000) > +#define VMAP_VIRT_START _AT(vaddr_t,0x10000000) > +#define XENHEAP_VIRT_START _AT(vaddr_t,0x40000000) > +#define XENHEAP_VIRT_END _AT(vaddr_t,0x7fffffff) > +#define DOMHEAP_VIRT_START _AT(vaddr_t,0x80000000) > +#define DOMHEAP_VIRT_END _AT(vaddr_t,0xffffffff) > + > +#define VMAP_VIRT_END XENHEAP_VIRT_START > + > #define DOMHEAP_ENTRIES 1024 /* 1024 2MB mapping slots */ > > +#else /* ARM_64 */ > + > +#define SLOT0_ENTRY_BITS 39 > +#define SLOT0(slot) (_AT(vaddr_t,slot) << SLOT0_ENTRY_BITS) > +#define SLOT0_ENTRY_SIZE SLOT0(1) > +#define GB(_gb) (_AC(_gb, UL) << 30) > + > +#define VMAP_VIRT_START GB(1) > +#define VMAP_VIRT_END (VMAP_VIRT_START + GB(1) - 1) > + > +#define FRAMETABLE_VIRT_START GB(32) > +#define FRAMETABLE_VIRT_END (FRAMETABLE_VIRT_START + GB(32) - 1) > + > +#define DIRECTMAP_VIRT_START SLOT0(256) > +#define DIRECTMAP_SIZE (SLOT0_ENTRY_SIZE * (265-256)) > +#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + DIRECTMAP_SIZE - 1) > + > +#define XENHEAP_VIRT_START DIRECTMAP_VIRT_START > + > +#define HYPERVISOR_VIRT_END DIRECTMAP_VIRT_END > + > +#endif > + > /* Number of domheap pagetable pages required at the second level (2MB > mappings) */ > #define DOMHEAP_SECOND_PAGES ((DOMHEAP_VIRT_END - DOMHEAP_VIRT_START + 1) >> > FIRST_SHIFT) > > diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h > index 7aca836..27284d0 100644 > --- a/xen/include/asm-arm/mm.h > +++ b/xen/include/asm-arm/mm.h > @@ -118,11 +118,18 @@ struct page_info > extern unsigned long xenheap_mfn_start, xenheap_mfn_end; > extern unsigned long xenheap_virt_end; > > +#ifdef CONFIG_ARM_32 > #define is_xen_heap_page(page) is_xen_heap_mfn(page_to_mfn(page)) > #define is_xen_heap_mfn(mfn) ({ \ > unsigned long _mfn = (mfn); \ > (_mfn >= xenheap_mfn_start && _mfn < xenheap_mfn_end); \ > }) I take that this check wouldn't work for arm64 because it would always return true? > +#else > +#define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap) > +#define is_xen_heap_mfn(mfn) \ > + (mfn_valid(mfn) && is_xen_heap_page(__mfn_to_page(mfn))) > +#endif Looks like the ARM64 version of these two functions should work for arm32 too. _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |