[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 4/4] arm: allocate per-PCPU domheap pagetable pages



On Mon, 22 Apr 2013, Ian Campbell wrote:
> The domheap mappings are supposed to be per-PCPU. Therefore xen_pgtable
> becomes a per-PCPU variable and we allocate and setup the page tables for each
> secondary PCPU just before we tell it to come up.
> 
> Each secondary PCPU starts out on the boot page table but switches to its own
> page tables ASAP.
> 
> The boot PCPU uses the boot pagetables as its own.
> 
> Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
> ---
>  xen/arch/arm/mm.c            |  138 
> +++++++++++++++++++++++++++++++++++++-----
>  xen/arch/arm/smpboot.c       |    6 ++
>  xen/include/asm-arm/config.h |    4 +
>  xen/include/asm-arm/mm.h     |    4 +-
>  4 files changed, 136 insertions(+), 16 deletions(-)
> 
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index f4179d8..e3b8541 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -39,22 +39,47 @@
>  
>  struct domain *dom_xen, *dom_io, *dom_cow;
>  
> -/* Static start-of-day pagetables that we use before the allocators are up */
> -/* boot_pgtable == root of the trie (zeroeth level on 64-bit, first on 
> 32-bit) */
> +/* Static start-of-day pagetables that we use before the
> + * allocators are up. These go on to become the boot CPU's real pagetables.
> + */
>  lpae_t boot_pgtable[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
>  #ifdef CONFIG_ARM_64
>  lpae_t boot_first[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
>  #endif
> -/* N.B. The second-level table is 4 contiguous pages long, and covers
> - * all addresses from 0 to 0xffffffff.  Offsets into it are calculated
> - * with second_linear_offset(), not second_table_offset(). */
> +
> +/*
> + * xen_pgtable and xen_dommap are per-PCPU and are allocated before
> + * bringing up each CPU. On 64-bit a first level table is also allocated.
> + *
> + * xen_second, xen_fixmap and xen_xenmap are shared between all PCPUS.
> + */
> +
> +/* Per-CPU pagetable pages */
> +/* xen_pgtable == root of the trie (zeroeth level on 64-bit, first on 
> 32-bit) */
> +static DEFINE_PER_CPU(lpae_t *, xen_pgtable);
> +/* xen_dommap == pages used by map_domain_page, these pages contain
> + * the second level pagetables which map the domheap region
> + * DOMHEAP_VIRT_START...DOMHEAP_VIRT_END in 2MB chunks. */
> +static DEFINE_PER_CPU(lpae_t *, xen_dommap);
> +
> +/* Common pagetable leaves */
> +/* Second level page tables.
> + *
> + * The second-level table is 2 contiguous pages long, and covers all
> + * addresses from 0 to 0x7fffffff.
> + *
> + * Addresses 0x80000000 to 0xffffffff are covered by the per-cpu
> + * xen_dommap mappings described above. However we allocate 4 pages
> + * here for use in the boot page tables and the second two pages
> + * become the boot CPU's xen_dommap pages.
> + */
>  lpae_t xen_second[LPAE_ENTRIES*4] __attribute__((__aligned__(4096*4)));
> +/* First level page table used for fixmap */
>  lpae_t xen_fixmap[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> +/* First level page table used to map Xen itself with the XN bit set
> + * as appropriate. */
>  static lpae_t xen_xenmap[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
>  
> -/* boot_pgtable becomes the boot processors pagetable, eventually this will
> - * become a per-cpu variable */
> -#define xen_pgtable boot_pgtable
>  
>  /* Non-boot CPUs use this to find the correct pagetables. */
>  uint64_t boot_ttbr;
> @@ -107,12 +132,17 @@ done:
>  void dump_hyp_walk(vaddr_t addr)
>  {
>      uint64_t ttbr = READ_SYSREG64(TTBR0_EL2);
> +    lpae_t *pgtable = this_cpu(xen_pgtable);
>  
> -    printk("Walking Hypervisor VA 0x%"PRIvaddr" via TTBR 0x%016"PRIx64"\n",
> -           addr, ttbr);
> +    printk("Walking Hypervisor VA 0x%"PRIvaddr" "
> +           "on CPU%d via TTBR 0x%016"PRIx64"\n",
> +           addr, smp_processor_id(), ttbr);
>  
> -    BUG_ON( (lpae_t *)(unsigned long)(ttbr - phys_offset) != xen_pgtable );
> -    dump_pt_walk(xen_pgtable, addr);
> +    if ( smp_processor_id() == 0 )
> +        BUG_ON( (lpae_t *)(unsigned long)(ttbr - phys_offset) != pgtable );
> +    else
> +        BUG_ON( virt_to_maddr(pgtable) != ttbr );
> +    dump_pt_walk(pgtable, addr);
>  }
>  
>  /* Map a 4k page in a fixmap entry */
> @@ -138,7 +168,7 @@ void clear_fixmap(unsigned map)
>  void *map_domain_page(unsigned long mfn)
>  {
>      unsigned long flags;
> -    lpae_t *map = xen_second + second_linear_offset(DOMHEAP_VIRT_START);
> +    lpae_t *map = this_cpu(xen_dommap);
>      unsigned long slot_mfn = mfn & ~LPAE_ENTRY_MASK;
>      vaddr_t va;
>      lpae_t pte;
> @@ -204,7 +234,7 @@ void *map_domain_page(unsigned long mfn)
>  void unmap_domain_page(const void *va)
>  {
>      unsigned long flags;
> -    lpae_t *map = xen_second + second_linear_offset(DOMHEAP_VIRT_START);
> +    lpae_t *map = this_cpu(xen_dommap);
>      int slot = ((unsigned long) va - DOMHEAP_VIRT_START) >> SECOND_SHIFT;
>  
>      local_irq_save(flags);
> @@ -219,7 +249,7 @@ void unmap_domain_page(const void *va)
>  
>  unsigned long domain_page_map_to_mfn(const void *va)
>  {
> -    lpae_t *map = xen_second + second_linear_offset(DOMHEAP_VIRT_START);
> +    lpae_t *map = this_cpu(xen_dommap);
>      int slot = ((unsigned long) va - DOMHEAP_VIRT_START) >> SECOND_SHIFT;
>      unsigned long offset = ((unsigned long)va>>THIRD_SHIFT) & 
> LPAE_ENTRY_MASK;
>  
> @@ -361,11 +391,89 @@ void __init setup_pagetables(unsigned long 
> boot_phys_offset, paddr_t xen_paddr)
>      WRITE_SYSREG32(READ_SYSREG32(SCTLR_EL2) | SCTLR_WXN, SCTLR_EL2);
>      /* Flush everything after setting WXN bit. */
>      flush_xen_text_tlb();
> +
> +    per_cpu(xen_pgtable, 0) = boot_pgtable;
> +    per_cpu(xen_dommap, 0) = xen_second +
> +        second_linear_offset(DOMHEAP_VIRT_START);
> +
> +    /* Some of these slots may have been used during start of day and/or
> +     * relocation. Make sure they are clear now. */
> +    memset(this_cpu(xen_dommap), 0, DOMHEAP_SECOND_PAGES*PAGE_SIZE);
> +    flush_xen_dcache_va_range(this_cpu(xen_dommap),
> +                              DOMHEAP_SECOND_PAGES*PAGE_SIZE);
> +}
> +
> +int init_secondary_pagetables(int cpu)
> +{
> +    lpae_t *root, *first, *domheap, pte;
> +    int i;
> +
> +    root = alloc_xenheap_page();
> +#ifdef CONFIG_ARM_64
> +    first = alloc_xenheap_page();
> +#else
> +    first = root; /* root == first level on 32-bit 3-level trie */
> +#endif
> +    domheap = 
> alloc_xenheap_pages(get_order_from_pages(DOMHEAP_SECOND_PAGES), 0);
> +
> +    if ( root == NULL || domheap == NULL || first == NULL
> +        )

Coding style: the stray line break before the closing parenthesis looks odd — the whole condition fits on a single line.


> +    {
> +        printk("Not enough free memory for secondary CPU%d pagetables\n", 
> cpu);
> +        free_xenheap_pages(domheap, 
> get_order_from_pages(DOMHEAP_SECOND_PAGES));
> +#ifdef CONFIG_ARM_64
> +        free_xenheap_page(first);
> +#endif
> +        free_xenheap_page(root);
> +        return -ENOMEM;
> +    }
> +
> +    /* Initialise root pagetable from root of boot tables */
> +    memcpy(root, boot_pgtable, PAGE_SIZE);
> +
> +#ifdef CONFIG_ARM_64
> +    /* Initialise first pagetable from first level of boot tables, and
> +     * hook into the new root. */
> +    memcpy(first, boot_first, PAGE_SIZE);
> +    pte = mfn_to_xen_entry(virt_to_mfn(first));
> +    pte.pt.table = 1;
> +    write_pte(root, pte);
> +#endif
> +
> +    /* Ensure the domheap has no stray mappings */
> +    memset(domheap, 0, DOMHEAP_SECOND_PAGES*PAGE_SIZE);
> +
> +    /* Update the first level mapping to reference the local CPU's
> +     * domheap mapping pages. */
> +    for ( i = 0; i < 2; i++ )

instead of being hardcoded to "2", shouldn't the limit be based on
DOMHEAP_SECOND_PAGES?


> +    {
> +        pte = mfn_to_xen_entry(virt_to_mfn(domheap+i*LPAE_ENTRIES));
> +        pte.pt.table = 1;
> +        
> write_pte(&first[first_table_offset(DOMHEAP_VIRT_START+i*FIRST_SIZE)], pte);

Also shouldn't we add an ASSERT to check that DOMHEAP_VIRT_START is
properly aligned?


> +    }
> +
> +    per_cpu(xen_pgtable, cpu) = root;
> +    per_cpu(xen_dommap, cpu) = domheap;
> +
> +    return 0;
>  }
>  
>  /* MMU setup for secondary CPUS (which already have paging enabled) */
>  void __cpuinit mmu_init_secondary_cpu(void)
>  {
> +    uint64_t ttbr;
> +
> +    /* Change to this CPU's pagetables */
> +    ttbr = (uintptr_t) virt_to_maddr(this_cpu(xen_pgtable));

we should be flushing this ttbr write


> +    flush_xen_dcache_va_range(this_cpu(xen_pgtable), PAGE_SIZE);
> +    flush_xen_dcache_va_range(this_cpu(xen_dommap),
> +                              DOMHEAP_SECOND_PAGES*PAGE_SIZE);

Given that these pagetable pages are written by cpu0, I wonder whether we
actually need to execute any of these flushes on secondary cpus. I think
they should be moved to init_secondary_pagetables.


> +    flush_xen_text_tlb();
>
> +    WRITE_SYSREG64(ttbr, TTBR0_EL2);
> +    dsb();                         /* Ensure visibility of HTTBR update */
> +    flush_xen_text_tlb();

The two flush_xen_text_tlb are probably necessary at least for the
I-cache and the BP.

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.