
Re: [Xen-devel] [PATCH] x86: rework hypercall argument translation area setup


  • To: Jan Beulich <JBeulich@xxxxxxxx>, xen-devel <xen-devel@xxxxxxxxxxxxx>
  • From: Keir Fraser <keir@xxxxxxx>
  • Date: Thu, 21 Feb 2013 12:08:46 +0000
  • Delivery-date: Thu, 21 Feb 2013 12:09:18 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xen.org>
  • Thread-index: Ac4QLDL3Pm4uVhVtY0mDkpwWrmZ3jg==
  • Thread-topic: [Xen-devel] [PATCH] x86: rework hypercall argument translation area setup

On 21/02/2013 11:42, "Jan Beulich" <JBeulich@xxxxxxxx> wrote:

> Rather than using an order-1 Xen heap allocation, use (currently 2)
> individual domain heap pages to populate space in the per-domain
> mapping area.
> 
> Also fix an off-by-one mistake in is_compat_arg_xlat_range().
> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

Well, fine in principle. This manual setup/teardown of l2/l1 pagetables in
the perdomain area is getting a bit tiresome though, isn't it? Isn't there
some setup helper that could be abstracted out for all users, and perhaps
even some automated teardown-and-freeing on domain destroy? I just sighed
when I saw more skanky pagetable manipulation for what is a conceptually
simple change.
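
Something along the lines of the sketch below is what I have in mind -- the
helper names and signatures here are purely illustrative (they don't exist
today), just to show the shape of the interface rather than a worked
implementation:

    /* Hypothetical helpers -- illustrative only, not existing Xen APIs. */
    int create_perdomain_mapping(struct domain *d, unsigned long va,
                                 unsigned int nr_pages);
    void destroy_perdomain_mapping(struct domain *d, unsigned long va,
                                   unsigned int nr_pages);

    /* setup_compat_arg_xlat() could then collapse to roughly: */
    int setup_compat_arg_xlat(struct vcpu *v)
    {
        /* Map PFN_UP(COMPAT_ARG_XLAT_SIZE) fresh domheap pages at the
         * per-vcpu translation area; the helper allocates any missing
         * intermediate pagetables itself. */
        return create_perdomain_mapping(v->domain, ARG_XLAT_START(v),
                                        PFN_UP(COMPAT_ARG_XLAT_SIZE));
    }

with destroy_perdomain_mapping() (or a blanket teardown of the whole
per-domain range) called once from arch_domain_destroy(), so individual
callers wouldn't need to walk and free l2/l1 pages themselves.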

 -- Keir

> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -642,6 +642,14 @@ void arch_domain_destroy(struct domain *
>              free_domheap_page(d->arch.perdomain_l2_pg[i]);
>      free_domheap_page(d->arch.perdomain_l3_pg);
>  
> +    if ( d->arch.arg_xlat_l1_pg )
> +    {
> +        for ( i = 0; i < PFN_UP(MAX_VIRT_CPUS << ARG_XLAT_VA_SHIFT); ++i )
> +            if ( d->arch.arg_xlat_l1_pg[i] )
> +                free_domheap_page(d->arch.arg_xlat_l1_pg[i]);
> +        xfree(d->arch.arg_xlat_l1_pg);
> +    }
> +
>      free_xenheap_page(d->shared_info);
>      cleanup_domain_irq_mapping(d);
>  }
> --- a/xen/arch/x86/usercopy.c
> +++ b/xen/arch/x86/usercopy.c
> @@ -6,8 +6,8 @@
>   * Copyright 2002 Andi Kleen <ak@xxxxxxx>
>   */
>  
> -#include <xen/config.h>
>  #include <xen/lib.h>
> +#include <xen/sched.h>
>  #include <asm/uaccess.h>
>  
>  unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned n)
> --- a/xen/arch/x86/x86_64/mm.c
> +++ b/xen/arch/x86/x86_64/mm.c
> @@ -832,27 +832,93 @@ void __init zap_low_mappings(void)
>                       __PAGE_HYPERVISOR);
>  }
>  
> -void *compat_arg_xlat_virt_base(void)
> -{
> -    return current->arch.compat_arg_xlat;
> -}
> -
>  int setup_compat_arg_xlat(struct vcpu *v)
>  {
> -    unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE);
> +    struct domain *d = v->domain;
> +    struct page_info *pg;
> +    unsigned int idx, i;
> +    l1_pgentry_t *l1tab;
> +
> +    if ( !d->arch.perdomain_l2_pg[ARG_XLAT_SLOT] )
> +    {
> +        l3_pgentry_t *l3tab;
> +
> +        pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
> +        if ( !pg )
> +            return -ENOMEM;
> +        clear_domain_page(page_to_mfn(pg));
> +        d->arch.perdomain_l2_pg[ARG_XLAT_SLOT] = pg;
> +
> +        l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
> +        l3tab[l3_table_offset(ARG_XLAT_VIRT_START)] =
> +            l3e_from_page(pg, __PAGE_HYPERVISOR);
> +        unmap_domain_page(l3tab);
> +    }
> +
> +    BUILD_BUG_ON((MAX_VIRT_CPUS << ARG_XLAT_VA_SHIFT) >
> +                 (PERDOMAIN_SLOT_MBYTES << 20));
> +
> +    if ( !d->arch.arg_xlat_l1_pg )
> +        d->arch.arg_xlat_l1_pg = xzalloc_array(struct page_info *,
> +                                               PFN_UP(MAX_VIRT_CPUS <<
> +                                                      ARG_XLAT_VA_SHIFT));
> +    if ( !d->arch.arg_xlat_l1_pg )
> +        return -ENOMEM;
> +
> +    idx = l2_table_offset(ARG_XLAT_START(v));
> +    if ( !d->arch.arg_xlat_l1_pg[idx] )
> +    {
> +        l2_pgentry_t *l2tab;
> +
> +        pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
> +        if ( !pg )
> +            return -ENOMEM;
> +        clear_domain_page(page_to_mfn(pg));
> +        d->arch.arg_xlat_l1_pg[idx] = pg;
> +
> +        l2tab = __map_domain_page(d->arch.perdomain_l2_pg[ARG_XLAT_SLOT]);
> +        l2tab[idx] = l2e_from_page(pg, __PAGE_HYPERVISOR);
> +        unmap_domain_page(l2tab);
> +    }
>  
> -    v->arch.compat_arg_xlat = alloc_xenheap_pages(order,
> -                                                  MEMF_node(vcpu_to_node(v)));
> +    l1tab = __map_domain_page(d->arch.arg_xlat_l1_pg[idx]);
> +    idx = l1_table_offset(ARG_XLAT_START(v));
> +    BUILD_BUG_ON(COMPAT_ARG_XLAT_SIZE > (1UL << ARG_XLAT_VA_SHIFT));
> +    for ( i = 0; i < PFN_UP(COMPAT_ARG_XLAT_SIZE); ++i )
> +    {
> +        pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
> +        if ( !pg )
> +            return -ENOMEM;
> +        l1tab[idx + i] = l1e_from_page(pg, __PAGE_HYPERVISOR);
> +    }
> +    unmap_domain_page(l1tab);
>  
> -    return v->arch.compat_arg_xlat ? 0 : -ENOMEM;
> +    return 0;
>  }
>  
>  void free_compat_arg_xlat(struct vcpu *v)
>  {
> -    unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE);
> +    struct domain *d = v->domain;
> +    unsigned long start = ARG_XLAT_START(v);
> +    unsigned long va = start + COMPAT_ARG_XLAT_SIZE - 1;
> +    l1_pgentry_t *l1tab =
> +        __map_domain_page(d->arch.arg_xlat_l1_pg[l2_table_offset(start)]);
> +
> +    do {
> +        l1_pgentry_t l1e = l1tab[l1_table_offset(va)];
> +
> +        if ( l1e_get_flags(l1e) & _PAGE_PRESENT )
> +        {
> +            struct page_info *pg = l1e_get_page(l1e);
> +
> +            l1tab[l1_table_offset(va)] = l1e_empty();
> +            flush_tlb_one_mask(d->domain_dirty_cpumask, va);
> +            free_domheap_page(pg);
> +        }
> +        va -= PAGE_SIZE;
> +    } while ( va >= start );
>  
> -    free_xenheap_pages(v->arch.compat_arg_xlat, order);
> -    v->arch.compat_arg_xlat = NULL;
> +    unmap_domain_page(l1tab);
>  }
>  
>  void cleanup_frame_table(struct mem_hotadd_info *info)
> --- a/xen/include/asm-x86/config.h
> +++ b/xen/include/asm-x86/config.h
> @@ -212,7 +212,7 @@ extern unsigned char boot_edid_info[128]
>  /* Slot 260: per-domain mappings (including map cache). */
>  #define PERDOMAIN_VIRT_START    (PML4_ADDR(260))
>  #define PERDOMAIN_SLOT_MBYTES   (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
> -#define PERDOMAIN_SLOTS         2
> +#define PERDOMAIN_SLOTS         3
>  #define PERDOMAIN_VIRT_SLOT(s)  (PERDOMAIN_VIRT_START + (s) * \
>                                   (PERDOMAIN_SLOT_MBYTES << 20))
>  /* Slot 261: machine-to-phys conversion table (256GB). */
> @@ -304,6 +304,14 @@ extern unsigned long xen_phys_start;
>  #define LDT_VIRT_START(v)    \
>      (GDT_VIRT_START(v) + (64*1024))
>  
> +/* Argument translation area. The 2nd to last per-domain-mapping sub-area. */
> +#define ARG_XLAT_SLOT            (PERDOMAIN_SLOTS - 2)
> +/* Allow for at least one guard page (COMPAT_ARG_XLAT_SIZE being 2 pages): */
> +#define ARG_XLAT_VA_SHIFT        (2 + PAGE_SHIFT)
> +#define ARG_XLAT_VIRT_START      PERDOMAIN_VIRT_SLOT(ARG_XLAT_SLOT)
> +#define ARG_XLAT_START(v)        \
> +    (ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT))
> +
>  /* map_domain_page() map cache. The last per-domain-mapping sub-area. */
>  #define MAPCACHE_VCPU_ENTRIES    (CONFIG_PAGING_LEVELS * CONFIG_PAGING_LEVELS)
>  #define MAPCACHE_ENTRIES         (MAX_VIRT_CPUS * MAPCACHE_VCPU_ENTRIES)
> @@ -313,7 +321,7 @@ extern unsigned long xen_phys_start;
>                                    MAPCACHE_ENTRIES * PAGE_SIZE)
>  
>  #define PDPT_L1_ENTRIES       \
> -    ((PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS - 1) - PERDOMAIN_VIRT_START) >> PAGE_SHIFT)
> +    ((PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS - 2) - PERDOMAIN_VIRT_START) >> PAGE_SHIFT)
>  #define PDPT_L2_ENTRIES       \
>      ((PDPT_L1_ENTRIES + (1 << PAGETABLE_ORDER) - 1) >> PAGETABLE_ORDER)
>  
> --- a/xen/include/asm-x86/domain.h
> +++ b/xen/include/asm-x86/domain.h
> @@ -244,6 +244,7 @@ struct arch_domain
>      void **perdomain_pts;
>      struct page_info *perdomain_l2_pg[PERDOMAIN_SLOTS];
>      struct page_info *perdomain_l3_pg;
> +    struct page_info **arg_xlat_l1_pg;
>  
>      unsigned int hv_compat_vstart;
>  
> @@ -448,9 +449,6 @@ struct arch_vcpu
>  
>      /* A secondary copy of the vcpu time info. */
>      XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
> -
> -    void *compat_arg_xlat;
> -
>  } __cacheline_aligned;
>  
>  /* Shorthands to improve code legibility. */
> --- a/xen/include/asm-x86/x86_64/uaccess.h
> +++ b/xen/include/asm-x86/x86_64/uaccess.h
> @@ -1,16 +1,15 @@
>  #ifndef __X86_64_UACCESS_H
>  #define __X86_64_UACCESS_H
>  
> -#define COMPAT_ARG_XLAT_VIRT_BASE compat_arg_xlat_virt_base()
> +#define COMPAT_ARG_XLAT_VIRT_BASE ((void *)ARG_XLAT_START(current))
>  #define COMPAT_ARG_XLAT_SIZE      (2*PAGE_SIZE)
>  struct vcpu;
> -void *compat_arg_xlat_virt_base(void);
>  int setup_compat_arg_xlat(struct vcpu *v);
>  void free_compat_arg_xlat(struct vcpu *v);
>  #define is_compat_arg_xlat_range(addr, size) ({                            \
>      unsigned long __off;                                                    \
>      __off = (unsigned long)(addr) - (unsigned long)COMPAT_ARG_XLAT_VIRT_BASE;\
> -    (__off <= COMPAT_ARG_XLAT_SIZE) &&                                      \
> +    (__off < COMPAT_ARG_XLAT_SIZE) &&                                       \
>      ((__off + (unsigned long)(size)) <= COMPAT_ARG_XLAT_SIZE);              \
>  })
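
For the record, a standalone illustration of the off-by-one the hunk above
fixes -- plain C, not part of the patch, with COMPAT_ARG_XLAT_SIZE standing
in for the real 2*PAGE_SIZE value:

    #include <stdio.h>

    #define COMPAT_ARG_XLAT_SIZE (2 * 4096UL)

    /* Old test: an offset exactly one byte past the area, paired with
     * size == 0, passes both comparisons and is wrongly accepted. */
    static int in_range_old(unsigned long off, unsigned long size)
    {
        return (off <= COMPAT_ARG_XLAT_SIZE) &&
               ((off + size) <= COMPAT_ARG_XLAT_SIZE);
    }

    /* New test: off must lie strictly inside the area. */
    static int in_range_new(unsigned long off, unsigned long size)
    {
        return (off < COMPAT_ARG_XLAT_SIZE) &&
               ((off + size) <= COMPAT_ARG_XLAT_SIZE);
    }

    int main(void)
    {
        printf("old: %d, new: %d\n",
               in_range_old(COMPAT_ARG_XLAT_SIZE, 0),  /* prints 1 */
               in_range_new(COMPAT_ARG_XLAT_SIZE, 0)); /* prints 0 */
        return 0;
    }

With '<', an address one byte past the end of the translation area is
correctly rejected.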



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel