[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 1/2] x86/xen: safely map and unmap grant frames when in atomic context



On Wed, Jul 02, 2014 at 11:25:28AM +0100, David Vrabel wrote:
> arch_gnttab_map_frames() and arch_gnttab_unmap_frames() are called in
> atomic context but were calling alloc_vm_area() which might sleep.
> 
> Also, if a driver attempts to allocate a grant ref from an interrupt
> and the table needs expanding, then the CPU may already by in lazy MMU
> mode and apply_to_page_range() will BUG when it tries to re-enable
> lazy MMU mode.
> 
> These two functions are only used in PV guests.
> 
> Introduce arch_gnttab_init() to allocates the virtual address space in
> advance.
> 
> Avoid the use of apply_to_page_range() by using saving and using the
> array of PTE addresses from the alloc_vm_area() call.
> 
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>

With:
 a) I believe all the __init has to go as the code will be
    called during suspend/resume cycle. But you already know that :-)

 b) If you could also add in the description: "alloc_vm_area'
    pre-allocates the pagetable so there is no need to worry about
    having to do a PGD/PUD/PMD walk (like apply_to_page_range does) and
    we can instead do set_pte. 

that you can stick on it:

Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>

Thank you!
> ---
>  arch/arm/xen/grant-table.c |    5 ++
>  arch/x86/xen/grant-table.c |  147 
> +++++++++++++++++++++++++++-----------------
>  drivers/xen/grant-table.c  |    9 ++-
>  include/xen/grant_table.h  |    1 +
>  4 files changed, 105 insertions(+), 57 deletions(-)
> 
> diff --git a/arch/arm/xen/grant-table.c b/arch/arm/xen/grant-table.c
> index 859a9bb..91cf08b 100644
> --- a/arch/arm/xen/grant-table.c
> +++ b/arch/arm/xen/grant-table.c
> @@ -51,3 +51,8 @@ int arch_gnttab_map_status(uint64_t *frames, unsigned long 
> nr_gframes,
>  {
>       return -ENOSYS;
>  }
> +
> +int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status)
> +{
> +     return 0;
> +}
> diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
> index c985835..27301df 100644
> --- a/arch/x86/xen/grant-table.c
> +++ b/arch/x86/xen/grant-table.c
> @@ -36,6 +36,7 @@
>  
>  #include <linux/sched.h>
>  #include <linux/mm.h>
> +#include <linux/slab.h>
>  #include <linux/vmalloc.h>
>  
>  #include <xen/interface/xen.h>
> @@ -44,87 +45,121 @@
>  
>  #include <asm/pgtable.h>
>  
> -static int map_pte_fn(pte_t *pte, struct page *pmd_page,
> -                   unsigned long addr, void *data)
> +static struct gnttab_vm_area {
> +     struct vm_struct *area;
> +     pte_t **ptes;
> +} gnttab_shared_vm_area, gnttab_status_vm_area;
> +
> +int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
> +                        unsigned long max_nr_gframes,
> +                        void **__shared)
>  {
> -     unsigned long **frames = (unsigned long **)data;
> +     void *shared = *__shared;
> +     unsigned long addr;
> +     unsigned long i;
>  
> -     set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
> -     (*frames)++;
> -     return 0;
> -}
> +     if (shared == NULL)
> +             *__shared = shared = gnttab_shared_vm_area.area->addr;
>  
> -/*
> - * This function is used to map shared frames to store grant status. It is
> - * different from map_pte_fn above, the frames type here is uint64_t.
> - */
> -static int map_pte_fn_status(pte_t *pte, struct page *pmd_page,
> -                          unsigned long addr, void *data)
> -{
> -     uint64_t **frames = (uint64_t **)data;
> +     addr = (unsigned long)shared;
> +
> +     for (i = 0; i < nr_gframes; i++) {
> +             set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i],
> +                        mfn_pte(frames[i], PAGE_KERNEL));
> +             addr += PAGE_SIZE;
> +     }
>  
> -     set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
> -     (*frames)++;
>       return 0;
>  }
>  
> -static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
> -                     unsigned long addr, void *data)
> +int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
> +                        unsigned long max_nr_gframes,
> +                        grant_status_t **__shared)
>  {
> +     grant_status_t *shared = *__shared;
> +     unsigned long addr;
> +     unsigned long i;
> +
> +     if (shared == NULL)
> +             *__shared = shared = gnttab_status_vm_area.area->addr;
> +
> +     addr = (unsigned long)shared;
> +
> +     for (i = 0; i < nr_gframes; i++) {
> +             set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i],
> +                        mfn_pte(frames[i], PAGE_KERNEL));
> +             addr += PAGE_SIZE;
> +     }
>  
> -     set_pte_at(&init_mm, addr, pte, __pte(0));
>       return 0;
>  }
>  
> -int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
> -                        unsigned long max_nr_gframes,
> -                        void **__shared)
> +void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
>  {
> -     int rc;
> -     void *shared = *__shared;
> +     pte_t **ptes;
> +     unsigned long addr;
> +     unsigned long i;
>  
> -     if (shared == NULL) {
> -             struct vm_struct *area =
> -                     alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL);
> -             BUG_ON(area == NULL);
> -             shared = area->addr;
> -             *__shared = shared;
> -     }
> +     if (shared == gnttab_status_vm_area.area->addr)
> +             ptes = gnttab_status_vm_area.ptes;
> +     else
> +             ptes = gnttab_shared_vm_area.ptes;
>  
> -     rc = apply_to_page_range(&init_mm, (unsigned long)shared,
> -                              PAGE_SIZE * nr_gframes,
> -                              map_pte_fn, &frames);
> -     return rc;
> +     addr = (unsigned long)shared;
> +
> +     for (i = 0; i < nr_gframes; i++) {
> +             set_pte_at(&init_mm, addr, ptes[i], __pte(0));
> +             addr += PAGE_SIZE;
> +     }
>  }
>  
> -int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
> -                        unsigned long max_nr_gframes,
> -                        grant_status_t **__shared)
> +static int __init arch_gnttab_valloc(struct gnttab_vm_area *area,
> +                                      unsigned nr_frames)
>  {
> -     int rc;
> -     grant_status_t *shared = *__shared;
> +     area->ptes = kmalloc(sizeof(pte_t *) * nr_frames, GFP_KERNEL);
> +     if (area->ptes == NULL)
> +             return -ENOMEM;
>  
> -     if (shared == NULL) {
> -             /* No need to pass in PTE as we are going to do it
> -              * in apply_to_page_range anyhow. */
> -             struct vm_struct *area =
> -                     alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL);
> -             BUG_ON(area == NULL);
> -             shared = area->addr;
> -             *__shared = shared;
> +     area->area = alloc_vm_area(PAGE_SIZE * nr_frames, area->ptes);
> +     if (area->area == NULL) {
> +             kfree(area->ptes);
> +             return -ENOMEM;
>       }
>  
> -     rc = apply_to_page_range(&init_mm, (unsigned long)shared,
> -                              PAGE_SIZE * nr_gframes,
> -                              map_pte_fn_status, &frames);
> -     return rc;
> +     return 0;
>  }
>  
> -void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
> +static void __init arch_gnttab_vfree(struct gnttab_vm_area *area)
>  {
> -     apply_to_page_range(&init_mm, (unsigned long)shared,
> -                         PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
> +     free_vm_area(area->area);
> +     kfree(area->ptes);
>  }
> +
> +int __init arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status)
> +{
> +     int ret;
> +
> +     if (!xen_pv_domain())
> +             return 0;
> +
> +     ret = arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared);
> +     if (ret < 0)
> +             return ret;
> +
> +     /*
> +      * Always allocate the space for the status frames in case
> +      * we're migrated to a host with V2 support.
> +      */
> +     ret = arch_gnttab_valloc(&gnttab_status_vm_area, nr_status);
> +     if (ret < 0)
> +             goto err;
> +
> +     return 0;
> +  err:
> +     arch_gnttab_vfree(&gnttab_shared_vm_area);
> +     return -ENOMEM;
> +}
> +
>  #ifdef CONFIG_XEN_PVH
>  #include <xen/balloon.h>
>  #include <xen/events.h>
> diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
> index 5d4de88..eeba754 100644
> --- a/drivers/xen/grant-table.c
> +++ b/drivers/xen/grant-table.c
> @@ -1195,18 +1195,20 @@ static int gnttab_expand(unsigned int req_entries)
>  int gnttab_init(void)
>  {
>       int i;
> +     unsigned long max_nr_grant_frames;
>       unsigned int max_nr_glist_frames, nr_glist_frames;
>       unsigned int nr_init_grefs;
>       int ret;
>  
>       gnttab_request_version();
> +     max_nr_grant_frames = gnttab_max_grant_frames();
>       nr_grant_frames = 1;
>  
>       /* Determine the maximum number of frames required for the
>        * grant reference free list on the current hypervisor.
>        */
>       BUG_ON(grefs_per_grant_frame == 0);
> -     max_nr_glist_frames = (gnttab_max_grant_frames() *
> +     max_nr_glist_frames = (max_nr_grant_frames *
>                              grefs_per_grant_frame / RPP);
>  
>       gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
> @@ -1223,6 +1225,11 @@ int gnttab_init(void)
>               }
>       }
>  
> +     ret = arch_gnttab_init(max_nr_grant_frames,
> +                            nr_status_frames(max_nr_grant_frames));
> +     if (ret < 0)
> +             goto ini_nomem;
> +
>       if (gnttab_setup() < 0) {
>               ret = -ENODEV;
>               goto ini_nomem;
> diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
> index a5af2a2..5c1aba1 100644
> --- a/include/xen/grant_table.h
> +++ b/include/xen/grant_table.h
> @@ -170,6 +170,7 @@ gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, 
> phys_addr_t addr,
>       unmap->dev_bus_addr = 0;
>  }
>  
> +int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status);
>  int arch_gnttab_map_shared(xen_pfn_t *frames, unsigned long nr_gframes,
>                          unsigned long max_nr_gframes,
>                          void **__shared);
> -- 
> 1.7.10.4
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.