[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 3/3] Port of mmap_batch_v2 to support paging in Xen



On Fri, Dec 16, 2011 at 10:22:21PM -0500, Adin Scannell wrote:
> This wasn't ported from any patch, but was rewritten based on the XCP 2.6.32
> tree.  The code structure is significantly different and this patch mirrors
> the existing Linux code.
> 
> The primary reason for needing the V2 interface is to support foreign mappings
> (i.e. qemu) of paged-out pages.  The libxc code will already retry mappings
> when an ENOENT is returned.  The V2 interface provides a richer error value,
> so the user-space code is capable of handling these errors specifically.

Can you give more details on how paged-out pages are used? Perhaps a
pointer to the Xen docs?

> 
> Signed-off-by: Adin Scannell <adin@xxxxxxxxxxx>
> 
> Index: linux/drivers/xen/xenfs/privcmd.c
> ===================================================================
> ---
>  drivers/xen/xenfs/privcmd.c |   90 
> ++++++++++++++++++++++++++++++++++++++++++-

So that file just moved to drivers/xen/privcmd.c

>  include/xen/privcmd.h       |   10 +++++
>  2 files changed, 99 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c
> index dbd3b16..21cbb5a 100644
> --- a/drivers/xen/xenfs/privcmd.c
> +++ b/drivers/xen/xenfs/privcmd.c
> @@ -70,7 +70,7 @@ static void free_page_list(struct list_head *pages)
>   */
>  static int gather_array(struct list_head *pagelist,
>                       unsigned nelem, size_t size,
> -                     void __user *data)
> +                     const void __user *data)
>  {
>       unsigned pageidx;
>       void *pagedata;
> @@ -245,6 +245,15 @@ struct mmap_batch_state {
>       xen_pfn_t __user *user;
>  };
>  
> +struct mmap_batch_v2_state {
> +     domid_t domain;
> +     unsigned long va;
> +     struct vm_area_struct *vma;
> +     int paged_out;

Should this be unsigned int?
> +
> +     int __user *err;
> +};
> +
>  static int mmap_batch_fn(void *data, void *state)
>  {
>       xen_pfn_t *mfnp = data;
> @@ -260,6 +269,20 @@ static int mmap_batch_fn(void *data, void *state)
>       return 0;
>  }
>  
> +static int mmap_batch_v2_fn(void *data, void *state)
> +{
> +     xen_pfn_t *mfnp = data;
> +     struct mmap_batch_v2_state *st = state;
> +
> +     int rc = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 
> 1,
> +                                    st->vma->vm_page_prot, st->domain);

You don't want to check that st is not NULL?

> +     if ( rc == -ENOENT )

This is the wrong style: kernel coding style puts no spaces inside the
parentheses, i.e. 'if (rc == -ENOENT)'. Please fix.

> +             st->paged_out++;

Is it possible for this to end up overflowing and hitting 0?

> +     st->va += PAGE_SIZE;
> +
> +     return put_user(rc, st->err++);
> +}
> +
>  static int mmap_return_errors(void *data, void *state)
>  {
>       xen_pfn_t *mfnp = data;
> @@ -332,6 +355,67 @@ out:
>       return ret;
>  }
>  
> +static long privcmd_ioctl_mmap_batch_v2(void __user *udata)
> +{
> +     int ret;
> +     struct privcmd_mmapbatch_v2 m;
> +     struct mm_struct *mm = current->mm;
> +     struct vm_area_struct *vma = NULL;
> +     unsigned long nr_pages;
> +     LIST_HEAD(pagelist);
> +     struct mmap_batch_v2_state state;
> +
> +     if (!xen_initial_domain())
> +             return -EPERM;
> +
> +     if (copy_from_user(&m, udata, sizeof(m)))
> +             return -EFAULT;
> +
> +     nr_pages = m.num;
> +     if ((m.num <= 0) || (nr_pages > (ULONG_MAX >> PAGE_SHIFT)))

Just make it nr_pages instead of m.num.

> +             return -EINVAL;
> +
> +     ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),


Use nr_pages here instead of m.num as well.
> +                        m.arr);
> +
> +     if (ret || list_empty(&pagelist))
> +             goto out;
> +
> +     down_write(&mm->mmap_sem);
> +
> +     vma = find_vma(mm, m.addr);
> +     ret = -EINVAL;
> +     /* We allow multiple shots here, because this interface
> +      * is used by libxc and mappings for specific pages will
> +      * be retried when pages are paged-out (ENOENT). */
> +     if (!vma ||
> +         vma->vm_ops != &privcmd_vm_ops ||
> +         (m.addr < vma->vm_start) ||
> +         ((m.addr + (nr_pages << PAGE_SHIFT)) > vma->vm_end)) {
> +             up_write(&mm->mmap_sem);
> +             goto out;
> +     }
> +
> +     state.domain = m.dom;

Should you check m.dom for invalid values, such as -1 or DOMID_IO?

> +     state.vma = vma;
> +     state.va = m.addr;
> +     state.err = m.err;
> +     state.paged_out = 0;
> +
> +     up_write(&mm->mmap_sem);
> +
> +     ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> +                          &pagelist, mmap_batch_v2_fn, &state);
> +
> +out:
> +     free_page_list(&pagelist);
> +
> +     if ( (ret == 0) && (state.paged_out > 0) )
> +             return -ENOENT;
> +        else
> +             return ret;
> +}
> +
>  static long privcmd_ioctl(struct file *file,
>                         unsigned int cmd, unsigned long data)
>  {
> @@ -351,6 +435,10 @@ static long privcmd_ioctl(struct file *file,
>               ret = privcmd_ioctl_mmap_batch(udata);
>               break;
>  
> +     case IOCTL_PRIVCMD_MMAPBATCH_V2:
> +             ret = privcmd_ioctl_mmap_batch_v2(udata);
> +             break;
> +
>       default:
>               ret = -EINVAL;
>               break;
> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> index 17857fb..39b92b1 100644
> --- a/include/xen/privcmd.h
> +++ b/include/xen/privcmd.h
> @@ -62,6 +62,14 @@ struct privcmd_mmapbatch {
>       xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
>  };
>  
> +struct privcmd_mmapbatch_v2 {
> +     int num;          /* number of pages to populate */
> +     domid_t dom;      /* target domain */
> +     __u64 addr;       /* virtual address */
> +     const xen_pfn_t __user *arr; /* array of mfns */
> +     int __user *err;  /* array of error codes */
> +};
> +
>  /*
>   * @cmd: IOCTL_PRIVCMD_HYPERCALL
>   * @arg: &privcmd_hypercall_t
> @@ -73,5 +81,7 @@ struct privcmd_mmapbatch {
>       _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
>  #define IOCTL_PRIVCMD_MMAPBATCH                                      \
>       _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> +#define IOCTL_PRIVCMD_MMAPBATCH_V2                           \
> +     _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
>  
>  #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
> -- 
> 1.6.2.5
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.