[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
On Wed, Sep 05, 2012 at 01:09:32PM -0400, Andres Lagar-Cavilla wrote: > Super. To which branch are you applying these now? (n00b question but have to > ask) They will show up on stable/for-linus-3.7 once the overnight tests pass. > Andres > On Sep 5, 2012, at 12:21 PM, Konrad Rzeszutek Wilk wrote: > > > On Fri, Aug 31, 2012 at 09:59:30AM -0400, Andres Lagar-Cavilla wrote: > >> Re-spin of alternative patch after David's feedback. > >> Thanks > >> Andres > > > > applied. fixed some whitespace issues. > >> > >> commit ab351a5cef1797935b083c2f6e72800a8949c515 > >> Author: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx> > >> Date: Thu Aug 30 12:23:33 2012 -0400 > >> > >> xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl > >> > >> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional > >> field for reporting the error code for every frame that could not be > >> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH. > >> > >> Also expand PRIVCMD_MMAPBATCH to return appropriate error-encoding top > >> nibble > >> in the mfn array. > >> > >> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx> > >> Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx> > >> > >> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c > >> index 85226cb..5386f20 100644 > >> --- a/drivers/xen/privcmd.c > >> +++ b/drivers/xen/privcmd.c > >> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages) > >> */ > >> static int gather_array(struct list_head *pagelist, > >> unsigned nelem, size_t size, > >> - void __user *data) > >> + const void __user *data) > >> { > >> unsigned pageidx; > >> void *pagedata; > >> @@ -246,61 +246,117 @@ struct mmap_batch_state { > >> domid_t domain; > >> unsigned long va; > >> struct vm_area_struct *vma; > >> - int err; > >> - > >> - xen_pfn_t __user *user; > >> + /* A tristate: > >> + * 0 for no errors > >> + * 1 if at least one error has happened (and no > >> + * -ENOENT errors have happened) > >> + * -ENOENT if at least 1 -ENOENT has happened. > >> + */ > >> + int global_error; > >> + /* An array for individual errors */ > >> + int *err; > >> + > >> + /* User-space mfn array to store errors in the second pass for V1. */ > >> + xen_pfn_t __user *user_mfn; > >> }; > >> > >> static int mmap_batch_fn(void *data, void *state) > >> { > >> xen_pfn_t *mfnp = data; > >> struct mmap_batch_state *st = state; > >> + int ret; > >> > >> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, > >> - st->vma->vm_page_prot, st->domain) < 0) { > >> - *mfnp |= 0xf0000000U; > >> - st->err++; > >> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, > >> + st->vma->vm_page_prot, st->domain); > >> + > >> + /* Store error code for second pass. */ > >> + *(st->err++) = ret; > >> + > >> + /* And see if it affects the global_error. */ > >> + if (ret < 0) { > >> + if (ret == -ENOENT) > >> + st->global_error = -ENOENT; > >> + else { > >> + /* Record that at least one error has happened. */ > >> + if (st->global_error == 0) > >> + st->global_error = 1; > >> + } > >> } > >> st->va += PAGE_SIZE; > >> > >> return 0; > >> } > >> > >> -static int mmap_return_errors(void *data, void *state) > >> +static int mmap_return_errors_v1(void *data, void *state) > >> { > >> xen_pfn_t *mfnp = data; > >> struct mmap_batch_state *st = state; > >> - > >> - return put_user(*mfnp, st->user++); > >> + int err = *(st->err++); > >> + > >> + /* > >> + * V1 encodes the error codes in the 32bit top nibble of the > >> + * mfn (with its known limitations vis-a-vis 64 bit callers). > >> + */ > >> + *mfnp |= (err == -ENOENT) ? > >> + PRIVCMD_MMAPBATCH_PAGED_ERROR : > >> + PRIVCMD_MMAPBATCH_MFN_ERROR; > >> + return __put_user(*mfnp, st->user_mfn++); > >> } > >> > >> static struct vm_operations_struct privcmd_vm_ops; > >> > >> -static long privcmd_ioctl_mmap_batch(void __user *udata) > >> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version) > >> { > >> int ret; > >> - struct privcmd_mmapbatch m; > >> + struct privcmd_mmapbatch_v2 m; > >> struct mm_struct *mm = current->mm; > >> struct vm_area_struct *vma; > >> unsigned long nr_pages; > >> LIST_HEAD(pagelist); > >> + int *err_array = NULL; > >> struct mmap_batch_state state; > >> > >> if (!xen_initial_domain()) > >> return -EPERM; > >> > >> - if (copy_from_user(&m, udata, sizeof(m))) > >> - return -EFAULT; > >> + switch (version) { > >> + case 1: > >> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch))) > >> + return -EFAULT; > >> + /* Returns per-frame error in m.arr. */ > >> + m.err = NULL; > >> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr))) > >> + return -EFAULT; > >> + break; > >> + case 2: > >> + if (copy_from_user(&m, udata, sizeof(struct > >> privcmd_mmapbatch_v2))) > >> + return -EFAULT; > >> + /* Returns per-frame error code in m.err. */ > >> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err)))) > >> + return -EFAULT; > >> + break; > >> + default: > >> + return -EINVAL; > >> + } > >> > >> nr_pages = m.num; > >> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) > >> return -EINVAL; > >> > >> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), > >> - m.arr); > >> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr); > >> + > >> + if (ret) > >> + goto out; > >> + if (list_empty(&pagelist)) { > >> + ret = -EINVAL; > >> + goto out; > >> + } > >> > >> - if (ret || list_empty(&pagelist)) > >> + err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL); > >> + if (err_array == NULL) { > >> + ret = -ENOMEM; > >> goto out; > >> + } > >> > >> down_write(&mm->mmap_sem); > >> > >> @@ -315,24 +371,34 @@ static long privcmd_ioctl_mmap_batch(void __user > >> *udata) > >> goto out; > >> } > >> > >> - state.domain = m.dom; > >> - state.vma = vma; > >> - state.va = m.addr; > >> - state.err = 0; > >> + state.domain = m.dom; > >> + state.vma = vma; > >> + state.va = m.addr; > >> + state.global_error = 0; > >> + state.err = err_array; > >> > >> - ret = traverse_pages(m.num, sizeof(xen_pfn_t), > >> - &pagelist, mmap_batch_fn, &state); > >> + /* mmap_batch_fn guarantees ret == 0 */ > >> + BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t), > >> + &pagelist, mmap_batch_fn, &state)); > >> > >> up_write(&mm->mmap_sem); > >> > >> - if (state.err > 0) { > >> - state.user = m.arr; > >> + if (state.global_error && (version == 1)) { > >> + /* Write back errors in second pass. */ > >> + state.user_mfn = (xen_pfn_t *)m.arr; > >> + state.err = err_array; > >> ret = traverse_pages(m.num, sizeof(xen_pfn_t), > >> - &pagelist, > >> - mmap_return_errors, &state); > >> - } > >> + &pagelist, mmap_return_errors_v1, > >> &state); > >> + } else > >> + ret = __copy_to_user(m.err, err_array, m.num * sizeof(int)); > >> + > >> + /* If we have not had any EFAULT-like global errors then set the > >> global > >> + * error to -ENOENT if necessary. */ > >> + if ((ret == 0) && (state.global_error == -ENOENT)) > >> + ret = -ENOENT; > >> > >> out: > >> + kfree(err_array); > >> free_page_list(&pagelist); > >> > >> return ret; > >> @@ -354,7 +420,11 @@ static long privcmd_ioctl(struct file *file, > >> break; > >> > >> case IOCTL_PRIVCMD_MMAPBATCH: > >> - ret = privcmd_ioctl_mmap_batch(udata); > >> + ret = privcmd_ioctl_mmap_batch(udata, 1); > >> + break; > >> + > >> + case IOCTL_PRIVCMD_MMAPBATCH_V2: > >> + ret = privcmd_ioctl_mmap_batch(udata, 2); > >> break; > >> > >> default: > >> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h > >> index 45c1aa1..a853168 100644 > >> --- a/include/xen/privcmd.h > >> +++ b/include/xen/privcmd.h > >> @@ -58,13 +58,33 @@ struct privcmd_mmapbatch { > >> int num; /* number of pages to populate */ > >> domid_t dom; /* target domain */ > >> __u64 addr; /* virtual address */ > >> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */ > >> + xen_pfn_t __user *arr; /* array of mfns - or'd with > >> + PRIVCMD_MMAPBATCH_*_ERROR on err */ > >> +}; > >> + > >> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U > >> +#define PRIVCMD_MMAPBATCH_PAGED_ERROR 0x80000000U > >> + > >> +struct privcmd_mmapbatch_v2 { > >> + unsigned int num; /* number of pages to populate */ > >> + domid_t dom; /* target domain */ > >> + __u64 addr; /* virtual address */ > >> + const xen_pfn_t __user *arr; /* array of mfns */ > >> + int __user *err; /* array of error codes */ > >> }; > >> > >> /* > >> * @cmd: IOCTL_PRIVCMD_HYPERCALL > >> * @arg: &privcmd_hypercall_t > >> * Return: Value returned from execution of the specified hypercall. > >> + * > >> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2 > >> + * @arg: &struct privcmd_mmapbatch_v2 > >> + * Return: 0 on success (i.e., arg->err contains valid error codes for > >> + * each frame). On an error other than a failed frame remap, -1 is > >> + * returned and errno is set to EINVAL, EFAULT etc. As an exception, > >> + * if the operation was otherwise successful but any frame failed with > >> + * -ENOENT, then -1 is returned and errno is set to ENOENT. > >> */ > >> #define IOCTL_PRIVCMD_HYPERCALL \ > >> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall)) > >> @@ -72,5 +92,7 @@ struct privcmd_mmapbatch { > >> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap)) > >> #define IOCTL_PRIVCMD_MMAPBATCH \ > >> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch)) > >> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \ > >> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2)) > >> > >> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ > >> > >> On Aug 30, 2012, at 8:58 AM, David Vrabel wrote: > >> > >>> From: David Vrabel <david.vrabel@xxxxxxxxxx> > >>> > >>> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional > >>> field for reporting the error code for every frame that could not be > >>> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH. > >>> > >>> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx> > >>> --- > >>> drivers/xen/privcmd.c | 99 > >>> +++++++++++++++++++++++++++++++++++++++--------- > >>> include/xen/privcmd.h | 23 +++++++++++- > >>> 2 files changed, 102 insertions(+), 20 deletions(-) > >>> > >>> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c > >>> index ccee0f1..c0e89e7 100644 > >>> --- a/drivers/xen/privcmd.c > >>> +++ b/drivers/xen/privcmd.c > >>> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages) > >>> */ > >>> static int gather_array(struct list_head *pagelist, > >>> unsigned nelem, size_t size, > >>> - void __user *data) > >>> + const void __user *data) > >>> { > >>> unsigned pageidx; > >>> void *pagedata; > >>> @@ -248,18 +248,37 @@ struct mmap_batch_state { > >>> struct vm_area_struct *vma; > >>> int err; > >>> > >>> - xen_pfn_t __user *user; > >>> + xen_pfn_t __user *user_mfn; > >>> + int __user *user_err; > >>> }; > >>> > >>> static int mmap_batch_fn(void *data, void *state) > >>> { > >>> xen_pfn_t *mfnp = data; > >>> struct mmap_batch_state *st = state; > >>> + int ret; > >>> > >>> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, > >>> - st->vma->vm_page_prot, st->domain) < 0) { > >>> - *mfnp |= 0xf0000000U; > >>> - st->err++; > >>> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, > >>> + st->vma->vm_page_prot, st->domain); > >>> + if (ret < 0) { > >>> + /* > >>> + * Error reporting is a mess but userspace relies on > >>> + * it behaving this way. > >>> + * > >>> + * V2 needs to a) return the result of each frame's > >>> + * remap; and b) return -ENOENT if any frame failed > >>> + * with -ENOENT. > >>> + * > >>> + * In this first pass the error code is saved by > >>> + * overwriting the mfn and an error is indicated in > >>> + * st->err. > >>> + * > >>> + * The second pass by mmap_return_errors() will write > >>> + * the error codes to user space and get the right > >>> + * ioctl return value. > >>> + */ > >>> + *(int *)mfnp = ret; > >>> + st->err = ret; > >>> } > >>> st->va += PAGE_SIZE; > >>> > >>> @@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void > >>> *state) > >>> { > >>> xen_pfn_t *mfnp = data; > >>> struct mmap_batch_state *st = state; > >>> + int ret; > >>> + > >>> + if (st->user_err) { > >>> + int err = *(int *)mfnp; > >>> + > >>> + if (err == -ENOENT) > >>> + st->err = err; > >>> > >>> - return put_user(*mfnp, st->user++); > >>> + return __put_user(err, st->user_err++); > >>> + } else { > >>> + xen_pfn_t mfn; > >>> + > >>> + ret = __get_user(mfn, st->user_mfn); > >>> + if (ret < 0) > >>> + return ret; > >>> + > >>> + mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR; > >>> + return __put_user(mfn, st->user_mfn++); > >>> + } > >>> } > >>> > >>> static struct vm_operations_struct privcmd_vm_ops; > >>> > >>> -static long privcmd_ioctl_mmap_batch(void __user *udata) > >>> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version) > >>> { > >>> int ret; > >>> - struct privcmd_mmapbatch m; > >>> + struct privcmd_mmapbatch_v2 m; > >>> struct mm_struct *mm = current->mm; > >>> struct vm_area_struct *vma; > >>> unsigned long nr_pages; > >>> @@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user > >>> *udata) > >>> if (!xen_initial_domain()) > >>> return -EPERM; > >>> > >>> - if (copy_from_user(&m, udata, sizeof(m))) > >>> - return -EFAULT; > >>> + switch (version) { > >>> + case 1: > >>> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch))) > >>> + return -EFAULT; > >>> + /* Returns per-frame error in m.arr. */ > >>> + m.err = NULL; > >>> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr))) > >>> + return -EFAULT; > >>> + break; > >>> + case 2: > >>> + if (copy_from_user(&m, udata, sizeof(struct > >>> privcmd_mmapbatch_v2))) > >>> + return -EFAULT; > >>> + /* Returns per-frame error code in m.err. */ > >>> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err)))) > >>> + return -EFAULT; > >>> + break; > >>> + default: > >>> + return -EINVAL; > >>> + } > >>> > >>> nr_pages = m.num; > >>> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) > >>> return -EINVAL; > >>> > >>> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), > >>> - m.arr); > >>> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr); > >>> > >>> if (ret || list_empty(&pagelist)) > >>> goto out; > >>> @@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user > >>> *udata) > >>> > >>> up_write(&mm->mmap_sem); > >>> > >>> - if (state.err > 0) { > >>> - state.user = m.arr; > >>> + if (state.err) { > >>> + state.err = 0; > >>> + state.user_mfn = (xen_pfn_t *)m.arr; > >>> + state.user_err = m.err; > >>> ret = traverse_pages(m.num, sizeof(xen_pfn_t), > >>> - &pagelist, > >>> - mmap_return_errors, &state); > >>> - } > >>> + &pagelist, > >>> + mmap_return_errors, &state); > >>> + if (ret >= 0) > >>> + ret = state.err; > >>> + } else if (m.err) > >>> + __clear_user(m.err, m.num * sizeof(*m.err)); > >>> > >>> out: > >>> free_page_list(&pagelist); > >>> @@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file, > >>> break; > >>> > >>> case IOCTL_PRIVCMD_MMAPBATCH: > >>> - ret = privcmd_ioctl_mmap_batch(udata); > >>> + ret = privcmd_ioctl_mmap_batch(udata, 1); > >>> + break; > >>> + > >>> + case IOCTL_PRIVCMD_MMAPBATCH_V2: > >>> + ret = privcmd_ioctl_mmap_batch(udata, 2); > >>> break; > >>> > >>> default: > >>> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h > >>> index 17857fb..f60d75c 100644 > >>> --- a/include/xen/privcmd.h > >>> +++ b/include/xen/privcmd.h > >>> @@ -59,13 +59,32 @@ struct privcmd_mmapbatch { > >>> int num; /* number of pages to populate */ > >>> domid_t dom; /* target domain */ > >>> __u64 addr; /* virtual address */ > >>> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */ > >>> + xen_pfn_t __user *arr; /* array of mfns - or'd with > >>> + PRIVCMD_MMAPBATCH_MFN_ERROR on err */ > >>> +}; > >>> + > >>> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U > >>> + > >>> +struct privcmd_mmapbatch_v2 { > >>> + unsigned int num; /* number of pages to populate */ > >>> + domid_t dom; /* target domain */ > >>> + __u64 addr; /* virtual address */ > >>> + const xen_pfn_t __user *arr; /* array of mfns */ > >>> + int __user *err; /* array of error codes */ > >>> }; > >>> > >>> /* > >>> * @cmd: IOCTL_PRIVCMD_HYPERCALL > >>> * @arg: &privcmd_hypercall_t > >>> * Return: Value returned from execution of the specified hypercall. > >>> + * > >>> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2 > >>> + * @arg: &struct privcmd_mmapbatch_v2 > >>> + * Return: 0 on success (i.e., arg->err contains valid error codes for > >>> + * each frame). On an error other than a failed frame remap, -1 is > >>> + * returned and errno is set to EINVAL, EFAULT etc. As an exception, > >>> + * if the operation was otherwise successful but any frame failed with > >>> + * -ENOENT, then -1 is returned and errno is set to ENOENT. > >>> */ > >>> #define IOCTL_PRIVCMD_HYPERCALL \ > >>> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall)) > >>> @@ -73,5 +92,7 @@ struct privcmd_mmapbatch { > >>> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap)) > >>> #define IOCTL_PRIVCMD_MMAPBATCH \ > >>> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch)) > >>> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \ > >>> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2)) > >>> > >>> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ > >>> -- > >>> 1.7.2.5 > >>> _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |