Re: [Xen-devel] [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
Super. To which branch are you applying these now? (n00b question but have to ask) Andres On Sep 5, 2012, at 12:21 PM, Konrad Rzeszutek Wilk wrote: > On Fri, Aug 31, 2012 at 09:59:30AM -0400, Andres Lagar-Cavilla wrote: >> Re-spin of alternative patch after David's feedback. >> Thanks >> Andres > > applied. fixed some whitespace issues. >> >> commit ab351a5cef1797935b083c2f6e72800a8949c515 >> Author: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx> >> Date: Thu Aug 30 12:23:33 2012 -0400 >> >> xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl >> >> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional >> field for reporting the error code for every frame that could not be >> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH. >> >> Also expand PRIVCMD_MMAPBATCH to return appropriate error-encoding top >> nibble >> in the mfn array. >> >> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx> >> Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx> >> >> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c >> index 85226cb..5386f20 100644 >> --- a/drivers/xen/privcmd.c >> +++ b/drivers/xen/privcmd.c >> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages) >> */ >> static int gather_array(struct list_head *pagelist, >> unsigned nelem, size_t size, >> - void __user *data) >> + const void __user *data) >> { >> unsigned pageidx; >> void *pagedata; >> @@ -246,61 +246,117 @@ struct mmap_batch_state { >> domid_t domain; >> unsigned long va; >> struct vm_area_struct *vma; >> - int err; >> - >> - xen_pfn_t __user *user; >> + /* A tristate: >> + * 0 for no errors >> + * 1 if at least one error has happened (and no >> + * -ENOENT errors have happened) >> + * -ENOENT if at least 1 -ENOENT has happened. >> + */ >> + int global_error; >> + /* An array for individual errors */ >> + int *err; >> + >> + /* User-space mfn array to store errors in the second pass for V1. */ >> + xen_pfn_t __user *user_mfn; >> }; >> >> static int mmap_batch_fn(void *data, void *state) >> { >> xen_pfn_t *mfnp = data; >> struct mmap_batch_state *st = state; >> + int ret; >> >> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, >> - st->vma->vm_page_prot, st->domain) < 0) { >> - *mfnp |= 0xf0000000U; >> - st->err++; >> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, >> + st->vma->vm_page_prot, st->domain); >> + >> + /* Store error code for second pass. */ >> + *(st->err++) = ret; >> + >> + /* And see if it affects the global_error. */ >> + if (ret < 0) { >> + if (ret == -ENOENT) >> + st->global_error = -ENOENT; >> + else { >> + /* Record that at least one error has happened. */ >> + if (st->global_error == 0) >> + st->global_error = 1; >> + } >> } >> st->va += PAGE_SIZE; >> >> return 0; >> } >> >> -static int mmap_return_errors(void *data, void *state) >> +static int mmap_return_errors_v1(void *data, void *state) >> { >> xen_pfn_t *mfnp = data; >> struct mmap_batch_state *st = state; >> - >> - return put_user(*mfnp, st->user++); >> + int err = *(st->err++); >> + >> + /* >> + * V1 encodes the error codes in the 32bit top nibble of the >> + * mfn (with its known limitations vis-a-vis 64 bit callers). >> + */ >> + *mfnp |= (err == -ENOENT) ? 
>> + PRIVCMD_MMAPBATCH_PAGED_ERROR : >> + PRIVCMD_MMAPBATCH_MFN_ERROR; >> + return __put_user(*mfnp, st->user_mfn++); >> } >> >> static struct vm_operations_struct privcmd_vm_ops; >> >> -static long privcmd_ioctl_mmap_batch(void __user *udata) >> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version) >> { >> int ret; >> - struct privcmd_mmapbatch m; >> + struct privcmd_mmapbatch_v2 m; >> struct mm_struct *mm = current->mm; >> struct vm_area_struct *vma; >> unsigned long nr_pages; >> LIST_HEAD(pagelist); >> + int *err_array = NULL; >> struct mmap_batch_state state; >> >> if (!xen_initial_domain()) >> return -EPERM; >> >> - if (copy_from_user(&m, udata, sizeof(m))) >> - return -EFAULT; >> + switch (version) { >> + case 1: >> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch))) >> + return -EFAULT; >> + /* Returns per-frame error in m.arr. */ >> + m.err = NULL; >> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr))) >> + return -EFAULT; >> + break; >> + case 2: >> + if (copy_from_user(&m, udata, sizeof(struct >> privcmd_mmapbatch_v2))) >> + return -EFAULT; >> + /* Returns per-frame error code in m.err. */ >> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err)))) >> + return -EFAULT; >> + break; >> + default: >> + return -EINVAL; >> + } >> >> nr_pages = m.num; >> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) >> return -EINVAL; >> >> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), >> - m.arr); >> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr); >> + >> + if (ret) >> + goto out; >> + if (list_empty(&pagelist)) { >> + ret = -EINVAL; >> + goto out; >> + } >> >> - if (ret || list_empty(&pagelist)) >> + err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL); >> + if (err_array == NULL) { >> + ret = -ENOMEM; >> goto out; >> + } >> >> down_write(&mm->mmap_sem); >> >> @@ -315,24 +371,34 @@ static long privcmd_ioctl_mmap_batch(void __user >> *udata) >> goto out; >> } >> >> - state.domain = m.dom; >> - state.vma = vma; >> - state.va = m.addr; >> - state.err = 0; >> + state.domain = m.dom; >> + state.vma = vma; >> + state.va = m.addr; >> + state.global_error = 0; >> + state.err = err_array; >> >> - ret = traverse_pages(m.num, sizeof(xen_pfn_t), >> - &pagelist, mmap_batch_fn, &state); >> + /* mmap_batch_fn guarantees ret == 0 */ >> + BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t), >> + &pagelist, mmap_batch_fn, &state)); >> >> up_write(&mm->mmap_sem); >> >> - if (state.err > 0) { >> - state.user = m.arr; >> + if (state.global_error && (version == 1)) { >> + /* Write back errors in second pass. */ >> + state.user_mfn = (xen_pfn_t *)m.arr; >> + state.err = err_array; >> ret = traverse_pages(m.num, sizeof(xen_pfn_t), >> - &pagelist, >> - mmap_return_errors, &state); >> - } >> + &pagelist, mmap_return_errors_v1, >> &state); >> + } else >> + ret = __copy_to_user(m.err, err_array, m.num * sizeof(int)); >> + >> + /* If we have not had any EFAULT-like global errors then set the global >> + * error to -ENOENT if necessary. 
*/ >> + if ((ret == 0) && (state.global_error == -ENOENT)) >> + ret = -ENOENT; >> >> out: >> + kfree(err_array); >> free_page_list(&pagelist); >> >> return ret; >> @@ -354,7 +420,11 @@ static long privcmd_ioctl(struct file *file, >> break; >> >> case IOCTL_PRIVCMD_MMAPBATCH: >> - ret = privcmd_ioctl_mmap_batch(udata); >> + ret = privcmd_ioctl_mmap_batch(udata, 1); >> + break; >> + >> + case IOCTL_PRIVCMD_MMAPBATCH_V2: >> + ret = privcmd_ioctl_mmap_batch(udata, 2); >> break; >> >> default: >> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h >> index 45c1aa1..a853168 100644 >> --- a/include/xen/privcmd.h >> +++ b/include/xen/privcmd.h >> @@ -58,13 +58,33 @@ struct privcmd_mmapbatch { >> int num; /* number of pages to populate */ >> domid_t dom; /* target domain */ >> __u64 addr; /* virtual address */ >> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */ >> + xen_pfn_t __user *arr; /* array of mfns - or'd with >> + PRIVCMD_MMAPBATCH_*_ERROR on err */ >> +}; >> + >> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U >> +#define PRIVCMD_MMAPBATCH_PAGED_ERROR 0x80000000U >> + >> +struct privcmd_mmapbatch_v2 { >> + unsigned int num; /* number of pages to populate */ >> + domid_t dom; /* target domain */ >> + __u64 addr; /* virtual address */ >> + const xen_pfn_t __user *arr; /* array of mfns */ >> + int __user *err; /* array of error codes */ >> }; >> >> /* >> * @cmd: IOCTL_PRIVCMD_HYPERCALL >> * @arg: &privcmd_hypercall_t >> * Return: Value returned from execution of the specified hypercall. >> + * >> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2 >> + * @arg: &struct privcmd_mmapbatch_v2 >> + * Return: 0 on success (i.e., arg->err contains valid error codes for >> + * each frame). On an error other than a failed frame remap, -1 is >> + * returned and errno is set to EINVAL, EFAULT etc. As an exception, >> + * if the operation was otherwise successful but any frame failed with >> + * -ENOENT, then -1 is returned and errno is set to ENOENT. >> */ >> #define IOCTL_PRIVCMD_HYPERCALL \ >> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall)) >> @@ -72,5 +92,7 @@ struct privcmd_mmapbatch { >> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap)) >> #define IOCTL_PRIVCMD_MMAPBATCH \ >> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch)) >> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \ >> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2)) >> >> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ >> >> On Aug 30, 2012, at 8:58 AM, David Vrabel wrote: >> >>> From: David Vrabel <david.vrabel@xxxxxxxxxx> >>> >>> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional >>> field for reporting the error code for every frame that could not be >>> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH. 
>>> >>> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx> >>> --- >>> drivers/xen/privcmd.c | 99 >>> +++++++++++++++++++++++++++++++++++++++--------- >>> include/xen/privcmd.h | 23 +++++++++++- >>> 2 files changed, 102 insertions(+), 20 deletions(-) >>> >>> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c >>> index ccee0f1..c0e89e7 100644 >>> --- a/drivers/xen/privcmd.c >>> +++ b/drivers/xen/privcmd.c >>> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages) >>> */ >>> static int gather_array(struct list_head *pagelist, >>> unsigned nelem, size_t size, >>> - void __user *data) >>> + const void __user *data) >>> { >>> unsigned pageidx; >>> void *pagedata; >>> @@ -248,18 +248,37 @@ struct mmap_batch_state { >>> struct vm_area_struct *vma; >>> int err; >>> >>> - xen_pfn_t __user *user; >>> + xen_pfn_t __user *user_mfn; >>> + int __user *user_err; >>> }; >>> >>> static int mmap_batch_fn(void *data, void *state) >>> { >>> xen_pfn_t *mfnp = data; >>> struct mmap_batch_state *st = state; >>> + int ret; >>> >>> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, >>> - st->vma->vm_page_prot, st->domain) < 0) { >>> - *mfnp |= 0xf0000000U; >>> - st->err++; >>> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, >>> + st->vma->vm_page_prot, st->domain); >>> + if (ret < 0) { >>> + /* >>> + * Error reporting is a mess but userspace relies on >>> + * it behaving this way. >>> + * >>> + * V2 needs to a) return the result of each frame's >>> + * remap; and b) return -ENOENT if any frame failed >>> + * with -ENOENT. >>> + * >>> + * In this first pass the error code is saved by >>> + * overwriting the mfn and an error is indicated in >>> + * st->err. >>> + * >>> + * The second pass by mmap_return_errors() will write >>> + * the error codes to user space and get the right >>> + * ioctl return value. >>> + */ >>> + *(int *)mfnp = ret; >>> + st->err = ret; >>> } >>> st->va += PAGE_SIZE; >>> >>> @@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void *state) >>> { >>> xen_pfn_t *mfnp = data; >>> struct mmap_batch_state *st = state; >>> + int ret; >>> + >>> + if (st->user_err) { >>> + int err = *(int *)mfnp; >>> + >>> + if (err == -ENOENT) >>> + st->err = err; >>> >>> - return put_user(*mfnp, st->user++); >>> + return __put_user(err, st->user_err++); >>> + } else { >>> + xen_pfn_t mfn; >>> + >>> + ret = __get_user(mfn, st->user_mfn); >>> + if (ret < 0) >>> + return ret; >>> + >>> + mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR; >>> + return __put_user(mfn, st->user_mfn++); >>> + } >>> } >>> >>> static struct vm_operations_struct privcmd_vm_ops; >>> >>> -static long privcmd_ioctl_mmap_batch(void __user *udata) >>> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version) >>> { >>> int ret; >>> - struct privcmd_mmapbatch m; >>> + struct privcmd_mmapbatch_v2 m; >>> struct mm_struct *mm = current->mm; >>> struct vm_area_struct *vma; >>> unsigned long nr_pages; >>> @@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user >>> *udata) >>> if (!xen_initial_domain()) >>> return -EPERM; >>> >>> - if (copy_from_user(&m, udata, sizeof(m))) >>> - return -EFAULT; >>> + switch (version) { >>> + case 1: >>> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch))) >>> + return -EFAULT; >>> + /* Returns per-frame error in m.arr. 
*/ >>> + m.err = NULL; >>> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr))) >>> + return -EFAULT; >>> + break; >>> + case 2: >>> + if (copy_from_user(&m, udata, sizeof(struct >>> privcmd_mmapbatch_v2))) >>> + return -EFAULT; >>> + /* Returns per-frame error code in m.err. */ >>> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err)))) >>> + return -EFAULT; >>> + break; >>> + default: >>> + return -EINVAL; >>> + } >>> >>> nr_pages = m.num; >>> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) >>> return -EINVAL; >>> >>> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), >>> - m.arr); >>> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr); >>> >>> if (ret || list_empty(&pagelist)) >>> goto out; >>> @@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user >>> *udata) >>> >>> up_write(&mm->mmap_sem); >>> >>> - if (state.err > 0) { >>> - state.user = m.arr; >>> + if (state.err) { >>> + state.err = 0; >>> + state.user_mfn = (xen_pfn_t *)m.arr; >>> + state.user_err = m.err; >>> ret = traverse_pages(m.num, sizeof(xen_pfn_t), >>> - &pagelist, >>> - mmap_return_errors, &state); >>> - } >>> + &pagelist, >>> + mmap_return_errors, &state); >>> + if (ret >= 0) >>> + ret = state.err; >>> + } else if (m.err) >>> + __clear_user(m.err, m.num * sizeof(*m.err)); >>> >>> out: >>> free_page_list(&pagelist); >>> @@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file, >>> break; >>> >>> case IOCTL_PRIVCMD_MMAPBATCH: >>> - ret = privcmd_ioctl_mmap_batch(udata); >>> + ret = privcmd_ioctl_mmap_batch(udata, 1); >>> + break; >>> + >>> + case IOCTL_PRIVCMD_MMAPBATCH_V2: >>> + ret = privcmd_ioctl_mmap_batch(udata, 2); >>> break; >>> >>> default: >>> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h >>> index 17857fb..f60d75c 100644 >>> --- a/include/xen/privcmd.h >>> +++ b/include/xen/privcmd.h >>> @@ -59,13 +59,32 @@ struct privcmd_mmapbatch { >>> int num; /* number of pages to populate */ >>> domid_t dom; /* target domain */ >>> __u64 addr; /* virtual address */ >>> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */ >>> + xen_pfn_t __user *arr; /* array of mfns - or'd with >>> + PRIVCMD_MMAPBATCH_MFN_ERROR on err */ >>> +}; >>> + >>> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U >>> + >>> +struct privcmd_mmapbatch_v2 { >>> + unsigned int num; /* number of pages to populate */ >>> + domid_t dom; /* target domain */ >>> + __u64 addr; /* virtual address */ >>> + const xen_pfn_t __user *arr; /* array of mfns */ >>> + int __user *err; /* array of error codes */ >>> }; >>> >>> /* >>> * @cmd: IOCTL_PRIVCMD_HYPERCALL >>> * @arg: &privcmd_hypercall_t >>> * Return: Value returned from execution of the specified hypercall. >>> + * >>> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2 >>> + * @arg: &struct privcmd_mmapbatch_v2 >>> + * Return: 0 on success (i.e., arg->err contains valid error codes for >>> + * each frame). On an error other than a failed frame remap, -1 is >>> + * returned and errno is set to EINVAL, EFAULT etc. As an exception, >>> + * if the operation was otherwise successful but any frame failed with >>> + * -ENOENT, then -1 is returned and errno is set to ENOENT. 
>>> */ >>> #define IOCTL_PRIVCMD_HYPERCALL \ >>> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall)) >>> @@ -73,5 +92,7 @@ struct privcmd_mmapbatch { >>> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap)) >>> #define IOCTL_PRIVCMD_MMAPBATCH \ >>> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch)) >>> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \ >>> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2)) >>> >>> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ >>> -- >>> 1.7.2.5 >>> _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
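For readers who want to see how the new interface looks from the other side of the ioctl, here is a minimal, hypothetical userspace sketch of a V2 caller. It is not libxc code; it assumes the privcmd device node (e.g. /proc/xen/privcmd), a userspace copy of <xen/privcmd.h> providing struct privcmd_mmapbatch_v2 and IOCTL_PRIVCMD_MMAPBATCH_V2, and the xen_pfn_t/domid_t typedefs from the Xen interface headers. The error handling follows the return convention documented in the header comment above: a return of 0, or -1 with errno == ENOENT, means the per-frame err[] array is valid; any other errno means it is not.

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/privcmd.h>   /* struct privcmd_mmapbatch_v2, IOCTL_PRIVCMD_MMAPBATCH_V2 */

/* Map 'num' foreign frames of domain 'dom'; mfns[] holds the frame numbers
 * and err[] must have room for 'num' ints.  Returns the mapped address or
 * MAP_FAILED.  Illustrative sketch only; real callers go through libxc
 * rather than open-coding this. */
static void *map_foreign_v2(int fd, domid_t dom,
                            const xen_pfn_t *mfns, unsigned int num, int *err)
{
	struct privcmd_mmapbatch_v2 m;
	long page_size = sysconf(_SC_PAGESIZE);
	void *addr;
	unsigned int i;

	/* Reserve a VA range backed by the privcmd device. */
	addr = mmap(NULL, (size_t)num * page_size, PROT_READ | PROT_WRITE,
	            MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED)
		return MAP_FAILED;

	m.num  = num;
	m.dom  = dom;
	m.addr = (unsigned long)addr;
	m.arr  = mfns;   /* input only: V2 never rewrites the mfn array */
	m.err  = err;    /* one error code per frame on return */

	if (ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &m) < 0 && errno != ENOENT) {
		/* EINVAL, EFAULT, ...: err[] contents are not meaningful. */
		munmap(addr, (size_t)num * page_size);
		return MAP_FAILED;
	}

	/* Success, or ENOENT (at least one frame was paged out): either way
	 * err[] now holds a per-frame result code. */
	for (i = 0; i < num; i++)
		if (err[i])
			fprintf(stderr, "frame %u failed: %d\n", i, err[i]);

	return addr;
}

A caller would open the privcmd node read/write, allocate an int array of 'num' entries for err[], call the helper, and retry any frames whose err[i] is -ENOENT once the guest pages have been brought back in.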
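For completeness, a similarly hedged sketch of how a legacy V1 caller can decode the in-band errors that the compatibility path writes back into the mfn array. The two marker values are the ones defined in the patch; the ambiguity for mfns that already have their top bits set is the "known limitations vis-a-vis 64 bit callers" the code comment refers to.

#include <errno.h>

/* Marker values from include/xen/privcmd.h in the patch above. */
#define PRIVCMD_MMAPBATCH_MFN_ERROR   0xf0000000U
#define PRIVCMD_MMAPBATCH_PAGED_ERROR 0x80000000U

/* Decode one entry of the mfn array after IOCTL_PRIVCMD_MMAPBATCH (V1).
 * The kernel OR's MFN_ERROR into an entry that failed to map and
 * PAGED_ERROR into an entry whose frame was paged out.  Sketch only;
 * assumes the 32-bit mfn layout the encoding was designed for. */
static int v1_frame_status(unsigned int entry)
{
	if ((entry & PRIVCMD_MMAPBATCH_MFN_ERROR) == PRIVCMD_MMAPBATCH_MFN_ERROR)
		return -EINVAL;   /* generic mapping failure */
	if (entry & PRIVCMD_MMAPBATCH_PAGED_ERROR)
		return -ENOENT;   /* frame paged out; caller may retry */
	return 0;                 /* entry is a valid, mapped mfn */
}

V2 sidesteps this decoding entirely: the mfn array is const on input and the per-frame results live in the separate err[] array, which is why libxc prefers it when the kernel offers it.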