Xen project Mailing List

Re: [Xen-devel] [PATCH RESEND 3/4] qdisk, hw/block/xen_disk: Perform grant copy instead of grant map.

On 31/05/2016 05:44, Paulina Szubarczyk wrote: > Grant copy operation is divided into two phases different for > 'read' and 'write' operation. > > For a 'read' operation the flow is as follow: > 1. allocate local buffers for all the segments contained in > a request. Allocating buffers page-by-page looks sub-optimal to me. Why not allocate one large buffer for the whole request? > 2. fill the request io vectors with the buffers' addresses > 3. invoke read operation by qemu device > 4. in the completition call grant copy > 5. free the buffers > > Function 'ioreq_read_init' implements 1. and 2. step. It is called > instead of 'ioreq_map' in 'ioreq_runio_qemu_aio'. Then the function > 'ioreq_runio_qemu_aio' continues withouth changes performing step 3. > Steps 4. and 5. are called in the callback function > 'qemu_aio_complete'. The ioreq_read' function is implemented for > step 4 which calls the new function 'xc_gnttab_copy_grant' presented > in the other part of the patch. > > For a 'write' operation steps 4. happens before step 2.. First data > are copied from calling guest domains and then qemu operates on > them. > --- > hw/block/xen_disk.c | 185 > ++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 185 insertions(+) > > diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c > index 3b7882e..43cd9c9 100644 > --- a/hw/block/xen_disk.c > +++ b/hw/block/xen_disk.c > @@ -284,6 +284,154 @@ err: > return -1; > } > > + > +static void* get_buffer(void) { > + void *buf; > + > + buf = mmap(NULL, 1 << XC_PAGE_SHIFT, PROT_READ | PROT_WRITE, > + MAP_SHARED | MAP_ANONYMOUS, -1, 0); > + > + if (unlikely(buf == MAP_FAILED)) > + return NULL; > + > + return buf; > +} > + > +static int free_buffer(void* buf) { > + return munmap(buf, 1 << XC_PAGE_SHIFT); I would make this void and assert() the munmap is successful since if buf is valid the munmap() cannot fail. This means... > +} > + > +static int free_buffers(void** page, int count) This can be void and... 
> +{ > + int i, r = 0; > + > + for (i = 0; i < count; i++) { > + > + if(free_buffer(page[i])) > + r = 1; > + > + page[i] = NULL; > + } > + > + return r; > +} > + > +static int ioreq_write(struct ioreq *ioreq) > +{ > + XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev; > + uint16_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + uint32_t offset[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + uint32_t len[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + void *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + int i, count = 0, rc, r; > + /* Copy the data for write operation from guest grant pages addressed by > + * domids, refs, offset, len to local buffers. > + * > + * Bufferes are then mapped to the pending request for further > + * completition. > + */ > + > + if (ioreq->v.niov == 0) { > + r = 0; goto out; > + } > + > + count = ioreq->v.niov; > + for (i = 0; i < count; i++) { > + domids[i] = ioreq->domids[i]; > + refs[i] = ioreq->refs[i]; > + offset[i] = ioreq->req.seg[i].first_sect * ioreq->blkdev->file_blk; > + len[i] = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect > + 1) > + * ioreq->blkdev->file_blk; > + pages[i] = get_buffer(); > + > + if(!pages[i]) { > + xen_be_printf(&ioreq->blkdev->xendev, 0, > + "failed to alloc page, errno %d \n", errno); > + r = 1; goto out; > + } > + } > + rc = xc_gnttab_copy_grant(gnt, count, domids, refs, pages, offset, len, > 1); > + > + if(rc) { > + xen_be_printf(&ioreq->blkdev->xendev, 0, > + "failed to copy data for write %d \n", rc); > + > + if(free_buffers(ioreq->page, ioreq->v.niov)) { > + xen_be_printf(&ioreq->blkdev->xendev, 0, > + "failed to free page, errno %d \n", errno); > + } > + r = 1; goto out; > + } > + > + for (i = 0; i < ioreq->v.niov; i++) { > + ioreq->page[i] = pages[i]; > + ioreq->v.iov[i].iov_base += (uintptr_t)pages[i]; > + } > + > + r = 0; > +out: > + return r; > +} > + > +static int ioreq_read_init(struct ioreq *ioreq) > +{ > + int i; > + > + if (ioreq->v.niov == 0) { > + return 
0; > + } > + > + for (i = 0; i < ioreq->v.niov; i++) { > + ioreq->page[i] = get_buffer(); > + if(!ioreq->page[i]) { > + return -1; > + } > + ioreq->v.iov[i].iov_base += (uintptr_t)ioreq->page[i]; > + } > + > + return 0; > +} > + > +static int ioreq_read(struct ioreq *ioreq) > +{ > + XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev; > + uint16_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + uint32_t offset[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + uint32_t len[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + void *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + int i, count = 0, rc; > + > + /* Copy the data from local buffers to guest grant pages addressed by > + * domids, refs, offset on the completition of read operation. > + */ > + > + if (ioreq->v.niov == 0) { > + return 0; > + } > + > + count = ioreq->v.niov; > + for (i = 0; i < count; i++) { > + domids[i] = ioreq->domids[i]; > + refs[i] = ioreq->refs[i]; > + offset[i] = ioreq->req.seg[i].first_sect * ioreq->blkdev->file_blk; > + len[i] = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect > + 1) > + * ioreq->blkdev->file_blk; > + pages[i] = ioreq->v.iov[i].iov_base; > + } You can build the ops for read/write at the same time using the same code as the only difference is the direction. > + > + rc = xc_gnttab_copy_grant(gnt, count, domids, refs, pages, offset, len, > 0); > + > + if(rc) { > + xen_be_printf(&ioreq->blkdev->xendev, 0, > + "failed to copy data to guest %d \n", rc); > + return -1; > + } > + > + return 0; > +} > + > static int ioreq_runio_qemu_aio(struct ioreq *ioreq); > > static void qemu_aio_complete(void *opaque, int ret) > @@ -313,6 +461,22 @@ static void qemu_aio_complete(void *opaque, int ret) > } > > ioreq->status = ioreq->aio_errors ? 
BLKIF_RSP_ERROR : BLKIF_RSP_OKAY; > + > + switch(ioreq->req.operation) { > + case BLKIF_OP_READ: > + if(ioreq_read(ioreq)) { > + xen_be_printf(&ioreq->blkdev->xendev, 0, > + "failed to copy read data to guest\n"); You need to report the failure back to the frontend. > + } Need a comment here since you're deliberately missing the "break". > + case BLKIF_OP_WRITE: > + if(free_buffers(ioreq->page, ioreq->v.niov)) { ...you don't need to consider errors here (see comment on free_buffer() above). > + xen_be_printf(&ioreq->blkdev->xendev, 0, > + "failed to free page, errno %d \n", errno); > + } > + break; > + default: > + break; > + } > > ioreq_finish(ioreq); > switch (ioreq->req.operation) { > @@ -335,6 +499,27 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq) > { > struct XenBlkDev *blkdev = ioreq->blkdev; > > + switch (ioreq->req.operation) { > + case BLKIF_OP_READ: > + if (ioreq_read_init(ioreq)) { > + xen_be_printf(&ioreq->blkdev->xendev, 0, > + "failed to initialize buffers for" > + "copy data to guest %d \n", errno); > + goto out; > + } > + break; > + case BLKIF_OP_WRITE: > + case BLKIF_OP_FLUSH_DISKCACHE: > + if (ioreq_write(ioreq)) { > + xen_be_printf(&ioreq->blkdev->xendev, 0, > + "failed to write data from guest\n"); > + goto out; > + } > + break; > + default: > + break; > + } > + > ioreq->aio_inflight++; > if (ioreq->presync) { > blk_aio_flush(ioreq->blkdev->blk, qemu_aio_complete, ioreq); > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.