
Re: [Xen-devel] [PATCH RESEND 3/4] qdisk, hw/block/xen_disk: Perform grant copy instead of grant map.



On Tue, 2016-05-31 at 10:37 +0100, David Vrabel wrote:
> On 31/05/2016 05:44, Paulina Szubarczyk wrote:
> > The grant copy operation is divided into two phases, which differ
> > between 'read' and 'write' operations.
> > 
> > For a 'read' operation the flow is as follows:
> >     1. allocate local buffers for all the segments contained in
> >        a request.
> 
> Allocating buffers page-by-page looks sub-optimal to me.  Why not
> allocate one large buffer for the whole request?
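> 
> Something like this, perhaps (untested sketch, reusing the variables
> from your patch):
> 
>     /* One anonymous mapping sized to the whole request instead of
>      * one mmap() per segment.
>      */
>     void *buf = mmap(NULL, (size_t)ioreq->v.niov << XC_PAGE_SHIFT,
>                      PROT_READ | PROT_WRITE,
>                      MAP_SHARED | MAP_ANONYMOUS, -1, 0);
>     if (buf != MAP_FAILED) {
>         for (i = 0; i < ioreq->v.niov; i++) {
>             pages[i] = (uint8_t *)buf + ((size_t)i << XC_PAGE_SHIFT);
>         }
>     }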
I thought about caching the pages and reusing them if there are more
requests. I made that change in the next patch, 4/4.

> >     2. fill the request io vectors with the buffers' addresses
> >     3. invoke read operation by qemu device
> >     4. in the completion callback, invoke grant copy
> >     5. free the buffers
> > 
> > The function 'ioreq_read_init' implements steps 1 and 2. It is called
> > instead of 'ioreq_map' in 'ioreq_runio_qemu_aio'. The function
> > 'ioreq_runio_qemu_aio' then continues unchanged, performing step 3.
> > Steps 4 and 5 take place in the callback function 'qemu_aio_complete'.
> > The 'ioreq_read' function is implemented for step 4; it calls the new
> > function 'xc_gnttab_copy_grant' presented in the other part of the
> > patch.
> > 
> > For a 'write' operation, step 4 happens before step 2: the data is
> > first copied from the calling guest domain, and then qemu operates
> > on it.
> > ---
> >  hw/block/xen_disk.c | 185 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 185 insertions(+)
> > 
> > diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> > index 3b7882e..43cd9c9 100644
> > --- a/hw/block/xen_disk.c
> > +++ b/hw/block/xen_disk.c
> > @@ -284,6 +284,154 @@ err:
> >      return -1;
> >  }
> >  
> > +
> > +static void* get_buffer(void) {
> > +    void *buf;
> > +
> > +    buf = mmap(NULL, 1 << XC_PAGE_SHIFT, PROT_READ | PROT_WRITE, 
> > +               MAP_SHARED | MAP_ANONYMOUS, -1, 0);
> > +
> > +    if (unlikely(buf == MAP_FAILED))
> > +        return NULL;
> > +
> > +    return buf;
> > +}
> > +
> > +static int free_buffer(void* buf) {
> > +    return munmap(buf, 1 << XC_PAGE_SHIFT);
> 
> I would make this void and assert() the munmap is successful since if
> buf is valid the munmap() cannot fail.  This means...
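> 
> i.e. something like (untested sketch):
> 
>     static void free_buffer(void *buf)
>     {
>         /* munmap() of a valid anonymous mapping cannot fail. */
>         int rc = munmap(buf, 1 << XC_PAGE_SHIFT);
>         assert(rc == 0);
>     }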
> 
> > +}
> > +
> > +static int free_buffers(void** page, int count) 
> 
> This can be void and...
> 
> > +{
> > +    int i, r = 0;
> > +
> > +    for (i = 0; i < count; i++) { 
> > +        
> > +        if(free_buffer(page[i])) 
> > +            r = 1;
> > +        
> > +        page[i] = NULL;
> > +    }
> > +
> > +    return r;
> > +}
> > +
> > +static int ioreq_write(struct ioreq *ioreq) 
> > +{
> > +    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
> > +    uint16_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> > +    uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> > +    uint32_t offset[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> > +    uint32_t len[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> > +    void *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> > +    int i, count = 0, rc, r;
> > +    /* Copy the data for a write operation from guest grant pages,
> > +     * addressed by domids, refs, offset, len, to local buffers.
> > +     *
> > +     * Buffers are then mapped to the pending request for further
> > +     * completion.
> > +     */
> > +
> > +    if (ioreq->v.niov == 0) {
> > +        r = 0; goto out;
> > +    }
> > +
> > +    count = ioreq->v.niov;
> > +    for (i = 0; i < count; i++) {
> > +        domids[i] = ioreq->domids[i];
> > +        refs[i]   = ioreq->refs[i];
> > +        offset[i] = ioreq->req.seg[i].first_sect * ioreq->blkdev->file_blk;
> > +        len[i] = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1)
> > +                  * ioreq->blkdev->file_blk;
> > +        pages[i]  = get_buffer();
> > +
> > +        if(!pages[i]) {
> > +            xen_be_printf(&ioreq->blkdev->xendev, 0, 
> > +                          "failed to alloc page, errno %d \n", errno);
> > +            r = 1; goto out;
> > +        }
> > +    }
> > +    rc = xc_gnttab_copy_grant(gnt, count, domids, refs, pages, offset, len, 1);
> > +
> > +    if(rc) {
> > +        xen_be_printf(&ioreq->blkdev->xendev, 0, 
> > +                      "failed to copy data for write %d \n", rc);
> > +
> > +        if(free_buffers(ioreq->page, ioreq->v.niov)) {
> > +            xen_be_printf(&ioreq->blkdev->xendev, 0, 
> > +                          "failed to free page, errno %d \n", errno);
> > +        }
> > +        r = 1; goto out;
> > +    }
> > +
> > +    for (i = 0; i < ioreq->v.niov; i++) {
> > +        ioreq->page[i] = pages[i];
> > +        ioreq->v.iov[i].iov_base += (uintptr_t)pages[i];
> > +    }
> > +
> > +    r = 0;
> > +out:
> > +    return r;
> > +}
> > +
> > +static int ioreq_read_init(struct ioreq *ioreq) 
> > +{
> > +    int i;
> > +
> > +    if (ioreq->v.niov == 0) {
> > +        return 0;
> > +    }
> > +
> > +    for (i = 0; i < ioreq->v.niov; i++) {
> > +        ioreq->page[i] = get_buffer();
> > +        if(!ioreq->page[i]) {
> > +            return -1;
> > +        }
> > +        ioreq->v.iov[i].iov_base += (uintptr_t)ioreq->page[i];
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int ioreq_read(struct ioreq *ioreq) 
> > +{
> > +    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
> > +    uint16_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> > +    uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> > +    uint32_t offset[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> > +    uint32_t len[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> > +    void *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> > +    int i, count = 0, rc;
> > +
> > +    /* Copy the data from local buffers to guest grant pages addressed by 
> > +     * domids, refs, offset, on the completion of the read operation.
> > +     */
> > +
> > +    if (ioreq->v.niov == 0) {
> > +        return 0;
> > +    }
> > +
> > +    count = ioreq->v.niov;
> > +    for (i = 0; i < count; i++) {
> > +        domids[i] = ioreq->domids[i];
> > +        refs[i]   = ioreq->refs[i];
> > +        offset[i] = ioreq->req.seg[i].first_sect * ioreq->blkdev->file_blk;
> > +        len[i] = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1)
> > +                 * ioreq->blkdev->file_blk;
> > +        pages[i]   = ioreq->v.iov[i].iov_base;
> > +    }
> 
> You can build the ops for read/write at the same time using the same
> code as the only difference is the direction.
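> 
> Untested sketch of what I mean (the helper name is made up):
> 
>     static void ioreq_init_copy_buffers(struct ioreq *ioreq,
>                                         uint16_t *domids, uint32_t *refs,
>                                         uint32_t *offset, uint32_t *len)
>     {
>         int i;
> 
>         for (i = 0; i < ioreq->v.niov; i++) {
>             domids[i] = ioreq->domids[i];
>             refs[i]   = ioreq->refs[i];
>             offset[i] = ioreq->req.seg[i].first_sect * ioreq->blkdev->file_blk;
>             len[i]    = (ioreq->req.seg[i].last_sect -
>                          ioreq->req.seg[i].first_sect + 1)
>                         * ioreq->blkdev->file_blk;
>         }
>     }
> 
> ioreq_write() and ioreq_read() would then differ only in the buffers
> they pass and in the direction flag given to xc_gnttab_copy_grant().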
> > +
> > +    rc = xc_gnttab_copy_grant(gnt, count, domids, refs, pages, offset, len, 0);
> > +    
> > +    if(rc) {
> > +        xen_be_printf(&ioreq->blkdev->xendev, 0, 
> > +                      "failed to copy data to guest %d \n", rc);
> > +        return -1;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >  static int ioreq_runio_qemu_aio(struct ioreq *ioreq);
> >  
> >  static void qemu_aio_complete(void *opaque, int ret)
> > @@ -313,6 +461,22 @@ static void qemu_aio_complete(void *opaque, int ret)
> >      }
> >  
> >      ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
> > +    
> > +    switch(ioreq->req.operation) {
> > +    case BLKIF_OP_READ: 
> > +        if(ioreq_read(ioreq)) {
> > +            xen_be_printf(&ioreq->blkdev->xendev, 0, 
> > +                          "failed to copy read data to guest\n");
> 
> You need to report the failure back to the frontend.
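> 
> E.g. (sketch):
> 
>     ioreq->status = BLKIF_RSP_ERROR;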
> 
> > +        }
> 
> Need a comment here since you're deliberately missing the "break".
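> 
> E.g.:
> 
>     /* 'read' also needs its buffers freed: fall through. */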
> 
> > +    case BLKIF_OP_WRITE:
> > +        if(free_buffers(ioreq->page, ioreq->v.niov)) {
> 
> ...you don't need to consider errors here (see comment on free_buffer()
> above).
Thank you for all the above remarks, I will correct the code.

Paulina

