
Re: [Xen-devel] [PATCH v3 2/2] qdisk - hw/block/xen_disk: grant copy implementation





On 07/15/2016 06:55 PM, Anthony PERARD wrote:
On Wed, Jun 22, 2016 at 10:38:53AM +0200, Paulina Szubarczyk wrote:
Copy the data operated on during a request from/to local buffers to/from
the grant references.

Before the grant copy operation, local buffers must be allocated, which is
done by calling ioreq_init_copy_buffers. For a 'read' operation the qemu
device first performs the read into the local buffers; on completion the
grant copy is carried out and the buffers are freed. For a 'write' operation
the grant copy is performed before the qemu device issues the write.

A new field 'feature_grant_copy' is added to record whether the grant copy
operation is supported. The body of the function 'ioreq_runio_qemu_aio' is
moved to 'ioreq_runio_qemu_aio_blk'; depending on grant copy support,
'ioreq_runio_qemu_aio' performs the corresponding checks, initialization and
grant operation, and then calls 'ioreq_runio_qemu_aio_blk'.

Signed-off-by: Paulina Szubarczyk <paulinaszubarczyk@xxxxxxxxx>

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 37e14d1..4eca06a 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -500,6 +503,99 @@ static int ioreq_map(struct ioreq *ioreq)
      return 0;
  }

+static void* get_buffer(int count)
+{
+    return xc_memalign(xen_xc, XC_PAGE_SIZE, count*XC_PAGE_SIZE);

Instead of xc_memalign, I think you need to call qemu_memalign() here.
Have a look at the file HACKING, the section '3. Low level memory
management'. Also, you probably do not need the extra function
get_buffer() and can call qemu_memalign() directly in
ioreq_init_copy_buffers().


Ok, I will change that.
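
For reference, a minimal sketch of that change (assuming the qemu_memalign()
helper described in HACKING, which aborts on allocation failure, so the NULL
check could go away as well) would be to allocate directly in
ioreq_init_copy_buffers():

    /* one aligned allocation covering all segments of the request */
    ioreq->pages = qemu_memalign(XC_PAGE_SIZE, ioreq->v.niov * XC_PAGE_SIZE);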

+}
+
+static void free_buffers(struct ioreq *ioreq)
+{
+    int i;
+
+    for (i = 0; i < ioreq->v.niov; i++) {
+        ioreq->page[i] = NULL;
+    }
+
+    free(ioreq->pages);

With the use of qemu_memalign, this would need to be qemu_vfree().

+}
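
With qemu_memalign() on the allocation side, the matching release is
qemu_vfree(); a sketch of the adjusted helper (same shape as above, only the
free call changes):

    static void free_buffers(struct ioreq *ioreq)
    {
        int i;

        for (i = 0; i < ioreq->v.niov; i++) {
            ioreq->page[i] = NULL;
        }

        /* release the buffer allocated with qemu_memalign() */
        qemu_vfree(ioreq->pages);
    }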
+
+static int ioreq_init_copy_buffers(struct ioreq *ioreq)
+{
+    int i;
+
+    if (ioreq->v.niov == 0) {
+        return 0;
+    }
+
+    ioreq->pages = get_buffer(ioreq->v.niov);
+    if (!ioreq->pages) {
+        return -1;
+    }
+
+    for (i = 0; i < ioreq->v.niov; i++) {
+        ioreq->page[i] = ioreq->pages + i*XC_PAGE_SIZE;
+        ioreq->v.iov[i].iov_base += (uintptr_t)ioreq->page[i];

Is the += intended here?


I followed the assignment that ioreq_map makes to ioreq->v.iov[i].iov_base, which is written that way. But I agree that summing the pointers does not make sense here; I will change it to =.

+    }
+
+    return 0;
+}
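
With the '=' fix applied, the buffer setup loop would then read (a sketch of
the planned follow-up, not the patch as posted):

    for (i = 0; i < ioreq->v.niov; i++) {
        /* carve the i-th page out of the single aligned allocation */
        ioreq->page[i] = ioreq->pages + i * XC_PAGE_SIZE;
        /* point the iovec at the local bounce buffer, rather than adding to it */
        ioreq->v.iov[i].iov_base = ioreq->page[i];
    }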
+
+static int ioreq_copy(struct ioreq *ioreq)
+{
+    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
+    xengnttab_grant_copy_segment_t segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    int i, count = 0, r, rc;
+    int64_t file_blk = ioreq->blkdev->file_blk;
+
+    if (ioreq->v.niov == 0) {
+        return 0;
+    }
+
+    count = ioreq->v.niov;
+
+    for (i = 0; i < count; i++) {
+
+        if (ioreq->req.operation == BLKIF_OP_READ) {
+            segs[i].flags = GNTCOPY_dest_gref;
+            segs[i].dest.foreign.ref = ioreq->refs[i];
+            segs[i].dest.foreign.domid = ioreq->domids[i];
+            segs[i].dest.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
+            segs[i].source.virt = ioreq->v.iov[i].iov_base;
+        } else {
+            segs[i].flags = GNTCOPY_source_gref;
+            segs[i].source.foreign.ref = ioreq->refs[i];
+            segs[i].source.foreign.domid = ioreq->domids[i];
+            segs[i].source.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
+            segs[i].dest.virt = ioreq->v.iov[i].iov_base;
+        }
+        segs[i].len = (ioreq->req.seg[i].last_sect
+                       - ioreq->req.seg[i].first_sect + 1) * file_blk;
+
+    }
+
+    rc = xengnttab_grant_copy(gnt, count, segs);
+
+    if (rc) {
+        xen_be_printf(&ioreq->blkdev->xendev, 0,
+                      "failed to copy data %d \n", rc);
+        ioreq->aio_errors++;
+        return -1;
+    } else {
+        r = 0;
+    }
+
+    for (i = 0; i < count; i++) {
+        if (segs[i].status != GNTST_okay) {
+            xen_be_printf(&ioreq->blkdev->xendev, 3,
+                          "failed to copy data %d for gref %d, domid %d\n", rc,
+                          ioreq->refs[i], ioreq->domids[i]);
+            ioreq->aio_errors++;
+            r = -1;
+        }
+    }
+
+    return r;
+}
+
  static int ioreq_runio_qemu_aio(struct ioreq *ioreq);

  static void qemu_aio_complete(void *opaque, int ret)
@@ -528,8 +624,31 @@ static void qemu_aio_complete(void *opaque, int ret)
          return;
      }

+    if (ioreq->blkdev->feature_grant_copy) {
+        switch (ioreq->req.operation) {
+        case BLKIF_OP_READ:
+            /* in case of failure ioreq->aio_errors is increased */
+            ioreq_copy(ioreq);
+            free_buffers(ioreq);
+            break;
+        case BLKIF_OP_WRITE:
+        case BLKIF_OP_FLUSH_DISKCACHE:
+            if (!ioreq->req.nr_segments) {
+                break;
+            }
+            free_buffers(ioreq);
+            break;
+        default:
+            break;
+        }
+    }
+
      ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
-    ioreq_unmap(ioreq);
+
+    if (!ioreq->blkdev->feature_grant_copy) {
+        ioreq_unmap(ioreq);
+    }
+
      ioreq_finish(ioreq);
      switch (ioreq->req.operation) {
      case BLKIF_OP_WRITE:
@@ -547,14 +666,42 @@ static void qemu_aio_complete(void *opaque, int ret)
      qemu_bh_schedule(ioreq->blkdev->bh);
  }

+static int ioreq_runio_qemu_aio_blk(struct ioreq *ioreq);
+
  static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
  {
-    struct XenBlkDev *blkdev = ioreq->blkdev;
+    if (ioreq->blkdev->feature_grant_copy) {
+
+        ioreq_init_copy_buffers(ioreq);
+        if (ioreq->req.nr_segments && (ioreq->req.operation == BLKIF_OP_WRITE ||
+            ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE)) {
+            if (ioreq_copy(ioreq)) {

Would it make sense to do this ioreq_copy() directly in
ioreq_runio_qemu_aio_blk?

Do you mean moving only the ioreq_copy() call, or removing the new function entirely?

I divided the old ioreq_runio_qemu_aio function into two parts. It makes it clear which feature is being used: the initialization and error handling differ between grant copy and grant map, and this way the Xen memory management is separated from the actual processing of the request by qemu.


+                free_buffers(ioreq);
+                goto err;
+            }
+        }
+        if (ioreq_runio_qemu_aio_blk(ioreq)) goto err;
+
+    } else {

-    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
-        goto err_no_map;
+        if (ioreq->req.nr_segments && ioreq_map(ioreq)) goto err;
+        if (ioreq_runio_qemu_aio_blk(ioreq)) {
+            ioreq_unmap(ioreq);
+            goto err;
+        }
      }

+    return 0;
+err:
+    ioreq_finish(ioreq);
+    ioreq->status = BLKIF_RSP_ERROR;
+    return -1;
+}
+
+static int ioreq_runio_qemu_aio_blk(struct ioreq *ioreq)
+{
+    struct XenBlkDev *blkdev = ioreq->blkdev;
+
      ioreq->aio_inflight++;
      if (ioreq->presync) {
          blk_aio_flush(ioreq->blkdev->blk, qemu_aio_complete, ioreq);
@@ -594,19 +741,12 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
      }
      default:
          /* unknown operation (shouldn't happen -- parse catches this) */
-        goto err;
+        return -1;
      }

      qemu_aio_complete(ioreq, 0);

      return 0;
-
-err:
-    ioreq_unmap(ioreq);
-err_no_map:
-    ioreq_finish(ioreq);
-    ioreq->status = BLKIF_RSP_ERROR;
-    return -1;
  }

  static int blk_send_response_one(struct ioreq *ioreq)
@@ -1020,10 +1160,17 @@ static int blk_connect(struct XenDevice *xendev)

      xen_be_bind_evtchn(&blkdev->xendev);

+    blkdev->feature_grant_copy =
+                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
+
+    xen_be_printf(&blkdev->xendev, 3, "grant copy operation %s\n",
+                  blkdev->feature_grant_copy ? "enabled" : "disabled");
+
      xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
                    "remote port %d, local port %d\n",
                    blkdev->xendev.protocol, blkdev->ring_ref,
                    blkdev->xendev.remote_port, blkdev->xendev.local_port);
+
      return 0;
  }

A couple of other things: could you rebase your patch on QEMU upstream and CC
qemu-devel@xxxxxxxxxx? You can also check the coding style of the patch
with ./scripts/checkpatch.pl.

Thank you,


Yes, thank you.

Paulina

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 

