[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] Re: <missing subject #3>



On Thu, Aug 18, 2011 at 6:33 PM, Jan Beulich <JBeulich@xxxxxxxxxx> wrote:
>        >>> On 18.08.11 at 11:35, Li Dongyang <lidongyang@xxxxxxxxxx> wrote:
>> JBeulich@xxxxxxxxxx
>> Subject: [PATCH V2 3/3] xen-blkback: handle trim request in backend driver
>> Date: Thu, 18 Aug 2011 17:34:31 +0800
>> Message-Id: <1313660071-25230-4-git-send-email-lidongyang@xxxxxxxxxx>
>> X-Mailer: git-send-email 1.7.6
>> In-Reply-To: <1313660071-25230-1-git-send-email-lidongyang@xxxxxxxxxx>
>> References: <1313660071-25230-1-git-send-email-lidongyang@xxxxxxxxxx>
>>
>> Now blkback driver can handle the trim request from guest, we will
>> forward the request to phy device if it really has trim support, or we'll
>> punch a hole on the image file.
>>
>> Signed-off-by: Li Dongyang <lidongyang@xxxxxxxxxx>
>> ---
>>  drivers/block/xen-blkback/blkback.c |   85 
>> +++++++++++++++++++++++++++++------
>>  drivers/block/xen-blkback/common.h  |    4 +-
>>  drivers/block/xen-blkback/xenbus.c  |   61 +++++++++++++++++++++++++
>>  3 files changed, 135 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkback/blkback.c
>> b/drivers/block/xen-blkback/blkback.c
>> index 2330a9a..5acc37a 100644
>> --- a/drivers/block/xen-blkback/blkback.c
>> +++ b/drivers/block/xen-blkback/blkback.c
>> @@ -39,6 +39,9 @@
>>  #include <linux/list.h>
>>  #include <linux/delay.h>
>>  #include <linux/freezer.h>
>> +#include <linux/loop.h>
>> +#include <linux/falloc.h>
>> +#include <linux/fs.h>
>>
>>  #include <xen/events.h>
>>  #include <xen/page.h>
>> @@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
>>
>>  static void print_stats(struct xen_blkif *blkif)
>>  {
>> -     pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d\n",
>> +     pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d"
>> +              "  |  tr %4d\n",
>>                current->comm, blkif->st_oo_req,
>> -              blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
>> +              blkif->st_rd_req, blkif->st_wr_req,
>> +              blkif->st_f_req, blkif->st_tr_req);
>>       blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
>>       blkif->st_rd_req = 0;
>>       blkif->st_wr_req = 0;
>>       blkif->st_oo_req = 0;
>> +     blkif->st_tr_req = 0;
>>  }
>>
>>  int xen_blkif_schedule(void *arg)
>> @@ -563,6 +569,10 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>>               blkif->st_f_req++;
>>               operation = WRITE_FLUSH;
>>               break;
>> +     case BLKIF_OP_TRIM:
>> +             blkif->st_tr_req++;
>> +             operation = REQ_DISCARD;
>> +             break;
>>       case BLKIF_OP_WRITE_BARRIER:
>>       default:
>>               operation = 0; /* make gcc happy */
>> @@ -572,7 +582,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>>
>>       /* Check that the number of segments is sane. */
>>       nseg = req->nr_segments;
>> -     if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
>> +     if (unlikely(nseg == 0 && operation != (WRITE_FLUSH | REQ_DISCARD)) ||
>
> This will match neither WRITE_FLUSH nor REQ_DISCARD.
Sorry for the stupid mistake.
>
>>           unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
>>               pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
>>                        nseg);
>> @@ -627,10 +637,13 @@ static int dispatch_rw_block_io(struct xen_blkif
>> *blkif,
>>        * the hypercall to unmap the grants - that is all done in
>>        * xen_blkbk_unmap.
>>        */
>> -     if (xen_blkbk_map(req, pending_req, seg))
>> +     if (operation != BLKIF_OP_TRIM && xen_blkbk_map(req, pending_req, seg))
>>               goto fail_flush;
>>
>> -     /* This corresponding xen_blkif_put is done in __end_block_io_op */
>> +     /*
>> +      * This corresponding xen_blkif_put is done in __end_block_io_op, or
>> +      * below if we are handling a BLKIF_OP_TRIM.
>> +      */
>>       xen_blkif_get(blkif);
>>
>>       for (i = 0; i < nseg; i++) {
>> @@ -654,18 +667,62 @@ static int dispatch_rw_block_io(struct xen_blkif
>> *blkif,
>>               preq.sector_number += seg[i].nsec;
>>       }
>>
>> -     /* This will be hit if the operation was a flush. */
>> +     /* This will be hit if the operation was a flush or trim. */
>>       if (!bio) {
>> -             BUG_ON(operation != WRITE_FLUSH);
>> +             BUG_ON(operation != (WRITE_FLUSH | REQ_DISCARD));
>
> Same here.
>
>>
>> -             bio = bio_alloc(GFP_KERNEL, 0);
>> -             if (unlikely(bio == NULL))
>> -                     goto fail_put_bio;
>> +             if (operation == WRITE_FLUSH) {
>> +                     bio = bio_alloc(GFP_KERNEL, 0);
>> +                     if (unlikely(bio == NULL))
>> +                             goto fail_put_bio;
>>
>> -             biolist[nbio++] = bio;
>> -             bio->bi_bdev    = preq.bdev;
>> -             bio->bi_private = pending_req;
>> -             bio->bi_end_io  = end_block_io_op;
>> +                     biolist[nbio++] = bio;
>> +                     bio->bi_bdev    = preq.bdev;
>> +                     bio->bi_private = pending_req;
>> +                     bio->bi_end_io  = end_block_io_op;
>> +             } else if (operation == REQ_DISCARD) {
>> +                     int err = 0;
>> +                     int status = BLKIF_RSP_OKAY;
>> +                     struct block_device *bdev = blkif->vbd.bdev;
>> +
>> +                     preq.nr_sects = req->u.trim.nr_sectors;
>> +                     if (blkif->vbd.type & VDISK_PHY_BACKEND)
>> +                             /* just forward the trim request */
>> +                             err = blkdev_issue_discard(bdev,
>> +                                             preq.sector_number,
>> +                                             preq.nr_sects,
>> +                                             GFP_KERNEL, 0);
>> +                     else if (blkif->vbd.type & VDISK_FILE_BACKEND) {
>> +                             /* punch a hole in the backing file */
>> +                             struct loop_device *lo =
>> +                                     bdev->bd_disk->private_data;
>> +                             struct file *file = lo->lo_backing_file;
>> +
>> +                             if (file->f_op->fallocate)
>> +                                     err = file->f_op->fallocate(file,
>> +                                             FALLOC_FL_KEEP_SIZE |
>> +                                             FALLOC_FL_PUNCH_HOLE,
>> +                                             preq.sector_number << 9,
>> +                                             preq.nr_sects << 9);
>> +                             else
>> +                                     err = -EOPNOTSUPP;
>> +                     } else
>
> Are you not worried about doing this synchronously, i.e. blocking any
> other I/O going on for the device?
If the backend is a phy device that supports trim, we simply forward the
trim request: blkdev_issue_discard() allocates a bio and waits for it to
complete. Yes, that blocks other I/O, but trim is a non-queued, non-mergeable
operation, so it is going to stall the queue anyway.
If the backend is a file, we punch a hole in the file so that the
filesystem releases the underlying blocks, which reduces disk usage.
For hole punching, I don't think we can make it asynchronous; correct me
if I am wrong.
>
>> +                             status = BLKIF_RSP_EOPNOTSUPP;
>> +
>> +                     if (err == -EOPNOTSUPP) {
>> +                             DPRINTK("blkback: discard op failed, "
>> +                                             "not supported\n");
>> +                             status = BLKIF_RSP_EOPNOTSUPP;
>> +                     } else if (err)
>> +                             status = BLKIF_RSP_ERROR;
>> +
>> +                     if (status == BLKIF_RSP_OKAY)
>> +                             blkif->st_tr_sect += preq.nr_sects;
>
> I don't think this is a particularly useful statistic.
>
>> +                     make_response(blkif, req->id, req->operation, status);
>> +                     xen_blkif_put(blkif);
>> +                     free_req(pending_req);
>> +                     return 0;
>> +             }
>>       }
>>
>>       /*
>> diff --git a/drivers/block/xen-blkback/common.h
>> b/drivers/block/xen-blkback/common.h
>> index 9e40b28..1fef727 100644
>> --- a/drivers/block/xen-blkback/common.h
>> +++ b/drivers/block/xen-blkback/common.h
>> @@ -159,8 +159,10 @@ struct xen_blkif {
>>       int                     st_wr_req;
>>       int                     st_oo_req;
>>       int                     st_f_req;
>> +     int                     st_tr_req;
>>       int                     st_rd_sect;
>>       int                     st_wr_sect;
>> +     int                     st_tr_sect;
>>
>>       wait_queue_head_t       waiting_to_free;
>>
>> @@ -182,7 +184,7 @@ struct xen_blkif {
>>
>>  struct phys_req {
>>       unsigned short          dev;
>> -     unsigned short          nr_sects;
>> +     blkif_sector_t          nr_sects;
>>       struct block_device     *bdev;
>>       blkif_sector_t          sector_number;
>>  };
>> diff --git a/drivers/block/xen-blkback/xenbus.c
>> b/drivers/block/xen-blkback/xenbus.c
>> index 3f129b4..05ea8e0 100644
>> --- a/drivers/block/xen-blkback/xenbus.c
>> +++ b/drivers/block/xen-blkback/xenbus.c
>> @@ -272,16 +272,20 @@ VBD_SHOW(oo_req,  "%d\n", be->blkif->st_oo_req);
>>  VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
>>  VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
>>  VBD_SHOW(f_req,  "%d\n", be->blkif->st_f_req);
>> +VBD_SHOW(tr_req, "%d\n", be->blkif->st_tr_req);
>>  VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
>>  VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
>> +VBD_SHOW(tr_sect, "%d\n", be->blkif->st_tr_sect);
>>
>>  static struct attribute *xen_vbdstat_attrs[] = {
>>       &dev_attr_oo_req.attr,
>>       &dev_attr_rd_req.attr,
>>       &dev_attr_wr_req.attr,
>>       &dev_attr_f_req.attr,
>> +     &dev_attr_tr_req.attr,
>>       &dev_attr_rd_sect.attr,
>>       &dev_attr_wr_sect.attr,
>> +     &dev_attr_tr_sect.attr,
>>       NULL
>>  };
>>
>> @@ -419,6 +423,59 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction
>> xbt,
>>       return err;
>>  }
>>
>> +int xen_blkbk_trim(struct xenbus_transaction xbt, struct backend_info *be)
>> +{
>> +     struct xenbus_device *dev = be->dev;
>> +     struct xen_vbd *vbd = &be->blkif->vbd;
>> +     char *type;
>> +     int err;
>> +     int state = 0;
>> +
>> +     type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
>> +     if (!IS_ERR(type)) {
>> +             if (strcmp(type, "file") == 0)
>> +                     state = 1;
>> +                     vbd->type |= VDISK_FILE_BACKEND;
>
> Missing { and }.
>
> Jan
>
>> +             if (strcmp(type, "phy") == 0) {
>> +                     struct block_device *bdev = be->blkif->vbd.bdev;
>> +                     struct request_queue *q = bdev_get_queue(bdev);
>> +                     if (blk_queue_discard(q)) {
>> +                             err = xenbus_printf(xbt, dev->nodename,
>> +                                     "discard_granularity", "%u",
>> +                                     q->limits.discard_granularity);
>> +                             if (err) {
>> +                                     xenbus_dev_fatal(dev, err,
>> +                                             "writing discard_granularity");
>> +                                     goto kfree;
>> +                             }
>> +                             err = xenbus_printf(xbt, dev->nodename,
>> +                                     "discard_alignment", "%u",
>> +                                     q->limits.discard_alignment);
>> +                             if (err) {
>> +                                     xenbus_dev_fatal(dev, err,
>> +                                             "writing discard_alignment");
>> +                                     goto kfree;
>> +                             }
>> +                             state = 1;
>> +                             vbd->type |= VDISK_PHY_BACKEND;
>> +                     }
>> +             }
>> +     } else {
>> +             err = PTR_ERR(type);
>> +             xenbus_dev_fatal(dev, err, "reading type");
>> +             goto out;
>> +     }
>> +
>> +     err = xenbus_printf(xbt, dev->nodename, "feature-trim",
>> +                         "%d", state);
>> +     if (err)
>> +             xenbus_dev_fatal(dev, err, "writing feature-trim");
>> +kfree:
>> +     kfree(type);
>> +out:
>> +     return err;
>> +}
>> +
>>  /*
>>   * Entry point to this code when a new device is created.  Allocate the
>> basic
>>   * structures, and watch the store waiting for the hotplug scripts to tell
>> us
>> @@ -650,6 +707,10 @@ again:
>>       if (err)
>>               goto abort;
>>
>> +     err = xen_blkbk_trim(xbt, be);
>> +     if (err)
>> +             goto abort;
>> +
>>       err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
>>                           (unsigned long long)vbd_sz(&be->blkif->vbd));
>>       if (err) {
>
>
>
>

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.