[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH V2 3/3] xen-blkback: handle trim request in backend driver



On Thu, Aug 18, 2011 at 05:34:31PM +0800, Li Dongyang wrote:
> Now blkback driver can handle the trim request from guest, we will
> forward the request to phy device if it really has trim support, or we'll
> punch a hole on the image file.
> 
> Signed-off-by: Li Dongyang <lidongyang@xxxxxxxxxx>
> ---
>  drivers/block/xen-blkback/blkback.c |   85 
> +++++++++++++++++++++++++++++------
>  drivers/block/xen-blkback/common.h  |    4 +-
>  drivers/block/xen-blkback/xenbus.c  |   61 +++++++++++++++++++++++++
>  3 files changed, 135 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/block/xen-blkback/blkback.c 
> b/drivers/block/xen-blkback/blkback.c
> index 2330a9a..5acc37a 100644
> --- a/drivers/block/xen-blkback/blkback.c
> +++ b/drivers/block/xen-blkback/blkback.c
> @@ -39,6 +39,9 @@
>  #include <linux/list.h>
>  #include <linux/delay.h>
>  #include <linux/freezer.h>
> +#include <linux/loop.h>
> +#include <linux/falloc.h>
> +#include <linux/fs.h>
>  
>  #include <xen/events.h>
>  #include <xen/page.h>
> @@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
>  
>  static void print_stats(struct xen_blkif *blkif)
>  {
> -     pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d\n",
> +     pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d"
> +              "  |  tr %4d\n",
>                current->comm, blkif->st_oo_req,
> -              blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
> +              blkif->st_rd_req, blkif->st_wr_req,
> +              blkif->st_f_req, blkif->st_tr_req);
>       blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
>       blkif->st_rd_req = 0;
>       blkif->st_wr_req = 0;
>       blkif->st_oo_req = 0;
> +     blkif->st_tr_req = 0;
>  }
>  
>  int xen_blkif_schedule(void *arg)
> @@ -563,6 +569,10 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>               blkif->st_f_req++;
>               operation = WRITE_FLUSH;
>               break;
> +     case BLKIF_OP_TRIM:
> +             blkif->st_tr_req++;
> +             operation = REQ_DISCARD;
> +             break;
>       case BLKIF_OP_WRITE_BARRIER:
>       default:
>               operation = 0; /* make gcc happy */
> @@ -572,7 +582,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>  
>       /* Check that the number of segments is sane. */
>       nseg = req->nr_segments;
> -     if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
> +     if (unlikely(nseg == 0 && operation != (WRITE_FLUSH | REQ_DISCARD)) ||
>           unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
>               pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
>                        nseg);
> @@ -627,10 +637,13 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>        * the hypercall to unmap the grants - that is all done in
>        * xen_blkbk_unmap.
>        */
> -     if (xen_blkbk_map(req, pending_req, seg))
> +     if (operation != BLKIF_OP_TRIM && xen_blkbk_map(req, pending_req, seg))
>               goto fail_flush;
>  
> -     /* This corresponding xen_blkif_put is done in __end_block_io_op */
> +     /*
> +      * This corresponding xen_blkif_put is done in __end_block_io_op, or
> +      * below if we are handling a BLKIF_OP_TRIM.
> +      */
>       xen_blkif_get(blkif);
>  
>       for (i = 0; i < nseg; i++) {
> @@ -654,18 +667,62 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>               preq.sector_number += seg[i].nsec;
>       }
>  
> -     /* This will be hit if the operation was a flush. */
> +     /* This will be hit if the operation was a flush or trim. */
>       if (!bio) {
> -             BUG_ON(operation != WRITE_FLUSH);
> +             BUG_ON(operation != (WRITE_FLUSH | REQ_DISCARD));
>  
> -             bio = bio_alloc(GFP_KERNEL, 0);
> -             if (unlikely(bio == NULL))
> -                     goto fail_put_bio;
> +             if (operation == WRITE_FLUSH) {
> +                     bio = bio_alloc(GFP_KERNEL, 0);
> +                     if (unlikely(bio == NULL))
> +                             goto fail_put_bio;
>  
> -             biolist[nbio++] = bio;
> -             bio->bi_bdev    = preq.bdev;
> -             bio->bi_private = pending_req;
> -             bio->bi_end_io  = end_block_io_op;
> +                     biolist[nbio++] = bio;
> +                     bio->bi_bdev    = preq.bdev;
> +                     bio->bi_private = pending_req;
> +                     bio->bi_end_io  = end_block_io_op;
> +             } else if (operation == REQ_DISCARD) {
> +                     int err = 0;
> +                     int status = BLKIF_RSP_OKAY;
> +                     struct block_device *bdev = blkif->vbd.bdev;
> +
> +                     preq.nr_sects = req->u.trim.nr_sectors;
> +                     if (blkif->vbd.type & VDISK_PHY_BACKEND)
> +                             /* just forward the trim request */
> +                             err = blkdev_issue_discard(bdev,
> +                                             preq.sector_number,
> +                                             preq.nr_sects,
> +                                             GFP_KERNEL, 0);
> +                     else if (blkif->vbd.type & VDISK_FILE_BACKEND) {
> +                             /* punch a hole in the backing file */
> +                             struct loop_device *lo =
> +                                     bdev->bd_disk->private_data;
> +                             struct file *file = lo->lo_backing_file;
> +
> +                             if (file->f_op->fallocate)
> +                                     err = file->f_op->fallocate(file,
> +                                             FALLOC_FL_KEEP_SIZE |
> +                                             FALLOC_FL_PUNCH_HOLE,
> +                                             preq.sector_number << 9,
> +                                             preq.nr_sects << 9);
> +                             else
> +                                     err = -EOPNOTSUPP;
> +                     } else
> +                             status = BLKIF_RSP_EOPNOTSUPP;
> +
> +                     if (err == -EOPNOTSUPP) {
> +                             DPRINTK("blkback: discard op failed, "
> +                                             "not supported\n");

Use pr_debug like the rest of the file does.

> +                             status = BLKIF_RSP_EOPNOTSUPP;
> +                     } else if (err)
> +                             status = BLKIF_RSP_ERROR;
> +
> +                     if (status == BLKIF_RSP_OKAY)
> +                             blkif->st_tr_sect += preq.nr_sects;
> +                     make_response(blkif, req->id, req->operation, status);
> +                     xen_blkif_put(blkif);
> +                     free_req(pending_req);
> +                     return 0;
> +             }

All of this should really be moved to its own function.

>       }
>  
>       /*
> diff --git a/drivers/block/xen-blkback/common.h 
> b/drivers/block/xen-blkback/common.h
> index 9e40b28..1fef727 100644
> --- a/drivers/block/xen-blkback/common.h
> +++ b/drivers/block/xen-blkback/common.h
> @@ -159,8 +159,10 @@ struct xen_blkif {
>       int                     st_wr_req;
>       int                     st_oo_req;
>       int                     st_f_req;
> +     int                     st_tr_req;
>       int                     st_rd_sect;
>       int                     st_wr_sect;
> +     int                     st_tr_sect;
>  
>       wait_queue_head_t       waiting_to_free;
>  
> @@ -182,7 +184,7 @@ struct xen_blkif {
>  
>  struct phys_req {
>       unsigned short          dev;
> -     unsigned short          nr_sects;
> +     blkif_sector_t          nr_sects;
>       struct block_device     *bdev;
>       blkif_sector_t          sector_number;
>  };
> diff --git a/drivers/block/xen-blkback/xenbus.c 
> b/drivers/block/xen-blkback/xenbus.c
> index 3f129b4..05ea8e0 100644
> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -272,16 +272,20 @@ VBD_SHOW(oo_req,  "%d\n", be->blkif->st_oo_req);
>  VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
>  VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
>  VBD_SHOW(f_req,  "%d\n", be->blkif->st_f_req);
> +VBD_SHOW(tr_req, "%d\n", be->blkif->st_tr_req);
>  VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
>  VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
> +VBD_SHOW(tr_sect, "%d\n", be->blkif->st_tr_sect);
>  
>  static struct attribute *xen_vbdstat_attrs[] = {
>       &dev_attr_oo_req.attr,
>       &dev_attr_rd_req.attr,
>       &dev_attr_wr_req.attr,
>       &dev_attr_f_req.attr,
> +     &dev_attr_tr_req.attr,
>       &dev_attr_rd_sect.attr,
>       &dev_attr_wr_sect.attr,
> +     &dev_attr_tr_sect.attr,
>       NULL
>  };
>  
> @@ -419,6 +423,59 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction 
> xbt,
>       return err;
>  }
>  
> +int xen_blkbk_trim(struct xenbus_transaction xbt, struct backend_info *be)
> +{
> +     struct xenbus_device *dev = be->dev;
> +     struct xen_vbd *vbd = &be->blkif->vbd;
> +     char *type;
> +     int err;
> +     int state = 0;
> +
> +     type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
> +     if (!IS_ERR(type)) {
> +             if (strcmp(type, "file") == 0)
> +                     state = 1;
> +                     vbd->type |= VDISK_FILE_BACKEND;
> +             if (strcmp(type, "phy") == 0) {

Use 'strncmp' please.

> +                     struct block_device *bdev = be->blkif->vbd.bdev;
> +                     struct request_queue *q = bdev_get_queue(bdev);
> +                     if (blk_queue_discard(q)) {
> +                             err = xenbus_printf(xbt, dev->nodename,
> +                                     "discard_granularity", "%u",

Hm, most of the items written to the Xenbus use '-', not '_'.
Any particular reason for using '_'?

> +                                     q->limits.discard_granularity);
> +                             if (err) {
> +                                     xenbus_dev_fatal(dev, err,
> +                                             "writing discard_granularity");
> +                                     goto kfree;
> +                             }
> +                             err = xenbus_printf(xbt, dev->nodename,
> +                                     "discard_alignment", "%u",

Ditto here.
> +                                     q->limits.discard_alignment);
> +                             if (err) {
> +                                     xenbus_dev_fatal(dev, err,
> +                                             "writing discard_alignment");
> +                                     goto kfree;
> +                             }
> +                             state = 1;
> +                             vbd->type |= VDISK_PHY_BACKEND;
> +                     }
> +             }
> +     } else {
> +             err = PTR_ERR(type);
> +             xenbus_dev_fatal(dev, err, "reading type");
> +             goto out;
> +     }
> +
> +     err = xenbus_printf(xbt, dev->nodename, "feature-trim",
> +                         "%d", state);
> +     if (err)
> +             xenbus_dev_fatal(dev, err, "writing feature-trim");
> +kfree:
> +     kfree(type);
> +out:
> +     return err;
> +}
> +
>  /*
>   * Entry point to this code when a new device is created.  Allocate the basic
>   * structures, and watch the store waiting for the hotplug scripts to tell us
> @@ -650,6 +707,10 @@ again:
>       if (err)
>               goto abort;
>  
> +     err = xen_blkbk_trim(xbt, be);
> +     if (err)
> +             goto abort;
> +
>       err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
>                           (unsigned long long)vbd_sz(&be->blkif->vbd));
>       if (err) {
> -- 
> 1.7.6
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.