[Xen-devel] Re: [PATCH V2 3/3] xen-blkback: handle trim request in backend driver
On Thu, Aug 18, 2011 at 6:33 PM, Jan Beulich <JBeulich@xxxxxxxxxx> wrote:
>
>>> On 18.08.11 at 11:35, Li Dongyang <lidongyang@xxxxxxxxxx> wrote:
>> JBeulich@xxxxxxxxxx
>> Subject: [PATCH V2 3/3] xen-blkback: handle trim request in backend driver
>> Date: Thu, 18 Aug 2011 17:34:31 +0800
>> Message-Id: <1313660071-25230-4-git-send-email-lidongyang@xxxxxxxxxx>
>> X-Mailer: git-send-email 1.7.6
>> In-Reply-To: <1313660071-25230-1-git-send-email-lidongyang@xxxxxxxxxx>
>> References: <1313660071-25230-1-git-send-email-lidongyang@xxxxxxxxxx>
>>
>> Now the blkback driver can handle trim requests from the guest: we
>> forward the request to the phy device if it really has trim support,
>> or punch a hole in the backing image file.
>>
>> Signed-off-by: Li Dongyang <lidongyang@xxxxxxxxxx>
>> ---
>>  drivers/block/xen-blkback/blkback.c |   85 +++++++++++++++++++++++++++++------
>>  drivers/block/xen-blkback/common.h  |    4 +-
>>  drivers/block/xen-blkback/xenbus.c  |   61 +++++++++++++++++++++++++
>>  3 files changed, 135 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
>> index 2330a9a..5acc37a 100644
>> --- a/drivers/block/xen-blkback/blkback.c
>> +++ b/drivers/block/xen-blkback/blkback.c
>> @@ -39,6 +39,9 @@
>>  #include <linux/list.h>
>>  #include <linux/delay.h>
>>  #include <linux/freezer.h>
>> +#include <linux/loop.h>
>> +#include <linux/falloc.h>
>> +#include <linux/fs.h>
>>
>>  #include <xen/events.h>
>>  #include <xen/page.h>
>> @@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
>>
>>  static void print_stats(struct xen_blkif *blkif)
>>  {
>> -        pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n",
>> +        pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d"
>> +                " | tr %4d\n",
>>                  current->comm, blkif->st_oo_req,
>> -                blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
>> +                blkif->st_rd_req, blkif->st_wr_req,
>> +                blkif->st_f_req, blkif->st_tr_req);
>>          blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
>>          blkif->st_rd_req = 0;
>>          blkif->st_wr_req = 0;
>>          blkif->st_oo_req = 0;
>> +        blkif->st_tr_req = 0;
>>  }
>>
>>  int xen_blkif_schedule(void *arg)
>> @@ -563,6 +569,10 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>>                  blkif->st_f_req++;
>>                  operation = WRITE_FLUSH;
>>                  break;
>> +        case BLKIF_OP_TRIM:
>> +                blkif->st_tr_req++;
>> +                operation = REQ_DISCARD;
>> +                break;
>>          case BLKIF_OP_WRITE_BARRIER:
>>          default:
>>                  operation = 0; /* make gcc happy */
>> @@ -572,7 +582,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>>
>>          /* Check that the number of segments is sane. */
>>          nseg = req->nr_segments;
>> -        if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
>> +        if (unlikely(nseg == 0 && operation != (WRITE_FLUSH | REQ_DISCARD)) ||
>
> This will match neither WRITE_FLUSH nor REQ_DISCARD.

sorry for the stupid mistake.
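To spell out Jan's point: (WRITE_FLUSH | REQ_DISCARD) is the bitwise OR of two distinct flag values, which equals neither constant, so the inequality is true for both legitimate zero-segment operations and the sanity check fires on every flush and every trim. A minimal sketch of the comparison being asked for, testing each opcode separately (assuming the function's existing fail_response error path):

    if (unlikely(nseg == 0 &&
                 operation != WRITE_FLUSH && operation != REQ_DISCARD) ||
        unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
            /* Bad request: same error handling as the original check. */
            pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
                     nseg);
            goto fail_response;
    }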
>
>>              unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
>>                  pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
>>                           nseg);
>> @@ -627,10 +637,13 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>>           * the hypercall to unmap the grants - that is all done in
>>           * xen_blkbk_unmap.
>>           */
>> -        if (xen_blkbk_map(req, pending_req, seg))
>> +        if (operation != BLKIF_OP_TRIM && xen_blkbk_map(req, pending_req, seg))
>>                  goto fail_flush;
>>
>> -        /* This corresponding xen_blkif_put is done in __end_block_io_op */
>> +        /*
>> +         * This corresponding xen_blkif_put is done in __end_block_io_op, or
>> +         * below if we are handling a BLKIF_OP_TRIM.
>> +         */
>>          xen_blkif_get(blkif);
>>
>>          for (i = 0; i < nseg; i++) {
>> @@ -654,18 +667,62 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>>                  preq.sector_number += seg[i].nsec;
>>          }
>>
>> -        /* This will be hit if the operation was a flush. */
>> +        /* This will be hit if the operation was a flush or trim. */
>>          if (!bio) {
>> -                BUG_ON(operation != WRITE_FLUSH);
>> +                BUG_ON(operation != (WRITE_FLUSH | REQ_DISCARD));
>
> Same here.
>
>>
>> -                bio = bio_alloc(GFP_KERNEL, 0);
>> -                if (unlikely(bio == NULL))
>> -                        goto fail_put_bio;
>> +                if (operation == WRITE_FLUSH) {
>> +                        bio = bio_alloc(GFP_KERNEL, 0);
>> +                        if (unlikely(bio == NULL))
>> +                                goto fail_put_bio;
>>
>> -                biolist[nbio++] = bio;
>> -                bio->bi_bdev = preq.bdev;
>> -                bio->bi_private = pending_req;
>> -                bio->bi_end_io = end_block_io_op;
>> +                        biolist[nbio++] = bio;
>> +                        bio->bi_bdev = preq.bdev;
>> +                        bio->bi_private = pending_req;
>> +                        bio->bi_end_io = end_block_io_op;
>> +                } else if (operation == REQ_DISCARD) {
>> +                        int err = 0;
>> +                        int status = BLKIF_RSP_OKAY;
>> +                        struct block_device *bdev = blkif->vbd.bdev;
>> +
>> +                        preq.nr_sects = req->u.trim.nr_sectors;
>> +                        if (blkif->vbd.type & VDISK_PHY_BACKEND)
>> +                                /* just forward the trim request */
>> +                                err = blkdev_issue_discard(bdev,
>> +                                                preq.sector_number,
>> +                                                preq.nr_sects,
>> +                                                GFP_KERNEL, 0);
>> +                        else if (blkif->vbd.type & VDISK_FILE_BACKEND) {
>> +                                /* punch a hole in the backing file */
>> +                                struct loop_device *lo =
>> +                                        bdev->bd_disk->private_data;
>> +                                struct file *file = lo->lo_backing_file;
>> +
>> +                                if (file->f_op->fallocate)
>> +                                        err = file->f_op->fallocate(file,
>> +                                                FALLOC_FL_KEEP_SIZE |
>> +                                                FALLOC_FL_PUNCH_HOLE,
>> +                                                preq.sector_number << 9,
>> +                                                preq.nr_sects << 9);
>> +                                else
>> +                                        err = -EOPNOTSUPP;
>> +                        } else
>
> Are you not worried about doing this synchronously, i.e. blocking any
> other I/O going on for the device?

If the backend is a phy device with trim support, we just forward the
trim request, and blkdev_issue_discard will allocate a bio and wait for
it to finish. That will indeed block other I/O, but trim is a
non-queueable, non-mergeable op, so it would stall the queue anyway.
If the backend is a file, we punch a hole in the file to make the fs
release the blocks, leaving a "hole" inside the file so the disk usage
is reduced. For hole punching, I don't think we can make it async;
correct me if I'm wrong.
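For readers unfamiliar with the hole-punch semantics being discussed, here is a small stand-alone userspace sketch (illustration only, not part of the patch) using the same flag pair the backend passes to ->fallocate(): the byte range is deallocated so the filesystem frees the blocks, while FALLOC_FL_KEEP_SIZE leaves the apparent file size unchanged.

    #define _GNU_SOURCE
    #include <fcntl.h>      /* open(), fallocate(), FALLOC_FL_* */
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            if (argc != 4) {
                    fprintf(stderr, "usage: %s <file> <offset> <length>\n",
                            argv[0]);
                    return 1;
            }

            int fd = open(argv[1], O_WRONLY);
            if (fd < 0) {
                    perror("open");
                    return 1;
            }

            /* Deallocate [offset, offset+length) but keep the file size;
             * needs a filesystem with hole-punch support, e.g. ext4 or XFS. */
            if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                          atoll(argv[2]), atoll(argv[3])) < 0)
                    perror("fallocate");

            close(fd);
            return 0;
    }

Run against a file backing a loop device, du on the file shrinks while ls -l reports the same size, which is the space saving the reply describes.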
>
>> +                                status = BLKIF_RSP_EOPNOTSUPP;
>> +
>> +                        if (err == -EOPNOTSUPP) {
>> +                                DPRINTK("blkback: discard op failed, "
>> +                                        "not supported\n");
>> +                                status = BLKIF_RSP_EOPNOTSUPP;
>> +                        } else if (err)
>> +                                status = BLKIF_RSP_ERROR;
>> +
>> +                        if (status == BLKIF_RSP_OKAY)
>> +                                blkif->st_tr_sect += preq.nr_sects;
>
> I don't think this is a particularly useful statistic.
>
>> +                        make_response(blkif, req->id, req->operation, status);
>> +                        xen_blkif_put(blkif);
>> +                        free_req(pending_req);
>> +                        return 0;
>> +                }
>>          }
>>
>>          /*
>> diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
>> index 9e40b28..1fef727 100644
>> --- a/drivers/block/xen-blkback/common.h
>> +++ b/drivers/block/xen-blkback/common.h
>> @@ -159,8 +159,10 @@ struct xen_blkif {
>>          int                     st_wr_req;
>>          int                     st_oo_req;
>>          int                     st_f_req;
>> +        int                     st_tr_req;
>>          int                     st_rd_sect;
>>          int                     st_wr_sect;
>> +        int                     st_tr_sect;
>>
>>          wait_queue_head_t       waiting_to_free;
>>
>> @@ -182,7 +184,7 @@ struct xen_blkif {
>>
>>  struct phys_req {
>>          unsigned short          dev;
>> -        unsigned short          nr_sects;
>> +        blkif_sector_t          nr_sects;
>>          struct block_device     *bdev;
>>          blkif_sector_t          sector_number;
>>  };
>> diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
>> index 3f129b4..05ea8e0 100644
>> --- a/drivers/block/xen-blkback/xenbus.c
>> +++ b/drivers/block/xen-blkback/xenbus.c
>> @@ -272,16 +272,20 @@ VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
>>  VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
>>  VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
>>  VBD_SHOW(f_req,   "%d\n", be->blkif->st_f_req);
>> +VBD_SHOW(tr_req,  "%d\n", be->blkif->st_tr_req);
>>  VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
>>  VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
>> +VBD_SHOW(tr_sect, "%d\n", be->blkif->st_tr_sect);
>>
>>  static struct attribute *xen_vbdstat_attrs[] = {
>>          &dev_attr_oo_req.attr,
>>          &dev_attr_rd_req.attr,
>>          &dev_attr_wr_req.attr,
>>          &dev_attr_f_req.attr,
>> +        &dev_attr_tr_req.attr,
>>          &dev_attr_rd_sect.attr,
>>          &dev_attr_wr_sect.attr,
>> +        &dev_attr_tr_sect.attr,
>>          NULL
>>  };
>>
>> @@ -419,6 +423,59 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
>>          return err;
>>  }
>>
>> +int xen_blkbk_trim(struct xenbus_transaction xbt, struct backend_info *be)
>> +{
>> +        struct xenbus_device *dev = be->dev;
>> +        struct xen_vbd *vbd = &be->blkif->vbd;
>> +        char *type;
>> +        int err;
>> +        int state = 0;
>> +
>> +        type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
>> +        if (!IS_ERR(type)) {
>> +                if (strcmp(type, "file") == 0)
>> +                        state = 1;
>> +                        vbd->type |= VDISK_FILE_BACKEND;
>
> Missing { and }.
>
> Jan
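That is, despite the indentation, only the state = 1; assignment is governed by the if; the vbd->type |= VDISK_FILE_BACKEND; line executes for every backend type, so a "phy" disk would also be flagged as file-backed. The intended block is presumably:

    if (strcmp(type, "file") == 0) {
            state = 1;
            vbd->type |= VDISK_FILE_BACKEND;
    }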
>
>> +                if (strcmp(type, "phy") == 0) {
>> +                        struct block_device *bdev = be->blkif->vbd.bdev;
>> +                        struct request_queue *q = bdev_get_queue(bdev);
>> +                        if (blk_queue_discard(q)) {
>> +                                err = xenbus_printf(xbt, dev->nodename,
>> +                                        "discard_granularity", "%u",
>> +                                        q->limits.discard_granularity);
>> +                                if (err) {
>> +                                        xenbus_dev_fatal(dev, err,
>> +                                                "writing discard_granularity");
>> +                                        goto kfree;
>> +                                }
>> +                                err = xenbus_printf(xbt, dev->nodename,
>> +                                        "discard_alignment", "%u",
>> +                                        q->limits.discard_alignment);
>> +                                if (err) {
>> +                                        xenbus_dev_fatal(dev, err,
>> +                                                "writing discard_alignment");
>> +                                        goto kfree;
>> +                                }
>> +                                state = 1;
>> +                                vbd->type |= VDISK_PHY_BACKEND;
>> +                        }
>> +                }
>> +        } else {
>> +                err = PTR_ERR(type);
>> +                xenbus_dev_fatal(dev, err, "reading type");
>> +                goto out;
>> +        }
>> +
>> +        err = xenbus_printf(xbt, dev->nodename, "feature-trim",
>> +                            "%d", state);
>> +        if (err)
>> +                xenbus_dev_fatal(dev, err, "writing feature-trim");
>> +kfree:
>> +        kfree(type);
>> +out:
>> +        return err;
>> +}
>> +
>>  /*
>>   * Entry point to this code when a new device is created. Allocate the basic
>>   * structures, and watch the store waiting for the hotplug scripts to tell us
>> @@ -650,6 +707,10 @@ again:
>>          if (err)
>>                  goto abort;
>>
>> +        err = xen_blkbk_trim(xbt, be);
>> +        if (err)
>> +                goto abort;
>> +
>>          err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
>>                              (unsigned long long)vbd_sz(&be->blkif->vbd));
>>          if (err) {
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel