[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] stubdom: add asynchronous disk flush support
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1207430425 -3600 # Node ID 6bf674bd386de8c3e4acd259c566aaf7206d6f11 # Parent e0f9bee70cbfec0a14a3416beaadba9768f18eb6 stubdom: add asynchronous disk flush support Signed-off-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx> --- extras/mini-os/blkfront.c | 188 +++++++++++++++++++------------------- extras/mini-os/include/blkfront.h | 12 ++ extras/mini-os/kernel.c | 41 +++----- tools/ioemu/block-vbd.c | 48 +++++++-- 4 files changed, 163 insertions(+), 126 deletions(-) diff -r e0f9bee70cbf -r 6bf674bd386d extras/mini-os/blkfront.c --- a/extras/mini-os/blkfront.c Sat Apr 05 22:19:02 2008 +0100 +++ b/extras/mini-os/blkfront.c Sat Apr 05 22:20:25 2008 +0100 @@ -48,11 +48,7 @@ struct blkfront_dev { char *nodename; char *backend; - unsigned sector_size; - unsigned sectors; - int mode; - int barrier; - int flush; + struct blkfront_info info; #ifdef HAVE_LIBC int fd; @@ -70,7 +66,7 @@ void blkfront_handler(evtchn_port_t port wake_up(&blkfront_queue); } -struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode, int *info) +struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info) { xenbus_transaction_t xbt; char* err; @@ -163,9 +159,9 @@ done: return NULL; } if (*c == 'w') - *mode = dev->mode = O_RDWR; + dev->info.mode = O_RDWR; else - *mode = dev->mode = O_RDONLY; + dev->info.mode = O_RDONLY; free(c); snprintf(path, sizeof(path), "%s/state", dev->backend); @@ -177,24 +173,26 @@ done: xenbus_unwatch_path(XBT_NIL, path); snprintf(path, sizeof(path), "%s/info", dev->backend); - *info = xenbus_read_integer(path); + dev->info.info = xenbus_read_integer(path); snprintf(path, sizeof(path), "%s/sectors", dev->backend); // FIXME: read_integer returns an int, so disk size limited to 1TB for now - *sectors = dev->sectors = xenbus_read_integer(path); + dev->info.sectors = xenbus_read_integer(path); snprintf(path, sizeof(path), "%s/sector-size", 
dev->backend); - *sector_size = dev->sector_size = xenbus_read_integer(path); + dev->info.sector_size = xenbus_read_integer(path); snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend); - dev->barrier = xenbus_read_integer(path); + dev->info.barrier = xenbus_read_integer(path); snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend); - dev->flush = xenbus_read_integer(path); + dev->info.flush = xenbus_read_integer(path); + + *info = dev->info; } unmask_evtchn(dev->evtchn); - printk("%u sectors of %u bytes\n", dev->sectors, dev->sector_size); + printk("%u sectors of %u bytes\n", dev->info.sectors, dev->info.sector_size); printk("**************************\n"); return dev; @@ -258,11 +256,11 @@ void blkfront_aio(struct blkfront_aiocb uintptr_t start, end; // Can't io at non-sector-aligned location - ASSERT(!(aiocbp->aio_offset & (dev->sector_size-1))); + ASSERT(!(aiocbp->aio_offset & (dev->info.sector_size-1))); // Can't io non-sector-sized amounts - ASSERT(!(aiocbp->aio_nbytes & (dev->sector_size-1))); + ASSERT(!(aiocbp->aio_nbytes & (dev->info.sector_size-1))); // Can't io non-sector-aligned buffer - ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->sector_size-1))); + ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->info.sector_size-1))); start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK; end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK; @@ -280,7 +278,7 @@ void blkfront_aio(struct blkfront_aiocb req->nr_segments = n; req->handle = dev->handle; req->id = (uintptr_t) aiocbp; - req->sector_number = aiocbp->aio_offset / dev->sector_size; + req->sector_number = aiocbp->aio_offset / dev->info.sector_size; for (j = 0; j < n; j++) { uintptr_t data = start + j * PAGE_SIZE; @@ -292,10 +290,10 @@ void blkfront_aio(struct blkfront_aiocb aiocbp->gref[j] = req->seg[j].gref = gnttab_grant_access(dev->dom, virtual_to_mfn(data), write); req->seg[j].first_sect = 0; - req->seg[j].last_sect = PAGE_SIZE / dev->sector_size - 1; - } - 
req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->sector_size; - req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->sector_size; + req->seg[j].last_sect = PAGE_SIZE / dev->info.sector_size - 1; + } + req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->info.sector_size; + req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->info.sector_size; dev->ring.req_prod_pvt = i + 1; @@ -315,67 +313,7 @@ void blkfront_aio_read(struct blkfront_a blkfront_aio(aiocbp, 0); } -int blkfront_aio_poll(struct blkfront_dev *dev) -{ - RING_IDX rp, cons; - struct blkif_response *rsp; - int more; - -moretodo: -#ifdef HAVE_LIBC - files[dev->fd].read = 0; - mb(); /* Make sure to let the handler set read to 1 before we start looking at the ring */ -#endif - - rp = dev->ring.sring->rsp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. */ - cons = dev->ring.rsp_cons; - - int nr_consumed = 0; - while ((cons != rp)) - { - rsp = RING_GET_RESPONSE(&dev->ring, cons); - nr_consumed++; - - if (rsp->status != BLKIF_RSP_OKAY) - printk("block error %d for op %d\n", rsp->status, rsp->operation); - - switch (rsp->operation) { - case BLKIF_OP_READ: - case BLKIF_OP_WRITE: - { - struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id; - int status = rsp->status; - int j; - - for (j = 0; j < aiocbp->n; j++) - gnttab_end_access(aiocbp->gref[j]); - - dev->ring.rsp_cons = ++cons; - /* Nota: callback frees aiocbp itself */ - aiocbp->aio_cb(aiocbp, status ? 
-EIO : 0); - if (dev->ring.rsp_cons != cons) - /* We reentered, we must not continue here */ - goto out; - break; - } - default: - printk("unrecognized block operation %d response\n", rsp->operation); - case BLKIF_OP_WRITE_BARRIER: - case BLKIF_OP_FLUSH_DISKCACHE: - dev->ring.rsp_cons = ++cons; - break; - } - } - -out: - RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more); - if (more) goto moretodo; - - return nr_consumed; -} - -static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op) +static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op, uint64_t id) { int i; struct blkif_request *req; @@ -387,8 +325,7 @@ static void blkfront_push_operation(stru req->operation = op; req->nr_segments = 0; req->handle = dev->handle; - /* Not used */ - req->id = 0; + req->id = id; /* Not needed anyway, but the backend will check it */ req->sector_number = 0; dev->ring.req_prod_pvt = i + 1; @@ -397,16 +334,22 @@ static void blkfront_push_operation(stru if (notify) notify_remote_via_evtchn(dev->evtchn); } +void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op) +{ + struct blkfront_dev *dev = aiocbp->aio_dev; + blkfront_push_operation(dev, op, (uintptr_t) aiocbp); +} + void blkfront_sync(struct blkfront_dev *dev) { unsigned long flags; - if (dev->mode == O_RDWR) { - if (dev->barrier == 1) - blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER); - - if (dev->flush == 1) - blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE); + if (dev->info.mode == O_RDWR) { + if (dev->info.barrier == 1) + blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER, 0); + + if (dev->info.flush == 1) + blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE, 0); } /* Note: This won't finish if another thread enqueues requests. 
*/ @@ -426,6 +369,69 @@ void blkfront_sync(struct blkfront_dev * local_irq_restore(flags); } +int blkfront_aio_poll(struct blkfront_dev *dev) +{ + RING_IDX rp, cons; + struct blkif_response *rsp; + int more; + +moretodo: +#ifdef HAVE_LIBC + files[dev->fd].read = 0; + mb(); /* Make sure to let the handler set read to 1 before we start looking at the ring */ +#endif + + rp = dev->ring.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + cons = dev->ring.rsp_cons; + + int nr_consumed = 0; + while ((cons != rp)) + { + rsp = RING_GET_RESPONSE(&dev->ring, cons); + nr_consumed++; + + struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id; + int status = rsp->status; + + if (status != BLKIF_RSP_OKAY) + printk("block error %d for op %d\n", status, rsp->operation); + + switch (rsp->operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + { + int j; + + for (j = 0; j < aiocbp->n; j++) + gnttab_end_access(aiocbp->gref[j]); + + break; + } + + case BLKIF_OP_WRITE_BARRIER: + case BLKIF_OP_FLUSH_DISKCACHE: + break; + + default: + printk("unrecognized block operation %d response\n", rsp->operation); + } + + dev->ring.rsp_cons = ++cons; + /* Nota: callback frees aiocbp itself */ + if (aiocbp && aiocbp->aio_cb) + aiocbp->aio_cb(aiocbp, status ? 
-EIO : 0); + if (dev->ring.rsp_cons != cons) + /* We reentered, we must not continue here */ + break; + } + + RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more); + if (more) goto moretodo; + + return nr_consumed; +} + #ifdef HAVE_LIBC int blkfront_open(struct blkfront_dev *dev) { diff -r e0f9bee70cbf -r 6bf674bd386d extras/mini-os/include/blkfront.h --- a/extras/mini-os/include/blkfront.h Sat Apr 05 22:19:02 2008 +0100 +++ b/extras/mini-os/include/blkfront.h Sat Apr 05 22:20:25 2008 +0100 @@ -15,13 +15,23 @@ struct blkfront_aiocb void (*aio_cb)(struct blkfront_aiocb *aiocb, int ret); }; -struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode, int *info); +struct blkfront_info +{ + uint64_t sectors; + unsigned sector_size; + int mode; + int info; + int barrier; + int flush; +}; +struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info); #ifdef HAVE_LIBC int blkfront_open(struct blkfront_dev *dev); #endif void blkfront_aio(struct blkfront_aiocb *aiocbp, int write); void blkfront_aio_read(struct blkfront_aiocb *aiocbp); void blkfront_aio_write(struct blkfront_aiocb *aiocbp); +void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op); int blkfront_aio_poll(struct blkfront_dev *dev); void blkfront_sync(struct blkfront_dev *dev); void shutdown_blkfront(struct blkfront_dev *dev); diff -r e0f9bee70cbf -r 6bf674bd386d extras/mini-os/kernel.c --- a/extras/mini-os/kernel.c Sat Apr 05 22:19:02 2008 +0100 +++ b/extras/mini-os/kernel.c Sat Apr 05 22:20:25 2008 +0100 @@ -91,9 +91,7 @@ static void netfront_thread(void *p) } static struct blkfront_dev *blk_dev; -static uint64_t blk_sectors; -static unsigned blk_sector_size; -static int blk_mode; +static struct blkfront_info blk_info; static uint64_t blk_size_read; static uint64_t blk_size_write; @@ -111,9 +109,9 @@ static struct blk_req *blk_alloc_req(uin { struct blk_req *req = xmalloc(struct blk_req); req->aiocb.aio_dev = blk_dev; - 
req->aiocb.aio_buf = _xmalloc(blk_sector_size, blk_sector_size); - req->aiocb.aio_nbytes = blk_sector_size; - req->aiocb.aio_offset = sector * blk_sector_size; + req->aiocb.aio_buf = _xmalloc(blk_info.sector_size, blk_info.sector_size); + req->aiocb.aio_nbytes = blk_info.sector_size; + req->aiocb.aio_offset = sector * blk_info.sector_size; req->aiocb.data = req; req->next = NULL; return req; @@ -125,7 +123,7 @@ static void blk_read_completed(struct bl if (ret) printk("got error code %d when reading at offset %ld\n", ret, aiocb->aio_offset); else - blk_size_read += blk_sector_size; + blk_size_read += blk_info.sector_size; free(aiocb->aio_buf); free(req); } @@ -154,10 +152,10 @@ static void blk_write_read_completed(str free(req); return; } - blk_size_read += blk_sector_size; + blk_size_read += blk_info.sector_size; buf = (int*) aiocb->aio_buf; rand_value = req->rand_value; - for (i = 0; i < blk_sector_size / sizeof(int); i++) { + for (i = 0; i < blk_info.sector_size / sizeof(int); i++) { if (buf[i] != rand_value) { printk("bogus data at offset %ld\n", aiocb->aio_offset + i); break; @@ -177,7 +175,7 @@ static void blk_write_completed(struct b free(req); return; } - blk_size_write += blk_sector_size; + blk_size_write += blk_info.sector_size; /* Push write check */ req->next = blk_to_read; blk_to_read = req; @@ -195,7 +193,7 @@ static void blk_write_sector(uint64_t se req->rand_value = rand_value = rand(); buf = (int*) req->aiocb.aio_buf; - for (i = 0; i < blk_sector_size / sizeof(int); i++) { + for (i = 0; i < blk_info.sector_size / sizeof(int); i++) { buf[i] = rand_value; rand_value *= RAND_MIX; } @@ -207,35 +205,34 @@ static void blkfront_thread(void *p) static void blkfront_thread(void *p) { time_t lasttime = 0; - int blk_info; - - blk_dev = init_blkfront(NULL, &blk_sectors, &blk_sector_size, &blk_mode, &blk_info); + + blk_dev = init_blkfront(NULL, &blk_info); if (!blk_dev) return; - if (blk_info & VDISK_CDROM) + if (blk_info.info & VDISK_CDROM) printk("Block device 
is a CDROM\n"); - if (blk_info & VDISK_REMOVABLE) + if (blk_info.info & VDISK_REMOVABLE) printk("Block device is removable\n"); - if (blk_info & VDISK_READONLY) + if (blk_info.info & VDISK_READONLY) printk("Block device is read-only\n"); #ifdef BLKTEST_WRITE - if (blk_mode == O_RDWR) { + if (blk_info.mode == O_RDWR) { blk_write_sector(0); - blk_write_sector(blk_sectors-1); + blk_write_sector(blk_info.sectors-1); } else #endif { blk_read_sector(0); - blk_read_sector(blk_sectors-1); + blk_read_sector(blk_info.sectors-1); } while (1) { - uint64_t sector = rand() % blk_sectors; + uint64_t sector = rand() % blk_info.sectors; struct timeval tv; #ifdef BLKTEST_WRITE - if (blk_mode == O_RDWR) + if (blk_info.mode == O_RDWR) blk_write_sector(sector); else #endif diff -r e0f9bee70cbf -r 6bf674bd386d tools/ioemu/block-vbd.c --- a/tools/ioemu/block-vbd.c Sat Apr 05 22:19:02 2008 +0100 +++ b/tools/ioemu/block-vbd.c Sat Apr 05 22:20:25 2008 +0100 @@ -49,11 +49,7 @@ typedef struct BDRVVbdState { typedef struct BDRVVbdState { struct blkfront_dev *dev; int fd; - int type; - int mode; - int info; - uint64_t sectors; - unsigned sector_size; + struct blkfront_info info; QEMU_LIST_ENTRY(BDRVVbdState) list; } BDRVVbdState; @@ -81,13 +77,13 @@ static int vbd_open(BlockDriverState *bs //handy to test posix access //return -EIO; - s->dev = init_blkfront((char *) filename, &s->sectors, &s->sector_size, &s->mode, &s->info); + s->dev = init_blkfront((char *) filename, &s->info); if (!s->dev) return -EIO; - if (SECTOR_SIZE % s->sector_size) { - printf("sector size is %d, we only support sector sizes that divide %d\n", s->sector_size, SECTOR_SIZE); + if (SECTOR_SIZE % s->info.sector_size) { + printf("sector size is %d, we only support sector sizes that divide %d\n", s->info.sector_size, SECTOR_SIZE); return -EIO; } @@ -267,6 +263,32 @@ static void vbd_aio_cancel(BlockDriverAI // Try to cancel. 
If can't, wait for it, drop the callback and call qemu_aio_release(acb) } +static void vbd_nop_cb(void *opaque, int ret) +{ +} + +static BlockDriverAIOCB *vbd_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BDRVVbdState *s = bs->opaque; + VbdAIOCB *acb = NULL; + + if (s->info.barrier == 1) { + acb = vbd_aio_setup(bs, 0, NULL, 0, + s->info.flush == 1 ? vbd_nop_cb : cb, opaque); + if (!acb) + return NULL; + blkfront_aio_push_operation(&acb->aiocb, BLKIF_OP_WRITE_BARRIER); + } + if (s->info.flush == 1) { + acb = vbd_aio_setup(bs, 0, NULL, 0, cb, opaque); + if (!acb) + return NULL; + blkfront_aio_push_operation(&acb->aiocb, BLKIF_OP_FLUSH_DISKCACHE); + } + return &acb->common; +} + static void vbd_close(BlockDriverState *bs) { BDRVVbdState *s = bs->opaque; @@ -282,13 +304,14 @@ static int64_t vbd_getlength(BlockDrive static int64_t vbd_getlength(BlockDriverState *bs) { BDRVVbdState *s = bs->opaque; - return s->sectors * s->sector_size; -} - -static void vbd_flush(BlockDriverState *bs) + return s->info.sectors * s->info.sector_size; +} + +static int vbd_flush(BlockDriverState *bs) { BDRVVbdState *s = bs->opaque; blkfront_sync(s->dev); + return 0; } /***********************************************/ @@ -333,6 +356,7 @@ BlockDriver bdrv_vbd = { .bdrv_aio_read = vbd_aio_read, .bdrv_aio_write = vbd_aio_write, .bdrv_aio_cancel = vbd_aio_cancel, + .bdrv_aio_flush = vbd_aio_flush, .aiocb_size = sizeof(VbdAIOCB), .bdrv_read = vbd_read, .bdrv_write = vbd_write, _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our servers 24x7x365 and backed by RackSpace's Fanatical Support®.