[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] [TAPDISK] add tapdisk support for image chaining
# HG changeset patch # User Jake Wires <jwires@xxxxxxxxxxxxx> # Date 1171686687 28800 # Node ID 3c827d68fa87fbbe75e218cdaf4ec02f437009ce # Parent 32a0599135914ecb98bd8e1a7c9111cc64ab3ce7 [TAPDISK] add tapdisk support for image chaining Enables tapdisk to chain an arbitrary number of VDIs, propagating reads of holes in children to their parent images. Introduces two new functions to the tapdisk interface to facilitate this. Modifies the QCoW plugin to take advantage of these changes, thus providing support for arbitrarily long chains of QCoW image types. --- tools/blktap/drivers/Makefile | 16 - tools/blktap/drivers/block-aio.c | 132 ++++---- tools/blktap/drivers/block-qcow.c | 563 +++++++++++++++++--------------------- tools/blktap/drivers/block-ram.c | 125 ++++---- tools/blktap/drivers/block-sync.c | 95 +++--- tools/blktap/drivers/block-vmdk.c | 99 +++--- tools/blktap/drivers/img2qcow.c | 28 + tools/blktap/drivers/qcow2raw.c | 75 ++--- tools/blktap/drivers/tapdisk.c | 372 ++++++++++++++++++------- tools/blktap/drivers/tapdisk.h | 62 ++-- tools/blktap/lib/blktaplib.h | 13 tools/blktap/lib/xs_api.c | 8 12 files changed, 889 insertions(+), 699 deletions(-) diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/Makefile --- a/tools/blktap/drivers/Makefile Fri Feb 16 16:34:28 2007 +0000 +++ b/tools/blktap/drivers/Makefile Fri Feb 16 20:31:27 2007 -0800 @@ -5,7 +5,7 @@ INCLUDES += -I.. -I../lib IBIN = blktapctrl tapdisk QCOW_UTIL = img2qcow qcow2raw qcow-create -INST_DIR = /usr/sbin +INST_DIR = /usr/sbin LIBAIO_DIR = ../../libaio/src CFLAGS += -Werror @@ -17,7 +17,7 @@ CFLAGS += -D_GNU_SOURCE # Get gcc to generate the dependencies for us. CFLAGS += -Wp,-MD,.$(@F).d -DEPS = .*.d +DEPS = .*.d THREADLIB := -lpthread -lz LIBS := -L. -L.. -L../lib @@ -29,10 +29,10 @@ LIBS += -L$(XEN_XENSTORE) -lxenstor AIOLIBS := $(LIBAIO_DIR)/libaio.a -BLK-OBJS := block-aio.o -BLK-OBJS += block-sync.o +BLK-OBJS := block-aio.o +BLK-OBJS += block-sync.o BLK-OBJS += block-vmdk.o -BLK-OBJS += block-ram.o +BLK-OBJS += block-ram.o BLK-OBJS += block-qcow.o BLK-OBJS += aes.o @@ -52,13 +52,13 @@ qcow-util: img2qcow qcow2raw qcow-create qcow-util: img2qcow qcow2raw qcow-create img2qcow qcow2raw qcow-create: %: $(BLK-OBJS) - $(CC) $(CFLAGS) -o $* $(BLK-OBJS) $*.c $(AIOLIBS) $(LIBS) + $(CC) $(CFLAGS) -o $* $(BLK-OBJS) $*.c $(AIOLIBS) $(LIBS) install: all - $(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(DESTDIR)$(INST_DIR) + $(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(VHD_UTIL) $(DESTDIR)$(INST_DIR) clean: - rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN) $(LIB) $(QCOW_UTIL) + rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN) $(LIB) $(QCOW_UTIL) $(VHD_UTIL) .PHONY: clean install diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/block-aio.c --- a/tools/blktap/drivers/block-aio.c Fri Feb 16 16:34:28 2007 +0000 +++ b/tools/blktap/drivers/block-aio.c Fri Feb 16 20:31:27 2007 -0800 @@ -58,6 +58,7 @@ struct pending_aio { td_callback_t cb; int id; void *private; + uint64_t lsec; }; struct tdaio_state { @@ -139,12 +140,23 @@ static int get_image_info(struct td_stat return 0; } +static inline void init_fds(struct disk_driver *dd) +{ + int i; + struct tdaio_state *prv = (struct tdaio_state *)dd->private; + + for(i = 0; i < MAX_IOFD; i++) + dd->io_fd[i] = 0; + + dd->io_fd[0] = prv->poll_fd; +} + /* Open the disk file and initialize aio state. */ -int tdaio_open (struct td_state *s, const char *name) +int tdaio_open (struct disk_driver *dd, const char *name) { int i, fd, ret = 0; - struct tdaio_state *prv = (struct tdaio_state *)s->private; - s->private = prv; + struct td_state *s = dd->td_state; + struct tdaio_state *prv = (struct tdaio_state *)dd->private; DPRINTF("block-aio open('%s')", name); /* Initialize AIO */ @@ -194,18 +206,21 @@ int tdaio_open (struct td_state *s, cons prv->fd = fd; + init_fds(dd); ret = get_image_info(s, fd); + done: return ret; } -int tdaio_queue_read(struct td_state *s, uint64_t sector, - int nb_sectors, char *buf, td_callback_t cb, - int id, void *private) +int tdaio_queue_read(struct disk_driver *dd, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) { struct iocb *io; struct pending_aio *pio; - struct tdaio_state *prv = (struct tdaio_state *)s->private; + struct td_state *s = dd->td_state; + struct tdaio_state *prv = (struct tdaio_state *)dd->private; int size = nb_sectors * s->sector_size; uint64_t offset = sector * (uint64_t)s->sector_size; long ioidx; @@ -219,22 +234,24 @@ int tdaio_queue_read(struct td_state *s, pio->cb = cb; pio->id = id; pio->private = private; + pio->lsec = sector; io_prep_pread(io, prv->fd, buf, size, offset); io->data = (void *)ioidx; prv->iocb_queue[prv->iocb_queued++] = io; - - return 0; -} - -int tdaio_queue_write(struct td_state *s, uint64_t sector, - int nb_sectors, char *buf, td_callback_t cb, - int id, void *private) + + return 0; +} + +int tdaio_queue_write(struct disk_driver *dd, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) { struct iocb *io; struct pending_aio *pio; - struct tdaio_state *prv = (struct tdaio_state *)s->private; + struct td_state *s = dd->td_state; + struct tdaio_state *prv = (struct tdaio_state *)dd->private; int size = nb_sectors * s->sector_size; uint64_t offset = sector * (uint64_t)s->sector_size; long ioidx; @@ -248,19 +265,20 @@ int tdaio_queue_write(struct td_state *s pio->cb = cb; pio->id = id; pio->private = private; + pio->lsec = sector; io_prep_pwrite(io, prv->fd, buf, size, offset); io->data = (void *)ioidx; prv->iocb_queue[prv->iocb_queued++] = io; - - return 0; -} - -int tdaio_submit(struct td_state *s) + + return 0; +} + +int tdaio_submit(struct disk_driver *dd) { int ret; - struct tdaio_state *prv = (struct tdaio_state *)s->private; + struct tdaio_state *prv = (struct tdaio_state *)dd->private; ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue); @@ -269,38 +287,24 @@ int tdaio_submit(struct td_state *s) /* Success case: */ prv->iocb_queued = 0; - return ret; -} - -int *tdaio_get_fd(struct td_state *s) -{ - struct tdaio_state *prv = (struct tdaio_state *)s->private; - int *fds, i; - - fds = malloc(sizeof(int) * MAX_IOFD); - /*initialise the FD array*/ - for(i=0;i<MAX_IOFD;i++) fds[i] = 0; - - fds[0] = prv->poll_fd; - - return fds; -} - -int tdaio_close(struct td_state *s) -{ - struct tdaio_state *prv = (struct tdaio_state *)s->private; + return 0; +} + +int tdaio_close(struct disk_driver *dd) +{ + struct tdaio_state *prv = (struct tdaio_state *)dd->private; io_destroy(prv->aio_ctx); close(prv->fd); - - return 0; -} - -int tdaio_do_callbacks(struct td_state *s, int sid) + + return 0; +} + +int tdaio_do_callbacks(struct disk_driver *dd, int sid) { int ret, i, rsp = 0; struct io_event *ep; - struct tdaio_state *prv = (struct tdaio_state *)s->private; + struct tdaio_state *prv = (struct tdaio_state *)dd->private; /* Non-blocking test for completed io. */ ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events, @@ -311,22 +315,34 @@ int tdaio_do_callbacks(struct td_state * struct pending_aio *pio; pio = &prv->pending_aio[(long)io->data]; - rsp += pio->cb(s, ep->res == io->u.c.nbytes ? 0 : 1, + rsp += pio->cb(dd, ep->res == io->u.c.nbytes ? 0 : 1, + pio->lsec, io->u.c.nbytes >> 9, pio->id, pio->private); prv->iocb_free[prv->iocb_free_count++] = io; } return rsp; } - + +int tdaio_has_parent(struct disk_driver *dd) +{ + return 0; +} + +int tdaio_get_parent(struct disk_driver *dd, struct disk_driver *parent) +{ + return -EINVAL; +} + struct tap_disk tapdisk_aio = { - "tapdisk_aio", - sizeof(struct tdaio_state), - tdaio_open, - tdaio_queue_read, - tdaio_queue_write, - tdaio_submit, - tdaio_get_fd, - tdaio_close, - tdaio_do_callbacks, + .disk_type = "tapdisk_aio", + .private_data_size = sizeof(struct tdaio_state), + .td_open = tdaio_open, + .td_queue_read = tdaio_queue_read, + .td_queue_write = tdaio_queue_write, + .td_submit = tdaio_submit, + .td_has_parent = tdaio_has_parent, + .td_get_parent = tdaio_get_parent, + .td_close = tdaio_close, + .td_do_callbacks = tdaio_do_callbacks, }; diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/block-qcow.c --- a/tools/blktap/drivers/block-qcow.c Fri Feb 16 16:34:28 2007 +0000 +++ b/tools/blktap/drivers/block-qcow.c Fri Feb 16 20:31:27 2007 -0800 @@ -55,7 +55,6 @@ /******AIO DEFINES******/ #define REQUEST_ASYNC_FD 1 -#define MAX_QCOW_IDS 0xFFFF #define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ) struct pending_aio { @@ -65,7 +64,6 @@ struct pending_aio { int nb_sectors; char *buf; uint64_t sector; - int qcow_idx; }; #define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list) @@ -115,9 +113,9 @@ struct tdqcow_state { struct tdqcow_state { int fd; /*Main Qcow file descriptor */ uint64_t fd_end; /*Store a local record of file length */ - int bfd; /*Backing file descriptor*/ char *name; /*Record of the filename*/ - int poll_pipe[2]; /*dummy fd for polling on */ + uint32_t backing_file_size; + uint64_t backing_file_offset; int encrypted; /*File contents are encrypted or plain*/ int cluster_bits; /*Determines length of cluster as *indicated by file hdr*/ @@ -149,7 +147,6 @@ struct tdqcow_state { AES_KEY aes_decrypt_key; /*AES key*/ /* libaio state */ io_context_t aio_ctx; - int nr_reqs [MAX_QCOW_IDS]; struct iocb iocb_list [MAX_AIO_REQS]; struct iocb *iocb_free [MAX_AIO_REQS]; struct pending_aio pending_aio[MAX_AIO_REQS]; @@ -162,10 +159,11 @@ struct tdqcow_state { static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset); -static int init_aio_state(struct td_state *bs) +static int init_aio_state(struct disk_driver *dd) { int i; - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + struct td_state *bs = dd->td_state; + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; long ioidx; /*Initialize Locking bitmap*/ @@ -202,8 +200,7 @@ static int init_aio_state(struct td_stat for (i=0;i<MAX_AIO_REQS;i++) s->iocb_free[i] = &s->iocb_list[i]; - for (i=0;i<MAX_QCOW_IDS;i++) - s->nr_reqs[i] = 0; + DPRINTF("AIO state initialised\n"); return 0; @@ -238,7 +235,10 @@ static uint32_t gen_cksum(char *ptr, int if(!md) return 0; - if (MD5((unsigned char *)ptr, len, md) != md) return 0; + if (MD5((unsigned char *)ptr, len, md) != md) { + free(md); + return 0; + } memcpy(&ret, md, sizeof(uint32_t)); free(md); @@ -247,26 +247,42 @@ static uint32_t gen_cksum(char *ptr, int static int get_filesize(char *filename, uint64_t *size, struct stat *st) { - int blockfd; + int fd; + QCowHeader header; /*Set to the backing file size*/ + fd = open(filename, O_RDONLY); + if (fd < 0) + return -1; + if (read(fd, &header, sizeof(header)) < sizeof(header)) { + close(fd); + return -1; + } + close(fd); + + be32_to_cpus(&header.magic); + be64_to_cpus(&header.size); + if (header.magic == QCOW_MAGIC) { + *size = header.size >> SECTOR_SHIFT; + return 0; + } + if(S_ISBLK(st->st_mode)) { - blockfd = open(filename, O_RDONLY); - if (blockfd < 0) + fd = open(filename, O_RDONLY); + if (fd < 0) return -1; - if (ioctl(blockfd,BLKGETSIZE,size)!=0) { + if (ioctl(fd,BLKGETSIZE,size)!=0) { printf("Unable to get Block device size\n"); - close(blockfd); + close(fd); return -1; } - close(blockfd); + close(fd); } else *size = (st->st_size >> SECTOR_SHIFT); return 0; } -static int qcow_set_key(struct td_state *bs, const char *key) -{ - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; +static int qcow_set_key(struct tdqcow_state *s, const char *key) +{ uint8_t keybuf[16]; int len, i; @@ -306,10 +322,9 @@ static int qcow_set_key(struct td_state return 0; } -static int async_read(struct tdqcow_state *s, int fd, int size, - uint64_t offset, - char *buf, td_callback_t cb, - int id, uint64_t sector, int qcow_idx, void *private) +static int async_read(struct tdqcow_state *s, int size, + uint64_t offset, char *buf, td_callback_t cb, + int id, uint64_t sector, void *private) { struct iocb *io; struct pending_aio *pio; @@ -325,9 +340,8 @@ static int async_read(struct tdqcow_stat pio->nb_sectors = size/512; pio->buf = buf; pio->sector = sector; - pio->qcow_idx = qcow_idx; - - io_prep_pread(io, fd, buf, size, offset); + + io_prep_pread(io, s->fd, buf, size, offset); io->data = (void *)ioidx; s->iocb_queue[s->iocb_queued++] = io; @@ -335,10 +349,9 @@ static int async_read(struct tdqcow_stat return 1; } -static int async_write(struct tdqcow_state *s, int fd, int size, - uint64_t offset, - char *buf, td_callback_t cb, - int id, uint64_t sector, int qcow_idx, void *private) +static int async_write(struct tdqcow_state *s, int size, + uint64_t offset, char *buf, td_callback_t cb, + int id, uint64_t sector, void *private) { struct iocb *io; struct pending_aio *pio; @@ -354,9 +367,8 @@ static int async_write(struct tdqcow_sta pio->nb_sectors = size/512; pio->buf = buf; pio->sector = sector; - pio->qcow_idx = qcow_idx; - - io_prep_pwrite(io, fd, buf, size, offset); + + io_prep_pwrite(io, s->fd, buf, size, offset); io->data = (void *)ioidx; s->iocb_queue[s->iocb_queued++] = io; @@ -381,17 +393,6 @@ static void aio_unlock(struct tdqcow_sta --s->sector_lock[sector]; return; -} - -/*TODO - Use a freelist*/ -static int get_free_idx(struct tdqcow_state *s) -{ - int i; - - for(i = 0; i < MAX_QCOW_IDS; i++) { - if(s->nr_reqs[i] == 0) return i; - } - return -1; } /* @@ -425,23 +426,23 @@ static int qtruncate(int fd, off_t lengt { int ret, i; int current = 0, rem = 0; - int sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE; + uint64_t sectors; struct stat st; - char buf[DEFAULT_SECTOR_SIZE]; + char *buf; /* If length is greater than the current file len * we synchronously write zeroes to the end of the * file, otherwise we truncate the length down */ - memset(buf, 0x00, DEFAULT_SECTOR_SIZE); ret = fstat(fd, &st); - if (ret == -1) + if (ret == -1) return -1; if (S_ISBLK(st.st_mode)) return 0; - + + sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE; current = (st.st_size + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE; - rem = st.st_size % DEFAULT_SECTOR_SIZE; + rem = st.st_size % DEFAULT_SECTOR_SIZE; /* If we are extending this file, we write zeros to the end -- * this tries to ensure that the extents allocated wind up being @@ -449,28 +450,40 @@ static int qtruncate(int fd, off_t lengt */ if(st.st_size < sectors * DEFAULT_SECTOR_SIZE) { /*We are extending the file*/ + if ((ret = posix_memalign((void **)&buf, + 512, DEFAULT_SECTOR_SIZE))) { + DPRINTF("posix_memalign failed: %d\n", ret); + return -1; + } + memset(buf, 0x00, DEFAULT_SECTOR_SIZE); if (lseek(fd, 0, SEEK_END)==-1) { - fprintf(stderr, - "Lseek EOF failed (%d), internal error\n", + DPRINTF("Lseek EOF failed (%d), internal error\n", errno); + free(buf); return -1; } if (rem) { ret = write(fd, buf, rem); - if (ret != rem) + if (ret != rem) { + DPRINTF("write failed: ret = %d, err = %s\n", + ret, strerror(errno)); + free(buf); return -1; + } } for (i = current; i < sectors; i++ ) { ret = write(fd, buf, DEFAULT_SECTOR_SIZE); - if (ret != DEFAULT_SECTOR_SIZE) + if (ret != DEFAULT_SECTOR_SIZE) { + DPRINTF("write failed: ret = %d, err = %s\n", + ret, strerror(errno)); + free(buf); return -1; - } - + } + } + free(buf); } else if(sparse && (st.st_size > sectors * DEFAULT_SECTOR_SIZE)) - if (ftruncate(fd, sectors * DEFAULT_SECTOR_SIZE)==-1) { - fprintf(stderr, - "Ftruncate failed (%d), internal error\n", - errno); + if (ftruncate(fd, (off_t)sectors * DEFAULT_SECTOR_SIZE)==-1) { + DPRINTF("Ftruncate failed (%s)\n", strerror(errno)); return -1; } return 0; @@ -490,12 +503,11 @@ static int qtruncate(int fd, off_t lengt * * return 0 if not allocated. */ -static uint64_t get_cluster_offset(struct td_state *bs, +static uint64_t get_cluster_offset(struct tdqcow_state *s, uint64_t offset, int allocate, int compressed_size, int n_start, int n_end) { - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector; char *tmp_ptr, *tmp_ptr2, *l2_ptr, *l1_ptr; uint64_t l2_offset, *l2_table, cluster_offset, tmp; @@ -550,8 +562,10 @@ static uint64_t get_cluster_offset(struc * entry is written before blocks. */ lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET); - if (write(s->fd, tmp_ptr, 4096) != 4096) + if (write(s->fd, tmp_ptr, 4096) != 4096) { + free(tmp_ptr); return 0; + } free(tmp_ptr); new_l2_table = 1; @@ -716,9 +730,10 @@ found: return cluster_offset; } -static void init_cluster_cache(struct td_state *bs) -{ - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; +static void init_cluster_cache(struct disk_driver *dd) +{ + struct td_state *bs = dd->td_state; + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; uint32_t count = 0; int i, cluster_entries; @@ -727,22 +742,20 @@ static void init_cluster_cache(struct td cluster_entries, s->cluster_size); for (i = 0; i < bs->size; i += cluster_entries) { - if (get_cluster_offset(bs, i << 9, 0, 0, 0, 1)) count++; + if (get_cluster_offset(s, i << 9, 0, 0, 0, 1)) count++; if (count >= L2_CACHE_SIZE) return; } DPRINTF("Finished cluster initialisation, added %d entries\n", count); return; } -static int qcow_is_allocated(struct td_state *bs, int64_t sector_num, +static int qcow_is_allocated(struct tdqcow_state *s, int64_t sector_num, int nb_sectors, int *pnum) { - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; - int index_in_cluster, n; uint64_t cluster_offset; - cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); + cluster_offset = get_cluster_offset(s, sector_num << 9, 0, 0, 0, 0); index_in_cluster = sector_num & (s->cluster_sectors - 1); n = s->cluster_sectors - index_in_cluster; if (n > nb_sectors) @@ -800,11 +813,23 @@ static int decompress_cluster(struct tdq return 0; } +static inline void init_fds(struct disk_driver *dd) +{ + int i; + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; + + for(i = 0; i < MAX_IOFD; i++) + dd->io_fd[i] = 0; + + dd->io_fd[0] = s->poll_fd; +} + /* Open the disk file and initialize qcow state. */ -int tdqcow_open (struct td_state *bs, const char *name) +int tdqcow_open (struct disk_driver *dd, const char *name) { int fd, len, i, shift, ret, size, l1_table_size; - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + struct td_state *bs = dd->td_state; + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; char *buf; QCowHeader *header; QCowHeader_ext *exthdr; @@ -812,10 +837,6 @@ int tdqcow_open (struct td_state *bs, co uint64_t final_cluster = 0; DPRINTF("QCOW: Opening %s\n",name); - /* set up a pipe so that we can hand back a poll fd that won't fire.*/ - ret = pipe(s->poll_pipe); - if (ret != 0) - return (0 - errno); fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE); if (fd < 0) { @@ -826,7 +847,7 @@ int tdqcow_open (struct td_state *bs, co s->fd = fd; asprintf(&s->name,"%s", name); - ASSERT(sizeof(header) < 512); + ASSERT(sizeof(QCowHeader) + sizeof(QCowHeader_ext) < 512); ret = posix_memalign((void **)&buf, 512, 512); if (ret != 0) goto fail; @@ -861,7 +882,9 @@ int tdqcow_open (struct td_state *bs, co s->cluster_alloc = s->l2_size; bs->size = header->size / 512; s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1; - + s->backing_file_offset = header->backing_file_offset; + s->backing_file_size = header->backing_file_size; + /* read the level 1 table */ shift = s->cluster_bits + s->l2_bits; s->l1_size = (header->size + (1LL << shift) - 1) >> shift; @@ -887,7 +910,7 @@ int tdqcow_open (struct td_state *bs, co if (read(fd, s->l1_table, l1_table_size) != l1_table_size) goto fail; - for(i = 0;i < s->l1_size; i++) { + for(i = 0; i < s->l1_size; i++) { //be64_to_cpus(&s->l1_table[i]); //DPRINTF("L1[%d] => %llu\n", i, s->l1_table[i]); if (s->l1_table[i] > final_cluster) @@ -907,41 +930,15 @@ int tdqcow_open (struct td_state *bs, co if(ret != 0) goto fail; s->cluster_cache_offset = -1; - /* read the backing file name */ - s->bfd = -1; - if (header->backing_file_offset != 0) { - DPRINTF("Reading backing file data\n"); - len = header->backing_file_size; - if (len > 1023) - len = 1023; - - /*TODO - Fix read size for O_DIRECT and use original fd!*/ - fd = open(name, O_RDONLY | O_LARGEFILE); - - lseek(fd, header->backing_file_offset, SEEK_SET); - if (read(fd, bs->backing_file, len) != len) - goto fail; - bs->backing_file[len] = '\0'; - close(fd); - /***********************************/ - - /*Open backing file*/ - fd = open(bs->backing_file, O_RDONLY | O_DIRECT | O_LARGEFILE); - if (fd < 0) { - DPRINTF("Unable to open backing file: %s\n", - bs->backing_file); - goto fail; - } - s->bfd = fd; + if (s->backing_file_offset != 0) s->cluster_alloc = 1; /*Cannot use pre-alloc*/ - } bs->sector_size = 512; bs->info = 0; /*Detect min_cluster_alloc*/ s->min_cluster_alloc = 1; /*Default*/ - if (s->bfd == -1 && (s->l1_table_offset % 4096 == 0) ) { + if (s->backing_file_offset == 0 && s->l1_table_offset % 4096 == 0) { /*We test to see if the xen magic # exists*/ exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader)); be32_to_cpus(&exthdr->xmagic); @@ -962,10 +959,11 @@ int tdqcow_open (struct td_state *bs, co } end_xenhdr: - if (init_aio_state(bs)!=0) { + if (init_aio_state(dd)!=0) { DPRINTF("Unable to initialise AIO state\n"); goto fail; } + init_fds(dd); s->fd_end = (final_cluster == 0 ? (s->l1_table_offset + l1_table_size) : (final_cluster + s->cluster_size)); @@ -981,213 +979,145 @@ fail: return -1; } - int tdqcow_queue_read(struct td_state *bs, uint64_t sector, - int nb_sectors, char *buf, td_callback_t cb, - int id, void *private) -{ - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; - int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0; - uint64_t cluster_offset; +int tdqcow_queue_read(struct disk_driver *dd, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) +{ + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; + int ret = 0, index_in_cluster, n, i, rsp = 0; + uint64_t cluster_offset, sec, nr_secs; + + sec = sector; + nr_secs = nb_sectors; /*Check we can get a lock*/ - for (i = 0; i < nb_sectors; i++) - if (!aio_can_lock(s, sector + i)) { - DPRINTF("AIO_CAN_LOCK failed [%llu]\n", - (long long) sector + i); - return -EBUSY; - } - + for (i = 0; i < nb_sectors; i++) + if (!aio_can_lock(s, sector + i)) + return cb(dd, -EBUSY, sector, nb_sectors, id, private); + /*We store a local record of the request*/ - qcow_idx = get_free_idx(s); while (nb_sectors > 0) { cluster_offset = - get_cluster_offset(bs, sector << 9, 0, 0, 0, 0); + get_cluster_offset(s, sector << 9, 0, 0, 0, 0); index_in_cluster = sector & (s->cluster_sectors - 1); n = s->cluster_sectors - index_in_cluster; if (n > nb_sectors) n = nb_sectors; - if (s->iocb_free_count == 0 || !aio_lock(s, sector)) { - DPRINTF("AIO_LOCK or iocb_free_count (%d) failed" - "[%llu]\n", s->iocb_free_count, - (long long) sector); - return -ENOMEM; - } + if (s->iocb_free_count == 0 || !aio_lock(s, sector)) + return cb(dd, -EBUSY, sector, nb_sectors, id, private); - if (!cluster_offset && (s->bfd > 0)) { - s->nr_reqs[qcow_idx]++; - asubmit += async_read(s, s->bfd, n * 512, sector << 9, - buf, cb, id, sector, - qcow_idx, private); - } else if(!cluster_offset) { - memset(buf, 0, 512 * n); + if(!cluster_offset) { aio_unlock(s, sector); + ret = cb(dd, BLK_NOT_ALLOCATED, + sector, n, id, private); + if (ret == -EBUSY) { + /* mark remainder of request + * as busy and try again later */ + return cb(dd, -EBUSY, sector + n, + nb_sectors - n, id, private); + } else rsp += ret; } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { + aio_unlock(s, sector); if (decompress_cluster(s, cluster_offset) < 0) { - ret = -1; + rsp += cb(dd, -EIO, sector, + nb_sectors, id, private); goto done; } memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n); - } else { - s->nr_reqs[qcow_idx]++; - asubmit += async_read(s, s->fd, n * 512, - (cluster_offset + - index_in_cluster * 512), - buf, cb, id, sector, - qcow_idx, private); + rsp += cb(dd, 0, sector, n, id, private); + } else { + async_read(s, n * 512, + (cluster_offset + index_in_cluster * 512), + buf, cb, id, sector, private); } nb_sectors -= n; sector += n; buf += n * 512; } done: - /*Callback if no async requests outstanding*/ - if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private); - - return 0; -} - - int tdqcow_queue_write(struct td_state *bs, uint64_t sector, - int nb_sectors, char *buf, td_callback_t cb, - int id, void *private) -{ - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; - int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0; - uint64_t cluster_offset; + return rsp; +} + +int tdqcow_queue_write(struct disk_driver *dd, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) +{ + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; + int ret = 0, index_in_cluster, n, i; + uint64_t cluster_offset, sec, nr_secs; + + sec = sector; + nr_secs = nb_sectors; /*Check we can get a lock*/ for (i = 0; i < nb_sectors; i++) - if (!aio_can_lock(s, sector + i)) { - DPRINTF("AIO_CAN_LOCK failed [%llu]\n", - (long long) (sector + i)); - return -EBUSY; - } + if (!aio_can_lock(s, sector + i)) + return cb(dd, -EBUSY, sector, nb_sectors, id, private); /*We store a local record of the request*/ - qcow_idx = get_free_idx(s); while (nb_sectors > 0) { index_in_cluster = sector & (s->cluster_sectors - 1); n = s->cluster_sectors - index_in_cluster; if (n > nb_sectors) n = nb_sectors; - if (s->iocb_free_count == 0 || !aio_lock(s, sector)){ - DPRINTF("AIO_LOCK or iocb_free_count (%d) failed" - "[%llu]\n", s->iocb_free_count, - (long long) sector); - return -ENOMEM; - } - - if (!IS_ZERO(buf,n * 512)) { - - cluster_offset = get_cluster_offset(bs, sector << 9, - 1, 0, - index_in_cluster, - index_in_cluster+n - ); - if (!cluster_offset) { - DPRINTF("Ooops, no write cluster offset!\n"); - ret = -1; - goto done; - } - - if (s->crypt_method) { - encrypt_sectors(s, sector, s->cluster_data, - (unsigned char *)buf, n, 1, - &s->aes_encrypt_key); - s->nr_reqs[qcow_idx]++; - asubmit += async_write(s, s->fd, n * 512, - (cluster_offset + - index_in_cluster*512), - (char *)s->cluster_data, - cb, id, sector, - qcow_idx, private); - } else { - s->nr_reqs[qcow_idx]++; - asubmit += async_write(s, s->fd, n * 512, - (cluster_offset + - index_in_cluster*512), - buf, cb, id, sector, - qcow_idx, private); - } + if (s->iocb_free_count == 0 || !aio_lock(s, sector)) + return cb(dd, -EBUSY, sector, nb_sectors, id, private); + + cluster_offset = get_cluster_offset(s, sector << 9, 1, 0, + index_in_cluster, + index_in_cluster+n); + if (!cluster_offset) { + DPRINTF("Ooops, no write cluster offset!\n"); + return cb(dd, -EIO, sector, nb_sectors, id, private); + } + + if (s->crypt_method) { + encrypt_sectors(s, sector, s->cluster_data, + (unsigned char *)buf, n, 1, + &s->aes_encrypt_key); + async_write(s, n * 512, + (cluster_offset + index_in_cluster*512), + (char *)s->cluster_data, cb, id, sector, + private); } else { - /*Write data contains zeros, but we must check to see - if cluster already allocated*/ - cluster_offset = get_cluster_offset(bs, sector << 9, - 0, 0, - index_in_cluster, - index_in_cluster+n - ); - if(cluster_offset) { - if (s->crypt_method) { - encrypt_sectors(s, sector, - s->cluster_data, - (unsigned char *)buf, - n, 1, - &s->aes_encrypt_key); - s->nr_reqs[qcow_idx]++; - asubmit += async_write(s, s->fd, - n * 512, - (cluster_offset+ - index_in_cluster * 512), - (char *)s->cluster_data, cb, id, sector, - qcow_idx, private); - } else { - s->nr_reqs[qcow_idx]++; - asubmit += async_write(s, s->fd, n*512, - cluster_offset + index_in_cluster * 512, - buf, cb, id, sector, - qcow_idx, private); - } - } - else aio_unlock(s, sector); - } + async_write(s, n * 512, + (cluster_offset + index_in_cluster*512), + buf, cb, id, sector, private); + } + nb_sectors -= n; sector += n; buf += n * 512; } s->cluster_cache_offset = -1; /* disable compressed cache */ -done: - /*Callback if no async requests outstanding*/ - if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private); - return 0; } -int tdqcow_submit(struct td_state *bs) +int tdqcow_submit(struct disk_driver *dd) { int ret; - struct tdqcow_state *prv = (struct tdqcow_state *)bs->private; - - ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue); + struct tdqcow_state *prv = (struct tdqcow_state *)dd->private; + + if (!prv->iocb_queued) + return 0; + + ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue); /* XXX: TODO: Handle error conditions here. */ /* Success case: */ prv->iocb_queued = 0; - return ret; -} - - -int *tdqcow_get_fd(struct td_state *bs) -{ - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; - int *fds, i; - - fds = malloc(sizeof(int) * MAX_IOFD); - /*initialise the FD array*/ - for(i=0;i<MAX_IOFD;i++) fds[i] = 0; - - fds[0] = s->poll_fd; - return fds; -} - -int tdqcow_close(struct td_state *bs) -{ - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; + return 0; +} + +int tdqcow_close(struct disk_driver *dd) +{ + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; uint32_t cksum, out; int fd, offset; @@ -1203,6 +1133,7 @@ int tdqcow_close(struct td_state *bs) close(fd); } + io_destroy(s->aio_ctx); free(s->name); free(s->l1_table); free(s->l2_cache); @@ -1212,11 +1143,11 @@ int tdqcow_close(struct td_state *bs) return 0; } -int tdqcow_do_callbacks(struct td_state *s, int sid) +int tdqcow_do_callbacks(struct disk_driver *dd, int sid) { int ret, i, rsp = 0,*ptr; struct io_event *ep; - struct tdqcow_state *prv = (struct tdqcow_state *)s->private; + struct tdqcow_state *prv = (struct tdqcow_state *)dd->private; if (sid > MAX_IOFD) return 1; @@ -1224,25 +1155,24 @@ int tdqcow_do_callbacks(struct td_state ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events, NULL); - for (ep=prv->aio_events, i = ret; i-->0; ep++) { + for (ep = prv->aio_events, i = ret; i-- > 0; ep++) { struct iocb *io = ep->obj; struct pending_aio *pio; pio = &prv->pending_aio[(long)io->data]; aio_unlock(prv, pio->sector); - if (pio->id >= 0) { - if (prv->crypt_method) - encrypt_sectors(prv, pio->sector, - (unsigned char *)pio->buf, - (unsigned char *)pio->buf, - pio->nb_sectors, 0, - &prv->aes_decrypt_key); - prv->nr_reqs[pio->qcow_idx]--; - if (prv->nr_reqs[pio->qcow_idx] == 0) - rsp += pio->cb(s, ep->res == io->u.c.nbytes ? 0 : 1, pio->id, - pio->private); - } else if (pio->id == -2) free(pio->buf); + + if (prv->crypt_method) + encrypt_sectors(prv, pio->sector, + (unsigned char *)pio->buf, + (unsigned char *)pio->buf, + pio->nb_sectors, 0, + &prv->aes_decrypt_key); + + rsp += pio->cb(dd, ep->res == io->u.c.nbytes ? 0 : 1, + pio->sector, pio->nb_sectors, + pio->id, pio->private); prv->iocb_free[prv->iocb_free_count++] = io; } @@ -1250,7 +1180,7 @@ int tdqcow_do_callbacks(struct td_state } int qcow_create(const char *filename, uint64_t total_size, - const char *backing_file, int sparse) + const char *backing_file, int sparse) { int fd, header_size, backing_filename_len, l1_size, i; int shift, length, adjust, flags = 0, ret = 0; @@ -1391,9 +1321,8 @@ int qcow_create(const char *filename, ui return 0; } -int qcow_make_empty(struct td_state *bs) -{ - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; +int qcow_make_empty(struct tdqcow_state *s) +{ uint32_t l1_length = s->l1_size * sizeof(uint64_t); memset(s->l1_table, 0, l1_length); @@ -1412,19 +1341,16 @@ int qcow_make_empty(struct td_state *bs) return 0; } -int qcow_get_cluster_size(struct td_state *bs) -{ - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; - +int qcow_get_cluster_size(struct tdqcow_state *s) +{ return s->cluster_size; } /* XXX: put compressed sectors first, then all the cluster aligned tables to avoid losing bytes in alignment */ -int qcow_compress_cluster(struct td_state *bs, int64_t sector_num, +int qcow_compress_cluster(struct tdqcow_state *s, int64_t sector_num, const uint8_t *buf) { - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; z_stream strm; int ret, out_len; uint8_t *out_buf; @@ -1463,7 +1389,7 @@ int qcow_compress_cluster(struct td_stat /* could not compress: write normal cluster */ //tdqcow_queue_write(bs, sector_num, buf, s->cluster_sectors); } else { - cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, + cluster_offset = get_cluster_offset(s, sector_num << 9, 2, out_len, 0, 0); cluster_offset &= s->cluster_offset_mask; lseek(s->fd, cluster_offset, SEEK_SET); @@ -1477,15 +1403,54 @@ int qcow_compress_cluster(struct td_stat return 0; } +int tdqcow_has_parent(struct disk_driver *dd) +{ + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; + return (s->backing_file_offset ? 1 : 0); +} + +int tdqcow_get_parent(struct disk_driver *cdd, struct disk_driver *pdd) +{ + off_t off; + char *buf, *filename; + int len, secs, ret = -1; + struct tdqcow_state *child = (struct tdqcow_state *)cdd->private; + + if (!child->backing_file_offset) + return -1; + + /* read the backing file name */ + len = child->backing_file_size; + off = child->backing_file_offset - (child->backing_file_offset % 512); + secs = (len + (child->backing_file_offset - off) + 511) >> 9; + + if (posix_memalign((void **)&buf, 512, secs << 9)) + return -1; + + if (lseek(child->fd, off, SEEK_SET) == (off_t)-1) + goto out; + + if (read(child->fd, buf, secs << 9) != secs << 9) + goto out; + filename = buf + (child->backing_file_offset - off); + filename[len] = '\0'; + + /*Open backing file*/ + ret = tdqcow_open(pdd, filename); + out: + free(buf); + return ret; +} + struct tap_disk tapdisk_qcow = { - "tapdisk_qcow", - sizeof(struct tdqcow_state), - tdqcow_open, - tdqcow_queue_read, - tdqcow_queue_write, - tdqcow_submit, - tdqcow_get_fd, - tdqcow_close, - tdqcow_do_callbacks, + .disk_type = "tapdisk_qcow", + .private_data_size = sizeof(struct tdqcow_state), + .td_open = tdqcow_open, + .td_queue_read = tdqcow_queue_read, + .td_queue_write = tdqcow_queue_write, + .td_submit = tdqcow_submit, + .td_has_parent = tdqcow_has_parent, + .td_get_parent = tdqcow_get_parent, + .td_close = tdqcow_close, + .td_do_callbacks = tdqcow_do_callbacks, }; - diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/block-ram.c --- a/tools/blktap/drivers/block-ram.c Fri Feb 16 16:34:28 2007 +0000 +++ b/tools/blktap/drivers/block-ram.c Fri Feb 16 20:31:27 2007 -0800 @@ -123,14 +123,25 @@ static int get_image_info(struct td_stat return 0; } +static inline void init_fds(struct disk_driver *dd) +{ + int i; + struct tdram_state *prv = (struct tdram_state *)dd->private; + + for(i =0 ; i < MAX_IOFD; i++) + dd->io_fd[i] = 0; + + dd->io_fd[0] = prv->poll_pipe[0]; +} + /* Open the disk file and initialize ram state. */ -int tdram_open (struct td_state *s, const char *name) -{ +int tdram_open (struct disk_driver *dd, const char *name) +{ + char *p; + uint64_t size; int i, fd, ret = 0, count = 0; - struct tdram_state *prv = (struct tdram_state *)s->private; - uint64_t size; - char *p; - s->private = prv; + struct td_state *s = dd->td_state; + struct tdram_state *prv = (struct tdram_state *)dd->private; connections++; @@ -209,88 +220,80 @@ int tdram_open (struct td_state *s, cons ret = 0; } + init_fds(dd); done: return ret; } - int tdram_queue_read(struct td_state *s, uint64_t sector, - int nb_sectors, char *buf, td_callback_t cb, - int id, void *private) -{ - struct tdram_state *prv = (struct tdram_state *)s->private; + int tdram_queue_read(struct disk_driver *dd, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) +{ + struct td_state *s = dd->td_state; + struct tdram_state *prv = (struct tdram_state *)dd->private; int size = nb_sectors * s->sector_size; uint64_t offset = sector * (uint64_t)s->sector_size; - int ret; memcpy(buf, img + offset, size); - ret = size; - - cb(s, (ret < 0) ? ret: 0, id, private); - - return ret; -} - - int tdram_queue_write(struct td_state *s, uint64_t sector, - int nb_sectors, char *buf, td_callback_t cb, - int id, void *private) -{ - struct tdram_state *prv = (struct tdram_state *)s->private; + + return cb(dd, 0, sector, nb_sectors, id, private); +} + +int tdram_queue_write(struct disk_driver *dd, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, + int id, void *private) +{ + struct td_state *s = dd->td_state; + struct tdram_state *prv = (struct tdram_state *)dd->private; int size = nb_sectors * s->sector_size; uint64_t offset = sector * (uint64_t)s->sector_size; - int ret; - - /*We assume that write access is controlled at a higher level for multiple disks*/ + + /* We assume that write access is controlled + * at a higher level for multiple disks */ memcpy(img + offset, buf, size); - ret = size; - - cb(s, (ret < 0) ? ret : 0, id, private); - - return ret; + + return cb(dd, 0, sector, nb_sectors, id, private); } -int tdram_submit(struct td_state *s) +int tdram_submit(struct disk_driver *dd) { return 0; } - -int *tdram_get_fd(struct td_state *s) -{ - struct tdram_state *prv = (struct tdram_state *)s->private; - int *fds, i; - - fds = malloc(sizeof(int) * MAX_IOFD); - /*initialise the FD array*/ - for(i=0;i<MAX_IOFD;i++) fds[i] = 0; - - fds[0] = prv->poll_pipe[0]; - return fds; -} - -int tdram_close(struct td_state *s) -{ - struct tdram_state *prv = (struct tdram_state *)s->private; +int tdram_close(struct disk_driver *dd) +{ + struct tdram_state *prv = (struct tdram_state *)dd->private; connections--; return 0; } -int tdram_do_callbacks(struct td_state *s, int sid) +int tdram_do_callbacks(struct disk_driver *dd, int sid) { /* always ask for a kick */ return 1; } +int tdram_has_parent(struct disk_driver *dd) +{ + return 0; +} + +int tdram_get_parent(struct disk_driver *dd, struct disk_driver *parent) +{ + return -EINVAL; +} + struct tap_disk tapdisk_ram = { - "tapdisk_ram", - sizeof(struct tdram_state), - tdram_open, - tdram_queue_read, - tdram_queue_write, - tdram_submit, - tdram_get_fd, - tdram_close, - tdram_do_callbacks, + .disk_type = "tapdisk_ram", + .private_data_size = sizeof(struct tdram_state), + .td_open = tdram_open, + .td_queue_read = tdram_queue_read, + .td_queue_write = tdram_queue_write, + .td_submit = tdram_submit, + .td_has_parent = tdram_has_parent, + .td_get_parent = tdram_get_parent, + .td_close = tdram_close, + .td_do_callbacks = tdram_do_callbacks, }; - diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/block-sync.c --- a/tools/blktap/drivers/block-sync.c Fri Feb 16 16:34:28 2007 +0000 +++ b/tools/blktap/drivers/block-sync.c Fri Feb 16 20:31:27 2007 -0800 @@ -106,12 +106,23 @@ static int get_image_info(struct td_stat return 0; } +static inline void init_fds(struct disk_driver *dd) +{ + int i; + struct tdsync_state *prv = (struct tdsync_state *)dd->private; + + for(i = 0; i < MAX_IOFD; i++) + dd->io_fd[i] = 0; + + dd->io_fd[0] = prv->poll_pipe[0]; +} + /* Open the disk file and initialize aio state. */ -int tdsync_open (struct td_state *s, const char *name) +int tdsync_open (struct disk_driver *dd, const char *name) { int i, fd, ret = 0; - struct tdsync_state *prv = (struct tdsync_state *)s->private; - s->private = prv; + struct td_state *s = dd->td_state; + struct tdsync_state *prv = (struct tdsync_state *)dd->private; /* set up a pipe so that we can hand back a poll fd that won't fire.*/ ret = pipe(prv->poll_pipe); @@ -138,16 +149,18 @@ int tdsync_open (struct td_state *s, con prv->fd = fd; + init_fds(dd); ret = get_image_info(s, fd); done: return ret; } - int tdsync_queue_read(struct td_state *s, uint64_t sector, + int tdsync_queue_read(struct disk_driver *dd, uint64_t sector, int nb_sectors, char *buf, td_callback_t cb, int id, void *private) { - struct tdsync_state *prv = (struct tdsync_state *)s->private; + struct td_state *s = dd->td_state; + struct tdsync_state *prv = (struct tdsync_state *)dd->private; int size = nb_sectors * s->sector_size; uint64_t offset = sector * (uint64_t)s->sector_size; int ret; @@ -162,16 +175,15 @@ done: } } else ret = 0 - errno; - cb(s, (ret < 0) ? ret: 0, id, private); - - return 1; -} - - int tdsync_queue_write(struct td_state *s, uint64_t sector, + return cb(dd, (ret < 0) ? ret: 0, sector, nb_sectors, id, private); +} + + int tdsync_queue_write(struct disk_driver *dd, uint64_t sector, int nb_sectors, char *buf, td_callback_t cb, int id, void *private) { - struct tdsync_state *prv = (struct tdsync_state *)s->private; + struct td_state *s = dd->td_state; + struct tdsync_state *prv = (struct tdsync_state *)dd->private; int size = nb_sectors * s->sector_size; uint64_t offset = sector * (uint64_t)s->sector_size; int ret = 0; @@ -186,34 +198,17 @@ done: } } else ret = 0 - errno; - cb(s, (ret < 0) ? ret : 0, id, private); - - return 1; + return cb(dd, (ret < 0) ? ret : 0, sector, nb_sectors, id, private); } -int tdsync_submit(struct td_state *s) +int tdsync_submit(struct disk_driver *dd) { return 0; } - -int *tdsync_get_fd(struct td_state *s) -{ - struct tdsync_state *prv = (struct tdsync_state *)s->private; - - int *fds, i; - - fds = malloc(sizeof(int) * MAX_IOFD); - /*initialise the FD array*/ - for(i=0;i<MAX_IOFD;i++) fds[i] = 0; - - fds[0] = prv->poll_pipe[0]; - return fds; -} - -int tdsync_close(struct td_state *s) -{ - struct tdsync_state *prv = (struct tdsync_state *)s->private; +int tdsync_close(struct disk_driver *dd) +{ + struct tdsync_state *prv = (struct tdsync_state *)dd->private; close(prv->fd); close(prv->poll_pipe[0]); @@ -222,21 +217,31 @@ int tdsync_close(struct td_state *s) return 0; } -int tdsync_do_callbacks(struct td_state *s, int sid) +int tdsync_do_callbacks(struct disk_driver *dd, int sid) { /* always ask for a kick */ return 1; } +int tdsync_has_parent(struct disk_driver *dd) +{ + return 0; +} + +int tdsync_get_parent(struct disk_driver *dd, struct disk_driver *parent) +{ + return -EINVAL; +} + struct tap_disk tapdisk_sync = { - "tapdisk_sync", - sizeof(struct tdsync_state), - tdsync_open, - tdsync_queue_read, - tdsync_queue_write, - tdsync_submit, - tdsync_get_fd, - tdsync_close, - tdsync_do_callbacks, + .disk_type = "tapdisk_sync", + .private_data_size = sizeof(struct tdsync_state), + .td_open = tdsync_open, + .td_queue_read = tdsync_queue_read, + .td_queue_write = tdsync_queue_write, + .td_submit = tdsync_submit, + .td_has_parent = tdsync_has_parent, + .td_get_parent = tdsync_get_parent, + .td_close = tdsync_close, + .td_do_callbacks = tdsync_do_callbacks, }; - diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/block-vmdk.c --- a/tools/blktap/drivers/block-vmdk.c Fri Feb 16 16:34:28 2007 +0000 +++ b/tools/blktap/drivers/block-vmdk.c Fri Feb 16 20:31:27 2007 -0800 @@ -107,14 +107,25 @@ struct tdvmdk_state { unsigned int cluster_sectors; }; +static inline void init_fds(struct disk_driver *dd) +{ + int i; + struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private; + + for (i = 0; i < MAX_IOFD; i++) + dd->io_fd[i] = 0; + + dd->io_fd[0] = prv->poll_pipe[0]; +} /* Open the disk file and initialize aio state. */ -static int tdvmdk_open (struct td_state *s, const char *name) +static int tdvmdk_open (struct disk_driver *dd, const char *name) { int ret, fd; int l1_size, i; uint32_t magic; - struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; + struct td_state *s = dd->td_state; + struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private; /* set up a pipe so that we can hand back a poll fd that won't fire.*/ ret = pipe(prv->poll_pipe); @@ -206,6 +217,7 @@ static int tdvmdk_open (struct td_state if (!prv->l2_cache) goto fail; prv->fd = fd; + init_fds(dd); DPRINTF("VMDK File opened successfully\n"); return 0; @@ -218,10 +230,9 @@ fail: return -1; } -static uint64_t get_cluster_offset(struct td_state *s, +static uint64_t get_cluster_offset(struct tdvmdk_state *prv, uint64_t offset, int allocate) { - struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; unsigned int l1_index, l2_offset, l2_index; int min_index, i, j; uint32_t min_count, *l2_table, tmp; @@ -291,16 +302,17 @@ static uint64_t get_cluster_offset(struc return cluster_offset; } -static int tdvmdk_queue_read(struct td_state *s, uint64_t sector, +static int tdvmdk_queue_read(struct disk_driver *dd, uint64_t sector, int nb_sectors, char *buf, td_callback_t cb, int id, void *private) { - struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; + struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private; int index_in_cluster, n; uint64_t cluster_offset; int ret = 0; + while (nb_sectors > 0) { - cluster_offset = get_cluster_offset(s, sector << 9, 0); + cluster_offset = get_cluster_offset(prv, sector << 9, 0); index_in_cluster = sector % prv->cluster_sectors; n = prv->cluster_sectors - index_in_cluster; if (n > nb_sectors) @@ -321,27 +333,24 @@ static int tdvmdk_queue_read(struct td_s buf += n * 512; } done: - cb(s, ret == -1 ? -1 : 0, id, private); - - return 1; -} - -static int tdvmdk_queue_write(struct td_state *s, uint64_t sector, + return cb(dd, ret == -1 ? -1 : 0, sector, nb_sectors, id, private); +} + +static int tdvmdk_queue_write(struct disk_driver *dd, uint64_t sector, int nb_sectors, char *buf, td_callback_t cb, int id, void *private) { - struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; + struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private; int index_in_cluster, n; uint64_t cluster_offset; int ret = 0; - while (nb_sectors > 0) { index_in_cluster = sector & (prv->cluster_sectors - 1); n = prv->cluster_sectors - index_in_cluster; if (n > nb_sectors) n = nb_sectors; - cluster_offset = get_cluster_offset(s, sector << 9, 1); + cluster_offset = get_cluster_offset(prv, sector << 9, 1); if (!cluster_offset) { ret = -1; goto done; @@ -358,33 +367,17 @@ static int tdvmdk_queue_write(struct td buf += n * 512; } done: - cb(s, ret == -1 ? -1 : 0, id, private); - - return 1; + return cb(dd, ret == -1 ? -1 : 0, sector, nb_sectors, id, private); } -static int tdvmdk_submit(struct td_state *s) +static int tdvmdk_submit(struct disk_driver *dd) { return 0; } - -static int *tdvmdk_get_fd(struct td_state *s) -{ - struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; - int *fds, i; - - fds = malloc(sizeof(int) * MAX_IOFD); - /*initialise the FD array*/ - for (i=0;i<MAX_IOFD;i++) fds[i] = 0; - - fds[0] = prv->poll_pipe[0]; - return fds; -} - -static int tdvmdk_close(struct td_state *s) -{ - struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; +static int tdvmdk_close(struct disk_driver *dd) +{ + struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private; safer_free(prv->l1_table); safer_free(prv->l1_backup_table); @@ -395,21 +388,31 @@ static int tdvmdk_close(struct td_state return 0; } -static int tdvmdk_do_callbacks(struct td_state *s, int sid) +static int tdvmdk_do_callbacks(struct disk_driver *dd, int sid) { /* always ask for a kick */ return 1; } +static int tdvmdk_has_parent(struct disk_driver *dd) +{ + return 0; +} + +static int tdvmdk_get_parent(struct disk_driver *dd, struct disk_driver *parent) +{ + return -EINVAL; +} + struct tap_disk tapdisk_vmdk = { - "tapdisk_vmdk", - sizeof(struct tdvmdk_state), - tdvmdk_open, - tdvmdk_queue_read, - tdvmdk_queue_write, - tdvmdk_submit, - tdvmdk_get_fd, - tdvmdk_close, - tdvmdk_do_callbacks, + .disk_type = "tapdisk_vmdk", + .private_data_size = sizeof(struct tdvmdk_state), + .td_open = tdvmdk_open, + .td_queue_read = tdvmdk_queue_read, + .td_queue_write = tdvmdk_queue_write, + .td_submit = tdvmdk_submit, + .td_has_parent = tdvmdk_has_parent, + .td_get_parent = tdvmdk_get_parent, + .td_close = tdvmdk_close, + .td_do_callbacks = tdvmdk_do_callbacks, }; - diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/img2qcow.c --- a/tools/blktap/drivers/img2qcow.c Fri Feb 16 16:34:28 2007 +0000 +++ b/tools/blktap/drivers/img2qcow.c Fri Feb 16 20:31:27 2007 -0800 @@ -147,7 +147,8 @@ static int get_image_info(struct td_stat return 0; } -static int send_responses(struct td_state *s, int res, int idx, void *private) +static int send_responses(struct disk_driver *dd, int res, uint64_t sec, + int nr_secs, int idx, void *private) { if (res < 0) DFPRINTF("AIO FAILURE: res [%d]!\n",res); @@ -159,7 +160,7 @@ static int send_responses(struct td_stat int main(int argc, char *argv[]) { - struct tap_disk *drv; + struct disk_driver dd; struct td_state *s; int ret = -1, fd, len; fd_set readfds; @@ -195,16 +196,17 @@ int main(int argc, char *argv[]) } else DFPRINTF("Qcow file created: size %llu sectors\n", (long long unsigned)s->size); - drv = &tapdisk_qcow; - s->private = malloc(drv->private_data_size); + dd.td_state = s; + dd.drv = &tapdisk_qcow; + dd.private = malloc(dd.drv->private_data_size); /*Open qcow file*/ - if (drv->td_open(s, argv[1])!=0) { + if (dd.drv->td_open(&dd, argv[1])!=0) { DFPRINTF("Unable to open Qcow file [%s]\n",argv[1]); exit(-1); } - io_fd = drv->td_get_fd(s); + io_fd = dd.io_fd; /*Initialise the output string*/ memset(output,0x20,25); @@ -245,9 +247,9 @@ int main(int argc, char *argv[]) len = (len >> 9) << 9; } - ret = drv->td_queue_write(s, i >> 9, - len >> 9, buf, - send_responses, 0, buf); + ret = dd.drv->td_queue_write(&dd, i >> 9, + len >> 9, buf, + send_responses, 0, buf); if (!ret) submit_events++; @@ -261,7 +263,7 @@ int main(int argc, char *argv[]) debug_output(i,s->size << 9); if ((submit_events % 10 == 0) || complete) - drv->td_submit(s); + dd.drv->td_submit(&dd); timeout.tv_usec = 0; } else { @@ -275,14 +277,14 @@ int main(int argc, char *argv[]) ret = select(maxfds + 1, &readfds, (fd_set *) 0, (fd_set *) 0, &timeout); - if (ret > 0) drv->td_do_callbacks(s, 0); + if (ret > 0) dd.drv->td_do_callbacks(&dd, 0); if (complete && (returned_events == submit_events)) running = 0; } memcpy(output+prev+1,"=",1); DFPRINTF("\r%s 100%%\nTRANSFER COMPLETE\n\n", output); - drv->td_close(s); - free(s->private); + dd.drv->td_close(&dd); + free(dd.private); free(s); return 0; diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/qcow2raw.c --- a/tools/blktap/drivers/qcow2raw.c Fri Feb 16 16:34:28 2007 +0000 +++ b/tools/blktap/drivers/qcow2raw.c Fri Feb 16 20:31:27 2007 -0800 @@ -55,8 +55,7 @@ static int returned_read_events = 0, ret static int returned_read_events = 0, returned_write_events = 0; static int submit_events = 0; static uint32_t read_idx = 0, write_idx = 0; -struct tap_disk *drv1, *drv2; -struct td_state *sqcow, *saio; +struct disk_driver ddqcow, ddaio; static uint64_t prev = 0, written = 0; static char output[25]; @@ -100,7 +99,8 @@ static inline void LOCAL_FD_SET(fd_set * return; } -static int send_write_responses(struct td_state *s, int res, int idx, void *private) +static int send_write_responses(struct disk_driver *dd, int res, uint64_t sec, + int nr_secs, int idx, void *private) { if (res < 0) { DFPRINTF("AIO FAILURE: res [%d]!\n",res); @@ -112,12 +112,13 @@ static int send_write_responses(struct t if (complete && (returned_write_events == submit_events)) write_complete = 1; - debug_output(written, s->size << 9); + debug_output(written, dd->td_state->size << 9); free(private); return 0; } -static int send_read_responses(struct td_state *s, int res, int idx, void *private) +static int send_read_responses(struct disk_driver *dd, int res, uint64_t sec, + int nr_secs, int idx, void *private) { int ret; @@ -128,8 +129,8 @@ static int send_read_responses(struct td if (complete && (returned_read_events == submit_events)) read_complete = 1; - ret = drv2->td_queue_write(saio, idx, BLOCK_PROCESSSZ>>9, private, - send_write_responses, idx, private); + ret = ddaio.drv->td_queue_write(&ddaio, idx, BLOCK_PROCESSSZ>>9, private, + send_write_responses, idx, private); if (ret != 0) { DFPRINTF("ERROR in submitting queue write!\n"); return 0; @@ -137,7 +138,7 @@ static int send_read_responses(struct td if ( (complete && returned_read_events == submit_events) || (returned_read_events % 10 == 0) ) { - drv2->td_submit(saio); + ddaio.drv->td_submit(&ddaio); } return 0; @@ -161,20 +162,20 @@ int main(int argc, char *argv[]) exit(-1); } - sqcow = malloc(sizeof(struct td_state)); - saio = malloc(sizeof(struct td_state)); + ddqcow.td_state = malloc(sizeof(struct td_state)); + ddaio.td_state = malloc(sizeof(struct td_state)); /*Open qcow source file*/ - drv1 = &tapdisk_qcow; - sqcow->private = malloc(drv1->private_data_size); - - if (drv1->td_open(sqcow, argv[2])!=0) { + ddqcow.drv = &tapdisk_qcow; + ddqcow.private = malloc(ddqcow.drv->private_data_size); + + if (ddqcow.drv->td_open(&ddqcow, argv[2])!=0) { DFPRINTF("Unable to open Qcow file [%s]\n",argv[2]); exit(-1); } else DFPRINTF("QCOW file opened, size %llu\n", - (long long unsigned)sqcow->size); - - qcowio_fd = drv1->td_get_fd(sqcow); + (long long unsigned)ddqcow.td_state->size); + + qcowio_fd = ddqcow.io_fd; /*Setup aio destination file*/ ret = stat(argv[1],&finfo); @@ -191,12 +192,12 @@ int main(int argc, char *argv[]) argv[1], 0 - errno); exit(-1); } - if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) { + if (ftruncate(fd, (off_t)ddqcow.td_state->size<<9) < 0) { DFPRINTF("Unable to create file " "[%s] of size %llu (errno %d). " "Exiting...\n", argv[1], - (long long unsigned)sqcow->size<<9, + (long long unsigned)ddqcow.td_state->size<<9, 0 - errno); close(fd); exit(-1); @@ -238,43 +239,43 @@ int main(int argc, char *argv[]) close(fd); exit(-1); } - if (size < sqcow->size<<9) { + if (size < ddqcow.td_state->size<<9) { DFPRINTF("ERROR: Not enough space on device " "%s (%lu bytes available, %llu bytes required\n", argv[1], size, - (long long unsigned)sqcow->size<<9); + (long long unsigned)ddqcow.td_state->size<<9); close(fd); exit(-1); } } else { - if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) { + if (ftruncate(fd, (off_t)ddqcow.td_state->size<<9) < 0) { DFPRINTF("Unable to create file " "[%s] of size %llu (errno %d). " "Exiting...\n", argv[1], - (long long unsigned)sqcow->size<<9, + (long long unsigned)ddqcow.td_state->size<<9, 0 - errno); close(fd); exit(-1); } else DFPRINTF("File [%s] truncated to length %llu " "(%llu)\n", argv[1], - (long long unsigned)sqcow->size<<9, - (long long unsigned)sqcow->size); + (long long unsigned)ddqcow.td_state->size<<9, + (long long unsigned)ddqcow.td_state->size); } close(fd); } /*Open aio destination file*/ - drv2 = &tapdisk_aio; - saio->private = malloc(drv2->private_data_size); - - if (drv2->td_open(saio, argv[1])!=0) { + ddaio.drv = &tapdisk_aio; + ddaio.private = malloc(ddaio.drv->private_data_size); + + if (ddaio.drv->td_open(&ddaio, argv[1])!=0) { DFPRINTF("Unable to open Qcow file [%s]\n", argv[1]); exit(-1); } - aio_fd = drv2->td_get_fd(saio); + aio_fd = ddaio.io_fd; /*Initialise the output string*/ memset(output,0x20,25); @@ -298,9 +299,9 @@ int main(int argc, char *argv[]) } /*Attempt to read 4k sized blocks*/ - ret = drv1->td_queue_read(sqcow, i>>9, - BLOCK_PROCESSSZ>>9, buf, - send_read_responses, i>>9, buf); + ret = ddqcow.drv->td_queue_read(&ddqcow, i>>9, + BLOCK_PROCESSSZ>>9, buf, + send_read_responses, i>>9, buf); if (ret < 0) { DFPRINTF("UNABLE TO READ block [%llu]\n", @@ -311,12 +312,12 @@ int main(int argc, char *argv[]) submit_events++; } - if (i >= sqcow->size<<9) { + if (i >= ddqcow.td_state->size<<9) { complete = 1; } if ((submit_events % 10 == 0) || complete) - drv1->td_submit(sqcow); + ddqcow.drv->td_submit(&ddqcow); timeout.tv_usec = 0; } else { @@ -332,9 +333,9 @@ int main(int argc, char *argv[]) if (ret > 0) { if (FD_ISSET(qcowio_fd[0], &readfds)) - drv1->td_do_callbacks(sqcow, 0); + ddqcow.drv->td_do_callbacks(&ddqcow, 0); if (FD_ISSET(aio_fd[0], &readfds)) - drv2->td_do_callbacks(saio, 0); + ddaio.drv->td_do_callbacks(&ddaio, 0); } if (complete && (returned_write_events == submit_events)) running = 0; diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/tapdisk.c --- a/tools/blktap/drivers/tapdisk.c Fri Feb 16 16:34:28 2007 +0000 +++ b/tools/blktap/drivers/tapdisk.c Fri Feb 16 20:31:27 2007 -0800 @@ -48,6 +48,12 @@ int connected_disks = 0; int connected_disks = 0; fd_list_entry_t *fd_start = NULL; +int do_cow_read(struct disk_driver *dd, blkif_request_t *req, + int sidx, uint64_t sector, int nr_secs); + +#define td_for_each_disk(tds, drv) \ + for (drv = tds->disks; drv != NULL; drv = drv->next) + void usage(void) { fprintf(stderr, "blktap-utils: v1.0.0\n"); @@ -78,10 +84,17 @@ static void unmap_disk(struct td_state * static void unmap_disk(struct td_state *s) { tapdev_info_t *info = s->ring_info; - struct tap_disk *drv = s->drv; + struct disk_driver *dd, *tmp; fd_list_entry_t *entry; - drv->td_close(s); + dd = s->disks; + while (dd) { + tmp = dd->next; + dd->drv->td_close(dd); + free(dd->private); + free(dd); + dd = tmp; + } if (info != NULL && info->mem > 0) munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE); @@ -96,7 +109,6 @@ static void unmap_disk(struct td_state * free(s->fd_entry); free(s->blkif); free(s->ring_info); - free(s->private); free(s); return; @@ -113,16 +125,19 @@ static inline int LOCAL_FD_SET(fd_set *r static inline int LOCAL_FD_SET(fd_set *readfds) { fd_list_entry_t *ptr; + struct disk_driver *dd; ptr = fd_start; while (ptr != NULL) { if (ptr->tap_fd) { FD_SET(ptr->tap_fd, readfds); - if (ptr->io_fd[READ]) - FD_SET(ptr->io_fd[READ], readfds); - maxfds = (ptr->io_fd[READ] > maxfds ? - ptr->io_fd[READ]: maxfds); - maxfds = (ptr->tap_fd > maxfds ? ptr->tap_fd: maxfds); + td_for_each_disk(ptr->s, dd) { + if (dd->io_fd[READ]) + FD_SET(dd->io_fd[READ], readfds); + maxfds = (dd->io_fd[READ] > maxfds ? + dd->io_fd[READ] : maxfds); + } + maxfds = (ptr->tap_fd > maxfds ? ptr->tap_fd : maxfds); } ptr = ptr->next; } @@ -130,8 +145,7 @@ static inline int LOCAL_FD_SET(fd_set *r return 0; } -static inline fd_list_entry_t *add_fd_entry( - int tap_fd, int io_fd[MAX_IOFD], struct td_state *s) +static inline fd_list_entry_t *add_fd_entry(int tap_fd, struct td_state *s) { fd_list_entry_t **pprev, *entry; int i; @@ -139,12 +153,10 @@ static inline fd_list_entry_t *add_fd_en DPRINTF("Adding fd_list_entry\n"); /*Add to linked list*/ - s->fd_entry = entry = malloc(sizeof(fd_list_entry_t)); + s->fd_entry = entry = malloc(sizeof(fd_list_entry_t)); entry->tap_fd = tap_fd; - for (i = 0; i < MAX_IOFD; i++) - entry->io_fd[i] = io_fd[i]; - entry->s = s; - entry->next = NULL; + entry->s = s; + entry->next = NULL; pprev = &fd_start; while (*pprev != NULL) @@ -171,7 +183,7 @@ static struct tap_disk *get_driver(int d static struct tap_disk *get_driver(int drivertype) { /* blktapctrl has passed us the driver type */ - + return dtypes[drivertype]->drv; } @@ -183,12 +195,34 @@ static struct td_state *state_init(void) s = malloc(sizeof(struct td_state)); blkif = s->blkif = malloc(sizeof(blkif_t)); - s->ring_info = malloc(sizeof(tapdev_info_t)); - - for (i = 0; i < MAX_REQUESTS; i++) - blkif->pending_list[i].count = 0; + s->ring_info = calloc(1, sizeof(tapdev_info_t)); + + for (i = 0; i < MAX_REQUESTS; i++) { + blkif->pending_list[i].secs_pending = 0; + blkif->pending_list[i].submitting = 0; + } return s; +} + +static struct disk_driver *disk_init(struct td_state *s, struct tap_disk *drv) +{ + struct disk_driver *dd; + + dd = calloc(1, sizeof(struct disk_driver)); + if (!dd) + return NULL; + + dd->private = malloc(drv->private_data_size); + if (!dd->private) { + free(dd); + return NULL; + } + + dd->drv = drv; + dd->td_state = s; + + return dd; } static int map_new_dev(struct td_state *s, int minor) @@ -246,6 +280,51 @@ static int map_new_dev(struct td_state * return -1; } +static int open_disk(struct td_state *s, struct disk_driver *dd, char *path) +{ + int err; + struct disk_driver *d = dd; + + err = dd->drv->td_open(dd, path); + if (err) + return err; + + /* load backing files as necessary */ + while (d->drv->td_has_parent(d)) { + struct disk_driver *new; + + new = calloc(1, sizeof(struct disk_driver)); + if (!new) + goto fail; + new->drv = d->drv; + new->td_state = s; + new->private = malloc(new->drv->private_data_size); + if (!new->private) { + free(new); + goto fail; + } + + err = d->drv->td_get_parent(d, new); + if (err) + goto fail; + + d = d->next = new; + } + + return 0; + + fail: + DPRINTF("failed opening disk\n"); + while (dd) { + d = dd->next; + dd->drv->td_close(dd); + free(dd->private); + free(dd); + dd = d; + } + return err; +} + static int read_msg(char *buf) { int length, len, msglen, tap_fd, *io_fd; @@ -255,6 +334,7 @@ static int read_msg(char *buf) msg_newdev_t *msg_dev; msg_pid_t *msg_pid; struct tap_disk *drv; + struct disk_driver *dd; int ret = -1; struct td_state *s = NULL; fd_list_entry_t *entry; @@ -289,20 +369,20 @@ static int read_msg(char *buf) if (s == NULL) goto params_done; - s->drv = drv; - s->private = malloc(drv->private_data_size); - if (s->private == NULL) { + s->disks = dd = disk_init(s, drv); + if (!dd) { free(s); goto params_done; } /*Open file*/ - ret = drv->td_open(s, path); - io_fd = drv->td_get_fd(s); - - entry = add_fd_entry(0, io_fd, s); + ret = open_disk(s, dd, path); + if (ret) + goto params_done; + + entry = add_fd_entry(0, s); entry->cookie = msg->cookie; - DPRINTF("Entered cookie %d\n",entry->cookie); + DPRINTF("Entered cookie %d\n", entry->cookie); memset(buf, 0x00, MSG_SIZE); @@ -323,13 +403,12 @@ static int read_msg(char *buf) free(path); return 1; - - case CTLMSG_NEWDEV: msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t)); s = get_state(msg->cookie); - DPRINTF("Retrieving state, cookie %d.....[%s]\n",msg->cookie, (s == NULL ? "FAIL":"OK")); + DPRINTF("Retrieving state, cookie %d.....[%s]\n", + msg->cookie, (s == NULL ? "FAIL":"OK")); if (s != NULL) { ret = ((map_new_dev(s, msg_dev->devnum) == msg_dev->devnum ? 0: -1)); @@ -397,49 +476,75 @@ static inline void kick_responses(struct } } -void io_done(struct td_state *s, int sid) -{ - struct tap_disk *drv = s->drv; +void io_done(struct disk_driver *dd, int sid) +{ + struct tap_disk *drv = dd->drv; if (!run) return; /*We have received signal to close*/ - if (drv->td_do_callbacks(s, sid) > 0) kick_responses(s); + if (drv->td_do_callbacks(dd, sid) > 0) kick_responses(dd->td_state); return; } -int send_responses(struct td_state *s, int res, int idx, void *private) -{ +static inline uint64_t +segment_start(blkif_request_t *req, int sidx) +{ + int i; + uint64_t start = req->sector_number; + + for (i = 0; i < sidx; i++) + start += (req->seg[i].last_sect - req->seg[i].first_sect + 1); + + return start; +} + +uint64_t sends, responds; +int send_responses(struct disk_driver *dd, int res, + uint64_t sector, int nr_secs, int idx, void *private) +{ + pending_req_t *preq; blkif_request_t *req; int responses_queued = 0; + struct td_state *s = dd->td_state; blkif_t *blkif = s->blkif; - - req = &blkif->pending_list[idx].req; - - if ( (idx > MAX_REQUESTS-1) || - (blkif->pending_list[idx].count == 0) ) + int sidx = (int)private, secs_done = nr_secs; + + if ( (idx > MAX_REQUESTS-1) ) { DPRINTF("invalid index returned(%u)!\n", idx); return 0; } + preq = &blkif->pending_list[idx]; + req = &preq->req; + + if (res == BLK_NOT_ALLOCATED) { + res = do_cow_read(dd, req, sidx, sector, nr_secs); + if (res >= 0) { + secs_done = res; + res = 0; + } else + secs_done = 0; + } + + preq->secs_pending -= secs_done; + + if (res == -EBUSY && preq->submitting) + return -EBUSY; /* propagate -EBUSY back to higher layers */ + if (res) + preq->status = BLKIF_RSP_ERROR; - if (res != 0) { - blkif->pending_list[idx].status = BLKIF_RSP_ERROR; - } - - blkif->pending_list[idx].count--; - - if (blkif->pending_list[idx].count == 0) + if (!preq->submitting && preq->secs_pending == 0) { blkif_request_t tmp; blkif_response_t *rsp; - - tmp = blkif->pending_list[idx].req; + + tmp = preq->req; rsp = (blkif_response_t *)req; rsp->id = tmp.id; rsp->operation = tmp.operation; - rsp->status = blkif->pending_list[idx].status; + rsp->status = preq->status; write_rsp_to_ring(s, rsp); responses_queued++; @@ -447,15 +552,51 @@ int send_responses(struct td_state *s, i return responses_queued; } +int do_cow_read(struct disk_driver *dd, blkif_request_t *req, + int sidx, uint64_t sector, int nr_secs) +{ + char *page; + int ret, early; + uint64_t seg_start, seg_end; + struct td_state *s = dd->td_state; + tapdev_info_t *info = s->ring_info; + struct disk_driver *parent = dd->next; + + seg_start = segment_start(req, sidx); + seg_end = seg_start + req->seg[sidx].last_sect + 1; + + ASSERT(sector >= seg_start && sector + nr_secs <= seg_end); + + page = (char *)MMAP_VADDR(info->vstart, + (unsigned long)req->id, sidx); + page += (req->seg[sidx].first_sect << SECTOR_SHIFT); + page += ((sector - seg_start) << SECTOR_SHIFT); + + if (!parent) { + memset(page, 0, nr_secs << SECTOR_SHIFT); + return nr_secs; + } + + /* reissue request to backing file */ + ret = parent->drv->td_queue_read(parent, sector, nr_secs, + page, send_responses, + req->id, (void *)sidx); + if (ret > 0) + parent->early += ret; + + return ((ret >= 0) ? 0 : ret); +} + static void get_io_request(struct td_state *s) { - RING_IDX rp, rc, j, i, ret; + RING_IDX rp, rc, j, i; blkif_request_t *req; - int idx, nsects; + int idx, nsects, ret; uint64_t sector_nr; char *page; int early = 0; /* count early completions */ - struct tap_disk *drv = s->drv; + struct disk_driver *dd = s->disks; + struct tap_disk *drv = dd->drv; blkif_t *blkif = s->blkif; tapdev_info_t *info = s->ring_info; int page_size = getpagesize(); @@ -466,23 +607,33 @@ static void get_io_request(struct td_sta rmb(); for (j = info->fe_ring.req_cons; j != rp; j++) { - int done = 0; + int done = 0, start_seg = 0; req = NULL; req = RING_GET_REQUEST(&info->fe_ring, j); ++info->fe_ring.req_cons; if (req == NULL) continue; - + idx = req->id; - ASSERT(blkif->pending_list[idx].count == 0); - memcpy(&blkif->pending_list[idx].req, req, sizeof(*req)); - blkif->pending_list[idx].status = BLKIF_RSP_OKAY; - blkif->pending_list[idx].count = req->nr_segments; - - sector_nr = req->sector_number; - - for (i = 0; i < req->nr_segments; i++) { + + if (info->busy.req) { + /* continue where we left off last time */ + ASSERT(info->busy.req == req); + start_seg = info->busy.seg_idx; + sector_nr = segment_start(req, start_seg); + info->busy.seg_idx = 0; + info->busy.req = NULL; + } else { + ASSERT(blkif->pending_list[idx].secs_pending == 0); + memcpy(&blkif->pending_list[idx].req, + req, sizeof(*req)); + blkif->pending_list[idx].status = BLKIF_RSP_OKAY; + blkif->pending_list[idx].submitting = 1; + sector_nr = req->sector_number; + } + + for (i = start_seg; i < req->nr_segments; i++) { nsects = req->seg[i].last_sect - req->seg[i].first_sect + 1; @@ -508,31 +659,37 @@ static void get_io_request(struct td_sta (long long unsigned) sector_nr); continue; } - + + blkif->pending_list[idx].secs_pending += nsects; + switch (req->operation) { case BLKIF_OP_WRITE: - ret = drv->td_queue_write(s, sector_nr, - nsects, page, send_responses, - idx, NULL); - if (ret > 0) early += ret; + ret = drv->td_queue_write(dd, sector_nr, + nsects, page, + send_responses, + idx, (void *)i); + if (ret > 0) dd->early += ret; else if (ret == -EBUSY) { - /* - * TODO: Sector is locked * - * Need to put req back on queue * - */ + /* put req back on queue */ + --info->fe_ring.req_cons; + info->busy.req = req; + info->busy.seg_idx = i; + goto out; } break; case BLKIF_OP_READ: - ret = drv->td_queue_read(s, sector_nr, - nsects, page, send_responses, - idx, NULL); - if (ret > 0) early += ret; + ret = drv->td_queue_read(dd, sector_nr, + nsects, page, + send_responses, + idx, (void *)i); + if (ret > 0) dd->early += ret; else if (ret == -EBUSY) { - /* - * TODO: Sector is locked * - * Need to put req back on queue * - */ + /* put req back on queue */ + --info->fe_ring.req_cons; + info->busy.req = req; + info->busy.seg_idx = i; + goto out; } break; default: @@ -541,14 +698,22 @@ static void get_io_request(struct td_sta } sector_nr += nsects; } - } - + blkif->pending_list[idx].submitting = 0; + /* force write_rsp_to_ring for synchronous case */ + if (blkif->pending_list[idx].secs_pending == 0) + dd->early += send_responses(dd, 0, 0, 0, idx, (void *)0); + } + + out: /*Batch done*/ - drv->td_submit(s); - - if (early > 0) - io_done(s,10); - + td_for_each_disk(s, dd) { + dd->early += dd->drv->td_submit(dd); + if (dd->early > 0) { + io_done(dd, 10); + dd->early = 0; + } + } + return; } @@ -558,10 +723,9 @@ int main(int argc, char *argv[]) char *p, *buf; fd_set readfds, writefds; fd_list_entry_t *ptr; - struct tap_disk *drv; struct td_state *s; char openlogbuf[128]; - + if (argc != 3) usage(); daemonize(); @@ -573,12 +737,12 @@ int main(int argc, char *argv[]) signal (SIGINT, sig_handler); /*Open the control channel*/ - fds[READ] = open(argv[1],O_RDWR|O_NONBLOCK); + fds[READ] = open(argv[1],O_RDWR|O_NONBLOCK); fds[WRITE] = open(argv[2],O_RDWR|O_NONBLOCK); if ( (fds[READ] < 0) || (fds[WRITE] < 0) ) { - DPRINTF("FD open failed [%d,%d]\n",fds[READ], fds[WRITE]); + DPRINTF("FD open failed [%d,%d]\n", fds[READ], fds[WRITE]); exit(-1); } @@ -608,11 +772,22 @@ int main(int argc, char *argv[]) { ptr = fd_start; while (ptr != NULL) { - if (FD_ISSET(ptr->tap_fd, &readfds)) + int progress_made = 0; + struct disk_driver *dd; + tapdev_info_t *info = ptr->s->ring_info; + + td_for_each_disk(ptr->s, dd) { + if (dd->io_fd[READ] && + FD_ISSET(dd->io_fd[READ], + &readfds)) { + io_done(dd, READ); + progress_made = 1; + } + } + + if (FD_ISSET(ptr->tap_fd, &readfds) || + (info->busy.req && progress_made)) get_io_request(ptr->s); - if (ptr->io_fd[READ] && - FD_ISSET(ptr->io_fd[READ], &readfds)) - io_done(ptr->s, READ); ptr = ptr->next; } @@ -628,11 +803,8 @@ int main(int argc, char *argv[]) ptr = fd_start; while (ptr != NULL) { s = ptr->s; - drv = s->drv; unmap_disk(s); - drv->td_close(s); - free(s->private); free(s->blkif); free(s->ring_info); free(s); diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/tapdisk.h --- a/tools/blktap/drivers/tapdisk.h Fri Feb 16 16:34:28 2007 +0000 +++ b/tools/blktap/drivers/tapdisk.h Fri Feb 16 20:31:27 2007 -0800 @@ -43,6 +43,9 @@ * - The fd used for poll is an otherwise unused pipe, which allows poll to * be safely called without ever returning anything. * + * NOTE: tapdisk uses the number of sectors submitted per request as a + * ref count. Plugins must use the callback function to communicate the + * completion--or error--of every sector submitted to them. */ #ifndef TAPDISK_H_ @@ -65,39 +68,55 @@ #define SECTOR_SHIFT 9 #define DEFAULT_SECTOR_SIZE 512 +#define MAX_IOFD 2 + +#define BLK_NOT_ALLOCATED 99 + +struct td_state; +struct tap_disk; + +struct disk_driver { + int early; + void *private; + int io_fd[MAX_IOFD]; + struct tap_disk *drv; + struct td_state *td_state; + struct disk_driver *next; +}; + /* This structure represents the state of an active virtual disk. */ struct td_state { - void *private; - void *drv; + struct disk_driver *disks; void *blkif; void *image; void *ring_info; void *fd_entry; - char backing_file[1024]; /*Used by differencing disks, e.g. qcow*/ unsigned long sector_size; unsigned long long size; unsigned int info; }; /* Prototype of the callback to activate as requests complete. */ -typedef int (*td_callback_t)(struct td_state *s, int res, int id, void *prv); +typedef int (*td_callback_t)(struct disk_driver *dd, int res, uint64_t sector, + int nb_sectors, int id, void *private); /* Structure describing the interface to a virtual disk implementation. */ /* See note at the top of this file describing this interface. */ struct tap_disk { const char *disk_type; int private_data_size; - int (*td_open) (struct td_state *s, const char *name); - int (*td_queue_read) (struct td_state *s, uint64_t sector, - int nb_sectors, char *buf, td_callback_t cb, + int (*td_open) (struct disk_driver *dd, const char *name); + int (*td_queue_read) (struct disk_driver *dd, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, int id, void *prv); - int (*td_queue_write) (struct td_state *s, uint64_t sector, - int nb_sectors, char *buf, td_callback_t cb, + int (*td_queue_write) (struct disk_driver *dd, uint64_t sector, + int nb_sectors, char *buf, td_callback_t cb, int id, void *prv); - int (*td_submit) (struct td_state *s); - int *(*td_get_fd) (struct td_state *s); - int (*td_close) (struct td_state *s); - int (*td_do_callbacks)(struct td_state *s, int sid); + int (*td_submit) (struct disk_driver *dd); + int (*td_has_parent) (struct disk_driver *dd); + int (*td_get_parent) (struct disk_driver *dd, struct disk_driver *p); + int (*td_close) (struct disk_driver *dd); + int (*td_do_callbacks)(struct disk_driver *dd, int sid); }; typedef struct disk_info { @@ -119,14 +138,13 @@ extern struct tap_disk tapdisk_ram; extern struct tap_disk tapdisk_ram; extern struct tap_disk tapdisk_qcow; -#define MAX_DISK_TYPES 20 -#define MAX_IOFD 2 - -#define DISK_TYPE_AIO 0 -#define DISK_TYPE_SYNC 1 -#define DISK_TYPE_VMDK 2 -#define DISK_TYPE_RAM 3 -#define DISK_TYPE_QCOW 4 +#define MAX_DISK_TYPES 20 + +#define DISK_TYPE_AIO 0 +#define DISK_TYPE_SYNC 1 +#define DISK_TYPE_VMDK 2 +#define DISK_TYPE_RAM 3 +#define DISK_TYPE_QCOW 4 /*Define Individual Disk Parameters here */ @@ -197,12 +215,10 @@ typedef struct fd_list_entry { typedef struct fd_list_entry { int cookie; int tap_fd; - int io_fd[MAX_IOFD]; struct td_state *s; struct fd_list_entry **pprev, *next; } fd_list_entry_t; int qcow_create(const char *filename, uint64_t total_size, const char *backing_file, int flags); - #endif /*TAPDISK_H_*/ diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/lib/blktaplib.h --- a/tools/blktap/lib/blktaplib.h Fri Feb 16 16:34:28 2007 +0000 +++ b/tools/blktap/lib/blktaplib.h Fri Feb 16 20:31:27 2007 -0800 @@ -91,8 +91,9 @@ struct blkif; typedef struct { blkif_request_t req; - struct blkif *blkif; - int count; + struct blkif *blkif; + int submitting; + int secs_pending; int16_t status; } pending_req_t; @@ -116,7 +117,7 @@ typedef struct blkif { void *prv; /* device-specific data */ void *info; /*Image parameter passing */ - pending_req_t pending_list[MAX_REQUESTS]; + pending_req_t pending_list[MAX_REQUESTS]; int devnum; int fds[2]; int be_id; @@ -141,6 +142,11 @@ void free_blkif(blkif_t *blkif); void free_blkif(blkif_t *blkif); void __init_blkif(void); +typedef struct busy_state { + int seg_idx; + blkif_request_t *req; +} busy_state_t; + typedef struct tapdev_info { int fd; char *mem; @@ -148,6 +154,7 @@ typedef struct tapdev_info { blkif_back_ring_t fe_ring; unsigned long vstart; blkif_t *blkif; + busy_state_t busy; } tapdev_info_t; typedef struct domid_translate { diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/lib/xs_api.c --- a/tools/blktap/lib/xs_api.c Fri Feb 16 16:34:28 2007 +0000 +++ b/tools/blktap/lib/xs_api.c Fri Feb 16 20:31:27 2007 -0800 @@ -311,8 +311,8 @@ int unregister_xenbus_watch(struct xs_ha } if (!xs_unwatch(h, watch->node, token)) - DPRINTF("XENBUS Failed to release watch %s: %i\n", - watch->node, er); + DPRINTF("XENBUS Failed to release watch %s\n", + watch->node); list_del(&watch->list); @@ -351,9 +351,9 @@ int xs_fire_next_watch(struct xs_handle node = res[XS_WATCH_PATH]; token = res[XS_WATCH_TOKEN]; - + w = find_watch(token); - if (w) + if (w) w->callback(h, w, node); free(res); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |