[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [RFC][PATCH] Use ioemu block drivers through blktap
Hi Konrad, first of all, thank you for your review. You noticed quite a few points I never really looked at because I inherited them from the current tapdisk code. But probably I should fix these issues as well. ;-) Konrad Rzeszutek schrieb: >> + blkif->fds[READ] = open_ctrl_socket(wrctldev); >> + blkif->fds[WRITE] = open_ctrl_socket(rdctldev); > > How about freeing the data here once? > >> + >> + if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) { >> + free(rdctldev); >> + free(wrctldev); > > And then this is not needed. > >> + return -1; >> + } >> + >> + DPRINTF("Attached to qemu blktap pipes\n"); >> + free(rdctldev); >> + free(wrctldev); > > Nor these two lines above. Hmm, good point. This code looks a bit silly... Will move the free to the place you suggested. >> --- a/tools/python/xen/xend/server/BlktapController.py Mon Mar 10 >> 22:51:57 2008 +0000 >> +++ b/tools/python/xen/xend/server/BlktapController.py Thu Mar 13 >> 13:00:18 2008 +0100 >> @@ -13,7 +13,9 @@ blktap_disk_types = [ >> 'vmdk', >> 'ram', >> 'qcow', >> - 'qcow2' >> + 'qcow2', >> + >> + 'ioemu' > > Why add the extra \n ? I wanted to separate the ioemu pseudo driver (which is the only one that doesn't go through tapdisk) from the "real" tapdisk drivers. >> +static struct td_state *state_init(void) >> +{ >> + int i; >> + struct td_state *s; >> + blkif_t *blkif; >> + >> + s = malloc(sizeof(struct td_state)); > > Would it make sense to zero out the allocated memory? This code comes directly from tapdisk and it worked there. On the other hand, it certainly wouldn't hurt. >> + switch (req->operation) >> + { >> + case BLKIF_OP_WRITE: >> + aiocb_info = malloc(sizeof(*aiocb_info)); >> + >> + aiocb_info->s = s; >> + aiocb_info->sector = sector_nr; >> + aiocb_info->nr_secs = nsects; >> + aiocb_info->idx = idx; >> + aiocb_info->i = i; >> + >> + ret = (NULL == bdrv_aio_write(s->bs, sector_nr, >> + page, nsects, >> + qemu_send_responses, >> + aiocb_info)); > > Who de-allocates aiocb_info? 
qemu_send_responses is a callback function which gets aiocb_info as parameter and frees it when it's done. I've attached a new version of the patch. Kevin diff -r 7530c4dba8a5 tools/blktap/drivers/blktapctrl.c --- a/tools/blktap/drivers/blktapctrl.c Mon Mar 3 15:19:39 2008 +++ b/tools/blktap/drivers/blktapctrl.c Fri Mar 14 11:14:10 2008 @@ -501,6 +501,80 @@ return 0; } +/* Connect to qemu-dm */ +static int connect_qemu(blkif_t *blkif) +{ + char *rdctldev, *wrctldev; + + if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d", + blkif->domid) < 0) + return -1; + + if (asprintf(&wrctldev, BLKTAP_CTRL_DIR "/qemu-write-%d", + blkif->domid) < 0) { + free(rdctldev); + return -1; + } + + DPRINTF("Using qemu blktap pipe: %s\n", rdctldev); + + blkif->fds[READ] = open_ctrl_socket(wrctldev); + blkif->fds[WRITE] = open_ctrl_socket(rdctldev); + + free(rdctldev); + free(wrctldev); + + if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) + return -1; + + DPRINTF("Attached to qemu blktap pipes\n"); + return 0; +} + +/* Launch tapdisk instance */ +static int connect_tapdisk(blkif_t *blkif, int minor) +{ + char *rdctldev = NULL, *wrctldev = NULL; + int ret = -1; + + DPRINTF("tapdisk process does not exist:\n"); + + if (asprintf(&rdctldev, + "%s/tapctrlread%d", BLKTAP_CTRL_DIR, minor) == -1) + goto fail; + + if (asprintf(&wrctldev, + "%s/tapctrlwrite%d", BLKTAP_CTRL_DIR, minor) == -1) + goto fail; + + blkif->fds[READ] = open_ctrl_socket(rdctldev); + blkif->fds[WRITE] = open_ctrl_socket(wrctldev); + + if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) + goto fail; + + /*launch the new process*/ + DPRINTF("Launching process, CMDLINE [tapdisk %s %s]\n", + wrctldev, rdctldev); + + if (launch_tapdisk(wrctldev, rdctldev) == -1) { + DPRINTF("Unable to fork, cmdline: [tapdisk %s %s]\n", + wrctldev, rdctldev); + goto fail; + } + + ret = 0; + +fail: + if (rdctldev) + free(rdctldev); + + if (wrctldev) + free(wrctldev); + + return ret; +} + int blktapctrl_new_blkif(blkif_t *blkif) { 
blkif_info_t *blk; @@ -524,30 +598,14 @@ blkif->cookie = next_cookie++; if (!exist) { - DPRINTF("Process does not exist:\n"); - if (asprintf(&rdctldev, - "%s/tapctrlread%d", BLKTAP_CTRL_DIR, minor) == -1) - goto fail; - if (asprintf(&wrctldev, - "%s/tapctrlwrite%d", BLKTAP_CTRL_DIR, minor) == -1) { - free(rdctldev); - goto fail; + if (type == DISK_TYPE_IOEMU) { + if (connect_qemu(blkif)) + goto fail; + } else { + if (connect_tapdisk(blkif, minor)) + goto fail; } - blkif->fds[READ] = open_ctrl_socket(rdctldev); - blkif->fds[WRITE] = open_ctrl_socket(wrctldev); - - if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) - goto fail; - - /*launch the new process*/ - DPRINTF("Launching process, CMDLINE [tapdisk %s %s]\n",wrctldev, rdctldev); - if (launch_tapdisk(wrctldev, rdctldev) == -1) { - DPRINTF("Unable to fork, cmdline: [tapdisk %s %s]\n",wrctldev, rdctldev); - goto fail; - } - - free(rdctldev); - free(wrctldev); + } else { DPRINTF("Process exists!\n"); blkif->fds[READ] = exist->fds[READ]; diff -r 7530c4dba8a5 tools/blktap/drivers/tapdisk.h --- a/tools/blktap/drivers/tapdisk.h Mon Mar 3 15:19:39 2008 +++ b/tools/blktap/drivers/tapdisk.h Fri Mar 14 11:14:10 2008 @@ -167,6 +167,7 @@ #define DISK_TYPE_RAM 3 #define DISK_TYPE_QCOW 4 #define DISK_TYPE_QCOW2 5 +#define DISK_TYPE_IOEMU 6 /*Define Individual Disk Parameters here */ @@ -227,6 +228,16 @@ 0, #ifdef TAPDISK &tapdisk_qcow2, +#endif +}; + +static disk_info_t ioemu_disk = { + DISK_TYPE_IOEMU, + "ioemu disk", + "ioemu", + 0, +#ifdef TAPDISK + NULL #endif }; @@ -238,6 +249,7 @@ &ram_disk, &qcow_disk, &qcow2_disk, + &ioemu_disk, }; typedef struct driver_list_entry { diff -r 7530c4dba8a5 tools/blktap/lib/blktaplib.h --- a/tools/blktap/lib/blktaplib.h Mon Mar 3 15:19:39 2008 +++ b/tools/blktap/lib/blktaplib.h Fri Mar 14 11:14:10 2008 @@ -221,15 +221,5 @@ ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * getpagesize()) + \ ((_seg) * getpagesize())) -/* Defines that are only used by library clients */ - -#ifndef 
__COMPILING_BLKTAP_LIB - -static char *blkif_op_name[] = { - [BLKIF_OP_READ] = "READ", - [BLKIF_OP_WRITE] = "WRITE", -}; - -#endif /* __COMPILING_BLKTAP_LIB */ #endif /* __BLKTAPLIB_H__ */ diff -r 7530c4dba8a5 tools/ioemu/Makefile.target --- a/tools/ioemu/Makefile.target Mon Mar 3 15:19:39 2008 +++ b/tools/ioemu/Makefile.target Fri Mar 14 11:14:10 2008 @@ -17,6 +17,7 @@ VPATH=$(SRC_PATH):$(TARGET_PATH):$(SRC_PATH)/hw:$(SRC_PATH)/audio CPPFLAGS+=-I. -I.. -I$(TARGET_PATH) -I$(SRC_PATH) CPPFLAGS+= -I$(XEN_ROOT)/tools/libxc +CPPFLAGS+= -I$(XEN_ROOT)/tools/blktap/lib CPPFLAGS+= -I$(XEN_ROOT)/tools/xenstore CPPFLAGS+= -I$(XEN_ROOT)/tools/include ifdef CONFIG_DARWIN_USER @@ -429,6 +430,7 @@ VL_OBJS+= usb-uhci.o smbus_eeprom.o VL_OBJS+= piix4acpi.o VL_OBJS+= xenstore.o +VL_OBJS+= xen_blktap.o VL_OBJS+= xen_platform.o VL_OBJS+= xen_machine_fv.o VL_OBJS+= xen_machine_pv.o diff -r 7530c4dba8a5 tools/ioemu/hw/xen_machine_pv.c --- a/tools/ioemu/hw/xen_machine_pv.c Mon Mar 3 15:19:39 2008 +++ b/tools/ioemu/hw/xen_machine_pv.c Fri Mar 14 11:14:10 2008 @@ -26,6 +26,9 @@ #include "xen_console.h" #include "xenfb.h" +extern void init_blktap(void); + + /* The Xen PV machine currently provides * - a virtual framebuffer * - .... 
@@ -40,6 +43,10 @@ { struct xenfb *xenfb; extern int domid; + + + /* Initialize tapdisk client */ + init_blktap(); /* Connect to text console */ if (serial_hds[0]) { diff -r 7530c4dba8a5 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Mon Mar 3 15:19:39 2008 +++ b/tools/ioemu/vl.c Fri Mar 14 11:14:10 2008 @@ -6266,6 +6266,12 @@ powerdown_requested = 1; if (cpu_single_env) cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT); +} + +static void qemu_sighup_handler(int signal) +{ + fprintf(stderr, "Received SIGHUP, terminating.\n"); + exit(0); } void main_loop_wait(int timeout) @@ -7976,7 +7982,7 @@ #ifndef CONFIG_STUBDOM /* Unblock SIGTERM and SIGHUP, which may have been blocked by the caller */ - signal(SIGHUP, SIG_DFL); + signal(SIGHUP, qemu_sighup_handler); sigemptyset(&set); sigaddset(&set, SIGTERM); sigaddset(&set, SIGHUP); diff -r 7530c4dba8a5 tools/python/xen/xend/server/BlktapController.py --- a/tools/python/xen/xend/server/BlktapController.py Mon Mar 3 15:19:39 2008 +++ b/tools/python/xen/xend/server/BlktapController.py Fri Mar 14 11:14:10 2008 @@ -13,7 +13,9 @@ 'vmdk', 'ram', 'qcow', - 'qcow2' + 'qcow2', + + 'ioemu' ] class BlktapController(BlkifController): diff -r 7530c4dba8a5 tools/ioemu/hw/xen_blktap.c --- /dev/null Mon Mar 3 15:19:39 2008 +++ b/tools/ioemu/hw/xen_blktap.c Fri Mar 14 11:14:10 2008 @@ -0,0 +1,686 @@ +/* xen_blktap.c + * + * Interface to blktapctrl to allow use of qemu block drivers with blktap. + * This file is based on tools/blktap/drivers/tapdisk.c + * + * Copyright (c) 2005 Julian Chesterfield and Andrew Warfield. + * Copyright (c) 2008 Kevin Wolf + */ + +/* + * There are several communication channels which are used by this interface: + * + * - A pair of pipes for receiving and sending general control messages + * (qemu-read-N and qemu-write-N in /var/run/tap, where N is the domain ID). + * These control messages are handled by handle_blktap_ctrlmsg(). 
+ * + * - One file descriptor per attached disk (/dev/xen/blktapN) for disk + * specific control messages. A callback is triggered on this fd if there + * is a new IO request. The callback function is handle_blktap_iomsg(). + * + * - A shared ring for each attached disk containing the actual IO requests + * and responses. Whenever handle_blktap_iomsg() is triggered it processes + * the requests on this ring. + */ + +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <fcntl.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> + +#include "vl.h" +#include "blktaplib.h" +#include "xen_blktap.h" +#include "block_int.h" + +#define MSG_SIZE 4096 + +#define BLKTAP_CTRL_DIR "/var/run/tap" + +/* If enabled, print debug messages to stderr */ +#if 1 +#define DPRINTF(_f, _a...) fprintf(stderr, __FILE__ ":%d: " _f, __LINE__, ##_a) +#else +#define DPRINTF(_f, _a...) ((void)0) +#endif + +#if 1 +#define ASSERT(_p) \ + if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s\n", #_p , \ + __LINE__, __FILE__); *(int*)0=0; } +#else +#define ASSERT(_p) ((void)0) +#endif + + +extern int domid; + +int read_fd; +int write_fd; + +static pid_t process; +fd_list_entry_t *fd_start = NULL; + +static void handle_blktap_iomsg(void* private); + +struct aiocb_info { + struct td_state *s; + uint64_t sector; + int nr_secs; + int idx; + long i; +}; + +static void unmap_disk(struct td_state *s) +{ + tapdev_info_t *info = s->ring_info; + fd_list_entry_t *entry; + + bdrv_close(s->bs); + + if (info != NULL && info->mem > 0) + munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE); + + entry = s->fd_entry; + *entry->pprev = entry->next; + if (entry->next) + entry->next->pprev = entry->pprev; + + qemu_set_fd_handler2(info->fd, NULL, NULL, NULL, NULL); + close(info->fd); + + free(s->fd_entry); + free(s->blkif); + free(s->ring_info); + free(s); + + return; +} + +static inline fd_list_entry_t *add_fd_entry(int tap_fd, struct td_state 
*s) +{ + fd_list_entry_t **pprev, *entry; + + DPRINTF("Adding fd_list_entry\n"); + + /*Add to linked list*/ + s->fd_entry = entry = malloc(sizeof(fd_list_entry_t)); + entry->tap_fd = tap_fd; + entry->s = s; + entry->next = NULL; + + pprev = &fd_start; + while (*pprev != NULL) + pprev = &(*pprev)->next; + + *pprev = entry; + entry->pprev = pprev; + + return entry; +} + +static inline struct td_state *get_state(int cookie) +{ + fd_list_entry_t *ptr; + + ptr = fd_start; + while (ptr != NULL) { + if (ptr->cookie == cookie) return ptr->s; + ptr = ptr->next; + } + return NULL; +} + +static struct td_state *state_init(void) +{ + int i; + struct td_state *s; + blkif_t *blkif; + + s = malloc(sizeof(struct td_state)); + blkif = s->blkif = malloc(sizeof(blkif_t)); + s->ring_info = calloc(1, sizeof(tapdev_info_t)); + + for (i = 0; i < MAX_REQUESTS; i++) { + blkif->pending_list[i].secs_pending = 0; + blkif->pending_list[i].submitting = 0; + } + + return s; +} + +static int map_new_dev(struct td_state *s, int minor) +{ + int tap_fd; + tapdev_info_t *info = s->ring_info; + char *devname; + fd_list_entry_t *ptr; + int page_size; + + if (asprintf(&devname,"%s/%s%d", BLKTAP_DEV_DIR, BLKTAP_DEV_NAME, minor) == -1) + return -1; + tap_fd = open(devname, O_RDWR); + if (tap_fd == -1) + { + DPRINTF("open failed on dev %s!\n",devname); + goto fail; + } + info->fd = tap_fd; + + /*Map the shared memory*/ + page_size = getpagesize(); + info->mem = mmap(0, page_size * BLKTAP_MMAP_REGION_SIZE, + PROT_READ | PROT_WRITE, MAP_SHARED, info->fd, 0); + if ((long int)info->mem == -1) + { + DPRINTF("mmap failed on dev %s!\n",devname); + goto fail; + } + + /* assign the rings to the mapped memory */ + info->sring = (blkif_sring_t *)((unsigned long)info->mem); + BACK_RING_INIT(&info->fe_ring, info->sring, page_size); + + info->vstart = + (unsigned long)info->mem + (BLKTAP_RING_PAGES * page_size); + + ioctl(info->fd, BLKTAP_IOCTL_SENDPID, process ); + ioctl(info->fd, BLKTAP_IOCTL_SETMODE, 
BLKTAP_MODE_INTERPOSE ); + free(devname); + + /*Update the fd entry*/ + ptr = fd_start; + while (ptr != NULL) { + if (s == ptr->s) { + ptr->tap_fd = tap_fd; + + /* Setup fd_handler for qemu main loop */ + DPRINTF("set tap_fd = %d\n", tap_fd); + qemu_set_fd_handler2(tap_fd, NULL, &handle_blktap_iomsg, NULL, s); + + break; + } + ptr = ptr->next; + } + + + DPRINTF("map_new_dev = %d\n", minor); + return minor; + + fail: + free(devname); + return -1; +} + +static int open_disk(struct td_state *s, char *path, int readonly) +{ + struct disk_id id; + BlockDriverState* bs; + + DPRINTF("Opening %s\n", path); + bs = calloc(1, sizeof(*bs)); + + memset(&id, 0, sizeof(struct disk_id)); + + if (bdrv_open(bs, path, 0) != 0) { + fprintf(stderr, "Could not open image file %s\n", path); + return -ENOMEM; + } + + s->bs = bs; + s->flags = readonly ? TD_RDONLY : 0; + s->size = bs->total_sectors; + s->sector_size = 512; + + s->info = ((s->flags & TD_RDONLY) ? VDISK_READONLY : 0); + + return 0; +} + +static inline void write_rsp_to_ring(struct td_state *s, blkif_response_t *rsp) +{ + tapdev_info_t *info = s->ring_info; + blkif_response_t *rsp_d; + + rsp_d = RING_GET_RESPONSE(&info->fe_ring, info->fe_ring.rsp_prod_pvt); + memcpy(rsp_d, rsp, sizeof(blkif_response_t)); + info->fe_ring.rsp_prod_pvt++; +} + +static inline void kick_responses(struct td_state *s) +{ + tapdev_info_t *info = s->ring_info; + + if (info->fe_ring.rsp_prod_pvt != info->fe_ring.sring->rsp_prod) + { + RING_PUSH_RESPONSES(&info->fe_ring); + ioctl(info->fd, BLKTAP_IOCTL_KICK_FE); + } +} + +static int send_responses(struct td_state *s, int res, + uint64_t sector, int nr_secs, int idx, void *private) +{ + pending_req_t *preq; + blkif_request_t *req; + int responses_queued = 0; + blkif_t *blkif = s->blkif; + int secs_done = nr_secs; + + if ( (idx > MAX_REQUESTS-1) ) + { + DPRINTF("invalid index returned(%u)!\n", idx); + return 0; + } + preq = &blkif->pending_list[idx]; + req = &preq->req; + + preq->secs_pending -= secs_done; 
+ + if (res == -EBUSY && preq->submitting) + return -EBUSY; /* propagate -EBUSY back to higher layers */ + if (res) + preq->status = BLKIF_RSP_ERROR; + + if (!preq->submitting && preq->secs_pending == 0) + { + blkif_request_t tmp; + blkif_response_t *rsp; + + tmp = preq->req; + rsp = (blkif_response_t *)req; + + rsp->id = tmp.id; + rsp->operation = tmp.operation; + rsp->status = preq->status; + + write_rsp_to_ring(s, rsp); + responses_queued++; + + kick_responses(s); + } + + return responses_queued; +} + +static void qemu_send_responses(void* opaque, int ret) +{ + struct aiocb_info* info = opaque; + + if (ret != 0) { + DPRINTF("ERROR: ret = %d (%s)\n", ret, strerror(-ret)); + } + + send_responses(info->s, ret, info->sector, info->nr_secs, + info->idx, (void*) info->i); + free(info); +} + +/** + * Callback function for the IO message pipe. Reads requests from the ring + * and processes them (call qemu read/write functions). + * + * The private parameter points to the struct td_state representing the + * disk the request is targeted at. 
+ */ +static void handle_blktap_iomsg(void* private) +{ + struct td_state* s = private; + + RING_IDX rp, j, i; + blkif_request_t *req; + int idx, nsects, ret; + uint64_t sector_nr; + uint8_t *page; + blkif_t *blkif = s->blkif; + tapdev_info_t *info = s->ring_info; + int page_size = getpagesize(); + + struct aiocb_info *aiocb_info; + + if (info->fe_ring.sring == NULL) { + DPRINTF(" sring == NULL, ignoring IO request\n"); + return; + } + + rp = info->fe_ring.sring->req_prod; + xen_rmb(); + + for (j = info->fe_ring.req_cons; j != rp; j++) + { + int start_seg = 0; + + req = NULL; + req = RING_GET_REQUEST(&info->fe_ring, j); + ++info->fe_ring.req_cons; + + if (req == NULL) + continue; + + idx = req->id; + + ASSERT(blkif->pending_list[idx].secs_pending == 0); + memcpy(&blkif->pending_list[idx].req, req, sizeof(*req)); + blkif->pending_list[idx].status = BLKIF_RSP_OKAY; + blkif->pending_list[idx].submitting = 1; + sector_nr = req->sector_number; + + /* Don't allow writes on readonly devices */ + if ((s->flags & TD_RDONLY) && + (req->operation == BLKIF_OP_WRITE)) { + blkif->pending_list[idx].status = BLKIF_RSP_ERROR; + goto send_response; + } + + for (i = start_seg; i < req->nr_segments; i++) { + nsects = req->seg[i].last_sect - + req->seg[i].first_sect + 1; + + if ((req->seg[i].last_sect >= page_size >> 9) || + (nsects <= 0)) + continue; + + page = (uint8_t*) MMAP_VADDR(info->vstart, + (unsigned long)req->id, i); + page += (req->seg[i].first_sect << SECTOR_SHIFT); + + if (sector_nr >= s->size) { + DPRINTF("Sector request failed:\n"); + DPRINTF("%s request, idx [%d,%d] size [%llu], " + "sector [%llu,%llu]\n", + (req->operation == BLKIF_OP_WRITE ? 
+ "WRITE" : "READ"), + idx,i, + (long long unsigned) + nsects<<SECTOR_SHIFT, + (long long unsigned) + sector_nr<<SECTOR_SHIFT, + (long long unsigned) sector_nr); + continue; + } + + blkif->pending_list[idx].secs_pending += nsects; + + switch (req->operation) + { + case BLKIF_OP_WRITE: + aiocb_info = malloc(sizeof(*aiocb_info)); + + aiocb_info->s = s; + aiocb_info->sector = sector_nr; + aiocb_info->nr_secs = nsects; + aiocb_info->idx = idx; + aiocb_info->i = i; + + ret = (NULL == bdrv_aio_write(s->bs, sector_nr, + page, nsects, + qemu_send_responses, + aiocb_info)); + + if (ret) { + blkif->pending_list[idx].status = BLKIF_RSP_ERROR; + DPRINTF("ERROR: bdrv_write() == NULL\n"); + goto send_response; + } + break; + + case BLKIF_OP_READ: + aiocb_info = malloc(sizeof(*aiocb_info)); + + aiocb_info->s = s; + aiocb_info->sector = sector_nr; + aiocb_info->nr_secs = nsects; + aiocb_info->idx = idx; + aiocb_info->i = i; + + ret = (NULL == bdrv_aio_read(s->bs, sector_nr, + page, nsects, + qemu_send_responses, + aiocb_info)); + + if (ret) { + blkif->pending_list[idx].status = BLKIF_RSP_ERROR; + DPRINTF("ERROR: bdrv_read() == NULL\n"); + goto send_response; + } + break; + + default: + DPRINTF("Unknown block operation\n"); + break; + } + sector_nr += nsects; + } + send_response: + blkif->pending_list[idx].submitting = 0; + + /* force write_rsp_to_ring for synchronous case */ + if (blkif->pending_list[idx].secs_pending == 0) + send_responses(s, 0, 0, 0, idx, (void *)(long)0); + } +} + +/** + * Callback function for the qemu-read pipe. Reads and processes control + * message from the pipe. + * + * The parameter private is unused. 
+ */ +static void handle_blktap_ctrlmsg(void* private) +{ + int length, len, msglen; + char *ptr, *path; + image_t *img; + msg_hdr_t *msg; + msg_newdev_t *msg_dev; + msg_pid_t *msg_pid; + int ret = -1; + struct td_state *s = NULL; + fd_list_entry_t *entry; + + char buf[MSG_SIZE]; + + length = read(read_fd, buf, MSG_SIZE); + + if (length > 0 && length >= sizeof(msg_hdr_t)) + { + msg = (msg_hdr_t *)buf; + DPRINTF("blktap: Received msg, len %d, type %d, UID %d\n", + length,msg->type,msg->cookie); + + switch (msg->type) { + case CTLMSG_PARAMS: + ptr = buf + sizeof(msg_hdr_t); + len = (length - sizeof(msg_hdr_t)); + path = calloc(1, len + 1); + + memcpy(path, ptr, len); + DPRINTF("Received CTLMSG_PARAMS: [%s]\n", path); + + /* Allocate the disk structs */ + s = state_init(); + + /*Open file*/ + if (s == NULL || open_disk(s, path, msg->readonly)) { + msglen = sizeof(msg_hdr_t); + msg->type = CTLMSG_IMG_FAIL; + msg->len = msglen; + } else { + entry = add_fd_entry(0, s); + entry->cookie = msg->cookie; + DPRINTF("Entered cookie %d\n", entry->cookie); + + memset(buf, 0x00, MSG_SIZE); + + msglen = sizeof(msg_hdr_t) + sizeof(image_t); + msg->type = CTLMSG_IMG; + img = (image_t *)(buf + sizeof(msg_hdr_t)); + img->size = s->size; + img->secsize = s->sector_size; + img->info = s->info; + DPRINTF("Writing (size, secsize, info) = " + "(%#" PRIx64 ", %#" PRIx64 ", %d)\n", + s->size, s->sector_size, s->info); + } + len = write(write_fd, buf, msglen); + free(path); + break; + + case CTLMSG_NEWDEV: + msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t)); + + s = get_state(msg->cookie); + DPRINTF("Retrieving state, cookie %d.....[%s]\n", + msg->cookie, (s == NULL ? "FAIL":"OK")); + if (s != NULL) { + ret = ((map_new_dev(s, msg_dev->devnum) + == msg_dev->devnum ? 0: -1)); + } + + memset(buf, 0x00, MSG_SIZE); + msglen = sizeof(msg_hdr_t); + msg->type = (ret == 0 ? 
CTLMSG_NEWDEV_RSP + : CTLMSG_NEWDEV_FAIL); + msg->len = msglen; + + len = write(write_fd, buf, msglen); + break; + + case CTLMSG_CLOSE: + s = get_state(msg->cookie); + if (s) unmap_disk(s); + break; + + case CTLMSG_PID: + memset(buf, 0x00, MSG_SIZE); + msglen = sizeof(msg_hdr_t) + sizeof(msg_pid_t); + msg->type = CTLMSG_PID_RSP; + msg->len = msglen; + + msg_pid = (msg_pid_t *)(buf + sizeof(msg_hdr_t)); + process = getpid(); + msg_pid->pid = process; + + len = write(write_fd, buf, msglen); + break; + + default: + break; + } + } +} + +/** + * Opens a control socket, i.e. a pipe to communicate with blktapctrl. + * + * Returns the file descriptor number for the pipe; -1 in error case + */ +static int open_ctrl_socket(char *devname) +{ + int ret; + int ipc_fd; + + if (mkdir(BLKTAP_CTRL_DIR, 0755) == 0) + DPRINTF("Created %s directory\n", BLKTAP_CTRL_DIR); + + ret = mkfifo(devname,S_IRWXU|S_IRWXG|S_IRWXO); + if ( (ret != 0) && (errno != EEXIST) ) { + DPRINTF("ERROR: pipe failed (%d)\n", errno); + return -1; + } + + ipc_fd = open(devname,O_RDWR|O_NONBLOCK); + + if (ipc_fd < 0) { + DPRINTF("FD open failed\n"); + return -1; + } + + return ipc_fd; +} + +/** + * Unmaps all disks and closes their pipes + */ +void shutdown_blktap(void) +{ + fd_list_entry_t *ptr; + struct td_state *s; + char *devname; + + DPRINTF("Shutdown blktap\n"); + + /* Unmap all disks */ + ptr = fd_start; + while (ptr != NULL) { + s = ptr->s; + unmap_disk(s); + close(ptr->tap_fd); + ptr = ptr->next; + } + + /* Delete control pipes */ + if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) { + DPRINTF("Delete %s\n", devname); + if (unlink(devname)) + DPRINTF("Could not delete: %s\n", strerror(errno)); + free(devname); + } + + if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) { + DPRINTF("Delete %s\n", devname); + if (unlink(devname)) + DPRINTF("Could not delete: %s\n", strerror(errno)); + free(devname); + } +} + +/** + * Initialize the blktap interface, i.e. 
open a pair of pipes in /var/run/tap + * and register a fd handler. + * + * Returns 0 on success. + */ +int init_blktap(void) +{ + char* devname; + + DPRINTF("Init blktap pipes\n"); + + /* Open the read pipe */ + if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) { + read_fd = open_ctrl_socket(devname); + free(devname); + + if (read_fd == -1) { + fprintf(stderr, "Could not open %s/qemu-read-%d\n", + BLKTAP_CTRL_DIR, domid); + return -1; + } + } + + /* Open the write pipe */ + if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) { + write_fd = open_ctrl_socket(devname); + free(devname); + + if (write_fd == -1) { + fprintf(stderr, "Could not open %s/qemu-write-%d\n", + BLKTAP_CTRL_DIR, domid); + close(read_fd); + return -1; + } + } + + /* Attach a handler to the read pipe (called from qemu main loop) */ + qemu_set_fd_handler2(read_fd, NULL, &handle_blktap_ctrlmsg, NULL, NULL); + + /* Register handler to clean up when the domain is destroyed */ + atexit(&shutdown_blktap); + + return 0; +} diff -r 7530c4dba8a5 tools/ioemu/hw/xen_blktap.h --- /dev/null Mon Mar 3 15:19:39 2008 +++ b/tools/ioemu/hw/xen_blktap.h Fri Mar 14 11:14:10 2008 @@ -0,0 +1,57 @@ +/* xen_blktap.h + * + * Generic disk interface for blktap-based image adapters. + * + * (c) 2006 Andrew Warfield and Julian Chesterfield + */ + +#ifndef XEN_BLKTAP_H_ +#define XEN_BLKTAP_H_ + +#include <stdint.h> +#include <syslog.h> +#include <stdio.h> + +#include "block_int.h" + +/* Things disks need to know about, these should probably be in a higher-level + * header. */ +#define MAX_SEGMENTS_PER_REQ 11 +#define SECTOR_SHIFT 9 +#define DEFAULT_SECTOR_SIZE 512 + +#define MAX_IOFD 2 + +#define BLK_NOT_ALLOCATED 99 +#define TD_NO_PARENT 1 + +typedef uint32_t td_flag_t; + +#define TD_RDONLY 1 + +struct disk_id { + char *name; + int drivertype; +}; + +/* This structure represents the state of an active virtual disk. 
*/ +struct td_state { + BlockDriverState* bs; + td_flag_t flags; + void *blkif; + void *image; + void *ring_info; + void *fd_entry; + uint64_t sector_size; + uint64_t size; + unsigned int info; +}; + +typedef struct fd_list_entry { + int cookie; + int tap_fd; + struct td_state *s; + struct fd_list_entry **pprev, *next; +} fd_list_entry_t; + +#endif /*XEN_BLKTAP_H_*/ _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |