
[Xen-changelog] [xen-unstable] minios: add blkfront



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1200588106 0
# Node ID 764d6741de079f3d2def86f09e743c753d8bf100
# Parent  bf828db8d0174896d078fcc6dce12427b6b64cc5
minios: add blkfront

Signed-off-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>
---
 extras/mini-os/blkfront.c         |  392 ++++++++++++++++++++++++++++++++++++++
 extras/mini-os/include/blkfront.h |   26 ++
 2 files changed, 418 insertions(+)
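
The API this patch exports (see blkfront.h below) is callback-based: a caller
fills in a struct blkfront_aiocb, submits it with blkfront_aio_read() or
blkfront_aio_write(), and has its aio_cb invoked from blkfront_aio_poll() once
the backend answers.  A minimal usage sketch follows, not part of the patch:
the xenstore vbd path "/local/domain/1/device/vbd/769" and the read_done()
callback are illustrative assumptions.

    /* Hypothetical usage sketch for the blkfront API added by this patch.
     * The vbd path and read_done() are made up for illustration. */
    #include <os.h>
    #include <blkfront.h>

    static volatile int read_finished;

    /* Completion callback; blkfront_aio_poll() invokes it with ret == 0 on
     * success or -EIO on failure. */
    static void read_done(struct blkfront_aiocb *aiocb, int ret)
    {
        read_finished = 1;
    }

    static void blkfront_demo(void)
    {
        struct blkfront_dev *dev;
        struct blkfront_aiocb aiocb;
        uint64_t sectors;
        unsigned sector_size;
        int mode;

        dev = init_blkfront("/local/domain/1/device/vbd/769",
                            &sectors, &sector_size, &mode);
        if (!dev)
            return;

        /* Read the first sector.  Buffer, offset and length must all be
         * sector-aligned; a fresh page satisfies the buffer constraint. */
        aiocb.aio_dev = dev;
        aiocb.aio_buf = (uint8_t*) alloc_page();
        aiocb.aio_nbytes = sector_size;
        aiocb.aio_offset = 0;
        aiocb.aio_cb = read_done;

        read_finished = 0;
        blkfront_aio_read(&aiocb);

        /* Spin until the backend responds; a real caller would sleep on
         * blkfront_queue instead (see the sketch after the patch). */
        while (!read_finished)
            blkfront_aio_poll(dev);

        shutdown_blkfront(dev);   /* page deliberately leaked in this sketch */
    }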

diff -r bf828db8d017 -r 764d6741de07 extras/mini-os/blkfront.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/extras/mini-os/blkfront.c Thu Jan 17 16:41:46 2008 +0000
@@ -0,0 +1,392 @@
+/* Minimal block driver for Mini-OS. 
+ * Copyright (c) 2007-2008 Samuel Thibault.
+ * Based on netfront.c.
+ */
+
+#include <os.h>
+#include <xenbus.h>
+#include <events.h>
+#include <errno.h>
+#include <xen/io/blkif.h>
+#include <gnttab.h>
+#include <xmalloc.h>
+#include <time.h>
+#include <blkfront.h>
+#include <lib.h>
+#include <fcntl.h>
+
+/* Note: we generally don't need to disable IRQs since we hardly do anything in
+ * the interrupt handler.  */
+
+/* Note: we really assume non-preemptive threads.  */
+
+DECLARE_WAIT_QUEUE_HEAD(blkfront_queue);
+
+
+
+
+#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
+#define GRANT_INVALID_REF 0
+
+
+struct blk_buffer {
+    void* page;
+    grant_ref_t gref;
+};
+
+struct blkfront_dev {
+    struct blkif_front_ring ring;
+    grant_ref_t ring_ref;
+    evtchn_port_t evtchn, local_port;
+    blkif_vdev_t handle;
+
+    char *nodename;
+    char *backend;
+    unsigned sector_size;
+    unsigned sectors;
+    int mode;
+    int barrier;
+    int flush;
+};
+
+static inline int xenblk_rxidx(RING_IDX idx)
+{
+    return idx & (BLK_RING_SIZE - 1);
+}
+
+void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
+{
+    wake_up(&blkfront_queue);
+}
+
+struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode)
+{
+    xenbus_transaction_t xbt;
+    char* err;
+    char* message=NULL;
+    struct blkif_sring *s;
+    int retry=0;
+    char* msg;
+    char* c;
+
+    struct blkfront_dev *dev;
+
+    ASSERT(!strncmp(nodename, "/local/domain/", 14));
+    nodename = strchr(nodename + 14, '/') + 1;
+
+    char path[strlen(nodename) + 1 + 10 + 1];
+
+    printk("******************* BLKFRONT for %s **********\n\n\n", nodename);
+
+    dev = malloc(sizeof(*dev));
+    dev->nodename = strdup(nodename);
+
+    s = (struct blkif_sring*) alloc_page();
+    memset(s,0,PAGE_SIZE);
+
+
+    SHARED_RING_INIT(s);
+    FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE);
+
+    dev->ring_ref = gnttab_grant_access(0,virt_to_mfn(s),0);
+
+    evtchn_alloc_unbound_t op;
+    op.dom = DOMID_SELF;
+    snprintf(path, sizeof(path), "%s/backend-id", nodename);
+    op.remote_dom = xenbus_read_integer(path); 
+    HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op);
+    clear_evtchn(op.port);        /* Without this, the handler would get invoked immediately! */
+    dev->local_port = bind_evtchn(op.port, blkfront_handler, dev);
+    dev->evtchn=op.port;
+
+    // FIXME: proper frees on failures
+again:
+    err = xenbus_transaction_start(&xbt);
+    if (err) {
+        printk("starting transaction\n");
+    }
+
+    err = xenbus_printf(xbt, nodename, "ring-ref","%u",
+                dev->ring_ref);
+    if (err) {
+        message = "writing ring-ref";
+        goto abort_transaction;
+    }
+    err = xenbus_printf(xbt, nodename,
+                "event-channel", "%u", dev->evtchn);
+    if (err) {
+        message = "writing event-channel";
+        goto abort_transaction;
+    }
+
+    err = xenbus_printf(xbt, nodename, "state", "%u",
+            4); /* connected */
+
+
+    err = xenbus_transaction_end(xbt, 0, &retry);
+    if (retry) {
+        printk("completing transaction\n");
+        goto again;
+    }
+
+    goto done;
+
+abort_transaction:
+    xenbus_transaction_end(xbt, 1, &retry);
+    return NULL;
+
+done:
+
+    snprintf(path, sizeof(path), "%s/backend", nodename);
+    msg = xenbus_read(XBT_NIL, path, &dev->backend);
+    if (msg) {
+        printk("Error %s when reading the backend path %s\n", msg, path);
+        return NULL;
+    }
+
+    printk("backend at %s\n", dev->backend);
+
+    dev->handle = simple_strtoul(strrchr(nodename, '/')+1, NULL, 0);
+
+    {
+        char path[strlen(dev->backend) + 1 + 19 + 1];
+        snprintf(path, sizeof(path), "%s/mode", dev->backend);
+        msg = xenbus_read(XBT_NIL, path, &c);
+        if (msg) {
+            printk("Error %s when reading the mode\n", msg);
+            return NULL;
+        }
+        if (*c == 'w')
+            *mode = dev->mode = O_RDWR;
+        else
+            *mode = dev->mode = O_RDONLY;
+        free(c);
+
+        snprintf(path, sizeof(path), "%s/state", dev->backend);
+
+        xenbus_watch_path(XBT_NIL, path);
+
+        xenbus_wait_for_value(path,"4");
+
+        xenbus_unwatch_path(XBT_NIL, path);
+
+        snprintf(path, sizeof(path), "%s/sectors", dev->backend);
+        // FIXME: read_integer returns an int, so disk size limited to 1TB for now
+        *sectors = dev->sectors = xenbus_read_integer(path);
+
+        snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
+        *sector_size = dev->sector_size = xenbus_read_integer(path);
+
+        snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
+        dev->barrier = xenbus_read_integer(path);
+
+        snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
+        dev->flush = xenbus_read_integer(path);
+    }
+
+    printk("%u sectors of %u bytes\n", dev->sectors, dev->sector_size);
+    printk("**************************\n");
+
+    return dev;
+}
+
+void shutdown_blkfront(struct blkfront_dev *dev)
+{
+    char* err;
+    char *nodename = dev->nodename;
+
+    char path[strlen(dev->backend) + 1 + 5 + 1];
+
+    blkfront_sync(dev);
+
+    printk("close blk: backend at %s\n",dev->backend);
+
+    snprintf(path, sizeof(path), "%s/state", dev->backend);
+    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 5); /* closing */
+    xenbus_wait_for_value(path,"5");
+
+    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6);
+    xenbus_wait_for_value(path,"6");
+
+    unbind_evtchn(dev->local_port);
+
+    free(nodename);
+    free(dev->backend);
+    free(dev);
+}
+
+static void blkfront_wait_slot(struct blkfront_dev *dev)
+{
+    /* Wait for a slot */
+    if (RING_FULL(&dev->ring)) {
+       unsigned long flags;
+       DEFINE_WAIT(w);
+       local_irq_save(flags);
+       while (1) {
+           blkfront_aio_poll(dev);
+           if (!RING_FULL(&dev->ring))
+               break;
+           /* Really no slot, go to sleep. */
+           add_waiter(w, blkfront_queue);
+           local_irq_restore(flags);
+           schedule();
+           local_irq_save(flags);
+       }
+       remove_waiter(w);
+       local_irq_restore(flags);
+    }
+}
+
+/* Issue an aio */
+void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
+{
+    struct blkfront_dev *dev = aiocbp->aio_dev;
+    struct blkif_request *req;
+    RING_IDX i;
+    int notify;
+    int n, j;
+    uintptr_t start, end;
+
+    // Can't io at non-sector-aligned location
+    ASSERT(!(aiocbp->aio_offset & (dev->sector_size-1)));
+    // Can't io non-sector-sized amounts
+    ASSERT(!(aiocbp->aio_nbytes & (dev->sector_size-1)));
+    // Can't io non-sector-aligned buffer
+    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->sector_size-1)));
+
+    start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
+    end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
+    n = (end - start) / PAGE_SIZE;
+
+    /* qemu's IDE max multsect is 16 (8KB) and SCSI max DMA was set to 32KB,
+     * so more than 44KB (BLKIF_MAX_SEGMENTS_PER_REQUEST pages) can't happen */
+    ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+    blkfront_wait_slot(dev);
+    i = dev->ring.req_prod_pvt;
+    req = RING_GET_REQUEST(&dev->ring, i);
+
+    req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
+    req->nr_segments = n;
+    req->handle = dev->handle;
+    req->id = (uintptr_t) aiocbp;
+    req->sector_number = aiocbp->aio_offset / dev->sector_size;
+
+    for (j = 0; j < n; j++) {
+       uintptr_t data = start + j * PAGE_SIZE;
+       aiocbp->gref[j] = req->seg[j].gref =
+            gnttab_grant_access(0, virt_to_mfn(data), write);
+       req->seg[j].first_sect = 0;
+       req->seg[j].last_sect = PAGE_SIZE / dev->sector_size - 1;
+    }
+    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->sector_size;
+    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->sector_size;
+
+    dev->ring.req_prod_pvt = i + 1;
+
+    wmb();
+    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
+
+    if (notify) notify_remote_via_evtchn(dev->evtchn);
+}
+
+void blkfront_aio_write(struct blkfront_aiocb *aiocbp)
+{
+    blkfront_aio(aiocbp, 1);
+}
+
+void blkfront_aio_read(struct blkfront_aiocb *aiocbp)
+{
+    blkfront_aio(aiocbp, 0);
+}
+
+int blkfront_aio_poll(struct blkfront_dev *dev)
+{
+    RING_IDX rp, cons;
+    struct blkif_response *rsp;
+
+moretodo:
+    rp = dev->ring.sring->rsp_prod;
+    rmb(); /* Ensure we see queued responses up to 'rp'. */
+    cons = dev->ring.rsp_cons;
+
+    int nr_consumed = 0;
+    while (cons != rp)
+    {
+       rsp = RING_GET_RESPONSE(&dev->ring, cons);
+
+        switch (rsp->operation) {
+        case BLKIF_OP_READ:
+        case BLKIF_OP_WRITE:
+        {
+            struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
+            int n = (aiocbp->aio_nbytes + PAGE_SIZE - 1) / PAGE_SIZE, j;
+            for (j = 0; j < n; j++)
+                gnttab_end_access(aiocbp->gref[j]);
+
+            /* Note: the callback frees aiocbp itself */
+            aiocbp->aio_cb(aiocbp, rsp->status ? -EIO : 0);
+            break;
+        }
+        case BLKIF_OP_WRITE_BARRIER:
+        case BLKIF_OP_FLUSH_DISKCACHE:
+            break;
+        default:
+            printk("unrecognized block operation %d response\n", 
rsp->operation);
+            break;
+        }
+
+       nr_consumed++;
+       ++cons;
+    }
+    dev->ring.rsp_cons = cons;
+
+    int more;
+    RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
+    if (more) goto moretodo;
+
+    return nr_consumed;
+}
+
+static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op)
+{
+    int i;
+    struct blkif_request *req;
+    int notify;
+
+    blkfront_wait_slot(dev);
+    i = dev->ring.req_prod_pvt;
+    req = RING_GET_REQUEST(&dev->ring, i);
+    req->operation = op;
+    dev->ring.req_prod_pvt = i + 1;
+    wmb();
+    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
+    if (notify) notify_remote_via_evtchn(dev->evtchn);
+}
+
+void blkfront_sync(struct blkfront_dev *dev)
+{
+    unsigned long flags;
+
+    if (dev->barrier == 1)
+        blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER);
+
+    if (dev->flush == 1)
+        blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE);
+
+    /* Note: This won't finish if another thread enqueues requests.  */
+    local_irq_save(flags);
+    DEFINE_WAIT(w);
+    while (1) {
+       blkfront_aio_poll(dev);
+       if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
+           break;
+
+       add_waiter(w, blkfront_queue);
+       local_irq_restore(flags);
+       schedule();
+       local_irq_save(flags);
+    }
+    remove_waiter(w);
+    local_irq_restore(flags);
+}
diff -r bf828db8d017 -r 764d6741de07 extras/mini-os/include/blkfront.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/extras/mini-os/include/blkfront.h Thu Jan 17 16:41:46 2008 +0000
@@ -0,0 +1,26 @@
+#include <wait.h>
+#include <xen/io/blkif.h>
+#include <types.h>
+struct blkfront_dev;
+struct blkfront_aiocb
+{
+    struct blkfront_dev *aio_dev;
+    uint8_t *aio_buf;
+    size_t aio_nbytes;
+    uint64_t aio_offset;
+    void *data;
+
+    grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+
+    void (*aio_cb)(struct blkfront_aiocb *aiocb, int ret);
+};
+struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode);
+int blkfront_open(struct blkfront_dev *dev);
+void blkfront_aio(struct blkfront_aiocb *aiocbp, int write);
+void blkfront_aio_read(struct blkfront_aiocb *aiocbp);
+void blkfront_aio_write(struct blkfront_aiocb *aiocbp);
+int blkfront_aio_poll(struct blkfront_dev *dev);
+void blkfront_sync(struct blkfront_dev *dev);
+void shutdown_blkfront(struct blkfront_dev *dev);
+
+extern struct wait_queue_head blkfront_queue;

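The busy-wait in the sketch before the patch can be replaced by sleeping on
blkfront_queue, mirroring the pattern blkfront_wait_slot() and blkfront_sync()
use above.  Another hedged sketch, reusing the hypothetical read_finished flag
and relying only on the Mini-OS wait-queue primitives (DEFINE_WAIT, add_waiter,
remove_waiter, schedule) that the patch itself calls:

    /* Sketch: block until the illustrative read_finished flag is set,
     * sleeping on blkfront_queue between polls.  Same pattern as
     * blkfront_sync(): IRQs stay disabled around the check so a wakeup
     * between the poll and schedule() is not lost. */
    static void wait_for_read(struct blkfront_dev *dev)
    {
        unsigned long flags;
        DEFINE_WAIT(w);

        local_irq_save(flags);
        while (1) {
            blkfront_aio_poll(dev);   /* runs completion callbacks */
            if (read_finished)
                break;
            add_waiter(w, blkfront_queue);  /* evtchn handler wakes us */
            local_irq_restore(flags);
            schedule();
            local_irq_save(flags);
        }
        remove_waiter(w);
        local_irq_restore(flags);
    }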