
[Xen-devel] [RFC v1 1/5] VBD: enlarge max segment per request in blkfront



Refactor blkfront in preparation for a larger per-request segment count:

- route every direct use of the ring macros through a new
  blk_front_operations table (info->ops);
- allocate the scatterlist and shadow arrays dynamically, sized by
  ops->max_seg, instead of using fixed-size arrays;
- derive blk_queue_max_hw_sectors() from ops->max_seg rather than the
  hard-coded 512.
Signed-off-by: Ronghui Duan <ronghui.duan@xxxxxxxxx>
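---
Reviewer note (illustration, not part of the patch itself): the point of the
refactoring is that every direct use of the ring macros is now routed through
info->ops, so a second ring layout only has to supply its own operation
table. Below is a minimal sketch of what the RING_TYPE_2 hookup could look
like; blk_front_ops_v2 and the *_v2 handlers are hypothetical names used
purely for illustration:

	/*
	 * Hypothetical second ops table; the handlers would operate on a
	 * larger ring layout and are not implemented in this patch.
	 */
	static struct blk_front_operations blk_front_ops_v2 = {
		.ring_get_request  = ring_get_request_v2,	/* assumed */
		.ring_get_response = ring_get_response_v2,	/* assumed */
		/* ... remaining handlers for the larger ring ... */
		.max_seg = 64,	/* e.g. a larger per-request limit */
	};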

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 4e86393..a263faf 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -64,6 +64,12 @@ enum blkif_state {
        BLKIF_STATE_SUSPENDED,
 };

+enum blkif_ring_type {
+       RING_TYPE_UNDEFINED = 0,
+       RING_TYPE_1 = 1,
+       RING_TYPE_2 = 2,
+};
+
 struct blk_shadow {
        struct blkif_request req;
        struct request *request;
@@ -91,12 +97,14 @@ struct blkfront_info
        enum blkif_state connected;
        int ring_ref;
        struct blkif_front_ring ring;
-       struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct scatterlist *sg;
        unsigned int evtchn, irq;
        struct request_queue *rq;
        struct work_struct work;
        struct gnttab_free_callback callback;
-       struct blk_shadow shadow[BLK_RING_SIZE];
+       struct blk_shadow *shadow;
+       struct blk_front_operations *ops;
+       enum blkif_ring_type ring_type;
        unsigned long shadow_free;
        unsigned int feature_flush;
        unsigned int flush_op;
@@ -107,6 +115,36 @@ struct blkfront_info
        int is_ready;
 };

+/* interface of blkfront ring operation */
+static struct blk_front_operations {
+       void *(*ring_get_request) (struct blkfront_info *info);
+       struct blkif_response *(*ring_get_response) (struct blkfront_info *info);
+       struct blkif_request_segment *(*ring_get_segment)
+                               (struct blkfront_info *info, int i);
+       unsigned long (*get_id) (struct blkfront_info *info);
+       void (*add_id) (struct blkfront_info *info, unsigned long id);
+       void (*save_seg_shadow) (struct blkfront_info *info, unsigned long mfn,
+                                unsigned long id, int i);
+       void (*save_req_shadow) (struct blkfront_info *info,
+                                struct request *req, unsigned long id);
+       struct request *(*get_req_from_shadow)(struct blkfront_info *info,
+                                              unsigned long id);
+       RING_IDX (*get_rsp_prod) (struct blkfront_info *info);
+       RING_IDX (*get_rsp_cons) (struct blkfront_info *info);
+       RING_IDX (*get_req_prod_pvt) (struct blkfront_info *info);
+       void (*check_left_response) (struct blkfront_info *info, int *more_to_do);
+       void (*update_rsp_event) (struct blkfront_info *info, int i);
+       void (*update_rsp_cons) (struct blkfront_info *info);
+       void (*update_req_prod_pvt) (struct blkfront_info *info);
+       void (*ring_push) (struct blkfront_info *info, int *notify);
+       int (*recover) (struct blkfront_info *info);
+       int (*ring_full) (struct blkfront_info *info);
+       int (*setup_blkring) (struct xenbus_device *dev, struct blkfront_info *info);
+       void (*free_blkring) (struct blkfront_info *info, int suspend);
+       void (*blkif_completion) (struct blkfront_info *info, unsigned long id);
+       unsigned int max_seg;
+} blk_front_ops;
+
 static unsigned int nr_minors;
 static unsigned long *minors;
 static DEFINE_SPINLOCK(minor_lock);
@@ -132,7 +170,7 @@ static DEFINE_SPINLOCK(minor_lock);

 #define DEV_NAME       "xvd"   /* name in /dev */

-static int get_id_from_freelist(struct blkfront_info *info)
+static unsigned long get_id_from_freelist(struct blkfront_info *info)
 {
        unsigned long free = info->shadow_free;
        BUG_ON(free >= BLK_RING_SIZE);
@@ -141,7 +179,7 @@ static int get_id_from_freelist(struct blkfront_info *info)
        return free;
 }

-static void add_id_to_freelist(struct blkfront_info *info,
+void add_id_to_freelist(struct blkfront_info *info,
                               unsigned long id)
 {
        info->shadow[id].req.u.rw.id  = info->shadow_free;
@@ -251,6 +289,42 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
        return 0;
 }

+static int ring_full(struct blkfront_info *info)
+{
+       return RING_FULL(&info->ring);
+}
+
+void *ring_get_request(struct blkfront_info *info)
+{
+       return (void *)RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
+}
+
+struct blkif_request_segment *ring_get_segment(struct blkfront_info *info, int i)
+{
+       struct blkif_request *ring_req =
+                       (struct blkif_request *)info->ops->ring_get_request(info);
+       return &ring_req->u.rw.seg[i];
+}
+
+void save_seg_shadow(struct blkfront_info *info,
+                     unsigned long mfn, unsigned long id, int i)
+{
+       info->shadow[id].frame[i] = mfn_to_pfn(mfn);
+}
+
+void save_req_shadow(struct blkfront_info *info,
+                     struct request *req, unsigned long id)
+{
+       struct blkif_request *ring_req =
+                       (struct blkif_request *)info->ops->ring_get_request(info);
+       info->shadow[id].req = *ring_req;
+       info->shadow[id].request = req;
+}
+
+void update_req_prod_pvt(struct blkfront_info *info)
+{
+       info->ring.req_prod_pvt++;
+}
 /*
  * Generate a Xen blkfront IO request from a blk layer request.  Reads
  * and writes are handled as expected.
@@ -262,6 +336,7 @@ static int blkif_queue_request(struct request *req)
        struct blkfront_info *info = req->rq_disk->private_data;
        unsigned long buffer_mfn;
        struct blkif_request *ring_req;
+       struct blkif_request_segment *ring_seg;
        unsigned long id;
        unsigned int fsect, lsect;
        int i, ref;
@@ -282,9 +357,9 @@ static int blkif_queue_request(struct request *req)
        }

        /* Fill out a communications ring structure. */
-       ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
-       id = get_id_from_freelist(info);
-       info->shadow[id].request = req;
+       ring_req = (struct blkif_request *)info->ops->ring_get_request(info);
+       id = info->ops->get_id(info);
+       //info->shadow[id].request = req;

        ring_req->u.rw.id = id;
        ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
@@ -315,8 +390,7 @@ static int blkif_queue_request(struct request *req)
        } else {
                ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req,
                                                           info->sg);
-               BUG_ON(ring_req->u.rw.nr_segments >
-                      BLKIF_MAX_SEGMENTS_PER_REQUEST);
+               BUG_ON(ring_req->u.rw.nr_segments > info->ops->max_seg);

                for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
                        buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
@@ -332,31 +406,35 @@ static int blkif_queue_request(struct request *req)
                                        buffer_mfn,
                                        rq_data_dir(req));

-                       info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
-                       ring_req->u.rw.seg[i] =
-                                       (struct blkif_request_segment) {
-                                               .gref       = ref,
-                                               .first_sect = fsect,
-                                               .last_sect  = lsect };
+                       ring_seg = info->ops->ring_get_segment(info, i);
+       *ring_seg = (struct blkif_request_segment) {
+                                       .gref       = ref,
+                                       .first_sect = fsect,
+                                       .last_sect  = lsect };
+                       info->ops->save_seg_shadow(info, buffer_mfn, id, i);
                }
        }

-       info->ring.req_prod_pvt++;
-
        /* Keep a private copy so we can reissue requests when recovering. */
-       info->shadow[id].req = *ring_req;
+       info->ops->save_req_shadow(info, req, id);
+
+       info->ops->update_req_prod_pvt(info);

        gnttab_free_grant_references(gref_head);

        return 0;
 }

+void ring_push(struct blkfront_info *info, int *notify)
+{
+       RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, *notify);
+}

 static inline void flush_requests(struct blkfront_info *info)
 {
        int notify;

-       RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
+       info->ops->ring_push(info, &notify);

        if (notify)
                notify_remote_via_irq(info->irq);
@@ -379,7 +457,7 @@ static void do_blkif_request(struct request_queue *rq)
        while ((req = blk_peek_request(rq)) != NULL) {
                info = req->rq_disk->private_data;

-               if (RING_FULL(&info->ring))
+               if (info->ops->ring_full(info))
                        goto wait;

                blk_start_request(req);
@@ -434,14 +512,15 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)

        /* Hard sector size and max sectors impersonate the equiv. hardware. */
        blk_queue_logical_block_size(rq, sector_size);
-       blk_queue_max_hw_sectors(rq, 512);

        /* Each segment in a request is up to an aligned page in size. */
        blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
        blk_queue_max_segment_size(rq, PAGE_SIZE);

        /* Ensure a merged request will fit in a single I/O ring slot. */
-       blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+       blk_queue_max_segments(rq, info->ops->max_seg);
+       blk_queue_max_hw_sectors(rq, info->ops->max_seg * PAGE_SIZE
+                                / sector_size);

        /* Make sure buffer addresses are sector-aligned. */
        blk_queue_dma_alignment(rq, 511);
@@ -661,7 +740,7 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)

 static void kick_pending_request_queues(struct blkfront_info *info)
 {
-       if (!RING_FULL(&info->ring)) {
+       if (!ring_full(info)) {
                /* Re-enable calldowns. */
                blk_start_queue(info->rq);
                /* Kick things off immediately. */
@@ -696,20 +775,17 @@ static void blkif_free(struct blkfront_info *info, int suspend)
        flush_work_sync(&info->work);

        /* Free resources associated with old device channel. */
-       if (info->ring_ref != GRANT_INVALID_REF) {
-               gnttab_end_foreign_access(info->ring_ref, 0,
-                                         (unsigned long)info->ring.sring);
-               info->ring_ref = GRANT_INVALID_REF;
-               info->ring.sring = NULL;
-       }
+       info->ops->free_blkring(info, suspend);
+
        if (info->irq)
                unbind_from_irqhandler(info->irq, info);
        info->evtchn = info->irq = 0;

 }

-static void blkif_completion(struct blk_shadow *s)
+static void blkif_completion(struct blkfront_info *info, unsigned long id)
 {
+       struct blk_shadow *s = &info->shadow[id];
        int i;
        /* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place
         * flag. */
@@ -717,6 +793,39 @@ static void blkif_completion(struct blk_shadow *s)
                gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
 }

+struct blkif_response *ring_get_response(struct blkfront_info *info)
+{
+       return RING_GET_RESPONSE(&info->ring, info->ring.rsp_cons);
+}
+RING_IDX get_rsp_prod(struct blkfront_info *info)
+{
+       return info->ring.sring->rsp_prod;
+}
+RING_IDX get_rsp_cons(struct blkfront_info *info)
+{
+       return info->ring.rsp_cons;
+}
+struct request *get_req_from_shadow(struct blkfront_info *info,
+                                   unsigned long id)
+{
+       return info->shadow[id].request;
+}
+void update_rsp_cons(struct blkfront_info *info)
+{
+       info->ring.rsp_cons++;
+}
+RING_IDX get_req_prod_pvt(struct blkfront_info *info)
+{
+       return info->ring.req_prod_pvt;
+}
+void check_left_response(struct blkfront_info *info, int *more_to_do)
+{
+       RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, *more_to_do);
+}
+void update_rsp_event(struct blkfront_info *info, int i)
+{
+       info->ring.sring->rsp_event = i + 1;
+}
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 {
        struct request *req;
@@ -734,20 +843,20 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
        }

  again:
-       rp = info->ring.sring->rsp_prod;
+       rp = info->ops->get_rsp_prod(info);
        rmb(); /* Ensure we see queued responses up to 'rp'. */

-       for (i = info->ring.rsp_cons; i != rp; i++) {
+       for (i = info->ops->get_rsp_cons(info); i != rp; i++) {
                unsigned long id;

-               bret = RING_GET_RESPONSE(&info->ring, i);
+               bret = info->ops->ring_get_response(info);
                id   = bret->id;
-               req  = info->shadow[id].request;
+               req  = info->ops->get_req_from_shadow(info, id);

                if (bret->operation != BLKIF_OP_DISCARD)
-                       blkif_completion(&info->shadow[id]);
+                       info->ops->blkif_completion(info, id);

-               add_id_to_freelist(info, id);
+               info->ops->add_id(info, id);

                error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
                switch (bret->operation) {
@@ -800,17 +909,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                default:
                        BUG();
                }
+               info->ops->update_rsp_cons(info);
        }

-       info->ring.rsp_cons = i;
-
-       if (i != info->ring.req_prod_pvt) {
+       rp = info->ops->get_req_prod_pvt(info);
+       if (i != rp) {
                int more_to_do;
-               RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
+               info->ops->check_left_response(info, &more_to_do);
                if (more_to_do)
                        goto again;
        } else
-               info->ring.sring->rsp_event = i + 1;
+               info->ops->update_rsp_event(info, i);

        kick_pending_request_queues(info);

@@ -819,6 +928,26 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
        return IRQ_HANDLED;
 }

+static int init_shadow(struct blkfront_info *info)
+{
+       unsigned int ring_size;
+       int i;
+       if (info->ring_type != RING_TYPE_UNDEFINED)
+               return 0;
+
+       info->ring_type = RING_TYPE_1;
+       ring_size = BLK_RING_SIZE;
+       info->shadow = kzalloc(sizeof(struct blk_shadow) * ring_size,
+                              GFP_KERNEL);
+       if (!info->shadow)
+               return -ENOMEM;
+
+       for (i = 0; i < ring_size; i++)
+               info->shadow[i].req.u.rw.id = i+1;
+       info->shadow[ring_size - 1].req.u.rw.id = 0x0fffffff;
+
+       return 0;
+}

 static int setup_blkring(struct xenbus_device *dev,
                         struct blkfront_info *info)
@@ -836,8 +965,6 @@ static int setup_blkring(struct xenbus_device *dev,
        SHARED_RING_INIT(sring);
        FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

-       sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
        err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
        if (err < 0) {
                free_page((unsigned long)sring);
@@ -846,6 +973,16 @@ static int setup_blkring(struct xenbus_device *dev,
        }
        info->ring_ref = err;

+       info->sg = kzalloc(sizeof(struct scatterlist) * info->ops->max_seg, GFP_KERNEL);
+       if (!info->sg) {
+               err = -ENOMEM;
+               goto fail;
+       }
+       sg_init_table(info->sg, info->ops->max_seg);
+
+       err = init_shadow(info);
+       if (err)
+               goto fail;
        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
                goto fail;
@@ -866,6 +1003,20 @@ fail:
        return err;
 }

+static void free_blkring(struct blkfront_info *info, int suspend)
+{
+       if (info->ring_ref != GRANT_INVALID_REF) {
+               gnttab_end_foreign_access(info->ring_ref, 0,
+                                        (unsigned long)info->ring.sring);
+               info->ring_ref = GRANT_INVALID_REF;
+               info->ring.sring = NULL;
+       }
+
+       kfree(info->sg);
+
+       if (!suspend)
+               kfree(info->shadow);
+}

 /* Common code used when first setting up, and when resuming. */
 static int talk_to_blkback(struct xenbus_device *dev,
@@ -875,8 +1026,11 @@ static int talk_to_blkback(struct xenbus_device *dev,
        struct xenbus_transaction xbt;
        int err;

+       /* register ring ops */
+       info->ops = &blk_front_ops;
+
        /* Create shared ring, alloc event channel. */
-       err = setup_blkring(dev, info);
+       err = info->ops->setup_blkring(dev, info);
        if (err)
                goto out;

@@ -937,7 +1091,7 @@ again:
 static int blkfront_probe(struct xenbus_device *dev,
                          const struct xenbus_device_id *id)
 {
-       int err, vdevice, i;
+       int err, vdevice;
        struct blkfront_info *info;

        /* FIXME: Use dynamic device id if this is not set. */
@@ -995,10 +1149,6 @@ static int blkfront_probe(struct xenbus_device *dev,
        info->connected = BLKIF_STATE_DISCONNECTED;
        INIT_WORK(&info->work, blkif_restart_queue);

-       for (i = 0; i < BLK_RING_SIZE; i++)
-               info->shadow[i].req.u.rw.id = i+1;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
-
        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
        dev_set_drvdata(&dev->dev, info);
@@ -1022,14 +1172,14 @@ static int blkif_recover(struct blkfront_info *info)
        int j;

        /* Stage 1: Make a safe copy of the shadow state. */
-       copy = kmalloc(sizeof(info->shadow),
+       copy = kmalloc(sizeof(struct blk_shadow) * BLK_RING_SIZE,
                       GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
        if (!copy)
                return -ENOMEM;
-       memcpy(copy, info->shadow, sizeof(info->shadow));
+       memcpy(copy, info->shadow, sizeof(struct blk_shadow) * BLK_RING_SIZE);

        /* Stage 2: Set up free list. */
-       memset(&info->shadow, 0, sizeof(info->shadow));
+       memset(info->shadow, 0, sizeof(struct blk_shadow) * BLK_RING_SIZE);
        for (i = 0; i < BLK_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
        info->shadow_free = info->ring.req_prod_pvt;
@@ -1042,11 +1192,11 @@ static int blkif_recover(struct blkfront_info *info)
                        continue;

                /* Grab a request slot and copy shadow state into it. */
-               req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
+               req = (struct blkif_request *)info->ops->ring_get_request(info);
                *req = copy[i].req;

                /* We get a new request id, and must reset the shadow state. */
-               req->u.rw.id = get_id_from_freelist(info);
+               req->u.rw.id = info->ops->get_id(info);
                memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));

                if (req->operation != BLKIF_OP_DISCARD) {
@@ -1100,7 +1250,7 @@ static int blkfront_resume(struct xenbus_device *dev)

        err = talk_to_blkback(dev, info);
        if (info->connected == BLKIF_STATE_SUSPENDED && !err)
-               err = blkif_recover(info);
+               err = info->ops->recover(info);

        return err;
 }
@@ -1280,7 +1430,6 @@ static void blkfront_connect(struct blkfront_info *info)
        info->connected = BLKIF_STATE_CONNECTED;
        kick_pending_request_queues(info);
        spin_unlock_irq(&info->io_lock);
-
        add_disk(info->gd);

        info->is_ready = 1;
@@ -1444,6 +1593,31 @@ out:
        return 0;
 }

+static struct blk_front_operations blk_front_ops = {
+       .ring_get_request = ring_get_request,
+       .ring_get_response = ring_get_response,
+       .ring_get_segment = ring_get_segment,
+       .get_id = get_id_from_freelist,
+       .add_id = add_id_to_freelist,
+       .save_seg_shadow = save_seg_shadow,
+       .save_req_shadow = save_req_shadow,
+       .get_req_from_shadow = get_req_from_shadow,
+       .get_rsp_prod = get_rsp_prod,
+       .get_rsp_cons = get_rsp_cons,
+       .get_req_prod_pvt = get_req_prod_pvt,
+       .check_left_response = check_left_response,
+       .update_rsp_event = update_rsp_event,
+       .update_rsp_cons = update_rsp_cons,
+       .update_req_prod_pvt = update_req_prod_pvt,
+       .ring_push = ring_push,
+       .recover = blkif_recover,
+       .ring_full = ring_full,
+       .setup_blkring = setup_blkring,
+       .free_blkring = free_blkring,
+       .blkif_completion = blkif_completion,
+       .max_seg = BLKIF_MAX_SEGMENTS_PER_REQUEST,
+};
+
 static const struct block_device_operations xlvbd_block_fops =
 {
        .owner = THIS_MODULE,
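
A closing sketch (illustration only; nothing below is defined in this patch)
of how a later patch in the series might pick the ring type once the
backend's capabilities are known. The helper name and flag are invented:

	/*
	 * Illustration only: choose the ops table before setup_blkring()
	 * runs, so that all subsequent ring accesses dispatch correctly.
	 */
	static void blkfront_select_ops(struct blkfront_info *info,
					int backend_has_big_ring)
	{
		if (backend_has_big_ring) {
			info->ring_type = RING_TYPE_2;
			/* info->ops = &blk_front_ops_v2; (hypothetical) */
		} else {
			info->ring_type = RING_TYPE_1;
			info->ops = &blk_front_ops;
		}
	}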

-ronghui

