[Xen-devel] [RFC v1 2/5] VBD: enlarge max segment per request in blkfront
Add segment-ring (segring) support in blkfront: request headers and their segments now go on separate shared rings, raising the per-request segment limit from 11 to 128.
Signed-off-by: Ronghui Duan <ronghui.duan@xxxxxxxxx>
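For readers skimming the diff, the sketch below is an editorial illustration only (not part of the patch): it models the Xen ring macros with plain arrays and made-up helper names, but shows the basic idea of the v2 path — the fixed-size request header goes on one shared ring, each data segment takes its own slot on a second ring, and the header's seg_id field records the shadow slot of the first segment so the segments of one request form a chain.

/*
 * Editorial sketch of the split request/segment ring scheme (NOT kernel
 * code).  Ring sizes, struct layouts and helper names are simplified
 * assumptions; the real driver uses RING_GET_REQUEST()/RING_PUSH_REQUESTS()
 * on grant-shared pages.
 */
#include <stdint.h>
#include <stdio.h>

#define REQ_RING_SIZE  64U     /* assumed capacity of the header ring  */
#define SEG_RING_SIZE 256U     /* assumed capacity of the segment ring */

struct seg {                   /* models blkif_request_segment */
	uint32_t gref;
	uint8_t  first_sect, last_sect;
};

struct req_header {            /* models blkif_request_header (rw case) */
	uint8_t  operation;
	uint8_t  nr_segments;
	uint64_t id;             /* request shadow slot, echoed in response */
	uint64_t seg_id;         /* shadow slot of the first segment (chain head) */
	uint64_t sector_number;
};

static struct req_header reqring[REQ_RING_SIZE];
static struct seg        segring[SEG_RING_SIZE];
static unsigned req_prod, seg_prod;

/* per-segment shadow: a copy of the entry plus a link to the next one */
static struct { struct seg req; unsigned next; } seg_shadow[SEG_RING_SIZE];

/* Queue one I/O: header on reqring, its segments on segring, linked shadows. */
static int queue_request(uint8_t op, uint64_t sector,
			 const struct seg *segs, unsigned nr)
{
	struct req_header *h;
	unsigned i, sid, prev = 0;

	/* mirrors ring_full_v2()/segring_full(): back off if either ring is short */
	if (req_prod >= REQ_RING_SIZE || nr > SEG_RING_SIZE - seg_prod)
		return 1;

	h = &reqring[req_prod];
	h->operation     = op;
	h->sector_number = sector;
	h->nr_segments   = (uint8_t)nr;
	h->id            = req_prod;

	for (i = 0; i < nr; i++) {
		sid = seg_prod;
		segring[seg_prod++] = segs[i];   /* one slot per segment */
		seg_shadow[sid].req = segs[i];
		if (i == 0)
			h->seg_id = sid;         /* chain head lives in the header */
		else
			seg_shadow[prev].next = sid;
		prev = sid;
	}
	req_prod++;   /* real code: push both rings, then notify the backend */
	return 0;
}

int main(void)
{
	struct seg s[2] = { { 1, 0, 7 }, { 2, 0, 7 } };

	printf("queued: %s\n", queue_request(0, 0, s, 2) ? "ring full" : "ok");
	return 0;
}

On the completion side, blkif_completion_v2() in the patch walks the same chain (the header's seg_id, then each seg_shadow[].id link) to end foreign access on every segment's grant and return the shadow slots to the free list.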
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a263faf..b9f383d 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -76,10 +76,23 @@ struct blk_shadow {
unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
+struct blk_req_shadow {
+ struct blkif_request_header req;
+ struct request *request;
+};
+
+struct blk_seg_shadow {
+ uint64_t id;
+ struct blkif_request_segment req;
+ unsigned long frame;
+};
+
static DEFINE_MUTEX(blkfront_mutex);
static const struct block_device_operations xlvbd_block_fops;
#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+#define BLK_REQ_RING_SIZE __CONST_RING_SIZE(blkif_request, PAGE_SIZE)
+#define BLK_SEG_RING_SIZE __CONST_RING_SIZE(blkif_segment, PAGE_SIZE)
/*
* We have one of these per vbd, whether ide, scsi or 'other'. They
@@ -96,22 +109,30 @@ struct blkfront_info
blkif_vdev_t handle;
enum blkif_state connected;
int ring_ref;
+ int reqring_ref;
+ int segring_ref;
struct blkif_front_ring ring;
+ struct blkif_request_front_ring reqring;
+ struct blkif_segment_front_ring segring;
struct scatterlist *sg;
unsigned int evtchn, irq;
struct request_queue *rq;
struct work_struct work;
struct gnttab_free_callback callback;
struct blk_shadow *shadow;
+ struct blk_req_shadow *req_shadow;
+ struct blk_seg_shadow *seg_shadow;
struct blk_front_operations *ops;
enum blkif_ring_type ring_type;
unsigned long shadow_free;
+ unsigned long seg_shadow_free;
unsigned int feature_flush;
unsigned int flush_op;
unsigned int feature_discard:1;
unsigned int feature_secdiscard:1;
unsigned int discard_granularity;
unsigned int discard_alignment;
+ unsigned long last_id;
int is_ready;
};
@@ -124,7 +145,7 @@ static struct blk_front_operations {
unsigned long (*get_id) (struct blkfront_info *info);
void (*add_id) (struct blkfront_info *info, unsigned long id);
void (*save_seg_shadow) (struct blkfront_info *info, unsigned long mfn,
- unsigned long id, int i);
+ unsigned long id, int i, struct blkif_request_segment *ring_seg);
void (*save_req_shadow) (struct blkfront_info *info,
struct request *req, unsigned long id);
struct request *(*get_req_from_shadow)(struct blkfront_info *info,
@@ -136,14 +157,16 @@ static struct blk_front_operations {
void (*update_rsp_event) (struct blkfront_info *info, int i);
void (*update_rsp_cons) (struct blkfront_info *info);
void (*update_req_prod_pvt) (struct blkfront_info *info);
+ void (*update_segment_rsp_cons) (struct blkfront_info *info, unsigned long id);
void (*ring_push) (struct blkfront_info *info, int *notify);
int (*recover) (struct blkfront_info *info);
int (*ring_full) (struct blkfront_info *info);
+ int (*segring_full) (struct blkfront_info *info, unsigned int nr_segments);
int (*setup_blkring) (struct xenbus_device *dev, struct blkfront_info *info);
void (*free_blkring) (struct blkfront_info *info, int suspend);
void (*blkif_completion) (struct blkfront_info *info, unsigned long id);
unsigned int max_seg;
-} blk_front_ops;
+} blk_front_ops, blk_front_ops_v2;
static unsigned int nr_minors;
static unsigned long *minors;
@@ -179,6 +202,24 @@ static unsigned long get_id_from_freelist(struct blkfront_info *info)
return free;
}
+static unsigned long get_id_from_freelist_v2(struct blkfront_info *info)
+{
+ unsigned long free = info->shadow_free;
+ BUG_ON(free >= BLK_REQ_RING_SIZE);
+ info->shadow_free = info->req_shadow[free].req.u.rw.id;
+ info->req_shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
+ return free;
+}
+
+static unsigned long get_seg_shadow_id(struct blkfront_info *info)
+{
+ unsigned long free = info->seg_shadow_free;
+ BUG_ON(free >= BLK_SEG_RING_SIZE);
+ info->seg_shadow_free = info->seg_shadow[free].id;
+ info->seg_shadow[free].id = 0x0fffffee; /* debug */
+ return free;
+}
+
void add_id_to_freelist(struct blkfront_info *info,
unsigned long id)
{
@@ -187,6 +228,21 @@ void add_id_to_freelist(struct blkfront_info *info,
info->shadow_free = id;
}
+static void add_id_to_freelist_v2(struct blkfront_info *info,
+ unsigned long id)
+{
+ info->req_shadow[id].req.u.rw.id = info->shadow_free;
+ info->req_shadow[id].request = NULL;
+ info->shadow_free = id;
+}
+
+static void free_seg_shadow_id(struct blkfront_info *info,
+ unsigned long id)
+{
+ info->seg_shadow[id].id = info->seg_shadow_free;
+ info->seg_shadow_free = id;
+}
+
static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
{
unsigned int end = minor + nr;
@@ -299,6 +355,14 @@ void *ring_get_request(struct blkfront_info *info)
return (void *)RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
}
+void *ring_get_request_v2(struct blkfront_info *info)
+{
+ struct blkif_request_header *ring_req;
+ ring_req = RING_GET_REQUEST(&info->reqring,
+ info->reqring.req_prod_pvt);
+ return (void *)ring_req;
+}
+
struct blkif_request_segment *ring_get_segment(struct blkfront_info *info, int i)
{
struct blkif_request *ring_req =
@@ -306,12 +370,34 @@ struct blkif_request_segment *ring_get_segment(struct blkfront_info *info, int i
return &ring_req->u.rw.seg[i];
}
-void save_seg_shadow(struct blkfront_info *info,
- unsigned long mfn, unsigned long id, int i)
+struct blkif_request_segment *ring_get_segment_v2(struct blkfront_info *info, int i)
+{
+ return RING_GET_REQUEST(&info->segring, info->segring.req_prod_pvt++);
+}
+
+void save_seg_shadow(struct blkfront_info *info, unsigned long mfn,
+ unsigned long id, int i, struct blkif_request_segment *ring_seg)
{
info->shadow[id].frame[i] = mfn_to_pfn(mfn);
}
+void save_seg_shadow_v2(struct blkfront_info *info, unsigned long mfn,
+ unsigned long id, int i, struct blkif_request_segment *ring_seg)
+{
+ struct blkif_request_header *ring_req;
+ unsigned long seg_id = get_seg_shadow_id(info);
+
+ ring_req = (struct blkif_request_header *)info->ops->ring_get_request(info);
+ if (i == 0)
+ ring_req->u.rw.seg_id = seg_id;
+ else
+ info->seg_shadow[info->last_id].id = seg_id;
+ info->seg_shadow[seg_id].frame = mfn_to_pfn(mfn);
+ memcpy(&(info->seg_shadow[seg_id].req), ring_seg,
+ sizeof(struct blkif_request_segment));
+ info->last_id = seg_id;
+}
+
void save_req_shadow(struct blkfront_info *info,
struct request *req, unsigned long id)
{
@@ -321,10 +407,34 @@ void save_req_shadow(struct blkfront_info *info,
info->shadow[id].request = req;
}
+void save_req_shadow_v2(struct blkfront_info *info,
+ struct request *req, unsigned long id)
+{
+ struct blkif_request_header *ring_req =
+ (struct blkif_request_header *)info->ops->ring_get_request(info);
+ info->req_shadow[id].req = *ring_req;
+ info->req_shadow[id].request = req;
+}
+
void update_req_prod_pvt(struct blkfront_info *info)
{
info->ring.req_prod_pvt++;
}
+
+void update_req_prod_pvt_v2(struct blkfront_info *info)
+{
+ info->reqring.req_prod_pvt++;
+}
+
+int segring_full(struct blkfront_info *info, unsigned int nr_segments)
+{
+ return 0;
+}
+
+int segring_full_v2(struct blkfront_info *info, unsigned int nr_segments)
+{
+ return nr_segments > RING_FREE_REQUESTS(&info->segring);
+}
/*
* Generate a Xen blkfront IO request from a blk layer request. Reads
* and writes are handled as expected.
@@ -347,19 +457,18 @@ static int blkif_queue_request(struct request *req)
return 1;
if (gnttab_alloc_grant_references(
- BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
+ info->ops->max_seg, &gref_head) < 0) {
gnttab_request_free_callback(
&info->callback,
blkif_restart_queue_callback,
info,
- BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ info->ops->max_seg);
return 1;
}
/* Fill out a communications ring structure. */
ring_req = (struct blkif_request *)info->ops->ring_get_request(info);
id = info->ops->get_id(info);
- //info->shadow[id].request = req;
ring_req->u.rw.id = id;
ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
@@ -392,6 +501,9 @@ static int blkif_queue_request(struct request *req)
info->sg);
BUG_ON(ring_req->u.rw.nr_segments > info->ops->max_seg);
+ if (info->ops->segring_full(info, ring_req->u.rw.nr_segments))
+ goto wait;
+
for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
fsect = sg->offset >> 9;
@@ -411,7 +523,7 @@ static int blkif_queue_request(struct request *req)
.gref = ref,
.first_sect = fsect,
.last_sect = lsect };
- info->ops->save_seg_shadow(info, buffer_mfn, id, i);
+ info->ops->save_seg_shadow(info, buffer_mfn, id, i, ring_seg);
}
}
@@ -423,6 +535,11 @@ static int blkif_queue_request(struct request *req)
gnttab_free_grant_references(gref_head);
return 0;
+wait:
+ gnttab_free_grant_references(gref_head);
+ pr_debug("No enough segment!\n");
+ info->ops->add_id(info, id);
+ return 1;
}
void ring_push(struct blkfront_info *info, int *notify)
@@ -430,6 +547,13 @@ void ring_push(struct blkfront_info *info, int *notify)
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, *notify);
}
+void ring_push_v2(struct blkfront_info *info, int *notify)
+{
+ RING_PUSH_REQUESTS(&info->segring);
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->reqring, *notify);
+}
+
+
static inline void flush_requests(struct blkfront_info *info)
{
int notify;
@@ -440,6 +564,16 @@ static inline void flush_requests(struct blkfront_info *info)
notify_remote_via_irq(info->irq);
}
+static int ring_free_v2(struct blkfront_info *info)
+{
+ return (!RING_FULL(&info->reqring) &&
+ RING_FREE_REQUESTS(&info->segring) > RING_SIZE(&info->segring)/3);
+}
+static int ring_full_v2(struct blkfront_info *info)
+{
+ return (RING_FULL(&info->reqring) || RING_FULL(&info->segring));
+}
+
/*
* do_blkif_request
* read a block; request is in a request queue
@@ -490,6 +624,17 @@ wait:
flush_requests(info);
}
+static void update_blk_queue(struct blkfront_info *info)
+{
+ struct request_queue *q = info->rq;
+
+ blk_queue_max_segments(q, info->ops->max_seg);
+ blk_queue_max_hw_sectors(q, queue_max_segments(q) *
+ queue_max_segment_size(q) /
+ queue_logical_block_size(q));
+ return;
+}
+
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
{
struct request_queue *rq;
@@ -740,7 +885,7 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
static void kick_pending_request_queues(struct blkfront_info *info)
{
- if (!ring_full(info)) {
+ if (!info->ops->ring_full(info)) {
/* Re-enable calldowns. */
blk_start_queue(info->rq);
/* Kick things off immediately. */
@@ -793,39 +938,115 @@ static void blkif_completion(struct blkfront_info *info, unsigned long id)
gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
}
+static void blkif_completion_v2(struct blkfront_info *info, unsigned long id)
+{
+ int i;
+ /* BLKIF_OP_DISCARD must not get here: its flag field shares storage
+ * with nr_segments, so the segment count below is only valid for read/write. */
+ unsigned short nr = info->req_shadow[id].req.u.rw.nr_segments;
+ unsigned long shadow_id, free_id;
+
+ shadow_id = info->req_shadow[id].req.u.rw.seg_id;
+ for (i = 0; i < nr; i++) {
+ gnttab_end_foreign_access(info->seg_shadow[shadow_id].req.gref, 0, 0UL);
+ free_id = shadow_id;
+ shadow_id = info->seg_shadow[shadow_id].id;
+ free_seg_shadow_id(info, free_id);
+ }
+}
+
struct blkif_response *ring_get_response(struct blkfront_info *info)
{
return RING_GET_RESPONSE(&info->ring, info->ring.rsp_cons);
}
+
+struct blkif_response *ring_get_response_v2(struct blkfront_info *info)
+{
+ return RING_GET_RESPONSE(&info->reqring, info->reqring.rsp_cons);
+}
+
RING_IDX get_rsp_prod(struct blkfront_info *info)
{
return info->ring.sring->rsp_prod;
}
+
+RING_IDX get_rsp_prod_v2(struct blkfront_info *info)
+{
+ return info->reqring.sring->rsp_prod;
+}
+
RING_IDX get_rsp_cons(struct blkfront_info *info)
{
return info->ring.rsp_cons;
}
+
+RING_IDX get_rsp_cons_v2(struct blkfront_info *info)
+{
+ return info->reqring.rsp_cons;
+}
+
struct request *get_req_from_shadow(struct blkfront_info *info,
unsigned long id)
{
return info->shadow[id].request;
}
+
+struct request *get_req_from_shadow_v2(struct blkfront_info *info,
+ unsigned long id)
+{
+ return info->req_shadow[id].request;
+}
+
void update_rsp_cons(struct blkfront_info *info)
{
info->ring.rsp_cons++;
}
+
+void update_rsp_cons_v2(struct blkfront_info *info)
+{
+ info->reqring.rsp_cons++;
+}
+
RING_IDX get_req_prod_pvt(struct blkfront_info *info)
{
return info->ring.req_prod_pvt;
}
+
+RING_IDX get_req_prod_pvt_v2(struct blkfront_info *info)
+{
+ return info->reqring.req_prod_pvt;
+}
+
void check_left_response(struct blkfront_info *info, int *more_to_do)
{
RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, *more_to_do);
}
+
+void check_left_response_v2(struct blkfront_info *info, int *more_to_do)
+{
+ RING_FINAL_CHECK_FOR_RESPONSES(&info->reqring, *more_to_do);
+}
+
void update_rsp_event(struct blkfront_info *info, int i)
{
info->ring.sring->rsp_event = i + 1;
}
+
+void update_rsp_event_v2(struct blkfront_info *info, int i)
+{
+ info->reqring.sring->rsp_event = i + 1;
+}
+
+void update_segment_rsp_cons(struct blkfront_info *info, unsigned long id)
+{
+ return;
+}
+
+void update_segment_rsp_cons_v2(struct blkfront_info *info, unsigned long id)
+{
+ info->segring.rsp_cons += info->req_shadow[id].req.u.rw.nr_segments;
+ return;
+}
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
{
struct request *req;
@@ -903,8 +1124,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
if (unlikely(bret->status != BLKIF_RSP_OKAY))
dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
"request: %x\n", bret->status);
-
__blk_end_request_all(req, error);
+ info->ops->update_segment_rsp_cons(info, id);
break;
default:
BUG();
@@ -949,6 +1170,43 @@ static int init_shadow(struct blkfront_info *info)
return 0;
}
+static int init_shadow_v2(struct blkfront_info *info)
+{
+ unsigned int ring_size;
+ int i;
+
+ if (info->ring_type != RING_TYPE_UNDEFINED)
+ return 0;
+
+ info->ring_type = RING_TYPE_2;
+
+ ring_size = BLK_REQ_RING_SIZE;
+ info->req_shadow = kzalloc(sizeof(struct blk_req_shadow) * ring_size,
+ GFP_KERNEL);
+ if (!info->req_shadow)
+ return -ENOMEM;
+
+ for (i = 0; i < ring_size; i++)
+ info->req_shadow[i].req.u.rw.id = i+1;
+ info->req_shadow[ring_size - 1].req.u.rw.id = 0x0fffffff;
+
+ ring_size = BLK_SEG_RING_SIZE;
+
+ info->seg_shadow = kzalloc(sizeof(struct blk_seg_shadow) * ring_size,
+ GFP_KERNEL);
+ if (!info->seg_shadow) {
+ kfree(info->req_shadow);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < ring_size; i++) {
+ info->seg_shadow[i].id = i+1;
+ }
+ info->seg_shadow[ring_size - 1].id = 0x0fffffff;
+
+ return 0;
+}
+
static int setup_blkring(struct xenbus_device *dev,
struct blkfront_info *info)
{
@@ -1003,6 +1261,84 @@ fail:
return err;
}
+static int setup_blkring_v2(struct xenbus_device *dev,
+ struct blkfront_info *info)
+{
+ struct blkif_request_sring *sring;
+ struct blkif_segment_sring *seg_sring;
+ int err;
+
+ info->reqring_ref = GRANT_INVALID_REF;
+
+ sring = (struct blkif_request_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+ if (!sring) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
+ return -ENOMEM;
+ }
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&info->reqring, sring, PAGE_SIZE);
+
+ err = xenbus_grant_ring(dev, virt_to_mfn(info->reqring.sring));
+ if (err < 0) {
+ free_page((unsigned long)sring);
+ info->reqring.sring = NULL;
+ goto fail;
+ }
+
+ info->reqring_ref = err;
+
+ info->segring_ref = GRANT_INVALID_REF;
+
+ seg_sring = (struct blkif_segment_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+ if (!seg_sring) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
+ err = -ENOMEM;
+ goto fail;
+ }
+ SHARED_RING_INIT(seg_sring);
+ FRONT_RING_INIT(&info->segring, seg_sring, PAGE_SIZE);
+
+ err = xenbus_grant_ring(dev, virt_to_mfn(info->segring.sring));
+ if (err < 0) {
+ free_page((unsigned long)seg_sring);
+ info->segring.sring = NULL;
+ goto fail;
+ }
+
+ info->segring_ref = err;
+
+ info->sg = kzalloc(sizeof(struct scatterlist) * info->ops->max_seg,
+ GFP_KERNEL);
+ if (!info->sg) {
+ err = -ENOMEM;
+ goto fail;
+ }
+ sg_init_table(info->sg, info->ops->max_seg);
+
+ err = init_shadow_v2(info);
+ if (err)
+ goto fail;
+
+ err = xenbus_alloc_evtchn(dev, &info->evtchn);
+ if (err)
+ goto fail;
+
+ err = bind_evtchn_to_irqhandler(info->evtchn,
+ blkif_interrupt,
+ IRQF_SAMPLE_RANDOM, "blkif", info);
+ if (err <= 0) {
+ xenbus_dev_fatal(dev, err,
+ "bind_evtchn_to_irqhandler failed");
+ goto fail;
+ }
+ info->irq = err;
+
+ return 0;
+fail:
+ blkif_free(info, 0);
+ return err;
+}
+
static void free_blkring(struct blkfront_info *info, int suspend)
{
if (info->ring_ref != GRANT_INVALID_REF) {
@@ -1018,6 +1354,32 @@ static void free_blkring(struct blkfront_info *info, int suspend)
kfree(info->shadow);
}
+static void free_blkring_v2(struct blkfront_info *info, int suspend)
+{
+ if (info->reqring_ref != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(info->reqring_ref, 0,
+ (unsigned long)info->reqring.sring);
+ info->reqring_ref = GRANT_INVALID_REF;
+ info->reqring.sring = NULL;
+ }
+
+ if (info->segring_ref != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(info->segring_ref, 0,
+ (unsigned long)info->segring.sring);
+ info->segring_ref = GRANT_INVALID_REF;
+ info->segring.sring = NULL;
+ }
+
+ kfree(info->sg);
+
+ if (!suspend) {
+ kfree(info->req_shadow);
+ kfree(info->seg_shadow);
+ }
+
+}
+
+
/* Common code used when first setting up, and when resuming. */
static int talk_to_blkback(struct xenbus_device *dev,
struct blkfront_info *info)
@@ -1025,9 +1387,17 @@ static int talk_to_blkback(struct xenbus_device *dev,
const char *message = NULL;
struct xenbus_transaction xbt;
int err;
+ unsigned int type;
/* register ring ops */
- info->ops = &blk_front_ops;
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "blkback-ring-type", "%u",
+ &type);
+ if (err != 1)
+ type = 1;
+ if (type == 2)
+ info->ops = &blk_front_ops_v2;
+ else
+ info->ops = &blk_front_ops;
/* Create shared ring, alloc event channel. */
err = info->ops->setup_blkring(dev, info);
@@ -1040,13 +1410,6 @@ again:
xenbus_dev_fatal(dev, err, "starting transaction");
goto destroy_blkring;
}
-
- err = xenbus_printf(xbt, dev->nodename,
- "ring-ref", "%u", info->ring_ref);
- if (err) {
- message = "writing ring-ref";
- goto abort_transaction;
- }
err = xenbus_printf(xbt, dev->nodename,
"event-channel", "%u", info->evtchn);
if (err) {
@@ -1059,7 +1422,40 @@ again:
message = "writing protocol";
goto abort_transaction;
}
-
+ if (type == 1) {
+ err = xenbus_printf(xbt, dev->nodename,
+ "ring-ref", "%u", info->ring_ref);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename, "blkfront-ring-type",
+ "%u", type);
+ if (err) {
+ message = "writing blkfront ring type";
+ goto abort_transaction;
+ }
+ }
+ if (type == 2) {
+ err = xenbus_printf(xbt, dev->nodename,
+ "reqring-ref", "%u", info->reqring_ref);
+ if (err) {
+ message = "writing reqring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename,
+ "segring-ref", "%u", info->segring_ref);
+ if (err) {
+ message = "writing segring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename, "blkfront-ring-type",
+ "%u", type);
+ if (err) {
+ message = "writing blkfront ring type";
+ goto abort_transaction;
+ }
+ }
err = xenbus_transaction_end(xbt, 0);
if (err) {
if (err == -EAGAIN)
@@ -1164,7 +1560,7 @@ static int blkfront_probe(struct xenbus_device *dev,
}
-static int blkif_recover(struct blkfront_info *info)
+static int recover_from_v1_to_v1(struct blkfront_info *info)
{
int i;
struct blkif_request *req;
@@ -1233,6 +1629,372 @@ static int blkif_recover(struct blkfront_info *info)
return 0;
}
+/* migrate from V2 type ring to V1 type*/
+static int recover_from_v2_to_v1(struct blkfront_info *info)
+{
+ struct blk_req_shadow *copy;
+ struct blk_seg_shadow *seg_copy;
+ struct request *req;
+ struct blkif_request *new_req;
+ int i, j, err;
+ unsigned int req_rs;
+ struct bio *biolist = NULL, *biotail = NULL, *bio;
+ unsigned long index;
+ unsigned long flags;
+
+ pr_info("Warning, migrate to older backend, some io may fail\n");
+
+ /* Stage 1: Init the new shadow state. */
+ info->ring_type = RING_TYPE_UNDEFINED;
+ err = init_shadow(info);
+ if (err)
+ return err;
+
+ req_rs = BLK_REQ_RING_SIZE;
+
+ /* Stage 2: Set up free list. */
+ info->shadow_free = info->ring.req_prod_pvt;
+
+ /* Stage 3: Find pending requests and requeue them. */
+ for (i = 0; i < req_rs; i++) {
+ req = info->req_shadow[i].request;
+ /* Not in use? */
+ if (!req)
+ continue;
+
+ if (ring_full(info))
+ goto out;
+
+ copy = &info->req_shadow[i];
+
+ /* We get a new request, reset the blkif request and shadow state. */
+ new_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
+
+ if (copy->req.operation == BLKIF_OP_DISCARD) {
+ new_req->operation = BLKIF_OP_DISCARD;
+ new_req->u.discard = copy->req.u.discard;
+ new_req->u.discard.id = get_id_from_freelist(info);
+ info->shadow[new_req->u.discard.id].request = req;
+ }
+ else {
+ if (copy->req.u.rw.nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)
+ continue;
+
+ new_req->u.rw.id = get_id_from_freelist(info);
+ info->shadow[new_req->u.rw.id].request = req;
+ new_req->operation = copy->req.operation;
+ new_req->u.rw.nr_segments = copy->req.u.rw.nr_segments;
+ new_req->u.rw.handle = copy->req.u.rw.handle;
+ new_req->u.rw.sector_number = copy->req.u.rw.sector_number;
+ index = copy->req.u.rw.seg_id;
+ for (j = 0; j < new_req->u.rw.nr_segments; j++) {
+ seg_copy = &info->seg_shadow[index];
+ new_req->u.rw.seg[j].gref = seg_copy->req.gref;
+ new_req->u.rw.seg[j].first_sect = seg_copy->req.first_sect;
+ new_req->u.rw.seg[j].last_sect = seg_copy->req.last_sect;
+ info->shadow[new_req->u.rw.id].frame[j] = seg_copy->frame;
+ gnttab_grant_foreign_access_ref(
+ new_req->u.rw.seg[j].gref,
+ info->xbdev->otherend_id,
+ pfn_to_mfn(info->shadow[new_req->u.rw.id].frame[j]),
+ rq_data_dir(info->shadow[new_req->u.rw.id].request));
+ index = info->seg_shadow[index].id;
+ }
+ }
+ info->shadow[new_req->u.rw.id].req = *new_req;
+ info->ring.req_prod_pvt++;
+ info->req_shadow[i].request = NULL;
+
+ }
+out:
+ xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+ spin_lock_irqsave(&info->io_lock, flags);
+
+ /* cancel the request and resubmit the bio */
+ for (i = 0; i < req_rs; i++) {
+ req = info->req_shadow[i].request;
+ if (!req)
+ continue;
+
+ blkif_completion_v2(info, i);
+
+ if (biolist == NULL)
+ biolist = req->bio;
+ else
+ biotail->bi_next = req->bio;
+ biotail = req->biotail;
+ req->bio = NULL;
+ __blk_put_request(info->rq, req);
+ }
+
+ while ((req = blk_peek_request(info->rq)) != NULL) {
+
+ blk_start_request(req);
+
+ if (biolist == NULL)
+ biolist = req->bio;
+ else
+ biotail->bi_next = req->bio;
+ biotail = req->biotail;
+ req->bio = NULL;
+ __blk_put_request(info->rq, req);
+ }
+
+ /* Now safe for us to use the shared ring */
+ info->connected = BLKIF_STATE_CONNECTED;
+
+ /* need update the queue limit setting */
+ update_blk_queue(info);
+
+ /* Send off requeued requests */
+ flush_requests(info);
+
+ /* Kick any other new requests queued since we resumed */
+ kick_pending_request_queues(info);
+
+ spin_unlock_irqrestore(&info->io_lock, flags);
+
+ /* free original shadow*/
+ kfree(info->seg_shadow);
+ kfree(info->req_shadow);
+
+ while (biolist) {
+ bio = biolist;
+ biolist = biolist->bi_next;
+ bio->bi_next = NULL;
+ submit_bio(bio->bi_rw, bio);
+ }
+
+ return 0;
+}
+
+static int blkif_recover(struct blkfront_info *info)
+{
+ int rc;
+
+ if (info->ring_type == RING_TYPE_1)
+ rc = recover_from_v1_to_v1(info);
+ else if (info->ring_type == RING_TYPE_2)
+ rc = recover_from_v2_to_v1(info);
+ else
+ rc = -EPERM;
+ return rc;
+}
+
+static int recover_from_v1_to_v2(struct blkfront_info *info)
+{
+ int i,err;
+ struct blkif_request_header *req;
+ struct blkif_request_segment *segring_req;
+ struct blk_shadow *copy;
+ int j;
+ unsigned long seg_id, last_id = 0x0fffffff;
+
+ /* Stage 1: Init the new shadow. */
+ info->ring_type = RING_TYPE_UNDEFINED;
+ err = init_shadow_v2(info);
+ if (err)
+ return err;
+
+ /* Stage 2: Set up free list. */
+ info->shadow_free = info->reqring.req_prod_pvt;
+ info->seg_shadow_free = info->segring.req_prod_pvt;
+
+ /* Stage 3: Find pending requests and requeue them. */
+ for (i = 0; i < BLK_RING_SIZE; i++) {
+ copy = &info->shadow[i];
+ /* Not in use? */
+ if (!copy->request)
+ continue;
+
+ /* We get a new request, reset the blkif request and shadow state. */
+ req = RING_GET_REQUEST(&info->reqring, info->reqring.req_prod_pvt);
+
+ if (copy->req.operation == BLKIF_OP_DISCARD) {
+ req->operation = BLKIF_OP_DISCARD;
+ req->u.discard = copy->req.u.discard;
+ req->u.discard.id = get_id_from_freelist_v2(info);
+ info->req_shadow[req->u.discard.id].request = copy->request;
+ info->req_shadow[req->u.discard.id].req = *req;
+ }
+ else {
+ req->u.rw.id = get_id_from_freelist_v2(info);
+ req->operation = copy->req.operation;
+ req->u.rw.nr_segments = copy->req.u.rw.nr_segments;
+ req->u.rw.handle = copy->req.u.rw.handle;
+ req->u.rw.sector_number = copy->req.u.rw.sector_number;
+ for (j = 0; j < req->u.rw.nr_segments; j++) {
+ seg_id = get_seg_shadow_id(info);
+ if (j == 0)
+ req->u.rw.seg_id = seg_id;
+ else
+ info->seg_shadow[last_id].id = seg_id;
+ segring_req = RING_GET_REQUEST(&info->segring, info->segring.req_prod_pvt);
+ segring_req->gref = copy->req.u.rw.seg[j].gref;
+ segring_req->first_sect = copy->req.u.rw.seg[j].first_sect;
+ segring_req->last_sect = copy->req.u.rw.seg[j].last_sect;
+ info->seg_shadow[seg_id].req = *segring_req;
+ info->seg_shadow[seg_id].frame = copy->frame[j];
+ info->segring.req_prod_pvt++;
+ gnttab_grant_foreign_access_ref(
+ segring_req->gref,
+ info->xbdev->otherend_id,
+ pfn_to_mfn(copy->frame[j]),
+ rq_data_dir(copy->request));
+ last_id = seg_id;
+ }
+ info->req_shadow[req->u.rw.id].req = *req;
+ info->req_shadow[req->u.rw.id].request = copy->request;
+ }
+
+ info->reqring.req_prod_pvt++;
+ }
+
+ /* need update the queue limit setting */
+ update_blk_queue(info);
+
+ /* free original shadow*/
+ kfree(info->shadow);
+
+ xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+ spin_lock_irq(&info->io_lock);
+
+ /* Now safe for us to use the shared ring */
+ info->connected = BLKIF_STATE_CONNECTED;
+
+ /* Send off requeued requests */
+ flush_requests(info);
+
+ /* Kick any other new requests queued since we resumed */
+ kick_pending_request_queues(info);
+
+ spin_unlock_irq(&info->io_lock);
+
+ return 0;
+}
+
+static int recover_from_v2_to_v2(struct blkfront_info *info)
+{
+ int i;
+ struct blkif_request_header *req;
+ struct blkif_request_segment *segring_req;
+ struct blk_req_shadow *copy;
+ struct blk_seg_shadow *seg_copy;
+ unsigned long index = 0x0fffffff, seg_id, last_id = 0x0fffffff;
+ int j;
+ unsigned int req_rs, seg_rs;
+ unsigned long flags;
+
+ req_rs = BLK_REQ_RING_SIZE;
+ seg_rs = BLK_SEG_RING_SIZE;
+
+ /* Stage 1: Make a safe copy of the shadow state. */
+ copy = kmalloc(sizeof(struct blk_req_shadow) * req_rs,
+ GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
+ if (!copy)
+ return -ENOMEM;
+
+ seg_copy = kmalloc(sizeof(struct blk_seg_shadow) * seg_rs,
+ GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
+ if (!seg_copy) {
+ kfree(copy);
+ return -ENOMEM;
+ }
+
+ memcpy(copy, info->req_shadow, sizeof(struct blk_req_shadow) * req_rs);
+ memcpy(seg_copy, info->seg_shadow,
+ sizeof(struct blk_seg_shadow) * seg_rs);
+
+ /* Stage 2: Set up free list. */
+ for (i = 0; i < req_rs; i++)
+ info->req_shadow[i].req.u.rw.id = i+1;
+ info->req_shadow[req_rs - 1].req.u.rw.id = 0x0fffffff;
+
+ for (i = 0; i < seg_rs; i++)
+ info->seg_shadow[i].id = i+1;
+ info->seg_shadow[seg_rs - 1].id = 0x0fffffff;
+
+ info->shadow_free = info->reqring.req_prod_pvt;
+ info->seg_shadow_free = info->segring.req_prod_pvt;
+
+ /* Stage 3: Find pending requests and requeue them. */
+ for (i = 0; i < req_rs; i++) {
+ /* Not in use? */
+ if (!copy[i].request)
+ continue;
+
+ req = RING_GET_REQUEST(&info->reqring, info->reqring.req_prod_pvt);
+ *req = copy[i].req;
+
+ req->u.rw.id = get_id_from_freelist_v2(info);
+ memcpy(&info->req_shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));
+
+ if (req->operation != BLKIF_OP_DISCARD) {
+ for (j = 0; j < req->u.rw.nr_segments; j++) {
+ seg_id = get_seg_shadow_id(info);
+ if (j == 0)
+ index = req->u.rw.seg_id;
+ else
+ index = seg_copy[index].id;
+ gnttab_grant_foreign_access_ref(
+ seg_copy[index].req.gref,
+ info->xbdev->otherend_id,
+ pfn_to_mfn(seg_copy[index].frame),
+ rq_data_dir(info->req_shadow[req->u.rw.id].request));
+ segring_req = RING_GET_REQUEST(&info->segring, info->segring.req_prod_pvt);
+ memcpy(segring_req, &(seg_copy[index].req),
+ sizeof(struct blkif_request_segment));
+ if (j == 0)
+ req->u.rw.seg_id = seg_id;
+ else
+ info->seg_shadow[last_id].id = seg_id;
+
+ memcpy(&info->seg_shadow[seg_id],
+ &seg_copy[index], sizeof(struct blk_seg_shadow));
+ info->segring.req_prod_pvt++;
+ last_id = seg_id;
+ }
+ }
+ info->req_shadow[req->u.rw.id].req = *req;
+
+ info->reqring.req_prod_pvt++;
+ }
+
+ kfree(seg_copy);
+ kfree(copy);
+
+ xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+ spin_lock_irqsave(&info->io_lock, flags);
+
+ /* Now safe for us to use the shared ring */
+ info->connected = BLKIF_STATE_CONNECTED;
+
+ /* Send off requeued requests */
+ flush_requests(info);
+
+ /* Kick any other new requests queued since we resumed */
+ kick_pending_request_queues(info);
+
+ spin_unlock_irqrestore(&info->io_lock, flags);
+
+ return 0;
+}
+
+static int blkif_recover_v2(struct blkfront_info *info)
+{
+ int rc;
+
+ if (info->ring_type == RING_TYPE_1)
+ rc = recover_from_v1_to_v2(info);
+ else if (info->ring_type == RING_TYPE_2)
+ rc = recover_from_v2_to_v2(info);
+ else
+ rc = -EPERM;
+ return rc;
+}
/**
* We are reconnecting to the backend, due to a suspend/resume, or a backend
* driver restart. We tear down our blkif structure and recreate it, but
@@ -1609,15 +2371,44 @@ static struct blk_front_operations blk_front_ops = {
.update_rsp_event = update_rsp_event,
.update_rsp_cons = update_rsp_cons,
.update_req_prod_pvt = update_req_prod_pvt,
+ .update_segment_rsp_cons = update_segment_rsp_cons,
.ring_push = ring_push,
.recover = blkif_recover,
.ring_full = ring_full,
+ .segring_full = segring_full,
.setup_blkring = setup_blkring,
.free_blkring = free_blkring,
.blkif_completion = blkif_completion,
.max_seg = BLKIF_MAX_SEGMENTS_PER_REQUEST,
};
+static struct blk_front_operations blk_front_ops_v2 = {
+ .ring_get_request = ring_get_request_v2,
+ .ring_get_response = ring_get_response_v2,
+ .ring_get_segment = ring_get_segment_v2,
+ .get_id = get_id_from_freelist_v2,
+ .add_id = add_id_to_freelist_v2,
+ .save_seg_shadow = save_seg_shadow_v2,
+ .save_req_shadow = save_req_shadow_v2,
+ .get_req_from_shadow = get_req_from_shadow_v2,
+ .get_rsp_prod = get_rsp_prod_v2,
+ .get_rsp_cons = get_rsp_cons_v2,
+ .get_req_prod_pvt = get_req_prod_pvt_v2,
+ .check_left_response = check_left_response_v2,
+ .update_rsp_event = update_rsp_event_v2,
+ .update_rsp_cons = update_rsp_cons_v2,
+ .update_req_prod_pvt = update_req_prod_pvt_v2,
+ .update_segment_rsp_cons = update_segment_rsp_cons_v2,
+ .ring_push = ring_push_v2,
+ .recover = blkif_recover_v2,
+ .ring_full = ring_full_v2,
+ .segring_full = segring_full_v2,
+ .setup_blkring = setup_blkring_v2,
+ .free_blkring = free_blkring_v2,
+ .blkif_completion = blkif_completion_v2,
+ .max_seg = BLKIF_MAX_SEGMENTS_PER_REQUEST_V2,
+};
+
static const struct block_device_operations xlvbd_block_fops =
{
.owner = THIS_MODULE,
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index f100ce2..a5a98b0 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -475,7 +475,7 @@ void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
/* XXX This needs to be fixed so that the ref and page are
placed on a list to be freed up later. */
printk(KERN_WARNING
- "WARNING: leaking g.e. and page still in use!\n");
+ "WARNING: ref %u leaking g.e. and page still in use!\n",
ref);
}
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index ee338bf..763489a 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -108,6 +108,7 @@ typedef uint64_t blkif_sector_t;
* NB. This could be 12 if the ring indexes weren't stored in the same page.
*/
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+#define BLKIF_MAX_SEGMENTS_PER_REQUEST_V2 128
struct blkif_request_rw {
uint8_t nr_segments; /* number of segments */
@@ -125,6 +126,17 @@ struct blkif_request_rw {
} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));
+struct blkif_request_rw_header {
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+#ifdef CONFIG_X86_64
+ uint32_t _pad1; /* offsetof(blkif_request,u.rw.id) == 8 */
+#endif
+ uint64_t id; /* private guest value, echoed in resp */
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ uint64_t seg_id; /* segment id in the segment shadow */
+} __attribute__((__packed__));
+
struct blkif_request_discard {
uint8_t flag; /* BLKIF_DISCARD_SECURE or zero. */
#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */
@@ -135,7 +147,6 @@ struct blkif_request_discard {
uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;
uint64_t nr_sectors;
- uint8_t _pad3;
} __attribute__((__packed__));
struct blkif_request {
@@ -146,12 +157,24 @@ struct blkif_request {
} u;
} __attribute__((__packed__));
+struct blkif_request_header {
+ uint8_t operation; /* BLKIF_OP_??? */
+ union {
+ struct blkif_request_rw_header rw;
+ struct blkif_request_discard discard;
+ } u;
+} __attribute__((__packed__));
+
struct blkif_response {
uint64_t id; /* copied from request */
uint8_t operation; /* copied from request */
int16_t status; /* BLKIF_RSP_??? */
};
+struct blkif_response_segment {
+ char dummy;
+} __attribute__((__packed__));
+
/*
* STATUS RETURN CODES.
*/
@@ -167,6 +190,8 @@ struct blkif_response {
*/
DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
+DEFINE_RING_TYPES(blkif_request, struct blkif_request_header, struct blkif_response);
+DEFINE_RING_TYPES(blkif_segment, struct blkif_request_segment, struct blkif_response_segment);
#define VDISK_CDROM 0x1
#define VDISK_REMOVABLE 0x2
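A rough capacity estimate (my own back-of-the-envelope numbers, not from the patch): assuming 4 KiB shared pages, a 64-byte shared-ring header, a 32-byte packed blkif_request_header and an 8-byte segment entry, with the slot count rounded down to a power of two as the ring macros do, the header ring holds on the order of 64 requests per page while the segment ring holds on the order of 256 segments, so only a couple of maximally sized 128-segment requests fit in flight at once. That is presumably why blkif_queue_request() now checks segring_full() before filling in segments. The snippet below just reproduces that arithmetic under the stated assumptions.

/* Editorial back-of-the-envelope calculation; page size, header size and
 * entry sizes are assumptions, the rounding mimics the ring macros. */
#include <stdio.h>

static unsigned long round_down_pow2(unsigned long x)
{
	unsigned long p = 1;

	while (p * 2 <= x)
		p *= 2;
	return p;
}

static unsigned long slots(unsigned long page, unsigned long hdr,
			   unsigned long entry)
{
	return round_down_pow2((page - hdr) / entry);
}

int main(void)
{
	printf("header ring:  ~%lu requests/page\n", slots(4096, 64, 32));
	printf("segment ring: ~%lu segments/page\n", slots(4096, 64, 8));
	return 0;
}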
-ronghui