Re: [Xen-devel] [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
On Thu, Jul 21, 2016 at 06:08:05PM +0800, Bob Liu wrote:
> 
> On 07/21/2016 04:57 PM, Roger Pau Monné wrote:
> > On Fri, Jul 15, 2016 at 05:31:49PM +0800, Bob Liu wrote:
> >> The current VBD layer reserves buffer space for each attached device based on
> >> three statically configured settings which are read at boot time.
> >>  * max_indirect_segs: Maximum number of indirect segments.
> >>  * max_ring_page_order: Maximum order of pages to be used for the shared ring.
> >>  * max_queues: Maximum number of queues (rings) to be used.
> >>
> >> But the storage backend, workload, and guest memory result in very different
> >> tuning requirements. It's impossible to centrally predict application
> >> characteristics, so it's best to allow the settings to be adjusted
> >> dynamically based on the workload inside the guest.
> >>
> >> Usage:
> >> Show current values:
> >> cat /sys/devices/vbd-xxx/max_indirect_segs
> >> cat /sys/devices/vbd-xxx/max_ring_page_order
> >> cat /sys/devices/vbd-xxx/max_queues
> >>
> >> Write new values:
> >> echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs
> >> echo <new value> > /sys/devices/vbd-xxx/max_ring_page_order
> >> echo <new value> > /sys/devices/vbd-xxx/max_queues
> >>
> >> Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx>
> >> --
> >> v2: Add device lock and other comments from Konrad.
> >> ---
> >>  drivers/block/xen-blkfront.c | 285 ++++++++++++++++++++++++++++++++++++++++++-
> >>  1 file changed, 283 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> >> index 10f46a8..9a5ed22 100644
> >> --- a/drivers/block/xen-blkfront.c
> >> +++ b/drivers/block/xen-blkfront.c
> >> @@ -46,6 +46,7 @@
> >>  #include <linux/scatterlist.h>
> >>  #include <linux/bitmap.h>
> >>  #include <linux/list.h>
> >> +#include <linux/delay.h>
> >>  
> >>  #include <xen/xen.h>
> >>  #include <xen/xenbus.h>
> >> @@ -212,6 +213,11 @@ struct blkfront_info
> >>          /* Save uncomplete reqs and bios for migration. */
> >>          struct list_head requests;
> >>          struct bio_list bio_list;
> >> +        /* For dynamic configuration. */
> >> +        unsigned int reconfiguring:1;
> >> +        int new_max_indirect_segments;
> >> +        int new_max_ring_page_order;
> >> +        int new_max_queues;
> >>  };
> >>  
> >>  static unsigned int nr_minors;
> >> @@ -1350,6 +1356,31 @@ static void blkif_free(struct blkfront_info *info, int suspend)
> >>          for (i = 0; i < info->nr_rings; i++)
> >>                  blkif_free_ring(&info->rinfo[i]);
> >>  
> >> +        /* Remove old xenstore nodes. */
> >> +        if (info->nr_ring_pages > 1)
> >> +                xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-page-order");
> >> +
> >> +        if (info->nr_rings == 1) {
> >> +                if (info->nr_ring_pages == 1) {
> >> +                        xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-ref");
> >> +                } else {
> >> +                        for (i = 0; i < info->nr_ring_pages; i++) {
> >> +                                char ring_ref_name[RINGREF_NAME_LEN];
> >> +
> >> +                                snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
> >> +                                xenbus_rm(XBT_NIL, info->xbdev->nodename, ring_ref_name);
> >> +                        }
> >> +                }
> >> +        } else {
> >> +                xenbus_rm(XBT_NIL, info->xbdev->nodename, "multi-queue-num-queues");
> >> +
> >> +                for (i = 0; i < info->nr_rings; i++) {
> >> +                        char queuename[QUEUE_NAME_LEN];
> >> +
> >> +                        snprintf(queuename, QUEUE_NAME_LEN, "queue-%u", i);
> >> +                        xenbus_rm(XBT_NIL, info->xbdev->nodename, queuename);
> >> +                }
> >> +        }
> >>          kfree(info->rinfo);
> >>          info->rinfo = NULL;
> >>          info->nr_rings = 0;
> >> @@ -1772,6 +1803,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
> >>                  info->nr_ring_pages = 1;
> >>          else {
> >>                  ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
> >> +                if (info->new_max_ring_page_order) {
> > 
> > Instead of calling this "new_max_ring_page_order", could you just call it
> > max_ring_page_order, initialize it to xen_blkif_max_ring_order by default
> 
> Sure, I can do that.
> 
> > and use it everywhere instead of xen_blkif_max_ring_order?
> 
> But "xen_blkif_max_ring_order" still has to be used here; this is the only
> place "xen_blkif_max_ring_order" is used (apart from checking its value in
> xlblk_init()).
> 
> 
> > 
> >> +                        BUG_ON(info->new_max_ring_page_order > max_page_order);
> >> +                        ring_page_order = info->new_max_ring_page_order;
> >> +                }
> >>                  info->nr_ring_pages = 1 << ring_page_order;
> >>          }
> >>  
> >> @@ -1895,6 +1930,10 @@ static int negotiate_mq(struct blkfront_info *info)
> >>                  backend_max_queues = 1;
> >>  
> >>          info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
> >> +        if (info->new_max_queues) {
> > 
> > Same here IMHO, this is going to make the code flow slightly easier to
> > understand.
> > 
> >> +                BUG_ON(info->new_max_queues > backend_max_queues);
> >> +                info->nr_rings = info->new_max_queues;
> >> +        }
> >>          /* We need at least one ring. */
> >>          if (!info->nr_rings)
> >>                  info->nr_rings = 1;
> >> @@ -2352,11 +2391,227 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
> >>                              NULL);
> >>          if (err)
> >>                  info->max_indirect_segments = 0;
> >> -        else
> >> +        else {
> >>                  info->max_indirect_segments = min(indirect_segments,
> >>                                                    xen_blkif_max_segments);
> >> +                if (info->new_max_indirect_segments) {
> >> +                        BUG_ON(info->new_max_indirect_segments > indirect_segments);
> >> +                        info->max_indirect_segments = info->new_max_indirect_segments;
> >> +                }
> >> +        }
> >> +}
> >> +
> >> +static ssize_t max_ring_page_order_show(struct device *dev,
> >> +                                        struct device_attribute *attr, char *page)
> >> +{
> >> +        struct blkfront_info *info = dev_get_drvdata(dev);
> >> +
> >> +        return sprintf(page, "%u\n", get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
> >> +}
> >> +
> >> +static ssize_t max_indirect_segs_show(struct device *dev,
> >> +                                      struct device_attribute *attr, char *page)
> >> +{
> >> +        struct blkfront_info *info = dev_get_drvdata(dev);
> >> +
> >> +        return sprintf(page, "%u\n", info->max_indirect_segments);
> >> +}
> >> +
> >> +static ssize_t max_queues_show(struct device *dev,
> >> +                               struct device_attribute *attr, char *page)
> >> +{
> >> +        struct blkfront_info *info = dev_get_drvdata(dev);
> >> +
> >> +        return sprintf(page, "%u\n", info->nr_rings);
> >> +}
> >> +
> >> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
> >> +{
> >> +        unsigned int i;
> >> +        int err = -EBUSY;
> >> +
> >> +        /*
> >> +         * Make sure no migration in parallel, device lock is actually a
> >> +         * mutex.
> >> +         */
> >> +        if (!device_trylock(&info->xbdev->dev)) {
> >> +                pr_err("Fail to acquire dev:%s lock, may be in migration.\n",
> >> +                       dev_name(&info->xbdev->dev));
> >> +                return err;
> >> +        }
> >> +
> >> +        /*
> >> +         * Prevent new requests and guarantee no uncompleted reqs.
> >> +         */
> >> +        blk_mq_freeze_queue(info->rq);
> >> +        if (part_in_flight(&info->gd->part0))
> >> +                goto out;
> >> +
> >> +        /*
> >> +         * Front                                Backend
> >> +         * Switch to XenbusStateClosed
> >> +         *                                      frontend_changed():
> >> +         *                                       case XenbusStateClosed:
> >> +         *                                              xen_blkif_disconnect()
> >> +         *                                              Switch to XenbusStateClosed
> >> +         * blkfront_resume():
> >> +         *                                      frontend_changed():
> >> +         *                                              reconnect
> >> +         * Wait until XenbusStateConnected
> >> +         */
> >> +        info->reconfiguring = true;
> >> +        xenbus_switch_state(info->xbdev, XenbusStateClosed);
> >> +
> >> +        /* Poll every 100ms, 1 minute timeout. */
> >> +        for (i = 0; i < 600; i++) {
> >> +                /*
> >> +                 * Wait backend enter XenbusStateClosed, blkback_changed()
> >> +                 * will clear reconfiguring.
> >> +                 */
> >> +                if (!info->reconfiguring)
> >> +                        goto resume;
> >> +                schedule_timeout_interruptible(msecs_to_jiffies(100));
> >> +        }
> > 
> > Instead of having this wait, could you just set info->reconfiguring = 1, set
> > the frontend state to XenbusStateClosed and mimic exactly what a resume from
> > suspension does? blkback_changed would have to set the frontend state to
> > InitWait when it detects that the backend has switched to Closed, and call
> > blkfront_resume.
> 
> I think that won't work.
> In the real "resume" case, the power management system will trigger all the
> ->resume() paths.
> But there is no equivalent trigger for dynamic configuration.

Hello,

I think it should be possible to set info->reconfiguring and wait for the
backend to switch to state Closed; at that point we should call blkif_resume
(from blkback_changed) and the backend will follow the reconnection.

Roger.
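[Editorial note: as a rough illustration of the flow Roger describes above (not part of the posted patch), the Closed handling in blkback_changed() could look something like the sketch below. The reconfiguring field comes from the patch; the call into blkfront_resume() (presumably what "blkif_resume" refers to) and the omitted state handling are assumptions.]

```c
/*
 * Illustrative sketch only, not the posted patch: when the backend reaches
 * XenbusStateClosed while a dynamic reconfiguration is pending, reuse the
 * resume path to reconnect with the new limits instead of tearing the
 * device down.  All other backend states are omitted here.
 */
static void blkback_changed(struct xenbus_device *dev,
                            enum xenbus_state backend_state)
{
        struct blkfront_info *info = dev_get_drvdata(&dev->dev);

        switch (backend_state) {
        case XenbusStateClosed:
                if (info->reconfiguring) {
                        /* Backend is fully disconnected; reconnect now. */
                        info->reconfiguring = false;
                        if (blkfront_resume(dev))
                                pr_err("%s: reconnect after reconfiguration failed\n",
                                       dev_name(&dev->dev));
                        break;
                }
                /* Normal Closed/removal handling would go here. */
                break;
        default:
                /* Remaining states handled as in the existing driver. */
                break;
        }
}
```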
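[Editorial note: the excerpt only quotes the sysfs _show() helpers. For context, a writable attribute that feeds dynamic_reconfig_device() might look roughly like the following sketch. This is not the posted code: the bounds check against xen_blkif_max_ring_order and the DEVICE_ATTR_RW wiring are assumptions.]

```c
/*
 * Rough sketch of a writable sysfs attribute driving the reconfiguration.
 * Not the posted patch: validation and attribute wiring are assumptions.
 */
static ssize_t max_ring_page_order_store(struct device *dev,
                                         struct device_attribute *attr,
                                         const char *buf, size_t count)
{
        struct blkfront_info *info = dev_get_drvdata(dev);
        unsigned int order;
        int err;

        err = kstrtouint(buf, 10, &order);
        if (err)
                return err;

        /* Bound by the module-wide limit; the backend limit is re-checked later. */
        if (order > xen_blkif_max_ring_order)
                return -EINVAL;

        info->new_max_ring_page_order = order;
        /* Tear the connection down and reconnect with the new ring size. */
        return dynamic_reconfig_device(info, count);
}
static DEVICE_ATTR_RW(max_ring_page_order);
```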
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel