[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources



On Fri, Jul 15, 2016 at 05:31:49PM +0800, Bob Liu wrote:
> The current VBD layer reserves buffer space for each attached device based on
> three statically configured settings which are read at boot time.
>  * max_indirect_segs: Maximum number of segments.
>  * max_ring_page_order: Maximum order of pages to be used for the shared ring.
>  * max_queues: Maximum number of queues (rings) to be used.
> 
> But the storage backend, workload, and guest memory result in very different
> tuning requirements. It's impossible to centrally predict application
> characteristics, so it's best to allow the settings to be dynamically
> adjusted based on the workload inside the guest.
> 
> Usage:
> Show current values:
> cat /sys/devices/vbd-xxx/max_indirect_segs
> cat /sys/devices/vbd-xxx/max_ring_page_order
> cat /sys/devices/vbd-xxx/max_queues
> 
> Write new values:
> echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs
> echo <new value> > /sys/devices/vbd-xxx/max_ring_page_order
> echo <new value> > /sys/devices/vbd-xxx/max_queues
> 
> Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx>
> --
> v2: Add device lock and other comments from Konrad.
> ---
>  drivers/block/xen-blkfront.c | 285 
> ++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 283 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 10f46a8..9a5ed22 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -46,6 +46,7 @@
>  #include <linux/scatterlist.h>
>  #include <linux/bitmap.h>
>  #include <linux/list.h>
> +#include <linux/delay.h>
>  
>  #include <xen/xen.h>
>  #include <xen/xenbus.h>
> @@ -212,6 +213,11 @@ struct blkfront_info
>       /* Save uncomplete reqs and bios for migration. */
>       struct list_head requests;
>       struct bio_list bio_list;
> +     /* For dynamic configuration. */
> +     unsigned int reconfiguring:1;
> +     int new_max_indirect_segments;
> +     int new_max_ring_page_order;
> +     int new_max_queues;
>  };
>  
>  static unsigned int nr_minors;
> @@ -1350,6 +1356,31 @@ static void blkif_free(struct blkfront_info *info, int 
> suspend)
>       for (i = 0; i < info->nr_rings; i++)
>               blkif_free_ring(&info->rinfo[i]);
>  
> +     /* Remove old xenstore nodes. */
> +     if (info->nr_ring_pages > 1)
> +             xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-page-order");
> +
> +     if (info->nr_rings == 1) {
> +             if (info->nr_ring_pages == 1) {
> +                     xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-ref");
> +             } else {
> +                     for (i = 0; i < info->nr_ring_pages; i++) {
> +                             char ring_ref_name[RINGREF_NAME_LEN];
> +
> +                             snprintf(ring_ref_name, RINGREF_NAME_LEN, 
> "ring-ref%u", i);
> +                             xenbus_rm(XBT_NIL, info->xbdev->nodename, 
> ring_ref_name);
> +                     }
> +             }
> +     } else {
> +             xenbus_rm(XBT_NIL, info->xbdev->nodename, 
> "multi-queue-num-queues");
> +
> +             for (i = 0; i < info->nr_rings; i++) {
> +                     char queuename[QUEUE_NAME_LEN];
> +
> +                     snprintf(queuename, QUEUE_NAME_LEN, "queue-%u", i);
> +                     xenbus_rm(XBT_NIL, info->xbdev->nodename, queuename);
> +             }
> +     }
>       kfree(info->rinfo);
>       info->rinfo = NULL;
>       info->nr_rings = 0;
> @@ -1772,6 +1803,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
>               info->nr_ring_pages = 1;
>       else {
>               ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
> +             if (info->new_max_ring_page_order) {

Instead of calling this "new_max_ring_page_order", could you just call it 
max_ring_page_order, initialize it to xen_blkif_max_ring_order by default 
and use it everywhere instead of xen_blkif_max_ring_order?

> +                     BUG_ON(info->new_max_ring_page_order > max_page_order);
> +                     ring_page_order = info->new_max_ring_page_order;
> +             }
>               info->nr_ring_pages = 1 << ring_page_order;
>       }
>  
> @@ -1895,6 +1930,10 @@ static int negotiate_mq(struct blkfront_info *info)
>               backend_max_queues = 1;
>  
>       info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
> +     if (info->new_max_queues) {

Same here IMHO, this is going to make the code flow slightly easier to 
understand.

> +             BUG_ON(info->new_max_queues > backend_max_queues);
> +             info->nr_rings = info->new_max_queues;
> +     }
>       /* We need at least one ring. */
>       if (!info->nr_rings)
>               info->nr_rings = 1;
> @@ -2352,11 +2391,227 @@ static void blkfront_gather_backend_features(struct 
> blkfront_info *info)
>                           NULL);
>       if (err)
>               info->max_indirect_segments = 0;
> -     else
> +     else {
>               info->max_indirect_segments = min(indirect_segments,
>                                                 xen_blkif_max_segments);
> +             if (info->new_max_indirect_segments) {
> +                     BUG_ON(info->new_max_indirect_segments > 
> indirect_segments);
> +                     info->max_indirect_segments = 
> info->new_max_indirect_segments;
> +             }
> +     }
> +}
> +
> +static ssize_t max_ring_page_order_show(struct device *dev,
> +                                     struct device_attribute *attr, char 
> *page)
> +{
> +     struct blkfront_info *info = dev_get_drvdata(dev);
> +
> +     return sprintf(page, "%u\n", get_order(info->nr_ring_pages * 
> XEN_PAGE_SIZE));
> +}
> +
> +static ssize_t max_indirect_segs_show(struct device *dev,
> +                                   struct device_attribute *attr, char *page)
> +{
> +     struct blkfront_info *info = dev_get_drvdata(dev);
> +
> +     return sprintf(page, "%u\n", info->max_indirect_segments);
> +}
> +
> +static ssize_t max_queues_show(struct device *dev,
> +                            struct device_attribute *attr, char *page)
> +{
> +     struct blkfront_info *info = dev_get_drvdata(dev);
> +
> +     return sprintf(page, "%u\n", info->nr_rings);
> +}
> +
> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t 
> count)
> +{
> +     unsigned int i;
> +     int err = -EBUSY;
> +
> +     /*
> +      * Make sure no migration in parallel, device lock is actually a
> +      * mutex.
> +      */
> +     if (!device_trylock(&info->xbdev->dev)) {
> +             pr_err("Fail to acquire dev:%s lock, may be in migration.\n",
> +                     dev_name(&info->xbdev->dev));
> +             return err;
> +     }
> +
> +     /*
> +      * Prevent new requests and guarantee no uncompleted reqs.
> +      */
> +     blk_mq_freeze_queue(info->rq);
> +     if (part_in_flight(&info->gd->part0))
> +             goto out;
> +
> +     /*
> +      * Front                                Backend
> +      * Switch to XenbusStateClosed
> +      *                                      frontend_changed():
> +      *                                       case XenbusStateClosed:
> +      *                                              xen_blkif_disconnect()
> +      *                                              Switch to 
> XenbusStateClosed
> +      * blkfront_resume():
> +      *                                      frontend_changed():
> +      *                                              reconnect
> +      * Wait until XenbusStateConnected
> +      */
> +     info->reconfiguring = true;
> +     xenbus_switch_state(info->xbdev, XenbusStateClosed);
> +
> +     /* Poll every 100ms, 1 minute timeout. */
> +     for (i = 0; i < 600; i++) {
> +             /*
> +              * Wait backend enter XenbusStateClosed, blkback_changed()
> +              * will clear reconfiguring.
> +              */
> +             if (!info->reconfiguring)
> +                     goto resume;
> +             schedule_timeout_interruptible(msecs_to_jiffies(100));
> +     }

Instead of having this wait, could you just set info->reconfiguring = 1, set 
the frontend state to XenbusStateClosed and mimic exactly what a resume from 
suspension does? blkback_changed would have to set the frontend state to 
InitWait when it detects that the backend has switched to Closed, and call 
blkfront_resume.

> +     goto out;
> +
> +resume:
> +     if (blkfront_resume(info->xbdev))
> +             goto out;
> +
> +     /* Poll every 100ms, 1 minute timeout. */
> +     for (i = 0; i < 600; i++) {
> +             /* Wait blkfront enter StateConnected which is done by 
> blkif_recover(). */
> +             if (info->xbdev->state == XenbusStateConnected) {
> +                     err = count;
> +                     goto out;
> +             }
> +             schedule_timeout_interruptible(msecs_to_jiffies(100));
> +     }

Same here. IMHO all of this should be much more similar to a resume, and you 
shouldn't need all these wait loops.

Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.