
Re: [Xen-devel] [PATCH v2 for-xen-4.5] xen_disk: fix unmapping of persistent grants



On Thu, 13 Nov 2014, Roger Pau Monne wrote:
> This patch fixes two issues with persistent grants and the disk PV backend
> (Qdisk):
> 
>  - Keep track of memory regions where persistent grants have been mapped
>    since we need to unmap them as a whole. It is not possible to unmap a
>    single grant if it has been batch-mapped.
>  - Unmap persistent grants before switching to the closed state, so the
>    frontend can also free them.
> 
> Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> Reported-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
> Cc: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
> Cc: Kevin Wolf <kwolf@xxxxxxxxxx>
> Cc: Stefan Hajnoczi <stefanha@xxxxxxxxxx>
> Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
> Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
> ---
> Xen release exception: this is obviously an important bug-fix that should be
> included in 4.5. Without this fix, trying to do a block-detach of a Qdisk
> from a running guest can lead to a guest crash, depending on the blkfront
> implementation.
> ---
> Changes since v1:
>  - Instead of disabling batch mappings when using persistent grants, keep
>    track of the persistently mapped regions and unmap them on disconnection.
>    This means single persistent grants can no longer be unmapped individually,
>    but the current implementation doesn't require that.
> 
> This new version is slightly faster than the previous one; the following
> test results are in IOPS from 20 runs of a block-attach, fio run,
> block-detach test loop:
> 
> x batch
> + no_batch
> +----------------------------------------------------------------------+
> |                                      +   +     x    x + + +  x xx x  |
> |+  +           x                     x+  *+++   *  +x* +++x*  x xx x *|
> |                                          |_____________A_____M______||
> |                            |________________A_____M___________|      |
> +----------------------------------------------------------------------+
>     N           Min           Max        Median           Avg        Stddev
> x  20         52941         63822         62396      61180.15     2673.6497
> +  20         49967         63849         60322       59116.9     3456.3455
> Difference at 95.0% confidence
>       -2063.25 +/- 1977.66
>       -3.37242% +/- 3.23252%
>       (Student's t, pooled s = 3089.88)

Not too bad! Thanks!


>  hw/block/xen_disk.c | 77 ++++++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 64 insertions(+), 13 deletions(-)
> 
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 231e9a7..75bdc16 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -59,6 +59,13 @@ struct PersistentGrant {
>  
>  typedef struct PersistentGrant PersistentGrant;
>  
> +struct PersistentRegion {
> +    void *addr;
> +    int num;
> +};
> +
> +typedef struct PersistentRegion PersistentRegion;
> +
>  struct ioreq {
>      blkif_request_t     req;
>      int16_t             status;
> @@ -118,6 +125,7 @@ struct XenBlkDev {
>      gboolean            feature_discard;
>      gboolean            feature_persistent;
>      GTree               *persistent_gnts;
> +    GSList              *persistent_regions;
>      unsigned int        persistent_gnt_count;
>      unsigned int        max_grants;
>  
> @@ -164,19 +172,41 @@ static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
>  static void destroy_grant(gpointer pgnt)
>  {
>      PersistentGrant *grant = pgnt;
> -    XenGnttab gnt = grant->blkdev->xendev.gnttabdev;
>  
> -    if (xc_gnttab_munmap(gnt, grant->page, 1) != 0) {
> -        xen_be_printf(&grant->blkdev->xendev, 0,
> -                      "xc_gnttab_munmap failed: %s\n",
> -                      strerror(errno));
> -    }
>      grant->blkdev->persistent_gnt_count--;
>      xen_be_printf(&grant->blkdev->xendev, 3,
> -                  "unmapped grant %p\n", grant->page);
> +                  "removed grant %p\n", grant->page);
>      g_free(grant);
>  }
>  
> +static void add_persistent_region(struct XenBlkDev *blkdev, void *addr, int num)
> +{
> +    PersistentRegion *region;
> +
> +    region = g_malloc0(sizeof(*region));
> +    region->addr = addr;
> +    region->num = num;
> +    blkdev->persistent_regions = g_slist_append(blkdev->persistent_regions,
> +                                                region);
> +}
> +
> +static void remove_persistent_region(gpointer data, gpointer dev)
> +{
> +    PersistentRegion *region = data;
> +    struct XenBlkDev *blkdev = dev;
> +    XenGnttab gnt = blkdev->xendev.gnttabdev;
> +
> +    if (xc_gnttab_munmap(gnt, region->addr, region->num) != 0) {
> +        xen_be_printf(&blkdev->xendev, 0,
> +                      "xc_gnttab_munmap region %p failed: %s\n",
> +                      region->addr, strerror(errno));
> +    }
> +    xen_be_printf(&blkdev->xendev, 3,
> +                  "unmapped grant region %p with %d pages\n",
> +                  region->addr, region->num);
> +    g_free(region);
> +}
> +
>  static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
>  {
>      struct ioreq *ioreq = NULL;
> @@ -421,7 +451,17 @@ static int ioreq_map(struct ioreq *ioreq)
>              }
>          }
>      }
> -    if (ioreq->blkdev->feature_persistent) {
> +    if (ioreq->blkdev->feature_persistent && new_maps &&
> +        (!batch_maps || (ioreq->blkdev->persistent_gnt_count + new_maps <=
> +        ioreq->blkdev->max_grants))) {

Do we really need this change? If so, could you please write something
about it in the commit message?


> +        /*
> +         * If we are using persistent grants and batch mappings only
> +         * add the new maps to the list of persistent grants if the whole
> +         * area can be persistently mapped.
> +         */

Is this the reason for the change above?


> +        if (batch_maps && new_maps) {
> +            add_persistent_region(ioreq->blkdev, ioreq->pages, new_maps);
> +        }
>
>          while ((ioreq->blkdev->persistent_gnt_count < ioreq->blkdev->max_grants)
>                && new_maps) {
>              /* Go through the list of newly mapped grants and add as many
> @@ -437,6 +477,7 @@ static int ioreq_map(struct ioreq *ioreq)
>                  grant->page = ioreq->pages + (new_maps) * XC_PAGE_SIZE;
>              } else {
>                  grant->page = ioreq->page[new_maps];
> +                add_persistent_region(ioreq->blkdev, ioreq->page[new_maps], 1);

Basically in the !batch_maps case we are duplicating persistent_gnts
into persistent_regions. Couldn't we just avoid calling
add_persistent_region at all in that case and rely on the old
destroy_grant function? In fact I think we could just pass NULL as the
GDestroyNotify function when creating persistent_gnts if batch_maps is
set, and the old unmodified destroy_grant if !batch_maps.
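
Something along these lines in blk_connect, just as a rough, untested sketch
to illustrate the idea (reusing int_cmp, destroy_grant and the global
batch_maps that already exist in xen_disk.c):

    /* Sketch only: keep destroy_grant unmodified (still unmapping its single
     * page) and install it as the value destructor only when not batch
     * mapping; with batch_maps the regions list would do the unmapping. */
    blkdev->persistent_gnts = g_tree_new_full((GCompareDataFunc)int_cmp,
                                              NULL, NULL,
                                              batch_maps ? NULL :
                                              (GDestroyNotify)destroy_grant);
    blkdev->persistent_regions = NULL;
    blkdev->persistent_gnt_count = 0;

That way the tree itself keeps unmapping single grants in the !batch_maps
case, and persistent_regions only ever tracks batch-mapped areas.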

>              }
>              grant->blkdev = ioreq->blkdev;
>              xen_be_printf(&ioreq->blkdev->xendev, 3,
> @@ -447,6 +488,7 @@ static int ioreq_map(struct ioreq *ioreq)
>                            grant);
>              ioreq->blkdev->persistent_gnt_count++;
>          }
> +        assert(!batch_maps || new_maps == 0);

Shouldn't new_maps be == 0 even in the !batch_maps case?
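(Just to make the question concrete, and not claiming it necessarily holds:
I mean something like

    assert(new_maps == 0);

without the batch_maps condition.)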


>      }
>      for (i = 0; i < ioreq->v.niov; i++) {
>          ioreq->v.iov[i].iov_base += (uintptr_t)page[i];
> @@ -972,6 +1014,7 @@ static int blk_connect(struct XenDevice *xendev)
>          blkdev->persistent_gnts = g_tree_new_full((GCompareDataFunc)int_cmp,
>                                               NULL, NULL,
>                                               (GDestroyNotify)destroy_grant);
> +        blkdev->persistent_regions = NULL;
>          blkdev->persistent_gnt_count = 0;
>      }
>  
> @@ -1000,6 +1043,19 @@ static void blk_disconnect(struct XenDevice *xendev)
>          blkdev->cnt_map--;
>          blkdev->sring = NULL;
>      }
> +
> +    /*
> +     * Unmap persistent grants before switching to the closed state
> +     * so the frontend can free them.
> +     */
> +    if (blkdev->feature_persistent) {
> +        g_tree_destroy(blkdev->persistent_gnts);
> +        assert(blkdev->persistent_gnt_count == 0);
> +        g_slist_foreach(blkdev->persistent_regions,
> +                        (GFunc)remove_persistent_region, blkdev);
> +        g_slist_free(blkdev->persistent_regions);
> +        blkdev->feature_persistent = false;
> +    }
>  }
>  
>  static int blk_free(struct XenDevice *xendev)
> @@ -1011,11 +1067,6 @@ static int blk_free(struct XenDevice *xendev)
>          blk_disconnect(xendev);
>      }
>  
> -    /* Free persistent grants */
> -    if (blkdev->feature_persistent) {
> -        g_tree_destroy(blkdev->persistent_gnts);
> -    }
> -
>      while (!QLIST_EMPTY(&blkdev->freelist)) {
>          ioreq = QLIST_FIRST(&blkdev->freelist);
>          QLIST_REMOVE(ioreq, list);
> -- 
> 1.9.3 (Apple Git-50)
> 