[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v2 3/3] tools/libxc: use superpages during restore of HVM guest



On Thu, Aug 17, 2017 at 07:01:33PM +0200, Olaf Hering wrote:
[...]
> diff --git a/tools/libxc/xc_sr_restore_x86_hvm.c 
> b/tools/libxc/xc_sr_restore_x86_hvm.c
> index 1dca85354a..60454148db 100644
> --- a/tools/libxc/xc_sr_restore_x86_hvm.c
> +++ b/tools/libxc/xc_sr_restore_x86_hvm.c
> @@ -135,6 +135,8 @@ static int x86_hvm_localise_page(struct xc_sr_context 
> *ctx,
>  static int x86_hvm_setup(struct xc_sr_context *ctx)
>  {
>      xc_interface *xch = ctx->xch;
> +    struct xc_sr_bitmap *bm;
> +    unsigned long bits;
>  
>      if ( ctx->restore.guest_type != DHDR_TYPE_X86_HVM )
>      {
> @@ -149,7 +151,30 @@ static int x86_hvm_setup(struct xc_sr_context *ctx)
>          return -1;
>      }
>  
> +    bm = &ctx->x86_hvm.restore.attempted_1g;
> +    bits = (ctx->restore.p2m_size >> SUPERPAGE_1GB_SHIFT) + 1;
> +    if ( xc_sr_bitmap_resize(bm, bits) == false )
> +        goto out;
> +
> +    bm = &ctx->x86_hvm.restore.attempted_2m;
> +    bits = (ctx->restore.p2m_size >> SUPERPAGE_2MB_SHIFT) + 1;
> +    if ( xc_sr_bitmap_resize(bm, bits) == false )
> +        goto out;
> +
> +    bm = &ctx->x86_hvm.restore.allocated_pfns;
> +    bits = ctx->restore.p2m_size + 1;
> +    if ( xc_sr_bitmap_resize(bm, bits) == false )
> +        goto out;
> +
> +    /* No superpage in 1st 2MB due to VGA hole */
> +    xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_1g);
> +    xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_2m);
> +

I don't quite get this. What about other holes such as MMIO?

>      return 0;
> +
> +out:
> +    ERROR("Unable to allocate memory for pfn bitmaps");
> +    return -1;
>  }
>  
>  /*
> @@ -224,10 +249,164 @@ static int x86_hvm_stream_complete(struct 
> xc_sr_context *ctx)
>  static int x86_hvm_cleanup(struct xc_sr_context *ctx)
>  {
>      free(ctx->x86_hvm.restore.context);
> +    xc_sr_bitmap_free(&ctx->x86_hvm.restore.attempted_1g);
> +    xc_sr_bitmap_free(&ctx->x86_hvm.restore.attempted_2m);
> +    xc_sr_bitmap_free(&ctx->x86_hvm.restore.allocated_pfns);
>  
>      return 0;
>  }
>  
> +/*
> + * Set a pfn as allocated, expanding the tracking structures if needed.
> + */
> +static int pfn_set_allocated(struct xc_sr_context *ctx, xen_pfn_t pfn)
> +{
> +    xc_interface *xch = ctx->xch;
> +
> +    if ( !xc_sr_set_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns) )
> +    {
> +        ERROR("Failed to realloc allocated_pfns bitmap");
> +        errno = ENOMEM;
> +        return -1;
> +    }
> +    return 0;
> +}
> +
> +/*
> + * Attempt to allocate a superpage where the pfn resides.
> + */
> +static int x86_hvm_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
> +{
> +    xc_interface *xch = ctx->xch;
> +    bool success = false;
> +    int rc = -1, done;
> +    unsigned int order;
> +    unsigned long i;
> +    unsigned long stat_1g = 0, stat_2m = 0, stat_4k = 0;
> +    unsigned long idx_1g, idx_2m;
> +    unsigned long count;
> +    xen_pfn_t base_pfn = 0, extnt;
> +
> +    if (xc_sr_test_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns))

Style is wrong here and in some other places.

> +        return 0;
> +
> +    idx_1g = pfn >> SUPERPAGE_1GB_SHIFT;
> +    idx_2m = pfn >> SUPERPAGE_2MB_SHIFT;
> +    if (!xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_1g, idx_1g))
> +    {
> +        PERROR("Failed to realloc attempted_1g");
> +        return -1;
> +    }
> +    if (!xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_2m, idx_2m))
> +    {
> +        PERROR("Failed to realloc attempted_2m");
> +        return -1;
> +    }
> +    DPRINTF("idx_1g %lu idx_2m %lu\n", idx_1g, idx_2m);
> +    if (!xc_sr_test_and_set_bit(idx_1g, &ctx->x86_hvm.restore.attempted_1g)) 
> {
> +        order = SUPERPAGE_1GB_SHIFT;
> +        count = 1UL << order;
> +        base_pfn = (pfn >> order) << order;
> +        extnt = base_pfn;
> +        done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, 
> &extnt);
> +        DPRINTF("1G base_pfn %" PRI_xen_pfn " done %d\n", base_pfn, done);
> +        if (done > 0) {
> +            struct xc_sr_bitmap *bm = &ctx->x86_hvm.restore.attempted_2m;
> +            success = true;
> +            stat_1g = done;
> +            for (i = 0; i < (count >> SUPERPAGE_2MB_SHIFT); i++)
> +                xc_sr_set_bit((base_pfn >> SUPERPAGE_2MB_SHIFT) + i, bm);
> +        }
> +    }
> +
> +    if (!xc_sr_test_and_set_bit(idx_2m, &ctx->x86_hvm.restore.attempted_2m)) 
> {
> +        order = SUPERPAGE_2MB_SHIFT;
> +        count = 1UL << order;
> +        base_pfn = (pfn >> order) << order;
> +        extnt = base_pfn;
> +        done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, 
> &extnt);
> +        DPRINTF("2M base_pfn %" PRI_xen_pfn " done %d\n", base_pfn, done);
> +        if (done > 0) {
> +            success = true;
> +            stat_2m = done;
> +        }
> +    }
> +    if (success == false) {
> +        count = 1;
> +        extnt = base_pfn = pfn;
> +        done = xc_domain_populate_physmap(xch, ctx->domid, count, 0, 0, 
> &extnt);
> +        if (done > 0) {
> +            success = true;
> +            stat_4k = count;
> +        }
> +    }
> +    DPRINTF("count %lu 1G %lu 2M %lu 4k %lu\n", count, stat_1g, stat_2m, 
> stat_4k);
> +    if (success == true) {
> +        do {
> +            count--;
> +            rc = pfn_set_allocated(ctx, base_pfn + count);
> +            if (rc)
> +                break;
> +        } while (count);
> +    }
> +    return rc;
> +}
> +
> +static int x86_hvm_populate_pfns(struct xc_sr_context *ctx, unsigned count,
> +                                 const xen_pfn_t *original_pfns,
> +                                 const uint32_t *types)
> +{
> +    xc_interface *xch = ctx->xch;
> +    xen_pfn_t min_pfn = original_pfns[0], max_pfn = original_pfns[0];
> +    unsigned i;
> +    int rc = -1;
> +
> +    for ( i = 0; i < count; ++i )
> +    {
> +        if (original_pfns[i] < min_pfn)
> +            min_pfn = original_pfns[i];
> +        if (original_pfns[i] > max_pfn)
> +            max_pfn = original_pfns[i];
> +        if ( (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
> +              types[i] != XEN_DOMCTL_PFINFO_BROKEN) &&
> +             !pfn_is_populated(ctx, original_pfns[i]) )
> +        {
> +            rc = x86_hvm_allocate_pfn(ctx, original_pfns[i]);
> +            if ( rc )
> +                goto err;
> +            rc = pfn_set_populated(ctx, original_pfns[i]);
> +            if ( rc )
> +                goto err;
> +        }
> +    }

One potential issue I can see with your algorithm: if the stream of
page info contains pages from different superpages, the risk of going
over the memory limit is high (hence failing the migration).

Is my concern unfounded?

> +
> +    while (min_pfn < max_pfn)
> +    {
> +        if (!xc_sr_bitmap_resize(&ctx->x86_hvm.restore.allocated_pfns, 
> min_pfn))
> +        {
> +            PERROR("Failed to realloc allocated_pfns %" PRI_xen_pfn, 
> min_pfn);
> +            goto err;
> +        }
> +        if (!pfn_is_populated(ctx, min_pfn) &&
> +            xc_sr_test_and_clear_bit(min_pfn, 
> &ctx->x86_hvm.restore.allocated_pfns)) {
> +            xen_pfn_t pfn = min_pfn;
> +            rc = xc_domain_decrease_reservation_exact(xch, ctx->domid, 1, 0, 
> &pfn);
> +            if ( rc )
> +            {
> +                PERROR("Failed to release pfn %" PRI_xen_pfn, min_pfn);
> +                goto err;
> +            }
> +        }
> +        min_pfn++;
> +    }
> +
> +    rc = 0;
> +
> + err:
> +    return rc;
> +}
> +
> +
>  struct xc_sr_restore_ops restore_ops_x86_hvm =
>  {
>      .pfn_is_valid    = x86_hvm_pfn_is_valid,
> @@ -236,6 +415,7 @@ struct xc_sr_restore_ops restore_ops_x86_hvm =
>      .set_page_type   = x86_hvm_set_page_type,
>      .localise_page   = x86_hvm_localise_page,
>      .setup           = x86_hvm_setup,
> +    .populate_pfns   = x86_hvm_populate_pfns,
>      .process_record  = x86_hvm_process_record,
>      .stream_complete = x86_hvm_stream_complete,
>      .cleanup         = x86_hvm_cleanup,
> diff --git a/tools/libxc/xc_sr_restore_x86_pv.c 
> b/tools/libxc/xc_sr_restore_x86_pv.c
> index 50e25c162c..87957559bc 100644
> --- a/tools/libxc/xc_sr_restore_x86_pv.c
> +++ b/tools/libxc/xc_sr_restore_x86_pv.c
> @@ -936,6 +936,75 @@ static void x86_pv_set_gfn(struct xc_sr_context *ctx, 
> xen_pfn_t pfn,
>          ((uint32_t *)ctx->x86_pv.p2m)[pfn] = mfn;
>  }
>  
> +/*
> + * Given a set of pfns, obtain memory from Xen to fill the physmap for the
> + * unpopulated subset.  If types is NULL, no page type checking is performed
> + * and all unpopulated pfns are populated.
> + */
> +static int x86_pv_populate_pfns(struct xc_sr_context *ctx, unsigned count,
> +                                const xen_pfn_t *original_pfns,
> +                                const uint32_t *types)
> +{
> +    xc_interface *xch = ctx->xch;
> +    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
> +        *pfns = malloc(count * sizeof(*pfns));
> +    unsigned i, nr_pfns = 0;
> +    int rc = -1;
> +
> +    if ( !mfns || !pfns )
> +    {
> +        ERROR("Failed to allocate %zu bytes for populating the physmap",
> +              2 * count * sizeof(*mfns));
> +        goto err;
> +    }
> +
> +    for ( i = 0; i < count; ++i )
> +    {
> +        if ( (!types || (types &&
> +                         (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
> +                          types[i] != XEN_DOMCTL_PFINFO_BROKEN))) &&
> +             !pfn_is_populated(ctx, original_pfns[i]) )
> +        {
> +            rc = pfn_set_populated(ctx, original_pfns[i]);
> +            if ( rc )
> +                goto err;
> +            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
> +            ++nr_pfns;
> +        }
> +    }
> +
> +    if ( nr_pfns )
> +    {
> +        rc = xc_domain_populate_physmap_exact(
> +            xch, ctx->domid, nr_pfns, 0, 0, mfns);
> +        if ( rc )
> +        {
> +            PERROR("Failed to populate physmap");
> +            goto err;
> +        }
> +
> +        for ( i = 0; i < nr_pfns; ++i )
> +        {
> +            if ( mfns[i] == INVALID_MFN )
> +            {
> +                ERROR("Populate physmap failed for pfn %u", i);
> +                rc = -1;
> +                goto err;
> +            }
> +
> +            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
> +        }
> +    }
> +
> +    rc = 0;
> +
> + err:
> +    free(pfns);
> +    free(mfns);
> +
> +    return rc;
> +}
> +

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.