
Re: [Xen-devel] [PATCH v6 06/23] libxc: allocate memory with vNUMA information for PV guest



On Thu, 2015-02-26 at 15:55 +0000, Wei Liu wrote:
> From libxc's point of view, it only needs to know the vnode-to-pnode
> mapping and the size of each vnode to allocate memory accordingly. Add
> these fields to the xc_dom structure.
> 
> The caller might not pass in vNUMA information. In that case, a dummy
> layout is generated for the convenience of libxc's allocation code. The
> upper layer (libxl etc.) still sees the domain as having no vNUMA
> configuration.
> 
> Note that with this patch a PV x86 guest can have multiple regions of
> RAM allocated.
> 
> Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>

Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
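
For anyone wiring this up from the tool stack side: the interface added here
amounts to filling in the four new xc_dom_image fields before memory
allocation runs. A minimal sketch (illustrative only, not taken from libxl;
example_set_vnuma, the even two-vnode split and the identity vnode-to-pnode
mapping are all invented for the example) could look like this:

#include "xc_dom.h"

/* Illustrative only: split an already-sized guest evenly across two
 * vnodes, mapping vnode 0 -> pnode 0 and vnode 1 -> pnode 1. Memory
 * from xc_dom_malloc() is owned and freed together with dom. */
static int example_set_vnuma(struct xc_dom_image *dom)
{
    uint64_t half = ((uint64_t)(dom->total_pages / 2)) << PAGE_SHIFT;

    dom->nr_vmemranges = 2;
    dom->vmemranges = xc_dom_malloc(dom, 2 * sizeof(*dom->vmemranges));
    if ( dom->vmemranges == NULL )
        return -1;

    dom->vmemranges[0].start = 0;
    dom->vmemranges[0].end   = half;
    dom->vmemranges[0].flags = 0;
    dom->vmemranges[0].nid   = 0;
    dom->vmemranges[1].start = half;
    dom->vmemranges[1].end   = (uint64_t)dom->total_pages << PAGE_SHIFT;
    dom->vmemranges[1].flags = 0;
    dom->vmemranges[1].nid   = 1;

    dom->nr_vnodes = 2;
    dom->vnode_to_pnode = xc_dom_malloc(dom,
                              2 * sizeof(*dom->vnode_to_pnode));
    if ( dom->vnode_to_pnode == NULL )
        return -1;
    dom->vnode_to_pnode[0] = 0;
    dom->vnode_to_pnode[1] = 1;

    return 0;
}

The ranges are byte addresses, so the sizes of all ranges together have to
add up to total_pages worth of memory, or the consistency check added in
xc_dom_x86.c below will reject the layout.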

> Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
> Cc: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
> Cc: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
> ---
> Changes in v6:
> 1. Ditch XC_VNUMA_NO_NODE and use XEN_NUMA_NO_NODE.
> 2. Update comment in xc_dom.h.
> 
> Changes in v5:
> 1. Ditch xc_vnuma_info.
> 
> Changes in v4:
> 1. Pack fields into a struct.
> 2. Use "page" as unit.
> 3. __FUNCTION__ -> __func__.
> 4. Don't print total_pages.
> 5. Improve comment.
> 
> Changes in v3:
> 1. Rewrite commit log.
> 2. Shorten some error messages.
> ---
>  tools/libxc/include/xc_dom.h |  12 ++++-
>  tools/libxc/xc_dom_x86.c     | 101 +++++++++++++++++++++++++++++++++++++------
>  2 files changed, 97 insertions(+), 16 deletions(-)
> 
> diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
> index 6b8ddf4..a7d059a 100644
> --- a/tools/libxc/include/xc_dom.h
> +++ b/tools/libxc/include/xc_dom.h
> @@ -119,8 +119,10 @@ struct xc_dom_image {
>  
>      /* physical memory
>       *
> -     * An x86 PV guest has a single contiguous block of physical RAM,
> -     * consisting of total_pages starting at rambase_pfn.
> +     * An x86 PV guest has one or more blocks of physical RAM,
> +     * consisting of total_pages starting at rambase_pfn. The start
> +     * address and size of each block is controlled by vNUMA
> +     * structures.
>       *
>       * An ARM guest has GUEST_RAM_BANKS regions of RAM, with
>       * rambank_size[i] pages in each. The lowest RAM address
> @@ -168,6 +170,12 @@ struct xc_dom_image {
>      struct xc_dom_loader *kernel_loader;
>      void *private_loader;
>  
> +    /* vNUMA information */
> +    xen_vmemrange_t *vmemranges;
> +    unsigned int nr_vmemranges;
> +    unsigned int *vnode_to_pnode;
> +    unsigned int nr_vnodes;
> +
>      /* kernel loader */
>      struct xc_dom_arch *arch_hooks;
>      /* allocate up to virt_alloc_end */
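
A side note on the semantics of the new fields, since the comment above only
describes the RAM layout: vmemranges carries byte start/end addresses tagged
with a virtual node id, and vnode_to_pnode is indexed by that id. The sketch
below (example_check_vnuma is a made-up helper, not part of the patch) spells
out the invariants the allocation code further down relies on; only the page
count check is actually enforced by the patch, the alignment and node-id
checks are extra illustration:

#include "xc_dom.h"

/* Illustrative consistency check on the vNUMA fields of dom.
 * The page count check mirrors the one added to arch_setup_meminit();
 * the alignment and node-id checks are additional sanity checks. */
static int example_check_vnuma(const struct xc_dom_image *dom)
{
    xen_pfn_t total = 0;
    unsigned int i;

    for ( i = 0; i < dom->nr_vmemranges; i++ )
    {
        const xen_vmemrange_t *r = &dom->vmemranges[i];

        /* Ranges are byte addresses and should be page aligned. */
        if ( (r->start | r->end) & ((1UL << PAGE_SHIFT) - 1) )
            return -1;
        /* Every range must refer to a known virtual node. */
        if ( r->nid >= dom->nr_vnodes )
            return -1;
        total += (r->end - r->start) >> PAGE_SHIFT;
    }

    /* The ranges together must cover exactly total_pages. */
    return total == dom->total_pages ? 0 : -1;
}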
> diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
> index bea54f2..268d4db 100644
> --- a/tools/libxc/xc_dom_x86.c
> +++ b/tools/libxc/xc_dom_x86.c
> @@ -760,7 +760,8 @@ static int x86_shadow(xc_interface *xch, domid_t domid)
>  int arch_setup_meminit(struct xc_dom_image *dom)
>  {
>      int rc;
> -    xen_pfn_t pfn, allocsz, i, j, mfn;
> +    xen_pfn_t pfn, allocsz, mfn, total, pfn_base;
> +    int i, j;
>  
>      rc = x86_compat(dom->xch, dom->guest_domid, dom->guest_type);
>      if ( rc )
> @@ -811,26 +812,98 @@ int arch_setup_meminit(struct xc_dom_image *dom)
>              if ( rc )
>                  return rc;
>          }
> -        /* setup initial p2m */
> -        dom->p2m_size = dom->total_pages;
> +
> +        /* Setup dummy vNUMA information if it's not provided. Note
> +         * that this is a valid state if libxl doesn't provide any
> +         * vNUMA information.
> +         *
> +         * The dummy values make libxc allocate all pages from
> +         * arbitrary physical nodes. This is the expected behaviour if
> +         * no vNUMA configuration is provided to libxc.
> +         *
> +         * Note that the following hunk is just for the convenience of
> +         * allocation code. No defaulting happens in libxc.
> +         */
> +        if ( dom->nr_vmemranges == 0 )
> +        {
> +            dom->nr_vmemranges = 1;
> +            dom->vmemranges = xc_dom_malloc(dom, sizeof(*dom->vmemranges));
> +            dom->vmemranges[0].start = 0;
> +            dom->vmemranges[0].end   = dom->total_pages << PAGE_SHIFT;
> +            dom->vmemranges[0].flags = 0;
> +            dom->vmemranges[0].nid   = 0;
> +
> +            dom->nr_vnodes = 1;
> +            dom->vnode_to_pnode = xc_dom_malloc(dom,
> +                                      sizeof(*dom->vnode_to_pnode));
> +            dom->vnode_to_pnode[0] = XEN_NUMA_NO_NODE;
> +        }
> +
> +        total = dom->p2m_size = 0;
> +        for ( i = 0; i < dom->nr_vmemranges; i++ )
> +        {
> +            total += ((dom->vmemranges[i].end - dom->vmemranges[i].start)
> +                      >> PAGE_SHIFT);
> +            dom->p2m_size =
> +                dom->p2m_size > (dom->vmemranges[i].end >> PAGE_SHIFT) ?
> +                dom->p2m_size : (dom->vmemranges[i].end >> PAGE_SHIFT);
> +        }
> +        if ( total != dom->total_pages )
> +        {
> +            xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
> +                         "%s: vNUMA page count mismatch (0x%"PRIpfn" != 0x%"PRIpfn")\n",
> +                         __func__, total, dom->total_pages);
> +            return -EINVAL;
> +        }
> +
>          dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) *
>                                        dom->p2m_size);
>          if ( dom->p2m_host == NULL )
>              return -EINVAL;
> -        for ( pfn = 0; pfn < dom->total_pages; pfn++ )
> -            dom->p2m_host[pfn] = pfn;
> +        for ( pfn = 0; pfn < dom->p2m_size; pfn++ )
> +            dom->p2m_host[pfn] = INVALID_P2M_ENTRY;
>  
>          /* allocate guest memory */
> -        for ( i = rc = allocsz = 0;
> -              (i < dom->total_pages) && !rc;
> -              i += allocsz )
> +        for ( i = 0; i < dom->nr_vmemranges; i++ )
>          {
> -            allocsz = dom->total_pages - i;
> -            if ( allocsz > 1024*1024 )
> -                allocsz = 1024*1024;
> -            rc = xc_domain_populate_physmap_exact(
> -                dom->xch, dom->guest_domid, allocsz,
> -                0, 0, &dom->p2m_host[i]);
> +            unsigned int memflags;
> +            uint64_t pages;
> +            unsigned int pnode = dom->vnode_to_pnode[dom->vmemranges[i].nid];
> +
> +            memflags = 0;
> +            if ( pnode != XEN_NUMA_NO_NODE )
> +                memflags |= XENMEMF_exact_node(pnode);
> +
> +            pages = (dom->vmemranges[i].end - dom->vmemranges[i].start)
> +                >> PAGE_SHIFT;
> +            pfn_base = dom->vmemranges[i].start >> PAGE_SHIFT;
> +
> +            for ( pfn = pfn_base; pfn < pfn_base+pages; pfn++ )
> +                dom->p2m_host[pfn] = pfn;
> +
> +            for ( j = 0; j < pages; j += allocsz )
> +            {
> +                allocsz = pages - j;
> +                if ( allocsz > 1024*1024 )
> +                    allocsz = 1024*1024;
> +
> +                rc = xc_domain_populate_physmap_exact(dom->xch,
> +                         dom->guest_domid, allocsz, 0, memflags,
> +                         &dom->p2m_host[pfn_base+j]);
> +
> +                if ( rc )
> +                {
> +                    if ( pnode != XEN_NUMA_NO_NODE )
> +                        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
> +                                     "%s: failed to allocate 0x%"PRIx64" pages (v=%d, p=%d)\n",
> +                                     __func__, pages, i, pnode);
> +                    else
> +                        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
> +                                     "%s: failed to allocate 0x%"PRIx64" pages\n",
> +                                     __func__, pages);
> +                    return rc;
> +                }
> +            }
>          }
>  
>          /* Ensure no unclaimed pages are left unused.
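
One subtle point in the allocation path above, worth spelling out: total_pages
must equal the sum of the vmemrange sizes, while p2m_size becomes the highest
guest PFN reached by any range, so the two differ as soon as the layout has a
hole; PFNs inside a hole simply stay INVALID_P2M_ENTRY. A small standalone
illustration (the layout and the numbers are invented):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* Hypothetical layout: 256MiB at address 0, a 256MiB hole, then
 * another 256MiB starting at 512MiB. */
struct range { uint64_t start, end; };

int main(void)
{
    struct range r[] = {
        { 0,            256ULL << 20 },
        { 512ULL << 20, 768ULL << 20 },
    };
    uint64_t total = 0, p2m_size = 0;
    unsigned int i;

    for ( i = 0; i < sizeof(r) / sizeof(r[0]); i++ )
    {
        total += (r[i].end - r[i].start) >> PAGE_SHIFT;
        if ( (r[i].end >> PAGE_SHIFT) > p2m_size )
            p2m_size = r[i].end >> PAGE_SHIFT;
    }

    /* Prints total=0x20000 p2m_size=0x30000: only 512MiB worth of pages
     * is populated, but the p2m array has to cover PFNs up to 768MiB. */
    printf("total=0x%" PRIx64 " p2m_size=0x%" PRIx64 "\n", total, p2m_size);
    return 0;
}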


