
[Xen-devel] [PATCH v8 04/21] libxc: allocate memory with vNUMA information for PV guest



From libxc's point of view, it only needs to know the vnode-to-pnode
mapping and the size of each vnode to allocate memory accordingly. Add
these fields to the xc_dom structure.
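
For illustration only, a caller could describe a two-vnode layout to
libxc as in the following sketch. Only the xc_dom_image fields and the
xen_vmemrange_t members (start, end, flags, nid) come from this patch;
the layout itself (two 512MiB vnodes on pnodes 0 and 1) is invented:

    /* Hypothetical caller-side fragment; assumes dom points at a
     * struct xc_dom_image that is being set up. */
    static xen_vmemrange_t ranges[] = {
        { .start = 0,            .end = 512ULL << 20,  .flags = 0, .nid = 0 },
        { .start = 512ULL << 20, .end = 1024ULL << 20, .flags = 0, .nid = 1 },
    };
    static unsigned int v2p[] = { 0, 1 }; /* vnode -> pnode */

    dom->vmemranges     = ranges;
    dom->nr_vmemranges  = 2;
    dom->vnode_to_pnode = v2p;
    dom->nr_vnodes      = 2;
    /* dom->total_pages must match the pages covered by the ranges,
     * or arch_setup_meminit() returns -EINVAL. */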

The caller might not pass in vNUMA information. In that case, a dummy
layout is generated for the convenience of libxc's allocation code. The
upper layer (libxl etc.) still sees the domain as having no vNUMA
configuration.

Note that with this patch an x86 PV guest can have multiple regions of
RAM allocated.
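
As a worked example of the sizing logic below (values invented), two
vmemranges [0, 512MiB) and [1GiB, 1.5GiB) leave a hole in the guest
physmap. A minimal standalone sketch, assuming PAGE_SHIFT is 12:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* start/end pairs, mirroring xen_vmemrange_t. */
        uint64_t r[2][2] = { { 0, 512ULL << 20 },
                             { 1024ULL << 20, 1536ULL << 20 } };
        uint64_t total = 0, p2m_size = 0;
        unsigned int i;

        for ( i = 0; i < 2; i++ )
        {
            /* Count only the pages inside each range... */
            total += (r[i][1] - r[i][0]) >> 12;
            /* ...but size the p2m to cover the highest guest pfn. */
            if ( (r[i][1] >> 12) > p2m_size )
                p2m_size = r[i][1] >> 12;
        }
        /* Prints total=0x40000 p2m_size=0x60000; the 0x20000 entries
         * in the hole stay INVALID_P2M_ENTRY. */
        printf("total=%#llx p2m_size=%#llx\n",
               (unsigned long long)total, (unsigned long long)p2m_size);
        return 0;
    }

total must still equal dom->total_pages, which is what the mismatch
check in the hunk below enforces.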

Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
Cc: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
---
Changes in v7:
1. Fall back to use our own XC_NUMA_NO_NODE.

Changes in v6:
1. Ditch XC_VNUMA_NO_NODE and use XEN_NUMA_NO_NODE.
2. Update comment in xc_dom.h.

Changes in v5:
1. Ditch xc_vnuma_info.

Changes in v4:
1. Pack fields into a struct.
2. Use "page" as unit.
3. __FUNCTION__ -> __func__.
4. Don't print total_pages.
5. Improve comment.

Changes in v3:
1. Rewrite commit log.
2. Shorten some error messages.
---
 tools/libxc/include/xc_dom.h   |  12 ++++-
 tools/libxc/include/xenguest.h |   2 +
 tools/libxc/xc_dom_x86.c       | 101 +++++++++++++++++++++++++++++++++++------
 3 files changed, 99 insertions(+), 16 deletions(-)

diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
index 6b8ddf4..a7d059a 100644
--- a/tools/libxc/include/xc_dom.h
+++ b/tools/libxc/include/xc_dom.h
@@ -119,8 +119,10 @@ struct xc_dom_image {
 
     /* physical memory
      *
-     * An x86 PV guest has a single contiguous block of physical RAM,
-     * consisting of total_pages starting at rambase_pfn.
+     * An x86 PV guest has one or more blocks of physical RAM,
+     * consisting of total_pages starting at rambase_pfn. The start
+     * address and size of each block is controlled by vNUMA
+     * structures.
      *
      * An ARM guest has GUEST_RAM_BANKS regions of RAM, with
      * rambank_size[i] pages in each. The lowest RAM address
@@ -168,6 +170,12 @@ struct xc_dom_image {
     struct xc_dom_loader *kernel_loader;
     void *private_loader;
 
+    /* vNUMA information */
+    xen_vmemrange_t *vmemranges;
+    unsigned int nr_vmemranges;
+    unsigned int *vnode_to_pnode;
+    unsigned int nr_vnodes;
+
     /* kernel loader */
     struct xc_dom_arch *arch_hooks;
     /* allocate up to virt_alloc_end */
diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
index 40bbac8..b7a924f 100644
--- a/tools/libxc/include/xenguest.h
+++ b/tools/libxc/include/xenguest.h
@@ -23,6 +23,8 @@
 #ifndef XENGUEST_H
 #define XENGUEST_H
 
+#define XC_NUMA_NO_NODE   (~0U)
+
 #define XCFLAGS_LIVE      (1 << 0)
 #define XCFLAGS_DEBUG     (1 << 1)
 #define XCFLAGS_HVM       (1 << 2)
diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
index bea54f2..af0c9f4 100644
--- a/tools/libxc/xc_dom_x86.c
+++ b/tools/libxc/xc_dom_x86.c
@@ -760,7 +760,8 @@ static int x86_shadow(xc_interface *xch, domid_t domid)
 int arch_setup_meminit(struct xc_dom_image *dom)
 {
     int rc;
-    xen_pfn_t pfn, allocsz, i, j, mfn;
+    xen_pfn_t pfn, allocsz, mfn, total, pfn_base;
+    int i, j;
 
     rc = x86_compat(dom->xch, dom->guest_domid, dom->guest_type);
     if ( rc )
@@ -811,26 +812,98 @@ int arch_setup_meminit(struct xc_dom_image *dom)
             if ( rc )
                 return rc;
         }
-        /* setup initial p2m */
-        dom->p2m_size = dom->total_pages;
+
+        /* Setup dummy vNUMA information if it's not provided. Note
+         * that this is a valid state if libxl doesn't provide any
+         * vNUMA information.
+         *
+         * The dummy values make libxc allocate all pages from
+         * arbitrary physical nodes. This is the expected behaviour if
+         * no vNUMA configuration is provided to libxc.
+         *
+         * Note that the following hunk is just for the convenience of
+         * allocation code. No defaulting happens in libxc.
+         */
+        if ( dom->nr_vmemranges == 0 )
+        {
+            dom->nr_vmemranges = 1;
+            dom->vmemranges = xc_dom_malloc(dom, sizeof(*dom->vmemranges));
+            dom->vmemranges[0].start = 0;
+            dom->vmemranges[0].end   = dom->total_pages << PAGE_SHIFT;
+            dom->vmemranges[0].flags = 0;
+            dom->vmemranges[0].nid   = 0;
+
+            dom->nr_vnodes = 1;
+            dom->vnode_to_pnode = xc_dom_malloc(dom,
+                                      sizeof(*dom->vnode_to_pnode));
+            dom->vnode_to_pnode[0] = XC_NUMA_NO_NODE;
+        }
+
+        total = dom->p2m_size = 0;
+        for ( i = 0; i < dom->nr_vmemranges; i++ )
+        {
+            total += ((dom->vmemranges[i].end - dom->vmemranges[i].start)
+                      >> PAGE_SHIFT);
+            dom->p2m_size =
+                dom->p2m_size > (dom->vmemranges[i].end >> PAGE_SHIFT) ?
+                dom->p2m_size : (dom->vmemranges[i].end >> PAGE_SHIFT);
+        }
+        if ( total != dom->total_pages )
+        {
+            xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                         "%s: vNUMA page count mismatch (0x%"PRIpfn" != 
0x%"PRIpfn")\n",
+                         __func__, total, dom->total_pages);
+            return -EINVAL;
+        }
+
         dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) *
                                       dom->p2m_size);
         if ( dom->p2m_host == NULL )
             return -EINVAL;
-        for ( pfn = 0; pfn < dom->total_pages; pfn++ )
-            dom->p2m_host[pfn] = pfn;
+        for ( pfn = 0; pfn < dom->p2m_size; pfn++ )
+            dom->p2m_host[pfn] = INVALID_P2M_ENTRY;
 
         /* allocate guest memory */
-        for ( i = rc = allocsz = 0;
-              (i < dom->total_pages) && !rc;
-              i += allocsz )
+        for ( i = 0; i < dom->nr_vmemranges; i++ )
         {
-            allocsz = dom->total_pages - i;
-            if ( allocsz > 1024*1024 )
-                allocsz = 1024*1024;
-            rc = xc_domain_populate_physmap_exact(
-                dom->xch, dom->guest_domid, allocsz,
-                0, 0, &dom->p2m_host[i]);
+            unsigned int memflags;
+            uint64_t pages;
+            unsigned int pnode = dom->vnode_to_pnode[dom->vmemranges[i].nid];
+
+            memflags = 0;
+            if ( pnode != XC_NUMA_NO_NODE )
+                memflags |= XENMEMF_exact_node(pnode);
+
+            pages = (dom->vmemranges[i].end - dom->vmemranges[i].start)
+                >> PAGE_SHIFT;
+            pfn_base = dom->vmemranges[i].start >> PAGE_SHIFT;
+
+            for ( pfn = pfn_base; pfn < pfn_base+pages; pfn++ )
+                dom->p2m_host[pfn] = pfn;
+
+            for ( j = 0; j < pages; j += allocsz )
+            {
+                allocsz = pages - j;
+                if ( allocsz > 1024*1024 )
+                    allocsz = 1024*1024;
+
+                rc = xc_domain_populate_physmap_exact(dom->xch,
+                         dom->guest_domid, allocsz, 0, memflags,
+                         &dom->p2m_host[pfn_base+j]);
+
+                if ( rc )
+                {
+                    if ( pnode != XC_NUMA_NO_NODE )
+                        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                                     "%s: failed to allocate 0x%"PRIx64" pages 
(v=%d, p=%d)\n",
+                                     __func__, pages, i, pnode);
+                    else
+                        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                                     "%s: failed to allocate 0x%"PRIx64" 
pages\n",
+                                     __func__, pages);
+                    return rc;
+                }
+            }
         }
 
         /* Ensure no unclaimed pages are left unused.
-- 
1.9.1

