[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v4 15/21] libxc: allocate memory with vNUMA information for HVM guest



The algorithm is more or less the same as the one used for PV guests.
Libxc gets hold of the mapping of vnode to pnode and the size of each
vnode, then allocates memory accordingly.

The function then returns the low memory end, high memory end and MMIO
start to the caller. Libxl needs those values to construct vmemranges
for that guest.

Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
Cc: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
---
Changes in v4:
1. Adapt to new interface.
2. Shorten error message.
3. This patch includes only functional changes.

Changes in v3:
1. Rewrite commit log.
2. Add a few code comments.
---
 tools/libxc/include/xenguest.h |    9 ++++
 tools/libxc/xc_hvm_build_x86.c |  102 +++++++++++++++++++++++++++++++---------
 2 files changed, 90 insertions(+), 21 deletions(-)

diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
index 9b7edff..6a1ac52 100644
--- a/tools/libxc/include/xenguest.h
+++ b/tools/libxc/include/xenguest.h
@@ -236,6 +236,15 @@ struct xc_hvm_build_args {
     struct xc_hvm_firmware_module smbios_module;
     /* Whether to use claim hypercall (1 - enable, 0 - disable). */
     int claim_enabled;
+
+    /* vNUMA information*/
+    struct xc_vnuma_info *vnuma_info;
+    unsigned int nr_vnuma_info;
+
+    /* Out parameters  */
+    uint64_t lowmem_end;
+    uint64_t highmem_end;
+    uint64_t mmio_start;
 };
 
 /**
diff --git a/tools/libxc/xc_hvm_build_x86.c b/tools/libxc/xc_hvm_build_x86.c
index ecc3224..bd27ce5 100644
--- a/tools/libxc/xc_hvm_build_x86.c
+++ b/tools/libxc/xc_hvm_build_x86.c
@@ -89,7 +89,8 @@ static int modules_init(struct xc_hvm_build_args *args,
 }
 
 static void build_hvm_info(void *hvm_info_page, uint64_t mem_size,
-                           uint64_t mmio_start, uint64_t mmio_size)
+                           uint64_t mmio_start, uint64_t mmio_size,
+                           struct xc_hvm_build_args *args)
 {
     struct hvm_info_table *hvm_info = (struct hvm_info_table *)
         (((unsigned char *)hvm_info_page) + HVM_INFO_OFFSET);
@@ -119,6 +120,10 @@ static void build_hvm_info(void *hvm_info_page, uint64_t mem_size,
     hvm_info->high_mem_pgend = highmem_end >> PAGE_SHIFT;
     hvm_info->reserved_mem_pgstart = ioreq_server_pfn(0);
 
+    args->lowmem_end = lowmem_end;
+    args->highmem_end = highmem_end;
+    args->mmio_start = mmio_start;
+
     /* Finish with the checksum. */
     for ( i = 0, sum = 0; i < hvm_info->length; i++ )
         sum += ((uint8_t *)hvm_info)[i];
@@ -244,7 +249,7 @@ static int setup_guest(xc_interface *xch,
                        char *image, unsigned long image_size)
 {
     xen_pfn_t *page_array = NULL;
-    unsigned long i, nr_pages = args->mem_size >> PAGE_SHIFT;
+    unsigned long i, j, nr_pages = args->mem_size >> PAGE_SHIFT;
     unsigned long target_pages = args->mem_target >> PAGE_SHIFT;
     uint64_t mmio_start = (1ull << 32) - args->mmio_size;
     uint64_t mmio_size = args->mmio_size;
@@ -258,13 +263,12 @@ static int setup_guest(xc_interface *xch,
     xen_capabilities_info_t caps;
     unsigned long stat_normal_pages = 0, stat_2mb_pages = 0, 
         stat_1gb_pages = 0;
-    int pod_mode = 0;
+    unsigned int memflags = 0;
     int claim_enabled = args->claim_enabled;
     xen_pfn_t special_array[NR_SPECIAL_PAGES];
     xen_pfn_t ioreq_server_array[NR_IOREQ_SERVER_PAGES];
-
-    if ( nr_pages > target_pages )
-        pod_mode = XENMEMF_populate_on_demand;
+    struct xc_vnuma_info dummy_vnuma_info;
+    uint64_t total_pages;
 
     memset(&elf, 0, sizeof(elf));
     if ( elf_init(&elf, image, image_size) != 0 )
@@ -276,6 +280,37 @@ static int setup_guest(xc_interface *xch,
     v_start = 0;
     v_end = args->mem_size;
 
+    if ( nr_pages > target_pages )
+        memflags |= XENMEMF_populate_on_demand;
+
+    if ( args->nr_vnuma_info == 0 )
+    {
+        /* Build dummy vnode information */
+        dummy_vnuma_info.vnode = 0;
+        dummy_vnuma_info.pnode = XC_VNUMA_NO_NODE;
+        dummy_vnuma_info.pages = args->mem_size >> PAGE_SHIFT;
+        args->nr_vnuma_info = 1;
+        args->vnuma_info = &dummy_vnuma_info;
+    }
+    else
+    {
+        if ( nr_pages > target_pages )
+        {
+            PERROR("Cannot enable vNUMA and PoD at the same time");
+            goto error_out;
+        }
+    }
+
+    total_pages = 0;
+    for ( i = 0; i < args->nr_vnuma_info; i++ )
+        total_pages += args->vnuma_info[i].pages;
+    if ( total_pages != (args->mem_size >> PAGE_SHIFT) )
+    {
+        PERROR("vNUMA memory pages mismatch (0x%"PRIx64" != 0x%"PRIx64")",
+               total_pages, args->mem_size >> PAGE_SHIFT);
+        goto error_out;
+    }
+
     if ( xc_version(xch, XENVER_capabilities, &caps) != 0 )
     {
         PERROR("Could not get Xen capabilities");
@@ -320,7 +355,7 @@ static int setup_guest(xc_interface *xch,
         }
     }
 
-    if ( pod_mode )
+    if ( memflags & XENMEMF_populate_on_demand )
     {
         /*
          * Subtract VGA_HOLE_SIZE from target_pages for the VGA
@@ -349,15 +384,32 @@ static int setup_guest(xc_interface *xch,
      * ensure that we can be preempted and hence dom0 remains responsive.
      */
     rc = xc_domain_populate_physmap_exact(
-        xch, dom, 0xa0, 0, pod_mode, &page_array[0x00]);
+        xch, dom, 0xa0, 0, memflags, &page_array[0x00]);
     cur_pages = 0xc0;
     stat_normal_pages = 0xc0;
 
+    for ( i = 0; i < args->nr_vnuma_info; i++ )
     {
-        while ( (rc == 0) && (nr_pages > cur_pages) )
+        unsigned int new_memflags = memflags;
+        uint64_t pages, finished;
+
+        if ( args->vnuma_info[i].pnode != XC_VNUMA_NO_NODE )
+        {
+            new_memflags |= XENMEMF_exact_node(args->vnuma_info[i].pnode);
+            new_memflags |= XENMEMF_exact_node_request;
+        }
+
+        pages = args->vnuma_info[i].pages;
+        /* Consider vga hole belongs to node 0 */
+        if ( i == 0 )
+            finished = 0xc0;
+        else
+            finished = 0;
+
+        while ( (rc == 0) && (pages > finished) )
         {
             /* Clip count to maximum 1GB extent. */
-            unsigned long count = nr_pages - cur_pages;
+            unsigned long count = pages - finished;
             unsigned long max_pages = SUPERPAGE_1GB_NR_PFNS;
 
             if ( count > max_pages )
@@ -388,19 +440,20 @@ static int setup_guest(xc_interface *xch,
                 unsigned long nr_extents = count >> SUPERPAGE_1GB_SHIFT;
                 xen_pfn_t sp_extents[nr_extents];
 
-                for ( i = 0; i < nr_extents; i++ )
-                    sp_extents[i] =
-                        page_array[cur_pages+(i<<SUPERPAGE_1GB_SHIFT)];
+                for ( j = 0; j < nr_extents; j++ )
+                    sp_extents[j] =
+                        page_array[cur_pages+(j<<SUPERPAGE_1GB_SHIFT)];
 
                 done = xc_domain_populate_physmap(xch, dom, nr_extents,
                                                   SUPERPAGE_1GB_SHIFT,
-                                                  pod_mode, sp_extents);
+                                                  memflags, sp_extents);
 
                 if ( done > 0 )
                 {
                     stat_1gb_pages += done;
                     done <<= SUPERPAGE_1GB_SHIFT;
                     cur_pages += done;
+                    finished += done;
                     count -= done;
                 }
             }
@@ -428,19 +481,19 @@ static int setup_guest(xc_interface *xch,
                     unsigned long nr_extents = count >> SUPERPAGE_2MB_SHIFT;
                     xen_pfn_t sp_extents[nr_extents];
 
-                    for ( i = 0; i < nr_extents; i++ )
-                        sp_extents[i] =
-                            page_array[cur_pages+(i<<SUPERPAGE_2MB_SHIFT)];
+                    for ( j = 0; j < nr_extents; j++ )
+                        sp_extents[j] =
+                            page_array[cur_pages+(j<<SUPERPAGE_2MB_SHIFT)];
 
                     done = xc_domain_populate_physmap(xch, dom, nr_extents,
                                                       SUPERPAGE_2MB_SHIFT,
-                                                      pod_mode, sp_extents);
-
+                                                      memflags, sp_extents);
                     if ( done > 0 )
                     {
                         stat_2mb_pages += done;
                         done <<= SUPERPAGE_2MB_SHIFT;
                         cur_pages += done;
+                        finished += done;
                         count -= done;
                     }
                 }
@@ -450,11 +503,15 @@ static int setup_guest(xc_interface *xch,
             if ( count != 0 )
             {
                 rc = xc_domain_populate_physmap_exact(
-                    xch, dom, count, 0, pod_mode, &page_array[cur_pages]);
+                    xch, dom, count, 0, new_memflags, &page_array[cur_pages]);
                 cur_pages += count;
+                finished += count;
                 stat_normal_pages += count;
             }
         }
+
+        if ( rc != 0 )
+            break;
     }
 
     if ( rc != 0 )
@@ -478,7 +535,7 @@ static int setup_guest(xc_interface *xch,
               xch, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
               HVM_INFO_PFN)) == NULL )
         goto error_out;
-    build_hvm_info(hvm_info_page, v_end, mmio_start, mmio_size);
+    build_hvm_info(hvm_info_page, v_end, mmio_start, mmio_size, args);
     munmap(hvm_info_page, PAGE_SIZE);
 
     /* Allocate and clear special pages. */
@@ -617,6 +674,9 @@ int xc_hvm_build(xc_interface *xch, uint32_t domid,
             args.acpi_module.guest_addr_out;
         hvm_args->smbios_module.guest_addr_out = 
             args.smbios_module.guest_addr_out;
+        hvm_args->lowmem_end = args.lowmem_end;
+        hvm_args->highmem_end = args.highmem_end;
+        hvm_args->mmio_start = args.mmio_start;
     }
 
     free(image);
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.