[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] [qemu] Support HVM guests with more than 3.75G memory.



# HG changeset patch
# User Christian Limpach <Christian.Limpach@xxxxxxxxxxxxx>
# Node ID 078bfd250677e403cfb0e29f79042e040ee4f89a
# Parent  28e3132b9f93b4844d4d8ad8df2984c09069a7cd
[qemu] Support HVM guests with more than 3.75G memory.
Changes are:
1) M2P table and e820 table are changed to skip address space from
HVM_RAM_LIMIT_BELOW_4G to 4G.
2) shared io page location: when the guest has less than
HVM_RAM_LIMIT_BELOW_4G memory, it is the last page of RAM as today;
otherwise it is the last page of the first HVM_RAM_LIMIT_BELOW_4G of RAM.
3) in qemu-dm, the address space from HVM_RAM_LIMIT_BELOW_4G to 4G is
stuffed with mfns starting from 4G, so the 1:1 mapping can still work.
This is ugly, but another limit-check patch (changeset 10757) will
prevent qemu-dm from accessing this range.  This ugly stuffing will be
removed when the patch that removes the 1:1 mapping from qemu-dm gets
accepted in the future.

Signed-off-by: Xin Li <xin.b.li@xxxxxxxxx>
Signed-off-by: Christian Limpach <Christian.Limpach@xxxxxxxxxxxxx>
---
 tools/ioemu/hw/piix_pci.c     |    2 
 tools/ioemu/vl.c              |   71 ++++++++++++------
 tools/ioemu/vl.h              |    1 
 tools/libxc/xc_hvm_build.c    |  163 ++++++++++++++++++++++++++----------------
 xen/include/public/hvm/e820.h |    5 +
 5 files changed, 159 insertions(+), 83 deletions(-)

diff -r 28e3132b9f93 -r 078bfd250677 tools/ioemu/hw/piix_pci.c
--- a/tools/ioemu/hw/piix_pci.c Thu Aug 17 20:30:05 2006 +0100
+++ b/tools/ioemu/hw/piix_pci.c Thu Aug 17 20:31:13 2006 +0100
@@ -415,7 +415,7 @@ void pci_bios_init(void)
     uint8_t elcr[2];
 
     pci_bios_io_addr = 0xc000;
-    pci_bios_mem_addr = 0xf0000000;
+    pci_bios_mem_addr = HVM_BELOW_4G_MMIO_START;
 
     /* activate IRQ mappings */
     elcr[0] = 0x00;
diff -r 28e3132b9f93 -r 078bfd250677 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Thu Aug 17 20:30:05 2006 +0100
+++ b/tools/ioemu/vl.c  Thu Aug 17 20:31:13 2006 +0100
@@ -5835,7 +5835,7 @@ int main(int argc, char **argv)
     QEMUMachine *machine;
     char usb_devices[MAX_USB_CMDLINE][128];
     int usb_devices_index;
-    unsigned long nr_pages;
+    unsigned long nr_pages, tmp_nr_pages, shared_page_nr;
     xen_pfn_t *page_array;
     extern void *shared_page;
     extern void *buffered_io_page;
@@ -6366,17 +6366,27 @@ int main(int argc, char **argv)
     /* init the memory */
     phys_ram_size = ram_size + vga_ram_size + bios_size;
 
+#ifdef CONFIG_DM
+
+    xc_handle = xc_interface_open();
+
 #if defined (__ia64__)
     if (ram_size > MMIO_START)
-       ram_size += 1 * MEM_G; /* skip 3G-4G MMIO, LEGACY_IO_SPACE etc. */
-#endif
-
-#ifdef CONFIG_DM
+        ram_size += 1 * MEM_G; /* skip 3G-4G MMIO, LEGACY_IO_SPACE etc. */
+#endif
 
     nr_pages = ram_size/PAGE_SIZE;
-    xc_handle = xc_interface_open();
-
-    page_array = (xen_pfn_t *)malloc(nr_pages * sizeof(xen_pfn_t));
+    tmp_nr_pages = nr_pages;
+
+#if defined(__i386__) || defined(__x86_64__)
+    if (ram_size > HVM_BELOW_4G_RAM_END) {
+        tmp_nr_pages += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
+        shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
+    } else
+        shared_page_nr = nr_pages - 1;
+#endif
+
+    page_array = (xen_pfn_t *)malloc(tmp_nr_pages * sizeof(xen_pfn_t));
     if (page_array == NULL) {
         fprintf(logfile, "malloc returned error %d\n", errno);
         exit(-1);
@@ -6388,25 +6398,40 @@ int main(int argc, char **argv)
         exit(-1);
     }
 
+    if (ram_size > HVM_BELOW_4G_RAM_END)
+        for (i = 0; i < nr_pages - (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT); i++)
+            page_array[tmp_nr_pages - 1 - i] = page_array[nr_pages - 1 - i];
+
     phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
                                          PROT_READ|PROT_WRITE, page_array,
-                                         nr_pages - 3);
-    if (phys_ram_base == 0) {
-        fprintf(logfile, "xc_map_foreign_batch returned error %d\n", errno);
+                                         tmp_nr_pages);
+    if (phys_ram_base == NULL) {
+        fprintf(logfile, "batch map guest memory returned error %d\n", errno);
         exit(-1);
     }
+
+    shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE,
+                                       page_array[shared_page_nr]);
+    if (shared_page == NULL) {
+        fprintf(logfile, "map shared IO page returned error %d\n", errno);
+        exit(-1);
+    }
+
+    fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n",
+            shared_page_nr, (uint64_t)(page_array[shared_page_nr]));
 
     /* not yet add for IA64 */
     buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                       PROT_READ|PROT_WRITE,
-                                       page_array[nr_pages - 3]);
-
-    shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                       PROT_READ|PROT_WRITE,
-                                       page_array[nr_pages - 1]);
-
-    fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", nr_pages - 1,
-            (uint64_t)(page_array[nr_pages - 1]));
+                                            PROT_READ|PROT_WRITE,
+                                            page_array[shared_page_nr - 2]);
+    if (buffered_io_page == NULL) {
+        fprintf(logfile, "map buffered IO page returned error %d\n", errno);
+        exit(-1);
+    }
+
+    fprintf(logfile, "buffered io page at pfn:%lx, mfn: %"PRIx64"\n",
+            shared_page_nr - 2, (uint64_t)(page_array[shared_page_nr - 2]));
 
     free(page_array);
 
@@ -6432,9 +6457,9 @@ int main(int argc, char **argv)
     }
 
     if (ram_size > MMIO_START) {       
-       for (i = 0 ; i < MEM_G >> PAGE_SHIFT; i++)
-           page_array[MMIO_START >> PAGE_SHIFT + i] =
-               page_array[IO_PAGE_START >> PAGE_SHIFT + 1];
+        for (i = 0 ; i < MEM_G >> PAGE_SHIFT; i++)
+            page_array[MMIO_START >> PAGE_SHIFT + i] =
+                page_array[IO_PAGE_START >> PAGE_SHIFT + 1];
     }
 
     phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
diff -r 28e3132b9f93 -r 078bfd250677 tools/ioemu/vl.h
--- a/tools/ioemu/vl.h  Thu Aug 17 20:30:05 2006 +0100
+++ b/tools/ioemu/vl.h  Thu Aug 17 20:31:13 2006 +0100
@@ -39,6 +39,7 @@
 #include <sys/stat.h>
 #include "xenctrl.h"
 #include "xs.h"
+#include <xen/hvm/e820.h>
 
 #ifndef O_LARGEFILE
 #define O_LARGEFILE 0
diff -r 28e3132b9f93 -r 078bfd250677 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Thu Aug 17 20:30:05 2006 +0100
+++ b/tools/libxc/xc_hvm_build.c        Thu Aug 17 20:31:13 2006 +0100
@@ -54,9 +54,19 @@ static void build_e820map(void *e820_pag
 {
     struct e820entry *e820entry =
         (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
+    unsigned long long extra_mem_size = 0;
     unsigned char nr_map = 0;
 
-    /* XXX: Doesn't work for > 4GB yet */
+    /*
+     * physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved
+     * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END
+     * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above.
+     */
+    if ( mem_size > HVM_BELOW_4G_RAM_END ) {
+        extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END;
+        mem_size = HVM_BELOW_4G_RAM_END;
+    }
+
     e820entry[nr_map].addr = 0x0;
     e820entry[nr_map].size = 0x9F000;
     e820entry[nr_map].type = E820_RAM;
@@ -77,53 +87,86 @@ static void build_e820map(void *e820_pag
     e820entry[nr_map].type = E820_RESERVED;
     nr_map++;
 
-#define STATIC_PAGES    3
-    /* 3 static pages:
-     * - ioreq buffer.
-     * - xenstore.
-     * - shared_page.
-     */
+/* ACPI data: 10 pages. */
+#define ACPI_DATA_PAGES     10
+/* ACPI NVS: 3 pages.   */
+#define ACPI_NVS_PAGES      3
+/* buffered io page.    */
+#define BUFFERED_IO_PAGES   1
+/* xenstore page.       */
+#define XENSTORE_PAGES      1
+/* shared io page.      */
+#define SHARED_IO_PAGES     1
+/* totally 16 static pages are reserved in E820 table */
 
     /* Most of the ram goes here */
     e820entry[nr_map].addr = 0x100000;
-    e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES * PAGE_SIZE;
+    e820entry[nr_map].size = mem_size - 0x100000 - PAGE_SIZE *
+                                                (ACPI_DATA_PAGES +
+                                                 ACPI_NVS_PAGES +
+                                                 BUFFERED_IO_PAGES +
+                                                 XENSTORE_PAGES +
+                                                 SHARED_IO_PAGES);
     e820entry[nr_map].type = E820_RAM;
     nr_map++;
 
     /* Statically allocated special pages */
 
+    /* For ACPI data */
+    e820entry[nr_map].addr = mem_size - PAGE_SIZE *
+                                        (ACPI_DATA_PAGES +
+                                         ACPI_NVS_PAGES +
+                                         BUFFERED_IO_PAGES +
+                                         XENSTORE_PAGES +
+                                         SHARED_IO_PAGES);
+    e820entry[nr_map].size = PAGE_SIZE * ACPI_DATA_PAGES;
+    e820entry[nr_map].type = E820_ACPI;
+    nr_map++;
+
+    /* For ACPI NVS */
+    e820entry[nr_map].addr = mem_size - PAGE_SIZE *
+                                        (ACPI_NVS_PAGES +
+                                         BUFFERED_IO_PAGES +
+                                         XENSTORE_PAGES +
+                                         SHARED_IO_PAGES);
+    e820entry[nr_map].size = PAGE_SIZE * ACPI_NVS_PAGES;
+    e820entry[nr_map].type = E820_NVS;
+    nr_map++;
+
     /* For buffered IO requests */
-    e820entry[nr_map].addr = mem_size - 3 * PAGE_SIZE;
-    e820entry[nr_map].size = PAGE_SIZE;
+    e820entry[nr_map].addr = mem_size - PAGE_SIZE *
+                                        (BUFFERED_IO_PAGES +
+                                         XENSTORE_PAGES +
+                                         SHARED_IO_PAGES);
+    e820entry[nr_map].size = PAGE_SIZE * BUFFERED_IO_PAGES;
     e820entry[nr_map].type = E820_BUFFERED_IO;
     nr_map++;
 
     /* For xenstore */
-    e820entry[nr_map].addr = mem_size - 2 * PAGE_SIZE;
-    e820entry[nr_map].size = PAGE_SIZE;
+    e820entry[nr_map].addr = mem_size - PAGE_SIZE *
+                                        (XENSTORE_PAGES +
+                                         SHARED_IO_PAGES);
+    e820entry[nr_map].size = PAGE_SIZE * XENSTORE_PAGES;
     e820entry[nr_map].type = E820_XENSTORE;
     nr_map++;
 
     /* Shared ioreq_t page */
-    e820entry[nr_map].addr = mem_size - PAGE_SIZE;
-    e820entry[nr_map].size = PAGE_SIZE;
+    e820entry[nr_map].addr = mem_size - PAGE_SIZE * SHARED_IO_PAGES;
+    e820entry[nr_map].size = PAGE_SIZE * SHARED_IO_PAGES;
     e820entry[nr_map].type = E820_SHARED_PAGE;
-    nr_map++;
-
-    e820entry[nr_map].addr = mem_size;
-    e820entry[nr_map].size = 0x3 * PAGE_SIZE;
-    e820entry[nr_map].type = E820_NVS;
-    nr_map++;
-
-    e820entry[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
-    e820entry[nr_map].size = 0xA * PAGE_SIZE;
-    e820entry[nr_map].type = E820_ACPI;
     nr_map++;
 
     e820entry[nr_map].addr = 0xFEC00000;
     e820entry[nr_map].size = 0x1400000;
     e820entry[nr_map].type = E820_IO;
     nr_map++;
+
+    if ( extra_mem_size ) {
+        e820entry[nr_map].addr = (1ULL << 32);
+        e820entry[nr_map].size = extra_mem_size;
+        e820entry[nr_map].type = E820_RAM;
+        nr_map++;
+    }
 
     *(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map;
 }
@@ -147,7 +190,7 @@ static void set_hvm_info_checksum(struct
  */
 static int set_hvm_info(int xc_handle, uint32_t dom,
                         xen_pfn_t *pfn_list, unsigned int vcpus,
-                        unsigned int acpi, unsigned int apic)
+                        unsigned int acpi)
 {
     char *va_map;
     struct hvm_info_table *va_hvm;
@@ -170,8 +213,6 @@ static int set_hvm_info(int xc_handle, u
     set_hvm_info_checksum(va_hvm);
 
     munmap(va_map, PAGE_SIZE);
-
-    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);
 
     return 0;
 }
@@ -200,11 +241,7 @@ static int setup_guest(int xc_handle,
     struct domain_setup_info dsi;
     uint64_t v_end;
 
-    unsigned long shared_page_frame = 0;
-    shared_iopage_t *sp;
-
-    unsigned long ioreq_buffer_frame = 0;
-    void *ioreq_buffer_page;
+    unsigned long shared_page_nr;
 
     memset(&dsi, 0, sizeof(struct domain_setup_info));
 
@@ -256,23 +293,38 @@ static int setup_guest(int xc_handle,
     /* Write the machine->phys table entries. */
     for ( count = 0; count < nr_pages; count++ )
     {
+        unsigned long gpfn_count_skip;
+
         ptr = (unsigned long long)page_array[count] << PAGE_SHIFT;
+
+        gpfn_count_skip = 0;
+
+        /*
+         * physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved
+         * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END
+         * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above.
+         */
+        if ( count >= (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) )
+            gpfn_count_skip = HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
+
         if ( xc_add_mmu_update(xc_handle, mmu,
-                               ptr | MMU_MACHPHYS_UPDATE, count) )
+                               ptr | MMU_MACHPHYS_UPDATE,
+                               count + gpfn_count_skip) )
             goto error_out;
     }
 
-    if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi, apic) )
+    if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi) )
     {
         ERROR("Couldn't set hvm info for HVM guest.\n");
         goto error_out;
     }
 
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);
 
     if ( (e820_page = xc_map_foreign_range(
               xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
-              page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 )
+              page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == NULL )
         goto error_out;
     memset(e820_page, 0, PAGE_SIZE);
     build_e820map(e820_page, v_end);
@@ -281,7 +333,7 @@ static int setup_guest(int xc_handle,
     /* shared_info page starts its life empty. */
     if ( (shared_info = xc_map_foreign_range(
               xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
-              shared_info_frame)) == 0 )
+              shared_info_frame)) == NULL )
         goto error_out;
     memset(shared_info, 0, PAGE_SIZE);
     /* Mask all upcalls... */
@@ -289,32 +341,25 @@ static int setup_guest(int xc_handle,
         shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
     munmap(shared_info, PAGE_SIZE);
 
+    if ( v_end > HVM_BELOW_4G_RAM_END )
+        shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
+    else
+        shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
+
+    *store_mfn = page_array[shared_page_nr - 1];
+
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, *store_mfn);
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
+
     /* Paranoia */
-    shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1];
-    if ( (sp = (shared_iopage_t *) xc_map_foreign_range(
-              xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
-              shared_page_frame)) == 0 )
-        goto error_out;
-    memset(sp, 0, PAGE_SIZE);
-    munmap(sp, PAGE_SIZE);
+    /* clean the shared IO requests page */
+    if ( xc_clear_domain_page(xc_handle, dom, page_array[shared_page_nr]) )
+        goto error_out;
 
     /* clean the buffered IO requests page */
-    ioreq_buffer_frame = page_array[(v_end >> PAGE_SHIFT) - 3];
-    ioreq_buffer_page = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                             PROT_READ | PROT_WRITE,
-                                             ioreq_buffer_frame);
-
-    if ( ioreq_buffer_page == NULL )
-        goto error_out;
-
-    memset(ioreq_buffer_page, 0, PAGE_SIZE);
-
-    munmap(ioreq_buffer_page, PAGE_SIZE);
-
-    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, (v_end >> 
PAGE_SHIFT) - 2);
-    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
-
-    *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
+    if ( xc_clear_domain_page(xc_handle, dom, page_array[shared_page_nr - 2]) )
+        goto error_out;
+
     if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
         goto error_out;
 
diff -r 28e3132b9f93 -r 078bfd250677 xen/include/public/hvm/e820.h
--- a/xen/include/public/hvm/e820.h     Thu Aug 17 20:30:05 2006 +0100
+++ b/xen/include/public/hvm/e820.h     Thu Aug 17 20:31:13 2006 +0100
@@ -24,4 +24,9 @@ struct e820entry {
     uint32_t type;
 } __attribute__((packed));
 
+#define HVM_BELOW_4G_RAM_END        0xF0000000
+
+#define HVM_BELOW_4G_MMIO_START     HVM_BELOW_4G_RAM_END
+#define HVM_BELOW_4G_MMIO_LENGTH    ((1ULL << 32) - HVM_BELOW_4G_MMIO_START)
+
 #endif /* __XEN_PUBLIC_HVM_E820_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.