[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] [HVM] qemu: Add guest address-space mapping cache.



# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Node ID 67a06a9b7b1dca707e1cd3b08ae0a341d6e97b3d
# Parent  3f0ca90351e268084fbdb733d70fc596cb46537d
[HVM] qemu: Add guest address-space mapping cache.

On IA32 host or IA32 PAE host, at present, generally, we can't create
an HVM guest with more than 2G memory, because generally it's almost
impossible for Qemu to find a large enough and consecutive virtual
address space to map an HVM guest's whole physical address space.
The attached patch fixes this issue using dynamic mapping based on
little blocks of memory.

Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Dexuan Cui <dexuan.cui@xxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 tools/ioemu/target-i386-dm/cpu.h     |    3 
 tools/ioemu/target-i386-dm/exec-dm.c |   17 ++-
 tools/ioemu/vl.c                     |  154 ++++++++++++++++++++++++++---------
 tools/ioemu/vl.h                     |   20 ++++
 4 files changed, 151 insertions(+), 43 deletions(-)

diff -r 3f0ca90351e2 -r 67a06a9b7b1d tools/ioemu/target-i386-dm/cpu.h
--- a/tools/ioemu/target-i386-dm/cpu.h  Thu Dec 07 10:54:43 2006 +0000
+++ b/tools/ioemu/target-i386-dm/cpu.h  Thu Dec 07 11:12:52 2006 +0000
@@ -25,7 +25,8 @@
 #ifdef TARGET_X86_64
 #define TARGET_LONG_BITS 64
 #else
-#define TARGET_LONG_BITS 32
+/* #define TARGET_LONG_BITS 32 */
+#define TARGET_LONG_BITS 64 /* for Qemu map cache */
 #endif
 
 /* target supports implicit self modifying code */
diff -r 3f0ca90351e2 -r 67a06a9b7b1d tools/ioemu/target-i386-dm/exec-dm.c
--- a/tools/ioemu/target-i386-dm/exec-dm.c      Thu Dec 07 10:54:43 2006 +0000
+++ b/tools/ioemu/target-i386-dm/exec-dm.c      Thu Dec 07 11:12:52 2006 +0000
@@ -36,6 +36,7 @@
 
 #include "cpu.h"
 #include "exec-all.h"
+#include "vl.h"
 
 //#define DEBUG_TB_INVALIDATE
 //#define DEBUG_FLUSH
@@ -426,6 +427,12 @@ static inline int paddr_is_ram(target_ph
 #endif
 }
 
+#if defined(__i386__) || defined(__x86_64__)
+#define phys_ram_addr(x) (qemu_map_cache(x))
+#elif defined(__ia64__)
+#define phys_ram_addr(x) (phys_ram_base + (x))
+#endif
+
 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, 
                             int len, int is_write)
 {
@@ -438,7 +445,7 @@ void cpu_physical_memory_rw(target_phys_
         l = TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK); 
         if (l > len)
             l = len;
-       
+
         io_index = iomem_index(addr);
         if (is_write) {
             if (io_index) {
@@ -460,9 +467,10 @@ void cpu_physical_memory_rw(target_phys_
                 }
             } else if (paddr_is_ram(addr)) {
                 /* Reading from RAM */
-                memcpy(phys_ram_base + addr, buf, l);
+                ptr = phys_ram_addr(addr);
+                memcpy(ptr, buf, l);
 #ifdef __ia64__
-                sync_icache((unsigned long)(phys_ram_base + addr), l);
+                sync_icache(ptr, l);
 #endif 
             }
         } else {
@@ -485,7 +493,8 @@ void cpu_physical_memory_rw(target_phys_
                 }
             } else if (paddr_is_ram(addr)) {
                 /* Reading from RAM */
-                memcpy(buf, phys_ram_base + addr, l);
+                ptr = phys_ram_addr(addr);
+                memcpy(buf, ptr, l);
             } else {
                 /* Neither RAM nor known MMIO space */
                 memset(buf, 0xff, len); 
diff -r 3f0ca90351e2 -r 67a06a9b7b1d tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Thu Dec 07 10:54:43 2006 +0000
+++ b/tools/ioemu/vl.c  Thu Dec 07 11:12:52 2006 +0000
@@ -5807,6 +5807,92 @@ int set_mm_mapping(int xc_handle, uint32
 
     return 0;
 }
+
+#if defined(__i386__) || defined(__x86_64__)
+static struct map_cache *mapcache_entry;
+static unsigned long nr_buckets;
+
+static int qemu_map_cache_init(unsigned long nr_pages)
+{
+    unsigned long max_pages = MAX_MCACHE_SIZE >> PAGE_SHIFT;
+    int i;
+
+    if (nr_pages < max_pages)
+        max_pages = nr_pages;
+
+    nr_buckets = (max_pages << PAGE_SHIFT) >> MCACHE_BUCKET_SHIFT;
+
+    fprintf(logfile, "qemu_map_cache_init nr_buckets = %lx\n", nr_buckets);
+
+    mapcache_entry = malloc(nr_buckets * sizeof(struct map_cache));
+    if (mapcache_entry == NULL) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    memset(mapcache_entry, 0, nr_buckets * sizeof(struct map_cache));
+
+    /*
+     * To avoid ENOMEM from xc_map_foreign_batch() at runtime, we
+     * pre-fill all the map caches in advance.
+     */
+    for (i = 0; i < nr_buckets; i++)
+       (void)qemu_map_cache(((target_phys_addr_t)i) << MCACHE_BUCKET_SHIFT);
+
+    return 0;
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr)
+{
+    struct map_cache *entry;
+    unsigned long address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
+    unsigned long address_offset = phys_addr & (MCACHE_BUCKET_SIZE-1);
+
+    /* For most cases (>99.9%), the page address is the same. */
+    static unsigned long last_address_index = ~0UL;
+    static uint8_t      *last_address_vaddr;
+
+    if (address_index == last_address_index)
+        return last_address_vaddr + address_offset;
+
+    entry = &mapcache_entry[address_index % nr_buckets];
+
+    if (entry->vaddr_base == NULL || entry->paddr_index != address_index)
+    { 
+        /* We need to remap a bucket. */
+        uint8_t *vaddr_base;
+        unsigned long pfns[MCACHE_BUCKET_SIZE >> PAGE_SHIFT];
+        unsigned int i;
+
+        if (entry->vaddr_base != NULL) {
+            errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+            if (errno) {
+                fprintf(logfile, "unmap fails %d\n", errno);
+                exit(-1);
+            }
+        }
+
+        for (i = 0; i < MCACHE_BUCKET_SIZE >> PAGE_SHIFT; i++)
+            pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-PAGE_SHIFT)) + i;
+
+        vaddr_base = xc_map_foreign_batch(
+            xc_handle, domid, PROT_READ|PROT_WRITE,
+            pfns, MCACHE_BUCKET_SIZE >> PAGE_SHIFT);
+        if (vaddr_base == NULL) {
+            fprintf(logfile, "xc_map_foreign_batch error %d\n", errno);
+            exit(-1);
+        }
+
+        entry->vaddr_base  = vaddr_base;
+        entry->paddr_index = address_index;;
+    }
+
+    last_address_index = address_index;
+    last_address_vaddr = entry->vaddr_base;
+
+    return last_address_vaddr + address_offset;
+}
+#endif
 
 int main(int argc, char **argv)
 {
@@ -6130,6 +6216,7 @@ int main(int argc, char **argv)
                 break;
             case QEMU_OPTION_m:
                 ram_size = atol(optarg) * 1024 * 1024;
+                ram_size = (uint64_t)atol(optarg) * 1024 * 1024;
                 if (ram_size <= 0)
                     help();
 #ifndef CONFIG_DM
@@ -6400,50 +6487,41 @@ int main(int argc, char **argv)
         shared_page_nr = nr_pages - 1;
 #endif
 
+#if defined(__i386__) || defined(__x86_64__)
+
+    if ( qemu_map_cache_init(tmp_nr_pages) )
+    {
+        fprintf(logfile, "qemu_map_cache_init returned: error %d\n", errno);
+        exit(-1);
+    }
+
+    shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE, shared_page_nr);
+    if (shared_page == NULL) {
+        fprintf(logfile, "map shared IO page returned error %d\n", errno);
+        exit(-1);
+    }
+
+    fprintf(logfile, "shared page at pfn:%lx\n", shared_page_nr);
+
+    buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
+                                            PROT_READ|PROT_WRITE,
+                                            shared_page_nr - 2);
+    if (buffered_io_page == NULL) {
+        fprintf(logfile, "map buffered IO page returned error %d\n", errno);
+        exit(-1);
+    }
+
+    fprintf(logfile, "buffered io page at pfn:%lx\n", shared_page_nr - 2);
+
+#elif defined(__ia64__)
+
     page_array = (xen_pfn_t *)malloc(tmp_nr_pages * sizeof(xen_pfn_t));
     if (page_array == NULL) {
         fprintf(logfile, "malloc returned error %d\n", errno);
         exit(-1);
     }
 
-#if defined(__i386__) || defined(__x86_64__)
-    for ( i = 0; i < tmp_nr_pages; i++)
-        page_array[i] = i;
-
-    phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
-                                         PROT_READ|PROT_WRITE, page_array,
-                                         tmp_nr_pages);
-    if (phys_ram_base == NULL) {
-        fprintf(logfile, "batch map guest memory returned error %d\n", errno);
-        exit(-1);
-    }
-
-    shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                       PROT_READ|PROT_WRITE,
-                                       page_array[shared_page_nr]);
-    if (shared_page == NULL) {
-        fprintf(logfile, "map shared IO page returned error %d\n", errno);
-        exit(-1);
-    }
-
-    fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n",
-            shared_page_nr, (uint64_t)(page_array[shared_page_nr]));
-
-    buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                            PROT_READ|PROT_WRITE,
-                                            page_array[shared_page_nr - 2]);
-    if (buffered_io_page == NULL) {
-        fprintf(logfile, "map buffered IO page returned error %d\n", errno);
-        exit(-1);
-    }
-
-    fprintf(logfile, "buffered io page at pfn:%lx, mfn: %"PRIx64"\n",
-            shared_page_nr - 2, (uint64_t)(page_array[shared_page_nr - 2]));
-
-    free(page_array);
-
-#elif defined(__ia64__)
-  
     if (xc_ia64_get_pfn_list(xc_handle, domid, page_array,
                              IO_PAGE_START >> PAGE_SHIFT, 3) != 3) {
         fprintf(logfile, "xc_ia64_get_pfn_list returned error %d\n", errno);
diff -r 3f0ca90351e2 -r 67a06a9b7b1d tools/ioemu/vl.h
--- a/tools/ioemu/vl.h  Thu Dec 07 10:54:43 2006 +0000
+++ b/tools/ioemu/vl.h  Thu Dec 07 11:12:52 2006 +0000
@@ -156,6 +156,26 @@ extern void *shared_vram;
 
 extern FILE *logfile;
 
+
+#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) 
+#define MAX_MCACHE_SIZE    0x40000000 /* 1GB max for x86 */
+#define MCACHE_BUCKET_SHIFT 16
+#elif defined(__x86_64__)
+#define MAX_MCACHE_SIZE    0x1000000000 /* 64GB max for x86_64 */
+#define MCACHE_BUCKET_SHIFT 20
+#endif
+
+#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
+
+struct map_cache {
+    unsigned long paddr_index;
+    uint8_t      *vaddr_base;
+};
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr);
+#endif
+
 extern int xc_handle;
 extern int domid;
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.