[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] x86: PV support for hugepages



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1225882641 0
# Node ID 5fd51e1e9c798f18a06a43c4cb83df93ac0698dd
# Parent  1e437b5b418a1c47c96f8c65442bc63ab35f671c
x86: PV support for hugepages

Hugepage support is disabled by default and must be enabled via the
hypervisor command-line option "allowhugepage". The tools do not yet
support saving, restoring, or migrating guests that use hugepages.

Signed-off-by: Dave McCracken <dave.mccracken@xxxxxxxxxx>
---
 xen/arch/x86/mm.c                 |   89 ++++++++++++++++++++++++++++++++------
 xen/arch/x86/traps.c              |   10 ++--
 xen/include/asm-x86/mm.h          |    1 
 xen/include/asm-x86/x86_32/page.h |    2 
 xen/include/asm-x86/x86_64/page.h |    2 
 5 files changed, 86 insertions(+), 18 deletions(-)

diff -r 1e437b5b418a -r 5fd51e1e9c79 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Nov 05 10:26:19 2008 +0000
+++ b/xen/arch/x86/mm.c Wed Nov 05 10:57:21 2008 +0000
@@ -160,6 +160,9 @@ unsigned long total_pages;
 
 #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
 
+int opt_allow_hugepage;
+boolean_param("allowhugepage", opt_allow_hugepage);
+
 #define l1_disallow_mask(d)                                     \
     ((d != dom_io) &&                                           \
      (rangeset_is_empty((d)->iomem_caps) &&                     \
@@ -584,6 +587,28 @@ static int get_page_and_type_from_pagenr
         put_page(page);
 
     return rc;
+}
+
+static int get_data_page(
+    struct page_info *page, struct domain *d, int writeable)
+{
+    int rc;
+
+    if ( writeable )
+        rc = get_page_and_type(page, d, PGT_writable_page);
+    else
+        rc = get_page(page, d);
+
+    return rc;
+}
+
+static void put_data_page(
+    struct page_info *page, int writeable)
+{
+    if ( writeable )
+        put_page_and_type(page);
+    else
+        put_page(page);
 }
 
 /*
@@ -700,10 +725,9 @@ get_page_from_l1e(
      * contribute to writeable mapping refcounts.  (This allows the
      * qemu-dm helper process in dom0 to map the domain's memory without
      * messing up the count of "real" writable mappings.) */
-    okay = (((l1f & _PAGE_RW) && 
-             !(unlikely(paging_mode_external(d) && (d != curr->domain))))
-            ? get_page_and_type(page, d, PGT_writable_page)
-            : get_page(page, d));
+    okay = get_data_page(
+        page, d,
+        (l1f & _PAGE_RW) && !(paging_mode_external(d) && (d != curr->domain)));
     if ( !okay )
     {
         MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
@@ -751,6 +775,7 @@ get_page_from_l2e(
 get_page_from_l2e(
     l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
 {
+    unsigned long mfn = l2e_get_pfn(l2e);
     int rc;
 
     if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
@@ -762,10 +787,37 @@ get_page_from_l2e(
         return -EINVAL;
     }
 
-    rc = get_page_and_type_from_pagenr(
-        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0, 0);
-    if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
-        rc = 0;
+    if ( !(l2e_get_flags(l2e) & _PAGE_PSE) )
+    {
+        rc = get_page_and_type_from_pagenr(mfn, PGT_l1_page_table, d, 0, 0);
+        if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+            rc = 0;
+    }
+    else if ( !opt_allow_hugepage || (mfn & (L1_PAGETABLE_ENTRIES-1)) )
+    {
+        rc = -EINVAL;
+    }
+    else
+    {
+        unsigned long m = mfn;
+        int writeable = !!(l2e_get_flags(l2e) & _PAGE_RW);
+  
+        do {
+            rc = get_data_page(mfn_to_page(m), d, writeable);
+            if ( unlikely(!rc) )
+            {
+                while ( m-- > mfn )
+                    put_data_page(mfn_to_page(m), writeable);
+                return -EINVAL;
+            }
+        } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+
+#ifdef __x86_64__
+        map_pages_to_xen(
+            (unsigned long)mfn_to_virt(mfn), mfn, L1_PAGETABLE_ENTRIES,
+            PAGE_HYPERVISOR | l2e_get_flags(l2e));
+#endif
+    }
 
     return rc;
 }
@@ -954,13 +1006,24 @@ void put_page_from_l1e(l1_pgentry_t l1e,
  */
 static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
 {
-    if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) && 
-         (l2e_get_pfn(l2e) != pfn) )
+    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || (l2e_get_pfn(l2e) == pfn) )
+        return 1;
+
+    if ( l2e_get_flags(l2e) & _PAGE_PSE )
+    {
+        unsigned long mfn = l2e_get_pfn(l2e), m = mfn;
+        int writeable = l2e_get_flags(l2e) & _PAGE_RW;
+        ASSERT(opt_allow_hugepage && !(mfn & (L1_PAGETABLE_ENTRIES-1)));
+        do {
+            put_data_page(mfn_to_page(m), writeable);
+        } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+    }
+    else
     {
         put_page_and_type(l2e_get_page(l2e));
-        return 0;
-    }
-    return 1;
+    }
+
+    return 0;
 }
 
 static int __put_page_type(struct page_info *, int preemptible);
diff -r 1e437b5b418a -r 5fd51e1e9c79 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Wed Nov 05 10:26:19 2008 +0000
+++ b/xen/arch/x86/traps.c      Wed Nov 05 10:57:21 2008 +0000
@@ -723,7 +723,8 @@ static void pv_cpuid(struct cpu_user_reg
     {
         /* Modify Feature Information. */
         __clear_bit(X86_FEATURE_VME, &d);
-        __clear_bit(X86_FEATURE_PSE, &d);
+        if ( !opt_allow_hugepage )
+            __clear_bit(X86_FEATURE_PSE, &d);
         __clear_bit(X86_FEATURE_PGE, &d);
         __clear_bit(X86_FEATURE_MCE, &d);
         __clear_bit(X86_FEATURE_MCA, &d);
@@ -2003,9 +2004,12 @@ static int emulate_privileged_op(struct 
         case 4: /* Read CR4 */
             /*
              * Guests can read CR4 to see what features Xen has enabled. We
-             * therefore lie about PGE & PSE as they are unavailable to guests.
+             * therefore lie about PGE as it is unavailable to guests.
+             * Also disallow PSE if hugepages are not enabled.
              */
-            *reg = read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE);
+            *reg = read_cr4() & ~X86_CR4_PGE;
+            if ( !opt_allow_hugepage )
+                *reg &= ~X86_CR4_PSE;
             break;
 
         default:
diff -r 1e437b5b418a -r 5fd51e1e9c79 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Wed Nov 05 10:26:19 2008 +0000
+++ b/xen/include/asm-x86/mm.h  Wed Nov 05 10:57:21 2008 +0000
@@ -263,6 +263,7 @@ pae_copy_root(struct vcpu *v, l3_pgentry
 
 int check_descriptor(const struct domain *, struct desc_struct *d);
 
+extern int opt_allow_hugepage;
 
 /******************************************************************************
  * With shadow pagetables, the different kinds of address start 
diff -r 1e437b5b418a -r 5fd51e1e9c79 xen/include/asm-x86/x86_32/page.h
--- a/xen/include/asm-x86/x86_32/page.h Wed Nov 05 10:26:19 2008 +0000
+++ b/xen/include/asm-x86/x86_32/page.h Wed Nov 05 10:57:21 2008 +0000
@@ -112,7 +112,7 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
 #define BASE_DISALLOW_MASK (0xFFFFF198U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
-#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK & ~_PAGE_PSE)
 #define L3_DISALLOW_MASK 0xFFFFF1FEU /* must-be-zero */
 
 #endif /* __X86_32_PAGE_H__ */
diff -r 1e437b5b418a -r 5fd51e1e9c79 xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Wed Nov 05 10:26:19 2008 +0000
+++ b/xen/include/asm-x86/x86_64/page.h Wed Nov 05 10:57:21 2008 +0000
@@ -115,7 +115,7 @@ typedef l4_pgentry_t root_pgentry_t;
 #define BASE_DISALLOW_MASK (0xFF800198U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
-#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK & ~_PAGE_PSE)
 #define L3_DISALLOW_MASK (BASE_DISALLOW_MASK)
 #define L4_DISALLOW_MASK (BASE_DISALLOW_MASK)
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted by Rackspace, which monitors our
servers 24x7x365, and is backed by Rackspace's Fanatical Support®.