[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] x86/mm: Handle 1GiB superpages in the pagetable walker.



# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxx>
# Date 1311857109 -3600
# Node ID ba78ea7784c9eaed396cfe6c04fb8e62c9dd8407
# Parent  fa4e2ca9ecffbc432b451f495ad0a403644a6be8
x86/mm: Handle 1GiB superpages in the pagetable walker.

This allows HAP guests to use 1GiB superpages.  Shadow and PV guests
still can't use them without more support in shadow/* and mm.c.

Signed-off-by: Christoph Egger <Christoph.Egger@xxxxxxx>
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
---


diff -r fa4e2ca9ecff -r ba78ea7784c9 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Tue Jul 26 18:37:32 2011 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Thu Jul 28 13:45:09 2011 +0100
@@ -2385,6 +2385,7 @@
                                    unsigned int *ecx, unsigned int *edx)
 {
     struct vcpu *v = current;
+    struct domain *d = v->domain;
     unsigned int count = *ecx;
 
     if ( cpuid_viridian_leaves(input, eax, ebx, ecx, edx) )
@@ -2393,7 +2394,7 @@
     if ( cpuid_hypervisor_leaves(input, count, eax, ebx, ecx, edx) )
         return;
 
-    domain_cpuid(v->domain, input, *ecx, eax, ebx, ecx, edx);
+    domain_cpuid(d, input, *ecx, eax, ebx, ecx, edx);
 
     switch ( input )
     {
@@ -2429,7 +2430,7 @@
             {
                 if ( !(v->arch.xcr0 & (1ULL << sub_leaf)) )
                     continue;
-                domain_cpuid(v->domain, input, sub_leaf, &_eax, &_ebx, &_ecx, 
+                domain_cpuid(d, input, sub_leaf, &_eax, &_ebx, &_ecx, 
                              &_edx);
                 if ( (_eax + _ebx) > *ebx )
                     *ebx = _eax + _ebx;
@@ -2440,9 +2441,12 @@
     case 0x80000001:
         /* We expose RDTSCP feature to guest only when
            tsc_mode == TSC_MODE_DEFAULT and host_tsc_is_safe() returns 1 */
-        if ( v->domain->arch.tsc_mode != TSC_MODE_DEFAULT ||
+        if ( d->arch.tsc_mode != TSC_MODE_DEFAULT ||
              !host_tsc_is_safe() )
             *edx &= ~cpufeat_mask(X86_FEATURE_RDTSCP);
+        /* Hide 1GB-superpage feature if we can't emulate it. */
+        if (!hvm_pse1gb_supported(d))
+            *edx &= ~cpufeat_mask(X86_FEATURE_PAGE1GB);
         break;
     }
 }
diff -r fa4e2ca9ecff -r ba78ea7784c9 xen/arch/x86/mm/guest_walk.c
--- a/xen/arch/x86/mm/guest_walk.c      Tue Jul 26 18:37:32 2011 +0100
+++ b/xen/arch/x86/mm/guest_walk.c      Thu Jul 28 13:45:09 2011 +0100
@@ -134,7 +134,8 @@
     guest_l4e_t *l4p;
 #endif
     uint32_t gflags, mflags, iflags, rc = 0;
-    int pse, smep;
+    int smep;
+    bool_t pse1G = 0, pse2M = 0;
 
     perfc_incr(guest_walk);
     memset(gw, 0, sizeof(*gw));
@@ -181,6 +182,37 @@
     rc |= ((gflags & mflags) ^ mflags);
     if ( rc & _PAGE_PRESENT )
         goto out;
+    
+    pse1G = (gflags & _PAGE_PSE) && guest_supports_1G_superpages(v); 
+
+    if ( pse1G )
+    {
+        /* Generate a fake l1 table entry so callers don't all 
+         * have to understand superpages. */
+        gfn_t start = guest_l3e_get_gfn(gw->l3e);
+        /* Grant full access in the l1e, since all the guest entry's
+         * access controls are enforced in the l3e. */
+        int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
+                     _PAGE_ACCESSED|_PAGE_DIRTY);
+        /* Import cache-control bits. Note that _PAGE_PAT is actually
+         * _PAGE_PSE, and it is always set. We will clear it in case
+         * _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear. */
+        flags |= (guest_l3e_get_flags(gw->l3e)
+                  & (_PAGE_PAT|_PAGE_PWT|_PAGE_PCD));
+        if ( !(gfn_x(start) & 1) )
+            /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */
+            flags &= ~_PAGE_PAT;
+
+        if ( gfn_x(start) & GUEST_L3_GFN_MASK & ~0x1 )
+            rc |= _PAGE_INVALID_BITS;
+
+        /* Increment the pfn by the right number of 4k pages. */
+        start = _gfn((gfn_x(start) & ~GUEST_L3_GFN_MASK) +
+                     ((va >> PAGE_SHIFT) & GUEST_L3_GFN_MASK));
+        gw->l1e = guest_l1e_from_gfn(start, flags);
+        gw->l2mfn = gw->l1mfn = _mfn(INVALID_MFN);
+        goto set_ad;
+    }
 
 #else /* PAE only... */
 
@@ -219,10 +251,9 @@
     if ( rc & _PAGE_PRESENT )
         goto out;
 
-    pse = (guest_supports_superpages(v) && 
-           (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)); 
+    pse2M = (gflags & _PAGE_PSE) && guest_supports_superpages(v); 
 
-    if ( pse )
+    if ( pse2M )
     {
         /* Special case: this guest VA is in a PSE superpage, so there's
          * no guest l1e.  We make one up so that the propagation code
@@ -242,9 +273,7 @@
             /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */
             flags &= ~_PAGE_PAT;
 
-#define GUEST_L2_GFN_ALIGN (1 << (GUEST_L2_PAGETABLE_SHIFT - \
-                                  GUEST_L1_PAGETABLE_SHIFT))
-        if ( gfn_x(start) & (GUEST_L2_GFN_ALIGN - 1) & ~0x1 )
+        if ( gfn_x(start) & GUEST_L2_GFN_MASK & ~0x1 )
         {
 #if GUEST_PAGING_LEVELS == 2
             /*
@@ -262,7 +291,7 @@
 
         /* Increment the pfn by the right number of 4k pages.  
          * Mask out PAT and invalid bits. */
-        start = _gfn((gfn_x(start) & ~(GUEST_L2_GFN_ALIGN - 1)) +
+        start = _gfn((gfn_x(start) & ~GUEST_L2_GFN_MASK) +
                      guest_l1_table_offset(va));
         gw->l1e = guest_l1e_from_gfn(start, flags);
         gw->l1mfn = _mfn(INVALID_MFN);
@@ -282,6 +311,9 @@
         rc |= ((gflags & mflags) ^ mflags);
     }
 
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+set_ad:
+#endif
     /* Now re-invert the user-mode requirement for SMEP. */
     if ( smep ) 
         rc ^= _PAGE_USER;
@@ -295,17 +327,21 @@
 #if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */
         if ( set_ad_bits(l4p + guest_l4_table_offset(va), &gw->l4e, 0) )
             paging_mark_dirty(d, mfn_x(gw->l4mfn));
-        if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e, 0) )
+        if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e,
+                         (pse1G && (pfec & PFEC_write_access))) )
             paging_mark_dirty(d, mfn_x(gw->l3mfn));
 #endif
-        if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e,
-                         (pse && (pfec & PFEC_write_access))) )
-            paging_mark_dirty(d, mfn_x(gw->l2mfn));            
-        if ( !pse ) 
+        if ( !pse1G ) 
         {
-            if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e, 
-                             (pfec & PFEC_write_access)) )
-                paging_mark_dirty(d, mfn_x(gw->l1mfn));
+            if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e,
+                             (pse2M && (pfec & PFEC_write_access))) )
+                paging_mark_dirty(d, mfn_x(gw->l2mfn));            
+            if ( !pse2M ) 
+            {
+                if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e, 
+                                 (pfec & PFEC_write_access)) )
+                    paging_mark_dirty(d, mfn_x(gw->l1mfn));
+            }
         }
     }
 
diff -r fa4e2ca9ecff -r ba78ea7784c9 xen/include/asm-x86/guest_pt.h
--- a/xen/include/asm-x86/guest_pt.h    Tue Jul 26 18:37:32 2011 +0100
+++ b/xen/include/asm-x86/guest_pt.h    Thu Jul 28 13:45:09 2011 +0100
@@ -177,6 +177,11 @@
 
 #endif /* GUEST_PAGING_LEVELS != 2 */
 
+/* Mask of the GFNs covered by an L2 or L3 superpage */
+#define GUEST_L2_GFN_MASK (GUEST_L1_PAGETABLE_ENTRIES - 1)
+#define GUEST_L3_GFN_MASK \
+    ((GUEST_L2_PAGETABLE_ENTRIES * GUEST_L1_PAGETABLE_ENTRIES) - 1)
+
 
 /* Which pagetable features are supported on this vcpu? */
 
@@ -194,6 +199,12 @@
 }
 
 static inline int
+guest_supports_1G_superpages(struct vcpu *v)
+{
+    return (GUEST_PAGING_LEVELS >= 4 && hvm_pse1gb_supported(v->domain));
+}
+
+static inline int
 guest_supports_nx(struct vcpu *v)
 {
     if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
diff -r fa4e2ca9ecff -r ba78ea7784c9 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Tue Jul 26 18:37:32 2011 +0100
+++ b/xen/include/asm-x86/hvm/hvm.h     Thu Jul 28 13:45:09 2011 +0100
@@ -219,11 +219,16 @@
 #define hvm_nx_enabled(v) \
     (!!((v)->arch.hvm_vcpu.guest_efer & EFER_NX))
 
+/* Can we use superpages in the HAP p2m table? */
 #define hvm_hap_has_1gb(d) \
     (hvm_funcs.hap_capabilities & HVM_HAP_SUPERPAGE_1GB)
 #define hvm_hap_has_2mb(d) \
     (hvm_funcs.hap_capabilities & HVM_HAP_SUPERPAGE_2MB)
 
+/* Can the guest use 1GB superpages in its own pagetables? */
+#define hvm_pse1gb_supported(d) \
+    (cpu_has_page1gb && paging_mode_hap(d))
+
 #ifdef __x86_64__
 #define hvm_long_mode_enabled(v) \
     ((v)->arch.hvm_vcpu.guest_efer & EFER_LMA)

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with Rackspace, monitoring our
servers 24x7x365 and backed by Rackspace's Fanatical Support®.