[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] The internal Xen x86 emulator is fixed to handle shared/sharable pages correctly.



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1261031276 0
# Node ID 7c47306f59bf9128150b0f7d9710b000f6e75b71
# Parent  257bd5e90294b7e768224c89908745e66efcbcac
The internal Xen x86 emulator is fixed to handle shared/sharable pages correctly.
If pages cannot be unshared immediately (due to lack of free memory required to
create private copies) the VCPU under emulation is paused, and the emulator
returns X86EMUL_RETRY, which will get resolved after some memory is freed back
to Xen (possibly through host paging).

Signed-off-by: Grzegorz Milos <Grzegorz.Milos@xxxxxxxxxx>
---
 xen/arch/x86/hvm/emulate.c        |   14 ++++-
 xen/arch/x86/hvm/hvm.c            |   23 +++++++--
 xen/arch/x86/hvm/intercept.c      |   19 +++++--
 xen/arch/x86/hvm/io.c             |   16 ++++--
 xen/arch/x86/mm/guest_walk.c      |   96 +++++++++++++++++++-------------------
 xen/arch/x86/mm/hap/guest_walk.c  |   18 ++++++-
 xen/include/asm-x86/hvm/support.h |    1 
 xen/include/asm-x86/page.h        |    1 
 xen/include/asm-x86/processor.h   |    1 
 9 files changed, 126 insertions(+), 63 deletions(-)

diff -r 257bd5e90294 -r 7c47306f59bf xen/arch/x86/hvm/emulate.c
--- a/xen/arch/x86/hvm/emulate.c        Thu Dec 17 06:27:56 2009 +0000
+++ b/xen/arch/x86/hvm/emulate.c        Thu Dec 17 06:27:56 2009 +0000
@@ -62,12 +62,14 @@ static int hvmemul_do_io(
     int rc;
 
     /* Check for paged out page */
-    ram_mfn = gfn_to_mfn(current->domain, ram_gfn, &p2mt);
+    ram_mfn = gfn_to_mfn_unshare(current->domain, ram_gfn, &p2mt, 0);
     if ( p2m_is_paging(p2mt) )
     {
         p2m_mem_paging_populate(curr->domain, ram_gfn);
         return X86EMUL_RETRY;
     }
+    if ( p2m_is_shared(p2mt) )
+        return X86EMUL_RETRY;
 
     /*
      * Weird-sized accesses have undefined behaviour: we discard writes
@@ -282,7 +284,7 @@ static int hvmemul_linear_to_phys(
     }
     else if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN )
     {
-        if ( pfec == PFEC_page_paged )
+        if ( pfec == PFEC_page_paged || pfec == PFEC_page_shared )
             return X86EMUL_RETRY;
         hvm_inject_exception(TRAP_page_fault, pfec, addr);
         return X86EMUL_EXCEPTION;
@@ -299,7 +301,7 @@ static int hvmemul_linear_to_phys(
         /* Is it contiguous with the preceding PFNs? If not then we're done. */
         if ( (npfn == INVALID_GFN) || (npfn != (pfn + (reverse ? -i : i))) )
         {
-            if ( pfec == PFEC_page_paged )
+            if ( pfec == PFEC_page_paged || pfec == PFEC_page_shared )
                 return X86EMUL_RETRY;
             done /= bytes_per_rep;
             if ( done == 0 )
@@ -441,6 +443,8 @@ static int __hvmemul_read(
         return hvmemul_do_mmio(gpa, &reps, bytes, 0, IOREQ_READ, 0, p_data);
     case HVMCOPY_gfn_paged_out:
         return X86EMUL_RETRY;
+    case HVMCOPY_gfn_shared:
+        return X86EMUL_RETRY;
     default:
         break;
     }
@@ -533,6 +537,8 @@ static int hvmemul_write(
                                IOREQ_WRITE, 0, p_data);
     case HVMCOPY_gfn_paged_out:
         return X86EMUL_RETRY;
+    case HVMCOPY_gfn_shared:
+        return X86EMUL_RETRY;
     default:
         break;
     }
@@ -707,6 +713,8 @@ static int hvmemul_rep_movs(
     xfree(buf);
 
     if ( rc == HVMCOPY_gfn_paged_out )
+        return X86EMUL_RETRY;
+    if ( rc == HVMCOPY_gfn_shared )
         return X86EMUL_RETRY;
     if ( rc != HVMCOPY_okay )
     {
diff -r 257bd5e90294 -r 7c47306f59bf xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Thu Dec 17 06:27:56 2009 +0000
+++ b/xen/arch/x86/hvm/hvm.c    Thu Dec 17 06:27:56 2009 +0000
@@ -311,7 +311,7 @@ static int hvm_set_ioreq_page(
     unsigned long mfn;
     void *va;
 
-    mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
+    mfn = mfn_x(gfn_to_mfn_unshare(d, gmfn, &p2mt, 0));
     if ( !p2m_is_ram(p2mt) )
         return -EINVAL;
     if ( p2m_is_paging(p2mt) )
@@ -319,6 +319,8 @@ static int hvm_set_ioreq_page(
         p2m_mem_paging_populate(d, gmfn);
         return -ENOENT;
     }
+    if ( p2m_is_shared(p2mt) )
+        return -ENOENT;
     ASSERT(mfn_valid(mfn));
 
     page = mfn_to_page(mfn);
@@ -1323,7 +1325,7 @@ static void *hvm_map_entry(unsigned long
      * we still treat it as a kernel-mode read (i.e. no access checks). */
     pfec = PFEC_page_present;
     gfn = paging_gva_to_gfn(current, va, &pfec);
-    if ( pfec == PFEC_page_paged )
+    if ( pfec == PFEC_page_paged || pfec == PFEC_page_shared )
         return NULL;
     mfn = mfn_x(gfn_to_mfn_unshare(current->domain, gfn, &p2mt, 0));
     if ( p2m_is_paging(p2mt) )
@@ -1557,6 +1559,8 @@ void hvm_task_switch(
         goto out;
     if ( rc == HVMCOPY_gfn_paged_out )
         goto out;
+    if ( rc == HVMCOPY_gfn_shared )
+        goto out;
 
     eflags = regs->eflags;
     if ( taskswitch_reason == TSW_iret )
@@ -1595,6 +1599,8 @@ void hvm_task_switch(
         goto out;
     if ( rc == HVMCOPY_gfn_paged_out )
         goto out;
+    if ( rc == HVMCOPY_gfn_shared )
+        goto out;
 
     rc = hvm_copy_from_guest_virt(
         &tss, tr.base, sizeof(tss), PFEC_page_present);
@@ -1602,6 +1608,11 @@ void hvm_task_switch(
         goto out;
     if ( rc == HVMCOPY_gfn_paged_out )
         goto out;
+    /* Note: this could be optimised, if the callee functions knew we want RO
+     * access */
+    if ( rc == HVMCOPY_gfn_shared )
+        goto out;
+
 
     if ( hvm_set_cr3(tss.cr3) )
         goto out;
@@ -1639,6 +1650,8 @@ void hvm_task_switch(
         exn_raised = 1;
     if ( rc == HVMCOPY_gfn_paged_out )
         goto out;
+    if ( rc == HVMCOPY_gfn_shared )
+        goto out;
 
     if ( (tss.trace & 1) && !exn_raised )
         hvm_inject_exception(TRAP_debug, tss_sel & 0xfff8, 0);
@@ -1700,6 +1713,8 @@ static enum hvm_copy_result __hvm_copy(
             {
                 if ( pfec == PFEC_page_paged )
                     return HVMCOPY_gfn_paged_out;
+                if ( pfec == PFEC_page_shared )
+                    return HVMCOPY_gfn_shared;
                 if ( flags & HVMCOPY_fault )
                     hvm_inject_exception(TRAP_page_fault, pfec, addr);
                 return HVMCOPY_bad_gva_to_gfn;
@@ -1710,13 +1725,15 @@ static enum hvm_copy_result __hvm_copy(
             gfn = addr >> PAGE_SHIFT;
         }
 
-        mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+        mfn = mfn_x(gfn_to_mfn_unshare(current->domain, gfn, &p2mt, 0));
 
         if ( p2m_is_paging(p2mt) )
         {
             p2m_mem_paging_populate(curr->domain, gfn);
             return HVMCOPY_gfn_paged_out;
         }
+        if ( p2m_is_shared(p2mt) )
+            return HVMCOPY_gfn_shared;
         if ( p2m_is_grant(p2mt) )
             return HVMCOPY_unhandleable;
         if ( !p2m_is_ram(p2mt) )
diff -r 257bd5e90294 -r 7c47306f59bf xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c      Thu Dec 17 06:27:56 2009 +0000
+++ b/xen/arch/x86/hvm/intercept.c      Thu Dec 17 06:27:56 2009 +0000
@@ -72,12 +72,17 @@ static int hvm_mmio_access(struct vcpu *
     {
         for ( i = 0; i < p->count; i++ )
         {
+            int ret;
+
             rc = read_handler(v, p->addr + (sign * i * p->size), p->size,
                               &data);
             if ( rc != X86EMUL_OKAY )
                 break;
-            if ( hvm_copy_to_guest_phys(p->data + (sign * i * p->size), &data,
-                                        p->size) == HVMCOPY_gfn_paged_out )
+            ret = hvm_copy_to_guest_phys(p->data + (sign * i * p->size),
+                                         &data,
+                                         p->size);
+            if ( (ret == HVMCOPY_gfn_paged_out) || 
+                 (ret == HVMCOPY_gfn_shared) )
             {
                 rc = X86EMUL_RETRY;
                 break;
@@ -88,9 +93,13 @@ static int hvm_mmio_access(struct vcpu *
     {
         for ( i = 0; i < p->count; i++ )
         {
-            if ( hvm_copy_from_guest_phys(&data,
-                                          p->data + (sign * i * p->size),
-                                          p->size) == HVMCOPY_gfn_paged_out )
+            int ret;
+
+            ret = hvm_copy_from_guest_phys(&data,
+                                           p->data + (sign * i * p->size),
+                                           p->size);
+            if ( (ret == HVMCOPY_gfn_paged_out) || 
+                 (ret == HVMCOPY_gfn_shared) )
             {
                 rc = X86EMUL_RETRY;
                 break;
diff -r 257bd5e90294 -r 7c47306f59bf xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c     Thu Dec 17 06:27:56 2009 +0000
+++ b/xen/arch/x86/hvm/io.c     Thu Dec 17 06:27:56 2009 +0000
@@ -263,8 +263,11 @@ static int dpci_ioport_read(uint32_t mpo
 
         if ( p->data_is_ptr )
         {
-            if ( hvm_copy_to_guest_phys(p->data + (sign * i * p->size), &data,
-                                        p->size) ==  HVMCOPY_gfn_paged_out )
+            int ret;
+            ret = hvm_copy_to_guest_phys(p->data + (sign * i * p->size), &data,
+                                         p->size);
+            if ( (ret == HVMCOPY_gfn_paged_out) ||
+                 (ret == HVMCOPY_gfn_shared) )
                 return X86EMUL_RETRY;
         }
         else
@@ -284,8 +287,13 @@ static int dpci_ioport_write(uint32_t mp
         data = p->data;
         if ( p->data_is_ptr )
         {
-            if ( hvm_copy_from_guest_phys(&data, p->data + (sign * i * p->size),
-                                          p->size) ==  HVMCOPY_gfn_paged_out )
+            int ret;
+            
+            ret = hvm_copy_from_guest_phys(&data, 
+                                           p->data + (sign * i * p->size),
+                                           p->size);
+            if ( (ret == HVMCOPY_gfn_paged_out) &&
+                 (ret == HVMCOPY_gfn_shared) )
                 return X86EMUL_RETRY;
         }
 
diff -r 257bd5e90294 -r 7c47306f59bf xen/arch/x86/mm/guest_walk.c
--- a/xen/arch/x86/mm/guest_walk.c      Thu Dec 17 06:27:56 2009 +0000
+++ b/xen/arch/x86/mm/guest_walk.c      Thu Dec 17 06:27:56 2009 +0000
@@ -86,6 +86,36 @@ static uint32_t set_ad_bits(void *guest_
     return 0;
 }
 
+static inline void *map_domain_gfn(struct domain *d,
+                                   gfn_t gfn, 
+                                   mfn_t *mfn,
+                                   p2m_type_t *p2mt,
+                                   uint32_t *rc) 
+{
+    /* Translate the gfn, unsharing if shared */
+    *mfn = gfn_to_mfn_unshare(d, gfn_x(gfn), p2mt, 0);
+    if ( p2m_is_paging(*p2mt) )
+    {
+        p2m_mem_paging_populate(d, gfn_x(gfn));
+
+        *rc = _PAGE_PAGED;
+        return NULL;
+    }
+    if ( p2m_is_shared(*p2mt) )
+    {
+        *rc = _PAGE_SHARED;
+        return NULL;
+    }
+    if ( !p2m_is_ram(*p2mt) ) 
+    {
+        *rc |= _PAGE_PRESENT;
+        return NULL;
+    }
+    ASSERT(mfn_valid(mfn_x(*mfn)));
+    
+    return map_domain_page(mfn_x(*mfn));
+}
+
 
 /* Walk the guest pagetables, after the manner of a hardware walker. */
 uint32_t
@@ -124,23 +154,14 @@ guest_walk_tables(struct vcpu *v, unsign
     if ( rc & _PAGE_PRESENT ) goto out;
 
     /* Map the l3 table */
-    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(gw->l4e), &p2mt);
-    if ( p2m_is_paging(p2mt) )
-    {
-        p2m_mem_paging_populate(d, gfn_x(guest_l4e_get_gfn(gw->l4e)));
-
-        rc = _PAGE_PAGED;
-        goto out;
-    }
-    if ( !p2m_is_ram(p2mt) ) 
-    {
-        rc |= _PAGE_PRESENT;
-        goto out;
-    }
-    ASSERT(mfn_valid(mfn_x(gw->l3mfn)));
-
+    l3p = map_domain_gfn(d, 
+                         guest_l4e_get_gfn(gw->l4e), 
+                         &gw->l3mfn,
+                         &p2mt, 
+                         &rc); 
+    if(l3p == NULL)
+        goto out;
     /* Get the l3e and check its flags*/
-    l3p = map_domain_page(mfn_x(gw->l3mfn));
     gw->l3e = l3p[guest_l3_table_offset(va)];
     gflags = guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
     rc |= ((gflags & mflags) ^ mflags);
@@ -160,23 +181,14 @@ guest_walk_tables(struct vcpu *v, unsign
 #endif /* PAE or 64... */
 
     /* Map the l2 table */
-    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(gw->l3e), &p2mt);
-    if ( p2m_is_paging(p2mt) )
-    {
-        p2m_mem_paging_populate(d, gfn_x(guest_l3e_get_gfn(gw->l3e)));
-
-        rc = _PAGE_PAGED;
-        goto out;
-    }
-    if ( !p2m_is_ram(p2mt) )
-    {
-        rc |= _PAGE_PRESENT;
-        goto out;
-    }
-    ASSERT(mfn_valid(mfn_x(gw->l2mfn)));
-
+    l2p = map_domain_gfn(d, 
+                         guest_l3e_get_gfn(gw->l3e), 
+                         &gw->l2mfn,
+                         &p2mt, 
+                         &rc); 
+    if(l2p == NULL)
+        goto out;
     /* Get the l2e */
-    l2p = map_domain_page(mfn_x(gw->l2mfn));
     gw->l2e = l2p[guest_l2_table_offset(va)];
 
 #else /* 32-bit only... */
@@ -225,21 +237,13 @@ guest_walk_tables(struct vcpu *v, unsign
     else 
     {
         /* Not a superpage: carry on and find the l1e. */
-        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(gw->l2e), &p2mt);
-        if ( p2m_is_paging(p2mt) )
-        {
-            p2m_mem_paging_populate(d, gfn_x(guest_l2e_get_gfn(gw->l2e)));
-
-            rc = _PAGE_PAGED;
+        l1p = map_domain_gfn(d, 
+                             guest_l2e_get_gfn(gw->l2e), 
+                             &gw->l1mfn,
+                             &p2mt,
+                             &rc);
+        if(l1p == NULL)
             goto out;
-        }
-        if ( !p2m_is_ram(p2mt) )
-        {
-            rc |= _PAGE_PRESENT;
-            goto out;
-        }
-        ASSERT(mfn_valid(mfn_x(gw->l1mfn)));
-        l1p = map_domain_page(mfn_x(gw->l1mfn));
         gw->l1e = l1p[guest_l1_table_offset(va)];
         gflags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
         rc |= ((gflags & mflags) ^ mflags);
diff -r 257bd5e90294 -r 7c47306f59bf xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c  Thu Dec 17 06:27:56 2009 +0000
+++ b/xen/arch/x86/mm/hap/guest_walk.c  Thu Dec 17 06:27:56 2009 +0000
@@ -32,6 +32,7 @@
 #if GUEST_PAGING_LEVELS <= CONFIG_PAGING_LEVELS
 
 #include <asm/guest_pt.h>
+#include <asm/p2m.h>
 
 unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
     struct vcpu *v, unsigned long gva, uint32_t *pfec)
@@ -45,13 +46,18 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
 
     /* Get the top-level table's MFN */
     cr3 = v->arch.hvm_vcpu.guest_cr[3];
-    top_mfn = gfn_to_mfn(v->domain, _gfn(cr3 >> PAGE_SHIFT), &p2mt);
+    top_mfn = gfn_to_mfn_unshare(v->domain, cr3 >> PAGE_SHIFT, &p2mt, 0);
     if ( p2m_is_paging(p2mt) )
     {
 //        if ( p2m_is_paged(p2mt) )
             p2m_mem_paging_populate(v->domain, cr3 >> PAGE_SHIFT);
 
         pfec[0] = PFEC_page_paged;
+        return INVALID_GFN;
+    }
+    if ( p2m_is_shared(p2mt) )
+    {
+        pfec[0] = PFEC_page_shared;
         return INVALID_GFN;
     }
     if ( !p2m_is_ram(p2mt) )
@@ -73,13 +79,18 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
     if ( missing == 0 )
     {
         gfn_t gfn = guest_l1e_get_gfn(gw.l1e);
-        gfn_to_mfn(v->domain, gfn, &p2mt);
+        gfn_to_mfn_unshare(v->domain, gfn_x(gfn), &p2mt, 0);
         if ( p2m_is_paging(p2mt) )
         {
 //            if ( p2m_is_paged(p2mt) )
                 p2m_mem_paging_populate(v->domain, gfn_x(gfn));
 
             pfec[0] = PFEC_page_paged;
+            return INVALID_GFN;
+        }
+        if ( p2m_is_shared(p2mt) )
+        {
+            pfec[0] = PFEC_page_shared;
             return INVALID_GFN;
         }
 
@@ -91,6 +102,9 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
 
     if ( missing & _PAGE_PAGED )
         pfec[0] = PFEC_page_paged;
+
+    if ( missing & _PAGE_SHARED )
+        pfec[0] = PFEC_page_shared;
 
     return INVALID_GFN;
 }
diff -r 257bd5e90294 -r 7c47306f59bf xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Thu Dec 17 06:27:56 2009 +0000
+++ b/xen/include/asm-x86/hvm/support.h Thu Dec 17 06:27:56 2009 +0000
@@ -74,6 +74,7 @@ enum hvm_copy_result {
     HVMCOPY_bad_gfn_to_mfn,
     HVMCOPY_unhandleable,
     HVMCOPY_gfn_paged_out,
+    HVMCOPY_gfn_shared,
 };
 
 /*
diff -r 257bd5e90294 -r 7c47306f59bf xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Thu Dec 17 06:27:56 2009 +0000
+++ b/xen/include/asm-x86/page.h        Thu Dec 17 06:27:56 2009 +0000
@@ -312,6 +312,7 @@ void setup_idle_pagetable(void);
 #define _PAGE_AVAIL    0xE00U
 #define _PAGE_PSE_PAT 0x1000U
 #define _PAGE_PAGED   0x2000U
+#define _PAGE_SHARED  0x4000U
 
 /*
  * Debug option: Ensure that granted mappings are not implicitly unmapped.
diff -r 257bd5e90294 -r 7c47306f59bf xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Thu Dec 17 06:27:56 2009 +0000
+++ b/xen/include/asm-x86/processor.h   Thu Dec 17 06:27:56 2009 +0000
@@ -133,6 +133,7 @@
 #define PFEC_reserved_bit   (1U<<3)
 #define PFEC_insn_fetch     (1U<<4)
 #define PFEC_page_paged     (1U<<5)
+#define PFEC_page_shared    (1U<<6)
 
 #ifndef __ASSEMBLY__
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.