[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out


  • To: xen-devel@xxxxxxxxxxxxxxxxxxx
  • From: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
  • Date: Thu, 24 Nov 2011 08:41:31 -0500
  • Cc: andres@xxxxxxxxxxxxxx, keir.xen@xxxxxxxxx, tim@xxxxxxx, JBeulich@xxxxxxxx, adin@xxxxxxxxxxxxxx
  • Delivery-date: Thu, 24 Nov 2011 13:48:47 +0000
  • Domainkey-signature: a=rsa-sha1; c=nofws; d=lagarcavilla.org; h=content-type :mime-version:content-transfer-encoding:subject:message-id :in-reply-to:references:date:from:to:cc; q=dns; s= lagarcavilla.org; b=LO9h2gikVeX7YTBnkt2sXgZA8njFgvshenXo23ZmcrUJ VpL7rFIJ2ZGBw1z8P4XuOk2uOZRZrLgiT+87+ZkMZnL3hOuwWxFR2hWlivGIs1tO qYJNyuNSspeZYTQVln9vnRCEu/vtoFzQ8sLREFPmi5R3UwnSUC4c4Um5gYzTJpM=
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>

 xen/arch/x86/hvm/hvm.c             |  69 ++++++++++++++++++++++++-------------
 xen/arch/x86/hvm/nestedhvm.c       |   4 +-
 xen/arch/x86/hvm/svm/nestedsvm.c   |  23 ++++++------
 xen/arch/x86/hvm/vmx/vvmx.c        |  36 +++++++++++--------
 xen/include/asm-x86/hvm/hvm.h      |   9 +++-
 xen/include/asm-x86/hvm/vcpu.h     |   1 +
 xen/include/asm-x86/hvm/vmx/vvmx.h |   1 +
 7 files changed, 88 insertions(+), 55 deletions(-)


The nested hvm code maps pages of the guest hvm. These maps live beyond
a hypervisor entry/exit pair, and thus their liveness cannot be ensured
with get_gfn/put_gfn critical sections. Ensure their liveness by
increasing the page ref count, instead.

Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>

diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1801,11 +1801,15 @@ int hvm_virtual_to_linear_addr(
     return 0;
 }
 
-/* We leave this function holding a lock on the p2m entry */
-static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable)
+/* On non-NULL return, we leave this function holding an additional 
+ * ref on the underlying mfn, if any */
+static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable,
+                                    struct page_info **page)
 {
+    void *map;
     unsigned long mfn;
     p2m_type_t p2mt;
+    struct page_info *pg;
     struct domain *d = current->domain;
 
     mfn = mfn_x(writable
@@ -1828,27 +1832,43 @@ static void *__hvm_map_guest_frame(unsig
     if ( writable )
         paging_mark_dirty(d, mfn);
 
-    return map_domain_page(mfn);
+    pg = mfn_to_page(mfn);
+    if (page)
+    {
+        page_get_owner_and_reference(pg);
+        *page = pg;
+    }
+
+    map = map_domain_page(mfn);
+    put_gfn(d, gfn);
+    return map;
 }
 
-void *hvm_map_guest_frame_rw(unsigned long gfn)
+void *hvm_map_guest_frame_rw(unsigned long gfn,
+                             struct page_info **page)
 {
-    return __hvm_map_guest_frame(gfn, 1);
+    return __hvm_map_guest_frame(gfn, 1, page);
 }
 
-void *hvm_map_guest_frame_ro(unsigned long gfn)
+void *hvm_map_guest_frame_ro(unsigned long gfn,
+                             struct page_info **page)
 {
-    return __hvm_map_guest_frame(gfn, 0);
+    return __hvm_map_guest_frame(gfn, 0, page);
 }
 
-void hvm_unmap_guest_frame(void *p)
+void hvm_unmap_guest_frame(void *p, struct page_info *page)
 {
     if ( p )
+    {
         unmap_domain_page(p);
+        if ( page )
+            put_page(page);
+    }
 }
 
-static void *hvm_map_entry(unsigned long va, unsigned long *gfn)
+static void *hvm_map_entry(unsigned long va, struct page_info **page)
 {
+    unsigned long gfn;
     uint32_t pfec;
     char *v;
 
@@ -1865,11 +1885,11 @@ static void *hvm_map_entry(unsigned long
      * treat it as a kernel-mode read (i.e. no access checks).
      */
     pfec = PFEC_page_present;
-    *gfn = paging_gva_to_gfn(current, va, &pfec);
+    gfn = paging_gva_to_gfn(current, va, &pfec);
     if ( (pfec == PFEC_page_paged) || (pfec == PFEC_page_shared) )
         goto fail;
 
-    v = hvm_map_guest_frame_rw(*gfn);
+    v = hvm_map_guest_frame_rw(gfn, page);
     if ( v == NULL )
         goto fail;
 
@@ -1880,11 +1900,9 @@ static void *hvm_map_entry(unsigned long
     return NULL;
 }
 
-static void hvm_unmap_entry(void *p, unsigned long gfn)
+static void hvm_unmap_entry(void *p, struct page_info *page)
 {
-    hvm_unmap_guest_frame(p);
-    if ( p && (gfn != INVALID_GFN) )
-        put_gfn(current->domain, gfn);
+    hvm_unmap_guest_frame(p, page);
 }
 
 static int hvm_load_segment_selector(
@@ -1896,7 +1914,7 @@ static int hvm_load_segment_selector(
     int fault_type = TRAP_invalid_tss;
     struct cpu_user_regs *regs = guest_cpu_user_regs();
     struct vcpu *v = current;
-    unsigned long pdesc_gfn = INVALID_GFN;
+    struct page_info *pdesc_pg = NULL;
 
     if ( regs->eflags & X86_EFLAGS_VM )
     {
@@ -1930,7 +1948,7 @@ static int hvm_load_segment_selector(
     if ( ((sel & 0xfff8) + 7) > desctab.limit )
         goto fail;
 
-    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_gfn);
+    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_pg);
     if ( pdesc == NULL )
         goto hvm_map_fail;
 
@@ -1990,7 +2008,7 @@ static int hvm_load_segment_selector(
     desc.b |= 0x100;
 
  skip_accessed_flag:
-    hvm_unmap_entry(pdesc, pdesc_gfn);
+    hvm_unmap_entry(pdesc, pdesc_pg);
 
     segr.base = (((desc.b <<  0) & 0xff000000u) |
                  ((desc.b << 16) & 0x00ff0000u) |
@@ -2006,7 +2024,7 @@ static int hvm_load_segment_selector(
     return 0;
 
  unmap_and_fail:
-    hvm_unmap_entry(pdesc, pdesc_gfn);
+    hvm_unmap_entry(pdesc, pdesc_pg);
  fail:
     hvm_inject_exception(fault_type, sel & 0xfffc, 0);
  hvm_map_fail:
@@ -2021,7 +2039,8 @@ void hvm_task_switch(
     struct cpu_user_regs *regs = guest_cpu_user_regs();
     struct segment_register gdt, tr, prev_tr, segr;
     struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
-    unsigned long eflags, optss_gfn = INVALID_GFN, nptss_gfn = INVALID_GFN;
+    unsigned long eflags;
+    struct page_info *optss_pg = NULL, *nptss_pg = NULL;
     int exn_raised, rc;
     struct {
         u16 back_link,__blh;
@@ -2047,11 +2066,13 @@ void hvm_task_switch(
         goto out;
     }
 
-    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), &optss_gfn);
+    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), 
+                                &optss_pg);
     if ( optss_desc == NULL )
         goto out;
 
-    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), &nptss_gfn);
+    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), 
+                                &nptss_pg);
     if ( nptss_desc == NULL )
         goto out;
 
@@ -2216,8 +2237,8 @@ void hvm_task_switch(
     }
 
  out:
-    hvm_unmap_entry(optss_desc, optss_gfn);
-    hvm_unmap_entry(nptss_desc, nptss_gfn);
+    hvm_unmap_entry(optss_desc, optss_pg);
+    hvm_unmap_entry(nptss_desc, nptss_pg);
 }
 
 #define HVMCOPY_from_guest (0u<<0)
diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/nestedhvm.c
--- a/xen/arch/x86/hvm/nestedhvm.c
+++ b/xen/arch/x86/hvm/nestedhvm.c
@@ -56,9 +56,11 @@ nestedhvm_vcpu_reset(struct vcpu *v)
     nv->nv_ioportED = 0;
 
     if (nv->nv_vvmcx)
-        hvm_unmap_guest_frame(nv->nv_vvmcx);
+        hvm_unmap_guest_frame(nv->nv_vvmcx, 
+                              nv->nv_vvmcx_pg);
     nv->nv_vvmcx = NULL;
     nv->nv_vvmcxaddr = VMCX_EADDR;
+    nv->nv_vvmcx_pg  = NULL;
     nv->nv_flushp2m = 0;
     nv->nv_p2m = NULL;
 
diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/svm/nestedsvm.c
--- a/xen/arch/x86/hvm/svm/nestedsvm.c
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -71,20 +71,18 @@ int nestedsvm_vmcb_map(struct vcpu *v, u
     if (nv->nv_vvmcx != NULL && nv->nv_vvmcxaddr != vmcbaddr) {
         ASSERT(nv->nv_vvmcx != NULL);
         ASSERT(nv->nv_vvmcxaddr != VMCX_EADDR);
-        hvm_unmap_guest_frame(nv->nv_vvmcx);
+        hvm_unmap_guest_frame(nv->nv_vvmcx, nv->nv_vvmcx_pg);
         nv->nv_vvmcx = NULL;
         nv->nv_vvmcxaddr = VMCX_EADDR;
+        nv->nv_vvmcx_pg = NULL;
     }
 
     if (nv->nv_vvmcx == NULL) {
-        nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT);
+        nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT,
+                                                &nv->nv_vvmcx_pg);
         if (nv->nv_vvmcx == NULL)
             return 0;
         nv->nv_vvmcxaddr = vmcbaddr;
-        /* put_gfn here even though the map survives beyond this caller.
-         * The map can likely survive beyond a hypervisor exit, thus we
-         * need to put the gfn */
-        put_gfn(current->domain, vmcbaddr >> PAGE_SHIFT);
     }
 
     return 1;
@@ -336,6 +334,7 @@ static int nsvm_vmrun_permissionmap(stru
     unsigned int i;
     enum hvm_copy_result ret;
     unsigned long *ns_viomap;
+    struct page_info *ns_viomap_pg = NULL;
     bool_t ioport_80, ioport_ed;
 
     ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;
@@ -353,12 +352,12 @@ static int nsvm_vmrun_permissionmap(stru
     svm->ns_oiomap_pa = svm->ns_iomap_pa;
     svm->ns_iomap_pa = ns_vmcb->_iopm_base_pa;
 
-    ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT);
+    ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT,
+                                        &ns_viomap_pg);
     ASSERT(ns_viomap != NULL);
     ioport_80 = test_bit(0x80, ns_viomap);
     ioport_ed = test_bit(0xed, ns_viomap);
-    hvm_unmap_guest_frame(ns_viomap);
-    put_gfn(current->domain, svm->ns_iomap_pa >> PAGE_SHIFT);
+    hvm_unmap_guest_frame(ns_viomap, ns_viomap_pg);
 
     svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed);
 
@@ -863,6 +862,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t 
     uint16_t port;
     bool_t enabled;
     unsigned long gfn = 0; /* gcc ... */
+    struct page_info *io_bitmap_pg = NULL;
 
     ioinfo.bytes = exitinfo1;
     port = ioinfo.fields.port;
@@ -880,7 +880,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t 
         break;
     }
 
-    io_bitmap = hvm_map_guest_frame_ro(gfn);
+    io_bitmap = hvm_map_guest_frame_ro(gfn, &io_bitmap_pg);
     if (io_bitmap == NULL) {
         gdprintk(XENLOG_ERR,
             "IOIO intercept: mapping of permission map failed\n");
@@ -888,8 +888,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t 
     }
 
     enabled = test_bit(port, io_bitmap);
-    hvm_unmap_guest_frame(io_bitmap);
-    put_gfn(current->domain, gfn);
+    hvm_unmap_guest_frame(io_bitmap, io_bitmap_pg);
 
     if (!enabled)
         return NESTEDHVM_VMEXIT_HOST;
diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/vmx/vvmx.c
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -44,10 +44,13 @@ int nvmx_vcpu_initialise(struct vcpu *v)
     nvmx->vmxon_region_pa = 0;
     nvcpu->nv_vvmcx = NULL;
     nvcpu->nv_vvmcxaddr = VMCX_EADDR;
+    nvcpu->nv_vvmcx_pg = NULL;
     nvmx->intr.intr_info = 0;
     nvmx->intr.error_code = 0;
     nvmx->iobitmap[0] = NULL;
     nvmx->iobitmap[1] = NULL;
+    nvmx->iobitmap_pg[0] = NULL;
+    nvmx->iobitmap_pg[1] = NULL;
     return 0;
 out:
     return -ENOMEM;
@@ -558,12 +561,14 @@ static void __map_io_bitmap(struct vcpu 
 
     index = vmcs_reg == IO_BITMAP_A ? 0 : 1;
     if (nvmx->iobitmap[index])
-        hvm_unmap_guest_frame (nvmx->iobitmap[index]); 
+    {
+        hvm_unmap_guest_frame (nvmx->iobitmap[index],
+                               nvmx->iobitmap_pg[index]); 
+        nvmx->iobitmap_pg[index] = NULL;
+    }
     gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg);
-    nvmx->iobitmap[index] = hvm_map_guest_frame_ro (gpa >> PAGE_SHIFT);
-    /* See comment in nestedsvm_vmcb_map re putting this gfn and 
-     * liveness of the map it backs */
-    put_gfn(current->domain, gpa >> PAGE_SHIFT);
+    nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT,
+                                             &nvmx->iobitmap_pg[index]);
 }
 
 static inline void map_io_bitmap_all(struct vcpu *v)
@@ -580,13 +585,16 @@ static void nvmx_purge_vvmcs(struct vcpu
 
     __clear_current_vvmcs(v);
     if ( nvcpu->nv_vvmcxaddr != VMCX_EADDR )
-        hvm_unmap_guest_frame(nvcpu->nv_vvmcx);
-    nvcpu->nv_vvmcx == NULL;
+        hvm_unmap_guest_frame(nvcpu->nv_vvmcx, nvcpu->nv_vvmcx_pg);
+    nvcpu->nv_vvmcx = NULL;
     nvcpu->nv_vvmcxaddr = VMCX_EADDR;
+    nvcpu->nv_vvmcx_pg = NULL;
     for (i=0; i<2; i++) {
         if ( nvmx->iobitmap[i] ) {
-            hvm_unmap_guest_frame(nvmx->iobitmap[i]); 
+            hvm_unmap_guest_frame(nvmx->iobitmap[i], 
+                                  nvmx->iobitmap_pg[i]); 
             nvmx->iobitmap[i] = NULL;
+            nvmx->iobitmap_pg[i] = NULL;
         }
     }
 }
@@ -1138,12 +1146,10 @@ int nvmx_handle_vmptrld(struct cpu_user_
 
     if ( nvcpu->nv_vvmcxaddr == VMCX_EADDR )
     {
-        nvcpu->nv_vvmcx = hvm_map_guest_frame_rw (gpa >> PAGE_SHIFT);
+        nvcpu->nv_vvmcx = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT,
+                                                 &nvcpu->nv_vvmcx_pg);
         nvcpu->nv_vvmcxaddr = gpa;
         map_io_bitmap_all (v);
-        /* See comment in nestedsvm_vmcb_map regarding putting this 
-         * gfn and liveness of the map that uses it */
-        put_gfn(current->domain, gpa >> PAGE_SHIFT);
     }
 
     vmreturn(regs, VMSUCCEED);
@@ -1201,11 +1207,11 @@ int nvmx_handle_vmclear(struct cpu_user_
     else 
     {
         /* Even if this VMCS isn't the current one, we must clear it. */
-        vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT);
+        struct page_info *vvmcs_pg = NULL;
+        vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT, &vvmcs_pg);
         if ( vvmcs ) 
             __set_vvmcs(vvmcs, NVMX_LAUNCH_STATE, 0);
-        hvm_unmap_guest_frame(vvmcs);
-        put_gfn(current->domain, gpa >> PAGE_SHIFT);
+        hvm_unmap_guest_frame(vvmcs, vvmcs_pg);
     }
 
     vmreturn(regs, VMSUCCEED);
diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -26,6 +26,7 @@
 #include <asm/hvm/asid.h>
 #include <public/domctl.h>
 #include <public/hvm/save.h>
+#include <asm/mm.h>
 
 /* Interrupt acknowledgement sources. */
 enum hvm_intsrc {
@@ -392,9 +393,11 @@ int hvm_virtual_to_linear_addr(
     unsigned int addr_size,
     unsigned long *linear_addr);
 
-void *hvm_map_guest_frame_rw(unsigned long gfn);
-void *hvm_map_guest_frame_ro(unsigned long gfn);
-void hvm_unmap_guest_frame(void *p);
+void *hvm_map_guest_frame_rw(unsigned long gfn,
+                             struct page_info **page);
+void *hvm_map_guest_frame_ro(unsigned long gfn,
+                             struct page_info **page);
+void hvm_unmap_guest_frame(void *p, struct page_info *page);
 
 static inline void hvm_set_info_guest(struct vcpu *v)
 {
diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -77,6 +77,7 @@ struct nestedvcpu {
     void *nv_n2vmcx; /* shadow VMCB/VMCS used to run l2 guest */
 
     uint64_t nv_vvmcxaddr; /* l1 guest physical address of nv_vvmcx */
+    struct page_info *nv_vvmcx_pg; /* page where nv_vvmcx resides */
     uint64_t nv_n1vmcx_pa; /* host physical address of nv_n1vmcx */
     uint64_t nv_n2vmcx_pa; /* host physical address of nv_n2vmcx */
 
diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vmx/vvmx.h
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
@@ -26,6 +26,7 @@
 struct nestedvmx {
     paddr_t    vmxon_region_pa;
     void       *iobitmap[2];           /* map (va) of L1 guest I/O bitmap */
+    struct page_info *iobitmap_pg[2]; /* pages backing L1 guest I/O bitmap */
     /* deferred nested interrupt */
     struct {
         unsigned long intr_info;

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.