[Xen-devel] [PATCH RFC v2 2/4] x86/mem_access: mem_access and mem_event changes to support PV domains



mem_access changes
------------------
New memory access sub-ops, XENMEM_access_op_set_default,
XENMEM_access_op_create_ring_page and XENMEM_access_op_get_ring_mfn,
have been added. The mem_access listener makes these calls during
setup. The ring page is allocated from the xenheap and shared with the
guest. It is freed when mem_access is disabled or when the domain is
shut down or destroyed.
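
As an illustration, a listener's setup might look roughly as follows.
This is a minimal sketch: do_memory_op() is a hypothetical stand-in
for whatever toolstack wrapper issues the memory_op hypercall, while
the sub-ops and the xen_mem_access_op fields are the ones this patch
defines:

    xen_mem_access_op_t mao = { .domid = domid };
    int rc;

    /* Have Xen allocate the xenheap ring page and share it with us. */
    mao.op = XENMEM_access_op_create_ring_page;
    rc = do_memory_op(XENMEM_access_op, &mao, sizeof(mao));

    /* Ask for the mfn of the ring page; it comes back in mao.pfn. */
    mao.op = XENMEM_access_op_get_ring_mfn;
    rc = do_memory_op(XENMEM_access_op, &mao, sizeof(mao));
    /* mao.pfn can now be mapped and used as the ring in place of the
     * HVM ring-page param. */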

XENMEM_access_op_set_default has been added to set the default access
permission for the pages belonging to a PV domain. Unlike for an HVM
domain, the mem_access listener cannot set access permissions for all
pages, since it does not know all the mfns that belong to the PV
domain. The other reason for adding this as a separate sub-op, rather
than folding it into p2m_set_mem_access(), is that the page_info
pointer from which to resume setting default permissions after a
hypercall continuation will not fit in the hypercall op field.
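
Continuing the sketch above, setting the default access is then a
single call. Note that mao.pfn doubles as the continuation cursor, so
it must start out as 0; on preemption Xen stashes its resume pointer
there and continues the hypercall transparently:

    mao.op = XENMEM_access_op_set_default;
    mao.access = XENMEM_access_rx;   /* e.g. make r-x the default */
    mao.pfn = 0;                     /* continuation cursor */
    rc = do_memory_op(XENMEM_access_op, &mao, sizeof(mao));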

The XENMEM_access_op_[sg]et_access hypercalls are modified to
accommodate calls for a PV domain. When setting the access permissions
for an mfn, all shadows of that mfn are dropped; they are recreated
with the new permissions on the next page fault for that mfn. When
getting the permissions for an mfn, the value is returned from the
page's shadow_flags.
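
A sketch of what that lookup amounts to on the hypervisor side
(shadow_get_access() is a hypothetical accessor mirroring the
shadow_set_access() call used in the hunks below; the real lookup in
this series may be shaped differently):

    /* Recover the stored access permission for one mfn of a PV guest. */
    static p2m_access_t mem_access_lookup_pv(mfn_t mfn)
    {
        struct page_info *pg = mfn_to_page(mfn);

        /* For a PV guest the access value is kept in
         * page_info->shadow_flags alongside the other shadow state. */
        return shadow_get_access(pg);
    }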

mem_event changes
-----------------
The XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE/DISABLE hypercalls are
modified to allow mem_access to work with PV domains. When the access
listener enables mem_access for a PV domain, shadow mode is turned on
and the p2m structures are initialized. When it disables mem_access,
shadow mode is turned off.
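
Condensed from the mem_event.c hunk below, the enable path for a PV
domain then orders as follows (a sketch of the ordering only; it is
not a verbatim excerpt):

    int rc = 0;

    /* PV guests use shadow mem_access mode. */
    if ( !shadow_mode_enabled(d) )
        rc = shadow_enable_mem_access(d);
    if ( rc == 0 )
    {
        p2m_mem_access_init(p2m_get_hostp2m(d));
        rc = mem_event_enable(d, mec, med, _VPF_mem_access,
                              HVM_PARAM_ACCESS_RING_PFN,
                              mem_access_notification);
    }
    if ( rc != 0 )
    {
        /* Unwind: reset p2m state, drop shadow mode, free the ring. */
        p2m_mem_access_reset(p2m_get_hostp2m(d));
        if ( shadow_mode_enabled(d) )
            shadow_disable_mem_access(d);
        mem_access_free_pv_ring(d);
    }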

Signed-off-by: Aravindh Puthiyaparambil <aravindp@xxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Keir Fraser <keir@xxxxxxx>
Cc: Tim Deegan <tim@xxxxxxx>

---
Changes from RFC v1:
Fallout due to changes in p2m and shadow code.
Add XENMEM_access_op_set_default access sub-op.

 xen/arch/x86/domain.c            |  12 ++
 xen/arch/x86/mm/mem_access.c     | 242 +++++++++++++++++++++++++++++++++++++--
 xen/arch/x86/mm/mem_event.c      |  62 +++++++---
 xen/include/asm-x86/domain.h     |   3 +
 xen/include/asm-x86/mem_access.h |   3 +
 xen/include/public/memory.h      |   3 +
 6 files changed, 305 insertions(+), 20 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index e896210..49d8545 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -57,6 +57,7 @@
 #include <asm/nmi.h>
 #include <asm/mce.h>
 #include <asm/amd.h>
+#include <asm/mem_access.h>
 #include <xen/numa.h>
 #include <xen/iommu.h>
 #include <compat/vcpu.h>
@@ -593,8 +594,11 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
         }
     }
     else
+    {
         /* 64-bit PV guest by default. */
         d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
+        d->arch.pv_domain.access_ring_mfn = _mfn(INVALID_MFN);
+    }
 
     /* initialize default tsc behavior in case tools don't */
     tsc_set_info(d, TSC_MODE_DEFAULT, 0UL, 0, 0);
@@ -632,8 +636,16 @@ void arch_domain_destroy(struct domain *d)
 
     free_perdomain_mappings(d);
     if ( is_pv_domain(d) )
+    {
         free_xenheap_page(d->arch.pv_domain.gdt_ldt_l1tab);
 
+        /*
+         * Free the PV mem_access ring xenheap page in the case where a
+         * mem_access listener is present while the domain is being destroyed.
+         */
+        mem_access_free_pv_ring(d);
+    }
+
     free_xenheap_page(d->shared_info);
     cleanup_domain_irq_mapping(d);
 }
diff --git a/xen/arch/x86/mm/mem_access.c b/xen/arch/x86/mm/mem_access.c
index e8465a5..8060446 100644
--- a/xen/arch/x86/mm/mem_access.c
+++ b/xen/arch/x86/mm/mem_access.c
@@ -26,8 +26,122 @@
 #include <xen/hypercall.h>
 #include <asm/p2m.h>
 #include <asm/mem_event.h>
+#include <asm/event.h>
+#include <asm/shadow.h>
 #include <xsm/xsm.h>
+#include "mm-locks.h"
 
+/* Override macros from asm/page.h to make them work with mfn_t */
+#undef mfn_valid
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
+#undef mfn_to_page
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
+
+static inline bool_t domain_valid_for_mem_access(struct domain *d)
+{
+    if ( is_hvm_domain(d) )
+    {
+        /* Only HAP is supported */
+        if ( !hap_enabled(d) )
+            return 0;
+
+        /* Currently only EPT is supported */
+        if ( !cpu_has_vmx )
+            return 0;
+    }
+    /*
+     * Only PV guests using shadow mode and running on CPUs with the NX bit are
+     * supported.
+     */
+    else if ( !shadow_mode_enabled(d) || !cpu_has_nx )
+        return 0;
+
+    return 1;
+}
+
+/*
+ * Set the default access permission for all pages of a PV domain.
+ * Unlike for an HVM domain, the mem_access listener cannot set access
+ * permissions for all pages, since it does not know all the mfns that belong
+ * to the PV domain; all it can do is set permissions for individual pages.
+ * This function instead blows away the shadows, so that new faults will set
+ * the pagetable entry permissions to the default value. It also records the
+ * default access value in page_info->shadow_flags for each page in the
+ * domain. start_page identifies the page from which to resume setting default
+ * permissions after a hypercall continuation. This is also the reason why
+ * this function cannot be folded into p2m_set_mem_access(): the pointer
+ * won't fit in the hypercall op field.
+ */
+static int mem_access_set_default(struct domain *d, uint64_t *start_page,
+                           xenmem_access_t access)
+{
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    struct page_info *page;
+    struct page_list_head head;
+    p2m_access_t a;
+    int rc = 0, ctr = 0;
+
+    if ( !is_pv_domain(d) )
+        return -ENOSYS;
+
+    ASSERT(shadow_mode_enabled(d));
+
+    rc = p2m_convert_xenmem_access(p2m, access, &a);
+    if ( rc != 0 )
+        return rc;
+
+    /*
+     * For PV domains we only support r, rw, rx, rx2rw and rwx access
+     * permissions
+     */
+    switch ( a )
+    {
+    case p2m_access_n:
+    case p2m_access_w:
+    case p2m_access_x:
+    case p2m_access_wx:
+    case p2m_access_n2rwx:
+        return -EINVAL;
+    default:
+        break;
+    }
+
+    paging_lock_recursive(d);
+
+    if ( *start_page )
+    {
+        head.next = (struct page_info *)*start_page;
+        head.tail = d->page_list.tail;
+    }
+    else
+        head = d->page_list;
+
+    page_list_for_each(page, &head)
+    {
+        shadow_set_access(page, access);
+        if ( page != head.tail && !(++ctr & MEMOP_CMD_MASK) &&
+             hypercall_preempt_check() )
+        {
+            struct page_info *next = page_list_next(page, &head);
+            if ( next )
+            {
+                *start_page = (uint64_t)next;
+                rc = -EAGAIN;
+            }
+            break;
+        }
+    }
+
+    if ( rc == 0 )
+    {
+        p2m->default_access = a;
+        shadow_blow_tables(d);
+    }
+
+    paging_unlock(d);
+
+    return rc;
+}
 
 int mem_access_memop(unsigned long cmd,
                      XEN_GUEST_HANDLE_PARAM(xen_mem_access_op_t) arg)
@@ -43,16 +157,14 @@ int mem_access_memop(unsigned long cmd,
     if ( rc )
         return rc;
 
-    rc = -EINVAL;
-    if ( !is_hvm_domain(d) )
-        goto out;
-
     rc = xsm_mem_event_op(XSM_DM_PRIV, d, XENMEM_access_op);
     if ( rc )
         goto out;
 
     rc = -ENODEV;
-    if ( unlikely(!d->mem_event->access.ring_page) )
+    if ( unlikely(!d->mem_event->access.ring_page) &&
+         mao.op != XENMEM_access_op_create_ring_page &&
+         mao.op != XENMEM_access_op_get_ring_mfn )
         goto out;
 
     switch ( mao.op )
@@ -67,10 +179,21 @@ int mem_access_memop(unsigned long cmd,
         unsigned long start_iter = cmd & ~MEMOP_CMD_MASK;
 
         rc = -EINVAL;
+        if ( !domain_valid_for_mem_access(d) )
+            break;
+
+        /*
+         * max_pfn for a PV domain is obtained from the shared_info
+         * structure, which the guest itself maintains and which is not
+         * filled in during early boot. So we do not check whether we are
+         * crossing max_pfn here and instead rely on the checks in
+         * p2m_mem_access_set_entry().
+         */
         if ( (mao.pfn != ~0ull) &&
              (mao.nr < start_iter ||
               ((mao.pfn + mao.nr - 1) < mao.pfn) ||
-              ((mao.pfn + mao.nr - 1) > domain_get_maximum_gpfn(d))) )
+              ((mao.pfn + mao.nr - 1) > domain_get_maximum_gpfn(d) &&
+                !is_pv_domain(d))) )
             break;
 
         rc = p2m_set_mem_access(d, mao.pfn, mao.nr, start_iter,
@@ -89,7 +212,18 @@ int mem_access_memop(unsigned long cmd,
         xenmem_access_t access;
 
         rc = -EINVAL;
-        if ( (mao.pfn > domain_get_maximum_gpfn(d)) && mao.pfn != ~0ull )
+        if ( !domain_valid_for_mem_access(d) )
+            break;
+
+        /*
+         * max_pfn for a PV domain is obtained from the shared_info
+         * structure, which the guest itself maintains and which is not
+         * filled in during early boot. So we do not check whether we are
+         * crossing max_pfn here and instead rely on the checks in
+         * p2m_mem_access_get_entry().
+         */
+        if ( (mao.pfn > domain_get_maximum_gpfn(d) && !is_pv_domain(d)) &&
+             mao.pfn != ~0ull )
             break;
 
         rc = p2m_get_mem_access(d, mao.pfn, &access);
@@ -102,6 +236,85 @@ int mem_access_memop(unsigned long cmd,
         break;
     }
 
+    case XENMEM_access_op_set_default:
+        /*
+         * mem_access listeners for HVM domains call
+         * xc_set_mem_access(first_pfn = ~0) to set default access.
+         */
+        rc = -ENOSYS;
+        if ( !is_pv_domain(d) )
+            break;
+
+        rc = mem_access_set_default(d, (uint64_t *)&mao.pfn, mao.access);
+        if ( rc == -EAGAIN )
+        {
+            ASSERT(mao.pfn != 0);
+            rc = __copy_field_to_guest(arg, &mao, pfn) ? -EFAULT : 0;
+            if ( rc == 0 )
+                rc = hypercall_create_continuation(__HYPERVISOR_memory_op, "lh",
+                                                   XENMEM_access_op, arg);
+
+        }
+        break;
+
+    case XENMEM_access_op_create_ring_page:
+    {
+        void *access_ring_va;
+
+        /*
+         * The special ring page for HVM domains will have been set up
+         * during domain creation.
+         */
+        rc = -ENOSYS;
+        if ( !is_pv_domain(d) )
+            break;
+
+        /*
+         * The ring page was created by a mem_access listener but was not
+         * freed. Do not allow another xenheap page to be allocated.
+         */
+        if ( mfn_valid(d->arch.pv_domain.access_ring_mfn) )
+        {
+            rc = -EPERM;
+            break;
+        }
+
+        access_ring_va = alloc_xenheap_page();
+        if ( access_ring_va == NULL )
+        {
+            rc = -ENOMEM;
+            break;
+        }
+
+        clear_page(access_ring_va);
+        share_xen_page_with_guest(virt_to_page(access_ring_va), d,
+                                  XENSHARE_writable);
+
+        d->arch.pv_domain.access_ring_mfn = _mfn(virt_to_mfn(access_ring_va));
+
+        rc = 0;
+        break;
+    }
+
+    case XENMEM_access_op_get_ring_mfn:
+        /*
+         * mem_access listeners for HVM domains should call xc_hvm_param_get()
+         * instead of xc_mem_access_get_ring_mfn().
+         */
+        rc = -ENOSYS;
+        if ( !is_pv_domain(d) )
+            break;
+
+        if ( !mfn_valid(d->arch.pv_domain.access_ring_mfn) )
+        {
+            rc = -ENODEV;
+            break;
+        }
+
+        mao.pfn = mfn_x(d->arch.pv_domain.access_ring_mfn);
+        rc = __copy_field_to_guest(arg, &mao, pfn) ? -EFAULT : 0;
+        break;
+
     default:
         rc = -ENOSYS;
         break;
@@ -123,6 +338,21 @@ int mem_access_send_req(struct domain *d, mem_event_request_t *req)
     return 0;
 } 
 
+/* Free the xenheap page used for the PV access ring */
+void mem_access_free_pv_ring(struct domain *d)
+{
+    struct page_info *pg = mfn_to_page(d->arch.pv_domain.access_ring_mfn);
+
+    if ( !mfn_valid(d->arch.pv_domain.access_ring_mfn) )
+        return;
+
+    BUG_ON(page_get_owner(pg) != d);
+    if ( test_and_clear_bit(_PGC_allocated, &pg->count_info) )
+        put_page(pg);
+    free_xenheap_page(mfn_to_virt(mfn_x(d->arch.pv_domain.access_ring_mfn)));
+    d->arch.pv_domain.access_ring_mfn = _mfn(INVALID_MFN);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/mm/mem_event.c b/xen/arch/x86/mm/mem_event.c
index 40ae841..06ac9f4 100644
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -25,6 +25,7 @@
 #include <xen/event.h>
 #include <xen/wait.h>
 #include <asm/p2m.h>
+#include <asm/shadow.h>
 #include <asm/mem_event.h>
 #include <asm/mem_paging.h>
 #include <asm/mem_access.h>
@@ -49,7 +50,12 @@ static int mem_event_enable(
     xen_event_channel_notification_t notification_fn)
 {
     int rc;
-    unsigned long ring_gfn = d->arch.hvm_domain.params[param];
+    unsigned long ring_gfn;
+
+    if ( is_pv_domain(d) && param == HVM_PARAM_ACCESS_RING_PFN )
+        ring_gfn = mfn_x(d->arch.pv_domain.access_ring_mfn);
+    else
+        ring_gfn = d->arch.hvm_domain.params[param];
 
     /* Only one helper at a time. If the helper crashed,
      * the ring is in an undefined state and so is the guest.
@@ -587,28 +593,58 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
         switch( mec->op )
         {
         case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE:
-        {
             rc = -ENODEV;
-            /* Only HAP is supported */
-            if ( !hap_enabled(d) )
-                break;
+            if ( !is_pv_domain(d) )
+            {
+                /* Only HAP is supported */
+                if ( !hap_enabled(d) )
+                    break;
 
-            /* Currently only EPT is supported */
-            if ( !cpu_has_vmx )
-                break;
+                /* Currently only EPT is supported */
+                if ( !cpu_has_vmx )
+                    break;
+            }
+            /* PV guests use shadow mem_access mode */
+            else
+            {
+                if ( !shadow_mode_enabled(d) )
+                {
+                    rc = shadow_enable_mem_access(d);
+                    if ( rc != 0 )
+                        goto pv_out;
+                }
+                p2m_mem_access_init(p2m_get_hostp2m(d));
+            }
 
             rc = mem_event_enable(d, mec, med, _VPF_mem_access, 
                                     HVM_PARAM_ACCESS_RING_PFN,
                                     mem_access_notification);
-        }
-        break;
+
+ pv_out:
+            if ( rc != 0 && is_pv_domain(d) )
+            {
+                p2m_mem_access_reset(p2m_get_hostp2m(d));
+                if ( shadow_mode_enabled(d) )
+                    shadow_disable_mem_access(d);
+                mem_access_free_pv_ring(d);
+            }
+            break;
 
         case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE:
-        {
             if ( med->ring_page )
                 rc = mem_event_disable(d, med);
-        }
-        break;
+
+            if ( is_pv_domain(d) )
+            {
+                domain_pause(d);
+                p2m_mem_access_reset(p2m_get_hostp2m(d));
+                if ( shadow_mode_enabled(d) )
+                    shadow_disable_mem_access(d);
+
+                mem_access_free_pv_ring(d);
+                domain_unpause(d);
+            }
+            break;
 
         default:
             rc = -ENOSYS;
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index f7b0262..cf2ae2a 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -227,6 +227,9 @@ struct pv_domain
 
     /* map_domain_page() mapping cache. */
     struct mapcache_domain mapcache;
+
+    /* mfn of the mem_access ring page for PV domains */
+    mfn_t access_ring_mfn;
 };
 
 struct arch_domain
diff --git a/xen/include/asm-x86/mem_access.h b/xen/include/asm-x86/mem_access.h
index 5c7c5fd..bf9fce9 100644
--- a/xen/include/asm-x86/mem_access.h
+++ b/xen/include/asm-x86/mem_access.h
@@ -27,6 +27,9 @@ int mem_access_memop(unsigned long cmd,
                      XEN_GUEST_HANDLE_PARAM(xen_mem_access_op_t) arg);
 int mem_access_send_req(struct domain *d, mem_event_request_t *req);
 
+/* Free the xenheap page used for the access ring */
+void mem_access_free_pv_ring(struct domain *d);
+
 #endif /* _XEN_ASM_MEM_ACCESS_H */
 
 /*
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 2c57aa0..5ba1581 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -389,6 +389,9 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_event_op_t);
 #define XENMEM_access_op_resume             0
 #define XENMEM_access_op_set_access         1
 #define XENMEM_access_op_get_access         2
+#define XENMEM_access_op_set_default        3
+#define XENMEM_access_op_create_ring_page   4
+#define XENMEM_access_op_get_ring_mfn       5
 
 typedef enum {
     XENMEM_access_n,
-- 
1.9.1

