
[Xen-devel] [PATCH RFC 1/4] x86/mm: Shadow and p2m changes for PV mem_access



Shadow mem_access mode
----------------------
Add a new shadow mode for mem_access. This should only be enabled by a
mem_access listener when it calls XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE
for a PV domain.
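
A minimal sketch of the intended enable flow for a PV domain
(pv_mem_access_enable() is an illustrative name; the actual hook from
XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE is not part of this patch):

    static int pv_mem_access_enable(struct domain *d)
    {
        int rc;

        /* Only meaningful for PV guests running on shadow pagetables. */
        if ( !is_pv_domain(d) || !paging_mode_shadow(d) )
            return -EINVAL;

        /* Turn on the PG_mem_access paging mode bit. */
        rc = shadow_enable_mem_access(d);
        if ( rc )
            return rc;

        /* Allocate the access lookup table and install the p2m hooks. */
        return p2m_mem_access_init(p2m_get_hostp2m(d));
    }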

Changes to shadow code
----------------------
If the shadow pagefault handler detects that a mem_access listener is
present, it checks whether an access violation occurred. If it did, the
vCPU is paused and an event is sent to the listener, as sketched below.
Similarly, if the propagation code detects that a mem_access listener is
present, it creates the PTE after applying the access permissions to it.
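
The violation check added to sh_page_fault() reduces to comparing the
fault type against the page's p2m_access_t; a condensed sketch of that
logic (the helper name is illustrative, the real code is open-coded in
multi.c below):

    static int is_access_violation(p2m_access_t p2ma,
                                   bool_t access_w, bool_t access_x)
    {
        switch ( p2ma )
        {
        case p2m_access_r:
            return access_w || access_x;  /* read-only page */
        case p2m_access_rx:
        case p2m_access_rx2rw:
            return access_w;              /* execute allowed, write is not */
        case p2m_access_rw:
            return access_x;              /* write allowed, execute is not */
        case p2m_access_rwx:
        default:
            return 0;                     /* fully accessible */
        }
    }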

P2M changes
-----------
Add a new p2m implementation for mem_access. Central to it is the access
lookup table: an array of mfns, where each mfn is a page allocated from
the domain's shadow memory. These pages hold the p2m_access_t values for
each guest gmfn. p2m_mem_access_set_entry() sets the access value of the
mfn given as input and blows away all shadow entries of that mfn.
p2m_mem_access_get_entry() returns the access value of the mfn given as
input; the lookup itself is sketched below.
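
A condensed sketch of what p2m_mem_access_get_entry() does with that
table (one byte per gfn; locking and the invalid-gfn checks omitted; the
helper name access_lookup() is illustrative):

    static p2m_access_t access_lookup(struct p2m_domain *p2m,
                                      unsigned long gfn)
    {
        /* Each table entry is the mfn of a shadow-pool page holding
         * PAGE_SIZE one-byte access values. */
        unsigned long entries_per_page = PAGE_SIZE / sizeof(uint8_t);
        mfn_t table_mfn = p2m->access_lookup_table[gfn / entries_per_page];
        uint8_t *access_page = map_domain_page(mfn_x(table_mfn));
        p2m_access_t a = access_page[gfn % entries_per_page];

        unmap_domain_page(access_page);
        return a;
    }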

Signed-off-by: Aravindh Puthiyaparambil <aravindp@xxxxxxxxx>
Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Keir Fraser <keir@xxxxxxx>
Cc: Tim Deegan <tim@xxxxxxx>
---
 xen/arch/x86/mm/Makefile        |   2 +-
 xen/arch/x86/mm/p2m-ma.c        | 286 ++++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/mm/p2m.c           |  19 ++-
 xen/arch/x86/mm/paging.c        |  16 +++
 xen/arch/x86/mm/shadow/common.c |  63 ++++++++-
 xen/arch/x86/mm/shadow/multi.c  | 104 ++++++++++++++-
 xen/include/asm-x86/p2m.h       |  51 +++++++
 xen/include/asm-x86/paging.h    |   3 +
 xen/include/asm-x86/shadow.h    |  11 ++
 xen/include/public/domctl.h     |   4 +
 10 files changed, 549 insertions(+), 10 deletions(-)
 create mode 100644 xen/arch/x86/mm/p2m-ma.c

diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 73dcdf4..41128a4 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -2,7 +2,7 @@ subdir-y += shadow
 subdir-y += hap
 
 obj-y += paging.o
-obj-y += p2m.o p2m-pt.o p2m-ept.o p2m-pod.o
+obj-y += p2m.o p2m-pt.o p2m-ept.o p2m-pod.o p2m-ma.o
 obj-y += guest_walk_2.o
 obj-y += guest_walk_3.o
 obj-$(x86_64) += guest_walk_4.o
diff --git a/xen/arch/x86/mm/p2m-ma.c b/xen/arch/x86/mm/p2m-ma.c
new file mode 100644
index 0000000..634b0eb
--- /dev/null
+++ b/xen/arch/x86/mm/p2m-ma.c
@@ -0,0 +1,286 @@
+/******************************************************************************
+ * arch/x86/mm/p2m-ma.c
+ *
+ * Implementation of p2m data structures, for use by PV mem_access code.
+ *
+ * Copyright (c) 2014 Cisco Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <asm/p2m.h>
+#include <asm/shadow.h>
+#include "mm-locks.h"
+
+/* Override macros from asm/page.h to make them work with mfn_t */
+#undef mfn_valid
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
+#undef mfn_to_page
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
+#undef page_to_mfn
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
+
+/* Convert access restrictions to page table flags */
+void p2m_access_to_flags(u32 *flags, u32 gflags, p2m_access_t access)
+{
+
+    /* Restrict with access permissions */
+    switch (access)
+    {
+        case p2m_access_r:
+            *flags &= ~(_PAGE_RW);
+            *flags |= (_PAGE_NX_BIT|_PAGE_PRESENT);
+            break;
+        case p2m_access_rx:
+        case p2m_access_rx2rw:
+            *flags &= ~(_PAGE_NX_BIT|_PAGE_RW);
+            *flags |= _PAGE_PRESENT;
+            break;
+        case p2m_access_rw:
+            *flags |= (_PAGE_NX_BIT|_PAGE_RW|_PAGE_PRESENT);
+            break;
+        case p2m_access_rwx:
+        default:
+            *flags &= ~(_PAGE_NX_BIT);
+            *flags |= (_PAGE_RW|_PAGE_PRESENT);
+            break;
+    }
+
+    /* Allow more restrictive guest flags to be propagated instead of
+     * the access permissions. */
+    if ( !(gflags & _PAGE_RW) )
+        *flags &= ~(_PAGE_RW);
+
+    if ( gflags & _PAGE_NX_BIT )
+        *flags |= _PAGE_NX_BIT;
+
+}
+
+/*
+ * Set the page permission of the mfn. This in effect removes all shadow
+ * mappings of that mfn. The access type of that mfn is stored in the access
+ * lookup table.
+ */
+static int
+p2m_mem_access_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
+                         unsigned int page_order, p2m_type_t p2mt,
+                         p2m_access_t p2ma)
+{
+    struct domain *d = p2m->domain;
+    mfn_t *access_lookup_table = p2m->access_lookup_table;
+    uint table_idx;
+    uint page_idx;
+    uint8_t *access_table_page;
+
+    ASSERT(shadow_mode_mem_access(d) && access_lookup_table != NULL);
+
+    /* For PV domains we only support rw, rx, rx2rw, rwx access permissions */
+    if ( unlikely(p2ma != p2m_access_r &&
+                  p2ma != p2m_access_rw &&
+                  p2ma != p2m_access_rx &&
+                  p2ma != p2m_access_rwx &&
+                  p2ma != p2m_access_rx2rw) )
+        return -EINVAL;
+
+    if ( page_get_owner(mfn_to_page(mfn)) != d )
+        return -ENOENT;
+
+    gfn = get_gpfn_from_mfn(mfn_x(mfn));
+
+    /*
+     * Values with the MSB set denote MFNs that aren't really part of the
+     * domain's pseudo-physical memory map (e.g., the shared info frame).
+     * Nothing to do here.
+     */
+    if ( unlikely(!VALID_M2P(gfn)) )
+        return 0;
+
+    if ( gfn > (d->tot_pages - 1) )
+        return -EINVAL;
+
+    paging_lock(d);
+
+    table_idx = MEM_ACCESS_TABLE_IDX(gfn);
+    page_idx = MEM_ACCESS_PAGE_IDX(gfn);
+    access_table_page = map_domain_page(mfn_x(access_lookup_table[table_idx]));
+    access_table_page[page_idx] = p2ma;
+    unmap_domain_page(access_table_page);
+
+    if ( sh_remove_all_mappings(d->vcpu[0], mfn) )
+        flush_tlb_mask(d->domain_dirty_cpumask);
+
+    paging_unlock(d);
+
+    return 0;
+}
+
+/* Get the page permission of the mfn from the access lookup table */
+static mfn_t
+p2m_mem_access_get_entry(struct p2m_domain *p2m, unsigned long gfn,
+                         p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
+                         unsigned int *page_order)
+{
+    struct domain *d = p2m->domain;
+    mfn_t *access_lookup_table = p2m->access_lookup_table;
+    uint table_idx;
+    uint page_idx;
+    uint8_t *access_table_page;
+    mfn_t mfn = _mfn(gfn); /* For PV guests mfn == gfn */
+
+    ASSERT(shadow_mode_mem_access(d) && access_lookup_table != NULL);
+
+    /* Not necessarily true, but for non-translated guests, we claim
+     * it's the most generic kind of memory */
+    *t = p2m_ram_rw;
+
+    if ( page_get_owner(mfn_to_page(mfn)) != d )
+        return _mfn(INVALID_MFN);
+
+    gfn = get_gpfn_from_mfn(mfn_x(mfn));
+
+    /*
+     * Values with the MSB set denote MFNs that aren't really part of the
+     * domain's pseudo-physical memory map (e.g., the shared info frame).
+     * Return mfn with RW access.
+     */
+    if ( unlikely(!VALID_M2P(gfn)) )
+    {
+        *a = p2m_access_rw;
+        return mfn;
+    }
+
+    if ( gfn > (d->tot_pages - 1) )
+    {
+        *a = p2m->default_access;
+        return _mfn(INVALID_MFN);
+    }
+
+    table_idx = MEM_ACCESS_TABLE_IDX(gfn);
+    page_idx = MEM_ACCESS_PAGE_IDX(gfn);
+
+    access_table_page = map_domain_page(mfn_x(access_lookup_table[table_idx]));
+
+    /* This is a hint to take the default permissions */
+    if ( access_table_page[page_idx] == p2m_access_n )
+        access_table_page[page_idx] = p2m->default_access;
+    *a = access_table_page[page_idx];
+
+    unmap_domain_page(access_table_page);
+
+    return mfn;
+}
+
+/* Check the default_access value and blow away all the shadows */
+int p2m_mem_access_set_default(struct p2m_domain *p2m)
+{
+    struct domain *d = p2m->domain;
+
+    ASSERT(shadow_mode_mem_access(d));
+
+    /* For PV domains we only support r, rw, rx, rwx access permissions */
+    if ( p2m->default_access != p2m_access_r &&
+         p2m->default_access != p2m_access_rw &&
+         p2m->default_access != p2m_access_rx &&
+         p2m->default_access != p2m_access_rwx &&
+         p2m->default_access != p2m_access_rx2rw )
+        return -EINVAL;
+
+    paging_lock_recursive(d);
+    shadow_blow_tables(d);
+    paging_unlock(d);
+
+    return 0;
+}
+
+/*
+ * Free all the shadow pages used in the access lookup table and free the table
+ * itself.
+ */
+void p2m_mem_access_teardown(struct p2m_domain *p2m)
+{
+    struct domain *d = p2m->domain;
+    mfn_t *access_lookup_table = p2m->access_lookup_table;
+    uint32_t nr_access_table_pages;
+    uint32_t ctr;
+
+    /* Reset the set_entry and get_entry function pointers */
+    p2m_pt_init(p2m);
+
+    if ( !access_lookup_table  )
+        return;
+
+    nr_access_table_pages = get_domain_nr_access_table_pages(d);
+
+    for ( ctr = 0; ctr < nr_access_table_pages; ctr++ )
+    {
+        if ( !mfn_valid(access_lookup_table[ctr]) )
+            break;
+        d->arch.paging.free_page(d,
+                                 mfn_to_page(access_lookup_table[ctr]));
+    }
+
+    xfree(p2m->access_lookup_table);
+    p2m->access_lookup_table = NULL;
+}
+
+/*
+ * Allocate the access lookup table array. This is an array of mfns. Each mfn
+ * is a page allocated from the domain's shadow memory. The size of the table
+ * will depend on the domain's memory size.
+ */
+int p2m_mem_access_init(struct p2m_domain *p2m)
+{
+    struct domain *d = p2m->domain;
+    mfn_t *access_lookup_table;
+    uint32_t nr_access_table_pages;
+    uint32_t ctr;
+
+    nr_access_table_pages = get_domain_nr_access_table_pages(d);
+    access_lookup_table = xzalloc_array(mfn_t, nr_access_table_pages);
+    if ( !access_lookup_table )
+        return -ENOMEM;
+
+    p2m->access_lookup_table = access_lookup_table;
+
+    for ( ctr = 0; ctr < nr_access_table_pages; ctr++ )
+    {
+        struct page_info *page;
+
+        page = d->arch.paging.alloc_page(d);
+        if ( page == NULL )
+        {
+            /* This is a hint to p2m_mem_access_teardown() to stop freeing */
+            access_lookup_table[ctr] = _mfn(INVALID_MFN);
+            p2m_mem_access_teardown(p2m);
+            return -ENOMEM;
+        }
+
+        access_lookup_table[ctr] = page_to_mfn(page);
+    }
+
+    p2m->set_entry = p2m_mem_access_set_entry;
+    p2m->get_entry = p2m_mem_access_get_entry;
+
+    return 0;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 1d0528b..c4b9dc4 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -33,6 +33,7 @@
 #include <asm/mem_event.h>
 #include <public/mem_event.h>
 #include <asm/mem_sharing.h>
+#include <asm/shadow.h>
 #include <xen/event.h>
 #include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
@@ -222,7 +223,9 @@ mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn,
     if ( q & P2M_UNSHARE )
         q |= P2M_ALLOC;
 
-    if ( !p2m || !paging_mode_translate(p2m->domain) )
+    if ( !p2m ||
+          (!paging_mode_translate(p2m->domain) &&
+           !paging_mode_mem_access(p2m->domain)) )
     {
         /* Not necessarily true, but for non-translated guests, we claim
          * it's the most generic kind of memory */
@@ -259,7 +262,9 @@ mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn,
 
 void __put_gfn(struct p2m_domain *p2m, unsigned long gfn)
 {
-    if ( !p2m || !paging_mode_translate(p2m->domain) )
+    if ( !p2m ||
+         (!paging_mode_translate(p2m->domain) &&
+          !paging_mode_mem_access(p2m->domain)) )
         /* Nothing to do in this case */
         return;
 
@@ -1414,6 +1419,8 @@ long p2m_set_mem_access(struct domain *d, unsigned long pfn, uint32_t nr,
     if ( pfn == ~0ul )
     {
         p2m->default_access = a;
+        if ( is_pv_domain(d) )
+            return p2m_mem_access_set_default(p2m);
         return 0;
     }
 
@@ -1421,6 +1428,14 @@ long p2m_set_mem_access(struct domain *d, unsigned long pfn, uint32_t nr,
     for ( pfn += start; nr > start; ++pfn )
     {
         mfn = p2m->get_entry(p2m, pfn, &t, &_a, 0, NULL);
+
+        /* Return error on invalid MFN for PV guests */
+        if ( unlikely(is_pv_domain(d) && !mfn_valid(mfn)) )
+        {
+            rc = -EFAULT;
+            break;
+        }
+
         rc = p2m->set_entry(p2m, pfn, mfn, PAGE_ORDER_4K, t, a);
         if ( rc )
             break;
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index f956aa5..bedf1cd 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -585,9 +585,17 @@ int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
     {
 
     case XEN_DOMCTL_SHADOW_OP_ENABLE:
+        /*
+         * Shadow mem_access mode can only be enabled via
+         * XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE, not directly via this op.
+         */
+        if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_MEM_ACCESS )
+            return -EINVAL;
+
         if ( !(sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY) )
             break;
         /* Else fall through... */
+
     case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
         if ( hap_enabled(d) )
             hap_logdirty_init(d);
@@ -622,6 +630,14 @@ void paging_teardown(struct domain *d)
     /* clean up log dirty resources. */
     paging_log_dirty_teardown(d);
 
+    /*
+     * Free the PV mem_access p2m resources in the case where a mem_access
+     * listener is still present while the domain is being destroyed, or the
+     * listener crashed without cleaning up.
+     */
+    if ( is_pv_domain(d) )
+        p2m_mem_access_teardown(p2m_get_hostp2m(d));
+
     /* Move populate-on-demand cache back to domain_list for destruction */
     p2m_pod_empty_cache(d);
 }
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 3c803b6..572bd8d 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -1356,7 +1356,7 @@ void shadow_prealloc(struct domain *d, u32 type, unsigned int count)
 
 /* Deliberately free all the memory we can: this will tear down all of
  * this domain's shadows */
-static void shadow_blow_tables(struct domain *d) 
+void shadow_blow_tables(struct domain *d)
 {
     struct page_info *sp, *t;
     struct vcpu *v = d->vcpu[0];
@@ -2443,7 +2443,8 @@ int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
         if ( !(shadow_mode_external(v->domain)
                && (page->count_info & PGC_count_mask) <= 3
                && ((page->u.inuse.type_info & PGT_count_mask)
-                   == !!is_xen_heap_page(page))) )
+                       == !!is_xen_heap_page(page)))
+                    && !(shadow_mode_mem_access(v->domain)) )
         {
             SHADOW_ERROR("can't find all mappings of mfn %lx: "
                           "c=%08lx t=%08lx\n", mfn_x(gmfn), 
@@ -2953,7 +2954,7 @@ int shadow_enable(struct domain *d, u32 mode)
         paging_unlock(d);
     }
 
-    /* Allow p2m and log-dirty code to borrow shadow memory */
+    /* Allow p2m, log-dirty and mem_access code to borrow shadow memory */
     d->arch.paging.alloc_page = shadow_alloc_p2m_page;
     d->arch.paging.free_page = shadow_free_p2m_page;
 
@@ -3130,6 +3131,9 @@ void shadow_teardown(struct domain *d)
      * calls now that we've torn down the bitmap */
     d->arch.paging.mode &= ~PG_log_dirty;
 
+    /* Clear the mem_access mode bit */
+    d->arch.paging.mode &= ~PG_mem_access;
+
     if (d->arch.hvm_domain.dirty_vram) {
         xfree(d->arch.hvm_domain.dirty_vram->sl1ma);
         xfree(d->arch.hvm_domain.dirty_vram->dirty_bitmap);
@@ -3179,8 +3183,14 @@ static int shadow_one_bit_enable(struct domain *d, u32 mode)
 {
     ASSERT(paging_locked_by_me(d));
 
-    /* Sanity check the call */
-    if ( d == current->domain || (d->arch.paging.mode & mode) == mode )
+    /*
+     * Sanity check the call.
+     * Do not allow shadow mem_access mode to be enabled when
+     * log-dirty or translate mode is enabled.
+     */
+    if ( d == current->domain || (d->arch.paging.mode & mode) == mode ||
+         ((mode & PG_mem_access) &&
+          (shadow_mode_log_dirty(d) || shadow_mode_translate(d))) )
     {
         return -EINVAL;
     }
@@ -3197,7 +3207,7 @@ static int shadow_one_bit_enable(struct domain *d, u32 mode)
         }
     }
 
-    /* Allow p2m and log-dirty code to borrow shadow memory */
+    /* Allow p2m, log-dirty and mem_access code to borrow shadow memory */
     d->arch.paging.alloc_page = shadow_alloc_p2m_page;
     d->arch.paging.free_page = shadow_free_p2m_page;
 
@@ -3661,6 +3671,47 @@ out:
 }
 
 /**************************************************************************/
+/* mem_access mode support */
+
+/* Shadow-specific code which is called in
+ * XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE for PV guests.
+ * Return 0 on success.
+ */
+int shadow_enable_mem_access(struct domain *d)
+{
+    int ret;
+
+    paging_lock(d);
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
+    /* 32bit PV guests on 64bit xen behave like older 64bit linux: they
+     * change an l4e instead of cr3 to switch tables.  Give them the
+     * same optimization */
+    if ( is_pv_32on64_domain(d) )
+        d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL;
+#endif
+
+    ret = shadow_one_bit_enable(d, PG_mem_access);
+    paging_unlock(d);
+
+    return ret;
+}
+
+/* Shadow-specific code which is called in
+ * XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE for PV guests.
+ */
+int shadow_disable_mem_access(struct domain *d)
+{
+    int ret;
+
+    paging_lock(d);
+    ret = shadow_one_bit_disable(d, PG_mem_access);
+    paging_unlock(d);
+
+    return ret;
+}
+
+/**************************************************************************/
 /* Shadow-control XEN_DOMCTL dispatcher */
 
 int shadow_domctl(struct domain *d, 
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index c6c9d10..aee061c 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -38,6 +38,8 @@
 #include <asm/hvm/cacheattr.h>
 #include <asm/mtrr.h>
 #include <asm/guest_pt.h>
+#include <asm/mem_event.h>
+#include <asm/mem_access.h>
 #include <public/sched.h>
 #include "private.h"
 #include "types.h"
@@ -625,6 +627,20 @@ _sh_propagate(struct vcpu *v,
             }
     }
 
+    /* Propagate access permissions */
+    if ( unlikely((level == 1) &&
+                  mem_event_check_ring(&d->mem_event->access) &&
+                  !sh_mfn_is_a_page_table(target_mfn)) )
+    {
+        struct p2m_domain *p2m = p2m_get_hostp2m(d);
+        p2m_access_t a;
+        p2m_type_t t;
+        mfn_t mfn;
+        mfn = p2m->get_entry(p2m, mfn_x(target_mfn), &t, &a, 0, NULL);
+        if ( mfn_valid(mfn) )
+            p2m_access_to_flags(&sflags, gflags, a);
+    }
+
     // Set the A&D bits for higher level shadows.
     // Higher level entries do not, strictly speaking, have dirty bits, but
     // since we use shadow linear tables, each of these entries may, at some
@@ -2822,6 +2838,8 @@ static int sh_page_fault(struct vcpu *v,
     int r;
     fetch_type_t ft = 0;
     p2m_type_t p2mt;
+    p2m_access_t p2ma;
+    mem_event_request_t *req_ptr = NULL;
     uint32_t rc;
     int version;
 #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
@@ -3009,7 +3027,80 @@ static int sh_page_fault(struct vcpu *v,
 
     /* What mfn is the guest trying to access? */
     gfn = guest_l1e_get_gfn(gw.l1e);
-    gmfn = get_gfn(d, gfn, &p2mt);
+    if ( likely(!mem_event_check_ring(&d->mem_event->access)) )
+        gmfn = get_gfn(d, gfn, &p2mt);
+    /*
+     * A mem_access listener is present, so we will first check if a violation
+     * has occurred.
+     */
+    else
+    {
+        struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
+
+        gmfn = get_gfn_type_access(p2m, gfn_x(gfn), &p2mt, &p2ma, 0, NULL);
+        if ( mfn_valid(gmfn) && !sh_mfn_is_a_page_table(gmfn)
+             && (regs->error_code & PFEC_page_present)
+             && !(regs->error_code & PFEC_reserved_bit) )
+        {
+            int violation = 0;
+            bool_t access_w = !!(regs->error_code & PFEC_write_access);
+            bool_t access_x = !!(regs->error_code & PFEC_insn_fetch);
+            bool_t access_r = access_x ? 0 : !(access_w);
+
+            /* If the access is against the permissions, then send to mem_event */
+            switch (p2ma)
+            {
+            case p2m_access_r:
+                violation = access_w || access_x;
+                break;
+            case p2m_access_rx:
+            case p2m_access_rx2rw:
+                violation = access_w;
+                break;
+            case p2m_access_rw:
+                violation = access_x;
+                break;
+            case p2m_access_rwx:
+            default:
+                break;
+            }
+
+            /*
+             * Do not police writes to guest memory emanating from the Xen
+             * kernel. Trying to do so will cause the same pagefault to occur
+             * over and over again with an event being sent to the access
+             * listener for each fault. If the access listener's vcpu is not
+             * scheduled during this time, the violation is never resolved and
+             * will eventually end with the host crashing.
+             */
+            if ( (violation && access_w) &&
+                 (regs->eip >= XEN_VIRT_START && regs->eip <= XEN_VIRT_END) )
+            {
+                violation = 0;
+                rc = p2m->set_entry(p2m, gfn_x(gfn), gmfn, PAGE_ORDER_4K,
+                                    p2m_ram_rw, p2m_access_rw);
+            }
+
+            if ( violation )
+            {
+                paddr_t gpa = (mfn_x(gmfn) << PAGE_SHIFT) +
+                              (va & ((1 << PAGE_SHIFT) - 1));
+                if ( !p2m_mem_access_check(gpa, 1, va,
+                                           access_r, access_w, access_x,
+                                           &req_ptr) )
+                {
+                    SHADOW_PRINTK("Page access %c%c%c for gmfn=%"PRI_mfn" "
+                                  "p2ma: %d\n",
+                                  (access_r ? 'r' : '-'),
+                                  (access_w ? 'w' : '-'),
+                                  (access_x ? 'x' : '-'),
+                                  mfn_x(gmfn), p2ma);
+                    /* Rights not promoted, vcpu paused, work here is done */
+                    goto out_put_gfn;
+                }
+            }
+        }
+    }
 
     if ( shadow_mode_refcounts(d) && 
          ((!p2m_is_valid(p2mt) && !p2m_is_grant(p2mt)) ||
@@ -3214,7 +3305,18 @@ static int sh_page_fault(struct vcpu *v,
     SHADOW_PRINTK("fixed\n");
     shadow_audit_tables(v);
     paging_unlock(d);
+ out_put_gfn:
     put_gfn(d, gfn_x(gfn));
+
+    /* Send access violation to mem_access listener */
+    if ( unlikely(req_ptr != NULL) )
+    {
+        SHADOW_PRINTK("mem_access SEND violation mfn: 0x%"PRI_mfn"\n",
+                      mfn_x(gmfn));
+        mem_access_send_req(d, req_ptr);
+        xfree(req_ptr);
+    }
+
     return EXCRET_fault_fixed;
 
  emulate:
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 86847e9..c20277e 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -283,6 +283,8 @@ struct p2m_domain {
         struct ept_data ept;
         /* NPT-equivalent structure could be added here. */
     };
+    /* Page access lookup table for PV domains */
+    mfn_t *access_lookup_table;
 };
 
 /* get host p2m table */
@@ -665,6 +667,55 @@ void p2m_flush_nestedp2m(struct domain *d);
 void nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
     l1_pgentry_t *p, l1_pgentry_t new, unsigned int level);
 
+/*
+ * Functions specific to the p2m-ma implementation
+ */
+
+/* Set up p2m function pointers and structures for mem_access implementation */
+int p2m_mem_access_init(struct p2m_domain *p2m);
+
+/* Reset p2m function pointers and free mem_access structures */
+void p2m_mem_access_teardown(struct p2m_domain *p2m);
+
+/* Set the default permission for all pages
+ * Unlike an HVM guest, the mem_access listener cannot set access permissions
+ * for all pages since the gfn to mfn translation is done by the guest. All it
+ * can do is set permissions for individual pages. This function blows away
+ * the shadows instead, so that new faults will set the page permissions to
+ * the default value.
+ */
+int p2m_mem_access_set_default(struct p2m_domain *p2m);
+
+/* Convert access restrictions to page table flags */
+void p2m_access_to_flags(u32 *flags, u32 gflags, p2m_access_t access);
+
+/* Size of each stored p2m_access_t entry: one byte, i.e. sizeof(uint8_t) */
+#define P2M_ACCESS_SIZE sizeof(uint8_t)
+
+/* Number of p2m_access_t in a page */
+#define NR_ACCESS (PAGE_SIZE / P2M_ACCESS_SIZE)
+
+/* Number of access table pages for a PV domain */
+#define get_domain_nr_access_table_pages(d) \
+        DIV_ROUND_UP(P2M_ACCESS_SIZE * ((d)->tot_pages - 1), PAGE_SIZE)
+
+/*
+ * The mem_access lookup table is an array of mfn_ts. Each mfn points to a page
+ * that holds NR_ACCESS p2m_access_t values, each corresponding to a gfn.
+ * access_lookup_table[0] will point to a mfn that has p2m_access_t values for
+ * gfns 0 to NR_ACCESS - 1.
+ * access_lookup_table[1] will point to a mfn that has p2m_access_t values for
+ * gfns NR_ACCESS to (2 * NR_ACCESS) - 1. And so on...
+ */
+
+/* Index in the mem_access lookup table of the page that holds the
+ * p2m_access_t for the gfn
+ */
+#define MEM_ACCESS_TABLE_IDX(gfn) ((gfn) / NR_ACCESS)
+
+/* Location in the page that has the p2m_access_t for the gfn */
+#define MEM_ACCESS_PAGE_IDX(gfn) ((gfn) % NR_ACCESS)
+
 #endif /* _XEN_P2M_H */
 
 /*
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index 9b8f8de..958e3ce 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -53,6 +53,8 @@
 /* Xen does not steal address space from the domain for its own booking;
  * requires VT or similar mechanisms */
 #define PG_external    (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << PG_mode_shift)
+/* Enable shadow mem_access mode for PV domains */
+#define PG_mem_access  (XEN_DOMCTL_SHADOW_ENABLE_MEM_ACCESS << PG_mode_shift)
 
 #define paging_mode_enabled(_d)   ((_d)->arch.paging.mode)
 #define paging_mode_shadow(_d)    ((_d)->arch.paging.mode & PG_SH_enable)
@@ -62,6 +64,7 @@
 #define paging_mode_log_dirty(_d) ((_d)->arch.paging.mode & PG_log_dirty)
 #define paging_mode_translate(_d) ((_d)->arch.paging.mode & PG_translate)
 #define paging_mode_external(_d)  ((_d)->arch.paging.mode & PG_external)
+#define paging_mode_mem_access(_d) ((_d)->arch.paging.mode & PG_mem_access)
 
 /* flags used for paging debug */
 #define PAGING_DEBUG_LOGDIRTY 0
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index f40cab4..ea2a47c 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -43,6 +43,8 @@
                                    paging_mode_translate(_d))
 #define shadow_mode_external(_d)  (paging_mode_shadow(_d) && \
                                    paging_mode_external(_d))
+#define shadow_mode_mem_access(_d) (paging_mode_shadow(_d) && \
+                                    paging_mode_mem_access(_d))
 
 /*****************************************************************************
  * Entry points into the shadow code */
@@ -86,6 +88,12 @@ int shadow_disable_log_dirty(struct domain *d);
 /* shadow code to call when bitmap is being cleaned */
 void shadow_clean_dirty_bitmap(struct domain *d);
 
+/* shadow code to call when mem_access is enabled */
+int shadow_enable_mem_access(struct domain *d);
+
+/* shadow code to call when mem_access is disabled */
+int shadow_disable_mem_access(struct domain *d);
+
 /* Update all the things that are derived from the guest's CR0/CR3/CR4.
  * Called to initialize paging structures if the paging mode
  * has changed, and when bringing up a VCPU for the first time. */
@@ -114,6 +122,9 @@ static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
 /* Discard _all_ mappings from the domain's shadows. */
 void shadow_blow_tables_per_domain(struct domain *d);
 
+/* Tear down all of this domain's shadows */
+void shadow_blow_tables(struct domain *d);
+
 #endif /* _XEN_SHADOW_H */
 
 /*
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 1b75ab2..209ffdd 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -225,6 +225,10 @@ struct xen_domctl_getpageframeinfo3 {
   * Requires HVM support.
   */
 #define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL  (1 << 4)
+ /*
+  * Enable shadow mem_access mode for PV domains
+  */
+#define XEN_DOMCTL_SHADOW_ENABLE_MEM_ACCESS (1 << 5)
 
 struct xen_domctl_shadow_op_stats {
     uint32_t fault_count;
-- 
1.9.1

