[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC PATCH 5/7] x86/m2b: Add a tracking structure for mfn to bfn mappings per page



Add a pointer to the page struct which refers to the head of
m2b tracking structure for that page.

Atomically add a PGC bit to the page struct when setting the pointer to
the m2b tracking structure.

Elements are added to the per-page m2b tracking structure via an
RCU-protected linked list.

Callers of add_m2b and del_m2b must hold the page lock for the page.
This prevents the m2b code racing with itself and allows for
fine-grained locking. References to the page are also expected to be taken
and dropped by the callers to the m2b code.

m2b references are forcibly removed as part of domain destroy to ensure
malicious guests do not cause memory [leaks].

To aid emulators tracking state, with asynchronous domain destruction, an
ioreq is issued to all affected emulators for each m2b mapping being removed.

Signed-off-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>
--
Cc: keir@xxxxxxx
Cc: jbeulich@xxxxxxxx
Cc: andrew.cooper3@xxxxxxxxxx
Cc: george.dunlap@xxxxxxxxxxxxx
Cc: xen-devel@xxxxxxxxxxxxx
---
 xen/arch/x86/domain.c          |  12 ++-
 xen/arch/x86/mm/Makefile       |   1 +
 xen/arch/x86/mm/m2b.c          | 209 +++++++++++++++++++++++++++++++++++++++++
 xen/common/memory.c            |  11 +++
 xen/include/asm-x86/domain.h   |   1 +
 xen/include/asm-x86/m2b.h      |  59 ++++++++++++
 xen/include/asm-x86/mm.h       |  12 ++-
 xen/include/public/hvm/ioreq.h |   1 +
 xen/include/xen/sched.h        |   4 +
 9 files changed, 308 insertions(+), 2 deletions(-)
 create mode 100644 xen/arch/x86/mm/m2b.c
 create mode 100644 xen/include/asm-x86/m2b.h

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 9d43f7b..715502e 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -62,6 +62,7 @@
 #include <xen/iommu.h>
 #include <compat/vcpu.h>
 #include <asm/psr.h>
+#include <asm/m2b.h>
 
 DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
 DEFINE_PER_CPU(unsigned long, cr4);
@@ -2511,7 +2512,7 @@ int domain_relinquish_resources(struct domain *d)
                 return ret;
         }
 
-        d->arch.relmem = RELMEM_xen;
+        d->arch.relmem = RELMEM_m2b;
 
         spin_lock(&d->page_alloc_lock);
         page_list_splice(&d->arch.relmem_list, &d->page_list);
@@ -2519,6 +2520,15 @@ int domain_relinquish_resources(struct domain *d)
         spin_unlock(&d->page_alloc_lock);
 
         /* Fallthrough. Relinquish every page of memory. */
+
+    case RELMEM_m2b:
+
+        ret = m2b_domain_destroy(d, d->m2b_destroy_mfn);
+        if ( ret )
+            return ret;
+        d->arch.relmem = RELMEM_xen;
+        /* fallthrough */
+
     case RELMEM_xen:
         ret = relinquish_memory(d, &d->xenpage_list, ~0UL);
         if ( ret )
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 9804c3a..84ad8c0 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -9,6 +9,7 @@ obj-y += guest_walk_3.o
 obj-y += guest_walk_4.o
 obj-y += mem_paging.o
 obj-y += mem_sharing.o
+obj-y += m2b.o
 
 guest_walk_%.o: guest_walk.c Makefile
        $(CC) $(CFLAGS) -DGUEST_PAGING_LEVELS=$* -c $< -o $@
diff --git a/xen/arch/x86/mm/m2b.c b/xen/arch/x86/mm/m2b.c
new file mode 100644
index 0000000..078944a
--- /dev/null
+++ b/xen/arch/x86/mm/m2b.c
@@ -0,0 +1,209 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <asm/m2b.h>
+#include <asm/event.h>
+
+struct m2b_head /* Per-page head of the m2b entry list; stored in page->pv_iommu.
+                 * NOTE(review): the mm.h hunk of this patch declares that
+                 * field as 'struct pv_iommu_info *' - confirm the intended
+                 * type. */
+{
+    struct page_info *pg;     /* Presumably the page this head tracks - TODO confirm. */
+    struct list_head head;    /* Anchor of the struct m2b_entry list (walked under RCU). */
+    unsigned int count;       /* Number of entries currently linked on 'head'. */
+    struct rcu_head rcu_head; /* Initialised by add_m2b_entry(). */
+};
+
+DEFINE_RCU_READ_LOCK(m2b_rcu); /* Read-side lock taken by lookup_m2b_entry(). */
+
+/*
+ * Find an m2b entry on @page that belongs to domain @d.
+ *
+ * @ioserver: ioserver id to match, or IOSERVER_ANY to match any ioserver.
+ * @bfn:      bus frame number to match, or BFN_ANY to match any bfn.
+ *
+ * Returns the first matching entry, or NULL when the page has no m2b
+ * tracking (_PGC_foreign_map clear) or no entry matches.
+ *
+ * The list walk is done inside an RCU read-side section, but the pointer is
+ * returned after that section has ended.  Callers that dereference the
+ * result presumably rely on holding the page lock, as callers of
+ * add_m2b_entry()/del_m2b_entry() are required to - TODO confirm for any
+ * caller that does not hold it.
+ */
+struct m2b_entry *lookup_m2b_entry(struct page_info *page, struct domain *d,
+                                   ioservid_t ioserver, unsigned long bfn)
+{
+    struct m2b_entry *found = NULL;
+    struct list_head *pos;
+    const domid_t domain = d->domain_id;
+    /*
+     * Cast the wildcards to the parameter types before comparing.  If
+     * ioservid_t is narrower than int (TODO confirm its definition), an
+     * uncast comparison against a plain ~0 could never be true.
+     */
+    const int any_ioserver = (ioserver == (ioservid_t)IOSERVER_ANY);
+    const int any_bfn = (bfn == (unsigned long)BFN_ANY);
+
+    rcu_read_lock(&m2b_rcu);
+
+    /*
+     * Test the flag inside the read-side section: provided the per-page
+     * head is released through RCU, a head observed here stays valid for
+     * the duration of the walk.
+     */
+    if ( !test_bit(_PGC_foreign_map, &page->count_info) )
+        goto out;
+
+    list_for_each_rcu(pos, &page->pv_iommu->head)
+    {
+        struct m2b_entry *m2b_e = list_entry(pos, struct m2b_entry, list);
+
+        if ( m2b_e->domain != domain )
+            continue;
+        if ( !any_ioserver && m2b_e->ioserver != ioserver )
+            continue;
+        if ( !any_bfn && m2b_e->bfn != bfn )
+            continue;
+
+        /*
+         * Only record a result on an actual match.  Previously the cursor
+         * itself was returned, so an exhausted walk handed back the last
+         * (non-matching) entry instead of NULL.
+         */
+        found = m2b_e;
+        break;
+    }
+
+ out:
+    rcu_read_unlock(&m2b_rcu);
+
+    return found;
+}
+
+/* Stub: the body is empty, so no notification is sent for @page here. */
+void notify_m2b_entries(struct page_info *page)
+{
+}
+
+/*
+ * Record that (@d, @ioserver, @bfn) maps @page.
+ *
+ * Must be called with page_lock held.
+ *
+ * On the first mapping of a page this also allocates the per-page tracking
+ * head, stores it in page->pv_iommu and sets _PGC_foreign_map.
+ *
+ * Returns 0 on success, or -ENOMEM if either allocation fails; on failure
+ * the page state (pv_iommu pointer and _PGC_foreign_map) is left untouched.
+ */
+int add_m2b_entry(struct page_info *page, struct domain *d,
+                  ioservid_t ioserver, uint64_t bfn)
+{
+    struct m2b_entry *m2b_e;
+
+    /*
+     * Allocate the entry before touching the page so that a failure needs
+     * no rollback.  The previous ordering freed a freshly allocated head on
+     * failure but left _PGC_foreign_map set and page->pv_iommu dangling.
+     */
+    m2b_e = xmalloc(struct m2b_entry);
+    if ( !m2b_e )
+        return -ENOMEM;
+
+    if ( !test_bit(_PGC_foreign_map, &page->count_info) )
+    {
+        struct m2b_head *head = xmalloc(struct m2b_head);
+
+        if ( !head )
+        {
+            xfree(m2b_e);
+            return -ENOMEM;
+        }
+
+        /* Fully initialise the head before it becomes visible to readers. */
+        head->pg = page;
+        head->count = 0;
+        INIT_LIST_HEAD(&head->head);
+        INIT_RCU_HEAD(&head->rcu_head);
+
+        /*
+         * Publish the pointer first, then the flag that readers test.
+         * NOTE(review): this relies on set_bit() ordering the two stores -
+         * confirm it implies a barrier here.
+         */
+        page->pv_iommu = head;
+        set_bit(_PGC_foreign_map, &page->count_info);
+    }
+
+    m2b_e->domain = d->domain_id;
+    m2b_e->ioserver = ioserver;
+    m2b_e->bfn = bfn;
+    INIT_LIST_HEAD(&m2b_e->list);
+    INIT_RCU_HEAD(&m2b_e->rcu);
+
+    list_add_rcu(&m2b_e->list, &page->pv_iommu->head);
+    page->pv_iommu->count++;
+    atomic_inc(&d->m2b_count);
+
+    return 0;
+}
+
+/* RCU callback: free the struct m2b_entry that embeds @rcu. */
+void free_m2b_entry(struct rcu_head *rcu)
+{
+    struct m2b_entry *m2b_e = container_of(rcu, struct m2b_entry, rcu);
+
+    xfree(m2b_e);
+}
+
+/* RCU callback: free the per-page struct m2b_head that embeds @rcu. */
+static void free_m2b_head(struct rcu_head *rcu)
+{
+    xfree(container_of(rcu, struct m2b_head, rcu_head));
+}
+
+/*
+ * Remove one m2b entry for (@d, @ioserver, @bfn) from @page.
+ *
+ * Must be called with page_lock held.
+ *
+ * The entry is unlinked and freed after an RCU grace period.  When the last
+ * entry of the page goes away, _PGC_foreign_map is cleared and the per-page
+ * head is released as well.
+ *
+ * Returns 0 on success, or -ENOENT if lookup_m2b_entry() finds no entry.
+ */
+int del_m2b_entry(struct page_info *page, struct domain *d,
+                  ioservid_t ioserver, unsigned long bfn)
+{
+    struct m2b_entry *m2b_e;
+
+    m2b_e = lookup_m2b_entry(page, d, ioserver, bfn);
+    if ( !m2b_e )
+        return -ENOENT;
+
+    list_del_rcu(&m2b_e->list);
+    call_rcu(&m2b_e->rcu, free_m2b_entry);
+    atomic_dec(&d->m2b_count);
+
+    if ( --page->pv_iommu->count == 0 )
+    {
+        clear_bit(_PGC_foreign_map, &page->count_info);
+        /*
+         * The list anchor lives inside the head, and lookup_m2b_entry()
+         * walks it under RCU without the page lock.  Defer the free through
+         * the head's own rcu_head instead of calling xfree() directly, so a
+         * walk that is already in progress does not touch freed memory.
+         */
+        call_rcu(&page->pv_iommu->rcu_head, free_m2b_head);
+    }
+
+    return 0;
+}
+
+
+/* Remove all M2B entries created by the domain being destroyed.
+ *
+ * Scans MFNs from @mfn up to max_page.  For every valid page owned by @d
+ * that has at least one entry for @d or for hardware_domain, the page lock
+ * is taken and those entries are deleted one at a time.
+ *
+ * Returns 0 when the scan completes, when d->m2b_count is zero on entry, or
+ * once it drops to zero during the scan.  Returns -ERESTART, after saving
+ * the current MFN in d->m2b_destroy_mfn, when hypercall_preempt_check()
+ * requests preemption.
+ */
+int m2b_domain_destroy(struct domain *d, unsigned long mfn)
+{
+    struct m2b_entry *m2b_e, *m2b_e_hwdom;
+    struct page_info *page;
+    int locked;
+
+    if ( ! atomic_read(&d->m2b_count) ) /* Nothing tracked: skip the scan. */
+        return 0;
+
+
+    for ( ; mfn < max_page; mfn++ )
+    {
+        /* Check for preemption every 4 MB.
+         * The MFN equal to d->m2b_destroy_mfn is exempt from the check.
+         * NOTE(review): the interval is 0x1000 frames, which is 16 MB if
+         * frames are 4 KB - confirm which figure is intended.
+         */
+        if ( mfn % 0x1000 == 0 && mfn != d->m2b_destroy_mfn)
+        {
+            if ( hypercall_preempt_check() )
+            {
+                d->m2b_destroy_mfn = mfn; /* Saved progress for the next call. */
+                return -ERESTART;
+            }
+        }
+        if (!mfn_valid(mfn))
+            continue;
+
+        page = mfn_to_page(mfn);
+        if ( !page || (page_get_owner(page) != d) )
+            continue;
+
+        m2b_e = lookup_m2b_entry(page, d,
+                        IOSERVER_ANY, BFN_ANY); /* Unlocked pre-check. */
+        m2b_e_hwdom = lookup_m2b_entry(page, hardware_domain,
+                        IOSERVER_ANY, BFN_ANY);
+        if ( !m2b_e && !m2b_e_hwdom )
+            continue;
+
+        locked = page_lock(page); /* Result only gates the unlock below; the
+                                   * deletions run either way.
+                                   * NOTE(review): confirm that is intended. */
+
+        /* Remove all M2B entries for this domain */
+        while ( (m2b_e = lookup_m2b_entry(page, d,
+                                          IOSERVER_ANY, BFN_ANY)) )
+        {
+            del_m2b_entry(page, d,
+                          m2b_e->ioserver,
+                          m2b_e->bfn);
+        }
+        /* Remove all M2B entries for hwdom */
+        while ( (m2b_e = lookup_m2b_entry(page, hardware_domain,
+                                          IOSERVER_ANY, BFN_ANY)) )
+        {
+            del_m2b_entry(page, hardware_domain,
+                          m2b_e->ioserver,
+                          m2b_e->bfn);
+            atomic_dec(&d->m2b_count); /* On top of the decrement that
+                                        * del_m2b_entry() applies to
+                                        * hardware_domain's own counter. */
+        }
+
+        if ( locked )
+            page_unlock(page);
+        /* Remove this domain's reference.
+         * NOTE(review): one put_page() per page, with no matching
+         * get_page() visible in this function - presumably taken when the
+         * mapping was created; confirm the reference pairing.
+         */
+        put_page(page);
+        if ( ! atomic_read(&d->m2b_count) ) /* All entries gone: stop early. */
+            break;
+    }
+
+    return 0;
+}
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 9ff1145..df470ba 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -29,6 +29,7 @@
 #include <asm/p2m.h>
 #include <public/memory.h>
 #include <xsm/xsm.h>
+#include <asm/m2b.h>
 
 struct memop_args {
     /* INPUT */
@@ -316,6 +317,16 @@ int guest_remove_page(struct domain *d, unsigned long gmfn)
          test_and_clear_bit(_PGC_allocated, &page->count_info) )
         put_page(page);
 
+#ifdef CONFIG_X86
+    /* Check for M2B mapping */
+    if ( is_hvm_domain(d) &&
+            test_bit(_PGC_foreign_map, &page->count_info) )
+    {
+        /* Notify ioserver with M2B mappings */
+        notify_m2b_entries(page);
+    }
+#endif
+
     guest_physmap_remove_page(d, gmfn, mfn, 0);
 
     put_page(page);
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 4072e27..6dc6ee5 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -302,6 +302,7 @@ struct arch_domain
     enum {
         RELMEM_not_started,
         RELMEM_shared,
+        RELMEM_m2b,
         RELMEM_xen,
         RELMEM_l4,
         RELMEM_l3,
diff --git a/xen/include/asm-x86/m2b.h b/xen/include/asm-x86/m2b.h
new file mode 100644
index 0000000..a21502c
--- /dev/null
+++ b/xen/include/asm-x86/m2b.h
@@ -0,0 +1,59 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_M2B_H__
+#define __XEN_M2B_H__
+
+#include <xen/sched.h>
+
+struct m2b_entry /* One {domain, ioserver, bfn} record on a page's m2b list. */
+{
+    struct list_head list;  /* Link in the per-page list (RCU list helpers). */
+    domid_t domain;         /* domain_id of the domain given to add_m2b_entry(). */
+    ioservid_t ioserver;    /* ioserver id given to add_m2b_entry(). */
+    uint64_t bfn;           /* bfn given to add_m2b_entry(). */
+    struct rcu_head rcu;    /* Lets del_m2b_entry() defer the free via call_rcu(). */
+};
+
+/* Remove a dying domain's m2b entries, scanning MFNs starting at @mfn. */
+int m2b_domain_destroy(struct domain *d, unsigned long mfn);
+
+void notify_m2b_entries(struct page_info *page);
+
+/*
+ * Wildcards accepted by lookup_m2b_entry().  Both are parenthesised so they
+ * expand safely inside larger expressions.  IOSERVER_ANY is cast to
+ * ioservid_t so that comparing it with an ioservid_t value can match even
+ * if that type is narrower than int.
+ */
+#define BFN_ANY         (~0UL)
+#define IOSERVER_ANY    ((ioservid_t)~0)
+
+struct m2b_entry *lookup_m2b_entry(struct page_info *page, struct domain *d,
+                                   ioservid_t ioserver, unsigned long bfn);
+int add_m2b_entry(struct page_info *page, struct domain *d,
+                  ioservid_t ioserver, uint64_t bfn);
+int del_m2b_entry(struct page_info *page, struct domain *d,
+                  ioservid_t ioserver, unsigned long bfn);
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index a097382..620ccd5 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -65,6 +65,12 @@ struct page_info
          */
         struct page_sharing_info *sharing;
     };
+    /* For foreign mapped pages, we use a doubly-linked list
+     * of all the {pfn,domain, ioserver} tuples that map this page.
+     * This list is allocated and freed when a page is foreign
+     * mapped/unmapped.
+     */
+    struct pv_iommu_info *pv_iommu;
 
     /* Reference count and various PGC_xxx flags and fields. */
     unsigned long count_info;
@@ -230,8 +236,12 @@ struct page_info
 #define PGC_state_free    PG_mask(3, 9)
 #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
 
+/* Page has foreign mappings? */
+#define _PGC_foreign_map  PG_shift(10)
+#define PGC_foreign_map   PG_mask(1, 10)
+
  /* Count of references to this frame. */
-#define PGC_count_width   PG_shift(9)
+#define PGC_count_width   PG_shift(10)
 #define PGC_count_mask    ((1UL<<PGC_count_width)-1)
 
 struct spage_info
diff --git a/xen/include/public/hvm/ioreq.h b/xen/include/public/hvm/ioreq.h
index 2e5809b..c8ecc6e 100644
--- a/xen/include/public/hvm/ioreq.h
+++ b/xen/include/public/hvm/ioreq.h
@@ -37,6 +37,7 @@
 #define IOREQ_TYPE_PCI_CONFIG   2
 #define IOREQ_TYPE_TIMEOFFSET   7
 #define IOREQ_TYPE_INVALIDATE   8 /* mapcache */
+#define IOREQ_TYPE_INVAL_BFN    9 /* bfn */
 
 /*
  * VMExit dispatcher should cooperate with instruction decoder to
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index b47a3fe..3085be2 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -464,6 +464,10 @@ struct domain
     /* vNUMA topology accesses are protected by rwlock. */
     rwlock_t vnuma_rwlock;
     struct vnuma_info *vnuma;
+
+    /* Progress of cleaning m2b for domain destroy */
+    unsigned long m2b_destroy_mfn;
+    atomic_t m2b_count;
 };
 
 /* Protect updates/reads (resp.) of domain_list and domain_hash. */
-- 
1.7.12.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.