
[Xen-devel] [RFC XEN PATCH 04/16] xen/x86: add XENMEM_populate_pmemmap to map host pmem pages to guest



XENMEM_populate_pmemmap is used by the toolstack to map the given host
pmem pages to the given guest pages. Only pages in the data area of a
pmem region are allowed to be mapped to a guest.
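
For illustration, a minimal usage sketch of the new libxc wrapper is
shown below. It is not part of this patch, and the domain ID, host MFN,
guest PFN and page count are hypothetical placeholders:

    #include <stdio.h>
    #include <xenctrl.h>

    /* Sketch only: all numeric values below are made up. */
    static int map_pmem_example(uint32_t domid)
    {
        xc_interface *xch = xc_interface_open(NULL, NULL, 0);
        xen_pfn_t mfn  = 0x100000;   /* host MFN inside a pmem data area */
        xen_pfn_t gpfn = 0x200000;   /* target guest PFN */
        unsigned int nr_mfns = 256;  /* map 256 4K pages */
        int rc;

        if ( !xch )
            return -1;

        rc = xc_domain_populate_pmemmap(xch, domid, mfn, gpfn, nr_mfns);
        if ( rc )
            fprintf(stderr, "XENMEM_populate_pmemmap failed: %d\n", rc);

        xc_interface_close(xch);
        return rc;
    }

On success, the nr_mfns host pmem pages starting at mfn appear in the
guest physmap at gpfn ~ (gpfn + nr_mfns - 1); if the hypercall is
preempted, it continues transparently via a hypercall continuation.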

Signed-off-by: Haozhong Zhang <haozhong.zhang@xxxxxxxxx>
---
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 tools/libxc/include/xenctrl.h |   8 +++
 tools/libxc/xc_domain.c       |  14 +++++
 xen/arch/x86/pmem.c           | 123 ++++++++++++++++++++++++++++++++++++++++++
 xen/common/domain.c           |   3 ++
 xen/common/memory.c           |  31 +++++++++++
 xen/include/public/memory.h   |  14 ++++-
 xen/include/xen/pmem.h        |  10 ++++
 xen/include/xen/sched.h       |   3 ++
 8 files changed, 205 insertions(+), 1 deletion(-)

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 2c83544..46c71fc 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2710,6 +2710,14 @@ int xc_livepatch_revert(xc_interface *xch, char *name, uint32_t timeout);
 int xc_livepatch_unload(xc_interface *xch, char *name, uint32_t timeout);
 int xc_livepatch_replace(xc_interface *xch, char *name, uint32_t timeout);
 
+/**
+ * Map host pmem pages at MFNs @mfn ~ (@mfn + @nr_mfns - 1) to
+ * guest physical pages at guest PFNs @gpfn ~ (@gpfn + @nr_mfns - 1).
+ */
+int xc_domain_populate_pmemmap(xc_interface *xch, uint32_t domid,
+                               xen_pfn_t mfn, xen_pfn_t gpfn,
+                               unsigned int nr_mfns);
+
 /* Compat shims */
 #include "xenctrl_compat.h"
 
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 296b852..81a90a1 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -2520,6 +2520,20 @@ int xc_domain_soft_reset(xc_interface *xch,
     domctl.domain = (domid_t)domid;
     return do_domctl(xch, &domctl);
 }
+
+int xc_domain_populate_pmemmap(xc_interface *xch, uint32_t domid,
+                               xen_pfn_t mfn, xen_pfn_t gpfn,
+                               unsigned int nr_mfns)
+{
+    struct xen_pmemmap pmemmap = {
+        .domid   = domid,
+        .mfn     = mfn,
+        .gpfn    = gpfn,
+        .nr_mfns = nr_mfns,
+    };
+    return do_memory_op(xch, XENMEM_populate_pmemmap, &pmemmap, sizeof(pmemmap));
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/pmem.c b/xen/arch/x86/pmem.c
index 70358ed..e4dc685 100644
--- a/xen/arch/x86/pmem.c
+++ b/xen/arch/x86/pmem.c
@@ -24,6 +24,9 @@
 #include <xen/spinlock.h>
 #include <xen/pmem.h>
 #include <xen/iocap.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <xen/paging.h>
 #include <asm-x86/mm.h>
 
 /*
@@ -63,6 +66,48 @@ static int check_reserved_size(unsigned long rsv_mfns, unsigned long total_mfns)
         ((sizeof(*machine_to_phys_mapping) * total_mfns) >> PAGE_SHIFT);
 }
 
+static int is_data_mfn(unsigned long mfn)
+{
+    struct list_head *cur;
+    int data = 0;
+
+    ASSERT(spin_is_locked(&pmem_list_lock));
+
+    list_for_each(cur, &pmem_list)
+    {
+        struct pmem *pmem = list_entry(cur, struct pmem, link);
+
+        if ( pmem->data_spfn <= mfn && mfn < pmem->data_epfn )
+        {
+            data = 1;
+            break;
+        }
+    }
+
+    return data;
+}
+
+static int pmem_page_valid(struct page_info *page, struct domain *d)
+{
+    /* only data area can be mapped to guest */
+    if ( !is_data_mfn(page_to_mfn(page)) )
+    {
+        dprintk(XENLOG_DEBUG, "pmem: mfn 0x%lx is not a pmem data page\n",
+                page_to_mfn(page));
+        return 0;
+    }
+
+    /* inuse/offlined/offlining pmem page cannot be mapped to guest */
+    if ( !page_state_is(page, free) )
+    {
+        dprintk(XENLOG_DEBUG, "pmem: invalid page state of mfn 0x%lx: 0x%lx\n",
+                page_to_mfn(page), page->count_info & PGC_state);
+        return 0;
+    }
+
+    return 1;
+}
+
 static int pmem_add_check(unsigned long spfn, unsigned long epfn,
                           unsigned long rsv_spfn, unsigned long rsv_epfn,
                           unsigned long data_spfn, unsigned long data_epfn)
@@ -159,3 +204,81 @@ int pmem_add(unsigned long spfn, unsigned long epfn,
  out:
     return ret;
 }
+
+static int pmem_assign_pages(struct domain *d,
+                             struct page_info *pg, unsigned int order)
+{
+    int rc = 0;
+    unsigned long i;
+
+    spin_lock(&d->pmem_lock);
+
+    if ( unlikely(d->is_dying) )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    for ( i = 0; i < (1 << order); i++ )
+    {
+        ASSERT(page_get_owner(&pg[i]) == NULL);
+        ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
+        page_set_owner(&pg[i], d);
+        smp_wmb();
+        pg[i].count_info = PGC_allocated | 1;
+        page_list_add_tail(&pg[i], &d->pmem_page_list);
+    }
+
+ out:
+    spin_unlock(&d->pmem_lock);
+    return rc;
+}
+
+int pmem_populate(struct xen_pmemmap_args *args)
+{
+    struct domain *d = args->domain;
+    unsigned long i, mfn, gpfn;
+    struct page_info *page;
+    int rc = 0;
+
+    if ( !has_hvm_container_domain(d) || !paging_mode_translate(d) )
+        return -EINVAL;
+
+    for ( i = args->nr_done, mfn = args->mfn + i, gpfn = args->gpfn + i;
+          i < args->nr_mfns;
+          i++, mfn++, gpfn++ )
+    {
+        if ( i != args->nr_done && hypercall_preempt_check() )
+        {
+            args->preempted = 1;
+            goto out;
+        }
+
+        page = mfn_to_page(mfn);
+
+        spin_lock(&pmem_list_lock);
+        if ( !pmem_page_valid(page, d) )
+        {
+            dprintk(XENLOG_DEBUG, "pmem: MFN 0x%lx not a valid pmem page\n", 
mfn);
+            spin_unlock(&pmem_list_lock);
+            rc = -EINVAL;
+            goto out;
+        }
+        page->count_info = PGC_state_inuse;
+        spin_unlock(&pmem_list_lock);
+
+        page->u.inuse.type_info = 0;
+
+        guest_physmap_add_page(d, _gfn(gpfn), _mfn(mfn), 0);
+        if ( pmem_assign_pages(d, page, 0) )
+        {
+            guest_physmap_remove_page(d, _gfn(gpfn), _mfn(mfn), 0);
+            rc = -EFAULT;
+            goto out;
+        }
+    }
+
+ out:
+    args->nr_done = i;
+    return rc;
+}
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 3abaca9..8192548 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -288,6 +288,9 @@ struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
     INIT_PAGE_LIST_HEAD(&d->page_list);
     INIT_PAGE_LIST_HEAD(&d->xenpage_list);
 
+    spin_lock_init_prof(d, pmem_lock);
+    INIT_PAGE_LIST_HEAD(&d->pmem_page_list);
+
     spin_lock_init(&d->node_affinity_lock);
     d->node_affinity = NODE_MASK_ALL;
     d->auto_node_affinity = 1;
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 21797ca..09cb1c9 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -24,6 +24,7 @@
 #include <xen/numa.h>
 #include <xen/mem_access.h>
 #include <xen/trace.h>
+#include <xen/pmem.h>
 #include <asm/current.h>
 #include <asm/hardirq.h>
 #include <asm/p2m.h>
@@ -1329,6 +1330,36 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     }
 #endif
 
+    case XENMEM_populate_pmemmap:
+    {
+        struct xen_pmemmap pmemmap;
+        struct xen_pmemmap_args args;
+
+        if ( copy_from_guest(&pmemmap, arg, 1) )
+            return -EFAULT;
+
+        d = rcu_lock_domain_by_any_id(pmemmap.domid);
+        if ( !d )
+            return -EINVAL;
+
+        args.domain = d;
+        args.mfn = pmemmap.mfn;
+        args.gpfn = pmemmap.gpfn;
+        args.nr_mfns = pmemmap.nr_mfns;
+        args.nr_done = start_extent;
+        args.preempted = 0;
+
+        rc = pmem_populate(&args);
+        rcu_unlock_domain(d);
+
+        if ( !rc && args.preempted )
+            return hypercall_create_continuation(
+                __HYPERVISOR_memory_op, "lh",
+                op | (args.nr_done << MEMOP_EXTENT_SHIFT), arg);
+
+        break;
+    }
+
     default:
         rc = arch_memory_op(cmd, arg);
         break;
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 5bf840f..8c048fc 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -646,7 +646,19 @@ struct xen_vnuma_topology_info {
 typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t;
 DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t);
 
-/* Next available subop number is 28 */
+#define XENMEM_populate_pmemmap 28
+
+struct xen_pmemmap {
+    /* IN */
+    domid_t domid;
+    xen_pfn_t mfn;
+    xen_pfn_t gpfn;
+    unsigned int nr_mfns;
+};
+typedef struct xen_pmemmap xen_pmemmap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pmemmap_t);
+
+/* Next available subop number is 29 */
 
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
 
diff --git a/xen/include/xen/pmem.h b/xen/include/xen/pmem.h
index a670ab8..60adf56 100644
--- a/xen/include/xen/pmem.h
+++ b/xen/include/xen/pmem.h
@@ -24,8 +24,18 @@
 
 #include <xen/types.h>
 
+struct xen_pmemmap_args {
+    struct domain *domain;
+    xen_pfn_t mfn;
+    xen_pfn_t gpfn;
+    unsigned int nr_mfns;
+    unsigned int nr_done;
+    int preempted;
+};
+
 int pmem_add(unsigned long spfn, unsigned long epfn,
              unsigned long rsv_spfn, unsigned long rsv_epfn,
              unsigned long data_spfn, unsigned long data_epfn);
+int pmem_populate(struct xen_pmemmap_args *args);
 
 #endif /* __XEN_PMEM_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 1fbda87..3c66225 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -329,6 +329,9 @@ struct domain
     atomic_t         shr_pages;       /* number of shared pages             */
     atomic_t         paged_pages;     /* number of paged-out pages          */
 
+    spinlock_t       pmem_lock;       /* protect all following pmem_ fields */
+    struct page_list_head pmem_page_list; /* linked list of pmem pages      */
+
     /* Scheduling. */
     void            *sched_priv;    /* scheduler-specific data */
     struct cpupool  *cpupool;
-- 
2.10.1

