[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC XEN PATCH v4 22/41] xen/pmem: support setup PMEM region for guest data usage



Allow the command XEN_SYSCTL_nvdimm_pmem_setup of hypercall
XEN_SYSCTL_nvdimm_op to set up a PMEM region for guest data
usage. After the setup, that PMEM region can be mapped into
the guest address space.

Signed-off-by: Haozhong Zhang <haozhong.zhang@xxxxxxxxx>
---
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Cc: George Dunlap <George.Dunlap@xxxxxxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Cc: Stefano Stabellini <sstabellini@xxxxxxxxxx>
Cc: Tim Deegan <tim@xxxxxxx>
---
 tools/libxc/include/xenctrl.h |  22 ++++++++
 tools/libxc/xc_misc.c         |  17 ++++++
 xen/common/pmem.c             | 118 +++++++++++++++++++++++++++++++++++++++++-
 xen/include/public/sysctl.h   |   3 +-
 4 files changed, 157 insertions(+), 3 deletions(-)

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 935885d6a7..5194d3ff5e 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2656,6 +2656,28 @@ int xc_nvdimm_pmem_get_regions(xc_interface *xch, uint8_t type,
 int xc_nvdimm_pmem_setup_mgmt(xc_interface *xch,
                               unsigned long smfn, unsigned long emfn);
 
+/*
+ * Set up the specified PMEM pages for guest data usage. On success,
+ * these PMEM pages can be mapped to a guest and used as the backend
+ * of vNVDIMM devices.
+ *
+ * Parameters:
+ *  xch:        xc interface handle
+ *  smfn, emfn: the start and end MFN of the PMEM region
+ *  mgmt_smfn,
+ *  mgmt_emfn:  the start and end MFN of the PMEM region that is
+ *              used to manage this PMEM region. It must be in one of
+ *              those added by xc_nvdimm_pmem_setup_mgmt() calls, and
+ *              must not overlap with @smfn - @emfn.
+ *
+ * Return:
+ *  On success, return 0. Otherwise, return a non-zero error code.
+ */
+int xc_nvdimm_pmem_setup_data(xc_interface *xch,
+                              unsigned long smfn, unsigned long emfn,
+                              unsigned long mgmt_smfn,
+                              unsigned long mgmt_emfn);
+
 /* Compat shims */
 #include "xenctrl_compat.h"
 
diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c
index 77f93ffd9a..940bf61931 100644
--- a/tools/libxc/xc_misc.c
+++ b/tools/libxc/xc_misc.c
@@ -1016,6 +1016,23 @@ int xc_nvdimm_pmem_setup_mgmt(xc_interface *xch,
     return rc;
 }
 
+/* Issue a pmem_setup sysctl of type PMEM_REGION_TYPE_DATA. */
+int xc_nvdimm_pmem_setup_data(xc_interface *xch,
+                              unsigned long smfn, unsigned long emfn,
+                              unsigned long mgmt_smfn, unsigned long mgmt_emfn)
+{
+    DECLARE_SYSCTL;
+    int ret;
+
+    /* Pass both MFN ranges to the common helper, then select the type. */
+    xc_nvdimm_pmem_setup_common(&sysctl, smfn, emfn, mgmt_smfn, mgmt_emfn);
+    sysctl.u.nvdimm.u.pmem_setup.type = PMEM_REGION_TYPE_DATA;
+
+    ret = do_sysctl(xch, &sysctl);
+
+    /* Success, or a failure that left no NVDIMM error code behind. */
+    if ( ret == 0 || sysctl.u.nvdimm.err == 0 )
+        return ret;
+
+    /* Otherwise report the NVDIMM error code, negated. */
+    return -sysctl.u.nvdimm.err;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/common/pmem.c b/xen/common/pmem.c
index e286d033f2..ed4eba7f64 100644
--- a/xen/common/pmem.c
+++ b/xen/common/pmem.c
@@ -34,16 +34,26 @@ static unsigned int nr_raw_regions;
 /*
  * All PMEM regions reserved for management purpose are linked to this
  * list. All of them must be covered by one or multiple PMEM regions
- * in list pmem_raw_regions.
+ * in list pmem_raw_regions, and not appear in list pmem_data_regions.
  */
 static LIST_HEAD(pmem_mgmt_regions);
 static DEFINE_SPINLOCK(pmem_mgmt_lock);
 static unsigned int nr_mgmt_regions;
 
+/*
+ * All PMEM regions that can be mapped to guest are linked to this
+ * list. All of them must be covered by one or multiple PMEM regions
+ * in list pmem_raw_regions, and not appear in list pmem_mgmt_regions.
+ */
+static LIST_HEAD(pmem_data_regions);
+static DEFINE_SPINLOCK(pmem_data_lock);
+static unsigned int nr_data_regions;
+
 struct pmem {
     struct list_head link; /* link to one of PMEM region list */
     unsigned long smfn;    /* start MFN of the PMEM region */
     unsigned long emfn;    /* end MFN of the PMEM region */
+    spinlock_t lock;
 
     union {
         struct {
@@ -53,6 +63,11 @@ struct pmem {
         struct {
             unsigned long used; /* # of used pages in MGMT PMEM region */
         } mgmt;
+
+        struct {
+            unsigned long mgmt_smfn; /* start MFN of management region */
+            unsigned long mgmt_emfn; /* end MFN of management region */
+        } data;
     } u;
 };
 
@@ -105,6 +120,7 @@ static int pmem_list_add(struct list_head *list,
 
     new_pmem->smfn = smfn;
     new_pmem->emfn = emfn;
+    spin_lock_init(&new_pmem->lock);
     list_add(&new_pmem->link, cur);
     if ( entry )
         *entry = new_pmem;
@@ -253,9 +269,16 @@ static int pmem_get_regions(xen_sysctl_nvdimm_pmem_regions_t *regions)
 
+/*
+ * Check whether mgmt_mfns management pages are enough for total_mfns
+ * pages, counting one struct page_info and one machine_to_phys_mapping
+ * entry per page. A debug message is logged when they are not.
+ *
+ * NOTE(review): each of the two terms is rounded down to whole pages by
+ * the right shift, so the requirement may be under-estimated when a
+ * table size is not a multiple of the page size - confirm this is
+ * intended.
+ */
 static bool check_mgmt_size(unsigned long mgmt_mfns, unsigned long total_mfns)
 {
-    return mgmt_mfns >=
+    unsigned long required =
         ((sizeof(struct page_info) * total_mfns) >> PAGE_SHIFT) +
         ((sizeof(*machine_to_phys_mapping) * total_mfns) >> PAGE_SHIFT);
+
+    if ( required > mgmt_mfns )
+        printk(XENLOG_DEBUG "PMEM: insufficient management pages, "
+               "0x%lx pages required, 0x%lx pages available\n",
+               required, mgmt_mfns);
+
+    return mgmt_mfns >= required;
 }
 
 static bool check_address_and_pxm(unsigned long smfn, unsigned long emfn,
@@ -333,6 +356,93 @@ static int pmem_setup_mgmt(unsigned long smfn, unsigned long emfn)
     return rc;
 }
 
+/*
+ * Return the first region in pmem_mgmt_regions that covers the whole
+ * range smfn - emfn, or NULL if there is none.
+ *
+ * The caller must hold pmem_mgmt_lock.
+ */
+static struct pmem *find_mgmt_region(unsigned long smfn, unsigned long emfn)
+{
+    struct list_head *pos;
+
+    ASSERT(spin_is_locked(&pmem_mgmt_lock));
+
+    list_for_each(pos, &pmem_mgmt_regions)
+    {
+        struct pmem *region = list_entry(pos, struct pmem, link);
+
+        /* Skip regions that do not contain the requested range. */
+        if ( smfn < region->smfn || emfn > region->emfn )
+            continue;
+
+        return region;
+    }
+
+    return NULL;
+}
+
+/*
+ * Set up the PMEM region smfn - emfn (emfn - smfn pages) for guest data
+ * usage and link it into pmem_data_regions.
+ *
+ * The management pages are taken from the management region that covers
+ * mgmt_smfn - mgmt_emfn: the range from that region's first unused page
+ * up to mgmt_emfn is handed to pmem_arch_setup(), and the region's used
+ * page count is advanced by the number of pages it reports as consumed.
+ *
+ * Returns 0 on success, or
+ *  -EINVAL if either range contains INVALID_MFN or has start >= end, or
+ *          if check_address_and_pxm() rejects smfn - emfn,
+ *  -ENXIO  if no management region covers mgmt_smfn - mgmt_emfn,
+ *  -ENOSPC if too few unused management pages are left below mgmt_emfn,
+ *  or the error returned by pmem_list_add() or pmem_arch_setup().
+ */
+static int pmem_setup_data(unsigned long smfn, unsigned long emfn,
+                           unsigned long mgmt_smfn, unsigned long mgmt_emfn)
+{
+    struct pmem *data, *mgmt;
+    unsigned long used_mgmt_mfns;
+    unsigned int pxm;
+    int rc;
+
+    if ( smfn == mfn_x(INVALID_MFN) || emfn == mfn_x(INVALID_MFN) ||
+         smfn >= emfn )
+        return -EINVAL;
+
+    /*
+     * Require the PMEM region in one proximity domain, in order to
+     * avoid the error recovery from multiple calls to pmem_arch_setup()
+     * which is not revertible.
+     */
+    if ( !check_address_and_pxm(smfn, emfn, &pxm) )
+        return -EINVAL;
+
+    if ( mgmt_smfn == mfn_x(INVALID_MFN) || mgmt_emfn == mfn_x(INVALID_MFN) ||
+         mgmt_smfn >= mgmt_emfn )
+        return -EINVAL;
+
+    spin_lock(&pmem_mgmt_lock);
+    mgmt = find_mgmt_region(mgmt_smfn, mgmt_emfn);
+    spin_unlock(&pmem_mgmt_lock);
+    if ( !mgmt )
+        return -ENXIO;
+
+    spin_lock(&mgmt->lock);
+
+    /* Management pages are handed out from the first unused page onwards. */
+    mgmt_smfn = mgmt->smfn + mgmt->u.mgmt.used;
+
+    /*
+     * The caller's mgmt_emfn may lie at or below the first unused page.
+     * Check that before subtracting: the unsigned difference would
+     * otherwise wrap around to a huge value and defeat check_mgmt_size().
+     */
+    if ( mgmt_smfn >= mgmt_emfn ||
+         !check_mgmt_size(mgmt_emfn - mgmt_smfn, emfn - smfn) )
+    {
+        spin_unlock(&mgmt->lock);
+        return -ENOSPC;
+    }
+
+    spin_lock(&pmem_data_lock);
+
+    rc = pmem_list_add(&pmem_data_regions, smfn, emfn, &data);
+    if ( rc )
+        goto out;
+    /* No management pages are associated until the arch setup succeeds. */
+    data->u.data.mgmt_smfn = data->u.data.mgmt_emfn = mfn_x(INVALID_MFN);
+
+    rc = pmem_arch_setup(smfn, emfn, pxm,
+                         mgmt_smfn, mgmt_emfn, &used_mgmt_mfns);
+    if ( rc )
+    {
+        pmem_list_del(data);
+        goto out;
+    }
+
+    /* Account for the pages just consumed and record them in the region. */
+    mgmt->u.mgmt.used = mgmt_smfn - mgmt->smfn + used_mgmt_mfns;
+    data->u.data.mgmt_smfn = mgmt_smfn;
+    data->u.data.mgmt_emfn = mgmt->smfn + mgmt->u.mgmt.used;
+
+    nr_data_regions++;
+
+ out:
+    spin_unlock(&pmem_data_lock);
+    spin_unlock(&mgmt->lock);
+
+    return rc;
+}
+
 static int pmem_setup(unsigned long smfn, unsigned long emfn,
                       unsigned long mgmt_smfn, unsigned long mgmt_emfn,
                       unsigned int type)
@@ -352,6 +462,10 @@ static int pmem_setup(unsigned long smfn, unsigned long emfn,
 
         break;
 
+    case PMEM_REGION_TYPE_DATA:
+        rc = pmem_setup_data(smfn, emfn, mgmt_smfn, mgmt_emfn);
+        break;
+
     default:
         rc = -EINVAL;
     }
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 703dd860e7..d1fbb30247 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -1052,6 +1052,7 @@ struct xen_sysctl_set_parameter {
 /* Types of PMEM regions */
 #define PMEM_REGION_TYPE_RAW        0 /* PMEM regions detected by Xen */
 #define PMEM_REGION_TYPE_MGMT       1 /* PMEM regions for management usage */
+#define PMEM_REGION_TYPE_DATA       2 /* PMEM regions for guest data */
 
 /* PMEM_REGION_TYPE_RAW */
 struct xen_sysctl_nvdimm_pmem_raw_region {
@@ -1107,7 +1108,7 @@ struct xen_sysctl_nvdimm_pmem_setup {
                         /* above PMEM region. If the above PMEM region is */
                         /* a management region, mgmt_{s,e}mfn is required */
                         /* to be identical to {s,e}mfn. */
-    uint8_t  type;      /* Only PMEM_REGION_TYPE_MGMT is supported now */
+    uint8_t  type;      /* Must be one of PMEM_REGION_TYPE_{MGMT, DATA} */
 };
 typedef struct xen_sysctl_nvdimm_pmem_setup xen_sysctl_nvdimm_pmem_setup_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_nvdimm_pmem_setup_t);
-- 
2.15.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.