[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC XEN PATCH 03/16] xen/x86: add a hypercall XENPF_pmem_add to report host pmem regions



The Xen hypervisor does not include a pmem driver. Instead, it relies on the
pmem driver in Dom0 to report the PFN ranges of the entire pmem region, its
reserved area and its data area via XENPF_pmem_add. The reserved area is
used by the Xen hypervisor to place the frame table and M2P table; once it
has been reported, Dom0 is no longer allowed to access it.

Signed-off-by: Haozhong Zhang <haozhong.zhang@xxxxxxxxx>
---
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Cc: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
---
 xen/arch/x86/Makefile             |   1 +
 xen/arch/x86/platform_hypercall.c |   7 ++
 xen/arch/x86/pmem.c               | 161 ++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/x86_64/mm.c          |  54 +++++++++++++
 xen/include/asm-x86/mm.h          |   4 +
 xen/include/public/platform.h     |  14 ++++
 xen/include/xen/pmem.h            |  31 ++++++++
 xen/xsm/flask/hooks.c             |   1 +
 8 files changed, 273 insertions(+)
 create mode 100644 xen/arch/x86/pmem.c
 create mode 100644 xen/include/xen/pmem.h

diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 931917d..9cf2da1 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_TBOOT) += tboot.o
 obj-y += hpet.o
 obj-y += vm_event.o
 obj-y += xstate.o
+obj-y += pmem.o
 
 x86_emulate.o: x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h
 
diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
index 0879e19..c47eea4 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -24,6 +24,7 @@
 #include <xen/pmstat.h>
 #include <xen/irq.h>
 #include <xen/symbols.h>
+#include <xen/pmem.h>
 #include <asm/current.h>
 #include <public/platform.h>
 #include <acpi/cpufreq/processor_perf.h>
@@ -822,6 +823,12 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PARAM(xen_platform_op_t) u_xenpf_op)
     }
     break;
 
+    case XENPF_pmem_add:
+        ret = pmem_add(op->u.pmem_add.spfn, op->u.pmem_add.epfn,
+                       op->u.pmem_add.rsv_spfn, op->u.pmem_add.rsv_epfn,
+                       op->u.pmem_add.data_spfn, op->u.pmem_add.data_epfn);
+        break;
+
     default:
         ret = -ENOSYS;
         break;
diff --git a/xen/arch/x86/pmem.c b/xen/arch/x86/pmem.c
new file mode 100644
index 0000000..70358ed
--- /dev/null
+++ b/xen/arch/x86/pmem.c
@@ -0,0 +1,161 @@
+/******************************************************************************
+ * arch/x86/pmem.c
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Haozhong Zhang <haozhong.zhang@xxxxxxxxx>
+ */
+
+#include <xen/guest_access.h>
+#include <xen/list.h>
+#include <xen/spinlock.h>
+#include <xen/pmem.h>
+#include <xen/iocap.h>
+#include <asm-x86/mm.h>
+
+/*
+ * All pmem regions reported from Dom0 are linked in pmem_list, which
+ * is protected by pmem_list_lock. Its entries are of type struct pmem
+ * and sorted incrementally by field spa.
+ */
+static DEFINE_SPINLOCK(pmem_list_lock);
+static LIST_HEAD(pmem_list);
+
+struct pmem {
+    struct list_head link;   /* link to pmem_list */
+    unsigned long spfn;      /* start PFN of the whole pmem region */
+    unsigned long epfn;      /* end PFN of the whole pmem region */
+    unsigned long rsv_spfn;  /* start PFN of the reserved area */
+    unsigned long rsv_epfn;  /* end PFN of the reserved area */
+    unsigned long data_spfn; /* start PFN of the data area */
+    unsigned long data_epfn; /* end PFN of the data area */
+};
+
+static int is_included(unsigned long s1, unsigned long e1,
+                       unsigned long s2, unsigned long e2)
+{
+    return s1 <= s2 && s2 < e2 && e2 <= e1;
+}
+
+static int is_overlaped(unsigned long s1, unsigned long e1,
+                        unsigned long s2, unsigned long e2)
+{
+    return (s1 <= s2 && s2 < e1) || (s2 < s1 && s1 < e2);
+}
+
+static int check_reserved_size(unsigned long rsv_mfns, unsigned long total_mfns)
+{
+    return rsv_mfns >=
+        ((sizeof(struct page_info) * total_mfns) >> PAGE_SHIFT) +
+        ((sizeof(*machine_to_phys_mapping) * total_mfns) >> PAGE_SHIFT);
+}
+
+static int pmem_add_check(unsigned long spfn, unsigned long epfn,
+                          unsigned long rsv_spfn, unsigned long rsv_epfn,
+                          unsigned long data_spfn, unsigned long data_epfn)
+{
+    if ( spfn >= epfn || rsv_spfn >= rsv_epfn || data_spfn >= data_epfn )
+        return 0;
+
+    if ( !is_included(spfn, epfn, rsv_spfn, rsv_epfn) ||
+         !is_included(spfn, epfn, data_spfn, data_epfn) )
+        return 0;
+
+    if ( is_overlaped(rsv_spfn, rsv_epfn, data_spfn, data_epfn) )
+        return 0;
+
+    if ( !check_reserved_size(rsv_epfn - rsv_spfn, epfn - spfn) )
+        return 0;
+
+    return 1;
+}
+
+static int pmem_list_add(unsigned long spfn, unsigned long epfn,
+                         unsigned long rsv_spfn, unsigned long rsv_epfn,
+                         unsigned long data_spfn, unsigned long data_epfn)
+{
+    struct list_head *cur;
+    struct pmem *new_pmem;
+    int ret = 0;
+
+    spin_lock(&pmem_list_lock);
+
+    list_for_each_prev(cur, &pmem_list)
+    {
+        struct pmem *cur_pmem = list_entry(cur, struct pmem, link);
+        unsigned long cur_spfn = cur_pmem->spfn;
+        unsigned long cur_epfn = cur_pmem->epfn;
+
+        if ( (cur_spfn <= spfn && spfn < cur_epfn) ||
+             (spfn <= cur_spfn && cur_spfn < epfn) )
+        {
+            ret = -EINVAL;
+            goto out;
+        }
+
+        if ( cur_spfn < spfn )
+            break;
+    }
+
+    new_pmem = xmalloc(struct pmem);
+    if ( !new_pmem )
+    {
+        ret = -ENOMEM;
+        goto out;
+    }
+    new_pmem->spfn      = spfn;
+    new_pmem->epfn      = epfn;
+    new_pmem->rsv_spfn  = rsv_spfn;
+    new_pmem->rsv_epfn  = rsv_epfn;
+    new_pmem->data_spfn = data_spfn;
+    new_pmem->data_epfn = data_epfn;
+    list_add(&new_pmem->link, cur);
+
+ out:
+    spin_unlock(&pmem_list_lock);
+    return ret;
+}
+
+int pmem_add(unsigned long spfn, unsigned long epfn,
+             unsigned long rsv_spfn, unsigned long rsv_epfn,
+             unsigned long data_spfn, unsigned long data_epfn)
+{
+    int ret;
+
+    if ( !pmem_add_check(spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn) )
+        return -EINVAL;
+
+    ret = pmem_setup(spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn);
+    if ( ret )
+        goto out;
+
+    ret = iomem_deny_access(current->domain, rsv_spfn, rsv_epfn);
+    if ( ret )
+        goto out;
+
+    ret = pmem_list_add(spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn);
+    if ( ret )
+        goto out;
+
+    printk(XENLOG_INFO
+           "pmem: pfns     0x%lx - 0x%lx\n"
+           "      reserved 0x%lx - 0x%lx\n"
+           "      data     0x%lx - 0x%lx\n",
+           spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn);
+
+ out:
+    return ret;
+}
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index 5c0f527..b1f92f6 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -1474,6 +1474,60 @@ destroy_frametable:
     return ret;
 }
 
+int pmem_setup(unsigned long spfn, unsigned long epfn,
+               unsigned long rsv_spfn, unsigned long rsv_epfn,
+               unsigned long data_spfn, unsigned long data_epfn)
+{
+    unsigned old_max = max_page, old_total = total_pages;
+    struct mem_hotadd_info info =
+        { .spfn = spfn, .epfn = epfn, .cur = spfn };
+    struct mem_hotadd_info rsv_info =
+        { .spfn = rsv_spfn, .epfn = rsv_epfn, .cur = rsv_spfn };
+    int ret;
+    unsigned long i;
+    struct page_info *pg;
+
+    if ( !mem_hotadd_check(spfn, epfn) )
+        return -EINVAL;
+
+    ret = extend_frame_table(&info, &rsv_info);
+    if ( ret )
+        goto destroy_frametable;
+
+    if ( max_page < epfn )
+    {
+        max_page = epfn;
+        max_pdx = pfn_to_pdx(max_page - 1) + 1;
+    }
+    total_pages += epfn - spfn;
+
+    set_pdx_range(spfn, epfn);
+    ret = setup_m2p_table(&info, &rsv_info);
+    if ( ret )
+        goto destroy_m2p;
+
+    share_hotadd_m2p_table(&info);
+
+    for ( i = spfn; i < epfn; i++ )
+    {
+        pg = mfn_to_page(i);
+        pg->count_info = (rsv_spfn <= i && i < rsv_info.cur) ?
+                         PGC_state_inuse : PGC_state_free;
+    }
+
+    return 0;
+
+destroy_m2p:
+    destroy_m2p_mapping(&info);
+    max_page = old_max;
+    total_pages = old_total;
+    max_pdx = pfn_to_pdx(max_page - 1) + 1;
+destroy_frametable:
+    cleanup_frame_table(&info);
+
+    return ret;
+}
+
 #include "compat/mm.c"
 
 /*
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index b781495..e31f1c8 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -597,4 +597,8 @@ typedef struct mm_rwlock {
 
 extern const char zero_page[];
 
+int pmem_setup(unsigned long spfn, unsigned long epfn,
+               unsigned long rsv_spfn, unsigned long rsv_epfn,
+               unsigned long data_spfn, unsigned long data_epfn);
+
 #endif /* __ASM_X86_MM_H__ */
diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h
index 1e6a6ce..c7e7cce 100644
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -608,6 +608,19 @@ struct xenpf_symdata {
 typedef struct xenpf_symdata xenpf_symdata_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_symdata_t);
 
+#define XENPF_pmem_add     64
+struct xenpf_pmem_add {
+    /* IN variables */
+    uint64_t spfn;      /* start PFN of the whole pmem region */
+    uint64_t epfn;      /* end PFN of the whole pmem region */
+    uint64_t rsv_spfn;  /* start PFN of the reserved area within the region */
+    uint64_t rsv_epfn;  /* end PFN of the reserved area within the region */
+    uint64_t data_spfn; /* start PFN of the data area within the region */
+    uint64_t data_epfn; /* end PFN of the data area within the region */
+};
+typedef struct xenpf_pmem_add xenpf_pmem_add_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_pmem_add_t);
+
 /*
  * ` enum neg_errnoval
  * ` HYPERVISOR_platform_op(const struct xen_platform_op*);
@@ -638,6 +651,7 @@ struct xen_platform_op {
         struct xenpf_core_parking      core_parking;
         struct xenpf_resource_op       resource_op;
         struct xenpf_symdata           symdata;
+        struct xenpf_pmem_add          pmem_add;
         uint8_t                        pad[128];
     } u;
 };
diff --git a/xen/include/xen/pmem.h b/xen/include/xen/pmem.h
new file mode 100644
index 0000000..a670ab8
--- /dev/null
+++ b/xen/include/xen/pmem.h
@@ -0,0 +1,31 @@
+/*
+ * xen/include/xen/pmem.h
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Haozhong Zhang <haozhong.zhang@xxxxxxxxx>
+ */
+
+#ifndef __XEN_PMEM_H__
+#define __XEN_PMEM_H__
+
+#include <xen/types.h>
+
+int pmem_add(unsigned long spfn, unsigned long epfn,
+             unsigned long rsv_spfn, unsigned long rsv_epfn,
+             unsigned long data_spfn, unsigned long data_epfn);
+
+#endif /* __XEN_PMEM_H__ */
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 177c11f..948a161 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1360,6 +1360,7 @@ static int flask_platform_op(uint32_t op)
     case XENPF_cpu_offline:
     case XENPF_cpu_hotadd:
     case XENPF_mem_hotadd:
+    case XENPF_pmem_add:
         return 0;
 #endif
 
-- 
2.10.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.