
[Minios-devel] [UNIKRAFT PATCH 2/6] plat/xen: Add new memory region - demand area (x86_64)


  • To: minios-devel@xxxxxxxxxxxxx
  • From: Costin Lupu <costin.lupu@xxxxxxxxx>
  • Date: Fri, 22 Jun 2018 14:16:52 +0300
  • Cc: simon.kuenzer@xxxxxxxxx, sharan.santhanam@xxxxxxxxx, yuri.volchkov@xxxxxxxxx
  • Delivery-date: Fri, 22 Jun 2018 11:17:09 +0000
  • List-id: Mini-os development list <minios-devel.lists.xenproject.org>

The demand area is a new memory region used by the Xen platform for
on-demand memory mappings. An example use case for on-demand mappings
is the grant subsystem, which requires a virtual memory region distinct
from the one used for the heap.
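
As a rough illustration (not part of this patch), a consumer such as the
grant table code could obtain such a mapping through the map_frames()
convenience macro added below. The helper name and the use of
uk_alloc_get_default() as the page-table allocator are assumptions for
this sketch:

  /* Hypothetical sketch: map two machine frames into the demand area.
   * Page-table pages needed for the mapping are allocated from 'a'.
   */
  #include <uk/alloc.h>
  #include <xen-x86/mm.h>

  static void *map_two_frames(unsigned long mfn0, unsigned long mfn1)
  {
          unsigned long mfns[2] = { mfn0, mfn1 };
          struct uk_alloc *a = uk_alloc_get_default();

          /* Virtual addresses come from the demand area; returns NULL
           * if no free run is found or the mapping fails.
           */
          return map_frames(mfns, 2, a);
  }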

This patch also introduces the functions for reading and writing page
table entries, needed when creating memory mappings in the demand area.
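
For illustration only, the following sketch condenses the per-page step
of do_map_frames() into a single PTE write via need_pte(). map_one_frame()
is a hypothetical helper (paravirt case, assuming the same headers as
plat/xen/x86/mm.c):

  /* Hypothetical sketch: map machine frame 'mfn' at virtual address 'va'
   * (inside the demand area), allocating any missing page-table pages
   * from 'a'.
   */
  static int map_one_frame(unsigned long va, unsigned long mfn,
                           struct uk_alloc *a)
  {
          mmu_update_t mmu_update;
          pgentry_t *pte;

          pte = need_pte(va, a);
          if (!pte)
                  return -ENOMEM;

          /* ptr: machine address of the PTE to update;
           * val: new entry, i.e. the mfn plus protection bits.
           */
          mmu_update.ptr = virt_to_mach(pte) | MMU_NORMAL_PT_UPDATE;
          mmu_update.val = ((pgentry_t) mfn << PAGE_SHIFT) | L1_PROT;

          return HYPERVISOR_mmu_update(&mmu_update, 1, NULL, DOMID_SELF);
  }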

Signed-off-by: Costin Lupu <costin.lupu@xxxxxxxxx>
---
 plat/xen/include/xen-x86/mm.h |  17 ++-
 plat/xen/x86/mm.c             | 323 +++++++++++++++++++++++++++++++++++++++++-
 plat/xen/x86/setup.c          |  13 +-
 3 files changed, 346 insertions(+), 7 deletions(-)

diff --git a/plat/xen/include/xen-x86/mm.h b/plat/xen/include/xen-x86/mm.h
index a1dbb26..48ab295 100644
--- a/plat/xen/include/xen-x86/mm.h
+++ b/plat/xen/include/xen-x86/mm.h
@@ -275,11 +275,17 @@ static __inline__ paddr_t machine_to_phys(maddr_t machine)
 })
 #define virtual_to_mfn(_virt)     pte_to_mfn(virtual_to_pte(_virt))
 
-#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, L1_PROT)
-#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, NULL, L1_PROT_RO)
-#define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, NULL, L1_PROT_RO)
-
-pgentry_t *need_pgt(unsigned long addr);
+void *map_frames_ex(const unsigned long *mfns, unsigned long n,
+               unsigned long stride, unsigned long incr,
+               unsigned long alignment,
+               domid_t id, int *err, unsigned long prot,
+               struct uk_alloc *a);
+#define map_frames(f, n, a) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, L1_PROT, a)
+#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, NULL, L1_PROT_RO, a)
+#define do_map_zero(start, n, a) do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, NULL, L1_PROT_RO, a)
+
+struct uk_alloc;
+pgentry_t *need_pte(unsigned long addr, struct uk_alloc *a);
 void arch_mm_preinit(void *p);
 unsigned long alloc_virt_kernel(unsigned n_pages);
 
@@ -294,5 +300,6 @@ void _init_mem_build_pagetable(unsigned long *start_pfn,
                                                           unsigned long *max_pfn);
 void _init_mem_set_readonly(void *text, void *etext);
 void _init_mem_clear_bootstrap(void);
+void _init_mem_demand_area(unsigned long start, unsigned long page_num);
 
 #endif /* _ARCH_MM_H_ */
diff --git a/plat/xen/x86/mm.c b/plat/xen/x86/mm.c
index a77868a..818653f 100644
--- a/plat/xen/x86/mm.c
+++ b/plat/xen/x86/mm.c
@@ -36,7 +36,9 @@
  */
 
 #include <string.h>
-#include <xen-x86/os.h>
+#include <errno.h>
+#include <uk/alloc.h>
+#include <uk/plat/config.h>
 #include <xen-x86/mm.h>
 #include <xen/memory.h>
 #include <uk/print.h>
@@ -224,6 +226,312 @@ void _init_mem_build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
 }
 
 /*
+ * Get the PTE for virtual address va if it exists; otherwise return NULL.
+ */
+static pgentry_t *get_pte(unsigned long va)
+{
+       unsigned long mfn;
+       pgentry_t *tab;
+       unsigned int offset;
+
+       tab = pt_base;
+       mfn = virt_to_mfn(pt_base);
+#if defined(__x86_64__)
+       offset = l4_table_offset(va);
+       if (!(tab[offset] & _PAGE_PRESENT))
+               return NULL;
+
+       mfn = pte_to_mfn(tab[offset]);
+       tab = mfn_to_virt(mfn);
+#endif
+       offset = l3_table_offset(va);
+       if (!(tab[offset] & _PAGE_PRESENT))
+               return NULL;
+
+       mfn = pte_to_mfn(tab[offset]);
+       tab = mfn_to_virt(mfn);
+       offset = l2_table_offset(va);
+       if (!(tab[offset] & _PAGE_PRESENT))
+               return NULL;
+
+       if (tab[offset] & _PAGE_PSE)
+               return &tab[offset];
+
+       mfn = pte_to_mfn(tab[offset]);
+       tab = mfn_to_virt(mfn);
+       offset = l1_table_offset(va);
+
+       return &tab[offset];
+}
+
+/*
+ * Return a valid PTE for a given virtual address.
+ * If the PTE does not exist, allocate the needed page-table pages.
+ */
+pgentry_t *need_pte(unsigned long va, struct uk_alloc *a)
+{
+       unsigned long pt_mfn;
+       pgentry_t *tab;
+       unsigned long pt_pfn;
+       unsigned int offset;
+
+       tab = pt_base;
+       pt_mfn = virt_to_mfn(pt_base);
+#if defined(__x86_64__)
+       offset = l4_table_offset(va);
+       if (!(tab[offset] & _PAGE_PRESENT)) {
+               pt_pfn = virt_to_pfn(uk_malloc_page(a));
+               if (!pt_pfn)
+                       return NULL;
+               new_pt_frame(&pt_pfn, pt_mfn, offset, L3_FRAME);
+       }
+       UK_ASSERT(tab[offset] & _PAGE_PRESENT);
+
+       pt_mfn = pte_to_mfn(tab[offset]);
+       tab = mfn_to_virt(pt_mfn);
+#endif
+       offset = l3_table_offset(va);
+       if (!(tab[offset] & _PAGE_PRESENT)) {
+               pt_pfn = virt_to_pfn(uk_malloc_page(a));
+               if (!pt_pfn)
+                       return NULL;
+               new_pt_frame(&pt_pfn, pt_mfn, offset, L2_FRAME);
+       }
+       UK_ASSERT(tab[offset] & _PAGE_PRESENT);
+
+       pt_mfn = pte_to_mfn(tab[offset]);
+       tab = mfn_to_virt(pt_mfn);
+       offset = l2_table_offset(va);
+       if (!(tab[offset] & _PAGE_PRESENT)) {
+               pt_pfn = virt_to_pfn(uk_malloc_page(a));
+               if (!pt_pfn)
+                       return NULL;
+               new_pt_frame(&pt_pfn, pt_mfn, offset, L1_FRAME);
+       }
+       UK_ASSERT(tab[offset] & _PAGE_PRESENT);
+
+       if (tab[offset] & _PAGE_PSE)
+               return &tab[offset];
+
+       pt_mfn = pte_to_mfn(tab[offset]);
+       tab = mfn_to_virt(pt_mfn);
+       offset = l1_table_offset(va);
+
+       return &tab[offset];
+}
+
+/*
+ * Map an array of MFNs contiguously into virtual address space starting at
+ * va. Map mfns[i * stride] + i * incr for i in 0..n-1.
+ */
+#define MAP_BATCH ((STACK_SIZE / 2) / sizeof(mmu_update_t))
+int do_map_frames(unsigned long va,
+               const unsigned long *mfns, unsigned long n,
+               unsigned long stride, unsigned long incr,
+               domid_t id, int *err, unsigned long prot,
+               struct uk_alloc *a)
+{
+       pgentry_t *pgt = NULL;
+       unsigned long mapped = 0;
+
+       if (!mfns) {
+               uk_printd(DLVL_WARN, "do_map_frames: no mfns supplied\n");
+               return -EINVAL;
+       }
+
+       uk_printd(DLVL_EXTRA,
+               "va=%p n=0x%lx, mfns[0]=0x%lx stride=0x%lx incr=0x%lx prot=0x%lx\n",
+               (void *) va, n, mfns[0], stride, incr, prot);
+
+       if (err)
+               memset(err, 0, n * sizeof(int));
+
+       while (mapped < n) {
+#ifdef CONFIG_PARAVIRT
+               unsigned long i;
+               int rc;
+               unsigned long batched;
+
+               if (err)
+                       batched = 1;
+               else
+                       batched = n - mapped;
+
+               if (batched > MAP_BATCH)
+                       batched = MAP_BATCH;
+
+               {
+                       mmu_update_t mmu_updates[batched];
+
+                       for (i = 0; i < batched; i++, va += PAGE_SIZE, pgt++) {
+                               if (!pgt || !(va & L1_MASK))
+                                       pgt = need_pte(va, a);
+                               if (!pgt)
+                                       return -ENOMEM;
+
+                               mmu_updates[i].ptr =
+                                       virt_to_mach(pgt) | MMU_NORMAL_PT_UPDATE;
+                               mmu_updates[i].val =
+                                       ((pgentry_t) (mfns[(mapped + i) * stride]
+                                       + (mapped + i) * incr) << PAGE_SHIFT) | prot;
+                       }
+
+                       rc = HYPERVISOR_mmu_update(mmu_updates, batched, NULL, id);
+                       if (rc < 0) {
+                               if (err)
+                                       err[mapped * stride] = rc;
+                               else
+                                       UK_CRASH(
+                                               "Map %ld (%lx, ...) at %lx failed: %d.\n",
+                                               batched, mfns[mapped * stride] + mapped * incr, va, rc);
+                       }
+               }
+               mapped += batched;
+#else
+               if (!pgt || !(va & L1_MASK))
+                       pgt = need_pte(va & ~L1_MASK, a);
+               if (!pgt)
+                       return -ENOMEM;
+
+               UK_ASSERT(!(*pgt & _PAGE_PSE));
+               pgt[l1_table_offset(va)] =
+                       (pgentry_t) (((mfns[mapped * stride]
+                       + mapped * incr) << PAGE_SHIFT) | prot);
+               mapped++;
+#endif
+       }
+
+       return 0;
+}
+
+static unsigned long demand_map_area_start;
+static unsigned long demand_map_area_end;
+
+unsigned long allocate_ondemand(unsigned long n, unsigned long align)
+{
+       unsigned long page_idx, contig = 0;
+
+       /* Find a properly aligned run of n contiguous frames */
+       for (page_idx = 0;
+               page_idx <= DEMAND_MAP_PAGES - n;
+               page_idx = (page_idx + contig + 1 + align - 1) & ~(align - 1)) {
+
+               unsigned long addr =
+                       demand_map_area_start + page_idx * PAGE_SIZE;
+               pgentry_t *pte = get_pte(addr);
+
+               for (contig = 0; contig < n; contig++, addr += PAGE_SIZE) {
+                       if (!(addr & L1_MASK))
+                               pte = get_pte(addr);
+
+                       if (pte) {
+                               if (*pte & _PAGE_PRESENT)
+                                       break;
+
+                               pte++;
+                       }
+               }
+
+               if (contig == n)
+                       break;
+       }
+
+       if (contig != n) {
+               uk_printd(DLVL_ERR, "Failed to find %ld frames!\n", n);
+               return 0;
+       }
+
+       return demand_map_area_start + page_idx * PAGE_SIZE;
+}
+
+/*
+ * Map an array of MFNs contiguously into virtual address space.
+ * Virtual addresses are allocated from the on-demand area.
+ */
+void *map_frames_ex(const unsigned long *mfns, unsigned long n,
+               unsigned long stride, unsigned long incr,
+               unsigned long alignment,
+               domid_t id, int *err, unsigned long prot,
+               struct uk_alloc *a)
+{
+       unsigned long va = allocate_ondemand(n, alignment);
+
+       if (!va)
+               return NULL;
+
+       if (do_map_frames(va, mfns, n, stride, incr, id, err, prot, a))
+               return NULL;
+
+       return (void *) va;
+}
+
+/*
+ * Unmap num_frames frames mapped at virtual address va.
+ */
+#define UNMAP_BATCH ((STACK_SIZE / 2) / sizeof(multicall_entry_t))
+int unmap_frames(unsigned long va, unsigned long num_frames)
+{
+#ifdef CONFIG_PARAVIRT
+       unsigned long i, n = UNMAP_BATCH;
+       multicall_entry_t call[n];
+       int ret;
+#else
+       pgentry_t *pte;
+#endif
+
+       UK_ASSERT(!((unsigned long) va & ~PAGE_MASK));
+
+       uk_printd(DLVL_EXTRA, "va=%p, num=0x%lx\n", (void *) va, num_frames);
+
+       while (num_frames) {
+#ifdef CONFIG_PARAVIRT
+               if (n > num_frames)
+                       n = num_frames;
+
+               for (i = 0; i < n; i++) {
+                       int arg = 0;
+                       /* simply update the PTE for the VA and invalidate TLB */
+                       call[i].op = __HYPERVISOR_update_va_mapping;
+                       call[i].args[arg++] = va;
+                       call[i].args[arg++] = 0;
+#ifdef __i386__
+                       call[i].args[arg++] = 0;
+#endif
+                       call[i].args[arg++] = UVMF_INVLPG;
+
+                       va += PAGE_SIZE;
+               }
+
+               ret = HYPERVISOR_multicall(call, n);
+               if (ret) {
+                       uk_printd(DLVL_ERR,
+                               "update_va_mapping hypercall failed with rc=%d.\n", ret);
+                       return -ret;
+               }
+
+               for (i = 0; i < n; i++) {
+                       if (call[i].result) {
+                               uk_printd(DLVL_ERR,
+                                       "update_va_mapping failed with rc=%d.\n", (int) call[i].result);
+                               return -(call[i].result);
+                       }
+               }
+               num_frames -= n;
+#else
+               pte = get_pte(va);
+               if (pte) {
+                       UK_ASSERT(!(*pte & _PAGE_PSE));
+                       *pte = 0;
+                       invlpg(va);
+               }
+               va += PAGE_SIZE;
+               num_frames--;
+#endif
+       }
+       return 0;
+}
+
+/*
  * Mark portion of the address space read only.
  */
 extern struct shared_info _libxenplat_shared_info;
@@ -352,3 +660,16 @@ void _init_mem_prepare(unsigned long *start_pfn, unsigned long *max_pfn)
 #error "Please port (see Mini-OS's arch_mm_preinit())"
 #endif
 }
+
+/*
+ * Reserve an area of virtual address space for mappings
+ */
+
+void _init_mem_demand_area(unsigned long start, unsigned long page_num)
+{
+       demand_map_area_start = start;
+       demand_map_area_end = demand_map_area_start + page_num * PAGE_SIZE;
+
+       uk_printd(DLVL_INFO, "Demand map pfns at %lx-%lx.\n",
+               demand_map_area_start, demand_map_area_end);
+}
diff --git a/plat/xen/x86/setup.c b/plat/xen/x86/setup.c
index ad603de..8971f6f 100644
--- a/plat/xen/x86/setup.c
+++ b/plat/xen/x86/setup.c
@@ -165,7 +165,18 @@ static inline void _init_mem(void)
 #if CONFIG_UKPLAT_MEMRNAME
        _libxenplat_mrd[0].name  = "heap";
 #endif
-       _libxenplat_mrd_num = 1;
+
+       /* demand area */
+       _libxenplat_mrd[1].base  = (void *) VIRT_DEMAND_AREA;
+       _libxenplat_mrd[1].len   = DEMAND_MAP_PAGES * PAGE_SIZE;
+       _libxenplat_mrd[1].flags = (UKPLAT_MEMRF_RESERVED);
+#if CONFIG_UKPLAT_MEMRNAME
+       _libxenplat_mrd[1].name  = "demand";
+#endif
+       _init_mem_demand_area((unsigned long) _libxenplat_mrd[1].base,
+                       DEMAND_MAP_PAGES);
+
+       _libxenplat_mrd_num = 2;
 }
 
 void _libxenplat_x86entry(void *start_info) __noreturn;
-- 
2.11.0

