
[Minios-devel] [UNIKRAFT PATCH v2 2/6] plat/xen: Add new memory region - demand area (x86_64)


  • To: minios-devel@xxxxxxxxxxxxx
  • From: Costin Lupu <costin.lupu@xxxxxxxxx>
  • Date: Thu, 28 Jun 2018 15:38:31 +0300
  • Cc: simon.kuenzer@xxxxxxxxx, sharan.santhanam@xxxxxxxxx, yuri.volchkov@xxxxxxxxx
  • Delivery-date: Thu, 28 Jun 2018 12:38:48 +0000
  • List-id: Mini-os development list <minios-devel.lists.xenproject.org>

The demand area is a new memory region used by the Xen platform code for
on-demand memory mappings. An example use case for on-demand mappings is
the grant subsystem, which requires a virtual memory region separate
from the memory used for the heap.
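
As an illustration only (not part of the patch), a hypothetical consumer
such as the grant subsystem could use the new interface roughly as
follows; the helper names are made up and the unmap_frames() declaration
is assumed to be visible from <xen-x86/mm.h>:

    #include <uk/alloc.h>
    #include <xen-x86/mm.h>

    /* Map 'count' frames given by an MFN array into the demand area. */
    static void *map_foreign_frames(const unsigned long *mfns,
                                    unsigned long count, struct uk_alloc *a)
    {
            /*
             * map_frames(f, n, a) expands to map_frames_ex(f, n, 1, 0, 1,
             * DOMID_SELF, NULL, L1_PROT, a): stride 1 walks the MFN array
             * and incr 0 adds no per-page offset. The virtual range comes
             * from the demand area, not from the heap.
             */
            return map_frames(mfns, count, a);
    }

    /* Release the mapping again by clearing the PTEs. */
    static void unmap_foreign_frames(void *va, unsigned long count)
    {
            unmap_frames((unsigned long) va, count);
    }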

This patch also introduces the functions for reading and writing page
table entries, which are needed when creating memory mappings in the
demand area.
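
As a rough sketch (illustration only, assuming the pgentry_t encoding
used throughout this file), an entry is composed from a machine frame
number and protection flags and tested for presence like this:

    #include <xen-x86/mm.h>

    /* Compose a PTE the same way do_map_frames() writes it. */
    static inline pgentry_t make_pte(unsigned long mfn, unsigned long prot)
    {
            return ((pgentry_t) mfn << PAGE_SHIFT) | prot;
    }

    /* Presence test used while walking the page tables. */
    static inline int pte_is_present(pgentry_t pte)
    {
            return (pte & _PAGE_PRESENT) != 0;
    }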

Signed-off-by: Costin Lupu <costin.lupu@xxxxxxxxx>
---
 plat/xen/include/xen-x86/mm.h |  16 +-
 plat/xen/x86/mm.c             | 359 +++++++++++++++++++++++++++++++++++++++++-
 plat/xen/x86/setup.c          |  13 +-
 3 files changed, 382 insertions(+), 6 deletions(-)

diff --git a/plat/xen/include/xen-x86/mm.h b/plat/xen/include/xen-x86/mm.h
index a1dbb26..0e59796 100644
--- a/plat/xen/include/xen-x86/mm.h
+++ b/plat/xen/include/xen-x86/mm.h
@@ -275,11 +275,18 @@ static __inline__ paddr_t machine_to_phys(maddr_t machine)
 })
 #define virtual_to_mfn(_virt)     pte_to_mfn(virtual_to_pte(_virt))
 
-#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, L1_PROT)
-#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, NULL, L1_PROT_RO)
-#define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, NULL, L1_PROT_RO)
+void *map_frames_ex(const unsigned long *mfns, unsigned long n,
+               unsigned long stride, unsigned long incr,
+               unsigned long alignment,
+               domid_t id, int *err, unsigned long prot,
+               struct uk_alloc *a);
+#define map_frames(f, n, a) \
+       map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, L1_PROT, a)
+#define map_zero(n, a) \
+       map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, NULL, L1_PROT_RO, a)
+#define do_map_zero(start, n, a) \
+       do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, NULL, L1_PROT_RO, a)
 
-pgentry_t *need_pgt(unsigned long addr);
 void arch_mm_preinit(void *p);
 unsigned long alloc_virt_kernel(unsigned n_pages);
 
@@ -294,5 +301,6 @@ void _init_mem_build_pagetable(unsigned long *start_pfn,
                                                           unsigned long *max_pfn);
 void _init_mem_set_readonly(void *text, void *etext);
 void _init_mem_clear_bootstrap(void);
+void _init_mem_demand_area(unsigned long start, unsigned long page_num);
 
 #endif /* _ARCH_MM_H_ */
diff --git a/plat/xen/x86/mm.c b/plat/xen/x86/mm.c
index a77868a..3a95ad0 100644
--- a/plat/xen/x86/mm.c
+++ b/plat/xen/x86/mm.c
@@ -36,7 +36,9 @@
  */
 
 #include <string.h>
-#include <xen-x86/os.h>
+#include <errno.h>
+#include <uk/alloc.h>
+#include <uk/plat/config.h>
 #include <xen-x86/mm.h>
 #include <xen/memory.h>
 #include <uk/print.h>
@@ -224,6 +226,348 @@ void _init_mem_build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
 }
 
 /*
+ * Get the PTE for virtual address va if it exists. Otherwise NULL.
+ */
+static pgentry_t *get_pte(unsigned long va)
+{
+       unsigned long mfn;
+       pgentry_t *tab;
+       unsigned int offset;
+
+       tab = pt_base;
+
+#if defined(__x86_64__)
+       offset = l4_table_offset(va);
+       if (!(tab[offset] & _PAGE_PRESENT))
+               return NULL;
+
+       mfn = pte_to_mfn(tab[offset]);
+       tab = mfn_to_virt(mfn);
+#endif
+       offset = l3_table_offset(va);
+       if (!(tab[offset] & _PAGE_PRESENT))
+               return NULL;
+
+       mfn = pte_to_mfn(tab[offset]);
+       tab = mfn_to_virt(mfn);
+       offset = l2_table_offset(va);
+       if (!(tab[offset] & _PAGE_PRESENT))
+               return NULL;
+
+       if (tab[offset] & _PAGE_PSE)
+               return &tab[offset];
+
+       mfn = pte_to_mfn(tab[offset]);
+       tab = mfn_to_virt(mfn);
+       offset = l1_table_offset(va);
+
+       return &tab[offset];
+}
+
+/*
+ * Return a valid PTE for a given virtual address.
+ * If PTE does not exist, allocate page-table pages.
+ */
+static pgentry_t *need_pte(unsigned long va, struct uk_alloc *a)
+{
+       unsigned long pt_mfn;
+       pgentry_t *tab;
+       unsigned long pt_pfn;
+       unsigned int offset;
+
+       tab = pt_base;
+       pt_mfn = virt_to_mfn(pt_base);
+#if defined(__x86_64__)
+       offset = l4_table_offset(va);
+       if (!(tab[offset] & _PAGE_PRESENT)) {
+               pt_pfn = virt_to_pfn(uk_malloc_page(a));
+               if (!pt_pfn)
+                       return NULL;
+               new_pt_frame(&pt_pfn, pt_mfn, offset, L3_FRAME);
+       }
+       UK_ASSERT(tab[offset] & _PAGE_PRESENT);
+
+       pt_mfn = pte_to_mfn(tab[offset]);
+       tab = mfn_to_virt(pt_mfn);
+#endif
+       offset = l3_table_offset(va);
+       if (!(tab[offset] & _PAGE_PRESENT)) {
+               pt_pfn = virt_to_pfn(uk_malloc_page(a));
+               if (!pt_pfn)
+                       return NULL;
+               new_pt_frame(&pt_pfn, pt_mfn, offset, L2_FRAME);
+       }
+       UK_ASSERT(tab[offset] & _PAGE_PRESENT);
+
+       pt_mfn = pte_to_mfn(tab[offset]);
+       tab = mfn_to_virt(pt_mfn);
+       offset = l2_table_offset(va);
+       if (!(tab[offset] & _PAGE_PRESENT)) {
+               pt_pfn = virt_to_pfn(uk_malloc_page(a));
+               if (!pt_pfn)
+                       return NULL;
+               new_pt_frame(&pt_pfn, pt_mfn, offset, L1_FRAME);
+       }
+       UK_ASSERT(tab[offset] & _PAGE_PRESENT);
+
+       if (tab[offset] & _PAGE_PSE)
+               return &tab[offset];
+
+       pt_mfn = pte_to_mfn(tab[offset]);
+       tab = mfn_to_virt(pt_mfn);
+       offset = l1_table_offset(va);
+
+       return &tab[offset];
+}
+
+/**
+ * do_map_frames - Map an array of MFNs contiguously into virtual
+ * address space starting at va.
+ * @va: Starting address of the virtual address range
+ * @mfns: Array of MFNs
+ * @n: Number of MFNs
+ * @stride: Stride used for selecting the MFNs on each iteration
+ * @incr: Increment added to MFNs on each iteration
+ * @id: Domain id
+ * @err: Array of error statuses
+ * @prot: Page protection flags
+ * @a: Memory allocator used when new page table entries are needed
+ *
+ * Note that exactly one of @stride and @incr must be non-zero, never
+ * both. One should use a non-zero @stride when providing an array of
+ * MFNs, and use @incr when only the first MFN is provided and the
+ * subsequent MFN values are simply derived from it by repeatedly
+ * adding @incr.
+ */
+#define MAP_BATCH ((STACK_SIZE / 4) / sizeof(mmu_update_t))
+int do_map_frames(unsigned long va,
+               const unsigned long *mfns, unsigned long n,
+               unsigned long stride, unsigned long incr,
+               domid_t id, int *err, unsigned long prot,
+               struct uk_alloc *a)
+{
+       pgentry_t *pte = NULL;
+       unsigned long mapped = 0;
+
+       if (!mfns) {
+               uk_printd(DLVL_WARN, "do_map_frames: no mfns supplied\n");
+               return -EINVAL;
+       }
+
+       uk_printd(DLVL_EXTRA,
+               "Mapping va=%p n=%lu, mfns[0]=0x%lx stride=%lu incr=%lu prot=0x%lx\n",
+               (void *) va, n, mfns[0], stride, incr, prot);
+
+       if (err)
+               memset(err, 0, n * sizeof(int));
+
+       while (mapped < n) {
+#ifdef CONFIG_PARAVIRT
+               unsigned long i;
+               int rc;
+               unsigned long batched;
+
+               if (err)
+                       batched = 1;
+               else
+                       batched = n - mapped;
+
+               if (batched > MAP_BATCH)
+                       batched = MAP_BATCH;
+
+               mmu_update_t mmu_updates[batched];
+
+               for (i = 0; i < batched; i++, va += PAGE_SIZE, pte++) {
+                       if (!pte || !(va & L1_MASK))
+                               pte = need_pte(va, a);
+                       if (!pte)
+                               return -ENOMEM;
+
+                       mmu_updates[i].ptr =
+                               virt_to_mach(pte) | MMU_NORMAL_PT_UPDATE;
+                       mmu_updates[i].val =
+                               ((pgentry_t) (mfns[(mapped + i) * stride]
+                               + (mapped + i) * incr) << PAGE_SHIFT) | prot;
+               }
+
+               rc = HYPERVISOR_mmu_update(mmu_updates, batched, NULL, id);
+               if (rc < 0) {
+                       if (err)
+                               err[mapped * stride] = rc;
+                       else
+                               UK_CRASH(
+                                       "Map %ld (%lx, ...) at %lx failed: %d.\n",
+                                       batched,
+                                       mfns[mapped * stride] + mapped * incr,
+                                       va, rc);
+               }
+               mapped += batched;
+#else
+               if (!pte || !(va & L1_MASK))
+                       pte = need_pte(va & ~L1_MASK, a);
+               if (!pte)
+                       return -ENOMEM;
+
+               UK_ASSERT(!(*pte & _PAGE_PSE));
+               pte[l1_table_offset(va)] =
+                       (pgentry_t) (((mfns[mapped * stride]
+                       + mapped * incr) << PAGE_SHIFT) | prot);
+               mapped++;
+#endif
+       }
+
+       return 0;
+}
+
+static unsigned long demand_map_area_start;
+static unsigned long demand_map_area_end;
+
+unsigned long allocate_ondemand(unsigned long n, unsigned long align)
+{
+       unsigned long page_idx, contig = 0;
+
+       /* Find a properly aligned run of n contiguous frames */
+       for (page_idx = 0;
+               page_idx <= DEMAND_MAP_PAGES - n;
+               page_idx = (page_idx + contig + 1 + align - 1) & ~(align - 1)) {
+
+               unsigned long addr =
+                       demand_map_area_start + page_idx * PAGE_SIZE;
+               pgentry_t *pte = get_pte(addr);
+
+               for (contig = 0; contig < n; contig++, addr += PAGE_SIZE) {
+                       if (!(addr & L1_MASK))
+                               pte = get_pte(addr);
+
+                       if (pte) {
+                               if (*pte & _PAGE_PRESENT)
+                                       break;
+
+                               pte++;
+                       }
+               }
+
+               if (contig == n)
+                       break;
+       }
+
+       if (contig != n) {
+               uk_printd(DLVL_ERR, "Failed to find %ld frames!\n", n);
+               return 0;
+       }
+
+       return demand_map_area_start + page_idx * PAGE_SIZE;
+}
+
+/**
+ * map_frames_ex - Map an array of MFNs contiguously into virtual
+ * address space. Virtual addresses are allocated from the on-demand
+ * area.
+ * @mfns: Array of MFNs
+ * @n: Number of MFNs
+ * @stride: Stride used for selecting the MFNs on each iteration
+ * (e.g. 1 to use every element, 0 to always use the first one)
+ * @incr: Increment added to MFNs on each iteration
+ * @alignment: Alignment
+ * @id: Domain id
+ * @err: Array of error statuses
+ * @prot: Page protection flags
+ * @a: Memory allocator used when new page table entries are needed
+ *
+ * Note that exactly one of @stride and @incr must be non-zero, never
+ * both. One should use a non-zero @stride when providing an array of
+ * MFNs, and use @incr when only the first MFN is provided and the
+ * subsequent MFN values are simply derived from it by repeatedly
+ * adding @incr.
+ */
+void *map_frames_ex(const unsigned long *mfns, unsigned long n,
+               unsigned long stride, unsigned long incr,
+               unsigned long alignment,
+               domid_t id, int *err, unsigned long prot,
+               struct uk_alloc *a)
+{
+       unsigned long va = allocate_ondemand(n, alignment);
+
+       if (!va)
+               return NULL;
+
+       if (do_map_frames(va, mfns, n, stride, incr, id, err, prot, a))
+               return NULL;
+
+       return (void *) va;
+}
+
+/*
+ * Unmap num_frames frames mapped at virtual address va.
+ */
+#define UNMAP_BATCH ((STACK_SIZE / 4) / sizeof(multicall_entry_t))
+int unmap_frames(unsigned long va, unsigned long num_frames)
+{
+#ifdef CONFIG_PARAVIRT
+       unsigned long i, n = UNMAP_BATCH;
+       multicall_entry_t call[n];
+       int ret;
+#else
+       pgentry_t *pte;
+#endif
+
+       UK_ASSERT(!((unsigned long) va & ~PAGE_MASK));
+
+       uk_printd(DLVL_EXTRA,
+               "Unmapping va=%p, num=%lu\n", (void *) va, num_frames);
+
+       while (num_frames) {
+#ifdef CONFIG_PARAVIRT
+               if (n > num_frames)
+                       n = num_frames;
+
+               for (i = 0; i < n; i++) {
+                       int arg = 0;
+                       /*
+                        * simply update the PTE for the VA and
+                        * invalidate TLB
+                        */
+                       call[i].op = __HYPERVISOR_update_va_mapping;
+                       call[i].args[arg++] = va;
+                       call[i].args[arg++] = 0;
+#ifdef __i386__
+                       call[i].args[arg++] = 0;
+#endif
+                       call[i].args[arg++] = UVMF_INVLPG;
+
+                       va += PAGE_SIZE;
+               }
+
+               ret = HYPERVISOR_multicall(call, n);
+               if (ret) {
+                       uk_printd(DLVL_ERR,
+                               "update_va_mapping hypercall failed with rc=%d.\n", ret);
+                       return -ret;
+               }
+
+               for (i = 0; i < n; i++) {
+                       if (call[i].result) {
+                               uk_printd(DLVL_ERR,
+                                       "update_va_mapping failed with rc=%ld.\n", call[i].result);
+                               return -(call[i].result);
+                       }
+               }
+               num_frames -= n;
+#else
+               pte = get_pte(va);
+               if (pte) {
+                       UK_ASSERT(!(*pte & _PAGE_PSE));
+                       *pte = 0;
+                       invlpg(va);
+               }
+               va += PAGE_SIZE;
+               num_frames--;
+#endif
+       }
+       return 0;
+}
+
+/*
  * Mark portion of the address space read only.
  */
 extern struct shared_info _libxenplat_shared_info;
@@ -352,3 +696,16 @@ void _init_mem_prepare(unsigned long *start_pfn, unsigned long *max_pfn)
 #error "Please port (see Mini-OS's arch_mm_preinit())"
 #endif
 }
+
+/*
+ * Reserve an area of virtual address space for mappings
+ */
+
+void _init_mem_demand_area(unsigned long start, unsigned long page_num)
+{
+       demand_map_area_start = start;
+       demand_map_area_end = demand_map_area_start + page_num * PAGE_SIZE;
+
+       uk_printd(DLVL_INFO, "Demand map pfns at %lx-%lx.\n",
+               demand_map_area_start, demand_map_area_end);
+}
diff --git a/plat/xen/x86/setup.c b/plat/xen/x86/setup.c
index ad603de..31d3a90 100644
--- a/plat/xen/x86/setup.c
+++ b/plat/xen/x86/setup.c
@@ -165,7 +165,18 @@ static inline void _init_mem(void)
 #if CONFIG_UKPLAT_MEMRNAME
        _libxenplat_mrd[0].name  = "heap";
 #endif
-       _libxenplat_mrd_num = 1;
+
+       /* demand area */
+       _libxenplat_mrd[1].base  = (void *) VIRT_DEMAND_AREA;
+       _libxenplat_mrd[1].len   = DEMAND_MAP_PAGES * PAGE_SIZE;
+       _libxenplat_mrd[1].flags = UKPLAT_MEMRF_RESERVED;
+#if CONFIG_UKPLAT_MEMRNAME
+       _libxenplat_mrd[1].name  = "demand";
+#endif
+       _init_mem_demand_area((unsigned long) _libxenplat_mrd[1].base,
+                       DEMAND_MAP_PAGES);
+
+       _libxenplat_mrd_num = 2;
 }
 
 void _libxenplat_x86entry(void *start_info) __noreturn;
-- 
2.11.0


_______________________________________________
Minios-devel mailing list
Minios-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/minios-devel

 

