[Xen-devel] [PATCH RFC v2 1/4] HVM x86 deprivileged mode: Create deprivileged page tables



The paging structure mappings for deprivileged mode are added to the monitor
page table for HVM guests, for both HAP and shadow paging. The entries are
generated by walking the page tables and mapping in new pages. Access bits
are adjusted as needed.

Page table entries are generated for the deprivileged .text, .data and stack.
The .text section is allocated once, at HVM domain initialisation, and aliased
from then onwards. The .data section is copied from the sections laid out by
the linker. The mappings are set up in an unused portion of the Xen virtual
address space. The pages are mapped as user-mode accessible, with the NX bit
set for the data and stack regions; the code region is executable and
read-only.
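
As a rough illustration (not a verbatim excerpt), the mapping calls made by
hvm_deprivileged_init() in this patch boil down to the following, using the
linker symbols and flags introduced below:

    /* .text: aliased, user-accessible, read-only, executable (no NX). */
    size = (unsigned long)__hvm_deprivileged_text_end -
           (unsigned long)__hvm_deprivileged_text_start;
    hvm_deprivileged_map_l4(d, l4t_base,
                            (unsigned long)__hvm_deprivileged_text_start,
                            HVM_DEPRIVILEGED_TEXT_ADDR,
                            size, 0 /* no write */, HVM_DEPRIV_ALIAS);

    /* .data (and similarly the stack): copied into fresh pages, RW + NX. */
    size = (unsigned long)__hvm_deprivileged_data_end -
           (unsigned long)__hvm_deprivileged_data_start;
    hvm_deprivileged_map_l4(d, l4t_base,
                            (unsigned long)__hvm_deprivileged_data_start,
                            HVM_DEPRIVILEGED_DATA_ADDR,
                            size, _PAGE_NX | _PAGE_RW, HVM_DEPRIV_COPY);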

The needed pages are allocated on the paging heap and are freed when that
heap is freed (on domain destruction).

Signed-off-by: Ben Catterall <Ben.Catterall@xxxxxxxxxx>

Changes since v1
----------------
 * .text section is now aliased when needed
 * Reduced user stack size to two pages
 * Changed allocator used for pages
 * Changed types to use __hvm_$foo[] for linker variables
 * Moved some #define's to page.h
 * Small bug fix: Testing global bit on L3 not relevant
---
 xen/arch/x86/hvm/Makefile          |   1 +
 xen/arch/x86/hvm/deprivileged.c    | 514 +++++++++++++++++++++++++++++++++++++
 xen/arch/x86/mm/hap/hap.c          |   8 +
 xen/arch/x86/mm/shadow/multi.c     |   8 +
 xen/arch/x86/xen.lds.S             |  19 ++
 xen/include/asm-x86/config.h       |  29 ++-
 xen/include/asm-x86/x86_64/page.h  |  15 ++
 xen/include/xen/hvm/deprivileged.h |  90 +++++++
 xen/include/xen/sched.h            |   4 +
 9 files changed, 681 insertions(+), 7 deletions(-)
 create mode 100644 xen/arch/x86/hvm/deprivileged.c
 create mode 100644 xen/include/xen/hvm/deprivileged.h

diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
index 794e793..df5ebb8 100644
--- a/xen/arch/x86/hvm/Makefile
+++ b/xen/arch/x86/hvm/Makefile
@@ -2,6 +2,7 @@ subdir-y += svm
 subdir-y += vmx
 
 obj-y += asid.o
+obj-y += deprivileged.o
 obj-y += emulate.o
 obj-y += event.o
 obj-y += hpet.o
diff --git a/xen/arch/x86/hvm/deprivileged.c b/xen/arch/x86/hvm/deprivileged.c
new file mode 100644
index 0000000..f34ed67
--- /dev/null
+++ b/xen/arch/x86/hvm/deprivileged.c
@@ -0,0 +1,514 @@
+/*
+ * HVM deprivileged mode to provide support for running operations in
+ * user mode from Xen
+ */
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <xen/domain_page.h>
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <asm/paging.h>
+#include <xen/compiler.h>
+#include <asm/hap.h>
+#include <asm/paging.h>
+#include <asm-x86/page.h>
+#include <public/domctl.h>
+#include <xen/domain_page.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <xen/hvm/deprivileged.h>
+
+void hvm_deprivileged_init(struct domain *d, l4_pgentry_t *l4t_base)
+{
+    void *p;
+    unsigned long size;
+    unsigned int l4t_idx_code = l4_table_offset(HVM_DEPRIVILEGED_TEXT_ADDR);
+    int ret;
+
+    /* There should not already be an entry here */
+    ASSERT(!l4e_get_intpte(l4t_base[l4t_idx_code]));
+
+    /*
+     * We alias the .text segment for deprivileged mode to save memory.
+     * Additionally, to save allocating page tables for each vcpu's deprivileged
+     * mode .text segment, we reuse them.
+     *
+     * If we have not already created a mapping (valid_l4e_code is false) then
+     * we create one and generate the page tables. To save doing this for each
+     * vcpu, if we already have a set of valid page tables then we reuse them.
+     * So, if we have the page tables and there is no entry at the desired PML4
+     * slot, then we can just reuse those page tables.
+     *
+     * The mappings are per-domain as we use the domain's page pool memory
+     * allocator for the new page structure and page frame pages.
+     */
+    if( !d->hvm_depriv_valid_l4e_code )
+    {
+        /*
+         * Build the alias mappings for the .text segment for deprivileged code
+         *
+             * NOTE: If there are other pages here, then this method will map around
+             * them, which means that any future alias will use this mapping. If the
+         * HVM depriv section no longer has a unique PML4 entry in the Xen
+         * memory map, this will need to be accounted for.
+         */
+        size = (unsigned long)__hvm_deprivileged_text_end -
+               (unsigned long)__hvm_deprivileged_text_start;
+
+        ret = hvm_deprivileged_map_l4(d, l4t_base,
+                                   (unsigned long)__hvm_deprivileged_text_start,
+                                   (unsigned long)HVM_DEPRIVILEGED_TEXT_ADDR,
+                                   size, 0 /* No write */, HVM_DEPRIV_ALIAS);
+
+        if( ret )
+        {
+            printk(XENLOG_ERR "HVM: Error when initialising depriv .text. Code: %d\n",
+                   ret);
+
+            domain_crash(d);
+            return;
+        }
+
+        d->hvm_depriv_l4e_code = l4t_base[l4t_idx_code];
+        d->hvm_depriv_valid_l4e_code = 1;
+    }
+    else
+    {
+        /* Just copy the PML4 entry across */
+        l4t_base[l4t_idx_code] = d->hvm_depriv_l4e_code;
+    }
+
+    /* Copy the .data segment for ring3 code */
+    size = (unsigned long)__hvm_deprivileged_data_end -
+           (unsigned long)__hvm_deprivileged_data_start;
+
+    ret = hvm_deprivileged_map_l4(d, l4t_base,
+                                  (unsigned long)__hvm_deprivileged_data_start,
+                                  (unsigned long)HVM_DEPRIVILEGED_DATA_ADDR,
+                                  size, _PAGE_NX | _PAGE_RW, HVM_DEPRIV_COPY);
+
+    if( ret )
+    {
+        printk(XENLOG_ERR "HVM: Error when initialising depriv .data. Code: %d\n",
+               ret);
+        domain_crash(d);
+        return;
+    }
+
+    /*
+     * THIS IS A BIT OF A HACK...
+     * Setup the deprivileged mode stack mappings. By allocating a blank area
+     * we can reuse hvm_deprivileged_map_l4.
+     */
+    size = HVM_DEPRIV_STACK_SIZE;
+
+    p = alloc_xenheap_pages(HVM_DEPRIV_STACK_ORDER, 0);
+    if( p == NULL )
+    {
+        printk(XENLOG_ERR "HVM: Out of memory on deprivileged mode stack init.\n");
+        domain_crash(d);
+        return;
+    }
+
+    ret = hvm_deprivileged_map_l4(d, l4t_base,
+                                  (unsigned long)p,
+                                  (unsigned long)HVM_DEPRIVILEGED_STACK_ADDR,
+                                  size, _PAGE_NX | _PAGE_RW, HVM_DEPRIV_COPY);
+
+    free_xenheap_pages(p, HVM_DEPRIV_STACK_ORDER);
+
+    if( ret )
+    {
+        printk(XENLOG_ERR "HVM: Error when initialising depriv stack. Code: %d\n",
+               ret);
+        domain_crash(d);
+        return;
+    }
+}
+
+void hvm_deprivileged_destroy(struct domain *d)
+{
+
+}
+
+/*
+ * Create a copy or alias of the data at the specified virtual address. The
+ * page table hierarchy is walked and new levels are created if needed.
+ *
+ * If we find a leaf entry in a page table (one which holds the
+ * mfn of a 4KB, 2MB, etc. page frame) which has already been
+ * mapped in, then we bail as we have a collision and this likely
+ * means a bug or the memory configuration has been changed.
+ *
+ * Pages have PAGE_USER, PAGE_GLOBAL (if supported) and PAGE_PRESENT set by
+ * default. The extra l1_flags are used for extra control e.g. PAGE_RW.
+ * The PAGE_RW flag will be enabled for all page structure entries
+ * above the leaf page if that leaf page has PAGE_RW set. This is needed to
+ * permit the writes on the leaf pages. See the Intel manual 3A section 4.6.
+ *
+ * TODO: We proceed down to L1 4KB pages and then map these in. We should
+ * stop the recursion at L3/L2 for a 1GB or 2MB page, which would mean faster
+ * page access. Where we stop would depend on the size (e.g. use 2MB pages for
+ * a few MBs). We'll need to be careful about aliasing such large pages, as
+ * those pages would then need to be aligned to these larger sizes, otherwise
+ * we'd share extra data via the alias.
+ */
+int hvm_deprivileged_map_l4(struct domain *d,
+                            l4_pgentry_t *l4t_base,
+                            unsigned long src_start,
+                            unsigned long dst_start,
+                            unsigned long size,
+                            unsigned int l1_flags,
+                            unsigned int op)
+{
+    struct page_info *l3t_pg; /* the destination page */
+    l3_pgentry_t *l3t_base;
+    unsigned long l4t_idx_dst_start;
+    unsigned long l4t_idx_dst_end;
+    unsigned long flags = _PAGE_USER | _PAGE_PRESENT;
+    unsigned int i;
+
+    /* Leaf page needs RW? */
+    if( l1_flags & _PAGE_RW )
+        flags |= _PAGE_RW;
+
+    /*
+     * Calculate where in the destination we need pages
+     * The PML4 page table doesn't map all of virtual memory: for a
+     * 48-bit implementation it's just 512 L4 slots. We also need to
+     * know which L4 slots our entries lie in.
+     */
+    l4t_idx_dst_start = l4_table_offset(dst_start);
+    l4t_idx_dst_end   = l4_table_offset(dst_start + size) +
+        !!( ((dst_start + size) & ((1ul << L4_PAGETABLE_SHIFT) - 1)) %
+            L4_PAGE_RANGE );
+
+    for( i = l4t_idx_dst_start; i < l4t_idx_dst_end; i++ )
+    {
+        ASSERT( size >= 0 );
+
+        /* Is this an empty entry? */
+        if( !(l4e_get_intpte(l4t_base[i])) )
+        {
+            /* Allocate a new L3 table */
+            if( (l3t_pg = hvm_deprivileged_alloc_page(d)) == NULL )
+                return HVM_ERR_PG_ALLOC;
+
+            l3t_base = map_domain_page(_mfn(page_to_mfn(l3t_pg)));
+
+            /* Add the page into the L4 table */
+            l4t_base[i] = l4e_from_page(l3t_pg, flags);
+
+            hvm_deprivileged_map_l3(d, l3t_base, src_start, dst_start,
+                             (size > L4_PAGE_RANGE) ? L4_PAGE_RANGE : size,
+                             l1_flags, op);
+
+            unmap_domain_page(l3t_base);
+        }
+        else
+        {
+            /*
+             * If there is already page information then the page has been
+             * prepared by something else
+             *
+             * We can try recursing on this and see if where we want to put our
+             * new pages is empty.
+             *
+             * We do need to flip this to be a user mode page though so that
+             * the usermode children can be accessed. This is fine as long as
+             * we preserve the access bits of any supervisor entries that are
+             * used in the leaf case.
+             */
+
+            l3t_base = map_l3t_from_l4e(l4t_base[i]);
+
+            hvm_deprivileged_map_l3(d, l3t_base, src_start, dst_start,
+                             (size > L4_PAGE_RANGE) ? L4_PAGE_RANGE : size,
+                             l1_flags, op);
+
+            l4t_base[i] = l4e_from_intpte(l4e_get_intpte(l4t_base[i]) | flags);
+
+            unmap_domain_page(l3t_base);
+        }
+
+        size -= L4_PAGE_RANGE;
+        src_start += L4_PAGE_RANGE;
+        dst_start += L4_PAGE_RANGE;
+    }
+
+    return 0;
+}
+
+int hvm_deprivileged_map_l3(struct domain *d,
+                            l3_pgentry_t *l3t_base,
+                            unsigned long src_start,
+                            unsigned long dst_start,
+                            unsigned long size,
+                            unsigned int l1_flags,
+                            unsigned int op)
+{
+    struct page_info *l2t_pg; /* the destination page */
+    l2_pgentry_t *l2t_base;
+    unsigned long l3t_idx_dst_start;
+    unsigned long l3t_idx_dst_end;
+    unsigned long flags = _PAGE_USER | _PAGE_PRESENT;
+    unsigned int i;
+
+    /* Leaf page needs RW? */
+    if( l1_flags & _PAGE_RW )
+        flags |= _PAGE_RW;
+
+    /* Calculate where in the destination we need pages */
+    l3t_idx_dst_start = l3_table_offset(dst_start);
+    l3t_idx_dst_end   = l3_table_offset(dst_start + size) +
+        !!( ((dst_start + size) & ((1ul << L3_PAGETABLE_SHIFT) - 1)) %
+             L3_PAGE_RANGE );
+
+    for( i = l3t_idx_dst_start; i < l3t_idx_dst_end; i++ )
+    {
+        ASSERT( size >= 0 );
+
+        /* Is this an empty entry? */
+        if( !(l3e_get_intpte(l3t_base[i])) )
+        {
+            /* Allocate a new L2 table */
+            if( (l2t_pg = hvm_deprivileged_alloc_page(d)) == NULL )
+                return HVM_ERR_PG_ALLOC;
+
+            l2t_base = map_domain_page(_mfn(page_to_mfn(l2t_pg)));
+
+            /* Add the page into the L3 table */
+            l3t_base[i] = l3e_from_page(l2t_pg, flags);
+
+            hvm_deprivileged_map_l2(d, l2t_base, src_start, dst_start,
+                             (size > L3_PAGE_RANGE) ? L3_PAGE_RANGE : size,
+                             l1_flags, op);
+
+            unmap_domain_page(l2t_base);
+        }
+        else
+        {
+            /*
+             * If there is already page information then the page has been
+             * prepared by something else
+             *
+             * If the PSE bit is set, then we can't recurse as this is
+             * a leaf page so we fail.
+             */
+            if( (l3e_get_flags(l3t_base[i]) & _PAGE_PSE) )
+            {
+                panic("HVM: L3 leaf page is already mapped\n");
+            }
+
+            /*
+             * We can try recursing on this and see if where we want to put our
+             * new pages is empty.
+             *
+             * We do need to flip this to be a user mode page though so that
+             * the usermode children can be accessed. This is fine as long as
+             * we preserve the access bits of any supervisor entries that are
+             * used in the leaf case.
+             */
+
+            l2t_base = map_l2t_from_l3e(l3t_base[i]);
+
+            hvm_deprivileged_map_l2(d, l2t_base, src_start, dst_start,
+                             (size > L3_PAGE_RANGE) ? L3_PAGE_RANGE : size,
+                             l1_flags, op);
+
+            l3t_base[i] = l3e_from_intpte(l3e_get_intpte(l3t_base[i]) | flags);
+
+            unmap_domain_page(l2t_base);
+        }
+
+        size -= L3_PAGE_RANGE;
+        src_start += L3_PAGE_RANGE;
+        dst_start += L3_PAGE_RANGE;
+    }
+
+    return 0;
+}
+
+int hvm_deprivileged_map_l2(struct domain *d,
+                            l2_pgentry_t *l2t_base,
+                            unsigned long src_start,
+                            unsigned long dst_start,
+                            unsigned long size,
+                            unsigned int l1_flags,
+                            unsigned int op)
+{
+    struct page_info *l1t_pg; /* the destination page */
+    l1_pgentry_t *l1t_base;
+    unsigned long l2t_idx_dst_start;
+    unsigned long l2t_idx_dst_end;
+    unsigned long flags = _PAGE_USER | _PAGE_PRESENT;
+    unsigned int i;
+
+    /* Leaf page needs RW? */
+    if( l1_flags & _PAGE_RW )
+        flags |= _PAGE_RW;
+
+    /* Calculate where in the destination we need pages */
+    l2t_idx_dst_start = l2_table_offset(dst_start);
+    l2t_idx_dst_end   = l2_table_offset(dst_start + size) +
+        !!( ((dst_start + size) & ((1ul << L2_PAGETABLE_SHIFT) - 1)) %
+             L2_PAGE_RANGE );
+
+    for( i = l2t_idx_dst_start; i < l2t_idx_dst_end; i++ )
+    {
+        ASSERT( size >= 0 );
+
+        /* Is this an empty entry? */
+        if( !(l2e_get_intpte(l2t_base[i])) )
+        {
+            /* Allocate a new L1 table */
+            if( (l1t_pg = hvm_deprivileged_alloc_page(d)) == NULL )
+                return HVM_ERR_PG_ALLOC;
+
+            l1t_base = map_domain_page(_mfn(page_to_mfn(l1t_pg)));
+
+            /* Add the page into the L2 table */
+            l2t_base[i] = l2e_from_page(l1t_pg, flags);
+
+            hvm_deprivileged_map_l1(d, l1t_base, src_start, dst_start,
+                             (size > L2_PAGE_RANGE) ? L2_PAGE_RANGE : size,
+                             l1_flags, op);
+
+            unmap_domain_page(l1t_base);
+        }
+        else
+        {
+            /*
+             * If there is already page information then the page has been
+             * prepared by something else
+             *
+             * If the PSE bit is set, then we can't recurse as this is
+             * a leaf page so we fail.
+             */
+            if( (l2e_get_flags(l2t_base[i]) & _PAGE_PSE) )
+            {
+                panic("HVM: L2 Leaf page is already mapped\n");
+            }
+
+            /*
+             * We can try recursing on this and see if where we want to put our
+             * new pages is empty.
+             *
+             * We do need to flip this to be a user mode page though so that
+             * the usermode children can be accessed. This is fine as long as
+             * we preserve the access bits of any supervisor entries that are
+             * used in the leaf case.
+             */
+
+            l1t_base = map_l1t_from_l2e(l2t_base[i]);
+
+            hvm_deprivileged_map_l1(d, l1t_base, src_start, dst_start,
+                             (size > L2_PAGE_RANGE) ? L2_PAGE_RANGE : size,
+                             l1_flags, op);
+
+            l2t_base[i] = l2e_from_intpte(l2e_get_intpte(l2t_base[i]) | flags);
+
+            unmap_domain_page(l1t_base);
+        }
+
+        size -= L2_PAGE_RANGE;
+        src_start += L2_PAGE_RANGE;
+        dst_start += L2_PAGE_RANGE;
+    }
+    return 0;
+}
+
+int hvm_deprivileged_map_l1(struct domain *d,
+                            l1_pgentry_t *l1t_base,
+                            unsigned long src_start,
+                            unsigned long dst_start,
+                            unsigned long size,
+                            unsigned int l1_flags,
+                            unsigned int op)
+{
+    struct page_info *dst_pg; /* the destination page */
+    char *src_data;
+    char *dst_data; /* Pointer for writing into the page */
+    unsigned long l1t_idx_dst_start;
+    unsigned long l1t_idx_dst_end;
+    unsigned long flags = _PAGE_USER | _PAGE_GLOBAL | _PAGE_PRESENT;
+    unsigned int i;
+
+    /* Calculate where in the destination we need pages */
+    l1t_idx_dst_start = l1_table_offset(dst_start);
+    l1t_idx_dst_end   = l1_table_offset(dst_start + size) +
+        !!( ((dst_start + size) & ((1ul << L1_PAGETABLE_SHIFT) - 1)) %
+             L1_PAGE_RANGE );
+
+    for( i = l1t_idx_dst_start; i < l1t_idx_dst_end; i++ )
+    {
+        ASSERT( size >= 0 );
+
+        /* Is this an empty entry? */
+        if( !(l1e_get_intpte(l1t_base[i])) )
+        {
+            if( op == HVM_DEPRIV_ALIAS )
+            {
+                /*
+                 * To alias a page, put the mfn of the page into our page table
+                 * The source should be page aligned to prevent us mapping in
+                 * more data than we should.
+                 */
+                l1t_base[i] = l1e_from_pfn(virt_to_mfn(src_start),
+                                           flags | l1_flags);
+            }
+            else
+            {
+                /* Create a new 4KB page */
+                if( (dst_pg = hvm_deprivileged_alloc_page(d)) == NULL )
+                    return HVM_ERR_PG_ALLOC;
+
+                /*
+                 * Map in src and dst, perform the copy then add it to the
+                 * L1 table
+                 */
+                dst_data = map_domain_page(_mfn(page_to_mfn(dst_pg)));
+                src_data = map_domain_page(_mfn(virt_to_mfn(src_start)));
+                ASSERT( dst_data != NULL && src_data != NULL );
+
+                memcpy(dst_data, src_data,
+                       (size > PAGESIZE_4KB) ? PAGESIZE_4KB : size);
+
+                unmap_domain_page(src_data);
+                unmap_domain_page(dst_data);
+
+                l1t_base[i] = l1e_from_page(dst_pg, flags | l1_flags);
+            }
+
+            size -= PAGESIZE_4KB;
+            src_start += PAGESIZE_4KB;
+            dst_start += PAGESIZE_4KB;
+        }
+        else
+        {
+            /*
+             * If there is already page information then the page has been
+             * prepared by something else, and we can't overwrite it
+             * as this is the leaf case.
+             */
+            panic("HVM: L1 Region already mapped: %lx\nat(%lx)\n",
+                  l1e_get_intpte(l1t_base[i]), dst_start);
+        }
+    }
+    return 0;
+}
+
+/* Allocate a page from the domain's paging pool */
+struct page_info *hvm_deprivileged_alloc_page(struct domain *d)
+{
+    struct page_info *pg;
+
+    if( (pg = d->arch.paging.alloc_page(d)) == NULL )
+    {
+        printk(XENLOG_ERR "HVM: Out of memory allocating HVM page\n");
+        domain_crash(d);
+        return NULL;
+    }
+
+    return pg;
+}
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index e9c0080..4048929 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -42,6 +42,7 @@
 #include <asm/hvm/nestedhvm.h>
 
 #include "private.h"
+#include <xen/hvm/deprivileged.h>
 
 /* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
@@ -401,6 +402,9 @@ static void hap_install_xen_entries_in_l4(struct vcpu *v, mfn_t l4mfn)
            &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
            ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
 
+    /* Initialise the HVM deprivileged mode feature */
+    hvm_deprivileged_init(d, l4e);
+
     /* Install the per-domain mappings for this domain */
     l4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
         l4e_from_pfn(mfn_x(page_to_mfn(d->arch.perdomain_l3_pg)),
@@ -439,6 +443,10 @@ static void hap_destroy_monitor_table(struct vcpu* v, mfn_t mmfn)
 
     /* Put the memory back in the pool */
     hap_free(d, mmfn);
+
+    /* Destroy the HVM tables */
+    ASSERT(paging_locked_by_me(d));
+    hvm_deprivileged_destroy(d);
 }
 
 /************************************************/
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 22081a1..deed4fd 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -38,6 +38,7 @@
 #include <asm/mtrr.h>
 #include <asm/guest_pt.h>
 #include <public/sched.h>
+#include <xen/hvm/deprivileged.h>
 #include "private.h"
 #include "types.h"
 
@@ -1429,6 +1430,13 @@ void sh_install_xen_entries_in_l4(struct domain *d, mfn_t gl4mfn, mfn_t sl4mfn)
            &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
            slots * sizeof(l4_pgentry_t));
 
+    /*
+     * Initialise the HVM deprivileged mode feature.
+     * shadow_l4e_t is a typedef of l4_pgentry_t, as are all of the shadow
+     * paging structure types, so this method works for the shadow tables too.
+     */
+    hvm_deprivileged_init(d, (l4_pgentry_t *)sl4e);
+
     /* Install the per-domain mappings for this domain */
     sl4e[shadow_l4_table_offset(PERDOMAIN_VIRT_START)] =
         shadow_l4e_from_mfn(page_to_mfn(d->arch.perdomain_l3_pg),
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index 6553cff..0bfe0cf 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -50,6 +50,25 @@ SECTIONS
        _etext = .;             /* End of text section */
   } :text = 0x9090
 
+  /* HVM deprivileged mode segments
+   * Used to map the ring3 static data and .text
+   */
+
+  . = ALIGN(PAGE_SIZE);
+  .hvm_deprivileged_text : {
+      __hvm_deprivileged_text_start = . ;
+      *(.hvm_deprivileged_enhancement.text)
+      __hvm_deprivileged_text_end = . ;
+  } : text
+
+  . = ALIGN(PAGE_SIZE);
+  .hvm_deprivileged_data : {
+      __hvm_deprivileged_data_start = . ;
+      *(.hvm_deprivileged_enhancement.data)
+      __hvm_deprivileged_data_end = . ;
+  } : text
+
+  . = ALIGN(PAGE_SIZE);
   .rodata : {
        /* Bug frames table */
        . = ALIGN(4);
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index 3e9be83..b5f4e14 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -183,10 +183,12 @@ extern unsigned char boot_edid_info[128];
 #endif
  *  0xffff880000000000 - 0xffffffffffffffff [120TB,             PML4:272-511]
  *    PV: Guest-defined use.
- *  0xffff880000000000 - 0xffffff7fffffffff [119.5TB,           PML4:272-510]
+ *  0xffff880000000000 - 0xfffffeffffffffff [119TB,             PML4:272-509]
  *    HVM/idle: continuation of 1:1 mapping
+ *  0xffffff0000000000 - 0xffffff7fffffffff [512GB, 2^39 bytes  PML4:510]
+ *    HVM: HVM deprivileged mode .text segment
  *  0xffffff8000000000 - 0xffffffffffffffff [512GB, 2^39 bytes  PML4:511]
- *    HVM/idle: unused
+ *    HVM: HVM deprivileged mode data and stack segments
  *
  * Compatibility guest area layout:
  *  0x0000000000000000 - 0x00000000f57fffff [3928MB,            PML4:0]
@@ -201,7 +203,6 @@ extern unsigned char boot_edid_info[128];
  *    Reserved for future use.
  */
 
-
 #define ROOT_PAGETABLE_FIRST_XEN_SLOT 256
 #define ROOT_PAGETABLE_LAST_XEN_SLOT  271
 #define ROOT_PAGETABLE_XEN_SLOTS \
@@ -270,16 +271,30 @@ extern unsigned char boot_edid_info[128];
 #define FRAMETABLE_VIRT_START   (FRAMETABLE_VIRT_END - FRAMETABLE_SIZE)
 
 #ifndef CONFIG_BIGMEM
-/* Slot 262-271/510: A direct 1:1 mapping of all of physical memory. */
+/* Slot 262-271/509: A direct 1:1 mapping of all of physical memory. */
 #define DIRECTMAP_VIRT_START    (PML4_ADDR(262))
-#define DIRECTMAP_SIZE          (PML4_ENTRY_BYTES * (511 - 262))
+#define DIRECTMAP_SIZE          (PML4_ENTRY_BYTES * (510 - 262))
 #else
-/* Slot 265-271/510: A direct 1:1 mapping of all of physical memory. */
+/* Slot 265-271/509: A direct 1:1 mapping of all of physical memory. */
 #define DIRECTMAP_VIRT_START    (PML4_ADDR(265))
-#define DIRECTMAP_SIZE          (PML4_ENTRY_BYTES * (511 - 265))
+#define DIRECTMAP_SIZE          (PML4_ENTRY_BYTES * (510 - 265))
 #endif
 #define DIRECTMAP_VIRT_END      (DIRECTMAP_VIRT_START + DIRECTMAP_SIZE)
 
+/*
+ * Slots 510-511: HVM deprivileged mode
+ * The virtual addresses where the .text, .data and stack should be
+ * placed.
+ * We put the .text section in slot 510 by itself so that we can easily create
+ * an alias of it. This is because aliasing reuses the same mfn for a page
+ * table entry, so if the data and stack shared a PML4 slot with the .text,
+ * the alias would pull them into other address spaces as well, which is not
+ * correct.
+ */
+#define HVM_DEPRIVILEGED_TEXT_ADDR  (PML4_ADDR(510))
+#define HVM_DEPRIVILEGED_DATA_ADDR  (PML4_ADDR(511) + 0xa000000)
+#define HVM_DEPRIVILEGED_STACK_ADDR (PML4_ADDR(511) + 0xc000000)
+
 #ifndef __ASSEMBLY__
 
 /* This is not a fixed value, just a lower limit. */
diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h
index 19ab4d0..8ecb877 100644
--- a/xen/include/asm-x86/x86_64/page.h
+++ b/xen/include/asm-x86/x86_64/page.h
@@ -22,6 +22,21 @@
 #define __PAGE_OFFSET           DIRECTMAP_VIRT_START
 #define __XEN_VIRT_START        XEN_VIRT_START
 
+/* The sizes of the pages */
+#define PAGESIZE_1GB (1ul << L3_PAGETABLE_SHIFT)
+#define PAGESIZE_2MB (1ul << L2_PAGETABLE_SHIFT)
+#define PAGESIZE_4KB (1ul << L1_PAGETABLE_SHIFT)
+
+/*
+ * The size in bytes that a single L{1,2,3,4} entry covers.
+ * There are 512 (left shift by 9) entries in each page-structure.
+ */
+#define L4_PAGE_RANGE (PAGESIZE_1GB << 9)
+#define L3_PAGE_RANGE (PAGESIZE_2MB << 9)
+#define L2_PAGE_RANGE (PAGESIZE_4KB << 9)
+#define L1_PAGE_RANGE (PAGESIZE_4KB     )
+
+
 /* These are architectural limits. Current CPUs support only 40-bit phys. */
 #define PADDR_BITS              52
 #define VADDR_BITS              48
diff --git a/xen/include/xen/hvm/deprivileged.h b/xen/include/xen/hvm/deprivileged.h
new file mode 100644
index 0000000..bcc8c50
--- /dev/null
+++ b/xen/include/xen/hvm/deprivileged.h
@@ -0,0 +1,90 @@
+#ifndef __X86_HVM_DEPRIVILEGED
+
+#define __X86_HVM_DEPRIVILEGED
+
+#include <asm/page.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <xen/domain_page.h>
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <asm/paging.h>
+#include <asm/hap.h>
+#include <asm/paging.h>
+#include <asm-x86/page.h>
+#include <public/domctl.h>
+#include <xen/domain_page.h>
+
+/*
+ * Initialise the HVM deprivileged mode. This just sets up the general
+ * page mappings for .text and .data. It does not prepare each HVM vcpu's data
+ * or stack which needs to be done separately using
+ * hvm_deprivileged_prepare_vcpu.
+ */
+void hvm_deprivileged_init(struct domain *d, l4_pgentry_t *l4t_base);
+
+/*
+ * Free up the data used by the HVM deprivileged enhancements.
+ * This frees general page mappings. It does not destroy the per-vcpu
+ * data so hvm_deprivileged_destroy_vcpu also needs to be called for each vcpu.
+ * This method should be called after those per-vcpu destruction routines.
+ */
+void hvm_deprivileged_destroy(struct domain *d);
+
+/*
+ * Create a mapping of the data at the specified virtual address. This walks
+ * the page table hierarchy, creating new levels as needed, and then either
+ * copies or aliases the data.
+ */
+int hvm_deprivileged_map_l4(struct domain *d,
+                            l4_pgentry_t *l4e_base,
+                            unsigned long src_start,
+                            unsigned long dst_start,
+                            unsigned long size,
+                            unsigned int l1_flags,
+                            unsigned int op);
+
+int hvm_deprivileged_map_l3(struct domain *d,
+                            l3_pgentry_t *l3t_base,
+                            unsigned long src_start,
+                            unsigned long dst_start,
+                            unsigned long size,
+                            unsigned int l1_flags,
+                            unsigned int op);
+
+int hvm_deprivileged_map_l2(struct domain *d,
+                            l2_pgentry_t *l2t_base,
+                            unsigned long src_start,
+                            unsigned long dst_start,
+                            unsigned long size,
+                            unsigned int l1_flags,
+                            unsigned int op);
+/*
+ * The leaf case of the map. Will allocate the pages and actually copy or alias
+ * the data.
+ */
+int hvm_deprivileged_map_l1(struct domain *d,
+                            l1_pgentry_t *l1t_base,
+                            unsigned long src_start,
+                            unsigned long dst_start,
+                            unsigned long size,
+                            unsigned int l1_flags,
+                            unsigned int op);
+
+/* Used to allocate a page for the deprivileged mode */
+struct page_info *hvm_deprivileged_alloc_page(struct domain *d);
+
+/* The segments where the user mode .text and .data are stored */
+extern unsigned long __hvm_deprivileged_text_start[];
+extern unsigned long __hvm_deprivileged_text_end[];
+extern unsigned long __hvm_deprivileged_data_start[];
+extern unsigned long __hvm_deprivileged_data_end[];
+#define HVM_DEPRIV_STACK_SIZE (PAGE_SIZE << 1)
+#define HVM_DEPRIV_STACK_ORDER 1
+#define HVM_DEPRIV_MODE 1
+#define HVM_ERR_PG_ALLOC -1
+#define HVM_DEPRIV_ALIAS 1
+#define HVM_DEPRIV_COPY 0
+
+#endif
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 73d3bc8..66f4f5e 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -462,6 +462,10 @@ struct domain
     /* vNUMA topology accesses are protected by rwlock. */
     rwlock_t vnuma_rwlock;
     struct vnuma_info *vnuma;
+
+    /* HVM deprivileged mode data */
+    int hvm_depriv_valid_l4e_code;
+    l4_pgentry_t hvm_depriv_l4e_code;
 };
 
 struct domain_setup_info
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel