[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 03/11] x86: re-introduce map_domain_page() et al


  • To: "xen-devel" <xen-devel@xxxxxxxxxxxxx>
  • From: "Jan Beulich" <JBeulich@xxxxxxxx>
  • Date: Tue, 22 Jan 2013 10:50:55 +0000
  • Delivery-date: Tue, 22 Jan 2013 10:53:19 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xen.org>

This is being done mostly in the form previously used on x86-32,
utilizing the second L3 page table slot within the per-domain mapping
area for those mappings. It remains to be determined whether that
concept is really suitable, or whether instead re-implementing at least
the non-global variant from scratch would be better.

Also add the helpers {clear,copy}_domain_page() as well as initial uses
of them.

One question is whether, to exercise the non-trivial code paths, we
shouldn't make the trivial shortcuts conditional upon NDEBUG being
defined. See the debugging patch at the end of the series.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -19,6 +19,7 @@ obj-bin-y += dmi_scan.init.o
 obj-y += domctl.o
 obj-y += domain.o
 obj-bin-y += domain_build.init.o
+obj-y += domain_page.o
 obj-y += e820.o
 obj-y += extable.o
 obj-y += flushtlb.o
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -397,10 +397,14 @@ int vcpu_initialise(struct vcpu *v)
             return -ENOMEM;
         clear_page(page_to_virt(pg));
         perdomain_pt_page(d, idx) = pg;
-        d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+idx]
+        d->arch.mm_perdomain_l2[0][l2_table_offset(PERDOMAIN_VIRT_START)+idx]
             = l2e_from_page(pg, __PAGE_HYPERVISOR);
     }
 
+    rc = mapcache_vcpu_init(v);
+    if ( rc )
+        return rc;
+
     paging_vcpu_init(v);
 
     v->arch.perdomain_ptes = perdomain_ptes(d, v);
@@ -526,8 +530,8 @@ int arch_domain_create(struct domain *d,
     pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
     if ( pg == NULL )
         goto fail;
-    d->arch.mm_perdomain_l2 = page_to_virt(pg);
-    clear_page(d->arch.mm_perdomain_l2);
+    d->arch.mm_perdomain_l2[0] = page_to_virt(pg);
+    clear_page(d->arch.mm_perdomain_l2[0]);
 
     pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
     if ( pg == NULL )
@@ -535,8 +539,10 @@ int arch_domain_create(struct domain *d,
     d->arch.mm_perdomain_l3 = page_to_virt(pg);
     clear_page(d->arch.mm_perdomain_l3);
     d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
-        l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
-                            __PAGE_HYPERVISOR);
+        l3e_from_pfn(virt_to_mfn(d->arch.mm_perdomain_l2[0]),
+                     __PAGE_HYPERVISOR);
+
+    mapcache_domain_init(d);
 
     HYPERVISOR_COMPAT_VIRT_START(d) =
         is_hvm_domain(d) ? ~0u : __HYPERVISOR_COMPAT_VIRT_START;
@@ -609,8 +615,9 @@ int arch_domain_create(struct domain *d,
     free_xenheap_page(d->shared_info);
     if ( paging_initialised )
         paging_final_teardown(d);
-    if ( d->arch.mm_perdomain_l2 )
-        free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2));
+    mapcache_domain_exit(d);
+    if ( d->arch.mm_perdomain_l2[0] )
+        free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2[0]));
     if ( d->arch.mm_perdomain_l3 )
         free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3));
     if ( d->arch.mm_perdomain_pt_pages )
@@ -633,13 +640,15 @@ void arch_domain_destroy(struct domain *
 
     paging_final_teardown(d);
 
+    mapcache_domain_exit(d);
+
     for ( i = 0; i < PDPT_L2_ENTRIES; ++i )
     {
         if ( perdomain_pt_page(d, i) )
             free_domheap_page(perdomain_pt_page(d, i));
     }
     free_domheap_page(virt_to_page(d->arch.mm_perdomain_pt_pages));
-    free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2));
+    free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2[0]));
     free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3));
 
     free_xenheap_page(d->shared_info);
--- /dev/null
+++ b/xen/arch/x86/domain_page.c
@@ -0,0 +1,471 @@
+/******************************************************************************
+ * domain_page.c
+ *
+ * Allow temporary mapping of domain pages.
+ *
+ * Copyright (c) 2003-2006, Keir Fraser <keir@xxxxxxxxxxxxx>
+ */
+
+#include <xen/domain_page.h>
+#include <xen/mm.h>
+#include <xen/perfc.h>
+#include <xen/pfn.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+#include <asm/flushtlb.h>
+#include <asm/hardirq.h>
+
+static inline struct vcpu *mapcache_current_vcpu(void)
+{
+    /* In the common case we use the mapcache of the running VCPU. */
+    struct vcpu *v = current;
+
+    /*
+     * When current isn't properly set up yet, this is equivalent to
+     * running in an idle vCPU (callers must check for NULL).
+     */
+    if ( v == (struct vcpu *)0xfffff000 )
+        return NULL;
+
+    /*
+     * If guest_table is NULL, and we are running a paravirtualised guest,
+     * then it means we are running on the idle domain's page table and must
+     * therefore use its mapcache.
+     */
+    if ( unlikely(pagetable_is_null(v->arch.guest_table)) && !is_hvm_vcpu(v) )
+    {
+        /* If we really are idling, perform lazy context switch now. */
+        if ( (v = idle_vcpu[smp_processor_id()]) == current )
+            sync_local_execstate();
+        /* We must now be running on the idle page table. */
+        ASSERT(read_cr3() == __pa(idle_pg_table));
+    }
+
+    return v;
+}
+
+#define mapcache_l2_entry(e) ((e) >> PAGETABLE_ORDER)
+#define MAPCACHE_L2_ENTRIES (mapcache_l2_entry(MAPCACHE_ENTRIES - 1) + 1)
+#define DCACHE_L1ENT(dc, idx) \
+    ((dc)->l1tab[(idx) >> PAGETABLE_ORDER] \
+                [(idx) & ((1 << PAGETABLE_ORDER) - 1)])
+
+void *map_domain_page(unsigned long mfn)
+{
+    unsigned long flags;
+    unsigned int idx, i;
+    struct vcpu *v;
+    struct mapcache_domain *dcache;
+    struct mapcache_vcpu *vcache;
+    struct vcpu_maphash_entry *hashent;
+
+    if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
+        return mfn_to_virt(mfn);
+
+    v = mapcache_current_vcpu();
+    if ( !v || is_hvm_vcpu(v) )
+        return mfn_to_virt(mfn);
+
+    dcache = &v->domain->arch.pv_domain.mapcache;
+    vcache = &v->arch.pv_vcpu.mapcache;
+    if ( !dcache->l1tab )
+        return mfn_to_virt(mfn);
+
+    perfc_incr(map_domain_page_count);
+
+    local_irq_save(flags);
+
+    hashent = &vcache->hash[MAPHASH_HASHFN(mfn)];
+    if ( hashent->mfn == mfn )
+    {
+        idx = hashent->idx;
+        ASSERT(idx < dcache->entries);
+        hashent->refcnt++;
+        ASSERT(hashent->refcnt);
+        ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, idx)) == mfn);
+        goto out;
+    }
+
+    spin_lock(&dcache->lock);
+
+    /* Has some other CPU caused a wrap? We must flush if so. */
+    if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
+    {
+        vcache->shadow_epoch = dcache->epoch;
+        if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) )
+        {
+            perfc_incr(domain_page_tlb_flush);
+            flush_tlb_local();
+        }
+    }
+
+    idx = find_next_zero_bit(dcache->inuse, dcache->entries, dcache->cursor);
+    if ( unlikely(idx >= dcache->entries) )
+    {
+        unsigned long accum = 0;
+
+        /* /First/, clean the garbage map and update the inuse list. */
+        for ( i = 0; i < BITS_TO_LONGS(dcache->entries); i++ )
+        {
+            dcache->inuse[i] &= ~xchg(&dcache->garbage[i], 0);
+            accum |= ~dcache->inuse[i];
+        }
+
+        if ( accum )
+            idx = find_first_zero_bit(dcache->inuse, dcache->entries);
+        else
+        {
+            /* Replace a hash entry instead. */
+            i = MAPHASH_HASHFN(mfn);
+            do {
+                hashent = &vcache->hash[i];
+                if ( hashent->idx != MAPHASHENT_NOTINUSE && !hashent->refcnt )
+                {
+                    idx = hashent->idx;
+                    ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, idx)) ==
+                           hashent->mfn);
+                    l1e_write(&DCACHE_L1ENT(dcache, idx), l1e_empty());
+                    hashent->idx = MAPHASHENT_NOTINUSE;
+                    hashent->mfn = ~0UL;
+                    break;
+                }
+                if ( ++i == MAPHASH_ENTRIES )
+                    i = 0;
+            } while ( i != MAPHASH_HASHFN(mfn) );
+        }
+        BUG_ON(idx >= dcache->entries);
+
+        /* /Second/, flush TLBs. */
+        perfc_incr(domain_page_tlb_flush);
+        flush_tlb_local();
+        vcache->shadow_epoch = ++dcache->epoch;
+        dcache->tlbflush_timestamp = tlbflush_current_time();
+    }
+
+    set_bit(idx, dcache->inuse);
+    dcache->cursor = idx + 1;
+
+    spin_unlock(&dcache->lock);
+
+    l1e_write(&DCACHE_L1ENT(dcache, idx),
+              l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
+
+ out:
+    local_irq_restore(flags);
+    return (void *)MAPCACHE_VIRT_START + pfn_to_paddr(idx);
+}
+
+void unmap_domain_page(const void *ptr)
+{
+    unsigned int idx;
+    struct vcpu *v;
+    struct mapcache_domain *dcache;
+    unsigned long va = (unsigned long)ptr, mfn, flags;
+    struct vcpu_maphash_entry *hashent;
+
+    if ( va >= DIRECTMAP_VIRT_START )
+        return;
+
+    ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
+
+    v = mapcache_current_vcpu();
+    ASSERT(v && !is_hvm_vcpu(v));
+
+    dcache = &v->domain->arch.pv_domain.mapcache;
+    ASSERT(dcache->l1tab);
+
+    idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
+    mfn = l1e_get_pfn(DCACHE_L1ENT(dcache, idx));
+    hashent = &v->arch.pv_vcpu.mapcache.hash[MAPHASH_HASHFN(mfn)];
+
+    local_irq_save(flags);
+
+    if ( hashent->idx == idx )
+    {
+        ASSERT(hashent->mfn == mfn);
+        ASSERT(hashent->refcnt);
+        hashent->refcnt--;
+    }
+    else if ( !hashent->refcnt )
+    {
+        if ( hashent->idx != MAPHASHENT_NOTINUSE )
+        {
+            /* /First/, zap the PTE. */
+            ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, hashent->idx)) ==
+                   hashent->mfn);
+            l1e_write(&DCACHE_L1ENT(dcache, hashent->idx), l1e_empty());
+            /* /Second/, mark as garbage. */
+            set_bit(hashent->idx, dcache->garbage);
+        }
+
+        /* Add newly-freed mapping to the maphash. */
+        hashent->mfn = mfn;
+        hashent->idx = idx;
+    }
+    else
+    {
+        /* /First/, zap the PTE. */
+        l1e_write(&DCACHE_L1ENT(dcache, idx), l1e_empty());
+        /* /Second/, mark as garbage. */
+        set_bit(idx, dcache->garbage);
+    }
+
+    local_irq_restore(flags);
+}
+
+void clear_domain_page(unsigned long mfn)
+{
+    void *ptr = map_domain_page(mfn);
+
+    clear_page(ptr);
+    unmap_domain_page(ptr);
+}
+
+void copy_domain_page(unsigned long dmfn, unsigned long smfn)
+{
+    const void *src = map_domain_page(smfn);
+    void *dst = map_domain_page(dmfn);
+
+    copy_page(dst, src);
+    unmap_domain_page(dst);
+    unmap_domain_page(src);
+}
+
+int mapcache_domain_init(struct domain *d)
+{
+    struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
+    unsigned int i, bitmap_pages, memf = MEMF_node(domain_to_node(d));
+    unsigned long *end;
+
+    if ( is_hvm_domain(d) || is_idle_domain(d) )
+        return 0;
+
+    if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
+        return 0;
+
+    dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES + 1);
+    d->arch.mm_perdomain_l2[MAPCACHE_SLOT] = alloc_xenheap_pages(0, memf);
+    if ( !dcache->l1tab || !d->arch.mm_perdomain_l2[MAPCACHE_SLOT] )
+        return -ENOMEM;
+
+    clear_page(d->arch.mm_perdomain_l2[MAPCACHE_SLOT]);
+    d->arch.mm_perdomain_l3[l3_table_offset(MAPCACHE_VIRT_START)] =
+        l3e_from_paddr(__pa(d->arch.mm_perdomain_l2[MAPCACHE_SLOT]),
+                       __PAGE_HYPERVISOR);
+
+    BUILD_BUG_ON(MAPCACHE_VIRT_END + 3 +
+                 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long)) >
+                 MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
+    bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
+    dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
+    dcache->garbage = dcache->inuse +
+                      (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
+    end = dcache->garbage + bitmap_pages * PAGE_SIZE / sizeof(long);
+
+    for ( i = l2_table_offset((unsigned long)dcache->inuse);
+          i <= l2_table_offset((unsigned long)(end - 1)); ++i )
+    {
+        ASSERT(i <= MAPCACHE_L2_ENTRIES);
+        dcache->l1tab[i] = alloc_xenheap_pages(0, memf);
+        if ( !dcache->l1tab[i] )
+            return -ENOMEM;
+        clear_page(dcache->l1tab[i]);
+        d->arch.mm_perdomain_l2[MAPCACHE_SLOT][i] =
+            l2e_from_paddr(__pa(dcache->l1tab[i]), __PAGE_HYPERVISOR);
+    }
+
+    spin_lock_init(&dcache->lock);
+
+    return 0;
+}
+
+void mapcache_domain_exit(struct domain *d)
+{
+    struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
+
+    if ( is_hvm_domain(d) )
+        return;
+
+    if ( dcache->l1tab )
+    {
+        unsigned long i;
+
+        for ( i = (unsigned long)dcache->inuse; ; i += PAGE_SIZE )
+        {
+            l1_pgentry_t *pl1e;
+
+            if ( l2_table_offset(i) > MAPCACHE_L2_ENTRIES ||
+                 !dcache->l1tab[l2_table_offset(i)] )
+                break;
+
+            pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)];
+            if ( l1e_get_flags(*pl1e) )
+                free_domheap_page(l1e_get_page(*pl1e));
+        }
+
+        for ( i = 0; i < MAPCACHE_L2_ENTRIES + 1; ++i )
+            free_xenheap_page(dcache->l1tab[i]);
+
+        xfree(dcache->l1tab);
+    }
+    free_xenheap_page(d->arch.mm_perdomain_l2[MAPCACHE_SLOT]);
+}
+
+int mapcache_vcpu_init(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
+    unsigned long i;
+    unsigned int memf = MEMF_node(vcpu_to_node(v));
+
+    if ( is_hvm_vcpu(v) || !dcache->l1tab )
+        return 0;
+
+    while ( dcache->entries < d->max_vcpus * MAPCACHE_VCPU_ENTRIES )
+    {
+        unsigned int ents = dcache->entries + MAPCACHE_VCPU_ENTRIES;
+        l1_pgentry_t *pl1e;
+
+        /* Populate page tables. */
+        if ( !dcache->l1tab[i = mapcache_l2_entry(ents - 1)] )
+        {
+            dcache->l1tab[i] = alloc_xenheap_pages(0, memf);
+            if ( !dcache->l1tab[i] )
+                return -ENOMEM;
+            clear_page(dcache->l1tab[i]);
+            d->arch.mm_perdomain_l2[MAPCACHE_SLOT][i] =
+                l2e_from_paddr(__pa(dcache->l1tab[i]), __PAGE_HYPERVISOR);
+        }
+
+        /* Populate bit maps. */
+        i = (unsigned long)(dcache->inuse + BITS_TO_LONGS(ents));
+        pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)];
+        if ( !l1e_get_flags(*pl1e) )
+        {
+            struct page_info *pg = alloc_domheap_page(NULL, memf);
+
+            if ( !pg )
+                return -ENOMEM;
+            clear_domain_page(page_to_mfn(pg));
+            *pl1e = l1e_from_page(pg, __PAGE_HYPERVISOR);
+
+            i = (unsigned long)(dcache->garbage + BITS_TO_LONGS(ents));
+            pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)];
+            ASSERT(!l1e_get_flags(*pl1e));
+
+            pg = alloc_domheap_page(NULL, memf);
+            if ( !pg )
+                return -ENOMEM;
+            clear_domain_page(page_to_mfn(pg));
+            *pl1e = l1e_from_page(pg, __PAGE_HYPERVISOR);
+        }
+
+        dcache->entries = ents;
+    }
+
+    /* Mark all maphash entries as not in use. */
+    BUILD_BUG_ON(MAPHASHENT_NOTINUSE < MAPCACHE_ENTRIES);
+    for ( i = 0; i < MAPHASH_ENTRIES; i++ )
+    {
+        struct vcpu_maphash_entry *hashent = &v->arch.pv_vcpu.mapcache.hash[i];
+
+        hashent->mfn = ~0UL; /* never valid to map */
+        hashent->idx = MAPHASHENT_NOTINUSE;
+    }
+
+    return 0;
+}
+
+#define GLOBALMAP_BITS (GLOBALMAP_GBYTES << (30 - PAGE_SHIFT))
+static unsigned long inuse[BITS_TO_LONGS(GLOBALMAP_BITS)];
+static unsigned long garbage[BITS_TO_LONGS(GLOBALMAP_BITS)];
+static unsigned int inuse_cursor;
+static DEFINE_SPINLOCK(globalmap_lock);
+
+void *map_domain_page_global(unsigned long mfn)
+{
+    l1_pgentry_t *pl1e;
+    unsigned int idx, i;
+    unsigned long va;
+
+    ASSERT(!in_irq() && local_irq_is_enabled());
+
+    if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
+        return mfn_to_virt(mfn);
+
+    spin_lock(&globalmap_lock);
+
+    idx = find_next_zero_bit(inuse, GLOBALMAP_BITS, inuse_cursor);
+    va = GLOBALMAP_VIRT_START + pfn_to_paddr(idx);
+    if ( unlikely(va >= GLOBALMAP_VIRT_END) )
+    {
+        /* /First/, clean the garbage map and update the inuse list. */
+        for ( i = 0; i < ARRAY_SIZE(garbage); i++ )
+            inuse[i] &= ~xchg(&garbage[i], 0);
+
+        /* /Second/, flush all TLBs to get rid of stale garbage mappings. */
+        flush_tlb_all();
+
+        idx = find_first_zero_bit(inuse, GLOBALMAP_BITS);
+        va = GLOBALMAP_VIRT_START + pfn_to_paddr(idx);
+        if ( unlikely(va >= GLOBALMAP_VIRT_END) )
+        {
+            spin_unlock(&globalmap_lock);
+            return NULL;
+        }
+    }
+
+    set_bit(idx, inuse);
+    inuse_cursor = idx + 1;
+
+    spin_unlock(&globalmap_lock);
+
+    pl1e = virt_to_xen_l1e(va);
+    if ( !pl1e )
+        return NULL;
+    l1e_write(pl1e, l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
+
+    return (void *)va;
+}
+
+void unmap_domain_page_global(const void *ptr)
+{
+    unsigned long va = (unsigned long)ptr;
+    l1_pgentry_t *pl1e;
+
+    if ( va >= DIRECTMAP_VIRT_START )
+        return;
+
+    ASSERT(va >= GLOBALMAP_VIRT_START && va < GLOBALMAP_VIRT_END);
+
+    /* /First/, we zap the PTE. */
+    pl1e = virt_to_xen_l1e(va);
+    BUG_ON(!pl1e);
+    l1e_write(pl1e, l1e_empty());
+
+    /* /Second/, we add to the garbage map. */
+    set_bit(PFN_DOWN(va - GLOBALMAP_VIRT_START), garbage);
+}
+
+/* Translate a map-domain-page'd address to the underlying MFN */
+unsigned long domain_page_map_to_mfn(const void *ptr)
+{
+    unsigned long va = (unsigned long)ptr;
+    const l1_pgentry_t *pl1e;
+
+    if ( va >= DIRECTMAP_VIRT_START )
+        return virt_to_mfn(ptr);
+
+    if ( va >= GLOBALMAP_VIRT_START && va < GLOBALMAP_VIRT_END )
+    {
+        pl1e = virt_to_xen_l1e(va);
+        BUG_ON(!pl1e);
+    }
+    else
+    {
+        ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
+        pl1e = &__linear_l1_table[l1_linear_offset(va)];
+    }
+
+    return l1e_get_pfn(*pl1e);
+}
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2661,9 +2661,6 @@ static inline int vcpumask_to_pcpumask(
     }
 }
 
-#define fixmap_domain_page(mfn) mfn_to_virt(mfn)
-#define fixunmap_domain_page(ptr) ((void)(ptr))
-
 long do_mmuext_op(
     XEN_GUEST_HANDLE_PARAM(mmuext_op_t) uops,
     unsigned int count,
@@ -2983,7 +2980,6 @@ long do_mmuext_op(
 
         case MMUEXT_CLEAR_PAGE: {
             struct page_info *page;
-            unsigned char *ptr;
 
             page = get_page_from_gfn(d, op.arg1.mfn, NULL, P2M_ALLOC);
             if ( !page || !get_page_type(page, PGT_writable_page) )
@@ -2998,9 +2994,7 @@ long do_mmuext_op(
             /* A page is dirtied when it's being cleared. */
             paging_mark_dirty(d, page_to_mfn(page));
 
-            ptr = fixmap_domain_page(page_to_mfn(page));
-            clear_page(ptr);
-            fixunmap_domain_page(ptr);
+            clear_domain_page(page_to_mfn(page));
 
             put_page_and_type(page);
             break;
@@ -3008,8 +3002,6 @@ long do_mmuext_op(
 
         case MMUEXT_COPY_PAGE:
         {
-            const unsigned char *src;
-            unsigned char *dst;
             struct page_info *src_page, *dst_page;
 
             src_page = get_page_from_gfn(d, op.arg2.src_mfn, NULL, P2M_ALLOC);
@@ -3034,11 +3026,7 @@ long do_mmuext_op(
             /* A page is dirtied when it's being copied to. */
             paging_mark_dirty(d, page_to_mfn(dst_page));
 
-            src = __map_domain_page(src_page);
-            dst = fixmap_domain_page(page_to_mfn(dst_page));
-            copy_page(dst, src);
-            fixunmap_domain_page(dst);
-            unmap_domain_page(src);
+            copy_domain_page(page_to_mfn(dst_page), page_to_mfn(src_page));
 
             put_page_and_type(dst_page);
             put_page(src_page);
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -27,6 +27,7 @@
 #define CONFIG_DISCONTIGMEM 1
 #define CONFIG_NUMA_EMU 1
 #define CONFIG_PAGEALLOC_MAX_ORDER (2 * PAGETABLE_ORDER)
+#define CONFIG_DOMAIN_PAGE 1
 
 /* Intel P4 currently has largest cache line (L2 line size is 128 bytes). */
 #define CONFIG_X86_L1_CACHE_SHIFT 7
@@ -147,12 +148,14 @@ extern unsigned char boot_edid_info[128]
  *  0xffff82c000000000 - 0xffff82c3ffffffff [16GB,  2^34 bytes, PML4:261]
  *    vmap()/ioremap()/fixmap area.
  *  0xffff82c400000000 - 0xffff82c43fffffff [1GB,   2^30 bytes, PML4:261]
- *    Compatibility machine-to-phys translation table.
+ *    Global domain page map area.
  *  0xffff82c440000000 - 0xffff82c47fffffff [1GB,   2^30 bytes, PML4:261]
- *    High read-only compatibility machine-to-phys translation table.
+ *    Compatibility machine-to-phys translation table.
  *  0xffff82c480000000 - 0xffff82c4bfffffff [1GB,   2^30 bytes, PML4:261]
+ *    High read-only compatibility machine-to-phys translation table.
+ *  0xffff82c4c0000000 - 0xffff82c4ffffffff [1GB,   2^30 bytes, PML4:261]
  *    Xen text, static data, bss.
- *  0xffff82c4c0000000 - 0xffff82dffbffffff [109GB - 64MB,      PML4:261]
+ *  0xffff82c500000000 - 0xffff82dffbffffff [108GB - 64MB,      PML4:261]
  *    Reserved for future use.
  *  0xffff82dffc000000 - 0xffff82dfffffffff [64MB,  2^26 bytes, PML4:261]
  *    Super-page information array.
@@ -201,18 +204,24 @@ extern unsigned char boot_edid_info[128]
 /* Slot 259: linear page table (shadow table). */
 #define SH_LINEAR_PT_VIRT_START (PML4_ADDR(259))
 #define SH_LINEAR_PT_VIRT_END   (SH_LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
-/* Slot 260: per-domain mappings. */
+/* Slot 260: per-domain mappings (including map cache). */
 #define PERDOMAIN_VIRT_START    (PML4_ADDR(260))
-#define PERDOMAIN_VIRT_END      (PERDOMAIN_VIRT_START + (PERDOMAIN_MBYTES<<20))
-#define PERDOMAIN_MBYTES        (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
+#define PERDOMAIN_SLOT_MBYTES   (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
+#define PERDOMAIN_SLOTS         2
+#define PERDOMAIN_VIRT_SLOT(s)  (PERDOMAIN_VIRT_START + (s) * \
+                                 (PERDOMAIN_SLOT_MBYTES << 20))
 /* Slot 261: machine-to-phys conversion table (256GB). */
 #define RDWR_MPT_VIRT_START     (PML4_ADDR(261))
 #define RDWR_MPT_VIRT_END       (RDWR_MPT_VIRT_START + MPT_VIRT_SIZE)
 /* Slot 261: vmap()/ioremap()/fixmap area (16GB). */
 #define VMAP_VIRT_START         RDWR_MPT_VIRT_END
 #define VMAP_VIRT_END           (VMAP_VIRT_START + GB(16))
+/* Slot 261: global domain page map area (1GB). */
+#define GLOBALMAP_GBYTES        1
+#define GLOBALMAP_VIRT_START    VMAP_VIRT_END
+#define GLOBALMAP_VIRT_END      (GLOBALMAP_VIRT_START + (GLOBALMAP_GBYTES<<30))
 /* Slot 261: compatibility machine-to-phys conversion table (1GB). */
-#define RDWR_COMPAT_MPT_VIRT_START VMAP_VIRT_END
+#define RDWR_COMPAT_MPT_VIRT_START GLOBALMAP_VIRT_END
 #define RDWR_COMPAT_MPT_VIRT_END (RDWR_COMPAT_MPT_VIRT_START + GB(1))
 /* Slot 261: high read-only compat machine-to-phys conversion table (1GB). */
 #define HIRO_COMPAT_MPT_VIRT_START RDWR_COMPAT_MPT_VIRT_END
@@ -279,9 +288,9 @@ extern unsigned long xen_phys_start;
 /* GDT/LDT shadow mapping area. The first per-domain-mapping sub-area. */
 #define GDT_LDT_VCPU_SHIFT       5
 #define GDT_LDT_VCPU_VA_SHIFT    (GDT_LDT_VCPU_SHIFT + PAGE_SHIFT)
-#define GDT_LDT_MBYTES           PERDOMAIN_MBYTES
+#define GDT_LDT_MBYTES           PERDOMAIN_SLOT_MBYTES
 #define MAX_VIRT_CPUS            (GDT_LDT_MBYTES << (20-GDT_LDT_VCPU_VA_SHIFT))
-#define GDT_LDT_VIRT_START       PERDOMAIN_VIRT_START
+#define GDT_LDT_VIRT_START       PERDOMAIN_VIRT_SLOT(0)
 #define GDT_LDT_VIRT_END         (GDT_LDT_VIRT_START + (GDT_LDT_MBYTES << 20))
 
 /* The address of a particular VCPU's GDT or LDT. */
@@ -290,8 +299,16 @@ extern unsigned long xen_phys_start;
 #define LDT_VIRT_START(v)    \
     (GDT_VIRT_START(v) + (64*1024))
 
+/* map_domain_page() map cache. The last per-domain-mapping sub-area. */
+#define MAPCACHE_VCPU_ENTRIES    (CONFIG_PAGING_LEVELS * CONFIG_PAGING_LEVELS)
+#define MAPCACHE_ENTRIES         (MAX_VIRT_CPUS * MAPCACHE_VCPU_ENTRIES)
+#define MAPCACHE_SLOT            (PERDOMAIN_SLOTS - 1)
+#define MAPCACHE_VIRT_START      PERDOMAIN_VIRT_SLOT(MAPCACHE_SLOT)
+#define MAPCACHE_VIRT_END        (MAPCACHE_VIRT_START + \
+                                  MAPCACHE_ENTRIES * PAGE_SIZE)
+
 #define PDPT_L1_ENTRIES       \
-    ((PERDOMAIN_VIRT_END - PERDOMAIN_VIRT_START) >> PAGE_SHIFT)
+    ((PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS - 1) - PERDOMAIN_VIRT_START) >> PAGE_SHIFT)
 #define PDPT_L2_ENTRIES       \
     ((PDPT_L1_ENTRIES + (1 << PAGETABLE_ORDER) - 1) >> PAGETABLE_ORDER)
 
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -39,7 +39,7 @@ struct trap_bounce {
 
 #define MAPHASH_ENTRIES 8
 #define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
-#define MAPHASHENT_NOTINUSE ((u16)~0U)
+#define MAPHASHENT_NOTINUSE ((u32)~0U)
 struct mapcache_vcpu {
     /* Shadow of mapcache_domain.epoch. */
     unsigned int shadow_epoch;
@@ -47,16 +47,15 @@ struct mapcache_vcpu {
     /* Lock-free per-VCPU hash of recently-used mappings. */
     struct vcpu_maphash_entry {
         unsigned long mfn;
-        uint16_t      idx;
-        uint16_t      refcnt;
+        uint32_t      idx;
+        uint32_t      refcnt;
     } hash[MAPHASH_ENTRIES];
 };
 
-#define MAPCACHE_ORDER   10
-#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
 struct mapcache_domain {
     /* The PTEs that provide the mappings, and a cursor into the array. */
-    l1_pgentry_t *l1tab;
+    l1_pgentry_t **l1tab;
+    unsigned int entries;
     unsigned int cursor;
 
     /* Protects map_domain_page(). */
@@ -67,12 +66,13 @@ struct mapcache_domain {
     u32 tlbflush_timestamp;
 
     /* Which mappings are in use, and which are garbage to reap next epoch? */
-    unsigned long inuse[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
-    unsigned long garbage[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
+    unsigned long *inuse;
+    unsigned long *garbage;
 };
 
-void mapcache_domain_init(struct domain *);
-void mapcache_vcpu_init(struct vcpu *);
+int mapcache_domain_init(struct domain *);
+void mapcache_domain_exit(struct domain *);
+int mapcache_vcpu_init(struct vcpu *);
 
 /* x86/64: toggle guest between kernel and user modes. */
 void toggle_guest_mode(struct vcpu *);
@@ -229,6 +229,9 @@ struct pv_domain
      * unmask the event channel */
     bool_t auto_unmask;
 
+    /* map_domain_page() mapping cache. */
+    struct mapcache_domain mapcache;
+
     /* Pseudophysical e820 map (XENMEM_memory_map).  */
     spinlock_t e820_lock;
     struct e820entry *e820;
@@ -238,7 +241,7 @@ struct pv_domain
 struct arch_domain
 {
     struct page_info **mm_perdomain_pt_pages;
-    l2_pgentry_t *mm_perdomain_l2;
+    l2_pgentry_t *mm_perdomain_l2[PERDOMAIN_SLOTS];
     l3_pgentry_t *mm_perdomain_l3;
 
     unsigned int hv_compat_vstart;
@@ -324,6 +327,9 @@ struct arch_domain
 
 struct pv_vcpu
 {
+    /* map_domain_page() mapping cache. */
+    struct mapcache_vcpu mapcache;
+
     struct trap_info *trap_ctxt;
 
     unsigned long gdt_frames[FIRST_RESERVED_GDT_PAGE];
--- a/xen/include/xen/domain_page.h
+++ b/xen/include/xen/domain_page.h
@@ -25,11 +25,16 @@ void *map_domain_page(unsigned long mfn)
  */
 void unmap_domain_page(const void *va);
 
+/*
+ * Clear a given page frame, or copy between two of them.
+ */
+void clear_domain_page(unsigned long mfn);
+void copy_domain_page(unsigned long dmfn, unsigned long smfn);
 
 /* 
  * Given a VA from map_domain_page(), return its underlying MFN.
  */
-unsigned long domain_page_map_to_mfn(void *va);
+unsigned long domain_page_map_to_mfn(const void *va);
 
 /*
  * Similar to the above calls, except the mapping is accessible in all
@@ -107,6 +112,9 @@ domain_mmap_cache_destroy(struct domain_
 #define map_domain_page(mfn)                mfn_to_virt(mfn)
 #define __map_domain_page(pg)               page_to_virt(pg)
 #define unmap_domain_page(va)               ((void)(va))
+#define clear_domain_page(mfn)              clear_page(mfn_to_virt(mfn))
+#define copy_domain_page(dmfn, smfn)        copy_page(mfn_to_virt(dmfn), \
+                                                      mfn_to_virt(smfn))
 #define domain_page_map_to_mfn(va)          virt_to_mfn((unsigned long)(va))
 
 #define map_domain_page_global(mfn)         mfn_to_virt(mfn)


Attachment: x86-map-domain-page.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.