[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen master] xen: arm: use superpages in p2m when pages are suitably aligned



commit 5df01f6e2653ba38b3c55ef137dfc83fb99fad70
Author:     Ian Campbell <ian.campbell@xxxxxxxxxx>
AuthorDate: Wed Jul 9 13:07:45 2014 +0100
Commit:     Ian Campbell <ian.campbell@xxxxxxxxxx>
CommitDate: Thu Jul 10 15:36:24 2014 +0100

    xen: arm: use superpages in p2m when pages are suitably aligned
    
    This creates superpage (1G and 2M) mappings in the p2m for both domU and 
dom0
    when the guest and machine addresses are suitably aligned. This relies on 
the
    domain builder to allocate suitably sized regions, this is trivially true 
for
    1:1 mapped dom0's and was arranged for guests in a previous patch. A non-1:1
    dom0's memory is not currently deliberately aligned.
    
    Since ARM pagetables are (mostly) consistent at each level this is 
implemented
    by refactoring the handling of a single level of pagetable into a common
    function. The two inconsistencies are that there are no superpage mappings 
at
    level zero and that level three entry mappings must set the table bit.
    
    When inserting new mappings the code shatters superpage mappings as 
necessary,
    but currently makes no attempt to coalesce anything again. In particular 
when
    inserting a mapping which could be a superpage over an existing table 
mapping
    we do not attempt to free that lower page table and instead descend into it.
    
    Some p2m statistics are added to keep track of the number of each level of
    mapping and the number of times we've had to shatter an existing mapping.
    
    Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
    Acked-by: Julien Grall <julien.grall@xxxxxxxxxx>
---
 xen/arch/arm/domain.c     |    1 +
 xen/arch/arm/p2m.c        |  498 ++++++++++++++++++++++++++++++++++-----------
 xen/include/asm-arm/p2m.h |   12 +
 3 files changed, 390 insertions(+), 121 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 0c16f23..87902ef 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -761,6 +761,7 @@ int domain_relinquish_resources(struct domain *d)
 
 void arch_dump_domain_info(struct domain *d)
 {
+    p2m_dump_info(d);
 }
 
 
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index cb22e9c..c0c011a 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -1,6 +1,7 @@
 #include <xen/config.h>
 #include <xen/sched.h>
 #include <xen/lib.h>
+#include <xen/stdbool.h>
 #include <xen/errno.h>
 #include <xen/domain_page.h>
 #include <xen/bitops.h>
@@ -25,12 +26,26 @@ static bool_t p2m_table(lpae_t pte)
 {
     return p2m_valid(pte) && pte.p2m.table;
 }
-#if 0
 static bool_t p2m_mapping(lpae_t pte)
 {
     return p2m_valid(pte) && !pte.p2m.table;
 }
-#endif
+
+void p2m_dump_info(struct domain *d)
+{
+    struct p2m_domain *p2m = &d->arch.p2m;
+
+    spin_lock(&p2m->lock);
+    printk("p2m mappings for domain %d (vmid %d):\n",
+           d->domain_id, p2m->vmid);
+    BUG_ON(p2m->stats.mappings[0] || p2m->stats.shattered[0]);
+    printk("  1G mappings: %ld (shattered %ld)\n",
+           p2m->stats.mappings[1], p2m->stats.shattered[1]);
+    printk("  2M mappings: %ld (shattered %ld)\n",
+           p2m->stats.mappings[2], p2m->stats.shattered[2]);
+    printk("  4K mappings: %ld\n", p2m->stats.mappings[3]);
+    spin_unlock(&p2m->lock);
+}
 
 void dump_p2m_lookup(struct domain *d, paddr_t addr)
 {
@@ -287,15 +302,26 @@ static inline void p2m_write_pte(lpae_t *p, lpae_t pte, 
bool_t flush_cache)
         clean_xen_dcache(*p);
 }
 
-/* Allocate a new page table page and hook it in via the given entry */
-static int p2m_create_table(struct domain *d, lpae_t *entry, bool_t 
flush_cache)
+/*
+ * Allocate a new page table page and hook it in via the given entry.
+ * apply_one_level relies on this returning 0 on success
+ * and -ve on failure.
+ *
+ * If the existing entry is present then it must be a mapping and not
+ * a table and it will be shattered into the next level down.
+ *
+ * level_shift is the number of bits at the level we want to create.
+ */
+static int p2m_create_table(struct domain *d, lpae_t *entry,
+                            int level_shift, bool_t flush_cache)
 {
     struct p2m_domain *p2m = &d->arch.p2m;
     struct page_info *page;
-    void *p;
+    lpae_t *p;
     lpae_t pte;
+    int splitting = p2m_valid(*entry);
 
-    BUG_ON(entry->p2m.valid);
+    BUG_ON(p2m_table(*entry));
 
     page = alloc_domheap_page(NULL, 0);
     if ( page == NULL )
@@ -304,9 +330,37 @@ static int p2m_create_table(struct domain *d, lpae_t 
*entry, bool_t flush_cache)
     page_list_add(page, &p2m->pages);
 
     p = __map_domain_page(page);
-    clear_page(p);
+    if ( splitting )
+    {
+        p2m_type_t t = entry->p2m.type;
+        unsigned long base_pfn = entry->p2m.base;
+        int i;
+
+        /*
+         * We are either splitting a first level 1G page into 512 second level
+         * 2M pages, or a second level 2M page into 512 third level 4K pages.
+         */
+         for ( i=0 ; i < LPAE_ENTRIES; i++ )
+         {
+             pte = mfn_to_p2m_entry(base_pfn + (i<<(level_shift-LPAE_SHIFT)),
+                                    MATTR_MEM, t);
+
+             /*
+              * First and second level super pages set p2m.table = 0, but
+              * third level entries set table = 1.
+              */
+             if ( level_shift - LPAE_SHIFT )
+                 pte.p2m.table = 0;
+
+             write_pte(&p[i], pte);
+         }
+    }
+    else
+        clear_page(p);
+
     if ( flush_cache )
         clean_xen_dcache_va_range(p, PAGE_SIZE);
+
     unmap_domain_page(p);
 
     pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid);
@@ -324,8 +378,14 @@ enum p2m_operation {
     CACHEFLUSH,
 };
 
-static void p2m_put_page(const lpae_t pte)
+/* Put any references on the single 4K page referenced by pte.  TODO:
+ * Handle superpages, for now we only take special references for leaf
+ * pages (specifically foreign ones, which can't be super mapped today).
+ */
+static void p2m_put_l3_page(const lpae_t pte)
 {
+    ASSERT(p2m_valid(pte));
+
     /* TODO: Handle other p2m types
      *
      * It's safe to do the put_page here because page_alloc will
@@ -341,6 +401,265 @@ static void p2m_put_page(const lpae_t pte)
     }
 }
 
+/*
+ * Returns true if start_gpaddr..end_gpaddr contains at least one
+ * suitably aligned level_size mappping of maddr.
+ *
+ * So long as the range is large enough the end_gpaddr need not be
+ * aligned (callers should create one superpage mapping based on this
+ * result and then call this again on the new range, eventually the
+ * slop at the end will cause this function to return false).
+ */
+static bool_t is_mapping_aligned(const paddr_t start_gpaddr,
+                                 const paddr_t end_gpaddr,
+                                 const paddr_t maddr,
+                                 const paddr_t level_size)
+{
+    const paddr_t level_mask = level_size - 1;
+
+    /* No hardware superpages at level 0 */
+    if ( level_size == ZEROETH_SIZE )
+        return false;
+
+    /*
+     * A range smaller than the size of a superpage at this level
+     * cannot be superpage aligned.
+     */
+    if ( ( end_gpaddr - start_gpaddr ) < level_size - 1 )
+        return false;
+
+    /* Both the gpaddr and maddr must be aligned */
+    if ( start_gpaddr & level_mask )
+        return false;
+    if ( maddr & level_mask )
+        return false;
+    return true;
+}
+
+#define P2M_ONE_DESCEND        0
+#define P2M_ONE_PROGRESS_NOP   0x1
+#define P2M_ONE_PROGRESS       0x10
+
+/*
+ * 0   == (P2M_ONE_DESCEND) continue to descend the tree
+ * +ve == (P2M_ONE_PROGRESS_*) handled at this level, continue, flush,
+ *        entry, addr and maddr updated.  Return value is an
+ *        indication of the amount of work done (for preemption).
+ * -ve == (-Exxx) error.
+ */
+static int apply_one_level(struct domain *d,
+                           lpae_t *entry,
+                           unsigned int level,
+                           bool_t flush_cache,
+                           enum p2m_operation op,
+                           paddr_t start_gpaddr,
+                           paddr_t end_gpaddr,
+                           paddr_t *addr,
+                           paddr_t *maddr,
+                           bool_t *flush,
+                           int mattr,
+                           p2m_type_t t)
+{
+    /* Helpers to lookup the properties of each level */
+    const paddr_t level_sizes[] =
+        { ZEROETH_SIZE, FIRST_SIZE, SECOND_SIZE, THIRD_SIZE };
+    const paddr_t level_masks[] =
+        { ZEROETH_MASK, FIRST_MASK, SECOND_MASK, THIRD_MASK };
+    const paddr_t level_shifts[] =
+        { ZEROETH_SHIFT, FIRST_SHIFT, SECOND_SHIFT, THIRD_SHIFT };
+    const paddr_t level_size = level_sizes[level];
+    const paddr_t level_mask = level_masks[level];
+    const paddr_t level_shift = level_shifts[level];
+
+    struct p2m_domain *p2m = &d->arch.p2m;
+    lpae_t pte;
+    const lpae_t orig_pte = *entry;
+    int rc;
+
+    BUG_ON(level > 3);
+
+    switch ( op )
+    {
+    case ALLOCATE:
+        ASSERT(level < 3 || !p2m_valid(orig_pte));
+        ASSERT(*maddr == 0);
+
+        if ( p2m_valid(orig_pte) )
+            return P2M_ONE_DESCEND;
+
+        if ( is_mapping_aligned(*addr, end_gpaddr, 0, level_size) )
+        {
+            struct page_info *page;
+
+            page = alloc_domheap_pages(d, level_shift - PAGE_SHIFT, 0);
+            if ( page )
+            {
+                pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);
+                if ( level < 3 )
+                    pte.p2m.table = 0;
+                p2m_write_pte(entry, pte, flush_cache);
+                p2m->stats.mappings[level]++;
+
+                *addr += level_size;
+                *maddr += level_size;
+
+                return P2M_ONE_PROGRESS;
+            }
+            else if ( level == 3 )
+                return -ENOMEM;
+        }
+
+        /* L3 is always suitably aligned for mapping (handled, above) */
+        BUG_ON(level == 3);
+
+        /*
+         * If we get here then we failed to allocate a sufficiently
+         * large contiguous region for this level (which can't be
+         * L3). Create a page table and continue to descend so we try
+         * smaller allocations.
+         */
+        rc = p2m_create_table(d, entry, 0, flush_cache);
+        if ( rc < 0 )
+            return rc;
+
+        return P2M_ONE_DESCEND;
+
+    case INSERT:
+        if ( is_mapping_aligned(*addr, end_gpaddr, *maddr, level_size) &&
+           /* We do not handle replacing an existing table with a superpage */
+             (level == 3 || !p2m_table(orig_pte)) )
+        {
+            /* New mapping is superpage aligned, make it */
+            pte = mfn_to_p2m_entry(*maddr >> PAGE_SHIFT, mattr, t);
+            if ( level < 3 )
+                pte.p2m.table = 0; /* Superpage entry */
+
+            p2m_write_pte(entry, pte, flush_cache);
+
+            *flush |= p2m_valid(orig_pte);
+
+            *addr += level_size;
+            *maddr += level_size;
+
+            if ( p2m_valid(orig_pte) )
+            {
+                /*
+                 * We can't currently get here for an existing table
+                 * mapping, since we don't handle replacing an
+                 * existing table with a superpage. If we did we would
+                 * need to handle freeing (and accounting) for the bit
+                 * of the p2m tree which we would be about to lop off.
+                 */
+                BUG_ON(level < 3 && p2m_table(orig_pte));
+                if ( level == 3 )
+                    p2m_put_l3_page(orig_pte);
+            }
+            else /* New mapping */
+                p2m->stats.mappings[level]++;
+
+            return P2M_ONE_PROGRESS;
+        }
+        else
+        {
+            /* New mapping is not superpage aligned, create a new table entry 
*/
+
+            /* L3 is always suitably aligned for mapping (handled, above) */
+            BUG_ON(level == 3);
+
+            /* Not present -> create table entry and descend */
+            if ( !p2m_valid(orig_pte) )
+            {
+                rc = p2m_create_table(d, entry, 0, flush_cache);
+                if ( rc < 0 )
+                    return rc;
+                return P2M_ONE_DESCEND;
+            }
+
+            /* Existing superpage mapping -> shatter and descend */
+            if ( p2m_mapping(orig_pte) )
+            {
+                *flush = true;
+                rc = p2m_create_table(d, entry,
+                                      level_shift - PAGE_SHIFT, flush_cache);
+                if ( rc < 0 )
+                    return rc;
+
+                p2m->stats.shattered[level]++;
+                p2m->stats.mappings[level]--;
+                p2m->stats.mappings[level+1] += LPAE_ENTRIES;
+            } /* else: an existing table mapping -> descend */
+
+            BUG_ON(!p2m_table(*entry));
+
+            return P2M_ONE_DESCEND;
+        }
+
+        break;
+
+    case RELINQUISH:
+    case REMOVE:
+        if ( !p2m_valid(orig_pte) )
+        {
+            /* Progress up to next boundary */
+            *addr = (*addr + level_size) & level_mask;
+            return P2M_ONE_PROGRESS_NOP;
+        }
+
+        if ( level < 3 && p2m_table(orig_pte) )
+            return P2M_ONE_DESCEND;
+
+        *flush = true;
+
+        memset(&pte, 0x00, sizeof(pte));
+        p2m_write_pte(entry, pte, flush_cache);
+
+        *addr += level_size;
+
+        p2m->stats.mappings[level]--;
+
+        if ( level == 3 )
+            p2m_put_l3_page(orig_pte);
+
+        /*
+         * This is still a single pte write, no matter the level, so no need to
+         * scale.
+         */
+        return P2M_ONE_PROGRESS;
+
+    case CACHEFLUSH:
+        if ( !p2m_valid(orig_pte) )
+        {
+            *addr = (*addr + level_size) & level_mask;
+            return P2M_ONE_PROGRESS_NOP;
+        }
+
+        if ( level < 3 && p2m_table(orig_pte) )
+            return P2M_ONE_DESCEND;
+
+        /*
+         * could flush up to the next superpage boundary, but would
+         * need to be careful about preemption, so just do one 4K page
+         * now and return P2M_ONE_PROGRESS{,_NOP} so that the caller will
+         * continue to loop over the rest of the range.
+         */
+        if ( p2m_is_ram(orig_pte.p2m.type) )
+        {
+            unsigned long offset = paddr_to_pfn(*addr & ~level_mask);
+            flush_page_to_ram(orig_pte.p2m.base + offset);
+
+            *addr += PAGE_SIZE;
+            return P2M_ONE_PROGRESS;
+        }
+        else
+        {
+            *addr += PAGE_SIZE;
+            return P2M_ONE_PROGRESS_NOP;
+        }
+    }
+
+    BUG(); /* Should never get here */
+}
+
 static int apply_p2m_changes(struct domain *d,
                      enum p2m_operation op,
                      paddr_t start_gpaddr,
@@ -349,7 +668,7 @@ static int apply_p2m_changes(struct domain *d,
                      int mattr,
                      p2m_type_t t)
 {
-    int rc;
+    int rc, ret;
     struct p2m_domain *p2m = &d->arch.p2m;
     lpae_t *first = NULL, *second = NULL, *third = NULL;
     paddr_t addr;
@@ -357,9 +676,7 @@ static int apply_p2m_changes(struct domain *d,
                   cur_first_offset = ~0,
                   cur_second_offset = ~0;
     unsigned long count = 0;
-    unsigned int flush = 0;
-    bool_t populate = (op == INSERT || op == ALLOCATE);
-    lpae_t pte;
+    bool_t flush = false;
     bool_t flush_pt;
 
     /* Some IOMMU don't support coherent PT walk. When the p2m is
@@ -373,6 +690,25 @@ static int apply_p2m_changes(struct domain *d,
     addr = start_gpaddr;
     while ( addr < end_gpaddr )
     {
+        /*
+         * Arbitrarily, preempt every 512 operations or 8192 nops.
+         * 512*P2M_ONE_PROGRESS == 8192*P2M_ONE_PROGRESS_NOP == 0x2000
+         *
+         * count is initialised to 0 above, so we are guaranteed to
+         * always make at least one pass.
+         */
+
+        if ( op == RELINQUISH && count >= 0x2000 )
+        {
+            if ( hypercall_preempt_check() )
+            {
+                p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
+                rc = -ERESTART;
+                goto out;
+            }
+            count = 0;
+        }
+
         if ( cur_first_page != p2m_first_level_index(addr) )
         {
             if ( first ) unmap_domain_page(first);
@@ -385,22 +721,18 @@ static int apply_p2m_changes(struct domain *d,
             cur_first_page = p2m_first_level_index(addr);
         }
 
-        if ( !p2m_valid(first[first_table_offset(addr)]) )
-        {
-            if ( !populate )
-            {
-                addr = (addr + FIRST_SIZE) & FIRST_MASK;
-                continue;
-            }
+        /* We only use a 3 level p2m at the moment, so no level 0,
+         * current hardware doesn't support super page mappings at
+         * level 0 anyway */
 
-            rc = p2m_create_table(d, &first[first_table_offset(addr)],
-                                  flush_pt);
-            if ( rc < 0 )
-            {
-                printk("p2m_populate_ram: L1 failed\n");
-                goto out;
-            }
-        }
+        ret = apply_one_level(d, &first[first_table_offset(addr)],
+                              1, flush_pt, op,
+                              start_gpaddr, end_gpaddr,
+                              &addr, &maddr, &flush,
+                              mattr, t);
+        if ( ret < 0 ) { rc = ret ; goto out; }
+        count += ret;
+        if ( ret != P2M_ONE_DESCEND ) continue;
 
         BUG_ON(!p2m_valid(first[first_table_offset(addr)]));
 
@@ -412,23 +744,16 @@ static int apply_p2m_changes(struct domain *d,
         }
         /* else: second already valid */
 
-        if ( !p2m_valid(second[second_table_offset(addr)]) )
-        {
-            if ( !populate )
-            {
-                addr = (addr + SECOND_SIZE) & SECOND_MASK;
-                continue;
-            }
-
-            rc = p2m_create_table(d, &second[second_table_offset(addr)],
-                                  flush_pt);
-            if ( rc < 0 ) {
-                printk("p2m_populate_ram: L2 failed\n");
-                goto out;
-            }
-        }
+        ret = apply_one_level(d,&second[second_table_offset(addr)],
+                              2, flush_pt, op,
+                              start_gpaddr, end_gpaddr,
+                              &addr, &maddr, &flush,
+                              mattr, t);
+        if ( ret < 0 ) { rc = ret ; goto out; }
+        count += ret;
+        if ( ret != P2M_ONE_DESCEND ) continue;
 
-        BUG_ON(!second[second_table_offset(addr)].p2m.valid);
+        BUG_ON(!p2m_valid(second[second_table_offset(addr)]));
 
         if ( cur_second_offset != second_table_offset(addr) )
         {
@@ -438,84 +763,15 @@ static int apply_p2m_changes(struct domain *d,
             cur_second_offset = second_table_offset(addr);
         }
 
-        pte = third[third_table_offset(addr)];
-
-        flush |= pte.p2m.valid;
-
-        switch (op) {
-            case ALLOCATE:
-                {
-                    /* Allocate a new RAM page and attach */
-                    struct page_info *page;
-
-                    ASSERT(!pte.p2m.valid);
-                    rc = -ENOMEM;
-                    page = alloc_domheap_page(d, 0);
-                    if ( page == NULL ) {
-                        printk("p2m_populate_ram: failed to allocate page\n");
-                        goto out;
-                    }
-
-                    pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);
-
-                    p2m_write_pte(&third[third_table_offset(addr)],
-                                  pte, flush_pt);
-                }
-                break;
-            case INSERT:
-                {
-                    if ( pte.p2m.valid )
-                        p2m_put_page(pte);
-                    pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr, t);
-                    p2m_write_pte(&third[third_table_offset(addr)],
-                                  pte, flush_pt);
-                    maddr += PAGE_SIZE;
-                }
-                break;
-            case RELINQUISH:
-            case REMOVE:
-                {
-                    if ( !pte.p2m.valid )
-                    {
-                        count++;
-                        break;
-                    }
-
-                    p2m_put_page(pte);
-
-                    count += 0x10;
-
-                    memset(&pte, 0x00, sizeof(pte));
-                    p2m_write_pte(&third[third_table_offset(addr)],
-                                  pte, flush_pt);
-                    count++;
-                }
-                break;
-
-            case CACHEFLUSH:
-                {
-                    if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) )
-                        break;
-
-                    flush_page_to_ram(pte.p2m.base);
-                }
-                break;
-        }
-
-        /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */
-        if ( op == RELINQUISH && count >= 0x2000 )
-        {
-            if ( hypercall_preempt_check() )
-            {
-                p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
-                rc = -ERESTART;
-                goto out;
-            }
-            count = 0;
-        }
-
-        /* Got the next page */
-        addr += PAGE_SIZE;
+        ret = apply_one_level(d, &third[third_table_offset(addr)],
+                              3, flush_pt, op,
+                              start_gpaddr, end_gpaddr,
+                              &addr, &maddr, &flush,
+                              mattr, t);
+        if ( ret < 0 ) { rc = ret ; goto out; }
+        /* L3 had better have done something! We cannot descend any further */
+        BUG_ON(ret == P2M_ONE_DESCEND);
+        count += ret;
     }
 
     if ( flush )
diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
index 911d32d..327a79d 100644
--- a/xen/include/asm-arm/p2m.h
+++ b/xen/include/asm-arm/p2m.h
@@ -29,6 +29,15 @@ struct p2m_domain {
      * resume the search. Apart from during teardown this can only
      * decrease. */
     unsigned long lowest_mapped_gfn;
+
+    /* Gather some statistics for information purposes only */
+    struct {
+        /* Number of mappings at each p2m tree level */
+        unsigned long mappings[4];
+        /* Number of times we have shattered a mapping
+         * at each p2m tree level. */
+        unsigned long shattered[4];
+    } stats;
 };
 
 /* List of possible type for each page in the p2m entry.
@@ -79,6 +88,9 @@ int p2m_alloc_table(struct domain *d);
 void p2m_save_state(struct vcpu *p);
 void p2m_restore_state(struct vcpu *n);
 
+/* Print debugging/statistial info about a domain's p2m */
+void p2m_dump_info(struct domain *d);
+
 /* Look up the MFN corresponding to a domain's PFN. */
 paddr_t p2m_lookup(struct domain *d, paddr_t gpfn, p2m_type_t *t);
 
--
generated by git-patchbot for /home/xen/git/xen.git#master

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.