[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC v3 5/6] xen/arm: Add log_dirty support for ARM



This patch implements log_dirty for ARM guest VMs. This feature
is provided via two building blocks: the dirty bitmap and VLPT
(virtual-linear page table)

1. VLPT provides fast access to the 3rd-level PTE of the guest P2M.
When creating a mapping for VLPT, the page table mapping
becomes the following:
   xen's 1st PTE --> xen's 2nd PTE --> guest p2m's 2nd PTE -->
   guest p2m's 3rd PTE

With VLPT, xen can immediately locate the 3rd PTE of guest P2M
and modify PTE attributes during dirty page tracking. The following
link shows the performance comparison for handling a dirty-page
between VLPT and typical page table walking.
http://lists.xen.org/archives/html/xen-devel/2013-08/msg01503.html

For more info about VLPT, please see
http://www.technovelty.org/linux/virtual-linear-page-table.html.

2. Dirty bitmap
The dirty bitmap is used to mark the pages which are dirty during
migration. The info is used by Xen tools, via DOMCTL_SHADOW_OP_*,
to figure out which guest pages need to be resent.

Signed-off-by: Jaeyong Yoo <jaeyong.yoo@xxxxxxxxxxx>
Signed-off-by: Evgeny Fedotov <e.fedotov@xxxxxxxxxxx>
Signed-off-by: Wei Huang <w1.huang@xxxxxxxxxxx>
---
 xen/arch/arm/domain.c           |    6 +
 xen/arch/arm/domctl.c           |   31 +++-
 xen/arch/arm/mm.c               |  298 ++++++++++++++++++++++++++++++++++++++-
 xen/arch/arm/p2m.c              |  204 +++++++++++++++++++++++++++
 xen/arch/arm/traps.c            |    9 ++
 xen/include/asm-arm/config.h    |   12 +-
 xen/include/asm-arm/domain.h    |   19 +++
 xen/include/asm-arm/mm.h        |   23 +++
 xen/include/asm-arm/p2m.h       |    8 +-
 xen/include/asm-arm/processor.h |    2 +
 10 files changed, 599 insertions(+), 13 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 40f1c3a..2eb5ce0 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -208,6 +208,9 @@ static void ctxt_switch_to(struct vcpu *n)
 
     isb();
 
+    /* Dirty-page tracing */
+    log_dirty_restore(n->domain);
+
     /* This is could trigger an hardware interrupt from the virtual
      * timer. The interrupt needs to be injected into the guest. */
     WRITE_SYSREG32(n->arch.cntkctl, CNTKCTL_EL1);
@@ -504,6 +507,9 @@ int arch_domain_create(struct domain *d, unsigned int 
domcr_flags)
     /* Default the virtual ID to match the physical */
     d->arch.vpidr = boot_cpu_data.midr.bits;
 
+    /* Init log dirty support */
+    log_dirty_init(d);
+
     clear_page(d->shared_info);
     share_xen_page_with_guest(
         virt_to_page(d->shared_info), d, XENSHARE_writable);
diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
index 45974e7..f1c34da 100644
--- a/xen/arch/arm/domctl.c
+++ b/xen/arch/arm/domctl.c
@@ -10,30 +10,53 @@
 #include <xen/errno.h>
 #include <xen/sched.h>
 #include <xen/hypercall.h>
+#include <xen/guest_access.h>
 #include <public/domctl.h>
 
 long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
                     XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 {
+    long ret = 0;
+    bool_t copyback = 0;
+
     switch ( domctl->cmd )
     {
+    case XEN_DOMCTL_shadow_op:
+    {
+        ret = -EINVAL;
+        copyback = 1;
+
+        if ( (d == current->domain) )   /* no domain_pause() */
+            break;
+
+        domain_pause(d);
+        ret = dirty_mode_op(d, &domctl->u.shadow_op);
+        domain_unpause(d);
+    }
+    break;
+
     case XEN_DOMCTL_cacheflush:
     {
         unsigned long s = domctl->u.cacheflush.start_pfn;
         unsigned long e = s + domctl->u.cacheflush.nr_pfns;
 
         if ( domctl->u.cacheflush.nr_pfns > (1U<<MAX_ORDER) )
-            return -EINVAL;
+            ret = -EINVAL;
 
         if ( e < s )
-            return -EINVAL;
+            ret = -EINVAL;
 
-        return p2m_cache_flush(d, s, e);
+        ret = p2m_cache_flush(d, s, e);
     }
 
     default:
-        return subarch_do_domctl(domctl, d, u_domctl);
+        ret = subarch_do_domctl(domctl, d, u_domctl);
     }
+
+    if ( copyback && __copy_to_guest(u_domctl, domctl, 1) )
+        ret = -EFAULT;
+
+    return ret;
 }
 
 void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c)
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index eac228c..81c0691 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -865,7 +865,6 @@ void destroy_xen_mappings(unsigned long v, unsigned long e)
     create_xen_entries(REMOVE, v, 0, (e - v) >> PAGE_SHIFT, 0);
 }
 
-enum mg { mg_clear, mg_ro, mg_rw, mg_rx };
 static void set_pte_flags_on_range(const char *p, unsigned long l, enum mg mg)
 {
     lpae_t pte;
@@ -945,11 +944,6 @@ int page_is_ram_type(unsigned long mfn, unsigned long 
mem_type)
     return 0;
 }
 
-unsigned long domain_get_maximum_gpfn(struct domain *d)
-{
-    return -ENOSYS;
-}
-
 void share_xen_page_with_guest(struct page_info *page,
                           struct domain *d, int readonly)
 {
@@ -1235,6 +1229,298 @@ int is_iomem_page(unsigned long mfn)
         return 1;
     return 0;
 }
+
+
+/* Return start and end addr of guest RAM. Note this function only reports 
+ * regular RAM. It does not cover other areas such as foreign mapped
+ * pages or MMIO space. */
+void domain_get_ram_range(struct domain *d, paddr_t *start, paddr_t *end)
+{
+    if ( start )
+        *start = GUEST_RAM_BASE;
+
+    if ( end )
+        *end = GUEST_RAM_BASE + ((paddr_t) d->max_pages << PAGE_SHIFT);
+}
+
+/* Return the maximum GPFN of guest VM. It covers all guest memory types. */
+unsigned long domain_get_maximum_gpfn(struct domain *d)
+{
+    struct p2m_domain *p2m = &d->arch.p2m;
+
+    return p2m->max_mapped_gfn;
+}
+
+/************************************/
+/*    Dirty Page Tracking Support   */
+/************************************/
+/* Mark the bitmap for a corresponding page as dirty */
+static inline void bitmap_mark_dirty(struct domain *d, paddr_t addr)
+{
+    paddr_t ram_base = (paddr_t) GUEST_RAM_BASE;
+    int bit_index = PFN_DOWN(addr - ram_base);
+    int page_index = bit_index >> (PAGE_SHIFT + 3);
+    int bit_index_residual = bit_index & ((1ul << (PAGE_SHIFT + 3)) - 1);
+
+    set_bit(bit_index_residual, d->arch.dirty.bitmap[page_index]);
+}
+
+/* Allocate dirty bitmap resource */
+static int bitmap_init(struct domain *d)
+{
+    paddr_t gma_start = 0;
+    paddr_t gma_end = 0;
+    int nr_bytes;
+    int nr_pages;
+    int i;
+
+    domain_get_ram_range(d, &gma_start, &gma_end);
+
+    nr_bytes = (PFN_DOWN(gma_end - gma_start) + 7) / 8;
+    nr_pages = (nr_bytes + PAGE_SIZE - 1) / PAGE_SIZE;
+
+    BUG_ON(nr_pages > MAX_DIRTY_BITMAP_PAGES);
+
+    for ( i = 0; i < nr_pages; ++i )
+    {
+        struct page_info *page;
+        page = alloc_domheap_page(NULL, 0);
+        if ( page == NULL )
+            goto cleanup_on_failure;
+
+        d->arch.dirty.bitmap[i] = map_domain_page_global(__page_to_mfn(page));
+        clear_page(d->arch.dirty.bitmap[i]);
+    }
+
+    d->arch.dirty.bitmap_pages = nr_pages;
+    return 0;
+
+cleanup_on_failure:
+    nr_pages = i;
+    for ( i = 0; i < nr_pages; ++i )
+    {
+        unmap_domain_page_global(d->arch.dirty.bitmap[i]);
+    }
+
+    return -ENOMEM;
+}
+
+/* Cleanup dirty bitmap resource */
+static void bitmap_cleanup(struct domain *d)
+{
+    int i;
+
+    for ( i = 0; i < d->arch.dirty.bitmap_pages; ++i )
+    {
+        unmap_domain_page_global(d->arch.dirty.bitmap[i]);
+    }
+}
+
+/* Flush VLPT area */
+static void vlpt_flush(struct domain *d)
+{
+    int flush_size;
+    flush_size = (d->arch.dirty.second_lvl_end - 
+                  d->arch.dirty.second_lvl_start) << SECOND_SHIFT;
+
+    /* flushing the 3rd level mapping */
+    flush_xen_data_tlb_range_va(d->arch.dirty.second_lvl_start << SECOND_SHIFT,
+                                flush_size);
+}
+
+/* Set up a page table for VLPT mapping */
+static int vlpt_init(struct domain *d)
+{
+    uint64_t required, avail = VIRT_LIN_P2M_END - VIRT_LIN_P2M_START;
+    int xen_second_linear_base;
+    int gp2m_start_index, gp2m_end_index;
+    struct p2m_domain *p2m = &d->arch.p2m;
+    struct page_info *second_lvl_page;
+    paddr_t gma_start = 0;
+    paddr_t gma_end = 0;
+    lpae_t *first[2];
+    int i;
+
+    /* Check if reserved space is enough to cover guest physical address space.
+     * Note that each LPAE page table entry is 64-bit (8 bytes). So we only
+     * shift left with LPAE_SHIFT instead of PAGE_SHIFT. */
+    domain_get_ram_range(d, &gma_start, &gma_end);
+    required = (gma_end - gma_start) >> LPAE_SHIFT;
+    if ( required > avail )
+    {
+        dprintk(XENLOG_ERR, "Available VLPT is small for domU guest (avail: "
+                "%#llx, required: %#llx)\n", (unsigned long long)avail,
+                (unsigned long long)required);
+        return -ENOMEM;
+    }
+
+    /* Calculate the base of the 2nd-level linear table for VIRT_LIN_P2M_START */
+    xen_second_linear_base = second_linear_offset(VIRT_LIN_P2M_START);
+
+    gp2m_start_index = gma_start >> FIRST_SHIFT;
+    gp2m_end_index = (gma_end >> FIRST_SHIFT) + 1;
+
+    if ( xen_second_linear_base + gp2m_end_index >= LPAE_ENTRIES * 2 )
+    {
+        dprintk(XENLOG_ERR, "xen second page is small for VLPT for domU");
+        return -ENOMEM;
+    }
+
+    /* Two pages are allocated to backup the related PTE content of guest 
+     * VM's 1st-level table. */
+    second_lvl_page = alloc_domheap_pages(NULL, 1, 0);
+    if ( second_lvl_page == NULL )
+        return -ENOMEM;
+    d->arch.dirty.second_lvl[0] = map_domain_page_global(
+        page_to_mfn(second_lvl_page) );
+    d->arch.dirty.second_lvl[1] = map_domain_page_global(
+        page_to_mfn(second_lvl_page+1) );
+
+    /* 1st level P2M of guest VM is 2 consecutive pages */
+    first[0] = __map_domain_page(p2m->first_level);
+    first[1] = __map_domain_page(p2m->first_level+1);
+
+    for ( i = gp2m_start_index; i < gp2m_end_index; ++i )
+    {
+        int k = i % LPAE_ENTRIES;
+        int l = i / LPAE_ENTRIES;
+        int k2 = (xen_second_linear_base + i) % LPAE_ENTRIES;
+        int l2 = (xen_second_linear_base + i) / LPAE_ENTRIES;
+
+        /* Update 2nd-level PTE of Xen linear table. With this, Xen linear 
+         * page table layout becomes: 1st Xen linear ==> 2nd Xen linear ==> 
+         * 2nd guest P2M (i.e. 3rd Xen linear) ==> 3rd guest P2M (i.e. Xen 
+         * linear content) for VIRT_LIN_P2M_START address space. */
+        write_pte(&xen_second[xen_second_linear_base+i], first[l][k]);
+
+        /* We copy the mapping into domain's structure as a reference
+         * in case of the context switch (used in vlpt_restore function ) */
+        d->arch.dirty.second_lvl[l2][k2] = first[l][k];
+    }
+    unmap_domain_page(first[0]);
+    unmap_domain_page(first[1]);
+
+    /* storing the start and end index */
+    d->arch.dirty.second_lvl_start = xen_second_linear_base + gp2m_start_index;
+    d->arch.dirty.second_lvl_end = xen_second_linear_base + gp2m_end_index;
+
+    vlpt_flush(d);
+
+    return 0;
+}
+
+static void vlpt_cleanup(struct domain *d)
+{
+    /* First level p2m is 2 consecutive pages */
+    unmap_domain_page_global(d->arch.dirty.second_lvl[0]);
+    unmap_domain_page_global(d->arch.dirty.second_lvl[1]);
+}
+
+/* Returns zero if addr is not valid or dirty mode is not set */
+int handle_page_fault(struct domain *d, paddr_t addr)
+{
+    lpae_t *vlp2m_pte = 0;
+    paddr_t gma_start = 0;
+    paddr_t gma_end = 0;
+
+    if ( !d->arch.dirty.mode )
+        return 0;
+
+    domain_get_ram_range(d, &gma_start, &gma_end);
+
+    /* Ensure that addr is inside guest's RAM */
+    if ( addr < gma_start || addr > gma_end )
+        return 0;
+
+    vlp2m_pte = vlpt_get_3lvl_pte(addr);
+    if ( vlp2m_pte->p2m.valid && vlp2m_pte->p2m.write == 0 &&
+         vlp2m_pte->p2m.type == p2m_ram_logdirty )
+    {
+        lpae_t pte = *vlp2m_pte;
+        pte.p2m.write = 1;
+        write_pte(vlp2m_pte, pte);
+        flush_tlb_local();
+
+        /* only necessary to lock between get-dirty bitmap and mark dirty
+         * bitmap. If get-dirty bitmap happens immediately before this
+         * lock, the corresponding dirty-page would be marked at the next
+         * round of get-dirty bitmap */
+        spin_lock(&d->arch.dirty.lock);
+        bitmap_mark_dirty(d, addr);
+        spin_unlock(&d->arch.dirty.lock);
+    }
+
+    return 1;
+}
+
+/* Restore the xen page table for vlpt mapping for domain */
+void log_dirty_restore(struct domain *d)
+{
+    int i;
+
+    /* Nothing to do as log dirty mode is off */
+    if ( !(d->arch.dirty.mode) )
+        return;
+
+    dsb(sy);
+
+    for ( i = d->arch.dirty.second_lvl_start; i < d->arch.dirty.second_lvl_end;
+          ++i )
+    {
+        int k = i % LPAE_ENTRIES;
+        int l = i / LPAE_ENTRIES;
+
+        if ( xen_second[i].bits != d->arch.dirty.second_lvl[l][k].bits )
+        {
+            write_pte(&xen_second[i], d->arch.dirty.second_lvl[l][k]);
+            flush_xen_data_tlb_range_va(i << SECOND_SHIFT, 1 << SECOND_SHIFT);
+        }
+    }
+
+    dsb(sy);
+    isb();
+}
+
+/* Turn on log dirty */
+int log_dirty_on(struct domain *d)
+{
+    if ( vlpt_init(d) || bitmap_init(d) )
+        return -EINVAL;
+
+    return 0;
+}
+
+/* Turn off log dirty */
+void log_dirty_off(struct domain *d)
+{
+    bitmap_cleanup(d);
+    vlpt_cleanup(d);
+}
+
+/* Initialize log dirty fields */
+int log_dirty_init(struct domain *d)
+{
+    d->arch.dirty.count = 0;
+    d->arch.dirty.mode = 0;
+    spin_lock_init(&d->arch.dirty.lock);
+
+    d->arch.dirty.second_lvl_start = 0;
+    d->arch.dirty.second_lvl_end = 0;
+    d->arch.dirty.second_lvl[0] = NULL;
+    d->arch.dirty.second_lvl[1] = NULL;
+
+    memset(d->arch.dirty.bitmap, 0, sizeof(d->arch.dirty.bitmap));
+    d->arch.dirty.bitmap_pages = 0;
+
+    return 0;
+}
+
+/* Log dirty tear down */
+void log_dirty_teardown(struct domain *d)
+{
+    return;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index 603c097..0808cc9 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -6,6 +6,8 @@
 #include <xen/bitops.h>
 #include <asm/flushtlb.h>
 #include <asm/gic.h>
+#include <xen/guest_access.h>
+#include <xen/pfn.h>
 #include <asm/event.h>
 #include <asm/hardirq.h>
 #include <asm/page.h>
@@ -208,6 +210,7 @@ static lpae_t mfn_to_p2m_entry(unsigned long mfn, unsigned 
int mattr,
         break;
 
     case p2m_ram_ro:
+    case p2m_ram_logdirty:
         e.p2m.xn = 0;
         e.p2m.write = 0;
         break;
@@ -261,6 +264,10 @@ static int p2m_create_table(struct domain *d,
 
     pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid);
 
+    /* mark the write bit (page table's case, ro bit) as 0
+     * so, it is writable in case of vlpt access */
+    pte.pt.ro = 0;
+
     write_pte(entry, pte);
 
     return 0;
@@ -696,6 +703,203 @@ unsigned long gmfn_to_mfn(struct domain *d, unsigned long 
gpfn)
     return p >> PAGE_SHIFT;
 }
 
+/* Change types across all p2m entries in a domain */
+void p2m_change_entry_type_global(struct domain *d, enum mg nt)
+{
+    struct p2m_domain *p2m = &d->arch.p2m;
+    paddr_t ram_base;
+    int i1, i2, i3;
+    int first_index, second_index, third_index;
+    lpae_t *first = __map_domain_page(p2m->first_level);
+    lpae_t pte, *second = NULL, *third = NULL;
+
+    domain_get_ram_range(d, &ram_base, NULL);
+
+    first_index = first_table_offset((uint64_t)ram_base);
+    second_index = second_table_offset((uint64_t)ram_base);
+    third_index = third_table_offset((uint64_t)ram_base);
+
+    BUG_ON(!first);
+
+    spin_lock(&p2m->lock);
+
+    for ( i1 = first_index; i1 < LPAE_ENTRIES*2; ++i1 )
+    {
+        lpae_walk_t first_pte = first[i1].walk;
+        if ( !first_pte.valid || !first_pte.table )
+            goto out;
+
+        second = map_domain_page(first_pte.base);
+        BUG_ON(!second);
+
+        for ( i2 = second_index; i2 < LPAE_ENTRIES; ++i2 )
+        {
+            lpae_walk_t second_pte = second[i2].walk;
+
+            if ( !second_pte.valid || !second_pte.table )
+                goto out;
+
+            third = map_domain_page(second_pte.base);
+            BUG_ON(!third);
+
+            for ( i3 = third_index; i3 < LPAE_ENTRIES; ++i3 )
+            {
+                lpae_walk_t third_pte = third[i3].walk;
+
+                if ( !third_pte.valid )
+                    goto out;
+
+                pte = third[i3];
+
+                if ( nt == mg_ro )
+                {
+                    if ( pte.p2m.write == 1 )
+                    {
+                        pte.p2m.write = 0;
+                        pte.p2m.type = p2m_ram_logdirty;
+                    }
+                    else
+                    {
+                        /* reuse avail bit as an indicator of 'actual' 
+                         * read-only */
+                        pte.p2m.type = p2m_ram_rw;
+                    }
+                }
+                else if ( nt == mg_rw )
+                {
+                    if ( pte.p2m.write == 0 && 
+                         pte.p2m.type == p2m_ram_logdirty )
+                    {
+                        pte.p2m.write = p2m_ram_rw;
+                    }
+                }
+                write_pte(&third[i3], pte);
+            }
+            unmap_domain_page(third);
+
+            third = NULL;
+            third_index = 0;
+        }
+        unmap_domain_page(second);
+
+        second = NULL;
+        second_index = 0;
+        third_index = 0;
+    }
+
+out:
+    flush_tlb_all_local();
+    if ( third ) unmap_domain_page(third);
+    if ( second ) unmap_domain_page(second);
+    if ( first ) unmap_domain_page(first);
+
+    spin_unlock(&p2m->lock);
+}
+
+/* Read a domain's log-dirty bitmap and stats. If the operation is a CLEAN, 
+ * clear the bitmap and stats. */
+int log_dirty_op(struct domain *d, xen_domctl_shadow_op_t *sc)
+{
+    int peek = 1;
+    int i;
+    int bitmap_size;
+    paddr_t gma_start, gma_end;
+
+    /* this hypercall is called from domain 0, and we don't know which guest's
+     * vlpt is mapped in xen_second, so, to be sure, we restore vlpt here */
+    log_dirty_restore(d);
+
+    domain_get_ram_range(d, &gma_start, &gma_end);
+    bitmap_size = (gma_end - gma_start) / 8;
+
+    if ( guest_handle_is_null(sc->dirty_bitmap) )
+    {
+        peek = 0;
+    }
+    else
+    {
+        spin_lock(&d->arch.dirty.lock);
+
+        for ( i = 0; i < d->arch.dirty.bitmap_pages; ++i )
+        {
+            int j = 0;
+            uint8_t *bitmap;
+
+            copy_to_guest_offset(sc->dirty_bitmap, i * PAGE_SIZE,
+                                 d->arch.dirty.bitmap[i],
+                                 bitmap_size < PAGE_SIZE ? bitmap_size :
+                                                           PAGE_SIZE);
+            bitmap_size -= PAGE_SIZE;
+
+            /* set p2m page table read-only */
+            bitmap = d->arch.dirty.bitmap[i];
+            while ((j = find_next_bit((const long unsigned int *)bitmap,
+                                      PAGE_SIZE*8, j)) < PAGE_SIZE*8)
+            {
+                lpae_t *vlpt;
+                paddr_t addr = gma_start + (i << (2*PAGE_SHIFT+3)) +
+                    (j << PAGE_SHIFT);
+                vlpt = vlpt_get_3lvl_pte(addr);
+                vlpt->p2m.write = 0;
+                j++;
+            }
+        }
+
+        if ( sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN )
+        {
+            for ( i = 0; i < d->arch.dirty.bitmap_pages; ++i )
+            {
+                clear_page(d->arch.dirty.bitmap[i]);
+            }
+        }
+
+        spin_unlock(&d->arch.dirty.lock);
+        flush_tlb_local();
+    }
+
+    sc->stats.dirty_count = d->arch.dirty.count;
+
+    return 0;
+}
+
+long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc)
+{
+    long ret = 0;
+    switch (sc->op)
+    {
+        case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
+        case XEN_DOMCTL_SHADOW_OP_OFF:
+        {
+            enum mg nt = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? mg_rw : mg_ro;
+
+            d->arch.dirty.mode = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? 0 : 1;
+            p2m_change_entry_type_global(d, nt);
+
+            if ( sc->op == XEN_DOMCTL_SHADOW_OP_OFF )
+            {
+                log_dirty_off(d);
+            }
+            else
+            {
+                if ( (ret = log_dirty_on(d)) )
+                    return ret;
+            }
+        }
+        break;
+
+        case XEN_DOMCTL_SHADOW_OP_CLEAN:
+        case XEN_DOMCTL_SHADOW_OP_PEEK:
+        {
+            ret = log_dirty_op(d, sc);
+        }
+        break;
+
+        default:
+            return -ENOSYS;
+    }
+    return ret;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index df4d375..b652565 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -1556,6 +1556,8 @@ static void do_trap_data_abort_guest(struct cpu_user_regs 
*regs,
     struct hsr_dabt dabt = hsr.dabt;
     int rc;
     mmio_info_t info;
+    int page_fault = ( dabt.write && ((dabt.dfsc & FSC_MASK) == 
+                                      (FSC_FLT_PERM|FSC_3RD_LEVEL)) );
 
     if ( !check_conditional_instr(regs, hsr) )
     {
@@ -1577,6 +1579,13 @@ static void do_trap_data_abort_guest(struct 
cpu_user_regs *regs,
     if ( rc == -EFAULT )
         goto bad_data_abort;
 
+    /* domU page fault handling for guest live migration. Note that 
+     * dabt.valid can be 0 here */
+    if ( page_fault && handle_page_fault(current->domain, info.gpa) )
+    {
+        /* Do not modify PC as guest needs to repeat memory operation */
+        return;
+    }
     /* XXX: Decode the instruction if ISS is not valid */
     if ( !dabt.valid )
         goto bad_data_abort;
diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h
index ef291ff..f18fae4 100644
--- a/xen/include/asm-arm/config.h
+++ b/xen/include/asm-arm/config.h
@@ -87,6 +87,7 @@
  *   0  -   8M   <COMMON>
  *
  *  32M - 128M   Frametable: 24 bytes per page for 16GB of RAM
+ * 128M - 256M   Virtual-linear mapping to P2M table
  * 256M -   1G   VMAP: ioremap and early_ioremap use this virtual address
  *                    space
  *
@@ -124,13 +125,15 @@
 #define CONFIG_SEPARATE_XENHEAP 1
 
 #define FRAMETABLE_VIRT_START  _AT(vaddr_t,0x02000000)
-#define VMAP_VIRT_START  _AT(vaddr_t,0x10000000)
+#define VIRT_LIN_P2M_START     _AT(vaddr_t,0x08000000)
+#define VMAP_VIRT_START        _AT(vaddr_t,0x10000000)
+#define VIRT_LIN_P2M_END       VMAP_VIRT_START
 #define XENHEAP_VIRT_START     _AT(vaddr_t,0x40000000)
 #define XENHEAP_VIRT_END       _AT(vaddr_t,0x7fffffff)
 #define DOMHEAP_VIRT_START     _AT(vaddr_t,0x80000000)
 #define DOMHEAP_VIRT_END       _AT(vaddr_t,0xffffffff)
 
-#define VMAP_VIRT_END    XENHEAP_VIRT_START
+#define VMAP_VIRT_END          XENHEAP_VIRT_START
 
 #define DOMHEAP_ENTRIES        1024  /* 1024 2MB mapping slots */
 
@@ -157,6 +160,11 @@
 
 #define HYPERVISOR_VIRT_END    DIRECTMAP_VIRT_END
 
+/* Definition for VIRT_LIN_P2M_START and VIRT_LIN_P2M_END (64-bit)
+ * TODO: Needs evaluation. */
+#define VIRT_LIN_P2M_START     _AT(vaddr_t, 0x08000000)
+#define VIRT_LIN_P2M_END       VMAP_VIRT_START
+
 #endif
 
 /* Fixmap slots */
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index aabeb51..ac82643 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -162,6 +162,25 @@ struct arch_domain
     } vuart;
 
     unsigned int evtchn_irq;
+
+    /* dirty page tracing */
+    struct {
+        spinlock_t lock;
+        volatile int mode;               /* 1 if dirty pages tracing enabled */
+        volatile unsigned int count;     /* dirty pages counter */
+
+        /* vlpt context switch */
+        volatile int second_lvl_start; /* start idx of virt linear space 2nd */
+        volatile int second_lvl_end;   /* end idx of virt linear space 2nd */
+        lpae_t *second_lvl[2];         /* copy of guest P2M 1st-lvl content */
+
+        /* bitmap to track dirty pages */
+#define MAX_DIRTY_BITMAP_PAGES 64
+        /* Because each bit represents a dirty page, the total supported guest 
+         * memory is (64 entries x 4KB/entry x 8bits/byte x 4KB) = 8GB. */
+        uint8_t *bitmap[MAX_DIRTY_BITMAP_PAGES]; /* dirty bitmap */
+        int bitmap_pages;                        /* # of dirty bitmap pages */
+    } dirty;
 }  __cacheline_aligned;
 
 struct arch_vcpu
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index b8d4e7d..ab19025 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -4,6 +4,7 @@
 #include <xen/config.h>
 #include <xen/kernel.h>
 #include <asm/page.h>
+#include <asm/config.h>
 #include <public/xen.h>
 
 /* Align Xen to a 2 MiB boundary. */
@@ -320,6 +321,7 @@ int donate_page(
 #define domain_clamp_alloc_bitsize(d, b) (b)
 
 unsigned long domain_get_maximum_gpfn(struct domain *d);
+void domain_get_ram_range(struct domain *d, paddr_t *start, paddr_t *end);
 
 extern struct domain *dom_xen, *dom_io, *dom_cow;
 
@@ -341,6 +343,27 @@ static inline void put_page_and_type(struct page_info 
*page)
     put_page(page);
 }
 
+enum mg { mg_clear, mg_ro, mg_rw, mg_rx };
+
+/************************************/
+/*    Log-dirty support functions   */
+/************************************/
+int log_dirty_on(struct domain *d);
+void log_dirty_off(struct domain *d);
+int log_dirty_init(struct domain *d);
+void log_dirty_teardown(struct domain *d);
+void log_dirty_restore(struct domain *d);
+int handle_page_fault(struct domain *d, paddr_t addr);
+/* access leaf PTE of a given guest address (GPA) */
+static inline lpae_t * vlpt_get_3lvl_pte(paddr_t addr)
+{
+    lpae_t *table = (lpae_t *)VIRT_LIN_P2M_START;
+
+    /* Since we slotted the guest's first p2m page table to xen's
+     * second page table, one shift is enough for calculating the
+     * index of guest p2m table entry */
+    return &table[addr >> PAGE_SHIFT];
+}
 #endif /*  __ARCH_ARM_MM__ */
 /*
  * Local variables:
diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
index bd71abe..0cecbe7 100644
--- a/xen/include/asm-arm/p2m.h
+++ b/xen/include/asm-arm/p2m.h
@@ -2,6 +2,7 @@
 #define _XEN_P2M_H
 
 #include <xen/mm.h>
+#include <public/domctl.h>
 
 struct domain;
 
@@ -41,6 +42,7 @@ typedef enum {
     p2m_invalid = 0,    /* Nothing mapped here */
     p2m_ram_rw,         /* Normal read/write guest RAM */
     p2m_ram_ro,         /* Read-only; writes are silently dropped */
+    p2m_ram_logdirty,   /* Read-only: special mode for log dirty */
     p2m_mmio_direct,    /* Read/write mapping of genuine MMIO area */
     p2m_map_foreign,    /* Ram pages from foreign domain */
     p2m_grant_map_rw,   /* Read/write grant mapping */
@@ -49,7 +51,8 @@ typedef enum {
 } p2m_type_t;
 
 #define p2m_is_foreign(_t)  ((_t) == p2m_map_foreign)
-#define p2m_is_ram(_t)      ((_t) == p2m_ram_rw || (_t) == p2m_ram_ro)
+#define p2m_is_ram(_t)      ((_t) == p2m_ram_rw || (_t) == p2m_ram_ro ||  \
+                             (_t) == p2m_ram_logdirty)
 
 /* Initialise vmid allocator */
 void p2m_vmid_allocator_init(void);
@@ -178,6 +181,9 @@ static inline int get_page_and_type(struct page_info *page,
     return rc;
 }
 
+void p2m_change_entry_type_global(struct domain *d, enum mg nt);
+long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc);
+
 #endif /* _XEN_P2M_H */
 
 /*
diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
index 750864a..0bf3d67 100644
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -407,6 +407,8 @@ union hsr {
 #define FSC_CPR        (0x3a) /* Coprocossor Abort */
 
 #define FSC_LL_MASK    (_AC(0x03,U)<<0)
+#define FSC_MASK       (0x3f) /* Fault status mask */
+#define FSC_3RD_LEVEL  (0x03) /* Third level fault */
 
 /* Time counter hypervisor control register */
 #define CNTHCTL_PA      (1u<<0)  /* Kernel/user access to physical counter */
-- 
1.7.9.5


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.