Re: [Xen-devel] [RFC v3 5/6] xen/arm: Add log_dirty support for ARM
On 08/05/2014 22:18, Wei Huang wrote: > This patch implements log_dirty for ARM guest VMs. This feature > is provided via two basic blocks: dirty_bit_map and VLPT > (virtual-linear page table) > > 1. VLPT provides fast accessing of 3rd PTE of guest P2M. > When creating a mapping for VLPT, the page table mapping > becomes the following: > xen's 1st PTE --> xen's 2nd PTE --> guest p2m's 2nd PTE --> > guest p2m's 3rd PTE > > With VLPT, xen can immediately locate the 3rd PTE of guest P2M > and modify PTE attirbute during dirty page tracking. The following > link shows the performance comparison for handling a dirty-page > between VLPT and typical page table walking. > http://lists.xen.org/archives/html/xen-devel/2013-08/msg01503.html > > For more info about VLPT, please see > http://www.technovelty.org/linux/virtual-linear-page-table.html. > > 2. Dirty bitmap > The dirty bitmap is used to mark the pages which are dirty during > migration. The info is used by Xen tools, via DOMCTL_SHADOW_OP_*, > to figure out which guest pages need to be resent. > > Signed-off-by: Jaeyong Yoo <jaeyong.yoo@xxxxxxxxxxx> > Signed-off-by: Evgeny Fedotov <e.fedotov@xxxxxxxxxxx> > Signed-off-by: Wei Huang <w1.huang@xxxxxxxxxxx> > --- > xen/arch/arm/domain.c | 6 + > xen/arch/arm/domctl.c | 31 +++- > xen/arch/arm/mm.c | 298 > ++++++++++++++++++++++++++++++++++++++- > xen/arch/arm/p2m.c | 204 +++++++++++++++++++++++++++ > xen/arch/arm/traps.c | 9 ++ > xen/include/asm-arm/config.h | 12 +- > xen/include/asm-arm/domain.h | 19 +++ > xen/include/asm-arm/mm.h | 23 +++ > xen/include/asm-arm/p2m.h | 8 +- > xen/include/asm-arm/processor.h | 2 + > 10 files changed, 599 insertions(+), 13 deletions(-) > > diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c > index 40f1c3a..2eb5ce0 100644 > --- a/xen/arch/arm/domain.c > +++ b/xen/arch/arm/domain.c > @@ -208,6 +208,9 @@ static void ctxt_switch_to(struct vcpu *n) > > isb(); > > + /* Dirty-page tracing */ > + log_dirty_restore(n->domain); > + > /* This is could trigger an hardware interrupt from the virtual > * timer. The interrupt needs to be injected into the guest. */ > WRITE_SYSREG32(n->arch.cntkctl, CNTKCTL_EL1); > @@ -504,6 +507,9 @@ int arch_domain_create(struct domain *d, unsigned int > domcr_flags) > /* Default the virtual ID to match the physical */ > d->arch.vpidr = boot_cpu_data.midr.bits; > > + /* Init log dirty support */ > + log_dirty_init(d); > + > clear_page(d->shared_info); > share_xen_page_with_guest( > virt_to_page(d->shared_info), d, XENSHARE_writable); > diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c > index 45974e7..f1c34da 100644 > --- a/xen/arch/arm/domctl.c > +++ b/xen/arch/arm/domctl.c > @@ -10,30 +10,53 @@ > #include <xen/errno.h> > #include <xen/sched.h> > #include <xen/hypercall.h> > +#include <xen/guest_access.h> > #include <public/domctl.h> > > long arch_do_domctl(struct xen_domctl *domctl, struct domain *d, > XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) > { > + long ret = 0; 'rc' is the more common name. 
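To illustrate, a sketch of the prevailing style (this is an example only, not code from the tree; the 'out' label and the final break are additions for the sketch):

    long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
                        XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
    {
        long rc;

        switch ( domctl->cmd )
        {
        case XEN_DOMCTL_cacheflush:
        {
            unsigned long s = domctl->u.cacheflush.start_pfn;
            unsigned long e = s + domctl->u.cacheflush.nr_pfns;

            rc = -EINVAL;
            if ( domctl->u.cacheflush.nr_pfns > (1U << MAX_ORDER) )
                goto out;
            if ( e < s )
                goto out;

            rc = p2m_cache_flush(d, s, e);
            break; /* without this, we fall into 'default' and clobber rc */
        }

        default:
            rc = subarch_do_domctl(domctl, d, u_domctl);
            break;
        }

     out:
        return rc;
    }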
> + bool_t copyback = 0; > + > switch ( domctl->cmd ) > { > + case XEN_DOMCTL_shadow_op: > + { > + ret = -EINVAL; > + copyback = 1; > + > + if ( (d == current->domain) ) /* no domain_pause() */ > + break; > + > + domain_pause(d); > + ret = dirty_mode_op(d, &domctl->u.shadow_op); > + domain_unpause(d); > + } > + break; > + > case XEN_DOMCTL_cacheflush: > { > unsigned long s = domctl->u.cacheflush.start_pfn; > unsigned long e = s + domctl->u.cacheflush.nr_pfns; > > if ( domctl->u.cacheflush.nr_pfns > (1U<<MAX_ORDER) ) > - return -EINVAL; > + ret = -EINVAL; This breaks the error handling. The prevailing style would be: rc = -EINVAL; if ( something bad ) goto out; > > if ( e < s ) > - return -EINVAL; > + ret = -EINVAL; > > - return p2m_cache_flush(d, s, e); > + ret = p2m_cache_flush(d, s, e); > } > > default: > - return subarch_do_domctl(domctl, d, u_domctl); > + ret = subarch_do_domctl(domctl, d, u_domctl); > } > + > + if ( copyback && __copy_to_guest(u_domctl, domctl, 1) ) > + ret = -EFAULT; > + > + return ret; > } > > void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c) > diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c > index eac228c..81c0691 100644 > --- a/xen/arch/arm/mm.c > +++ b/xen/arch/arm/mm.c > @@ -865,7 +865,6 @@ void destroy_xen_mappings(unsigned long v, unsigned long > e) > create_xen_entries(REMOVE, v, 0, (e - v) >> PAGE_SHIFT, 0); > } > > -enum mg { mg_clear, mg_ro, mg_rw, mg_rx }; > static void set_pte_flags_on_range(const char *p, unsigned long l, enum mg > mg) > { > lpae_t pte; > @@ -945,11 +944,6 @@ int page_is_ram_type(unsigned long mfn, unsigned long > mem_type) > return 0; > } > > -unsigned long domain_get_maximum_gpfn(struct domain *d) > -{ > - return -ENOSYS; > -} > - > void share_xen_page_with_guest(struct page_info *page, > struct domain *d, int readonly) > { > @@ -1235,6 +1229,298 @@ int is_iomem_page(unsigned long mfn) > return 1; > return 0; > } > + > + > +/* Return start and end addr of guest RAM. Note this function only reports > + * regular RAM. It does not cover other areas such as foreign mapped > + * pages or MMIO space. */ > +void domain_get_ram_range(struct domain *d, paddr_t *start, paddr_t *end) const struct domain *d; > +{ > + if ( start ) > + *start = GUEST_RAM_BASE; > + > + if ( end ) > + *end = GUEST_RAM_BASE + ((paddr_t) d->max_pages << PAGE_SHIFT); > +} > + > +/* Return the maximum GPFN of guest VM. It covers all guest memory types. */ > +unsigned long domain_get_maximum_gpfn(struct domain *d) > +{ > + struct p2m_domain *p2m = &d->arch.p2m; > + > + return p2m->max_mapped_gfn; This can be reduced to a single statement. > +} > + > +/************************************/ > +/* Dirty Page Tracking Support */ > +/************************************/ > +/* Mark the bitmap for a corresponding page as dirty */ > +static inline void bitmap_mark_dirty(struct domain *d, paddr_t addr) > +{ > + paddr_t ram_base = (paddr_t) GUEST_RAM_BASE; Useless cast > + int bit_index = PFN_DOWN(addr - ram_base); This is liable to truncation, and should absolutely be unsigned. > + int page_index = bit_index >> (PAGE_SHIFT + 3); > + int bit_index_residual = bit_index & ((1ul << (PAGE_SHIFT + 3)) - 1); As should all of these. > + > + set_bit(bit_index_residual, d->arch.dirty.bitmap[page_index]); > +} > + > +/* Allocate dirty bitmap resource */ > +static int bitmap_init(struct domain *d) This function name is far too generic. > +{ > + paddr_t gma_start = 0; > + paddr_t gma_end = 0; > + int nr_bytes; > + int nr_pages; > + int i; Truncation and unsigned issues. 
I will stop commenting on them now, but most of this patch needs fixing. > + > + domain_get_ram_range(d, &gma_start, &gma_end); > + > + nr_bytes = (PFN_DOWN(gma_end - gma_start) + 7) / 8; > + nr_pages = (nr_bytes + PAGE_SIZE - 1) / PAGE_SIZE; > + > + BUG_ON(nr_pages > MAX_DIRTY_BITMAP_PAGES); This looks like it should be an init() failure, or BUILD_BUG_ON(). > + > + for ( i = 0; i < nr_pages; ++i ) > + { > + struct page_info *page; > + page = alloc_domheap_page(NULL, 0); Those two lines can be combined, and needs a blank line following it. > + if ( page == NULL ) > + goto cleanup_on_failure; > + > + d->arch.dirty.bitmap[i] = > map_domain_page_global(__page_to_mfn(page)); __map_domain_page_global(page) is your friend, and it can fail so needs checking. How many pages is this? global dompage mapping are scarce. This function would become substantially more trivial if Xen had a zalloc_domheap_pages() helper. There is quite a bit of other code which could take advantage. > + clear_page(d->arch.dirty.bitmap[i]); > + } > + > + d->arch.dirty.bitmap_pages = nr_pages; > + return 0; > + > +cleanup_on_failure: > + nr_pages = i; > + for ( i = 0; i < nr_pages; ++i ) > + { > + unmap_domain_page_global(d->arch.dirty.bitmap[i]); > + } > + > + return -ENOMEM; > +} > + > +/* Cleanup dirty bitmap resource */ > +static void bitmap_cleanup(struct domain *d) > +{ > + int i; > + > + for ( i = 0; i < d->arch.dirty.bitmap_pages; ++i ) > + { > + unmap_domain_page_global(d->arch.dirty.bitmap[i]); > + } > +} > + > +/* Flush VLPT area */ > +static void vlpt_flush(struct domain *d) > +{ > + int flush_size; > + flush_size = (d->arch.dirty.second_lvl_end - > + d->arch.dirty.second_lvl_start) << SECOND_SHIFT; > + > + /* flushing the 3rd level mapping */ > + flush_xen_data_tlb_range_va(d->arch.dirty.second_lvl_start << > SECOND_SHIFT, > + flush_size); > +} > + > +/* Set up a page table for VLPT mapping */ > +static int vlpt_init(struct domain *d) > +{ > + uint64_t required, avail = VIRT_LIN_P2M_END - VIRT_LIN_P2M_START; > + int xen_second_linear_base; > + int gp2m_start_index, gp2m_end_index; > + struct p2m_domain *p2m = &d->arch.p2m; > + struct page_info *second_lvl_page; > + paddr_t gma_start = 0; > + paddr_t gma_end = 0; > + lpae_t *first[2]; > + int i; > + > + /* Check if reserved space is enough to cover guest physical address > space. > + * Note that each LPAE page table entry is 64-bit (8 bytes). So we only > + * shift left with LPAE_SHIFT instead of PAGE_SHIFT. */ > + domain_get_ram_range(d, &gma_start, &gma_end); > + required = (gma_end - gma_start) >> LPAE_SHIFT; > + if ( required > avail ) > + { > + dprintk(XENLOG_ERR, "Available VLPT is small for domU guest (avail: " > + "%#llx, required: %#llx)\n", (unsigned long long)avail, PRIx64 please, and loose the casts. > + (unsigned long long)required); > + return -ENOMEM; > + } > + > + /* Caulculate the base of 2nd linear table base for VIRT_LIN_P2M_START */ > + xen_second_linear_base = second_linear_offset(VIRT_LIN_P2M_START); > + > + gp2m_start_index = gma_start >> FIRST_SHIFT; > + gp2m_end_index = (gma_end >> FIRST_SHIFT) + 1; > + > + if ( xen_second_linear_base + gp2m_end_index >= LPAE_ENTRIES * 2 ) > + { > + dprintk(XENLOG_ERR, "xen second page is small for VLPT for domU"); > + return -ENOMEM; > + } > + > + /* Two pages are allocated to backup the related PTE content of guest > + * VM's 1st-level table. 
*/ > + second_lvl_page = alloc_domheap_pages(NULL, 1, 0); > + if ( second_lvl_page == NULL ) > + return -ENOMEM; > + d->arch.dirty.second_lvl[0] = map_domain_page_global( > + page_to_mfn(second_lvl_page) ); > + d->arch.dirty.second_lvl[1] = map_domain_page_global( > + page_to_mfn(second_lvl_page+1) ); > + > + /* 1st level P2M of guest VM is 2 consecutive pages */ > + first[0] = __map_domain_page(p2m->first_level); > + first[1] = __map_domain_page(p2m->first_level+1); spaces around binary operators. > + > + for ( i = gp2m_start_index; i < gp2m_end_index; ++i ) > + { > + int k = i % LPAE_ENTRIES; > + int l = i / LPAE_ENTRIES; > + int k2 = (xen_second_linear_base + i) % LPAE_ENTRIES; > + int l2 = (xen_second_linear_base + i) / LPAE_ENTRIES; > + > + /* Update 2nd-level PTE of Xen linear table. With this, Xen linear > + * page table layout becomes: 1st Xen linear ==> 2nd Xen linear ==> > + * 2nd guest P2M (i.e. 3rd Xen linear) ==> 3rd guest P2M (i.e. Xen > + * linear content) for VIRT_LIN_P2M_START address space. */ > + write_pte(&xen_second[xen_second_linear_base+i], first[l][k]); > + > + /* We copy the mapping into domain's structure as a reference > + * in case of the context switch (used in vlpt_restore function ) */ > + d->arch.dirty.second_lvl[l2][k2] = first[l][k]; > + } > + unmap_domain_page(first[0]); > + unmap_domain_page(first[1]); > + > + /* storing the start and end index */ > + d->arch.dirty.second_lvl_start = xen_second_linear_base + > gp2m_start_index; > + d->arch.dirty.second_lvl_end = xen_second_linear_base + gp2m_end_index; > + > + vlpt_flush(d); > + > + return 0; > +} > + > +static void vlpt_cleanup(struct domain *d) > +{ > + /* First level p2m is 2 consecutive pages */ > + unmap_domain_page_global(d->arch.dirty.second_lvl[0]); > + unmap_domain_page_global(d->arch.dirty.second_lvl[1]); > +} > + > +/* Returns zero if addr is not valid or dirty mode is not set */ > +int handle_page_fault(struct domain *d, paddr_t addr) > +{ > + lpae_t *vlp2m_pte = 0; > + paddr_t gma_start = 0; > + paddr_t gma_end = 0; > + > + if ( !d->arch.dirty.mode ) > + return 0; > + > + domain_get_ram_range(d, &gma_start, &gma_end); > + > + /* Ensure that addr is inside guest's RAM */ > + if ( addr < gma_start || addr > gma_end ) > + return 0; > + > + vlp2m_pte = vlpt_get_3lvl_pte(addr); > + if ( vlp2m_pte->p2m.valid && vlp2m_pte->p2m.write == 0 && > + vlp2m_pte->p2m.type == p2m_ram_logdirty ) > + { > + lpae_t pte = *vlp2m_pte; > + pte.p2m.write = 1; > + write_pte(vlp2m_pte, pte); > + flush_tlb_local(); > + > + /* only necessary to lock between get-dirty bitmap and mark dirty > + * bitmap. If get-dirty bitmap happens immediately before this > + * lock, the corresponding dirty-page would be marked at the next > + * round of get-dirty bitmap */ > + spin_lock(&d->arch.dirty.lock); > + bitmap_mark_dirty(d, addr); > + spin_unlock(&d->arch.dirty.lock); > + } > + > + return 1; > +} > + > +/* Restore the xen page table for vlpt mapping for domain */ > +void log_dirty_restore(struct domain *d) > +{ > + int i; > + > + /* Nothing to do as log dirty mode is off */ > + if ( !(d->arch.dirty.mode) ) superfluous brackets. 
> + return; > + > + dsb(sy); > + > + for ( i = d->arch.dirty.second_lvl_start; i < > d->arch.dirty.second_lvl_end; > + ++i ) > + { > + int k = i % LPAE_ENTRIES; > + int l = i / LPAE_ENTRIES; > + > + if ( xen_second[i].bits != d->arch.dirty.second_lvl[l][k].bits ) > + { > + write_pte(&xen_second[i], d->arch.dirty.second_lvl[l][k]); > + flush_xen_data_tlb_range_va(i << SECOND_SHIFT, 1 << > SECOND_SHIFT); > + } > + } > + > + dsb(sy); > + isb(); > +} > + > +/* Turn on log dirty */ > +int log_dirty_on(struct domain *d) > +{ > + if ( vlpt_init(d) || bitmap_init(d) ) > + return -EINVAL; This hides -ENOMEM from each of the init functions. I am a fan of return vlpt_init(d) ?: bitmap_init(d); As an easy way of chaining a set of functions together if they succeed. Ian on the other hand isn't so I doubt you could get away with it. > + > + return 0; > +} > + > +/* Turn off log dirty */ > +void log_dirty_off(struct domain *d) > +{ > + bitmap_cleanup(d); > + vlpt_cleanup(d); > +} > + > +/* Initialize log dirty fields */ > +int log_dirty_init(struct domain *d) > +{ > + d->arch.dirty.count = 0; > + d->arch.dirty.mode = 0; > + spin_lock_init(&d->arch.dirty.lock); > + > + d->arch.dirty.second_lvl_start = 0; > + d->arch.dirty.second_lvl_end = 0; > + d->arch.dirty.second_lvl[0] = NULL; > + d->arch.dirty.second_lvl[1] = NULL; > + > + memset(d->arch.dirty.bitmap, 0, sizeof(d->arch.dirty.bitmap)); > + d->arch.dirty.bitmap_pages = 0; > + > + return 0; > +} > + > +/* Log dirty tear down */ > +void log_dirty_teardown(struct domain *d) > +{ > + return; > +} > + > /* > * Local variables: > * mode: C > diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c > index 603c097..0808cc9 100644 > --- a/xen/arch/arm/p2m.c > +++ b/xen/arch/arm/p2m.c > @@ -6,6 +6,8 @@ > #include <xen/bitops.h> > #include <asm/flushtlb.h> > #include <asm/gic.h> > +#include <xen/guest_access.h> > +#include <xen/pfn.h> > #include <asm/event.h> > #include <asm/hardirq.h> > #include <asm/page.h> > @@ -208,6 +210,7 @@ static lpae_t mfn_to_p2m_entry(unsigned long mfn, > unsigned int mattr, > break; > > case p2m_ram_ro: > + case p2m_ram_logdirty: > e.p2m.xn = 0; > e.p2m.write = 0; > break; > @@ -261,6 +264,10 @@ static int p2m_create_table(struct domain *d, > > pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid); > > + /* mark the write bit (page table's case, ro bit) as 0 > + * so, it is writable in case of vlpt access */ > + pte.pt.ro = 0; > + > write_pte(entry, pte); > > return 0; > @@ -696,6 +703,203 @@ unsigned long gmfn_to_mfn(struct domain *d, unsigned > long gpfn) > return p >> PAGE_SHIFT; > } > > +/* Change types across all p2m entries in a domain */ > +void p2m_change_entry_type_global(struct domain *d, enum mg nt) > +{ > + struct p2m_domain *p2m = &d->arch.p2m; > + paddr_t ram_base; > + int i1, i2, i3; > + int first_index, second_index, third_index; > + lpae_t *first = __map_domain_page(p2m->first_level); > + lpae_t pte, *second = NULL, *third = NULL; > + > + domain_get_ram_range(d, &ram_base, NULL); > + > + first_index = first_table_offset((uint64_t)ram_base); > + second_index = second_table_offset((uint64_t)ram_base); > + third_index = third_table_offset((uint64_t)ram_base); > + > + BUG_ON(!first); > + > + spin_lock(&p2m->lock); > + > + for ( i1 = first_index; i1 < LPAE_ENTRIES*2; ++i1 ) > + { > + lpae_walk_t first_pte = first[i1].walk; > + if ( !first_pte.valid || !first_pte.table ) > + goto out; > + > + second = map_domain_page(first_pte.base); > + BUG_ON(!second); map_domain_page() cant fail. 
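As a concrete aside on the earlier truncation remarks — the walk above again uses plain int for its indices. A standalone demonstration of what goes wrong once a 64-bit paddr_t is shifted into an int (hypothetical address value; compiles with any C99 compiler, not Xen code):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12
    #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)   /* as in Xen's pfn.h */

    int main(void)
    {
        uint64_t addr = 0x80000000000ULL;     /* hypothetical gpa at 8TB */

        int bit_index = PFN_DOWN(addr);       /* truncated/negative: 2^31
                                                 does not fit in an int */
        uint64_t ok   = PFN_DOWN(addr);

        printf("int: %d  uint64_t: %llu\n",
               bit_index, (unsigned long long)ok);
        return 0;
    }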
> + > + for ( i2 = second_index; i2 < LPAE_ENTRIES; ++i2 ) > + { > + lpae_walk_t second_pte = second[i2].walk; > + > + if ( !second_pte.valid || !second_pte.table ) > + goto out; > + > + third = map_domain_page(second_pte.base); > + BUG_ON(!third); > + > + for ( i3 = third_index; i3 < LPAE_ENTRIES; ++i3 ) > + { > + lpae_walk_t third_pte = third[i3].walk; > + > + if ( !third_pte.valid ) > + goto out; > + > + pte = third[i3]; > + > + if ( nt == mg_ro ) > + { > + if ( pte.p2m.write == 1 ) > + { > + pte.p2m.write = 0; > + pte.p2m.type = p2m_ram_logdirty; > + } > + else > + { > + /* reuse avail bit as an indicator of 'actual' > + * read-only */ > + pte.p2m.type = p2m_ram_rw; > + } > + } > + else if ( nt == mg_rw ) > + { > + if ( pte.p2m.write == 0 && > + pte.p2m.type == p2m_ram_logdirty ) > + { > + pte.p2m.write = p2m_ram_rw; > + } > + } > + write_pte(&third[i3], pte); > + } > + unmap_domain_page(third); > + > + third = NULL; > + third_index = 0; > + } > + unmap_domain_page(second); > + > + second = NULL; > + second_index = 0; > + third_index = 0; > + } > + > +out: > + flush_tlb_all_local(); > + if ( third ) unmap_domain_page(third); > + if ( second ) unmap_domain_page(second); > + if ( first ) unmap_domain_page(first); > + > + spin_unlock(&p2m->lock); > +} > + > +/* Read a domain's log-dirty bitmap and stats. If the operation is a CLEAN, > + * clear the bitmap and stats. */ > +int log_dirty_op(struct domain *d, xen_domctl_shadow_op_t *sc) > +{ > + int peek = 1; > + int i; > + int bitmap_size; > + paddr_t gma_start, gma_end; > + > + /* this hypercall is called from domain 0, and we don't know which > guest's > + * vlpt is mapped in xen_second, so, to be sure, we restore vlpt here */ > + log_dirty_restore(d); > + > + domain_get_ram_range(d, &gma_start, &gma_end); > + bitmap_size = (gma_end - gma_start) / 8; > + > + if ( guest_handle_is_null(sc->dirty_bitmap) ) > + { > + peek = 0; > + } > + else > + { > + spin_lock(&d->arch.dirty.lock); > + > + for ( i = 0; i < d->arch.dirty.bitmap_pages; ++i ) > + { > + int j = 0; > + uint8_t *bitmap; > + > + copy_to_guest_offset(sc->dirty_bitmap, i * PAGE_SIZE, > + d->arch.dirty.bitmap[i], > + bitmap_size < PAGE_SIZE ? bitmap_size : > + PAGE_SIZE); > + bitmap_size -= PAGE_SIZE; > + > + /* set p2m page table read-only */ > + bitmap = d->arch.dirty.bitmap[i]; > + while ((j = find_next_bit((const long unsigned int *)bitmap, > + PAGE_SIZE*8, j)) < PAGE_SIZE*8) > + { > + lpae_t *vlpt; > + paddr_t addr = gma_start + (i << (2*PAGE_SHIFT+3)) + > + (j << PAGE_SHIFT); > + vlpt = vlpt_get_3lvl_pte(addr); > + vlpt->p2m.write = 0; > + j++; > + } > + } > + > + if ( sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN ) > + { > + for ( i = 0; i < d->arch.dirty.bitmap_pages; ++i ) > + { > + clear_page(d->arch.dirty.bitmap[i]); > + } > + } > + > + spin_unlock(&d->arch.dirty.lock); > + flush_tlb_local(); > + } > + > + sc->stats.dirty_count = d->arch.dirty.count; > + > + return 0; > +} > + > +long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc) > +{ > + long ret = 0; > + switch (sc->op) > + { > + case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY: > + case XEN_DOMCTL_SHADOW_OP_OFF: > + { > + enum mg nt = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? mg_rw : mg_ro; > + > + d->arch.dirty.mode = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? 
0 : 1; > + p2m_change_entry_type_global(d, nt); > + > + if ( sc->op == XEN_DOMCTL_SHADOW_OP_OFF ) > + { > + log_dirty_off(d); > + } > + else > + { > + if ( (ret = log_dirty_on(d)) ) > + return ret; > + } > + } > + break; > + > + case XEN_DOMCTL_SHADOW_OP_CLEAN: > + case XEN_DOMCTL_SHADOW_OP_PEEK: > + { > + ret = log_dirty_op(d, sc); > + } > + break; > + > + default: > + return -ENOSYS; > + } > + return ret; > +} > + > /* > * Local variables: > * mode: C > diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c > index df4d375..b652565 100644 > --- a/xen/arch/arm/traps.c > +++ b/xen/arch/arm/traps.c > @@ -1556,6 +1556,8 @@ static void do_trap_data_abort_guest(struct > cpu_user_regs *regs, > struct hsr_dabt dabt = hsr.dabt; > int rc; > mmio_info_t info; > + int page_fault = ( dabt.write && ((dabt.dfsc & FSC_MASK) == > + (FSC_FLT_PERM|FSC_3RD_LEVEL)) ); This looks like a bool_t to me. ~Andrew > > if ( !check_conditional_instr(regs, hsr) ) > { > @@ -1577,6 +1579,13 @@ static void do_trap_data_abort_guest(struct > cpu_user_regs *regs, > if ( rc == -EFAULT ) > goto bad_data_abort; > > + /* domU page fault handling for guest live migration. Note that > + * dabt.valid can be 0 here */ > + if ( page_fault && handle_page_fault(current->domain, info.gpa) ) > + { > + /* Do not modify PC as guest needs to repeat memory operation */ > + return; > + } > /* XXX: Decode the instruction if ISS is not valid */ > if ( !dabt.valid ) > goto bad_data_abort; > diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h > index ef291ff..f18fae4 100644 > --- a/xen/include/asm-arm/config.h > +++ b/xen/include/asm-arm/config.h > @@ -87,6 +87,7 @@ > * 0 - 8M <COMMON> > * > * 32M - 128M Frametable: 24 bytes per page for 16GB of RAM > + * 128M - 256M Virtual-linear mapping to P2M table > * 256M - 1G VMAP: ioremap and early_ioremap use this virtual address > * space > * > @@ -124,13 +125,15 @@ > #define CONFIG_SEPARATE_XENHEAP 1 > > #define FRAMETABLE_VIRT_START _AT(vaddr_t,0x02000000) > -#define VMAP_VIRT_START _AT(vaddr_t,0x10000000) > +#define VIRT_LIN_P2M_START _AT(vaddr_t,0x08000000) > +#define VMAP_VIRT_START _AT(vaddr_t,0x10000000) > +#define VIRT_LIN_P2M_END VMAP_VIRT_START > #define XENHEAP_VIRT_START _AT(vaddr_t,0x40000000) > #define XENHEAP_VIRT_END _AT(vaddr_t,0x7fffffff) > #define DOMHEAP_VIRT_START _AT(vaddr_t,0x80000000) > #define DOMHEAP_VIRT_END _AT(vaddr_t,0xffffffff) > > -#define VMAP_VIRT_END XENHEAP_VIRT_START > +#define VMAP_VIRT_END XENHEAP_VIRT_START > > #define DOMHEAP_ENTRIES 1024 /* 1024 2MB mapping slots */ > > @@ -157,6 +160,11 @@ > > #define HYPERVISOR_VIRT_END DIRECTMAP_VIRT_END > > +/* Definition for VIRT_LIN_P2M_START and VIRT_LIN_P2M_END (64-bit) > + * TODO: Needs evaluation. 
*/ > +#define VIRT_LIN_P2M_START _AT(vaddr_t, 0x08000000) > +#define VIRT_LIN_P2M_END VMAP_VIRT_START > + > #endif > > /* Fixmap slots */ > diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h > index aabeb51..ac82643 100644 > --- a/xen/include/asm-arm/domain.h > +++ b/xen/include/asm-arm/domain.h > @@ -162,6 +162,25 @@ struct arch_domain > } vuart; > > unsigned int evtchn_irq; > + > + /* dirty page tracing */ > + struct { > + spinlock_t lock; > + volatile int mode; /* 1 if dirty pages tracing enabled > */ > + volatile unsigned int count; /* dirty pages counter */ > + > + /* vlpt context switch */ > + volatile int second_lvl_start; /* start idx of virt linear space 2nd > */ > + volatile int second_lvl_end; /* end idx of virt linear space 2nd */ > + lpae_t *second_lvl[2]; /* copy of guest P2M 1st-lvl content > */ > + > + /* bitmap to track dirty pages */ > +#define MAX_DIRTY_BITMAP_PAGES 64 > + /* Because each bit represents a dirty page, the total supported > guest > + * memory is (64 entries x 4KB/entry x 8bits/byte x 4KB) = 8GB. */ > + uint8_t *bitmap[MAX_DIRTY_BITMAP_PAGES]; /* dirty bitmap */ > + int bitmap_pages; /* # of dirty bitmap pages > */ > + } dirty; > } __cacheline_aligned; > > struct arch_vcpu > diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h > index b8d4e7d..ab19025 100644 > --- a/xen/include/asm-arm/mm.h > +++ b/xen/include/asm-arm/mm.h > @@ -4,6 +4,7 @@ > #include <xen/config.h> > #include <xen/kernel.h> > #include <asm/page.h> > +#include <asm/config.h> > #include <public/xen.h> > > /* Align Xen to a 2 MiB boundary. */ > @@ -320,6 +321,7 @@ int donate_page( > #define domain_clamp_alloc_bitsize(d, b) (b) > > unsigned long domain_get_maximum_gpfn(struct domain *d); > +void domain_get_ram_range(struct domain *d, paddr_t *start, paddr_t *end); > > extern struct domain *dom_xen, *dom_io, *dom_cow; > > @@ -341,6 +343,27 @@ static inline void put_page_and_type(struct page_info > *page) > put_page(page); > } > > +enum mg { mg_clear, mg_ro, mg_rw, mg_rx }; > + > +/************************************/ > +/* Log-dirty support functions */ > +/************************************/ > +int log_dirty_on(struct domain *d); > +void log_dirty_off(struct domain *d); > +int log_dirty_init(struct domain *d); > +void log_dirty_teardown(struct domain *d); > +void log_dirty_restore(struct domain *d); > +int handle_page_fault(struct domain *d, paddr_t addr); > +/* access leaf PTE of a given guest address (GPA) */ > +static inline lpae_t * vlpt_get_3lvl_pte(paddr_t addr) > +{ > + lpae_t *table = (lpae_t *)VIRT_LIN_P2M_START; > + > + /* Since we slotted the guest's first p2m page table to xen's > + * second page table, one shift is enough for calculating the > + * index of guest p2m table entry */ > + return &table[addr >> PAGE_SHIFT]; > +} > #endif /* __ARCH_ARM_MM__ */ > /* > * Local variables: > diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h > index bd71abe..0cecbe7 100644 > --- a/xen/include/asm-arm/p2m.h > +++ b/xen/include/asm-arm/p2m.h > @@ -2,6 +2,7 @@ > #define _XEN_P2M_H > > #include <xen/mm.h> > +#include <public/domctl.h> > > struct domain; > > @@ -41,6 +42,7 @@ typedef enum { > p2m_invalid = 0, /* Nothing mapped here */ > p2m_ram_rw, /* Normal read/write guest RAM */ > p2m_ram_ro, /* Read-only; writes are silently dropped */ > + p2m_ram_logdirty, /* Read-only: special mode for log dirty */ > p2m_mmio_direct, /* Read/write mapping of genuine MMIO area */ > p2m_map_foreign, /* Ram pages from foreign domain */ > p2m_grant_map_rw, 
/* Read/write grant mapping */ > @@ -49,7 +51,8 @@ typedef enum { > } p2m_type_t; > > #define p2m_is_foreign(_t) ((_t) == p2m_map_foreign) > -#define p2m_is_ram(_t) ((_t) == p2m_ram_rw || (_t) == p2m_ram_ro) > +#define p2m_is_ram(_t) ((_t) == p2m_ram_rw || (_t) == p2m_ram_ro || \ > + (_t) == p2m_ram_logdirty) > > /* Initialise vmid allocator */ > void p2m_vmid_allocator_init(void); > @@ -178,6 +181,9 @@ static inline int get_page_and_type(struct page_info > *page, > return rc; > } > > +void p2m_change_entry_type_global(struct domain *d, enum mg nt); > +long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc); > + > #endif /* _XEN_P2M_H */ > > /* > diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h > index 750864a..0bf3d67 100644 > --- a/xen/include/asm-arm/processor.h > +++ b/xen/include/asm-arm/processor.h > @@ -407,6 +407,8 @@ union hsr { > #define FSC_CPR (0x3a) /* Coprocossor Abort */ > > #define FSC_LL_MASK (_AC(0x03,U)<<0) > +#define FSC_MASK (0x3f) /* Fault status mask */ > +#define FSC_3RD_LEVEL (0x03) /* Third level fault */ > > /* Time counter hypervisor control register */ > #define CNTHCTL_PA (1u<<0) /* Kernel/user access to physical counter */ _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel