[Xen-devel] [PATCH v6 4/5] xen/arm: Implement hypercall for dirty page tracing
Add a hypercall (shadow op: enable/disable and clean/peek the dirty-page bitmap).

It consists of two parts: dirty page detection and saving. For detection,
we set up the guest p2m's leaf PTEs as read-only, and whenever the guest
tries to write, a permission fault occurs and traps into Xen. The
permission-faulted GPA should be saved for the toolstack (when it wants to
see which pages are dirtied); for this purpose, we temporarily save the
GPAs into a bitmap.

Signed-off-by: Jaeyong Yoo <jaeyong.yoo@xxxxxxxxxxx>
Signed-off-by: Junghyun Yoo <yjhyun.yoo@xxxxxxxxxxx>
---
 xen/arch/arm/domain.c        |   3 +
 xen/arch/arm/domctl.c        |   9 ++
 xen/arch/arm/mm.c            |  90 +++++++++++++++++++-
 xen/arch/arm/p2m.c           | 195 +++++++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/traps.c         |  18 ++++
 xen/include/asm-arm/domain.h |   3 +
 xen/include/asm-arm/mm.h     |   6 ++
 xen/include/asm-arm/p2m.h    |   7 +-
 8 files changed, 329 insertions(+), 2 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 4978765..6a0b36d 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -512,6 +512,9 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)

     /* init for dirty-page tracing */
     d->arch.dirty.mode = 0;
+    spin_lock_init(&d->arch.dirty.lock);
+    d->arch.dirty.bitmap = NULL;
+    d->arch.dirty.bitmap_nr_bytes = 0;
     d->arch.dirty.p2m_start_idx = 0;
     d->arch.dirty.p2m_end_idx = 0;
diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
index 9f65442..054de16 100644
--- a/xen/arch/arm/domctl.c
+++ b/xen/arch/arm/domctl.c
@@ -107,6 +107,15 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
         xfree(c.data);
     }
     break;
+    case XEN_DOMCTL_shadow_op:
+    {
+        domain_pause(d);
+        ret = dirty_mode_op(d, &domctl->u.shadow_op);
+        domain_unpause(d);
+
+        copyback = 1;
+    }
+    break;

     default:
         return subarch_do_domctl(domctl, d, u_domctl);
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index 0fc9d9a..238a15e 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -865,7 +865,6 @@ void destroy_xen_mappings(unsigned long v, unsigned long e)
     create_xen_entries(REMOVE, v, 0, (e - v) >> PAGE_SHIFT, 0);
 }

-enum mg { mg_clear, mg_ro, mg_rw, mg_rx };
 static void set_pte_flags_on_range(const char *p, unsigned long l, enum mg mg)
 {
     lpae_t pte;
@@ -1328,6 +1327,95 @@ void cleanup_vlpt(struct domain *d)
     unmap_domain_page_global(d->arch.dirty.p2m_first[1]);
 }

+static inline void mark_dirty_bitmap(struct domain *d, paddr_t addr)
+{
+    paddr_t ram_base = (paddr_t) GUEST_RAM_BASE;
+    int bit_index = PFN_DOWN(addr - ram_base);
+
+    set_bit(bit_index, d->arch.dirty.bitmap);
+}
+
+/* Routine for dirty-page tracing.
+ *
+ * On the first write access the guest faults, its entry is changed to
+ * read-write, and on retry the write succeeds.
+ *
+ * For locating the p2m entry of the faulting address, we use the
+ * virtual-linear page table.
+ * Returns zero if addr is not valid or dirty mode is not set.
+ */
+int handle_page_fault(struct domain *d, paddr_t addr)
+{
+    struct p2m_domain *p2m = &d->arch.p2m;
+    lpae_t *vlp2m_pte = NULL;
+    paddr_t gma_start = GUEST_RAM_BASE;
+    paddr_t gma_end = 0;
+
+    if ( !d->arch.dirty.mode ) return 0;
+    gma_end = get_gma_end(d);
+
+    /* Ensure that addr is inside guest's RAM */
+    if ( addr < gma_start ||
+         addr > gma_end ) return 0;
+
+    spin_lock(&p2m->lock);
+    vlp2m_pte = get_vlpt_3lvl_pte(addr);
+    if ( vlp2m_pte->p2m.valid && vlp2m_pte->p2m.write == 0 &&
+         vlp2m_pte->p2m.type == p2m_ram_logdirty )
+    {
+        lpae_t pte = *vlp2m_pte;
+        pte.p2m.write = 1;
+        write_pte(vlp2m_pte, pte);
+        flush_tlb();
+        spin_unlock(&p2m->lock);
+
+        /* It is only necessary to lock between get-dirty-bitmap and
+         * mark-dirty-bitmap. If get-dirty-bitmap happens immediately before
+         * this lock, the corresponding dirty page would be marked at the
+         * next round of get-dirty-bitmap. */
+        spin_lock(&d->arch.dirty.lock);
+        mark_dirty_bitmap(d, addr);
+        spin_unlock(&d->arch.dirty.lock);
+    }
+    else
+        spin_unlock(&p2m->lock);
+
+    return 1;
+}
+
+int prepare_bitmap(struct domain *d)
+{
+    paddr_t gma_start = GUEST_RAM_BASE;
+    paddr_t gma_end = 0;
+    uint32_t nr_bytes, nr_pages, order;
+
+    gma_end = get_gma_end(d);
+
+    nr_bytes = (PFN_DOWN(gma_end - gma_start) + 7) / 8;
+    nr_pages = (nr_bytes + PAGE_SIZE - 1) / PAGE_SIZE;
+    order = get_order_from_pages(nr_pages);
+
+    d->arch.dirty.bitmap = alloc_xenheap_pages(order, 0);
+    if ( d->arch.dirty.bitmap == NULL )
+        return -ENOMEM;
+
+    memset(d->arch.dirty.bitmap, 0, nr_bytes);
+
+    d->arch.dirty.bitmap_nr_bytes = nr_bytes;
+    return 0;
+}
+
+void cleanup_bitmap(struct domain *d)
+{
+    uint32_t nr_pages, order;
+
+    nr_pages = (d->arch.dirty.bitmap_nr_bytes + PAGE_SIZE - 1) / PAGE_SIZE;
+    order = get_order_from_pages(nr_pages);
+
+    free_xenheap_pages(d->arch.dirty.bitmap, order);
+    d->arch.dirty.bitmap = NULL;
+    d->arch.dirty.bitmap_nr_bytes = 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index 96bc0ef..b111452 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -4,6 +4,8 @@
 #include <xen/errno.h>
 #include <xen/domain_page.h>
 #include <xen/bitops.h>
+#include <xen/guest_access.h>
+#include <xen/pfn.h>
 #include <asm/flushtlb.h>
 #include <asm/gic.h>
 #include <asm/event.h>
@@ -223,6 +225,7 @@ static lpae_t mfn_to_p2m_entry(unsigned long mfn, unsigned int mattr,
         break;

     case p2m_ram_ro:
+    case p2m_ram_logdirty:
         e.p2m.xn = 0;
         e.p2m.write = 0;
         break;
@@ -284,6 +287,10 @@ static int p2m_create_table(struct domain *d, lpae_t *entry, bool_t flush_cache)
     pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid);

+    /* Mark the write bit (in the page table's case, the ro bit) as 0,
+     * so it is writable in case of vlpt access. */
+    pte.pt.ro = 0;
+
     p2m_write_pte(entry, pte, flush_cache);

     return 0;
@@ -715,6 +722,194 @@ unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
     return p >> PAGE_SHIFT;
 }

+/* Change types across all p2m entries in a domain */
+int p2m_change_entry_type_global(struct domain *d, enum mg nt)
+{
+    struct p2m_domain *p2m = &d->arch.p2m;
+    paddr_t ram_base = GUEST_RAM_BASE;
+    paddr_t ram_end;
+    paddr_t paddr;
+    int nr_pages;
+    int rc = -EFAULT, i;
+    unsigned long cur_first_offset = ~0, cur_second_offset = ~0;
+    lpae_t *first = NULL, *second = NULL, *third = NULL;
+    lpae_t pte;
+
+    ram_end = get_gma_end(d);
+    paddr = ram_base;
+    nr_pages = (ram_end - ram_base) >> PAGE_SHIFT;
+
+    spin_lock(&p2m->lock);
+
+    first = __map_domain_page(p2m->first_level);
+    if ( !first ||
+         !first[first_table_offset(paddr)].p2m.valid ||
+         !first[first_table_offset(paddr)].p2m.table )
+        goto err;
+
+    for ( i = 0; i < nr_pages; ++i )
+    {
+        if ( cur_first_offset != first_table_offset(paddr) )
+        {
+            if ( second ) unmap_domain_page(second);
+            second = map_domain_page(first[first_table_offset(paddr)].p2m.base);
+            cur_first_offset = first_table_offset(paddr);
+        }
+        if ( !second ||
+             !second[second_table_offset(paddr)].p2m.valid ||
+             !second[second_table_offset(paddr)].p2m.table )
+            goto err;
+        if ( cur_second_offset != second_table_offset(paddr) )
+        {
+            if ( third ) unmap_domain_page(third);
+            third = map_domain_page(second[second_table_offset(paddr)].p2m.base);
+            cur_second_offset = second_table_offset(paddr);
+        }
+        if ( !third ||
+             !third[third_table_offset(paddr)].p2m.valid )
+            goto err;
+
+        pte = third[third_table_offset(paddr)];
+
+        if ( nt == mg_ro )
+        {
+            /* use the avail bits (type) as a backup for the write bit */
+            if ( pte.p2m.write == 1 )
+            {
+                pte.p2m.write = 0;
+                pte.p2m.type = p2m_ram_logdirty;
+            }
+            else
+            {
+                pte.p2m.type = p2m_ram_rw;
+            }
+        }
+        else if ( nt == mg_rw )
+        {
+            /* restore the write bit */
+            if ( pte.p2m.write == 0 && pte.p2m.type == p2m_ram_logdirty )
+            {
+                pte.p2m.write = 1;
+            }
+        }
+
+        write_pte(&third[third_table_offset(paddr)], pte);
+        paddr += PAGE_SIZE;
+    }
+
+    rc = 0;
+err:
+    flush_tlb_all_local();
+    if ( third ) unmap_domain_page(third);
+    if ( second ) unmap_domain_page(second);
+    if ( first ) unmap_domain_page(first);
+    spin_unlock(&p2m->lock);
+    return rc;
+}
+
+/* Read a domain's log-dirty bitmap and stats.
+ * If the operation is a CLEAN, clear the bitmap and stats. */
+int log_dirty_op(struct domain *d, xen_domctl_shadow_op_t *sc)
+{
+    int bitmap_size;
+    paddr_t gma_start = GUEST_RAM_BASE, gma_end;
+
+    /* This hypercall is called from domain 0, and we don't know which
+     * guest's vlpt is mapped in xen_second, so, to be sure, we restore
+     * the vlpt here. */
+    restore_vlpt(d);
+
+    gma_end = get_gma_end(d);
+    bitmap_size = (gma_end - gma_start) / 8;
+
+    if ( guest_handle_is_null(sc->dirty_bitmap) )
+    {
+        return -EINVAL;
+    }
+    else
+    {
+        uint32_t j = 0;
+        uint8_t *bitmap = d->arch.dirty.bitmap;
+        uint32_t nr_bytes = d->arch.dirty.bitmap_nr_bytes;
+        spin_lock(&d->arch.dirty.lock);
+
+        if ( copy_to_guest_offset(sc->dirty_bitmap, 0, bitmap, nr_bytes) )
+        {
+            spin_unlock(&d->arch.dirty.lock);
+            return -EINVAL;
+        }
+
+        dsb(sy);
+        while ( (j = find_next_bit((const unsigned long *)bitmap,
+                                   nr_bytes * 8, j)) < nr_bytes * 8 )
+        {
+            lpae_t *vlpt, new_vlpt;
+            paddr_t addr = gma_start + (j << PAGE_SHIFT);
+            vlpt = get_vlpt_3lvl_pte(addr);
+            new_vlpt = *vlpt;
+            new_vlpt.p2m.write = 0;
+            __write_pte(vlpt, new_vlpt);
+            j++;
+        }
+        dsb(sy);
+
+        if ( sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN )
+            memset(bitmap, 0, nr_bytes);
+
+        spin_unlock(&d->arch.dirty.lock);
+        flush_tlb_local();
+    }
+
+    sc->stats.dirty_count = 0;
+
+    return 0;
+}
+
+long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc)
+{
+    long ret = 0;
+    switch ( sc->op )
+    {
+    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
+    case XEN_DOMCTL_SHADOW_OP_OFF:
+    {
+        enum mg nt = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? mg_rw : mg_ro;
+
+        d->arch.dirty.mode = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? 0 : 1;
+        if ( (ret = p2m_change_entry_type_global(d, nt)) )
+            return ret;
+
+        if ( sc->op == XEN_DOMCTL_SHADOW_OP_OFF )
+        {
+            cleanup_vlpt(d);
+            cleanup_bitmap(d);
+        }
+        else
+        {
+            if ( (ret = prepare_vlpt(d)) )
+                return ret;
+
+            if ( (ret = prepare_bitmap(d)) )
+            {
+                /* in case of failure, we have to clean up the vlpt */
+                cleanup_vlpt(d);
+                return ret;
+            }
+        }
+    }
+    break;
+
+    case XEN_DOMCTL_SHADOW_OP_CLEAN:
+    case XEN_DOMCTL_SHADOW_OP_PEEK:
+    {
+        ret = log_dirty_op(d, sc);
+    }
+    break;
+
+    default:
+        return -ENOSYS;
+    }
+    return ret;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 03a3da6..9b6d746 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -1603,6 +1603,13 @@ static void do_trap_instr_abort_guest(struct cpu_user_regs *regs,
     inject_iabt_exception(regs, addr, hsr.len);
 }

+static inline int dabt_is_page_fault(struct hsr_dabt dabt)
+{
+    /* dabt.valid can be 0 here */
+    return (dabt.dfsc & FSC_TYPE_MASK) == FSC_TYPE_FAULT &&
+           (dabt.dfsc & FSC_LL_MASK) == 0x3 /* third level */;
+}
+
 static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
                                      union hsr hsr)
 {
@@ -1630,6 +1637,17 @@ static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
     if ( rc == -EFAULT )
         goto bad_data_abort;

+    /* domU page fault handling for guest live migration */
+    if ( dabt_is_page_fault(dabt) )
+    {
+        /* Do not advance pc here, so the guest retries the memory operation */
+        if ( handle_page_fault(current->domain, info.gpa) ) return;
+
+        /* handle_page_fault returning 0 means either that dirty-page tracing
+         * has not been started yet or that a 'real' permission fault
+         * happened; in that case just fall through. */
+    }
+
     /* XXX: Decode the instruction if ISS is not valid */
     if ( !dabt.valid )
         goto bad_data_abort;
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 9674175..75f3a57 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -171,6 +171,9 @@ struct arch_domain
         lpae_t *p2m_first[2];     /* copy of guest p2m's first */
         int p2m_start_idx;        /* start index of p2m_first */
         int p2m_end_idx;          /* end index of p2m_first */
+        uint8_t *bitmap;          /* dirty bitmap */
+        uint32_t bitmap_nr_bytes; /* number of bytes for dirty bitmap */
+        spinlock_t lock;          /* protect the dirty bitmap */
     } dirty;
 } __cacheline_aligned;
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 7ceb568..90ece9a 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -344,6 +344,8 @@ static inline void put_page_and_type(struct page_info *page)
 void clear_and_clean_page(struct page_info *page);

+enum mg { mg_clear, mg_ro, mg_rw, mg_rx };
+
 /* routine for dirty-page tracing */
 #define VLPT_SIZE (1 << SECOND_SHIFT)
 #define VLPT_VA_TO_IDX(va) ((va - DOMHEAP_VIRT_START) >> SECOND_SHIFT)
@@ -356,6 +358,10 @@ int prepare_vlpt(struct domain *d);
 void cleanup_vlpt(struct domain *d);
 void restore_vlpt(struct domain *d);

+int handle_page_fault(struct domain *d, paddr_t addr);
+int prepare_bitmap(struct domain *d);
+void cleanup_bitmap(struct domain *d);
+
 /* calculate the xen's virtual address for accessing the leaf PTE of
  * a given address (GPA) */
 static inline lpae_t * get_vlpt_3lvl_pte(paddr_t addr)
diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
index bd71abe..cc17ec3 100644
--- a/xen/include/asm-arm/p2m.h
+++ b/xen/include/asm-arm/p2m.h
@@ -2,6 +2,7 @@
 #define _XEN_P2M_H

 #include <xen/mm.h>
+#include <public/domctl.h>

 struct domain;

@@ -41,6 +42,7 @@ typedef enum {
     p2m_invalid = 0,    /* Nothing mapped here */
     p2m_ram_rw,         /* Normal read/write guest RAM */
     p2m_ram_ro,         /* Read-only; writes are silently dropped */
+    p2m_ram_logdirty,   /* Read-only; special mode for log dirty */
     p2m_mmio_direct,    /* Read/write mapping of genuine MMIO area */
     p2m_map_foreign,    /* Ram pages from foreign domain */
     p2m_grant_map_rw,   /* Read/write grant mapping */
@@ -49,7 +51,7 @@ typedef enum {
 } p2m_type_t;

 #define p2m_is_foreign(_t) ((_t) == p2m_map_foreign)
-#define p2m_is_ram(_t) ((_t) == p2m_ram_rw || (_t) == p2m_ram_ro)
+#define p2m_is_ram(_t) ((_t) == p2m_ram_rw || (_t) == p2m_ram_ro || (_t) == p2m_ram_logdirty)

 /* Initialise vmid allocator */
 void p2m_vmid_allocator_init(void);
@@ -178,6 +180,9 @@ static inline int get_page_and_type(struct page_info *page,
     return rc;
 }

+int p2m_change_entry_type_global(struct domain *d, enum mg nt);
+long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc);
+
 #endif /* _XEN_P2M_H */

 /*
-- 
1.8.1.2
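
As background for reviewers, the toolstack is expected to drive these ops as a
standard log-dirty cycle. The sketch below is illustrative only and not part of
the patch: shadow_op() is a hypothetical stand-in for whatever issues
XEN_DOMCTL_shadow_op from dom0 (for instance libxc's xc_shadow_control()
wrapper); only the XEN_DOMCTL_SHADOW_OP_* constants come from the public
interface, and the include path is assumed.

/* Illustrative log-dirty cycle against the ops handled by dirty_mode_op().
 * shadow_op() is a placeholder assumed to fill in a xen_domctl_shadow_op_t
 * and issue XEN_DOMCTL_shadow_op for the given domain; it is not a real API. */
#include <stdint.h>
#include <xen/domctl.h>           /* XEN_DOMCTL_SHADOW_OP_* (assumed path) */

int shadow_op(uint32_t domid, unsigned int op,
              uint8_t *bitmap, unsigned long nr_bits);   /* placeholder */

void send_dirty_ram(uint32_t domid, uint8_t *bitmap, unsigned long nr_bits)
{
    /* 1. Write-protect guest RAM (p2m_ram_logdirty) and let Xen allocate
     *    the dirty bitmap (prepare_vlpt()/prepare_bitmap()). */
    shadow_op(domid, XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, NULL, 0);

    /* 2. While the guest keeps running, repeatedly fetch-and-clear the
     *    bitmap; Xen re-write-protects every page it reports as dirty. */
    shadow_op(domid, XEN_DOMCTL_SHADOW_OP_CLEAN, bitmap, nr_bits);
    /* ... resend the pages whose bits are set, then repeat step 2 ... */

    /* 3. Restore read-write mappings and free the bitmap
     *    (cleanup_vlpt()/cleanup_bitmap()). */
    shadow_op(domid, XEN_DOMCTL_SHADOW_OP_OFF, NULL, 0);
}

In this patch, PEEK and CLEAN differ only in whether the in-Xen bitmap is
cleared after being copied out; in both cases log_dirty_op() re-establishes
write protection on the pages it reports.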