x86/HVM: correct page dirty marking in hvm_map_guest_frame_rw() Rather than dirtying a page when establishing a (permanent) mapping, dirty it when the page gets unmapped, or - if still mapped - on the final iteration of a save operation (or in other cases where the guest is paused or already shut down). (Transient mappings continue to get dirtied upon getting mapped, to avoid the overhead of tracking.) Signed-off-by: Jan Beulich --- v3: Introduce XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL. v2: Refine predicate for calling hvm_mapped_guest_frames_mark_dirty() (now including all shut down domains as well as tool stack paused ones). --- a/tools/libxc/xc_sr_save.c +++ b/tools/libxc/xc_sr_save.c @@ -537,7 +537,8 @@ static int suspend_and_send_dirty(struct if ( xc_shadow_control( xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN, HYPERCALL_BUFFER(dirty_bitmap), ctx->save.p2m_size, - NULL, 0, &stats) != ctx->save.p2m_size ) + NULL, XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL, &stats) != + ctx->save.p2m_size ) { PERROR("Failed to retrieve logdirty bitmap"); rc = -1; --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1565,6 +1565,8 @@ int hvm_domain_initialise(struct domain INIT_LIST_HEAD(&d->arch.hvm_domain.ioreq_server.list); spin_lock_init(&d->arch.hvm_domain.irq_lock); spin_lock_init(&d->arch.hvm_domain.uc_lock); + spin_lock_init(&d->arch.hvm_domain.write_map.lock); + INIT_LIST_HEAD(&d->arch.hvm_domain.write_map.list); hvm_init_cacheattr_region_list(d); @@ -3677,6 +3679,11 @@ int hvm_virtual_to_linear_addr( return 1; } +struct hvm_write_map { + struct list_head list; + struct page_info *page; +}; + /* On non-NULL return, we leave this function holding an additional * ref on the underlying mfn, if any */ static void *_hvm_map_guest_frame(unsigned long gfn, bool_t permanent, @@ -3704,15 +3711,30 @@ static void *_hvm_map_guest_frame(unsign if ( writable ) { - if ( !p2m_is_discard_write(p2mt) ) - paging_mark_dirty(d, page_to_mfn(page)); - else + if ( unlikely(p2m_is_discard_write(p2mt)) ) 
*writable = 0; + else if ( !permanent ) + paging_mark_dirty(d, page_to_mfn(page)); } if ( !permanent ) return __map_domain_page(page); + if ( writable && *writable ) + { + struct hvm_write_map *track = xmalloc(struct hvm_write_map); + + if ( !track ) + { + put_page(page); + return NULL; + } + track->page = page; + spin_lock(&d->arch.hvm_domain.write_map.lock); + list_add_tail(&track->list, &d->arch.hvm_domain.write_map.list); + spin_unlock(&d->arch.hvm_domain.write_map.lock); + } + map = __map_domain_page_global(page); if ( !map ) put_page(page); @@ -3735,18 +3757,45 @@ void *hvm_map_guest_frame_ro(unsigned lo void hvm_unmap_guest_frame(void *p, bool_t permanent) { unsigned long mfn; + struct page_info *page; if ( !p ) return; mfn = domain_page_map_to_mfn(p); + page = mfn_to_page(mfn); if ( !permanent ) unmap_domain_page(p); else + { + struct domain *d = page_get_owner(page); + struct hvm_write_map *track; + unmap_domain_page_global(p); + spin_lock(&d->arch.hvm_domain.write_map.lock); + list_for_each_entry(track, &d->arch.hvm_domain.write_map.list, list) + if ( track->page == page ) + { + paging_mark_dirty(d, mfn); + list_del(&track->list); + xfree(track); + break; + } + spin_unlock(&d->arch.hvm_domain.write_map.lock); + } + + put_page(page); +} + +void hvm_mapped_guest_frames_mark_dirty(struct domain *d) +{ + struct hvm_write_map *track; - put_page(mfn_to_page(mfn)); + spin_lock(&d->arch.hvm_domain.write_map.lock); + list_for_each_entry(track, &d->arch.hvm_domain.write_map.list, list) + paging_mark_dirty(d, page_to_mfn(track->page)); + spin_unlock(&d->arch.hvm_domain.write_map.lock); } static void *hvm_map_entry(unsigned long va, bool_t *writable) --- a/xen/arch/x86/mm/paging.c +++ b/xen/arch/x86/mm/paging.c @@ -29,6 +29,7 @@ #include #include #include +#include /* SHUTDOWN_suspend */ #include "mm-locks.h" @@ -420,6 +421,13 @@ static int paging_log_dirty_op(struct do if ( !resuming ) { + /* + * Mark dirty all currently write-mapped pages on e.g. 
the + * final iteration of a save operation. + */ + if ( sc->mode & XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL ) + hvm_mapped_guest_frames_mark_dirty(d); + domain_pause(d); /* @@ -742,6 +750,8 @@ int paging_domctl(struct domain *d, xen_ case XEN_DOMCTL_SHADOW_OP_CLEAN: case XEN_DOMCTL_SHADOW_OP_PEEK: + if ( sc->mode & ~XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL ) + return -EINVAL; return paging_log_dirty_op(d, sc, resuming); } --- a/xen/include/asm-x86/hvm/domain.h +++ b/xen/include/asm-x86/hvm/domain.h @@ -145,6 +145,12 @@ struct hvm_domain { unsigned long *io_bitmap; + /* List of permanently write-mapped pages. */ + struct { + spinlock_t lock; + struct list_head list; + } write_map; + union { struct vmx_domain vmx; struct svm_domain svm; --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -445,6 +445,7 @@ void *hvm_map_guest_frame_rw(unsigned lo bool_t *writable); void *hvm_map_guest_frame_ro(unsigned long gfn, bool_t permanent); void hvm_unmap_guest_frame(void *p, bool_t permanent); +void hvm_mapped_guest_frames_mark_dirty(struct domain *); static inline void hvm_set_info_guest(struct vcpu *v) { --- a/xen/include/public/domctl.h +++ b/xen/include/public/domctl.h @@ -208,6 +208,13 @@ struct xen_domctl_getpageframeinfo3 { */ #define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL (1 << 4) +/* Mode flags for XEN_DOMCTL_SHADOW_OP_{CLEAN,PEEK}. */ + /* + * This is the final iteration: Requesting to include pages mapped + * writably by the hypervisor in the dirty bitmap. + */ +#define XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL (1 << 0) + struct xen_domctl_shadow_op_stats { uint32_t fault_count; uint32_t dirty_count; @@ -219,8 +226,9 @@ struct xen_domctl_shadow_op { /* IN variables. 
*/ uint32_t op; /* XEN_DOMCTL_SHADOW_OP_* */ - /* OP_ENABLE */ - uint32_t mode; /* XEN_DOMCTL_SHADOW_ENABLE_* */ + /* OP_ENABLE: XEN_DOMCTL_SHADOW_ENABLE_* */ + /* OP_PEEK / OP_CLEAN: XEN_DOMCTL_SHADOW_LOGDIRTY_* */ + uint32_t mode; /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */ uint32_t mb; /* Shadow memory allocation in MB */