[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 3/6] x86/HVM: implement memory read caching



Emulation requiring device model assistance uses a form of instruction
re-execution, assuming that the second (and any further) pass takes
exactly the same path. This is a valid assumption as far as use of CPU
registers goes (as those can't change without any other instruction
executing in between), but is wrong for memory accesses. In particular
it has been observed that Windows might page out buffers underneath an
instruction currently under emulation (hitting between two passes). If
the first pass translated a linear address successfully, any subsequent
pass needs to do so too, yielding the exact same translation.

Introduce a cache (used just by guest page table accesses for now) to
make sure the above described assumption holds. This is a very simplistic
implementation for now: Only exact matches are satisfied (no overlaps or
partial reads or anything).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -27,6 +27,18 @@
 #include <asm/hvm/svm/svm.h>
 #include <asm/vm_event.h>
 
+struct hvmemul_cache
+{
+    unsigned int max_ents;
+    unsigned int num_ents;
+    struct {
+        paddr_t gpa:PADDR_BITS;
+        unsigned int size:(BITS_PER_LONG - PADDR_BITS) / 2;
+        unsigned int level:(BITS_PER_LONG - PADDR_BITS) / 2;
+        unsigned long data;
+    } ents[];
+};
+
 static void hvmtrace_io_assist(const ioreq_t *p)
 {
     unsigned int size, event;
@@ -520,7 +532,7 @@ static int hvmemul_do_mmio_addr(paddr_t
  */
 static void *hvmemul_map_linear_addr(
     unsigned long linear, unsigned int bytes, uint32_t pfec,
-    struct hvm_emulate_ctxt *hvmemul_ctxt)
+    struct hvm_emulate_ctxt *hvmemul_ctxt, struct hvmemul_cache *cache)
 {
     struct vcpu *curr = current;
     void *err, *mapping;
@@ -565,7 +577,7 @@ static void *hvmemul_map_linear_addr(
         ASSERT(mfn_x(*mfn) == 0);
 
         res = hvm_translate_get_page(curr, addr, true, pfec,
-                                     &pfinfo, &page, NULL, &p2mt);
+                                     &pfinfo, &page, NULL, &p2mt, cache);
 
         switch ( res )
         {
@@ -681,6 +693,8 @@ static int hvmemul_linear_to_phys(
     gfn_t gfn, ngfn;
     unsigned long done, todo, i, offset = addr & ~PAGE_MASK;
     int reverse;
+    struct hvmemul_cache *cache = pfec & PFEC_insn_fetch
+                                  ? NULL : curr->arch.hvm_vcpu.data_cache;
 
     /*
      * Clip repetitions to a sensible maximum. This avoids extensive looping in
@@ -710,7 +724,7 @@ static int hvmemul_linear_to_phys(
             return rc;
         gfn = gaddr_to_gfn(gaddr);
     }
-    else if ( gfn_eq(gfn = paging_gla_to_gfn(curr, addr, &pfec, NULL),
+    else if ( gfn_eq(gfn = paging_gla_to_gfn(curr, addr, &pfec, cache),
                      INVALID_GFN) )
     {
         if ( pfec & (PFEC_page_paged | PFEC_page_shared) )
@@ -726,7 +740,7 @@ static int hvmemul_linear_to_phys(
     {
         /* Get the next PFN in the range. */
         addr += reverse ? -PAGE_SIZE : PAGE_SIZE;
-        ngfn = paging_gla_to_gfn(curr, addr, &pfec, NULL);
+        ngfn = paging_gla_to_gfn(curr, addr, &pfec, cache);
 
         /* Is it contiguous with the preceding PFNs? If not then we're done. */
         if ( gfn_eq(ngfn, INVALID_GFN) ||
@@ -1046,6 +1060,8 @@ static int __hvmemul_read(
         pfec |= PFEC_implicit;
     else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
         pfec |= PFEC_user_mode;
+    if ( access_type == hvm_access_insn_fetch )
+        pfec |= PFEC_insn_fetch;
 
     rc = hvmemul_virtual_to_linear(
         seg, offset, bytes, &reps, access_type, hvmemul_ctxt, &addr);
@@ -1059,7 +1075,8 @@ static int __hvmemul_read(
 
     rc = ((access_type == hvm_access_insn_fetch) ?
           hvm_fetch_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo) :
-          hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo));
+          hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo,
+                                     curr->arch.hvm_vcpu.data_cache));
 
     switch ( rc )
     {
@@ -1178,7 +1195,8 @@ static int hvmemul_write(
          (vio->mmio_gla == (addr & PAGE_MASK)) )
         return hvmemul_linear_mmio_write(addr, bytes, p_data, pfec,
                                          hvmemul_ctxt, 1);
 
-    mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt);
+    mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt,
+                                      curr->arch.hvm_vcpu.data_cache);
     if ( IS_ERR(mapping) )
         return ~PTR_ERR(mapping);
 
@@ -1218,7 +1236,8 @@ static int hvmemul_rmw(
     else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
         pfec |= PFEC_user_mode;
 
-    mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt);
+    mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt,
+                                      current->arch.hvm_vcpu.data_cache);
     if ( IS_ERR(mapping) )
         return ~PTR_ERR(mapping);
 
@@ -1375,7 +1394,8 @@ static int hvmemul_cmpxchg(
     else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
         pfec |= PFEC_user_mode;
 
-    mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt);
+    mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt,
+                                      curr->arch.hvm_vcpu.data_cache);
     if ( IS_ERR(mapping) )
         return ~PTR_ERR(mapping);
 
@@ -2282,6 +2302,7 @@ static int _hvm_emulate_one(struct hvm_e
     {
         vio->mmio_cache_count = 0;
         vio->mmio_insn_bytes = 0;
+        curr->arch.hvm_vcpu.data_cache->num_ents = 0;
     }
     else
     {
@@ -2572,9 +2593,35 @@ void hvm_dump_emulation_state(const char
            hvmemul_ctxt->insn_buf);
 }
 
+struct hvmemul_cache *hvmemul_cache_init(unsigned int nents)
+{
+    struct hvmemul_cache *cache = xmalloc_bytes(offsetof(struct hvmemul_cache,
+                                                         ents[nents]));
+
+    if ( cache )
+    {
+        cache->num_ents = 0;
+        cache->max_ents = nents;
+    }
+
+    return cache;
+}
+
 bool hvmemul_read_cache(const struct hvmemul_cache *cache, paddr_t gpa,
                         unsigned int level, void *buffer, unsigned int size)
 {
+    unsigned int i;
+
+    ASSERT(size <= sizeof(cache->ents->data));
+
+    for ( i = 0; i < cache->num_ents; ++i )
+        if ( cache->ents[i].level == level && cache->ents[i].gpa == gpa &&
+             cache->ents[i].size == size )
+        {
+            memcpy(buffer, &cache->ents[i].data, size);
+            return true;
+        }
+
     return false;
 }
 
@@ -2582,6 +2629,35 @@ void hvmemul_write_cache(struct hvmemul_
                          unsigned int level, const void *buffer,
                          unsigned int size)
 {
+    unsigned int i;
+
+    if ( size > sizeof(cache->ents->data) )
+    {
+        ASSERT_UNREACHABLE();
+        return;
+    }
+
+    for ( i = 0; i < cache->num_ents; ++i )
+        if ( cache->ents[i].level == level && cache->ents[i].gpa == gpa &&
+             cache->ents[i].size == size )
+        {
+            memcpy(&cache->ents[i].data, buffer, size);
+            return;
+        }
+
+    if ( unlikely(i >= cache->max_ents) )
+    {
+        ASSERT_UNREACHABLE();
+        return;
+    }
+
+    cache->ents[i].level = level;
+    cache->ents[i].gpa   = gpa;
+    cache->ents[i].size  = size;
+
+    memcpy(&cache->ents[i].data, buffer, size);
+
+    cache->num_ents = i + 1;
 }
 
 /*
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1530,6 +1530,18 @@ int hvm_vcpu_initialise(struct vcpu *v)
 
     v->arch.hvm_vcpu.inject_event.vector = HVM_EVENT_VECTOR_UNSET;
 
+    /*
+     * Leaving aside the insn fetch, for which we don't use this cache, no
+     * insn can access more than 8 independent linear addresses (AVX2
+     * gathers being the worst). Each such linear range can span a page
+     * boundary, i.e. require two page walks.
+     */
+    v->arch.hvm_vcpu.data_cache = hvmemul_cache_init(CONFIG_PAGING_LEVELS *
+                                                     8 * 2);
+    rc = -ENOMEM;
+    if ( !v->arch.hvm_vcpu.data_cache )
+        goto fail4;
+
     rc = setup_compat_arg_xlat(v); /* teardown: free_compat_arg_xlat() */
     if ( rc != 0 )
         goto fail4;
@@ -1559,6 +1571,7 @@ int hvm_vcpu_initialise(struct vcpu *v)
  fail5:
     free_compat_arg_xlat(v);
  fail4:
+    hvmemul_cache_destroy(v->arch.hvm_vcpu.data_cache);
     hvm_funcs.vcpu_destroy(v);
  fail3:
     vlapic_destroy(v);
@@ -1581,6 +1594,8 @@ void hvm_vcpu_destroy(struct vcpu *v)
 
     free_compat_arg_xlat(v);
 
+    hvmemul_cache_destroy(v->arch.hvm_vcpu.data_cache);
+
     tasklet_kill(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet);
     hvm_funcs.vcpu_destroy(v);
 
@@ -2949,7 +2964,7 @@ void hvm_task_switch(
     }
 
     rc = hvm_copy_from_guest_linear(
-        &tss, prev_tr.base, sizeof(tss), PFEC_page_present, &pfinfo);
+        &tss, prev_tr.base, sizeof(tss), PFEC_page_present, &pfinfo, NULL);
     if ( rc == HVMTRANS_bad_linear_to_gfn )
         hvm_inject_page_fault(pfinfo.ec, pfinfo.linear);
     if ( rc != HVMTRANS_okay )
@@ -2996,7 +3011,7 @@ void hvm_task_switch(
         goto out;
 
     rc = hvm_copy_from_guest_linear(
-        &tss, tr.base, sizeof(tss), PFEC_page_present, &pfinfo);
+        &tss, tr.base, sizeof(tss), PFEC_page_present, &pfinfo, NULL);
     if ( rc == HVMTRANS_bad_linear_to_gfn )
         hvm_inject_page_fault(pfinfo.ec, pfinfo.linear);
     /*
@@ -3107,7 +3122,7 @@ void hvm_task_switch(
 enum hvm_translation_result hvm_translate_get_page(
     struct vcpu *v, unsigned long addr, bool linear, uint32_t pfec,
     pagefault_info_t *pfinfo, struct page_info **page_p,
-    gfn_t *gfn_p, p2m_type_t *p2mt_p)
+    gfn_t *gfn_p, p2m_type_t *p2mt_p, struct hvmemul_cache *cache)
 {
     struct page_info *page;
     p2m_type_t p2mt;
@@ -3115,7 +3130,7 @@ enum hvm_translation_result hvm_translat
 
     if ( linear )
     {
-        gfn = paging_gla_to_gfn(v, addr, &pfec, NULL);
+        gfn = paging_gla_to_gfn(v, addr, &pfec, cache);
 
         if ( gfn_eq(gfn, INVALID_GFN) )
         {
@@ -3187,7 +3202,7 @@ enum hvm_translation_result hvm_translat
 #define HVMCOPY_linear     (1u<<2)
 static enum hvm_translation_result __hvm_copy(
     void *buf, paddr_t addr, int size, struct vcpu *v, unsigned int flags,
-    uint32_t pfec, pagefault_info_t *pfinfo)
+    uint32_t pfec, pagefault_info_t *pfinfo, struct hvmemul_cache *cache)
 {
     gfn_t gfn;
     struct page_info *page;
@@ -3220,8 +3235,8 @@ static enum hvm_translation_result __hvm
 
         count = min_t(int, PAGE_SIZE - gpa, todo);
 
-        res = hvm_translate_get_page(v, addr, flags & HVMCOPY_linear,
-                                     pfec, pfinfo, &page, &gfn, &p2mt);
+        res = hvm_translate_get_page(v, addr, flags & HVMCOPY_linear, pfec,
+                                     pfinfo, &page, &gfn, &p2mt, cache);
         if ( res != HVMTRANS_okay )
             return res;
 
@@ -3268,14 +3283,14 @@ enum hvm_translation_result hvm_copy_to_
     paddr_t paddr, void *buf, int size, struct vcpu *v)
 {
     return __hvm_copy(buf, paddr, size, v,
-                      HVMCOPY_to_guest | HVMCOPY_phys, 0, NULL);
+                      HVMCOPY_to_guest | HVMCOPY_phys, 0, NULL, NULL);
 }
 
 enum hvm_translation_result hvm_copy_from_guest_phys(
     void *buf, paddr_t paddr, int size)
 {
     return __hvm_copy(buf, paddr, size, current,
-                      HVMCOPY_from_guest | HVMCOPY_phys, 0, NULL);
+                      HVMCOPY_from_guest | HVMCOPY_phys, 0, NULL, NULL);
 }
 
 enum hvm_translation_result hvm_copy_to_guest_linear(
@@ -3284,16 +3299,17 @@ enum hvm_translation_result hvm_copy_to_
 {
     return __hvm_copy(buf, addr, size, current,
                       HVMCOPY_to_guest | HVMCOPY_linear,
-                      PFEC_page_present | PFEC_write_access | pfec, pfinfo);
+                      PFEC_page_present | PFEC_write_access | pfec,
+                      pfinfo, NULL);
 }
 
 enum hvm_translation_result hvm_copy_from_guest_linear(
     void *buf, unsigned long addr, int size, uint32_t pfec,
-    pagefault_info_t *pfinfo)
+    pagefault_info_t *pfinfo, struct hvmemul_cache *cache)
 {
     return __hvm_copy(buf, addr, size, current,
                       HVMCOPY_from_guest | HVMCOPY_linear,
-                      PFEC_page_present | pfec, pfinfo);
+                      PFEC_page_present | pfec, pfinfo, cache);
 }
 
 enum hvm_translation_result hvm_fetch_from_guest_linear(
@@ -3302,7 +3318,7 @@ enum hvm_translation_result hvm_fetch_fr
 {
     return __hvm_copy(buf, addr, size, current,
                       HVMCOPY_from_guest | HVMCOPY_linear,
-                      PFEC_page_present | PFEC_insn_fetch | pfec, pfinfo);
+                      PFEC_page_present | PFEC_insn_fetch | pfec, pfinfo, NULL);
 }
 
 unsigned long copy_to_user_hvm(void *to, const void *from, unsigned int len)
@@ -3343,7 +3359,8 @@ unsigned long copy_from_user_hvm(void *t
         return 0;
     }
 
-    rc = hvm_copy_from_guest_linear(to, (unsigned long)from, len, 0, NULL);
+    rc = hvm_copy_from_guest_linear(to, (unsigned long)from, len,
+                                    0, NULL, NULL);
     return rc ? len : 0; /* fake a copy_from_user() return code */
 }
 
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1358,7 +1358,7 @@ static void svm_emul_swint_injection(str
         goto raise_exception;
 
     rc = hvm_copy_from_guest_linear(&idte, idte_linear_addr, idte_size,
-                                    PFEC_implicit, &pfinfo);
+                                    PFEC_implicit, &pfinfo, NULL);
     if ( rc )
     {
         if ( rc == HVMTRANS_bad_linear_to_gfn )
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -475,7 +475,7 @@ static int decode_vmx_inst(struct cpu_us
         {
             pagefault_info_t pfinfo;
             int rc = hvm_copy_from_guest_linear(poperandS, base, size,
-                                                0, &pfinfo);
+                                                0, &pfinfo, NULL);
 
             if ( rc == HVMTRANS_bad_linear_to_gfn )
                 hvm_inject_page_fault(pfinfo.ec, pfinfo.linear);
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -206,7 +206,7 @@ hvm_read(enum x86_segment seg,
     if ( access_type == hvm_access_insn_fetch )
         rc = hvm_fetch_from_guest_linear(p_data, addr, bytes, 0, &pfinfo);
     else
-        rc = hvm_copy_from_guest_linear(p_data, addr, bytes, 0, &pfinfo);
+        rc = hvm_copy_from_guest_linear(p_data, addr, bytes, 0, &pfinfo, NULL);
 
     switch ( rc )
     {
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -99,6 +99,11 @@ int hvmemul_do_pio_buffer(uint16_t port,
                           void *buffer);
 
 struct hvmemul_cache;
+struct hvmemul_cache *hvmemul_cache_init(unsigned int nents);
+static inline void hvmemul_cache_destroy(struct hvmemul_cache *cache)
+{
+    xfree(cache);
+}
 bool hvmemul_read_cache(const struct hvmemul_cache *, paddr_t gpa,
                         unsigned int level, void *buffer, unsigned int size);
 void hvmemul_write_cache(struct hvmemul_cache *, paddr_t gpa,
--- a/xen/include/asm-x86/hvm/support.h
+++ b/xen/include/asm-x86/hvm/support.h
@@ -99,7 +99,7 @@ enum hvm_translation_result hvm_copy_to_
     pagefault_info_t *pfinfo);
 enum hvm_translation_result hvm_copy_from_guest_linear(
     void *buf, unsigned long addr, int size, uint32_t pfec,
-    pagefault_info_t *pfinfo);
+    pagefault_info_t *pfinfo, struct hvmemul_cache *cache);
 enum hvm_translation_result hvm_fetch_from_guest_linear(
     void *buf, unsigned long addr, int size, uint32_t pfec,
     pagefault_info_t *pfinfo);
@@ -113,7 +113,7 @@ enum hvm_translation_result hvm_fetch_fr
 enum hvm_translation_result hvm_translate_get_page(
     struct vcpu *v, unsigned long addr, bool linear, uint32_t pfec,
     pagefault_info_t *pfinfo, struct page_info **page_p,
-    gfn_t *gfn_p, p2m_type_t *p2mt_p);
+    gfn_t *gfn_p, p2m_type_t *p2mt_p, struct hvmemul_cache *cache);
 
 #define HVM_HCALL_completed  0 /* hypercall completed - no further action */
 #define HVM_HCALL_preempted  1 /* hypercall preempted - re-execute VMCALL */
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -53,8 +53,6 @@ struct hvm_mmio_cache {
     uint8_t buffer[32];
 };
 
-struct hvmemul_cache;
-
 struct hvm_vcpu_io {
     /* I/O request in flight to device model. */
     enum hvm_io_completion io_completion;
@@ -200,6 +198,7 @@ struct hvm_vcpu {
     u8                  cache_mode;
 
     struct hvm_vcpu_io  hvm_io;
+    struct hvmemul_cache *data_cache;
 
     /* Pending hw/sw interrupt (.vector = -1 means nothing pending). */
     struct x86_event     inject_event;




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.