x86emul: correctly handle CMPXCHG* comparison failures

If the ->cmpxchg() hook finds a mismatch, we should deal with this the
same as when the "manual" comparison reports a mismatch. This involves
reverting bfce0e62c3 ("x86/emul: Drop X86EMUL_CMPXCHG_FAILED"), albeit
with X86EMUL_CMPXCHG_FAILED now becoming a value distinct from
X86EMUL_RETRY.

In order not to leave mixed code, also fully switch the affected
functions from paddr_t to intpte_t.

Signed-off-by: Jan Beulich
---
The code could be further simplified if we could rely on all
->cmpxchg() hooks always using CMPXCHG, but for now we need to cope
with them using plain writes (and hence accept the double reads if
CMPXCHG is actually being used).

Note that the patch doesn't address the incorrectness of there not
being a memory write even in the comparison-failed case.

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5236,16 +5236,17 @@ static int ptwr_emulated_read(
 
 static int ptwr_emulated_update(
     unsigned long addr,
-    paddr_t old,
-    paddr_t val,
+    intpte_t *p_old,
+    intpte_t val,
     unsigned int bytes,
-    unsigned int do_cmpxchg,
     struct ptwr_emulate_ctxt *ptwr_ctxt)
 {
     unsigned long mfn;
     unsigned long unaligned_addr = addr;
     struct page_info *page;
     l1_pgentry_t pte, ol1e, nl1e, *pl1e;
+    intpte_t old = p_old ? *p_old : 0;
+    unsigned int offset = 0;
     struct vcpu *v = current;
     struct domain *d = v->domain;
     int ret;
@@ -5259,28 +5260,30 @@ static int ptwr_emulated_update(
     }
 
     /* Turn a sub-word access into a full-word access. */
-    if ( bytes != sizeof(paddr_t) )
+    if ( bytes != sizeof(val) )
     {
-        paddr_t      full;
-        unsigned int rc, offset = addr & (sizeof(paddr_t)-1);
+        intpte_t full;
+        unsigned int rc;
+
+        offset = addr & (sizeof(full) - 1);
 
         /* Align address; read full word. */
-        addr &= ~(sizeof(paddr_t)-1);
-        if ( (rc = copy_from_user(&full, (void *)addr, sizeof(paddr_t))) != 0 )
+        addr &= ~(sizeof(full) - 1);
+        if ( (rc = copy_from_user(&full, (void *)addr, sizeof(full))) != 0 )
         {
             x86_emul_pagefault(0, /* Read fault. */
-                               addr + sizeof(paddr_t) - rc,
+                               addr + sizeof(full) - rc,
                                &ptwr_ctxt->ctxt);
             return X86EMUL_EXCEPTION;
         }
         /* Mask out bits provided by caller. */
-        full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
+        full &= ~((((intpte_t)1 << (bytes * 8)) - 1) << (offset * 8));
         /* Shift the caller value and OR in the missing bits. */
-        val  &= (((paddr_t)1 << (bytes*8)) - 1);
+        val &= (((intpte_t)1 << (bytes * 8)) - 1);
         val <<= (offset)*8;
         val  |= full;
         /* Also fill in missing parts of the cmpxchg old value. */
-        old  &= (((paddr_t)1 << (bytes*8)) - 1);
+        old &= (((intpte_t)1 << (bytes * 8)) - 1);
         old <<= (offset)*8;
         old  |= full;
     }
@@ -5302,7 +5305,7 @@ static int ptwr_emulated_update(
     {
     default:
         if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) &&
-             !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
+             !p_old && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
         {
             /*
              * If this is an upper-half write to a PAE PTE then we assume that
@@ -5333,21 +5336,26 @@ static int ptwr_emulated_update(
     /* Checked successfully: do the update (write or cmpxchg). */
     pl1e = map_domain_page(_mfn(mfn));
     pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
-    if ( do_cmpxchg )
+    if ( p_old )
     {
-        int okay;
-        intpte_t t = old;
         ol1e = l1e_from_intpte(old);
 
-        okay = paging_cmpxchg_guest_entry(v, &l1e_get_intpte(*pl1e),
-                                          &t, l1e_get_intpte(nl1e), _mfn(mfn));
-        okay = (okay && t == old);
+        if ( !paging_cmpxchg_guest_entry(v, &l1e_get_intpte(*pl1e),
+                                         &old, l1e_get_intpte(nl1e), _mfn(mfn)) )
+            ret = X86EMUL_UNHANDLEABLE;
+        else if ( l1e_get_intpte(ol1e) == old )
+            ret = X86EMUL_OKAY;
+        else
+        {
+            *p_old = old >> (offset * 8);
+            ret = X86EMUL_CMPXCHG_FAILED;
+        }
 
-        if ( !okay )
+        if ( ret != X86EMUL_OKAY )
         {
             unmap_domain_page(pl1e);
             put_page_from_l1e(nl1e, d);
-            return X86EMUL_RETRY;
+            return ret;
         }
     }
     else
@@ -5374,9 +5382,9 @@ static int ptwr_emulated_write(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    paddr_t val = 0;
+    intpte_t val = 0;
 
-    if ( (bytes > sizeof(paddr_t)) || (bytes & (bytes - 1)) || !bytes )
+    if ( (bytes > sizeof(val)) || (bytes & (bytes - 1)) || !bytes )
     {
         MEM_LOG("ptwr_emulate: bad write size (addr=%lx, bytes=%u)",
                 offset, bytes);
@@ -5385,9 +5393,9 @@ static int ptwr_emulated_write(
 
     memcpy(&val, p_data, bytes);
 
-    return ptwr_emulated_update(
-        offset, 0, val, bytes, 0,
-        container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
+    return ptwr_emulated_update(offset, NULL, val, bytes,
+                                container_of(ctxt, struct ptwr_emulate_ctxt,
+                                             ctxt));
 }
 
 static int ptwr_emulated_cmpxchg(
@@ -5398,21 +5406,20 @@ static int ptwr_emulated_cmpxchg(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    paddr_t old = 0, new = 0;
+    intpte_t new = 0;
 
-    if ( (bytes > sizeof(paddr_t)) || (bytes & (bytes -1)) )
+    if ( (bytes > sizeof(new)) || (bytes & (bytes -1)) )
     {
         MEM_LOG("ptwr_emulate: bad cmpxchg size (addr=%lx, bytes=%u)",
                 offset, bytes);
         return X86EMUL_UNHANDLEABLE;
     }
 
-    memcpy(&old, p_old, bytes);
     memcpy(&new, p_new, bytes);
 
-    return ptwr_emulated_update(
-        offset, old, new, bytes, 1,
-        container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
+    return ptwr_emulated_update(offset, p_old, new, bytes,
+                                container_of(ctxt, struct ptwr_emulate_ctxt,
+                                             ctxt));
 }
 
 static int pv_emul_is_mem_write(const struct x86_emulate_state *state,
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -285,7 +285,7 @@ hvm_emulate_cmpxchg(enum x86_segment seg
     struct sh_emulate_ctxt *sh_ctxt =
         container_of(ctxt, struct sh_emulate_ctxt, ctxt);
     struct vcpu *v = current;
-    unsigned long addr, old, new;
+    unsigned long addr, new = 0;
     int rc;
 
     if ( bytes > sizeof(long) )
@@ -296,12 +296,10 @@ hvm_emulate_cmpxchg(enum x86_segment seg
     if ( rc )
         return rc;
 
-    old = new = 0;
-    memcpy(&old, p_old, bytes);
     memcpy(&new, p_new, bytes);
 
     return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
-        v, addr, old, new, bytes, sh_ctxt);
+        v, addr, p_old, new, bytes, sh_ctxt);
 }
 
 static const struct x86_emulate_ops hvm_shadow_emulator_ops = {
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -4755,11 +4755,11 @@ sh_x86_emulate_write(struct vcpu *v, uns
 
 static int
 sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr,
-                       unsigned long old, unsigned long new,
-                       unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
+                       unsigned long *p_old, unsigned long new,
+                       unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
 {
     void *addr;
-    unsigned long prev;
+    unsigned long prev, old = *p_old;
     int rv = X86EMUL_OKAY;
 
     /* Unaligned writes are only acceptable on HVM */
@@ -4783,7 +4783,10 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
     }
 
     if ( prev != old )
-        rv = X86EMUL_RETRY;
+    {
+        *p_old = prev;
+        rv = X86EMUL_CMPXCHG_FAILED;
+    }
 
     SHADOW_DEBUG(EMULATE, "va %#lx was %#lx expected %#lx"
                  " wanted %#lx now %#lx bytes %u\n",
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1880,6 +1880,9 @@ protmode_load_seg(
 
         default:
             return rc;
+
+        case X86EMUL_CMPXCHG_FAILED:
+            return X86EMUL_RETRY;
         }
 
         /* Force the Accessed flag in our local copy. */
@@ -6702,6 +6705,7 @@ x86_emulate(
         break;
 
     case X86EMUL_OPC(0x0f, 0xb0): case X86EMUL_OPC(0x0f, 0xb1): /* cmpxchg */
+        fail_if(!ops->cmpxchg);
         /* Save real source value, then compare EAX against destination. */
         src.orig_val = src.val;
         src.val = _regs.r(ax);
@@ -6710,8 +6714,17 @@ x86_emulate(
         if ( _regs.eflags & X86_EFLAGS_ZF )
         {
             /* Success: write back to memory. */
-            dst.val = src.orig_val;
+            dst.val = src.val;
+            rc = ops->cmpxchg(dst.mem.seg, dst.mem.off, &dst.val,
+                              &src.orig_val, dst.bytes, ctxt);
+            if ( rc == X86EMUL_CMPXCHG_FAILED )
+            {
+                _regs.eflags &= ~X86_EFLAGS_ZF;
+                rc = X86EMUL_OKAY;
+            }
         }
+        if ( _regs.eflags & X86_EFLAGS_ZF )
+            dst.type = OP_NONE;
         else
         {
             /* Failure: write the value we saw to EAX. */
@@ -7016,6 +7029,7 @@ x86_emulate(
 
         if ( memcmp(old, aux, op_bytes) )
         {
+        cmpxchgNb_failed:
            /* Expected != actual: store actual to rDX:rAX and clear ZF. */
             _regs.r(ax) = !(rex_prefix & REX_W) ? old->u32[0] : old->u64[0];
             _regs.r(dx) = !(rex_prefix & REX_W) ? old->u32[1] : old->u64[1];
@@ -7025,7 +7039,7 @@ x86_emulate(
         {
             /*
              * Expected == actual: Get proposed value, attempt atomic cmpxchg
-             * and set ZF.
+             * and set ZF if successful.
              */
             if ( !(rex_prefix & REX_W) )
             {
@@ -7038,10 +7052,20 @@ x86_emulate(
                 aux->u64[1] = _regs.r(cx);
             }
 
-            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
-                                    op_bytes, ctxt)) != X86EMUL_OKAY )
+            switch ( rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
+                                       op_bytes, ctxt) )
+            {
+            case X86EMUL_OKAY:
+                _regs.eflags |= X86_EFLAGS_ZF;
+                break;
+
+            case X86EMUL_CMPXCHG_FAILED:
+                rc = X86EMUL_OKAY;
+                goto cmpxchgNb_failed;
+
+            default:
                 goto done;
-            _regs.eflags |= X86_EFLAGS_ZF;
+            }
         }
         break;
     }
@@ -8049,6 +8073,8 @@ x86_emulate(
             rc = ops->cmpxchg(
                 dst.mem.seg, dst.mem.off, &dst.orig_val,
                 &dst.val, dst.bytes, ctxt);
+            if ( rc == X86EMUL_CMPXCHG_FAILED )
+                rc = X86EMUL_RETRY;
         }
         else
         {
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -153,6 +153,8 @@ struct x86_emul_fpu_aux {
 #define X86EMUL_EXCEPTION      2
  /* Retry the emulation for some reason. No state modified. */
 #define X86EMUL_RETRY          3
+ /* (cmpxchg accessor): CMPXCHG failed. */
+#define X86EMUL_CMPXCHG_FAILED 4
  /*
   * Operation fully done by one of the hooks:
   * - validate(): operation completed (except common insn retire logic)
@@ -160,7 +162,7 @@ struct x86_emul_fpu_aux {
  * - read_io() / write_io(): bypass GPR update (non-string insns only)
  * Undefined behavior when used anywhere else.
  */
-#define X86EMUL_DONE           4
+#define X86EMUL_DONE           5
 
 /* FPU sub-types which may be requested via ->get_fpu(). */
 enum x86_emulate_fpu_type {
@@ -250,6 +252,8 @@ struct x86_emulate_ops
     /*
      * cmpxchg: Emulate an atomic (LOCKed) CMPXCHG operation.
      * @p_old: [IN ] Pointer to value expected to be current at @addr.
+     *         [OUT] Pointer to value found at @addr (may always be
+     *               updated, meaningful for X86EMUL_CMPXCHG_FAILED only).
      * @p_new: [IN ] Pointer to value to write to @addr.
     * @bytes: [IN ] Operation size (up to 8 (x86/32) or 16 (x86/64) bytes).
      */
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -89,7 +89,7 @@ struct shadow_paging_mode {
                                             void *src, u32 bytes,
                                             struct sh_emulate_ctxt *sh_ctxt);
     int           (*x86_emulate_cmpxchg   )(struct vcpu *v, unsigned long va,
-                                            unsigned long old,
+                                            unsigned long *old,
                                             unsigned long new,
                                             unsigned int bytes,
                                             struct sh_emulate_ctxt *sh_ctxt);
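
For reference only (not part of the patch): a minimal sketch of the hook
contract the updated @p_old comment describes, assuming an implementation
that can use a real compare-and-exchange underneath. fake_cmpxchg() and the
standalone X86EMUL_* defines below are made up for illustration; on a
mismatch the hook hands back the value actually observed and returns
X86EMUL_CMPXCHG_FAILED instead of X86EMUL_RETRY, so the emulator can clear
ZF (CMPXCHG) or reload rDX:rAX (CMPXCHG8B/16B) rather than retry blindly.

/* Illustrative sketch only -- not part of the patch above. */
#include <stdbool.h>
#include <stdint.h>

#define X86EMUL_OKAY            0
#define X86EMUL_CMPXCHG_FAILED  4   /* value introduced by this patch */

static int fake_cmpxchg(uint64_t *mem, uint64_t *p_old, uint64_t new)
{
    uint64_t expected = *p_old;

    /* Atomically store 'new' only if '*mem' still holds '*p_old'. */
    if ( __atomic_compare_exchange_n(mem, &expected, new, false,
                                     __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) )
        return X86EMUL_OKAY;

    /* Mismatch: report the value actually found back through p_old. */
    *p_old = expected;
    return X86EMUL_CMPXCHG_FAILED;
}

A caller mirroring the CMPXCHG path above would react to the failure
return by clearing X86_EFLAGS_ZF and loading the accumulator from the
updated *p_old, rather than restarting the whole instruction.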