|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 2/6] x86emul: reduce CMPXCHG{8, 16}B footprint and casting
On 13/12/16 14:02, Jan Beulich wrote:
> Re-use an existing stack variable (reducing stack footprint, which also
> results in smaller code due to some stack accesses no longer needing a
> 32-bit displacement), at once using a union instead of casts. Also
> switch to rex_prefix based conditionals instead of op_bytes ones.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
>
> --- a/tools/tests/x86_emulator/x86_emulate.h
> +++ b/tools/tests/x86_emulator/x86_emulate.h
> @@ -31,6 +31,11 @@
> #define likely(x) __builtin_expect(!!(x), true)
> #define unlikely(x) __builtin_expect(!!(x), false)
>
> +#define container_of(ptr, type, member) ({ \
> + typeof(((type *)0)->member) *mptr__ = (ptr); \
> + (type *)((char *)mptr__ - offsetof(type, member)); \
> +})
> +
Please could we use __builtin_containerof()? I believe It is available
on all of the compilers we support.
This particular construct causes UBSAN to have a fit.
> #define is_canonical_address(x) (((int64_t)(x) >> 47) == ((int64_t)(x) >>
> 63))
>
> #define MMAP_SZ 16384
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -5282,11 +5282,14 @@ x86_emulate(
> break;
>
> case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 (cmpxchg8b/cmpxchg16b) */ {
> - unsigned long old[2], exp[2], new[2];
> + union {
> + uint32_t u32[2];
> + uint64_t u64[2];
> + } *old, *aux;
>
> generate_exception_if((modrm_reg & 7) != 1, EXC_UD);
> generate_exception_if(ea.type != OP_MEM, EXC_UD);
> - if ( op_bytes == 8 )
> + if ( rex_prefix & REX_W )
> {
> host_and_vcpu_must_have(cx16);
> op_bytes = 16;
> @@ -5295,35 +5298,52 @@ x86_emulate(
> else
> op_bytes = 8;
>
> + old = container_of(&mmvalp->ymm[0], typeof(*old), u64[0]);
> + aux = container_of(&mmvalp->ymm[2], typeof(*old), u64[0]);
This should be typeof(*aux), although it makes no difference at the moment.
> +
> /* Get actual old value. */
> if ( (rc = ops->read(ea.mem.seg, ea.mem.off, old, op_bytes,
> - ctxt)) != 0 )
> + ctxt)) != X86EMUL_OKAY )
> goto done;
>
> - /* Get expected and proposed values. */
> - if ( op_bytes == 8 )
> + /* Get expected value. */
> + if ( !(rex_prefix & REX_W) )
> {
> - ((uint32_t *)exp)[0] = _regs.eax; ((uint32_t *)exp)[1] =
> _regs.edx;
> - ((uint32_t *)new)[0] = _regs.ebx; ((uint32_t *)new)[1] =
> _regs.ecx;
> + aux->u32[0] = _regs.eax;
> + aux->u32[1] = _regs.edx;
> }
> else
> {
> - exp[0] = _regs.eax; exp[1] = _regs.edx;
> - new[0] = _regs.ebx; new[1] = _regs.ecx;
> + aux->u64[0] = _regs.eax;
> + aux->u64[1] = _regs.edx;
> }
>
> - if ( memcmp(old, exp, op_bytes) )
> + if ( memcmp(old, aux, op_bytes) )
> {
> /* Expected != actual: store actual to rDX:rAX and clear ZF. */
> - _regs.eax = (op_bytes == 8) ? ((uint32_t *)old)[0] : old[0];
> - _regs.edx = (op_bytes == 8) ? ((uint32_t *)old)[1] : old[1];
> + _regs.eax = !(rex_prefix & REX_W) ? old->u32[0] : old->u64[0];
> + _regs.edx = !(rex_prefix & REX_W) ? old->u32[1] : old->u64[1];
> _regs.eflags &= ~EFLG_ZF;
This clearing of ZF should be unconditional. I think this is a latent
bug, but if the cmpxchg() fails, we would exit having failed the xchg,
but leaving ZF stale.
~Andrew
> }
> else
> {
> - /* Expected == actual: attempt atomic cmpxchg and set ZF. */
> - if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old,
> - new, op_bytes, ctxt)) != 0 )
> + /*
> + * Expected == actual: Get proposed value, attempt atomic cmpxchg
> + * and set ZF.
> + */
> + if ( !(rex_prefix & REX_W) )
> + {
> + aux->u32[0] = _regs.ebx;
> + aux->u32[1] = _regs.ecx;
> + }
> + else
> + {
> + aux->u64[0] = _regs.ebx;
> + aux->u64[1] = _regs.ecx;
> + }
> +
> + if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
> + op_bytes, ctxt)) != X86EMUL_OKAY )
> goto done;
> _regs.eflags |= EFLG_ZF;
> }
>
>
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |