Re: [Xen-devel] [PATCH v3 07/25] x86emul: support AVX2 gather insns
On 07/12/17 14:03, Jan Beulich wrote:
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -391,6 +391,7 @@ static const struct {
> [0x78 ... 0x79] = { .simd_size = simd_other, .two_op = 1 },
> [0x8c] = { .simd_size = simd_other },
> [0x8e] = { .simd_size = simd_other, .to_mem = 1 },
> + [0x90 ... 0x93] = { .simd_size = simd_other, .vsib = 1 },
> [0x96 ... 0x9f] = { .simd_size = simd_packed_fp },
> [0xa6 ... 0xaf] = { .simd_size = simd_packed_fp },
> [0xb6 ... 0xbf] = { .simd_size = simd_packed_fp },
> @@ -598,6 +599,7 @@ struct x86_emulate_state {
> ext_8f0a,
> } ext;
> uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
> + uint8_t sib_index, sib_scale;
> uint8_t rex_prefix;
> bool lock_prefix;
> bool not_64bit; /* Instruction not available in 64bit. */
> @@ -2411,7 +2413,7 @@ x86_decode(
> struct x86_emulate_ctxt *ctxt,
> const struct x86_emulate_ops *ops)
> {
> - uint8_t b, d, sib, sib_index, sib_base;
> + uint8_t b, d;
> unsigned int def_op_bytes, def_ad_bytes, opcode;
> enum x86_segment override_seg = x86_seg_none;
> bool pc_rel = false;
> @@ -2745,6 +2747,7 @@ x86_decode(
>
> if ( modrm_mod == 3 )
> {
> + generate_exception_if(d & vSIB, EXC_UD);
> modrm_rm |= (rex_prefix & 1) << 3;
> ea.type = OP_REG;
> }
> @@ -2805,13 +2808,17 @@ x86_decode(
> ea.type = OP_MEM;
> if ( modrm_rm == 4 )
> {
> - sib = insn_fetch_type(uint8_t);
> - sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8);
> - sib_base = (sib & 7) | ((rex_prefix << 3) & 8);
> - if ( sib_index != 4 && !(d & vSIB) )
> - ea.mem.off = *decode_register(sib_index, state->regs,
> - false);
> - ea.mem.off <<= (sib >> 6) & 3;
> + uint8_t sib = insn_fetch_type(uint8_t);
> + uint8_t sib_base = (sib & 7) | ((rex_prefix << 3) & 8);
> +
> +            state->sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8);
> + state->sib_scale = (sib >> 6) & 3;
> + if ( state->sib_index != 4 && !(d & vSIB) )
> + {
> + ea.mem.off = *decode_register(state->sib_index,
> + state->regs, false);
> + ea.mem.off <<= state->sib_scale;
This is a functional change: the scale shift is now applied only inside
the new conditional block, rather than unconditionally after it.
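
For reference, the extraction itself is standard SIB decoding.  A
minimal standalone sketch of what the hunk above computes (hypothetical
helper, not part of the patch):

    #include <stdint.h>

    /* SIB byte layout: scale[7:6] index[5:3] base[2:0].  REX.X (bit 1
     * of the REX prefix) and REX.B (bit 0) extend index and base to
     * four bits in 64-bit mode. */
    static void decode_sib(uint8_t sib, uint8_t rex_prefix,
                           uint8_t *index, uint8_t *base, uint8_t *scale)
    {
        *index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8); /* REX.X */
        *base  = (sib & 7) | ((rex_prefix << 3) & 8);        /* REX.B */
        *scale = (sib >> 6) & 3; /* left-shift count applied to the index */
    }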
> + }
> if ( (modrm_mod == 0) && ((sib_base & 7) == 5) )
> ea.mem.off += insn_fetch_type(int32_t);
> else if ( sib_base == 4 )
> @@ -7472,6 +7479,110 @@ x86_emulate(
> break;
> }
>
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x90): /* vpgatherd{d,q} {x,y}mm,mem,{x,y}mm */
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x91): /* vpgatherq{d,q} {x,y}mm,mem,{x,y}mm */
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x92): /* vgatherdp{s,d} {x,y}mm,mem,{x,y}mm */
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x93): /* vgatherqp{s,d} {x,y}mm,mem,{x,y}mm */
> + {
> + unsigned int mask_reg = ~vex.reg & (mode_64bit() ? 0xf : 7);
> + typeof(vex) *pvex;
> + union {
> + int32_t dw[8];
> + int64_t qw[4];
> + } index, mask;
> +
> + ASSERT(ea.type == OP_MEM);
> + generate_exception_if(modrm_reg == state->sib_index ||
> + modrm_reg == mask_reg ||
> + state->sib_index == mask_reg, EXC_UD);
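
(For context: this matches the SDM rule that a gather takes #UD if any
two of the destination, index, and mask registers are the same, which is
exactly the pairwise check being made here.)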
> + generate_exception_if(!cpu_has_avx, EXC_UD);
> + vcpu_must_have(avx2);
> + get_fpu(X86EMUL_FPU_ymm, &fic);
> +
> + /* Read destination, index, and mask registers. */
> + opc = init_prefixes(stub);
> + pvex = copy_VEX(opc, vex);
> + pvex->opcx = vex_0f;
> + opc[0] = 0x7f; /* vmovdqa */
> + /* Use (%rax) as destination and modrm_reg as source. */
> + pvex->r = !mode_64bit() || !(modrm_reg & 8);
> + pvex->b = 1;
> + opc[1] = (modrm_reg & 7) << 3;
> + pvex->reg = 0xf;
> + opc[2] = 0xc3;
> +
> + invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp));
> +
> + pvex->pfx = vex_f3; /* vmovdqu */
> + /* Switch to sib_index as source. */
> + pvex->r = !mode_64bit() || !(state->sib_index & 8);
> + opc[1] = (state->sib_index & 7) << 3;
> +
> + invoke_stub("", "", "=m" (index) : "a" (&index));
> +
> + /* Switch to mask_reg as source. */
> + pvex->r = !mode_64bit() || !(mask_reg & 8);
> + opc[1] = (mask_reg & 7) << 3;
> +
> + invoke_stub("", "", "=m" (mask) : "a" (&mask));
> + put_stub(stub);
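
For anyone puzzled by the stub gymnastics above: the emulator assembles
a real vmovdqa/vmovdqu store at runtime and calls it, so the guest's ymm
register lands in ordinary memory.  A standalone sketch of the same
encoding trick, restricted to ymm0-7 and the two-byte VEX form
(illustrative only; the patch's init_prefixes()/copy_VEX() machinery
handles the general case):

    #include <stdint.h>

    /* Emit "vmovdqa %ymmN, (%rax); ret" (N < 8) into buf; returns the
     * stub length.  VEX.256.66.0F.WIG 7F /r is the store direction. */
    static unsigned int emit_ymm_store(uint8_t *buf, unsigned int n)
    {
        buf[0] = 0xc5;         /* two-byte VEX prefix */
        buf[1] = 0xfd;         /* ~R=1, vvvv=1111b (unused), L=1, pp=01b (0x66) */
        buf[2] = 0x7f;         /* vmovdqa, register -> memory */
        buf[3] = (n & 7) << 3; /* ModRM: mod=00, reg=N, rm=000 -> (%rax) */
        buf[4] = 0xc3;         /* ret */
        return 5;
    }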
> +
> + /* Clear untouched parts of the destination and mask values. */
> + n = 1 << (2 + vex.l - ((b & 1) | vex.w));
> + op_bytes = 4 << vex.w;
> + memset((void *)mmvalp + n * op_bytes, 0, 32 - n * op_bytes);
> + memset((void *)&mask + n * op_bytes, 0, 32 - n * op_bytes);
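
To make the element-count arithmetic concrete: vpgatherdd with vex.l=1
(b&1 == 0, vex.w == 0) gives n = 1 << 3 = 8 dword elements with
op_bytes = 4, so all 32 bytes are live and the memset()s clear nothing;
vgatherqps with 256-bit indices (b&1 == 1, vex.w == 0) gives
n = 1 << 2 = 4 with op_bytes = 4, so the upper 16 bytes of both
destination and mask get zeroed, matching the architectural result of a
qword-index/dword-element gather filling only an xmm's worth of data.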
> +
> + for ( i = 0; i < n && rc == X86EMUL_OKAY; ++i )
> + {
> + if ( (vex.w ? mask.qw[i] : mask.dw[i]) < 0 )
> + {
> + signed long idx = b & 1 ? index.qw[i] : index.dw[i];
> +
> + rc = ops->read(ea.mem.seg,
> + ea.mem.off + (idx << state->sib_scale),
> +                           (void *)mmvalp + i * op_bytes, op_bytes, ctxt);
> + if ( rc != X86EMUL_OKAY )
> + break;
> +
> +#ifdef __XEN__
> + if ( i + 1 < n && local_events_need_delivery() )
> + rc = X86EMUL_RETRY;
> +#endif
> + }
> +
> + if ( vex.w )
> + mask.qw[i] = 0;
> + else
> + mask.dw[i] = 0;
> + }
The incomplete case here is rather more complicated.  When
rc != X86EMUL_OKAY and local events are pending, RF needs setting,
although it is not clear whether that applies only when an exception is
pending, or between every element.
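
If RF does want setting on any early exit with elements already
gathered, the shape would presumably be something like this after the
loop (purely a sketch of that assumption; it also glosses over the fact
that the real code would update eflags via the emulator's register
writeback rather than poking ctxt->regs directly):

    /* Hypothetical: partial completion -- set RF so re-execution of
     * the gather doesn't re-trigger an instruction breakpoint. */
    if ( rc != X86EMUL_OKAY && i > 0 )
        ctxt->regs->eflags |= X86_EFLAGS_RF;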
> +
> + /* Write destination and mask registers. */
> + opc = init_prefixes(stub);
> + pvex = copy_VEX(opc, vex);
> + pvex->opcx = vex_0f;
> + opc[0] = 0x6f; /* vmovdqa */
> + /* Use modrm_reg as destination and (%rax) as source. */
> + pvex->r = !mode_64bit() || !(modrm_reg & 8);
> + pvex->b = 1;
> + opc[1] = (modrm_reg & 7) << 3;
> + pvex->reg = 0xf;
> + opc[2] = 0xc3;
> +
> + invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp));
> +
> + pvex->pfx = vex_f3; /* vmovdqu */
> + /* Switch to mask_reg as destination. */
> + pvex->r = !mode_64bit() || !(mask_reg & 8);
> + opc[1] = (mask_reg & 7) << 3;
> +
> + invoke_stub("", "", "+m" (mask) : "a" (&mask));
> + put_stub(stub);
> +
> + state->simd_size = simd_none;
> + break;
> + }
> +
> case X86EMUL_OPC_VEX_66(0x0f38, 0x96): /* vfmaddsub132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
> case X86EMUL_OPC_VEX_66(0x0f38, 0x97): /* vfmsubadd132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
> case X86EMUL_OPC_VEX_66(0x0f38, 0x98): /* vfmadd132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
> --- a/xen/arch/x86/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate.c
> @@ -10,6 +10,7 @@
> */
>
> #include <xen/domain_page.h>
> +#include <xen/event.h>
Spurious hunk?
~Andrew
> #include <asm/x86_emulate.h>
> #include <asm/asm_defns.h> /* mark_regs_dirty() */
> #include <asm/processor.h> /* current_cpu_info */
>
>