|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] RE: [PATCH v6 03/10] x86emul: support MOVDIR{I,64B} insns
> -----Original Message-----
> From: Jan Beulich <jbeulich@xxxxxxxx>
> Sent: 14 April 2020 12:45
> To: xen-devel@xxxxxxxxxxxxxxxxxxxx
> Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>; Wei Liu <wl@xxxxxxx>; Roger Pau Monne <roger.pau@xxxxxxxxxx>; Paul Durrant <paul@xxxxxxx>
> Subject: [PATCH v6 03/10] x86emul: support MOVDIR{I,64B} insns
>
> Introduce a new blk() hook, paralleling the rmw() one in a certain way,
> but being intended for larger data sizes, and hence its HVM intermediate
> handling function doesn't fall back to splitting the operation if the
> requested virtual address can't be mapped.
>
> Note that SDM revision 071 doesn't specify exception behavior for
> ModRM.mod == 0b11; assuming #UD here.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
hvm/emulate part...
Reviewed-by: Paul Durrant <paul@xxxxxxx>
> ---
> v6: Fold MOVDIRI and MOVDIR64B changes again. Use blk() for both. All
> tags dropped.
> v5: Introduce/use ->blk() hook. Correct asm() operands.
> v4: Split MOVDIRI and MOVDIR64B and move this one ahead. Re-base.
> v3: Update description.
> ---
> (SDE: -tnt)
>
> --- a/tools/tests/x86_emulator/test_x86_emulator.c
> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
> @@ -652,6 +652,18 @@ static int cmpxchg(
> return X86EMUL_OKAY;
> }
>
> +static int blk(
> + enum x86_segment seg,
> + unsigned long offset,
> + void *p_data,
> + unsigned int bytes,
> + uint32_t *eflags,
> + struct x86_emulate_state *state,
> + struct x86_emulate_ctxt *ctxt)
> +{
> + return x86_emul_blk((void *)offset, p_data, bytes, eflags, state, ctxt);
> +}
> +
> static int read_segment(
> enum x86_segment seg,
> struct segment_register *reg,
> @@ -2339,6 +2351,54 @@ int main(int argc, char **argv)
> goto fail;
> printf("okay\n");
>
> + emulops.blk = blk;
> +
> + printf("%-40s", "Testing movdiri %edx,(%ecx)...");
> + if ( stack_exec && cpu_has_movdiri )
> + {
> + instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf9; instr[3] = 0x11;
> +
> + regs.eip = (unsigned long)&instr[0];
> + regs.ecx = (unsigned long)memset(res, -1, 16);
> + regs.edx = 0x44332211;
> +
> + rc = x86_emulate(&ctxt, &emulops);
> + if ( (rc != X86EMUL_OKAY) ||
> + (regs.eip != (unsigned long)&instr[4]) ||
> + res[0] != 0x44332211 || ~res[1] )
> + goto fail;
> + printf("okay\n");
> + }
> + else
> + printf("skipped\n");
> +
> + printf("%-40s", "Testing movdir64b 144(%edx),%ecx...");
> + if ( stack_exec && cpu_has_movdir64b )
> + {
> + instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0x38; instr[3] = 0xf8;
> + instr[4] = 0x8a; instr[5] = 0x90; instr[8] = instr[7] = instr[6] = 0;
> +
> + regs.eip = (unsigned long)&instr[0];
> + for ( i = 0; i < 64; ++i )
> + res[i] = i - 20;
> + regs.edx = (unsigned long)res;
> + regs.ecx = (unsigned long)(res + 16);
> +
> + rc = x86_emulate(&ctxt, &emulops);
> + if ( (rc != X86EMUL_OKAY) ||
> + (regs.eip != (unsigned long)&instr[9]) ||
> + res[15] != -5 || res[32] != 12 )
> + goto fail;
> + for ( i = 16; i < 32; ++i )
> + if ( res[i] != i )
> + goto fail;
> + printf("okay\n");
> + }
> + else
> + printf("skipped\n");
> +
> + emulops.blk = NULL;
> +
> printf("%-40s", "Testing movq %mm3,(%ecx)...");
> if ( stack_exec && cpu_has_mmx )
> {
> --- a/tools/tests/x86_emulator/x86-emulate.h
> +++ b/tools/tests/x86_emulator/x86-emulate.h
> @@ -154,6 +154,8 @@ static inline bool xcr0_mask(uint64_t ma
> #define cpu_has_avx512_vnni (cp.feat.avx512_vnni && xcr0_mask(0xe6))
> #define cpu_has_avx512_bitalg (cp.feat.avx512_bitalg && xcr0_mask(0xe6))
> #define cpu_has_avx512_vpopcntdq (cp.feat.avx512_vpopcntdq && xcr0_mask(0xe6))
> +#define cpu_has_movdiri cp.feat.movdiri
> +#define cpu_has_movdir64b cp.feat.movdir64b
> #define cpu_has_avx512_4vnniw (cp.feat.avx512_4vnniw && xcr0_mask(0xe6))
> #define cpu_has_avx512_4fmaps (cp.feat.avx512_4fmaps && xcr0_mask(0xe6))
> #define cpu_has_avx512_bf16 (cp.feat.avx512_bf16 && xcr0_mask(0xe6))
> --- a/xen/arch/x86/Makefile
> +++ b/xen/arch/x86/Makefile
> @@ -250,12 +250,13 @@ $(BASEDIR)/include/asm-x86/asm-macros.h:
> # sure we pick up changes when the compiler used has changed.)
> ifeq ($(MAKECMDGOALS),asm-offsets.s)
>
> -as-ISA-list := CLWB EPT FSGSBASE INVPCID RDRAND RDSEED SSE4_2 VMX XSAVEOPT
> +as-ISA-list := CLWB EPT FSGSBASE INVPCID MOVDIR RDRAND RDSEED SSE4_2 VMX XSAVEOPT
>
> CLWB-insn := clwb (%rax)
> EPT-insn := invept (%rax),%rax
> FSGSBASE-insn := rdfsbase %rax
> INVPCID-insn := invpcid (%rax),%rax
> +MOVDIR-insn := movdiri %rax,(%rax)
> RDRAND-insn := rdrand %eax
> RDSEED-insn := rdseed %eax
> SSE4_2-insn := crc32 %eax,%eax
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -1409,6 +1409,44 @@ static int hvmemul_rmw(
> return rc;
> }
>
> +static int hvmemul_blk(
> + enum x86_segment seg,
> + unsigned long offset,
> + void *p_data,
> + unsigned int bytes,
> + uint32_t *eflags,
> + struct x86_emulate_state *state,
> + struct x86_emulate_ctxt *ctxt)
> +{
> + struct hvm_emulate_ctxt *hvmemul_ctxt =
> + container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
> + unsigned long addr;
> + uint32_t pfec = PFEC_page_present | PFEC_write_access;
> + int rc;
> + void *mapping = NULL;
> +
> + rc = hvmemul_virtual_to_linear(
> + seg, offset, bytes, NULL, hvm_access_write, hvmemul_ctxt, &addr);
> + if ( rc != X86EMUL_OKAY || !bytes )
> + return rc;
> +
> + if ( is_x86_system_segment(seg) )
> + pfec |= PFEC_implicit;
> + else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
> + pfec |= PFEC_user_mode;
> +
> + mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt);
> + if ( IS_ERR(mapping) )
> + return ~PTR_ERR(mapping);
> + if ( !mapping )
> + return X86EMUL_UNHANDLEABLE;
> +
> + rc = x86_emul_blk(mapping, p_data, bytes, eflags, state, ctxt);
> + hvmemul_unmap_linear_addr(mapping, addr, bytes, hvmemul_ctxt);
> +
> + return rc;
> +}
> +
> static int hvmemul_write_discard(
> enum x86_segment seg,
> unsigned long offset,
> @@ -2475,6 +2513,7 @@ static const struct x86_emulate_ops hvm_
> .write = hvmemul_write,
> .rmw = hvmemul_rmw,
> .cmpxchg = hvmemul_cmpxchg,
> + .blk = hvmemul_blk,
> .validate = hvmemul_validate,
> .rep_ins = hvmemul_rep_ins,
> .rep_outs = hvmemul_rep_outs,
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -548,6 +548,8 @@ static const struct ext0f38_table {
> [0xf1] = { .to_mem = 1, .two_op = 1 },
> [0xf2 ... 0xf3] = {},
> [0xf5 ... 0xf7] = {},
> + [0xf8] = { .simd_size = simd_other },
> + [0xf9] = { .to_mem = 1, .two_op = 1 /* Mov */ },
> };
>
> /* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */
> @@ -851,6 +853,9 @@ struct x86_emulate_state {
> rmw_xchg,
> rmw_xor,
> } rmw;
> + enum {
> + blk_movdir,
> + } blk;
> uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
> uint8_t sib_index, sib_scale;
> uint8_t rex_prefix;
> @@ -1914,6 +1919,8 @@ amd_like(const struct x86_emulate_ctxt *
> #define vcpu_has_avx512_bitalg() (ctxt->cpuid->feat.avx512_bitalg)
> #define vcpu_has_avx512_vpopcntdq() (ctxt->cpuid->feat.avx512_vpopcntdq)
> #define vcpu_has_rdpid() (ctxt->cpuid->feat.rdpid)
> +#define vcpu_has_movdiri() (ctxt->cpuid->feat.movdiri)
> +#define vcpu_has_movdir64b() (ctxt->cpuid->feat.movdir64b)
> #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw)
> #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
> #define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
> @@ -2722,10 +2729,12 @@ x86_decode_0f38(
> {
> case 0x00 ... 0xef:
> case 0xf2 ... 0xf5:
> - case 0xf7 ... 0xff:
> + case 0xf7 ... 0xf8:
> + case 0xfa ... 0xff:
> op_bytes = 0;
> /* fall through */
> case 0xf6: /* adcx / adox */
> + case 0xf9: /* movdiri */
> ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
> break;
>
> @@ -10171,6 +10180,34 @@ x86_emulate(
> : "0" ((uint32_t)src.val), "rm" (_regs.edx) );
> break;
>
> + case X86EMUL_OPC_66(0x0f38, 0xf8): /* movdir64b r,m512 */
> + host_and_vcpu_must_have(movdir64b);
> + generate_exception_if(ea.type != OP_MEM, EXC_UD);
> + src.val = truncate_ea(*dst.reg);
> +        generate_exception_if(!is_aligned(x86_seg_es, src.val, 64, ctxt, ops),
> +                              EXC_GP, 0);
> + fail_if(!ops->blk);
> + state->blk = blk_movdir;
> + BUILD_BUG_ON(sizeof(*mmvalp) < 64);
> + if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64,
> + ctxt)) != X86EMUL_OKAY ||
> + (rc = ops->blk(x86_seg_es, src.val, mmvalp, 64, &_regs.eflags,
> + state, ctxt)) != X86EMUL_OKAY )
> + goto done;
> + state->simd_size = simd_none;
> + break;
> +
> + case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */
> + host_and_vcpu_must_have(movdiri);
> + generate_exception_if(dst.type != OP_MEM, EXC_UD);
> + fail_if(!ops->blk);
> + state->blk = blk_movdir;
> + if ( (rc = ops->blk(dst.mem.seg, dst.mem.off, &src.val, op_bytes,
> + &_regs.eflags, state, ctxt)) != X86EMUL_OKAY )
> + goto done;
> + dst.type = OP_NONE;
> + break;
> +
> #ifndef X86EMUL_NO_SIMD
>
> case X86EMUL_OPC_VEX_66(0x0f3a, 0x00): /* vpermq $imm8,ymm/m256,ymm */
> @@ -11429,6 +11466,77 @@ int x86_emul_rmw(
>
> return X86EMUL_OKAY;
> }
> +
> +int x86_emul_blk(
> + void *ptr,
> + void *data,
> + unsigned int bytes,
> + uint32_t *eflags,
> + struct x86_emulate_state *state,
> + struct x86_emulate_ctxt *ctxt)
> +{
> + switch ( state->blk )
> + {
> + /*
> + * Throughout this switch(), memory clobbers are used to compensate
> + * that other operands may not properly express the (full) memory
> + * ranges covered.
> + */
> + case blk_movdir:
> + switch ( bytes )
> + {
> +#ifdef __x86_64__
> + case sizeof(uint32_t):
> +# ifdef HAVE_AS_MOVDIR
> + asm ( "movdiri %0, (%1)"
> + :: "r" (*(uint32_t *)data), "r" (ptr) : "memory" );
> +# else
> + /* movdiri %esi, (%rdi) */
> + asm ( ".byte 0x0f, 0x38, 0xf9, 0x37"
> + :: "S" (*(uint32_t *)data), "D" (ptr) : "memory" );
> +# endif
> + break;
> +#endif
> +
> + case sizeof(unsigned long):
> +#ifdef HAVE_AS_MOVDIR
> + asm ( "movdiri %0, (%1)"
> + :: "r" (*(unsigned long *)data), "r" (ptr) : "memory" );
> +#else
> + /* movdiri %rsi, (%rdi) */
> + asm ( ".byte 0x48, 0x0f, 0x38, 0xf9, 0x37"
> + :: "S" (*(unsigned long *)data), "D" (ptr) : "memory" );
> +#endif
> + break;
> +
> + case 64:
> + if ( ((unsigned long)ptr & 0x3f) )
> + {
> + ASSERT_UNREACHABLE();
> + return X86EMUL_UNHANDLEABLE;
> + }
> +#ifdef HAVE_AS_MOVDIR
> + asm ( "movdir64b (%0), %1" :: "r" (data), "r" (ptr) : "memory" );
> +#else
> + /* movdir64b (%rsi), %rdi */
> + asm ( ".byte 0x66, 0x0f, 0x38, 0xf8, 0x3e"
> + :: "S" (data), "D" (ptr) : "memory" );
> +#endif
> + break;
> +
> + default:
> + ASSERT_UNREACHABLE();
> + return X86EMUL_UNHANDLEABLE;
> + }
> + break;
> +
> + default:
> + ASSERT_UNREACHABLE();
> + return X86EMUL_UNHANDLEABLE;
> + }
> +
> + return X86EMUL_OKAY;
> +}
>
> static void __init __maybe_unused build_assertions(void)
> {
> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -310,6 +310,22 @@ struct x86_emulate_ops
> struct x86_emulate_ctxt *ctxt);
>
> /*
> + * blk: Emulate a large (block) memory access.
> + * @p_data: [IN/OUT] (optional) Pointer to source/destination buffer.
> + * @eflags: [IN/OUT] Pointer to EFLAGS to be updated according to
> + * instruction effects.
> + * @state: [IN/OUT] Pointer to (opaque) emulator state.
> + */
> + int (*blk)(
> + enum x86_segment seg,
> + unsigned long offset,
> + void *p_data,
> + unsigned int bytes,
> + uint32_t *eflags,
> + struct x86_emulate_state *state,
> + struct x86_emulate_ctxt *ctxt);
> +
> + /*
> * validate: Post-decode, pre-emulate hook to allow caller controlled
> * filtering.
> */
> @@ -793,6 +809,14 @@ x86_emul_rmw(
> unsigned int bytes,
> uint32_t *eflags,
> struct x86_emulate_state *state,
> + struct x86_emulate_ctxt *ctxt);
> +int
> +x86_emul_blk(
> + void *ptr,
> + void *data,
> + unsigned int bytes,
> + uint32_t *eflags,
> + struct x86_emulate_state *state,
> struct x86_emulate_ctxt *ctxt);
>
> static inline void x86_emul_hw_exception(
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -120,6 +120,8 @@
> #define cpu_has_avx512_bitalg boot_cpu_has(X86_FEATURE_AVX512_BITALG)
> #define cpu_has_avx512_vpopcntdq boot_cpu_has(X86_FEATURE_AVX512_VPOPCNTDQ)
> #define cpu_has_rdpid boot_cpu_has(X86_FEATURE_RDPID)
> +#define cpu_has_movdiri boot_cpu_has(X86_FEATURE_MOVDIRI)
> +#define cpu_has_movdir64b boot_cpu_has(X86_FEATURE_MOVDIR64B)
>
> /* CPUID level 0x80000007.edx */
> #define cpu_has_itsc boot_cpu_has(X86_FEATURE_ITSC)
> --- a/xen/include/public/arch-x86/cpufeatureset.h
> +++ b/xen/include/public/arch-x86/cpufeatureset.h
> @@ -237,6 +237,8 @@ XEN_CPUFEATURE(AVX512_BITALG, 6*32+12) /
> XEN_CPUFEATURE(AVX512_VPOPCNTDQ, 6*32+14) /*A POPCNT for vectors of DW/QW */
> XEN_CPUFEATURE(RDPID, 6*32+22) /*A RDPID instruction */
> XEN_CPUFEATURE(CLDEMOTE, 6*32+25) /*A CLDEMOTE instruction */
> +XEN_CPUFEATURE(MOVDIRI, 6*32+27) /*A MOVDIRI instruction */
> +XEN_CPUFEATURE(MOVDIR64B, 6*32+28) /*A MOVDIR64B instruction */
>
> /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */
> XEN_CPUFEATURE(ITSC, 7*32+ 8) /* Invariant TSC */
|
Lists.xenproject.org is hosted with RackSpace, monitoring our servers 24x7x365 and backed by RackSpace's Fanatical Support®.