[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen staging] x86emul: support AVX512 opmask insns
commit 304a8301fc9c6ff1f45a6aa081c56cd57bc7696a Author: Jan Beulich <jbeulich@xxxxxxxx> AuthorDate: Fri Oct 26 15:20:37 2018 +0200 Commit: Jan Beulich <jbeulich@xxxxxxxx> CommitDate: Fri Oct 26 15:20:37 2018 +0200 x86emul: support AVX512 opmask insns These are all VEX encoded, so the EVEX decoding logic continues to remain unused at this point. The new testcase is deliberately coded in assembly, as a C one would have become almost unreadable due to the overwhelming amount of __builtin_...() that would need to be used. After all the compiler has no underlying type (yet) that could be operated on without builtins, other than the vector types used for "normal" SIMD insns. Note that outside of 64-bit mode and despite the SDM not currently saying so, VEX.W is ignored for the KMOV{D,Q} encodings to/from GPRs, just like e.g. for the similar VMOV{D,Q}. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> --- tools/tests/x86_emulator/Makefile | 31 +++- tools/tests/x86_emulator/opmask.S | 144 ++++++++++++++++ tools/tests/x86_emulator/test_x86_emulator.c | 29 +++- tools/tests/x86_emulator/testcase.mk | 6 + tools/tests/x86_emulator/x86-emulate.c | 3 + tools/tests/x86_emulator/x86-emulate.h | 30 ++++ xen/arch/x86/x86_emulate/x86_emulate.c | 240 +++++++++++++++++++++++++++ xen/arch/x86/x86_emulate/x86_emulate.h | 1 + xen/include/asm-x86/cpufeature.h | 3 + 9 files changed, 483 insertions(+), 4 deletions(-) diff --git a/tools/tests/x86_emulator/Makefile b/tools/tests/x86_emulator/Makefile index e8a3e9057e..a97c43b9c2 100644 --- a/tools/tests/x86_emulator/Makefile +++ b/tools/tests/x86_emulator/Makefile @@ -16,6 +16,8 @@ FMA := fma4 fma SG := avx2-sg TESTCASES := blowfish $(SIMD) $(FMA) $(SG) +OPMASK := avx512f avx512dq avx512bw + blowfish-cflags := "" blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic=" @@ -51,6 +53,10 @@ xop-vecs := $(avx-vecs) xop-ints := 1 2 4 8 xop-flts := $(avx-flts) +avx512f-opmask-vecs := 2 
+avx512dq-opmask-vecs := 1 +avx512bw-opmask-vecs := 4 8 + # For AVX and later, have the compiler avoid XMM0 to widen coverage of # the VEX.vvvv checks in the emulator. For 3DNow!, however, force SSE # use for floating point operations, to avoid mixing MMX and FPU register @@ -80,9 +86,13 @@ $(1)-cflags := \ $(foreach flt,$($(1)-flts), \ "-D_$(vec)x$(idx)f$(flt) -m$(1:-sg=) $(call non-sse,$(1)) -Os -DVEC_MAX=$(vec) -DIDX_SIZE=$(idx) -DFLOAT_SIZE=$(flt)"))) endef +define opmask-defs +$(1)-opmask-cflags := $(foreach vec,$($(1)-opmask-vecs), "-D_$(vec) -m$(1) -Os -DSIZE=$(vec)") +endef $(foreach flavor,$(SIMD) $(FMA),$(eval $(call simd-defs,$(flavor)))) $(foreach flavor,$(SG),$(eval $(call simd-sg-defs,$(flavor)))) +$(foreach flavor,$(OPMASK),$(eval $(call opmask-defs,$(flavor)))) $(addsuffix .h,$(TESTCASES)): %.h: %.c testcase.mk Makefile rm -f $@.new $*.bin @@ -100,6 +110,22 @@ $(addsuffix .h,$(TESTCASES)): %.h: %.c testcase.mk Makefile ) mv $@.new $@ +$(addsuffix -opmask.h,$(OPMASK)): %.h: opmask.S testcase.mk Makefile + rm -f $@.new $*.bin + $(foreach arch,$(filter-out $(XEN_COMPILE_ARCH),x86_32) $(XEN_COMPILE_ARCH), \ + for cflags in $($*-cflags) $($*-cflags-$(arch)); do \ + $(MAKE) -f testcase.mk TESTCASE=$* XEN_TARGET_ARCH=$(arch) $*-cflags="$$cflags" all; \ + prefix=$(shell echo $(subst -,_,$*) | sed -e 's,^\([0-9]\),_\1,'); \ + flavor=$$(echo $${cflags} | sed -e 's, .*,,' -e 'y,-=,__,') ; \ + (echo 'static const unsigned int __attribute__((section(".test, \"ax\", @progbits #")))' \ + "$${prefix}_$(arch)$${flavor}[] = {"; \ + od -v -t x $*.bin | sed -e 's/^[0-9]* /0x/' -e 's/ /, 0x/g' -e 's/$$/,/'; \ + echo "};") >>$@.new; \ + rm -f $*.bin; \ + done; \ + ) + mv $@.new $@ + $(addsuffix .c,$(SIMD)): ln -sf simd.c $@ @@ -118,7 +144,8 @@ $(TARGET): x86-emulate.o test_x86_emulator.o wrappers.o .PHONY: clean clean: - rm -rf $(TARGET) *.o *~ core $(addsuffix .h,$(TESTCASES)) *.bin x86_emulate + rm -rf $(TARGET) *.o *~ core *.bin x86_emulate + rm -rf $(TARGET) 
$(addsuffix .h,$(TESTCASES)) $(addsuffix -opmask.h,$(OPMASK)) .PHONY: distclean distclean: clean @@ -145,4 +172,4 @@ x86-emulate.o test_x86_emulator.o wrappers.o: %.o: %.c $(x86_emulate.h) x86-emulate.o: x86_emulate/x86_emulate.c x86-emulate.o: HOSTCFLAGS += -D__XEN_TOOLS__ -test_x86_emulator.o: $(addsuffix .h,$(TESTCASES)) +test_x86_emulator.o: $(addsuffix .h,$(TESTCASES)) $(addsuffix -opmask.h,$(OPMASK)) diff --git a/tools/tests/x86_emulator/opmask.S b/tools/tests/x86_emulator/opmask.S new file mode 100644 index 0000000000..3fad8b1f10 --- /dev/null +++ b/tools/tests/x86_emulator/opmask.S @@ -0,0 +1,144 @@ +#ifdef __i386__ +# define R(x) e##x +# define DATA(x) x +#else +# if SIZE == 8 +# define R(x) r##x +# else +# define R(x) e##x +# endif +# define DATA(x) x(%rip) +#endif + +#if SIZE == 1 +# define _(x) x##b +#elif SIZE == 2 +# define _(x) x##w +# define WIDEN(x) x##bw +#elif SIZE == 4 +# define _(x) x##d +# define WIDEN(x) x##wd +#elif SIZE == 8 +# define _(x) x##q +# define WIDEN(x) x##dq +#endif + + .macro check res1:req, res2:req, line:req + _(kmov) %\res1, DATA(out) +#if SIZE < 8 || !defined(__i386__) + _(kmov) %\res2, %R(dx) + cmp DATA(out), %R(dx) +#else + sub $8, %esp + kmovq %\res2, (%esp) + pop %ecx + pop %edx + cmp DATA(out), %ecx + jne 0f + cmp DATA(out+4), %edx +0: +#endif + je 1f + mov $\line, %eax + ret +1: + .endm + + .text + .globl _start +_start: + _(kmov) DATA(in1), %k1 +#if SIZE < 8 || !defined(__i386__) + mov DATA(in2), %R(ax) + _(kmov) %R(ax), %k2 +#else + _(kmov) DATA(in2), %k2 +#endif + + _(kor) %k1, %k2, %k3 + _(kand) %k1, %k2, %k4 + _(kandn) %k3, %k4, %k5 + _(kxor) %k1, %k2, %k6 + check k5, k6, __LINE__ + + _(knot) %k6, %k3 + _(kxnor) %k1, %k2, %k4 + check k3, k4, __LINE__ + + _(kshiftl) $1, %k1, %k3 + _(kshiftl) $2, %k3, %k4 + _(kshiftl) $3, %k1, %k5 + check k4, k5, __LINE__ + + _(kshiftr) $1, %k1, %k3 + _(kshiftr) $2, %k3, %k4 + _(kshiftr) $3, %k1, %k5 + check k4, k5, __LINE__ + + _(kortest) %k6, %k6 + jnbe 1f + mov $__LINE__, %eax + 
ret +1: + + _(kxor) %k0, %k0, %k3 + _(kortest) %k3, %k3 + jz 1f + mov $__LINE__, %eax + ret +1: + + _(kxnor) %k0, %k0, %k3 + _(kortest) %k3, %k3 + jc 1f + mov $__LINE__, %eax + ret +1: + +#if SIZE > 1 + + _(kshiftr) $SIZE*4, %k3, %k4 + WIDEN(kunpck) %k4, %k4, %k5 + check k3, k5, __LINE__ + +#endif + +#if SIZE != 2 || defined(__AVX512DQ__) + + _(kadd) %k1, %k1, %k3 + _(kshiftl) $1, %k1, %k4 + check k3, k4, __LINE__ + + _(ktest) %k2, %k1 + jnbe 1f + mov $__LINE__, %eax + ret +1: + + _(kxor) %k0, %k0, %k3 + _(ktest) %k0, %k3 + jz 1f + mov $__LINE__, %eax + ret +1: + + _(kxnor) %k0, %k0, %k4 + _(ktest) %k0, %k4 + jc 1f + mov $__LINE__, %eax + ret +1: + +#endif + + xor %eax, %eax + ret + + .section .rodata, "a", @progbits + .balign 8 +in1: .byte 0b10110011, 0b10001111, 0b00001111, 0b10000011, 0b11110000, 0b00111111, 0b10000000, 0b11111111 +in2: .byte 0b11111111, 0b00000001, 0b11111100, 0b00001111, 0b11000001, 0b11110000, 0b11110001, 0b11001101 + + .data + .balign 8 +out: .quad 0 diff --git a/tools/tests/x86_emulator/test_x86_emulator.c b/tools/tests/x86_emulator/test_x86_emulator.c index 2f6fb679de..ed5a3d8853 100644 --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -18,6 +18,9 @@ asm ( ".pushsection .test, \"ax\", @progbits; .popsection" ); #include "avx2.h" #include "avx2-sg.h" #include "xop.h" +#include "avx512f-opmask.h" +#include "avx512dq-opmask.h" +#include "avx512bw-opmask.h" #define verbose false /* Switch to true for far more logging. 
*/ @@ -78,6 +81,24 @@ static bool simd_check_xop(void) return cpu_has_xop; } +static bool simd_check_avx512f(void) +{ + return cpu_has_avx512f; +} +#define simd_check_avx512f_opmask simd_check_avx512f + +static bool simd_check_avx512dq(void) +{ + return cpu_has_avx512dq; +} +#define simd_check_avx512dq_opmask simd_check_avx512dq + +static bool simd_check_avx512bw(void) +{ + return cpu_has_avx512bw; +} +#define simd_check_avx512bw_opmask simd_check_avx512bw + static void simd_set_regs(struct cpu_user_regs *regs) { if ( cpu_has_mmx ) @@ -223,6 +244,10 @@ static const struct { SIMD(XOP i16x16, xop, 32i2), SIMD(XOP i32x8, xop, 32i4), SIMD(XOP i64x4, xop, 32i8), + SIMD(OPMASK/w, avx512f_opmask, 2), + SIMD(OPMASK/b, avx512dq_opmask, 1), + SIMD(OPMASK/d, avx512bw_opmask, 4), + SIMD(OPMASK/q, avx512bw_opmask, 8), #undef SIMD_ #undef SIMD }; @@ -3426,8 +3451,8 @@ int main(int argc, char **argv) rc = x86_emulate(&ctxt, &emulops); if ( rc != X86EMUL_OKAY ) { - printf("failed at %%eip == %08lx (opcode %08x)\n", - (unsigned long)regs.eip, ctxt.opcode); + printf("failed (%d) at %%eip == %08lx (opcode %08x)\n", + rc, (unsigned long)regs.eip, ctxt.opcode); return 1; } } diff --git a/tools/tests/x86_emulator/testcase.mk b/tools/tests/x86_emulator/testcase.mk index 0a72b8db29..a565d15524 100644 --- a/tools/tests/x86_emulator/testcase.mk +++ b/tools/tests/x86_emulator/testcase.mk @@ -14,3 +14,9 @@ all: $(TESTCASE).bin $(LD) $(LDFLAGS_DIRECT) -N -Ttext 0x100000 -o $*.tmp $*.o $(OBJCOPY) -O binary $*.tmp $@ rm -f $*.tmp + +%-opmask.bin: opmask.S + $(CC) $(filter-out -M% .%,$(CFLAGS)) -c $< -o $(basename $@).o + $(LD) $(LDFLAGS_DIRECT) -N -Ttext 0x100000 -o $(basename $@).tmp $(basename $@).o + $(OBJCOPY) -O binary $(basename $@).tmp $@ + rm -f $(basename $@).tmp diff --git a/tools/tests/x86_emulator/x86-emulate.c b/tools/tests/x86_emulator/x86-emulate.c index bb5908b59e..aba5768d53 100644 --- a/tools/tests/x86_emulator/x86-emulate.c +++ b/tools/tests/x86_emulator/x86-emulate.c @@ 
-209,6 +209,9 @@ int emul_test_get_fpu( case X86EMUL_FPU_ymm: if ( cpu_has_avx ) break; + case X86EMUL_FPU_opmask: + if ( cpu_has_avx512f ) + break; default: return X86EMUL_UNHANDLEABLE; } diff --git a/tools/tests/x86_emulator/x86-emulate.h b/tools/tests/x86_emulator/x86-emulate.h index 08dead32fd..ef58466e6e 100644 --- a/tools/tests/x86_emulator/x86-emulate.h +++ b/tools/tests/x86_emulator/x86-emulate.h @@ -260,6 +260,36 @@ static inline uint64_t xgetbv(uint32_t xcr) (res.c & (1U << 21)) != 0; \ }) +#define cpu_has_avx512f ({ \ + struct cpuid_leaf res; \ + emul_test_cpuid(1, 0, &res, NULL); \ + if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 0xe6) != 0xe6) ) \ + res.b = 0; \ + else \ + emul_test_cpuid(7, 0, &res, NULL); \ + (res.b & (1U << 16)) != 0; \ +}) + +#define cpu_has_avx512dq ({ \ + struct cpuid_leaf res; \ + emul_test_cpuid(1, 0, &res, NULL); \ + if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 0xe6) != 0xe6) ) \ + res.b = 0; \ + else \ + emul_test_cpuid(7, 0, &res, NULL); \ + (res.b & (1U << 17)) != 0; \ +}) + +#define cpu_has_avx512bw ({ \ + struct cpuid_leaf res; \ + emul_test_cpuid(1, 0, &res, NULL); \ + if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 0xe6) != 0xe6) ) \ + res.b = 0; \ + else \ + emul_test_cpuid(7, 0, &res, NULL); \ + (res.b & (1U << 30)) != 0; \ +}) + int emul_test_cpuid( uint32_t leaf, uint32_t subleaf, diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c index 4afc3f6ca3..90132f4c7c 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -491,6 +491,7 @@ static const struct ext0f3a_table { [0x20] = { .simd_size = simd_none }, [0x21] = { .simd_size = simd_other }, [0x22] = { .simd_size = simd_none }, + [0x30 ... 0x33] = { .simd_size = simd_other, .two_op = 1 }, [0x38] = { .simd_size = simd_128 }, [0x39] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 }, [0x40 ... 
0x41] = { .simd_size = simd_packed_fp }, @@ -1187,6 +1188,11 @@ static int _get_fpu( return X86EMUL_UNHANDLEABLE; break; + case X86EMUL_FPU_opmask: + if ( !(xcr0 & X86_XCR0_SSE) || !(xcr0 & X86_XCR0_OPMASK) ) + return X86EMUL_UNHANDLEABLE; + break; + default: break; } @@ -1762,12 +1768,15 @@ static bool vcpu_has( #define vcpu_has_bmi2() vcpu_has( 7, EBX, 8, ctxt, ops) #define vcpu_has_rtm() vcpu_has( 7, EBX, 11, ctxt, ops) #define vcpu_has_mpx() vcpu_has( 7, EBX, 14, ctxt, ops) +#define vcpu_has_avx512f() vcpu_has( 7, EBX, 16, ctxt, ops) +#define vcpu_has_avx512dq() vcpu_has( 7, EBX, 17, ctxt, ops) #define vcpu_has_rdseed() vcpu_has( 7, EBX, 18, ctxt, ops) #define vcpu_has_adx() vcpu_has( 7, EBX, 19, ctxt, ops) #define vcpu_has_smap() vcpu_has( 7, EBX, 20, ctxt, ops) #define vcpu_has_clflushopt() vcpu_has( 7, EBX, 23, ctxt, ops) #define vcpu_has_clwb() vcpu_has( 7, EBX, 24, ctxt, ops) #define vcpu_has_sha() vcpu_has( 7, EBX, 29, ctxt, ops) +#define vcpu_has_avx512bw() vcpu_has( 7, EBX, 30, ctxt, ops) #define vcpu_has_rdpid() vcpu_has( 7, ECX, 22, ctxt, ops) #define vcpu_has_clzero() vcpu_has(0x80000008, EBX, 0, ctxt, ops) @@ -2396,6 +2405,18 @@ x86_decode_twobyte( } break; + case X86EMUL_OPC_VEX(0, 0x90): /* kmov{w,q} */ + case X86EMUL_OPC_VEX_66(0, 0x90): /* kmov{b,d} */ + state->desc = DstReg | SrcMem | Mov; + state->simd_size = simd_other; + break; + + case X86EMUL_OPC_VEX(0, 0x91): /* kmov{w,q} */ + case X86EMUL_OPC_VEX_66(0, 0x91): /* kmov{b,d} */ + state->desc = DstMem | SrcReg | Mov; + state->simd_size = simd_other; + break; + case 0xae: ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK); /* fall through */ @@ -6002,6 +6023,60 @@ x86_emulate( dst.val = src.val; break; + case X86EMUL_OPC_VEX(0x0f, 0x4a): /* kadd{w,q} k,k,k */ + if ( !vex.w ) + host_and_vcpu_must_have(avx512dq); + /* fall through */ + case X86EMUL_OPC_VEX(0x0f, 0x41): /* kand{w,q} k,k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x41): /* kand{b,d} k,k,k */ + case X86EMUL_OPC_VEX(0x0f, 0x42): /* 
kandn{w,q} k,k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x42): /* kandn{b,d} k,k,k */ + case X86EMUL_OPC_VEX(0x0f, 0x45): /* kor{w,q} k,k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x45): /* kor{b,d} k,k,k */ + case X86EMUL_OPC_VEX(0x0f, 0x46): /* kxnor{w,q} k,k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x46): /* kxnor{b,d} k,k,k */ + case X86EMUL_OPC_VEX(0x0f, 0x47): /* kxor{w,q} k,k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x47): /* kxor{b,d} k,k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x4a): /* kadd{b,d} k,k,k */ + generate_exception_if(!vex.l, EXC_UD); + opmask_basic: + if ( vex.w ) + host_and_vcpu_must_have(avx512bw); + else if ( vex.pfx ) + host_and_vcpu_must_have(avx512dq); + opmask_common: + host_and_vcpu_must_have(avx512f); + generate_exception_if(!vex.r || (mode_64bit() && !(vex.reg & 8)) || + ea.type != OP_REG, EXC_UD); + + vex.reg |= 8; + d &= ~TwoOp; + + get_fpu(X86EMUL_FPU_opmask); + + opc = init_prefixes(stub); + opc[0] = b; + opc[1] = modrm; + insn_bytes = PFX_BYTES + 2; + + state->simd_size = simd_other; + op_bytes = 1; /* Any non-zero value will do. 
*/ + break; + + case X86EMUL_OPC_VEX(0x0f, 0x44): /* knot{w,q} k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x44): /* knot{b,d} k,k */ + generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD); + goto opmask_basic; + + case X86EMUL_OPC_VEX(0x0f, 0x4b): /* kunpck{w,d}{d,q} k,k,k */ + generate_exception_if(!vex.l, EXC_UD); + host_and_vcpu_must_have(avx512bw); + goto opmask_common; + + case X86EMUL_OPC_VEX_66(0x0f, 0x4b): /* kunpckbw k,k,k */ + generate_exception_if(!vex.l || vex.w, EXC_UD); + goto opmask_common; + CASE_SIMD_PACKED_FP(, 0x0f, 0x50): /* movmskp{s,d} xmm,reg */ CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x50): /* vmovmskp{s,d} {x,y}mm,reg */ CASE_SIMD_PACKED_INT(0x0f, 0xd7): /* pmovmskb {,x}mm,reg */ @@ -6552,6 +6627,154 @@ x86_emulate( dst.val = test_cc(b, _regs.eflags); break; + case X86EMUL_OPC_VEX(0x0f, 0x91): /* kmov{w,q} k,mem */ + case X86EMUL_OPC_VEX_66(0x0f, 0x91): /* kmov{b,d} k,mem */ + generate_exception_if(ea.type != OP_MEM, EXC_UD); + /* fall through */ + case X86EMUL_OPC_VEX(0x0f, 0x90): /* kmov{w,q} k/mem,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x90): /* kmov{b,d} k/mem,k */ + generate_exception_if(vex.l || !vex.r, EXC_UD); + host_and_vcpu_must_have(avx512f); + if ( vex.w ) + { + host_and_vcpu_must_have(avx512bw); + op_bytes = 4 << !vex.pfx; + } + else if ( vex.pfx ) + { + host_and_vcpu_must_have(avx512dq); + op_bytes = 1; + } + else + op_bytes = 2; + + get_fpu(X86EMUL_FPU_opmask); + + opc = init_prefixes(stub); + opc[0] = b; + opc[1] = modrm; + if ( ea.type == OP_MEM ) + { + /* convert memory operand to (%rAX) */ + vex.b = 1; + opc[1] &= 0x38; + } + insn_bytes = PFX_BYTES + 2; + break; + + case X86EMUL_OPC_VEX(0x0f, 0x92): /* kmovw r32,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x92): /* kmovb r32,k */ + case X86EMUL_OPC_VEX_F2(0x0f, 0x92): /* kmov{d,q} reg,k */ + generate_exception_if(vex.l || !vex.r || vex.reg != 0xf || + ea.type != OP_REG, EXC_UD); + + host_and_vcpu_must_have(avx512f); + if ( vex.pfx == vex_f2 ) + host_and_vcpu_must_have(avx512bw); + else + { 
+ generate_exception_if(vex.w, EXC_UD); + if ( vex.pfx ) + host_and_vcpu_must_have(avx512dq); + } + + get_fpu(X86EMUL_FPU_opmask); + + opc = init_prefixes(stub); + opc[0] = b; + /* Convert GPR source to %rAX. */ + vex.b = 1; + if ( !mode_64bit() ) + vex.w = 0; + opc[1] = modrm & 0xf8; + opc[2] = 0xc3; + + copy_VEX(opc, vex); + ea.reg = decode_gpr(&_regs, modrm_rm); + invoke_stub("", "", "=m" (dummy) : "a" (*ea.reg)); + + put_stub(stub); + + ASSERT(!state->simd_size); + dst.type = OP_NONE; + break; + + case X86EMUL_OPC_VEX(0x0f, 0x93): /* kmovw k,r32 */ + case X86EMUL_OPC_VEX_66(0x0f, 0x93): /* kmovb k,r32 */ + case X86EMUL_OPC_VEX_F2(0x0f, 0x93): /* kmov{d,q} k,reg */ + generate_exception_if(vex.l || vex.reg != 0xf || ea.type != OP_REG, + EXC_UD); + dst = ea; + dst.reg = decode_gpr(&_regs, modrm_reg); + + host_and_vcpu_must_have(avx512f); + if ( vex.pfx == vex_f2 ) + { + host_and_vcpu_must_have(avx512bw); + dst.bytes = 4 << (mode_64bit() && vex.w); + } + else + { + generate_exception_if(vex.w, EXC_UD); + dst.bytes = 4; + if ( vex.pfx ) + host_and_vcpu_must_have(avx512dq); + } + + get_fpu(X86EMUL_FPU_opmask); + + opc = init_prefixes(stub); + opc[0] = b; + /* Convert GPR destination to %rAX. 
*/ + vex.r = 1; + if ( !mode_64bit() ) + vex.w = 0; + opc[1] = modrm & 0xc7; + opc[2] = 0xc3; + + copy_VEX(opc, vex); + invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0)); + + put_stub(stub); + + ASSERT(!state->simd_size); + break; + + case X86EMUL_OPC_VEX(0x0f, 0x99): /* ktest{w,q} k,k */ + if ( !vex.w ) + host_and_vcpu_must_have(avx512dq); + /* fall through */ + case X86EMUL_OPC_VEX(0x0f, 0x98): /* kortest{w,q} k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x98): /* kortest{b,d} k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x99): /* ktest{b,d} k,k */ + generate_exception_if(vex.l || !vex.r || vex.reg != 0xf || + ea.type != OP_REG, EXC_UD); + host_and_vcpu_must_have(avx512f); + if ( vex.w ) + host_and_vcpu_must_have(avx512bw); + else if ( vex.pfx ) + host_and_vcpu_must_have(avx512dq); + + get_fpu(X86EMUL_FPU_opmask); + + opc = init_prefixes(stub); + opc[0] = b; + opc[1] = modrm; + opc[2] = 0xc3; + + copy_VEX(opc, vex); + invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"), + _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"), + [eflags] "+g" (_regs.eflags), + "=a" (dst.val), [tmp] "=&r" (dummy) + : [mask] "i" (EFLAGS_MASK)); + + put_stub(stub); + + ASSERT(!state->simd_size); + dst.type = OP_NONE; + break; + case X86EMUL_OPC(0x0f, 0xa2): /* cpuid */ msr_val = 0; fail_if(ops->cpuid == NULL); @@ -8170,6 +8393,23 @@ x86_emulate( generate_exception_if(vex.l, EXC_UD); goto simd_0f_imm8_avx; + case X86EMUL_OPC_VEX_66(0x0f3a, 0x30): /* kshiftr{b,w} $imm8,k,k */ + case X86EMUL_OPC_VEX_66(0x0f3a, 0x32): /* kshiftl{b,w} $imm8,k,k */ + if ( !vex.w ) + host_and_vcpu_must_have(avx512dq); + opmask_shift_imm: + generate_exception_if(vex.l || !vex.r || vex.reg != 0xf || + ea.type != OP_REG, EXC_UD); + host_and_vcpu_must_have(avx512f); + get_fpu(X86EMUL_FPU_opmask); + op_bytes = 1; /* Any non-zero value will do. 
*/ + goto simd_0f_imm8; + + case X86EMUL_OPC_VEX_66(0x0f3a, 0x31): /* kshiftr{d,q} $imm8,k,k */ + case X86EMUL_OPC_VEX_66(0x0f3a, 0x33): /* kshiftl{d,q} $imm8,k,k */ + host_and_vcpu_must_have(avx512bw); + goto opmask_shift_imm; + case X86EMUL_OPC_66(0x0f3a, 0x44): /* pclmulqdq $imm8,xmm/m128,xmm */ case X86EMUL_OPC_VEX_66(0x0f3a, 0x44): /* vpclmulqdq $imm8,xmm/m128,xmm,xmm */ host_and_vcpu_must_have(pclmulqdq); diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h index afad760dbc..3750f0c91c 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -170,6 +170,7 @@ enum x86_emulate_fpu_type { X86EMUL_FPU_mmx, /* MMX instruction set (%mm0-%mm7) */ X86EMUL_FPU_xmm, /* SSE instruction set (%xmm0-%xmm7/15) */ X86EMUL_FPU_ymm, /* AVX/XOP instruction set (%ymm0-%ymm7/15) */ + X86EMUL_FPU_opmask, /* AVX512 opmask instruction set (%k0-%k7) */ /* This sentinel will never be passed to ->get_fpu(). */ X86EMUL_FPU_none }; diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h index 5343ddc3c3..7e11a458bd 100644 --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -99,9 +99,12 @@ #define cpu_has_rtm boot_cpu_has(X86_FEATURE_RTM) #define cpu_has_fpu_sel (!boot_cpu_has(X86_FEATURE_NO_FPU_SEL)) #define cpu_has_mpx boot_cpu_has(X86_FEATURE_MPX) +#define cpu_has_avx512f boot_cpu_has(X86_FEATURE_AVX512F) +#define cpu_has_avx512dq boot_cpu_has(X86_FEATURE_AVX512DQ) #define cpu_has_rdseed boot_cpu_has(X86_FEATURE_RDSEED) #define cpu_has_smap boot_cpu_has(X86_FEATURE_SMAP) #define cpu_has_sha boot_cpu_has(X86_FEATURE_SHA) +#define cpu_has_avx512bw boot_cpu_has(X86_FEATURE_AVX512BW) /* CPUID level 0x80000007.edx */ #define cpu_has_itsc boot_cpu_has(X86_FEATURE_ITSC) -- generated by git-patchbot for /home/xen/git/xen.git#staging _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxxx 
https://lists.xenproject.org/xen-changelog
Lists.xenproject.org is hosted with RackSpace, monitoring our