[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v3 01/34] x86emul: support AVX512 opmask insns
These are all VEX encoded, so the EVEX decoding logic remains unused at this point. The new testcase is deliberately coded in assembly, as a C one would have become almost unreadable due to the overwhelming number of __builtin_...() calls that would be needed. After all, the compiler has no underlying type (yet) that could be operated on without builtins, other than the vector types used for "normal" SIMD insns. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- v3: Use distinct temporary file names in testcase.mk. Additions to clean target. --- a/tools/tests/x86_emulator/Makefile +++ b/tools/tests/x86_emulator/Makefile @@ -16,6 +16,8 @@ FMA := fma4 fma SG := avx2-sg TESTCASES := blowfish $(SIMD) $(FMA) $(SG) +OPMASK := avx512f avx512dq avx512bw + blowfish-cflags := "" blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic=" @@ -51,6 +53,10 @@ xop-vecs := $(avx-vecs) xop-ints := 1 2 4 8 xop-flts := $(avx-flts) +avx512f-opmask-vecs := 2 +avx512dq-opmask-vecs := 1 +avx512bw-opmask-vecs := 4 8 + # For AVX and later, have the compiler avoid XMM0 to widen coverage of # the VEX.vvvv checks in the emulator. 
For 3DNow!, however, force SSE # use for floating point operations, to avoid mixing MMX and FPU register @@ -80,9 +86,13 @@ $(1)-cflags := \ $(foreach flt,$($(1)-flts), \ "-D_$(vec)x$(idx)f$(flt) -m$(1:-sg=) $(call non-sse,$(1)) -Os -DVEC_MAX=$(vec) -DIDX_SIZE=$(idx) -DFLOAT_SIZE=$(flt)"))) endef +define opmask-defs +$(1)-opmask-cflags := $(foreach vec,$($(1)-opmask-vecs), "-D_$(vec) -m$(1) -Os -DSIZE=$(vec)") +endef $(foreach flavor,$(SIMD) $(FMA),$(eval $(call simd-defs,$(flavor)))) $(foreach flavor,$(SG),$(eval $(call simd-sg-defs,$(flavor)))) +$(foreach flavor,$(OPMASK),$(eval $(call opmask-defs,$(flavor)))) $(addsuffix .h,$(TESTCASES)): %.h: %.c testcase.mk Makefile rm -f $@.new $*.bin @@ -100,6 +110,22 @@ $(addsuffix .h,$(TESTCASES)): %.h: %.c t ) mv $@.new $@ +$(addsuffix -opmask.h,$(OPMASK)): %.h: opmask.S testcase.mk Makefile + rm -f $@.new $*.bin + $(foreach arch,$(filter-out $(XEN_COMPILE_ARCH),x86_32) $(XEN_COMPILE_ARCH), \ + for cflags in $($*-cflags) $($*-cflags-$(arch)); do \ + $(MAKE) -f testcase.mk TESTCASE=$* XEN_TARGET_ARCH=$(arch) $*-cflags="$$cflags" all; \ + prefix=$(shell echo $(subst -,_,$*) | sed -e 's,^\([0-9]\),_\1,'); \ + flavor=$$(echo $${cflags} | sed -e 's, .*,,' -e 'y,-=,__,') ; \ + (echo 'static const unsigned int __attribute__((section(".test, \"ax\", @progbits #")))' \ + "$${prefix}_$(arch)$${flavor}[] = {"; \ + od -v -t x $*.bin | sed -e 's/^[0-9]* /0x/' -e 's/ /, 0x/g' -e 's/$$/,/'; \ + echo "};") >>$@.new; \ + rm -f $*.bin; \ + done; \ + ) + mv $@.new $@ + $(addsuffix .c,$(SIMD)): ln -sf simd.c $@ @@ -118,7 +144,8 @@ $(TARGET): x86-emulate.o test_x86_emulat .PHONY: clean clean: - rm -rf $(TARGET) *.o *~ core $(addsuffix .h,$(TESTCASES)) *.bin x86_emulate + rm -rf $(TARGET) *.o *~ core *.bin x86_emulate + rm -rf $(TARGET) $(addsuffix .h,$(TESTCASES)) $(addsuffix -opmask.h,$(OPMASK)) .PHONY: distclean distclean: clean @@ -145,4 +172,4 @@ x86-emulate.o test_x86_emulator.o wrappe x86-emulate.o: x86_emulate/x86_emulate.c 
x86-emulate.o: HOSTCFLAGS += -D__XEN_TOOLS__ -test_x86_emulator.o: $(addsuffix .h,$(TESTCASES)) +test_x86_emulator.o: $(addsuffix .h,$(TESTCASES)) $(addsuffix -opmask.h,$(OPMASK)) --- /dev/null +++ b/tools/tests/x86_emulator/opmask.S @@ -0,0 +1,144 @@ +#ifdef __i386__ +# define R(x) e##x +# define DATA(x) x +#else +# if SIZE == 8 +# define R(x) r##x +# else +# define R(x) e##x +# endif +# define DATA(x) x(%rip) +#endif + +#if SIZE == 1 +# define _(x) x##b +#elif SIZE == 2 +# define _(x) x##w +# define WIDEN(x) x##bw +#elif SIZE == 4 +# define _(x) x##d +# define WIDEN(x) x##wd +#elif SIZE == 8 +# define _(x) x##q +# define WIDEN(x) x##dq +#endif + + .macro check res1:req, res2:req, line:req + _(kmov) %\res1, DATA(out) +#if SIZE < 8 || !defined(__i386__) + _(kmov) %\res2, %R(dx) + cmp DATA(out), %R(dx) +#else + sub $8, %esp + kmovq %\res2, (%esp) + pop %ecx + pop %edx + cmp DATA(out), %ecx + jne 0f + cmp DATA(out+4), %edx +0: +#endif + je 1f + mov $\line, %eax + ret +1: + .endm + + .text + .globl _start +_start: + _(kmov) DATA(in1), %k1 +#if SIZE < 8 || !defined(__i386__) + mov DATA(in2), %R(ax) + _(kmov) %R(ax), %k2 +#else + _(kmov) DATA(in2), %k2 +#endif + + _(kor) %k1, %k2, %k3 + _(kand) %k1, %k2, %k4 + _(kandn) %k3, %k4, %k5 + _(kxor) %k1, %k2, %k6 + check k5, k6, __LINE__ + + _(knot) %k6, %k3 + _(kxnor) %k1, %k2, %k4 + check k3, k4, __LINE__ + + _(kshiftl) $1, %k1, %k3 + _(kshiftl) $2, %k3, %k4 + _(kshiftl) $3, %k1, %k5 + check k4, k5, __LINE__ + + _(kshiftr) $1, %k1, %k3 + _(kshiftr) $2, %k3, %k4 + _(kshiftr) $3, %k1, %k5 + check k4, k5, __LINE__ + + _(kortest) %k6, %k6 + jnbe 1f + mov $__LINE__, %eax + ret +1: + + _(kxor) %k0, %k0, %k3 + _(kortest) %k3, %k3 + jz 1f + mov $__LINE__, %eax + ret +1: + + _(kxnor) %k0, %k0, %k3 + _(kortest) %k3, %k3 + jc 1f + mov $__LINE__, %eax + ret +1: + +#if SIZE > 1 + + _(kshiftr) $SIZE*4, %k3, %k4 + WIDEN(kunpck) %k4, %k4, %k5 + check k3, k5, __LINE__ + +#endif + +#if SIZE != 2 || defined(__AVX512DQ__) + + _(kadd) %k1, %k1, 
%k3 + _(kshiftl) $1, %k1, %k4 + check k3, k4, __LINE__ + + _(ktest) %k2, %k1 + jnbe 1f + mov $__LINE__, %eax + ret +1: + + _(kxor) %k0, %k0, %k3 + _(ktest) %k0, %k3 + jz 1f + mov $__LINE__, %eax + ret +1: + + _(kxnor) %k0, %k0, %k4 + _(ktest) %k0, %k4 + jc 1f + mov $__LINE__, %eax + ret +1: + +#endif + + xor %eax, %eax + ret + + .section .rodata, "a", @progbits + .balign 8 +in1: .byte 0b10110011, 0b10001111, 0b00001111, 0b10000011, 0b11110000, 0b00111111, 0b10000000, 0b11111111 +in2: .byte 0b11111111, 0b00000001, 0b11111100, 0b00001111, 0b11000001, 0b11110000, 0b11110001, 0b11001101 + + .data + .balign 8 +out: .quad 0 --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -18,6 +18,9 @@ asm ( ".pushsection .test, \"ax\", @prog #include "avx2.h" #include "avx2-sg.h" #include "xop.h" +#include "avx512f-opmask.h" +#include "avx512dq-opmask.h" +#include "avx512bw-opmask.h" #define verbose false /* Switch to true for far more logging. */ @@ -78,6 +81,24 @@ static bool simd_check_xop(void) return cpu_has_xop; } +static bool simd_check_avx512f(void) +{ + return cpu_has_avx512f; +} +#define simd_check_avx512f_opmask simd_check_avx512f + +static bool simd_check_avx512dq(void) +{ + return cpu_has_avx512dq; +} +#define simd_check_avx512dq_opmask simd_check_avx512dq + +static bool simd_check_avx512bw(void) +{ + return cpu_has_avx512bw; +} +#define simd_check_avx512bw_opmask simd_check_avx512bw + static void simd_set_regs(struct cpu_user_regs *regs) { if ( cpu_has_mmx ) @@ -223,6 +244,10 @@ static const struct { SIMD(XOP i16x16, xop, 32i2), SIMD(XOP i32x8, xop, 32i4), SIMD(XOP i64x4, xop, 32i8), + SIMD(OPMASK/w, avx512f_opmask, 2), + SIMD(OPMASK/b, avx512dq_opmask, 1), + SIMD(OPMASK/d, avx512bw_opmask, 4), + SIMD(OPMASK/q, avx512bw_opmask, 8), #undef SIMD_ #undef SIMD }; @@ -3426,8 +3451,8 @@ int main(int argc, char **argv) rc = x86_emulate(&ctxt, &emulops); if ( rc != X86EMUL_OKAY ) { - printf("failed at %%eip == %08lx (opcode 
%08x)\n", - (unsigned long)regs.eip, ctxt.opcode); + printf("failed (%d) at %%eip == %08lx (opcode %08x)\n", + rc, (unsigned long)regs.eip, ctxt.opcode); return 1; } } --- a/tools/tests/x86_emulator/testcase.mk +++ b/tools/tests/x86_emulator/testcase.mk @@ -14,3 +14,9 @@ all: $(TESTCASE).bin $(LD) $(LDFLAGS_DIRECT) -N -Ttext 0x100000 -o $*.tmp $*.o $(OBJCOPY) -O binary $*.tmp $@ rm -f $*.tmp + +%-opmask.bin: opmask.S + $(CC) $(filter-out -M% .%,$(CFLAGS)) -c $< -o $(basename $@).o + $(LD) $(LDFLAGS_DIRECT) -N -Ttext 0x100000 -o $(basename $@).tmp $(basename $@).o + $(OBJCOPY) -O binary $(basename $@).tmp $@ + rm -f $(basename $@).tmp --- a/tools/tests/x86_emulator/x86-emulate.c +++ b/tools/tests/x86_emulator/x86-emulate.c @@ -209,6 +209,9 @@ int emul_test_get_fpu( case X86EMUL_FPU_ymm: if ( cpu_has_avx ) break; + case X86EMUL_FPU_opmask: + if ( cpu_has_avx512f ) + break; default: return X86EMUL_UNHANDLEABLE; } --- a/tools/tests/x86_emulator/x86-emulate.h +++ b/tools/tests/x86_emulator/x86-emulate.h @@ -236,6 +236,36 @@ static inline uint64_t xgetbv(uint32_t x (res.c & (1U << 21)) != 0; \ }) +#define cpu_has_avx512f ({ \ + struct cpuid_leaf res; \ + emul_test_cpuid(1, 0, &res, NULL); \ + if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 0xe6) != 0xe6) ) \ + res.b = 0; \ + else \ + emul_test_cpuid(7, 0, &res, NULL); \ + (res.b & (1U << 16)) != 0; \ +}) + +#define cpu_has_avx512dq ({ \ + struct cpuid_leaf res; \ + emul_test_cpuid(1, 0, &res, NULL); \ + if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 0xe6) != 0xe6) ) \ + res.b = 0; \ + else \ + emul_test_cpuid(7, 0, &res, NULL); \ + (res.b & (1U << 17)) != 0; \ +}) + +#define cpu_has_avx512bw ({ \ + struct cpuid_leaf res; \ + emul_test_cpuid(1, 0, &res, NULL); \ + if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 0xe6) != 0xe6) ) \ + res.b = 0; \ + else \ + emul_test_cpuid(7, 0, &res, NULL); \ + (res.b & (1U << 30)) != 0; \ +}) + int emul_test_cpuid( uint32_t leaf, uint32_t subleaf, --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ 
b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -491,6 +491,7 @@ static const struct ext0f3a_table { [0x20] = { .simd_size = simd_none }, [0x21] = { .simd_size = simd_other }, [0x22] = { .simd_size = simd_none }, + [0x30 ... 0x33] = { .simd_size = simd_other, .two_op = 1 }, [0x38] = { .simd_size = simd_128 }, [0x39] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 }, [0x40 ... 0x41] = { .simd_size = simd_packed_fp }, @@ -1187,6 +1188,11 @@ static int _get_fpu( return X86EMUL_UNHANDLEABLE; break; + case X86EMUL_FPU_opmask: + if ( !(xcr0 & X86_XCR0_SSE) || !(xcr0 & X86_XCR0_OPMASK) ) + return X86EMUL_UNHANDLEABLE; + break; + default: break; } @@ -1762,12 +1768,15 @@ static bool vcpu_has( #define vcpu_has_bmi2() vcpu_has( 7, EBX, 8, ctxt, ops) #define vcpu_has_rtm() vcpu_has( 7, EBX, 11, ctxt, ops) #define vcpu_has_mpx() vcpu_has( 7, EBX, 14, ctxt, ops) +#define vcpu_has_avx512f() vcpu_has( 7, EBX, 16, ctxt, ops) +#define vcpu_has_avx512dq() vcpu_has( 7, EBX, 17, ctxt, ops) #define vcpu_has_rdseed() vcpu_has( 7, EBX, 18, ctxt, ops) #define vcpu_has_adx() vcpu_has( 7, EBX, 19, ctxt, ops) #define vcpu_has_smap() vcpu_has( 7, EBX, 20, ctxt, ops) #define vcpu_has_clflushopt() vcpu_has( 7, EBX, 23, ctxt, ops) #define vcpu_has_clwb() vcpu_has( 7, EBX, 24, ctxt, ops) #define vcpu_has_sha() vcpu_has( 7, EBX, 29, ctxt, ops) +#define vcpu_has_avx512bw() vcpu_has( 7, EBX, 30, ctxt, ops) #define vcpu_has_rdpid() vcpu_has( 7, ECX, 22, ctxt, ops) #define vcpu_has_clzero() vcpu_has(0x80000008, EBX, 0, ctxt, ops) @@ -2396,6 +2405,18 @@ x86_decode_twobyte( } break; + case X86EMUL_OPC_VEX(0, 0x90): /* kmov{w,q} */ + case X86EMUL_OPC_VEX_66(0, 0x90): /* kmov{b,d} */ + state->desc = DstReg | SrcMem | Mov; + state->simd_size = simd_other; + break; + + case X86EMUL_OPC_VEX(0, 0x91): /* kmov{w,q} */ + case X86EMUL_OPC_VEX_66(0, 0x91): /* kmov{b,d} */ + state->desc = DstMem | SrcReg | Mov; + state->simd_size = simd_other; + break; + case 0xae: ctxt->opcode |= MASK_INSR(vex.pfx, 
X86EMUL_OPC_PFX_MASK); /* fall through */ @@ -6002,6 +6023,60 @@ x86_emulate( dst.val = src.val; break; + case X86EMUL_OPC_VEX(0x0f, 0x4a): /* kadd{w,q} k,k,k */ + if ( !vex.w ) + host_and_vcpu_must_have(avx512dq); + /* fall through */ + case X86EMUL_OPC_VEX(0x0f, 0x41): /* kand{w,q} k,k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x41): /* kand{b,d} k,k,k */ + case X86EMUL_OPC_VEX(0x0f, 0x42): /* kandn{w,q} k,k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x42): /* kandn{b,d} k,k,k */ + case X86EMUL_OPC_VEX(0x0f, 0x45): /* kor{w,q} k,k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x45): /* kor{b,d} k,k,k */ + case X86EMUL_OPC_VEX(0x0f, 0x46): /* kxnor{w,q} k,k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x46): /* kxnor{b,d} k,k,k */ + case X86EMUL_OPC_VEX(0x0f, 0x47): /* kxor{w,q} k,k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x47): /* kxor{b,d} k,k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x4a): /* kadd{b,d} k,k,k */ + generate_exception_if(!vex.l, EXC_UD); + opmask_basic: + if ( vex.w ) + host_and_vcpu_must_have(avx512bw); + else if ( vex.pfx ) + host_and_vcpu_must_have(avx512dq); + opmask_common: + host_and_vcpu_must_have(avx512f); + generate_exception_if(!vex.r || (mode_64bit() && !(vex.reg & 8)) || + ea.type != OP_REG, EXC_UD); + + vex.reg |= 8; + d &= ~TwoOp; + + get_fpu(X86EMUL_FPU_opmask); + + opc = init_prefixes(stub); + opc[0] = b; + opc[1] = modrm; + insn_bytes = PFX_BYTES + 2; + + state->simd_size = simd_other; + op_bytes = 1; /* Any non-zero value will do. 
*/ + break; + + case X86EMUL_OPC_VEX(0x0f, 0x44): /* knot{w,q} k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x44): /* knot{b,d} k,k */ + generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD); + goto opmask_basic; + + case X86EMUL_OPC_VEX(0x0f, 0x4b): /* kunpck{w,d}{d,q} k,k,k */ + generate_exception_if(!vex.l, EXC_UD); + host_and_vcpu_must_have(avx512bw); + goto opmask_common; + + case X86EMUL_OPC_VEX_66(0x0f, 0x4b): /* kunpckbw k,k,k */ + generate_exception_if(!vex.l || vex.w, EXC_UD); + goto opmask_common; + CASE_SIMD_PACKED_FP(, 0x0f, 0x50): /* movmskp{s,d} xmm,reg */ CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x50): /* vmovmskp{s,d} {x,y}mm,reg */ CASE_SIMD_PACKED_INT(0x0f, 0xd7): /* pmovmskb {,x}mm,reg */ @@ -6552,6 +6627,154 @@ x86_emulate( dst.val = test_cc(b, _regs.eflags); break; + case X86EMUL_OPC_VEX(0x0f, 0x91): /* kmov{w,q} k,mem */ + case X86EMUL_OPC_VEX_66(0x0f, 0x91): /* kmov{b,d} k,mem */ + generate_exception_if(ea.type != OP_MEM, EXC_UD); + /* fall through */ + case X86EMUL_OPC_VEX(0x0f, 0x90): /* kmov{w,q} k/mem,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x90): /* kmov{b,d} k/mem,k */ + generate_exception_if(vex.l || !vex.r, EXC_UD); + host_and_vcpu_must_have(avx512f); + if ( vex.w ) + { + host_and_vcpu_must_have(avx512bw); + op_bytes = 4 << !vex.pfx; + } + else if ( vex.pfx ) + { + host_and_vcpu_must_have(avx512dq); + op_bytes = 1; + } + else + op_bytes = 2; + + get_fpu(X86EMUL_FPU_opmask); + + opc = init_prefixes(stub); + opc[0] = b; + opc[1] = modrm; + if ( ea.type == OP_MEM ) + { + /* convert memory operand to (%rAX) */ + vex.b = 1; + opc[1] &= 0x38; + } + insn_bytes = PFX_BYTES + 2; + break; + + case X86EMUL_OPC_VEX(0x0f, 0x92): /* kmovw r32,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x92): /* kmovb r32,k */ + case X86EMUL_OPC_VEX_F2(0x0f, 0x92): /* kmov{d,q} reg,k */ + generate_exception_if(vex.l || !vex.r || vex.reg != 0xf || + ea.type != OP_REG, EXC_UD); + + host_and_vcpu_must_have(avx512f); + if ( vex.pfx == vex_f2 ) + host_and_vcpu_must_have(avx512bw); + else + { 
+ generate_exception_if(vex.w, EXC_UD); + if ( vex.pfx ) + host_and_vcpu_must_have(avx512dq); + } + + get_fpu(X86EMUL_FPU_opmask); + + opc = init_prefixes(stub); + opc[0] = b; + /* Convert GPR source to %rAX. */ + vex.b = 1; + if ( !mode_64bit() ) + vex.w = 0; + opc[1] = modrm & 0xf8; + opc[2] = 0xc3; + + copy_VEX(opc, vex); + ea.reg = decode_gpr(&_regs, modrm_rm); + invoke_stub("", "", "=m" (dummy) : "a" (*ea.reg)); + + put_stub(stub); + + ASSERT(!state->simd_size); + dst.type = OP_NONE; + break; + + case X86EMUL_OPC_VEX(0x0f, 0x93): /* kmovw k,r32 */ + case X86EMUL_OPC_VEX_66(0x0f, 0x93): /* kmovb k,r32 */ + case X86EMUL_OPC_VEX_F2(0x0f, 0x93): /* kmov{d,q} k,reg */ + generate_exception_if(vex.l || vex.reg != 0xf || ea.type != OP_REG, + EXC_UD); + dst = ea; + dst.reg = decode_gpr(&_regs, modrm_reg); + + host_and_vcpu_must_have(avx512f); + if ( vex.pfx == vex_f2 ) + { + host_and_vcpu_must_have(avx512bw); + dst.bytes = 4 << (mode_64bit() && vex.w); + } + else + { + generate_exception_if(vex.w, EXC_UD); + dst.bytes = 4; + if ( vex.pfx ) + host_and_vcpu_must_have(avx512dq); + } + + get_fpu(X86EMUL_FPU_opmask); + + opc = init_prefixes(stub); + opc[0] = b; + /* Convert GPR destination to %rAX. 
*/ + vex.r = 1; + if ( !mode_64bit() ) + vex.w = 0; + opc[1] = modrm & 0xc7; + opc[2] = 0xc3; + + copy_VEX(opc, vex); + invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0)); + + put_stub(stub); + + ASSERT(!state->simd_size); + break; + + case X86EMUL_OPC_VEX(0x0f, 0x99): /* ktest{w,q} k,k */ + if ( !vex.w ) + host_and_vcpu_must_have(avx512dq); + /* fall through */ + case X86EMUL_OPC_VEX(0x0f, 0x98): /* kortest{w,q} k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x98): /* kortest{b,d} k,k */ + case X86EMUL_OPC_VEX_66(0x0f, 0x99): /* ktest{b,d} k,k */ + generate_exception_if(vex.l || !vex.r || vex.reg != 0xf || + ea.type != OP_REG, EXC_UD); + host_and_vcpu_must_have(avx512f); + if ( vex.w ) + host_and_vcpu_must_have(avx512bw); + else if ( vex.pfx ) + host_and_vcpu_must_have(avx512dq); + + get_fpu(X86EMUL_FPU_opmask); + + opc = init_prefixes(stub); + opc[0] = b; + opc[1] = modrm; + opc[2] = 0xc3; + + copy_VEX(opc, vex); + invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"), + _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"), + [eflags] "+g" (_regs.eflags), + "=a" (dst.val), [tmp] "=&r" (dummy) + : [mask] "i" (EFLAGS_MASK)); + + put_stub(stub); + + ASSERT(!state->simd_size); + dst.type = OP_NONE; + break; + case X86EMUL_OPC(0x0f, 0xa2): /* cpuid */ msr_val = 0; fail_if(ops->cpuid == NULL); @@ -8170,6 +8393,23 @@ x86_emulate( generate_exception_if(vex.l, EXC_UD); goto simd_0f_imm8_avx; + case X86EMUL_OPC_VEX_66(0x0f3a, 0x30): /* kshiftr{b,w} $imm8,k,k */ + case X86EMUL_OPC_VEX_66(0x0f3a, 0x32): /* kshiftl{b,w} $imm8,k,k */ + if ( !vex.w ) + host_and_vcpu_must_have(avx512dq); + opmask_shift_imm: + generate_exception_if(vex.l || !vex.r || vex.reg != 0xf || + ea.type != OP_REG, EXC_UD); + host_and_vcpu_must_have(avx512f); + get_fpu(X86EMUL_FPU_opmask); + op_bytes = 1; /* Any non-zero value will do. 
*/ + goto simd_0f_imm8; + + case X86EMUL_OPC_VEX_66(0x0f3a, 0x31): /* kshiftr{d,q} $imm8,k,k */ + case X86EMUL_OPC_VEX_66(0x0f3a, 0x33): /* kshiftl{d,q} $imm8,k,k */ + host_and_vcpu_must_have(avx512bw); + goto opmask_shift_imm; + case X86EMUL_OPC_66(0x0f3a, 0x44): /* pclmulqdq $imm8,xmm/m128,xmm */ case X86EMUL_OPC_VEX_66(0x0f3a, 0x44): /* vpclmulqdq $imm8,xmm/m128,xmm,xmm */ host_and_vcpu_must_have(pclmulqdq); --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -170,6 +170,7 @@ enum x86_emulate_fpu_type { X86EMUL_FPU_mmx, /* MMX instruction set (%mm0-%mm7) */ X86EMUL_FPU_xmm, /* SSE instruction set (%xmm0-%xmm7/15) */ X86EMUL_FPU_ymm, /* AVX/XOP instruction set (%ymm0-%ymm7/15) */ + X86EMUL_FPU_opmask, /* AVX512 opmask instruction set (%k0-%k7) */ /* This sentinel will never be passed to ->get_fpu(). */ X86EMUL_FPU_none }; --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -99,9 +99,12 @@ #define cpu_has_rtm boot_cpu_has(X86_FEATURE_RTM) #define cpu_has_fpu_sel (!boot_cpu_has(X86_FEATURE_NO_FPU_SEL)) #define cpu_has_mpx boot_cpu_has(X86_FEATURE_MPX) +#define cpu_has_avx512f boot_cpu_has(X86_FEATURE_AVX512F) +#define cpu_has_avx512dq boot_cpu_has(X86_FEATURE_AVX512DQ) #define cpu_has_rdseed boot_cpu_has(X86_FEATURE_RDSEED) #define cpu_has_smap boot_cpu_has(X86_FEATURE_SMAP) #define cpu_has_sha boot_cpu_has(X86_FEATURE_SHA) +#define cpu_has_avx512bw boot_cpu_has(X86_FEATURE_AVX512BW) /* CPUID level 0x80000007.edx */ #define cpu_has_itsc boot_cpu_has(X86_FEATURE_ITSC) _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |