[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen master] x86emul: support AVX512 opmask insns



commit 304a8301fc9c6ff1f45a6aa081c56cd57bc7696a
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Fri Oct 26 15:20:37 2018 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Fri Oct 26 15:20:37 2018 +0200

    x86emul: support AVX512 opmask insns
    
    These are all VEX encoded, so the EVEX decoding logic continues to
    remain unused at this point.
    
    The new testcase is deliberately coded in assembly, as a C one would
    have become almost unreadable due to the overwhelming amount of
    __builtin_...() that would need to be used. After all the compiler has
    no underlying type (yet) that could be operated on without builtins,
    other than the vector types used for "normal" SIMD insns.
    
    Note that outside of 64-bit mode and despite the SDM not currently
    saying so, VEX.W is ignored for the KMOV{D,Q} encodings to/from GPRs,
    just like e.g. for the similar VMOV{D,Q}.
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 tools/tests/x86_emulator/Makefile            |  31 +++-
 tools/tests/x86_emulator/opmask.S            | 144 ++++++++++++++++
 tools/tests/x86_emulator/test_x86_emulator.c |  29 +++-
 tools/tests/x86_emulator/testcase.mk         |   6 +
 tools/tests/x86_emulator/x86-emulate.c       |   3 +
 tools/tests/x86_emulator/x86-emulate.h       |  30 ++++
 xen/arch/x86/x86_emulate/x86_emulate.c       | 240 +++++++++++++++++++++++++++
 xen/arch/x86/x86_emulate/x86_emulate.h       |   1 +
 xen/include/asm-x86/cpufeature.h             |   3 +
 9 files changed, 483 insertions(+), 4 deletions(-)

diff --git a/tools/tests/x86_emulator/Makefile b/tools/tests/x86_emulator/Makefile
index e8a3e9057e..a97c43b9c2 100644
--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -16,6 +16,8 @@ FMA := fma4 fma
 SG := avx2-sg
 TESTCASES := blowfish $(SIMD) $(FMA) $(SG)
 
+OPMASK := avx512f avx512dq avx512bw
+
 blowfish-cflags := ""
 blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic="
 
@@ -51,6 +53,10 @@ xop-vecs := $(avx-vecs)
 xop-ints := 1 2 4 8
 xop-flts := $(avx-flts)
 
+avx512f-opmask-vecs := 2
+avx512dq-opmask-vecs := 1
+avx512bw-opmask-vecs := 4 8
+
 # For AVX and later, have the compiler avoid XMM0 to widen coverage of
 # the VEX.vvvv checks in the emulator.  For 3DNow!, however, force SSE
 # use for floating point operations, to avoid mixing MMX and FPU register
@@ -80,9 +86,13 @@ $(1)-cflags := \
           $(foreach flt,$($(1)-flts), \
             "-D_$(vec)x$(idx)f$(flt) -m$(1:-sg=) $(call non-sse,$(1)) -Os 
-DVEC_MAX=$(vec) -DIDX_SIZE=$(idx) -DFLOAT_SIZE=$(flt)")))
 endef
+define opmask-defs
+$(1)-opmask-cflags := $(foreach vec,$($(1)-opmask-vecs), "-D_$(vec) -m$(1) -Os 
-DSIZE=$(vec)")
+endef
 
 $(foreach flavor,$(SIMD) $(FMA),$(eval $(call simd-defs,$(flavor))))
 $(foreach flavor,$(SG),$(eval $(call simd-sg-defs,$(flavor))))
+$(foreach flavor,$(OPMASK),$(eval $(call opmask-defs,$(flavor))))
 
 $(addsuffix .h,$(TESTCASES)): %.h: %.c testcase.mk Makefile
        rm -f $@.new $*.bin
@@ -100,6 +110,22 @@ $(addsuffix .h,$(TESTCASES)): %.h: %.c testcase.mk Makefile
        )
        mv $@.new $@
 
+$(addsuffix -opmask.h,$(OPMASK)): %.h: opmask.S testcase.mk Makefile
+       rm -f $@.new $*.bin
+       $(foreach arch,$(filter-out $(XEN_COMPILE_ARCH),x86_32) 
$(XEN_COMPILE_ARCH), \
+           for cflags in $($*-cflags) $($*-cflags-$(arch)); do \
+               $(MAKE) -f testcase.mk TESTCASE=$* XEN_TARGET_ARCH=$(arch) 
$*-cflags="$$cflags" all; \
+               prefix=$(shell echo $(subst -,_,$*) | sed -e 
's,^\([0-9]\),_\1,'); \
+               flavor=$$(echo $${cflags} | sed -e 's, .*,,' -e 'y,-=,__,') ; \
+               (echo 'static const unsigned int __attribute__((section(".test, 
\"ax\", @progbits #")))' \
+                     "$${prefix}_$(arch)$${flavor}[] = {"; \
+                od -v -t x $*.bin | sed -e 's/^[0-9]* /0x/' -e 's/ /, 0x/g' -e 
's/$$/,/'; \
+                echo "};") >>$@.new; \
+               rm -f $*.bin; \
+           done; \
+       )
+       mv $@.new $@
+
 $(addsuffix .c,$(SIMD)):
        ln -sf simd.c $@
 
@@ -118,7 +144,8 @@ $(TARGET): x86-emulate.o test_x86_emulator.o wrappers.o
 
 .PHONY: clean
 clean:
-       rm -rf $(TARGET) *.o *~ core $(addsuffix .h,$(TESTCASES)) *.bin 
x86_emulate
+       rm -rf $(TARGET) *.o *~ core *.bin x86_emulate
+       rm -rf $(TARGET) $(addsuffix .h,$(TESTCASES)) $(addsuffix 
-opmask.h,$(OPMASK))
 
 .PHONY: distclean
 distclean: clean
@@ -145,4 +172,4 @@ x86-emulate.o test_x86_emulator.o wrappers.o: %.o: %.c 
$(x86_emulate.h)
 x86-emulate.o: x86_emulate/x86_emulate.c
 x86-emulate.o: HOSTCFLAGS += -D__XEN_TOOLS__
 
-test_x86_emulator.o: $(addsuffix .h,$(TESTCASES))
+test_x86_emulator.o: $(addsuffix .h,$(TESTCASES)) $(addsuffix 
-opmask.h,$(OPMASK))
diff --git a/tools/tests/x86_emulator/opmask.S b/tools/tests/x86_emulator/opmask.S
new file mode 100644
index 0000000000..3fad8b1f10
--- /dev/null
+++ b/tools/tests/x86_emulator/opmask.S
@@ -0,0 +1,144 @@
+#ifdef __i386__
+# define R(x) e##x
+# define DATA(x) x
+#else
+# if SIZE == 8
+#  define R(x) r##x
+# else
+#  define R(x) e##x
+# endif
+# define DATA(x) x(%rip)
+#endif
+
+#if SIZE == 1
+# define _(x) x##b
+#elif SIZE == 2
+# define _(x) x##w
+# define WIDEN(x) x##bw
+#elif SIZE == 4
+# define _(x) x##d
+# define WIDEN(x) x##wd
+#elif SIZE == 8
+# define _(x) x##q
+# define WIDEN(x) x##dq
+#endif
+
+    .macro check res1:req, res2:req, line:req
+    _(kmov)       %\res1, DATA(out)
+#if SIZE < 8 || !defined(__i386__)
+    _(kmov)       %\res2, %R(dx)
+    cmp           DATA(out), %R(dx)
+#else
+    sub           $8, %esp
+    kmovq         %\res2, (%esp)
+    pop           %ecx
+    pop           %edx
+    cmp           DATA(out), %ecx
+    jne           0f
+    cmp           DATA(out+4), %edx
+0:
+#endif
+    je            1f
+    mov           $\line, %eax
+    ret
+1:
+    .endm
+
+    .text
+    .globl _start
+_start:
+    _(kmov)       DATA(in1), %k1
+#if SIZE < 8 || !defined(__i386__)
+    mov           DATA(in2), %R(ax)
+    _(kmov)       %R(ax), %k2
+#else
+    _(kmov)       DATA(in2), %k2
+#endif
+
+    _(kor)        %k1, %k2, %k3
+    _(kand)       %k1, %k2, %k4
+    _(kandn)      %k3, %k4, %k5
+    _(kxor)       %k1, %k2, %k6
+    check         k5, k6, __LINE__
+
+    _(knot)       %k6, %k3
+    _(kxnor)      %k1, %k2, %k4
+    check         k3, k4, __LINE__
+
+    _(kshiftl)    $1, %k1, %k3
+    _(kshiftl)    $2, %k3, %k4
+    _(kshiftl)    $3, %k1, %k5
+    check         k4, k5, __LINE__
+
+    _(kshiftr)    $1, %k1, %k3
+    _(kshiftr)    $2, %k3, %k4
+    _(kshiftr)    $3, %k1, %k5
+    check         k4, k5, __LINE__
+
+    _(kortest)    %k6, %k6
+    jnbe          1f
+    mov           $__LINE__, %eax
+    ret
+1:
+
+    _(kxor)       %k0, %k0, %k3
+    _(kortest)    %k3, %k3
+    jz            1f
+    mov           $__LINE__, %eax
+    ret
+1:
+
+    _(kxnor)      %k0, %k0, %k3
+    _(kortest)    %k3, %k3
+    jc            1f
+    mov           $__LINE__, %eax
+    ret
+1:
+
+#if SIZE > 1
+
+    _(kshiftr)    $SIZE*4, %k3, %k4
+    WIDEN(kunpck) %k4, %k4, %k5
+    check         k3, k5, __LINE__
+
+#endif
+
+#if SIZE != 2 || defined(__AVX512DQ__)
+
+    _(kadd)       %k1, %k1, %k3
+    _(kshiftl)    $1, %k1, %k4
+    check         k3, k4, __LINE__
+
+    _(ktest)      %k2, %k1
+    jnbe          1f
+    mov           $__LINE__, %eax
+    ret
+1:
+
+    _(kxor)       %k0, %k0, %k3
+    _(ktest)      %k0, %k3
+    jz            1f
+    mov           $__LINE__, %eax
+    ret
+1:
+
+    _(kxnor)      %k0, %k0, %k4
+    _(ktest)      %k0, %k4
+    jc            1f
+    mov           $__LINE__, %eax
+    ret
+1:
+
+#endif
+
+    xor           %eax, %eax
+    ret
+
+    .section .rodata, "a", @progbits
+    .balign 8
+in1: .byte 0b10110011, 0b10001111, 0b00001111, 0b10000011, 0b11110000, 
0b00111111, 0b10000000, 0b11111111
+in2: .byte 0b11111111, 0b00000001, 0b11111100, 0b00001111, 0b11000001, 
0b11110000, 0b11110001, 0b11001101
+
+    .data
+    .balign 8
+out: .quad 0
diff --git a/tools/tests/x86_emulator/test_x86_emulator.c b/tools/tests/x86_emulator/test_x86_emulator.c
index 2f6fb679de..ed5a3d8853 100644
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -18,6 +18,9 @@ asm ( ".pushsection .test, \"ax\", @progbits; .popsection" );
 #include "avx2.h"
 #include "avx2-sg.h"
 #include "xop.h"
+#include "avx512f-opmask.h"
+#include "avx512dq-opmask.h"
+#include "avx512bw-opmask.h"
 
 #define verbose false /* Switch to true for far more logging. */
 
@@ -78,6 +81,24 @@ static bool simd_check_xop(void)
     return cpu_has_xop;
 }
 
+static bool simd_check_avx512f(void)
+{
+    return cpu_has_avx512f;
+}
+#define simd_check_avx512f_opmask simd_check_avx512f
+
+static bool simd_check_avx512dq(void)
+{
+    return cpu_has_avx512dq;
+}
+#define simd_check_avx512dq_opmask simd_check_avx512dq
+
+static bool simd_check_avx512bw(void)
+{
+    return cpu_has_avx512bw;
+}
+#define simd_check_avx512bw_opmask simd_check_avx512bw
+
 static void simd_set_regs(struct cpu_user_regs *regs)
 {
     if ( cpu_has_mmx )
@@ -223,6 +244,10 @@ static const struct {
     SIMD(XOP i16x16,              xop,      32i2),
     SIMD(XOP i32x8,               xop,      32i4),
     SIMD(XOP i64x4,               xop,      32i8),
+    SIMD(OPMASK/w,     avx512f_opmask,         2),
+    SIMD(OPMASK/b,    avx512dq_opmask,         1),
+    SIMD(OPMASK/d,    avx512bw_opmask,         4),
+    SIMD(OPMASK/q,    avx512bw_opmask,         8),
 #undef SIMD_
 #undef SIMD
 };
@@ -3426,8 +3451,8 @@ int main(int argc, char **argv)
             rc = x86_emulate(&ctxt, &emulops);
             if ( rc != X86EMUL_OKAY )
             {
-                printf("failed at %%eip == %08lx (opcode %08x)\n",
-                       (unsigned long)regs.eip, ctxt.opcode);
+                printf("failed (%d) at %%eip == %08lx (opcode %08x)\n",
+                       rc, (unsigned long)regs.eip, ctxt.opcode);
                 return 1;
             }
         }
diff --git a/tools/tests/x86_emulator/testcase.mk b/tools/tests/x86_emulator/testcase.mk
index 0a72b8db29..a565d15524 100644
--- a/tools/tests/x86_emulator/testcase.mk
+++ b/tools/tests/x86_emulator/testcase.mk
@@ -14,3 +14,9 @@ all: $(TESTCASE).bin
        $(LD) $(LDFLAGS_DIRECT) -N -Ttext 0x100000 -o $*.tmp $*.o
        $(OBJCOPY) -O binary $*.tmp $@
        rm -f $*.tmp
+
+%-opmask.bin: opmask.S
+       $(CC) $(filter-out -M% .%,$(CFLAGS)) -c $< -o $(basename $@).o
+       $(LD) $(LDFLAGS_DIRECT) -N -Ttext 0x100000 -o $(basename $@).tmp 
$(basename $@).o
+       $(OBJCOPY) -O binary $(basename $@).tmp $@
+       rm -f $(basename $@).tmp
diff --git a/tools/tests/x86_emulator/x86-emulate.c b/tools/tests/x86_emulator/x86-emulate.c
index bb5908b59e..aba5768d53 100644
--- a/tools/tests/x86_emulator/x86-emulate.c
+++ b/tools/tests/x86_emulator/x86-emulate.c
@@ -209,6 +209,9 @@ int emul_test_get_fpu(
     case X86EMUL_FPU_ymm:
         if ( cpu_has_avx )
             break;
+    case X86EMUL_FPU_opmask:
+        if ( cpu_has_avx512f )
+            break;
     default:
         return X86EMUL_UNHANDLEABLE;
     }
diff --git a/tools/tests/x86_emulator/x86-emulate.h b/tools/tests/x86_emulator/x86-emulate.h
index 08dead32fd..ef58466e6e 100644
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -260,6 +260,36 @@ static inline uint64_t xgetbv(uint32_t xcr)
     (res.c & (1U << 21)) != 0; \
 })
 
+#define cpu_has_avx512f ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(1, 0, &res, NULL); \
+    if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 0xe6) != 0xe6) ) \
+        res.b = 0; \
+    else \
+        emul_test_cpuid(7, 0, &res, NULL); \
+    (res.b & (1U << 16)) != 0; \
+})
+
+#define cpu_has_avx512dq ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(1, 0, &res, NULL); \
+    if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 0xe6) != 0xe6) ) \
+        res.b = 0; \
+    else \
+        emul_test_cpuid(7, 0, &res, NULL); \
+    (res.b & (1U << 17)) != 0; \
+})
+
+#define cpu_has_avx512bw ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(1, 0, &res, NULL); \
+    if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 0xe6) != 0xe6) ) \
+        res.b = 0; \
+    else \
+        emul_test_cpuid(7, 0, &res, NULL); \
+    (res.b & (1U << 30)) != 0; \
+})
+
 int emul_test_cpuid(
     uint32_t leaf,
     uint32_t subleaf,
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 4afc3f6ca3..90132f4c7c 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -491,6 +491,7 @@ static const struct ext0f3a_table {
     [0x20] = { .simd_size = simd_none },
     [0x21] = { .simd_size = simd_other },
     [0x22] = { .simd_size = simd_none },
+    [0x30 ... 0x33] = { .simd_size = simd_other, .two_op = 1 },
     [0x38] = { .simd_size = simd_128 },
     [0x39] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 },
     [0x40 ... 0x41] = { .simd_size = simd_packed_fp },
@@ -1187,6 +1188,11 @@ static int _get_fpu(
             return X86EMUL_UNHANDLEABLE;
         break;
 
+    case X86EMUL_FPU_opmask:
+        if ( !(xcr0 & X86_XCR0_SSE) || !(xcr0 & X86_XCR0_OPMASK) )
+            return X86EMUL_UNHANDLEABLE;
+        break;
+
     default:
         break;
     }
@@ -1762,12 +1768,15 @@ static bool vcpu_has(
 #define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
 #define vcpu_has_rtm()         vcpu_has(         7, EBX, 11, ctxt, ops)
 #define vcpu_has_mpx()         vcpu_has(         7, EBX, 14, ctxt, ops)
+#define vcpu_has_avx512f()     vcpu_has(         7, EBX, 16, ctxt, ops)
+#define vcpu_has_avx512dq()    vcpu_has(         7, EBX, 17, ctxt, ops)
 #define vcpu_has_rdseed()      vcpu_has(         7, EBX, 18, ctxt, ops)
 #define vcpu_has_adx()         vcpu_has(         7, EBX, 19, ctxt, ops)
 #define vcpu_has_smap()        vcpu_has(         7, EBX, 20, ctxt, ops)
 #define vcpu_has_clflushopt()  vcpu_has(         7, EBX, 23, ctxt, ops)
 #define vcpu_has_clwb()        vcpu_has(         7, EBX, 24, ctxt, ops)
 #define vcpu_has_sha()         vcpu_has(         7, EBX, 29, ctxt, ops)
+#define vcpu_has_avx512bw()    vcpu_has(         7, EBX, 30, ctxt, ops)
 #define vcpu_has_rdpid()       vcpu_has(         7, ECX, 22, ctxt, ops)
 #define vcpu_has_clzero()      vcpu_has(0x80000008, EBX,  0, ctxt, ops)
 
@@ -2396,6 +2405,18 @@ x86_decode_twobyte(
         }
         break;
 
+    case X86EMUL_OPC_VEX(0, 0x90):    /* kmov{w,q} */
+    case X86EMUL_OPC_VEX_66(0, 0x90): /* kmov{b,d} */
+        state->desc = DstReg | SrcMem | Mov;
+        state->simd_size = simd_other;
+        break;
+
+    case X86EMUL_OPC_VEX(0, 0x91):    /* kmov{w,q} */
+    case X86EMUL_OPC_VEX_66(0, 0x91): /* kmov{b,d} */
+        state->desc = DstMem | SrcReg | Mov;
+        state->simd_size = simd_other;
+        break;
+
     case 0xae:
         ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
         /* fall through */
@@ -6002,6 +6023,60 @@ x86_emulate(
             dst.val = src.val;
         break;
 
+    case X86EMUL_OPC_VEX(0x0f, 0x4a):    /* kadd{w,q} k,k,k */
+        if ( !vex.w )
+            host_and_vcpu_must_have(avx512dq);
+        /* fall through */
+    case X86EMUL_OPC_VEX(0x0f, 0x41):    /* kand{w,q} k,k,k */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x41): /* kand{b,d} k,k,k */
+    case X86EMUL_OPC_VEX(0x0f, 0x42):    /* kandn{w,q} k,k,k */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x42): /* kandn{b,d} k,k,k */
+    case X86EMUL_OPC_VEX(0x0f, 0x45):    /* kor{w,q} k,k,k */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x45): /* kor{b,d} k,k,k */
+    case X86EMUL_OPC_VEX(0x0f, 0x46):    /* kxnor{w,q} k,k,k */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x46): /* kxnor{b,d} k,k,k */
+    case X86EMUL_OPC_VEX(0x0f, 0x47):    /* kxor{w,q} k,k,k */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x47): /* kxor{b,d} k,k,k */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x4a): /* kadd{b,d} k,k,k */
+        generate_exception_if(!vex.l, EXC_UD);
+    opmask_basic:
+        if ( vex.w )
+            host_and_vcpu_must_have(avx512bw);
+        else if ( vex.pfx )
+            host_and_vcpu_must_have(avx512dq);
+    opmask_common:
+        host_and_vcpu_must_have(avx512f);
+        generate_exception_if(!vex.r || (mode_64bit() && !(vex.reg & 8)) ||
+                              ea.type != OP_REG, EXC_UD);
+
+        vex.reg |= 8;
+        d &= ~TwoOp;
+
+        get_fpu(X86EMUL_FPU_opmask);
+
+        opc = init_prefixes(stub);
+        opc[0] = b;
+        opc[1] = modrm;
+        insn_bytes = PFX_BYTES + 2;
+
+        state->simd_size = simd_other;
+        op_bytes = 1; /* Any non-zero value will do. */
+        break;
+
+    case X86EMUL_OPC_VEX(0x0f, 0x44):    /* knot{w,q} k,k */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x44): /* knot{b,d} k,k */
+        generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
+        goto opmask_basic;
+
+    case X86EMUL_OPC_VEX(0x0f, 0x4b):    /* kunpck{w,d}{d,q} k,k,k */
+        generate_exception_if(!vex.l, EXC_UD);
+        host_and_vcpu_must_have(avx512bw);
+        goto opmask_common;
+
+    case X86EMUL_OPC_VEX_66(0x0f, 0x4b): /* kunpckbw k,k,k */
+        generate_exception_if(!vex.l || vex.w, EXC_UD);
+        goto opmask_common;
+
     CASE_SIMD_PACKED_FP(, 0x0f, 0x50):     /* movmskp{s,d} xmm,reg */
     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x50): /* vmovmskp{s,d} {x,y}mm,reg */
     CASE_SIMD_PACKED_INT(0x0f, 0xd7):      /* pmovmskb {,x}mm,reg */
@@ -6552,6 +6627,154 @@ x86_emulate(
         dst.val = test_cc(b, _regs.eflags);
         break;
 
+    case X86EMUL_OPC_VEX(0x0f, 0x91):    /* kmov{w,q} k,mem */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x91): /* kmov{b,d} k,mem */
+        generate_exception_if(ea.type != OP_MEM, EXC_UD);
+        /* fall through */
+    case X86EMUL_OPC_VEX(0x0f, 0x90):    /* kmov{w,q} k/mem,k */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x90): /* kmov{b,d} k/mem,k */
+        generate_exception_if(vex.l || !vex.r, EXC_UD);
+        host_and_vcpu_must_have(avx512f);
+        if ( vex.w )
+        {
+            host_and_vcpu_must_have(avx512bw);
+            op_bytes = 4 << !vex.pfx;
+        }
+        else if ( vex.pfx )
+        {
+            host_and_vcpu_must_have(avx512dq);
+            op_bytes = 1;
+        }
+        else
+            op_bytes = 2;
+
+        get_fpu(X86EMUL_FPU_opmask);
+
+        opc = init_prefixes(stub);
+        opc[0] = b;
+        opc[1] = modrm;
+        if ( ea.type == OP_MEM )
+        {
+            /* convert memory operand to (%rAX) */
+            vex.b = 1;
+            opc[1] &= 0x38;
+        }
+        insn_bytes = PFX_BYTES + 2;
+        break;
+
+    case X86EMUL_OPC_VEX(0x0f, 0x92):    /* kmovw r32,k */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x92): /* kmovb r32,k */
+    case X86EMUL_OPC_VEX_F2(0x0f, 0x92): /* kmov{d,q} reg,k */
+        generate_exception_if(vex.l || !vex.r || vex.reg != 0xf ||
+                              ea.type != OP_REG, EXC_UD);
+
+        host_and_vcpu_must_have(avx512f);
+        if ( vex.pfx == vex_f2 )
+            host_and_vcpu_must_have(avx512bw);
+        else
+        {
+            generate_exception_if(vex.w, EXC_UD);
+            if ( vex.pfx )
+                host_and_vcpu_must_have(avx512dq);
+        }
+
+        get_fpu(X86EMUL_FPU_opmask);
+
+        opc = init_prefixes(stub);
+        opc[0] = b;
+        /* Convert GPR source to %rAX. */
+        vex.b = 1;
+        if ( !mode_64bit() )
+            vex.w = 0;
+        opc[1] = modrm & 0xf8;
+        opc[2] = 0xc3;
+
+        copy_VEX(opc, vex);
+        ea.reg = decode_gpr(&_regs, modrm_rm);
+        invoke_stub("", "", "=m" (dummy) : "a" (*ea.reg));
+
+        put_stub(stub);
+
+        ASSERT(!state->simd_size);
+        dst.type = OP_NONE;
+        break;
+
+    case X86EMUL_OPC_VEX(0x0f, 0x93):    /* kmovw k,r32 */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x93): /* kmovb k,r32 */
+    case X86EMUL_OPC_VEX_F2(0x0f, 0x93): /* kmov{d,q} k,reg */
+        generate_exception_if(vex.l || vex.reg != 0xf || ea.type != OP_REG,
+                              EXC_UD);
+        dst = ea;
+        dst.reg = decode_gpr(&_regs, modrm_reg);
+
+        host_and_vcpu_must_have(avx512f);
+        if ( vex.pfx == vex_f2 )
+        {
+            host_and_vcpu_must_have(avx512bw);
+            dst.bytes = 4 << (mode_64bit() && vex.w);
+        }
+        else
+        {
+            generate_exception_if(vex.w, EXC_UD);
+            dst.bytes = 4;
+            if ( vex.pfx )
+                host_and_vcpu_must_have(avx512dq);
+        }
+
+        get_fpu(X86EMUL_FPU_opmask);
+
+        opc = init_prefixes(stub);
+        opc[0] = b;
+        /* Convert GPR destination to %rAX. */
+        vex.r = 1;
+        if ( !mode_64bit() )
+            vex.w = 0;
+        opc[1] = modrm & 0xc7;
+        opc[2] = 0xc3;
+
+        copy_VEX(opc, vex);
+        invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0));
+
+        put_stub(stub);
+
+        ASSERT(!state->simd_size);
+        break;
+
+    case X86EMUL_OPC_VEX(0x0f, 0x99):    /* ktest{w,q} k,k */
+        if ( !vex.w )
+            host_and_vcpu_must_have(avx512dq);
+        /* fall through */
+    case X86EMUL_OPC_VEX(0x0f, 0x98):    /* kortest{w,q} k,k */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x98): /* kortest{b,d} k,k */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x99): /* ktest{b,d} k,k */
+        generate_exception_if(vex.l || !vex.r || vex.reg != 0xf ||
+                              ea.type != OP_REG, EXC_UD);
+        host_and_vcpu_must_have(avx512f);
+        if ( vex.w )
+            host_and_vcpu_must_have(avx512bw);
+        else if ( vex.pfx )
+            host_and_vcpu_must_have(avx512dq);
+
+        get_fpu(X86EMUL_FPU_opmask);
+
+        opc = init_prefixes(stub);
+        opc[0] = b;
+        opc[1] = modrm;
+        opc[2] = 0xc3;
+
+        copy_VEX(opc, vex);
+        invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),
+                    _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),
+                    [eflags] "+g" (_regs.eflags),
+                    "=a" (dst.val), [tmp] "=&r" (dummy)
+                    : [mask] "i" (EFLAGS_MASK));
+
+        put_stub(stub);
+
+        ASSERT(!state->simd_size);
+        dst.type = OP_NONE;
+        break;
+
     case X86EMUL_OPC(0x0f, 0xa2): /* cpuid */
         msr_val = 0;
         fail_if(ops->cpuid == NULL);
@@ -8170,6 +8393,23 @@ x86_emulate(
         generate_exception_if(vex.l, EXC_UD);
         goto simd_0f_imm8_avx;
 
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x30): /* kshiftr{b,w} $imm8,k,k */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x32): /* kshiftl{b,w} $imm8,k,k */
+        if ( !vex.w )
+            host_and_vcpu_must_have(avx512dq);
+    opmask_shift_imm:
+        generate_exception_if(vex.l || !vex.r || vex.reg != 0xf ||
+                              ea.type != OP_REG, EXC_UD);
+        host_and_vcpu_must_have(avx512f);
+        get_fpu(X86EMUL_FPU_opmask);
+        op_bytes = 1; /* Any non-zero value will do. */
+        goto simd_0f_imm8;
+
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x31): /* kshiftr{d,q} $imm8,k,k */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x33): /* kshiftl{d,q} $imm8,k,k */
+        host_and_vcpu_must_have(avx512bw);
+        goto opmask_shift_imm;
+
     case X86EMUL_OPC_66(0x0f3a, 0x44):     /* pclmulqdq $imm8,xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x44): /* vpclmulqdq 
$imm8,xmm/m128,xmm,xmm */
         host_and_vcpu_must_have(pclmulqdq);
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h
index afad760dbc..3750f0c91c 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -170,6 +170,7 @@ enum x86_emulate_fpu_type {
     X86EMUL_FPU_mmx, /* MMX instruction set (%mm0-%mm7) */
     X86EMUL_FPU_xmm, /* SSE instruction set (%xmm0-%xmm7/15) */
     X86EMUL_FPU_ymm, /* AVX/XOP instruction set (%ymm0-%ymm7/15) */
+    X86EMUL_FPU_opmask, /* AVX512 opmask instruction set (%k0-%k7) */
     /* This sentinel will never be passed to ->get_fpu(). */
     X86EMUL_FPU_none
 };
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 5343ddc3c3..7e11a458bd 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -99,9 +99,12 @@
 #define cpu_has_rtm             boot_cpu_has(X86_FEATURE_RTM)
 #define cpu_has_fpu_sel         (!boot_cpu_has(X86_FEATURE_NO_FPU_SEL))
 #define cpu_has_mpx             boot_cpu_has(X86_FEATURE_MPX)
+#define cpu_has_avx512f         boot_cpu_has(X86_FEATURE_AVX512F)
+#define cpu_has_avx512dq        boot_cpu_has(X86_FEATURE_AVX512DQ)
 #define cpu_has_rdseed          boot_cpu_has(X86_FEATURE_RDSEED)
 #define cpu_has_smap            boot_cpu_has(X86_FEATURE_SMAP)
 #define cpu_has_sha             boot_cpu_has(X86_FEATURE_SHA)
+#define cpu_has_avx512bw        boot_cpu_has(X86_FEATURE_AVX512BW)
 
 /* CPUID level 0x80000007.edx */
 #define cpu_has_itsc            boot_cpu_has(X86_FEATURE_ITSC)
--
generated by git-patchbot for /home/xen/git/xen.git#master

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.