[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v6 35/42] x86emul: support AVX512PF insns
Some adjustments are necessary to the EVEX Disp8 scaling test code to account for the zero-byte reads/writes. I have to admit though that I'm not fully convinced the SDM describes the faulting behavior correctly: Other prefetch insns, including the Xeon Phi Coprocessor S/G ones, don't produce #GP/#SS. Until proven otherwise this gets implemented as specified, not least because the respective exception specification table, besides listing #GP and #SS, also explicitly says "EVEX-encoded prefetch instructions that do not cause #PF follow exception class ...". Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- v6: New. --- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -520,6 +520,17 @@ static const struct test avx512er_512[] INSN(rsqrt28, 66, 0f38, cd, el, sd, el), }; +static const struct test avx512pf_512[] = { + INSNX(gatherpf0d, 66, 0f38, c6, 1, vl, sd, el), + INSNX(gatherpf0q, 66, 0f38, c7, 1, vl, sd, el), + INSNX(gatherpf1d, 66, 0f38, c6, 2, vl, sd, el), + INSNX(gatherpf1q, 66, 0f38, c7, 2, vl, sd, el), + INSNX(scatterpf0d, 66, 0f38, c6, 5, vl, sd, el), + INSNX(scatterpf0q, 66, 0f38, c7, 5, vl, sd, el), + INSNX(scatterpf1d, 66, 0f38, c6, 6, vl, sd, el), + INSNX(scatterpf1q, 66, 0f38, c7, 6, vl, sd, el), +}; + static const struct test avx512_vbmi_all[] = { INSN(permb, 66, 0f38, 8d, vl, b, vl), INSN(permi2b, 66, 0f38, 75, vl, b, vl), @@ -580,7 +591,7 @@ static bool record_access(enum x86_segme static int read(enum x86_segment seg, unsigned long offset, void *p_data, unsigned int bytes, struct x86_emulate_ctxt *ctxt) { - if ( !record_access(seg, offset, bytes) ) + if ( !record_access(seg, offset, bytes + !bytes) ) return X86EMUL_UNHANDLEABLE; memset(p_data, 0, bytes); return X86EMUL_OKAY; @@ -589,7 +600,7 @@ static int read(enum x86_segment seg, un static int write(enum x86_segment seg, unsigned long offset, void *p_data, unsigned int bytes, struct x86_emulate_ctxt *ctxt) { - if ( !record_access(seg, offset, bytes) ) + if 
( !record_access(seg, offset, bytes + !bytes) ) return X86EMUL_UNHANDLEABLE; return X86EMUL_OKAY; } @@ -597,7 +608,7 @@ static int write(enum x86_segment seg, u static void test_one(const struct test *test, enum vl vl, unsigned char *instr, struct x86_emulate_ctxt *ctxt) { - unsigned int vsz, esz, i; + unsigned int vsz, esz, i, n; int rc; bool sg = strstr(test->mnemonic, "gather") || strstr(test->mnemonic, "scatter"); @@ -725,10 +736,20 @@ static void test_one(const struct test * for ( i = 0; i < (test->scale == SC_vl ? vsz : esz); ++i ) if ( accessed[i] ) goto fail; - for ( ; i < (test->scale == SC_vl ? vsz : esz) + (sg ? esz : vsz); ++i ) + + n = test->scale == SC_vl ? vsz : esz; + if ( !sg ) + n += vsz; + else if ( !strstr(test->mnemonic, "pf") ) + n += esz; + else + ++n; + + for ( ; i < n; ++i ) if ( accessed[i] != (sg ? (vsz / esz) >> (test->opc & 1 & !evex.w) : 1) ) goto fail; + for ( ; i < ARRAY_SIZE(accessed); ++i ) if ( accessed[i] ) goto fail; @@ -887,6 +908,8 @@ void evex_disp8_test(void *instr, struct RUN(avx512dq, no128); RUN(avx512dq, 512); RUN(avx512er, 512); +#define cpu_has_avx512pf cpu_has_avx512f + RUN(avx512pf, 512); RUN(avx512_vbmi, all); RUN(avx512_vbmi2, all); } --- a/tools/tests/x86_emulator/x86-emulate.c +++ b/tools/tests/x86_emulator/x86-emulate.c @@ -135,12 +135,12 @@ int emul_test_cpuid( res->c |= 1U << 22; /* - * The emulator doesn't itself use ADCX/ADOX/RDPID, so we can always run - * the respective tests. + * The emulator doesn't itself use ADCX/ADOX/RDPID nor the S/G prefetch + * insns, so we can always run the respective tests. 
*/ if ( leaf == 7 && subleaf == 0 ) { - res->b |= 1U << 19; + res->b |= (1U << 19) | (1U << 26); res->c |= 1U << 22; } --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -525,6 +525,7 @@ static const struct ext0f38_table { [0xbd] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq }, [0xbe] = { .simd_size = simd_packed_fp, .d8s = d8s_vl }, [0xbf] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq }, + [0xc6 ... 0xc7] = { .simd_size = simd_other, .vsib = 1, .d8s = d8s_dq }, [0xc8] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl }, [0xc9] = { .simd_size = simd_other }, [0xca] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl }, @@ -1903,6 +1904,7 @@ static bool vcpu_has( #define vcpu_has_smap() vcpu_has( 7, EBX, 20, ctxt, ops) #define vcpu_has_clflushopt() vcpu_has( 7, EBX, 23, ctxt, ops) #define vcpu_has_clwb() vcpu_has( 7, EBX, 24, ctxt, ops) +#define vcpu_has_avx512pf() vcpu_has( 7, EBX, 26, ctxt, ops) #define vcpu_has_avx512er() vcpu_has( 7, EBX, 27, ctxt, ops) #define vcpu_has_sha() vcpu_has( 7, EBX, 29, ctxt, ops) #define vcpu_has_avx512bw() vcpu_has( 7, EBX, 30, ctxt, ops) @@ -9377,6 +9379,80 @@ x86_emulate( state->simd_size = simd_none; break; + } + + case X86EMUL_OPC_EVEX_66(0x0f38, 0xc6): + case X86EMUL_OPC_EVEX_66(0x0f38, 0xc7): + { + typeof(evex) *pevex; + union { + int32_t dw[16]; + int64_t qw[8]; + } index; + + ASSERT(ea.type == OP_MEM); + generate_exception_if((!cpu_has_avx512f || !evex.opmsk || evex.br || + evex.z || evex.reg != 0xf || evex.lr != 2), + EXC_UD); + vcpu_must_have(avx512pf); + + switch ( modrm_reg & 7 ) + { + case 1: /* vgatherpf0{d,q}p{d,s} mem{k} */ + case 2: /* vgatherpf1{d,q}p{d,s} mem{k} */ + break; + case 5: /* vscatterpf0{d,q}p{d,s} mem{k} */ + case 6: /* vscatterpf1{d,q}p{d,s} mem{k} */ + fail_if(!ops->write); + break; + default: + generate_exception(EXC_UD); + } + + get_fpu(X86EMUL_FPU_zmm); + + /* Read index register. 
*/ + opc = init_evex(stub); + pevex = copy_EVEX(opc, evex); + pevex->opcx = vex_0f; + /* vmovdqu{32,64} */ + opc[0] = 0x7f; + pevex->pfx = vex_f3; + pevex->w = b & 1; + /* Use (%rax) as destination and sib_index as source. */ + pevex->b = 1; + opc[1] = (state->sib_index & 7) << 3; + pevex->r = !mode_64bit() || !(state->sib_index & 0x08); + pevex->R = !mode_64bit() || !(state->sib_index & 0x10); + pevex->RX = 1; + opc[2] = 0xc3; + + invoke_stub("", "", "=m" (index) : "a" (&index)); + put_stub(stub); + + /* Clear untouched parts of the mask value. */ + n = 1 << (4 - ((b & 1) | evex.w)); + op_mask &= (1 << n) - 1; + + for ( i = 0; rc == X86EMUL_OKAY && op_mask; ++i ) + { + signed long idx = b & 1 ? index.qw[i] : index.dw[i]; + + if ( !(op_mask & (1 << i)) ) + continue; + + rc = (modrm_reg & 4 + ? ops->write + : ops->read)(ea.mem.seg, + truncate_ea(ea.mem.off + + (idx << state->sib_scale)), + NULL, 0, ctxt); + + op_mask &= ~(1 << i); + } + + state->simd_size = simd_none; + break; } case X86EMUL_OPC(0x0f38, 0xc8): /* sha1nexte xmm/m128,xmm */ _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |