|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 2/3] x86emul: support {,V}LDDQU
Also take the opportunity to adjust the vmovdqu test case that the new
one here has been cloned from: to zero a YMM register we don't need to
go through hoops, as 128-bit AVX insns zero the upper portion of the
destination register, and in the disabled AVX2 code a wrong YMM
register was being used.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -968,12 +968,7 @@ int main(int argc, char **argv)
{
decl_insn(vmovdqu_from_mem);
-#if 0 /* Don't use AVX2 instructions for now */
- asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n"
-#else
- asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t"
- "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n"
-#endif
+ asm volatile ( "vpxor %%xmm4, %%xmm4, %%xmm4\n"
put_insn(vmovdqu_from_mem, "vmovdqu (%0), %%ymm4")
:: "d" (NULL) );
@@ -987,7 +982,7 @@ int main(int argc, char **argv)
#if 0 /* Don't use AVX2 instructions for now */
asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
"vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
- "vpmovmskb %%ymm1, %0" : "=r" (rc) );
+ "vpmovmskb %%ymm0, %0" : "=r" (rc) );
#else
asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t"
"vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"
@@ -1404,6 +1399,67 @@ int main(int argc, char **argv)
printf("skipped\n");
#endif
+ printf("%-40s", "Testing lddqu 4(%edx),%xmm4...");
+ if ( stack_exec && cpu_has_sse3 )
+ {
+ decl_insn(lddqu);
+
+ asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n"
+ put_insn(lddqu, "lddqu 4(%0), %%xmm4")
+ :: "d" (NULL) );
+
+ set_insn(lddqu);
+ memset(res, 0x55, 64);
+ memset(res + 1, 0xff, 16);
+ regs.edx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(lddqu) )
+ goto fail;
+ asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
+ "pcmpeqb %%xmm4, %%xmm2\n\t"
+ "pmovmskb %%xmm2, %0" : "=r" (rc) );
+ if ( rc != 0xffff )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
+ printf("%-40s", "Testing vlddqu (%ecx),%ymm4...");
+ if ( stack_exec && cpu_has_avx )
+ {
+ decl_insn(vlddqu);
+
+ asm volatile ( "vpxor %%xmm4, %%xmm4, %%xmm4\n"
+ put_insn(vlddqu, "vlddqu (%0), %%ymm4")
+ :: "c" (NULL) );
+
+ set_insn(vlddqu);
+ memset(res + 1, 0xff, 32);
+ regs.ecx = (unsigned long)(res + 1);
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(vlddqu) )
+ goto fail;
+#if 0 /* Don't use AVX2 instructions for now */
+ asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
+ "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
+ "vpmovmskb %%ymm0, %0" : "=r" (rc) );
+#else
+ asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t"
+ "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"
+ "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t"
+ "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t"
+ "vpmovmskb %%xmm0, %0\n\t"
+ "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) );
+ rc |= i << 16;
+#endif
+ if ( ~rc )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
#undef decl_insn
#undef put_insn
#undef set_insn
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -71,6 +71,12 @@ static int cpuid(
(edx & (1U << 26)) != 0; \
})
+#define cpu_has_sse3 ({ \
+ unsigned int eax = 1, ecx = 0; \
+ emul_test_cpuid(&eax, &eax, &ecx, &eax, NULL); \
+ (ecx & (1U << 0)) != 0; \
+})
+
#define cpu_has_xsave ({ \
unsigned int eax = 1, ecx = 0; \
emul_test_cpuid(&eax, &eax, &ecx, &eax, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4993,6 +4993,9 @@ x86_emulate(
case X86EMUL_OPC_66(0x0f, 0xe7): /* movntdq xmm,m128 */
case X86EMUL_OPC_VEX_66(0x0f, 0xe7): /* vmovntdq xmm,m128 */
/* vmovntdq ymm,m256 */
+ case X86EMUL_OPC_F2(0x0f, 0xf0): /* lddqu xmm,m128 */
+ case X86EMUL_OPC_VEX_F2(0x0f, 0xf0): /* vlddqu xmm,m128 */
+ /* vlddqu ymm,m256 */
fail_if(ea.type != OP_MEM);
/* fall through */
case X86EMUL_OPC(0x0f, 0x6e): /* movd r/m32,mm */
@@ -5040,6 +5043,11 @@ x86_emulate(
{
switch ( vex.pfx )
{
+ case vex_f2:
+ /* Converting lddqu to movdqa (see also below). */
+ vcpu_must_have(sse3);
+ buf[3] = 0x6f;
+ /* fall through */
case vex_66:
case vex_f3:
host_and_vcpu_must_have(sse2);
@@ -5056,8 +5064,6 @@ x86_emulate(
get_fpu(X86EMUL_FPU_mmx, &fic);
ea.bytes = 8;
break;
- default:
- goto cannot_emulate;
}
}
else
@@ -5079,6 +5085,7 @@ x86_emulate(
ea.bytes = 8;
/* fall through */
case 0x6f:
+ case 0xf0:
load = true;
}
break;
@@ -5094,7 +5101,7 @@ x86_emulate(
{
uint32_t mxcsr = 0;
- if ( ea.bytes < 16 || vex.pfx == vex_f3 )
+ if ( ea.bytes < 16 || vex.pfx >= vex_f3 )
mxcsr = MXCSR_MM;
else if ( vcpu_has_misalignsse() )
asm ( "stmxcsr %0" : "=m" (mxcsr) );
Attachment:
x86emul-LDDQU.patch _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |