x86emul: correct vzero{all,upper} for non-64-bit-mode

The registers only accessible in 64-bit mode need to be left alone in
this case.

Reported-by: Andrew Cooper
Signed-off-by: Jan Beulich

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -2910,6 +2910,45 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+#ifdef __x86_64__
+    printf("%-40s", "Testing vzeroupper (compat)...");
+    if ( cpu_has_avx )
+    {
+        decl_insn(vzeroupper);
+
+        ctxt.sp_size = ctxt.addr_size = 32;
+
+        asm volatile ( "vxorps %xmm2, %xmm2, %xmm3\n"
+                       "vcmpeqps %ymm3, %ymm3, %ymm4\n"
+                       "vmovaps %ymm4, %ymm9\n"
+                       put_insn(vzeroupper, "vzeroupper") );
+
+        set_insn(vzeroupper);
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vzeroupper) )
+            goto fail;
+
+        /* XMM0...XMM7 should have their high parts cleared. */
+        asm ( "vextractf128 $1, %%ymm4, %%xmm0\n\t"
+              "vpmovmskb %%xmm4, %0\n\t"
+              "vpmovmskb %%xmm0, %1" : "=r" (rc), "=r" (i) );
+        if ( rc != 0xffff || i )
+            goto fail;
+
+        /* XMM8...XMM15 should have their high parts preserved. */
+        asm ( "vextractf128 $1, %%ymm9, %%xmm1\n\t"
+              "vpmovmskb %%xmm9, %0\n\t"
+              "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) );
+        if ( rc != 0xffff || i != 0xffff )
+            goto fail;
+        printf("okay\n");
+
+        ctxt.sp_size = ctxt.addr_size = 64;
+    }
+    else
+        printf("skipped\n");
+#endif
+
 #undef decl_insn
 #undef put_insn
 #undef set_insn
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -6277,6 +6277,45 @@ x86_emulate(
             generate_exception_if(vex.reg != 0xf, EXC_UD);
             host_and_vcpu_must_have(avx);
             get_fpu(X86EMUL_FPU_ymm, &fic);
+
+#ifdef __x86_64__
+            if ( !mode_64bit() )
+            {
+                /*
+                 * Can't use the actual instructions here, as we must not
+                 * touch YMM8...YMM15.
+                 */
+                if ( vex.l )
+                {
+                    /* vpxor %xmmN, %xmmN, %xmmN */
+                    asm volatile ( ".byte 0xc5,0xf9,0xef,0xc0" );
+                    asm volatile ( ".byte 0xc5,0xf1,0xef,0xc9" );
+                    asm volatile ( ".byte 0xc5,0xe9,0xef,0xd2" );
+                    asm volatile ( ".byte 0xc5,0xe1,0xef,0xdb" );
+                    asm volatile ( ".byte 0xc5,0xd9,0xef,0xe4" );
+                    asm volatile ( ".byte 0xc5,0xd1,0xef,0xed" );
+                    asm volatile ( ".byte 0xc5,0xc9,0xef,0xf6" );
+                    asm volatile ( ".byte 0xc5,0xc1,0xef,0xff" );
+                }
+                else
+                {
+                    /* vpor %xmmN, %xmmN, %xmmN */
+                    asm volatile ( ".byte 0xc5,0xf9,0xeb,0xc0" );
+                    asm volatile ( ".byte 0xc5,0xf1,0xeb,0xc9" );
+                    asm volatile ( ".byte 0xc5,0xe9,0xeb,0xd2" );
+                    asm volatile ( ".byte 0xc5,0xe1,0xeb,0xdb" );
+                    asm volatile ( ".byte 0xc5,0xd9,0xeb,0xe4" );
+                    asm volatile ( ".byte 0xc5,0xd1,0xeb,0xed" );
+                    asm volatile ( ".byte 0xc5,0xc9,0xeb,0xf6" );
+                    asm volatile ( ".byte 0xc5,0xc1,0xeb,0xff" );
+                }
+
+                put_fpu(&fic);
+
+                ASSERT(!state->simd_size);
+                break;
+            }
+#endif
         }
         else
         {
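
For reference (not part of the patch): the fix relies on VEX.128-encoded
writes zero-extending their XMM destination into the full YMM register,
which is why a vpxor/vpor on %xmm0...%xmm7 is enough to emulate
vzeroall/vzeroupper while leaving YMM8...YMM15 untouched. Below is a
minimal standalone sketch of that property, assuming an AVX-capable host
and GCC-style inline asm; the register choices and the demo program
itself are illustrative only.

#include <stdio.h>

int main(void)
{
    unsigned int lo, hi;

    asm volatile ( "vxorps %%xmm1, %%xmm1, %%xmm1\n\t"   /* ymm1 = 0 */
                   "vcmpeqps %%ymm1, %%ymm1, %%ymm0\n\t" /* ymm0 = all ones */
                   ".byte 0xc5,0xf9,0xeb,0xc0\n\t"       /* vpor %xmm0,%xmm0,%xmm0 (VEX.128) */
                   "vextractf128 $1, %%ymm0, %%xmm1\n\t" /* xmm1 = upper half of ymm0 */
                   "vpmovmskb %%xmm0, %0\n\t"
                   "vpmovmskb %%xmm1, %1"
                   : "=r" (lo), "=r" (hi) : : "xmm0", "xmm1" );

    /* Lower half survives the VEX.128 write, upper half is zeroed. */
    printf("low %#x (expect 0xffff), high %#x (expect 0)\n", lo, hi);

    return 0;
}

Built with e.g. plain gcc and run on an AVX-capable CPU, this should
report 0xffff for the low half and 0 for the high half, matching what
the compat-mode vzeroupper test above checks for YMM0...YMM7.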