|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v4 05/20] x86emul: support 3DNow! insns
On 28/02/18 12:59, Jan Beulich wrote:
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -356,6 +356,41 @@ static const struct twobyte_table {
> };
>
> /*
> + * The next two tables are indexed by high opcode extension byte (the one
> + * that's encoded like an immediate) nibble, with each table element then
> + * bit-indexed by low opcode extension byte nibble.
> + */
> +static const uint16_t _3dnow_table[16] = {
> + [0x0] = (1 << 0xd) /* pi2fd */,
> + [0x1] = (1 << 0xd) /* pf2id */,
> + [0x9] = (1 << 0x0) /* pfcmpge */ |
> + (1 << 0x4) /* pfmin */ |
> + (1 << 0x6) /* pfrcp */ |
> + (1 << 0x7) /* pfrsqrt */ |
> + (1 << 0xa) /* pfsub */ |
> + (1 << 0xe) /* pfadd */,
> + [0xa] = (1 << 0x0) /* pfcmpge */ |
"pfcmpgt" - copy/paste mistake from above?
> + (1 << 0x4) /* pfmax */ |
> + (1 << 0x6) /* pfrcpit1 */ |
> + (1 << 0x7) /* pfrsqit1 */ |
> + (1 << 0xa) /* pfsubr */ |
> + (1 << 0xe) /* pfacc */,
> + [0xb] = (1 << 0x0) /* pfcmpeq */ |
> + (1 << 0x4) /* pfmul */ |
> + (1 << 0x6) /* pfrcpit2 */ |
> + (1 << 0x7) /* pmulhrw */ |
> + (1 << 0xf) /* pavgusb */,
> +};
> +
> +static const uint16_t _3dnow_ext_table[16] = {
> + [0x0] = (1 << 0xc) /* pi2fw */,
> + [0x1] = (1 << 0xc) /* pf2iw */,
It turns out that the AMD manual has a typo in the cross reference for
this instruction. I've asked for a correction to be made.
> + [0x8] = (1 << 0xa) /* pfnacc */ |
> + (1 << 0xa) /* pfpnacc */,
This should be 1 << 0xe (pfpnacc is suffix byte 0x8e; 0x8a is pfnacc).
> + [0xb] = (1 << 0xb) /* pfswapd */,
There is no "f" in the AMD mnemonic; it is pswapd.
> +};
> +
> +/*
> * "two_op" and "four_op" below refer to the number of register operands
> * (one of which possibly also allowing to be a memory one). The named
> * operand counts do not include any immediate operands.
> @@ -1662,6 +1697,8 @@ static bool vcpu_has(
> #define vcpu_has_rdrand() vcpu_has( 1, ECX, 30, ctxt, ops)
> #define vcpu_has_mmxext() (vcpu_has(0x80000001, EDX, 22, ctxt, ops) || \
> vcpu_has_sse())
> +#define vcpu_has_3dnow_ext() vcpu_has(0x80000001, EDX, 30, ctxt, ops)
> +#define vcpu_has_3dnow() vcpu_has(0x80000001, EDX, 31, ctxt, ops)
> #define vcpu_has_lahf_lm() vcpu_has(0x80000001, ECX, 0, ctxt, ops)
> #define vcpu_has_cr8_legacy() vcpu_has(0x80000001, ECX, 4, ctxt, ops)
> #define vcpu_has_lzcnt() vcpu_has(0x80000001, ECX, 5, ctxt, ops)
> @@ -5524,6 +5561,26 @@ x86_emulate(
> case X86EMUL_OPC(0x0f, 0x19) ... X86EMUL_OPC(0x0f, 0x1f): /* nop */
> break;
>
> + case X86EMUL_OPC(0x0f, 0x0e): /* femms */
> + host_and_vcpu_must_have(3dnow);
> + asm volatile ( "femms" );
> + break;
> +
> + case X86EMUL_OPC(0x0f, 0x0f): /* 3DNow! */
> + if ( _3dnow_ext_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) )
> + host_and_vcpu_must_have(3dnow_ext);
> + else if ( _3dnow_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) )
> + host_and_vcpu_must_have(3dnow);
I'd switch these two ifs around. We're more likely to get a hit in the
plain 3dnow space than the 3dnow_ext space.
~Andrew
> + else
> + generate_exception(EXC_UD);
> +
> + get_fpu(X86EMUL_FPU_mmx, &fic);
> +
> + d = DstReg | SrcMem;
> + op_bytes = 8;
> + state->simd_size = simd_other;
> + goto simd_0f_imm8;
> +
> #define CASE_SIMD_PACKED_INT(pfx, opc) \
> case X86EMUL_OPC(pfx, opc): \
> case X86EMUL_OPC_66(pfx, opc)
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -71,6 +71,8 @@
> && boot_cpu_has(X86_FEATURE_FFXSR))
> #define cpu_has_page1gb boot_cpu_has(X86_FEATURE_PAGE1GB)
> #define cpu_has_rdtscp boot_cpu_has(X86_FEATURE_RDTSCP)
> +#define cpu_has_3dnow_ext boot_cpu_has(X86_FEATURE_3DNOWEXT)
> +#define cpu_has_3dnow boot_cpu_has(X86_FEATURE_3DNOW)
>
> /* CPUID level 0x80000001.ecx */
> #define cpu_has_cmp_legacy boot_cpu_has(X86_FEATURE_CMP_LEGACY)
>
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |