Xen project Mailing List

Re: [Xen-devel] [PATCH v3 06/25] x86emul: support most remaining AVX2 insns

To: Jan Beulich <JBeulich@xxxxxxxx>, xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxxx>

From: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

Date: Thu, 1 Feb 2018 19:45:31 +0000

Cc: George Dunlap <George.Dunlap@xxxxxxxxxxxxx>

Delivery-date: Thu, 01 Feb 2018 19:46:07 +0000

List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On 07/12/17 14:03, Jan Beulich wrote: > --- a/xen/arch/x86/x86_emulate/x86_emulate.c > +++ b/xen/arch/x86/x86_emulate/x86_emulate.c > @@ -370,7 +370,7 @@ static const struct { > [0x0c ... 0x0f] = { .simd_size = simd_packed_fp }, > [0x10] = { .simd_size = simd_packed_int }, > [0x13] = { .simd_size = simd_other, .two_op = 1 }, > - [0x14 ... 0x15] = { .simd_size = simd_packed_fp }, > + [0x14 ... 0x16] = { .simd_size = simd_packed_fp }, > [0x17] = { .simd_size = simd_packed_int, .two_op = 1 }, > [0x18 ... 0x19] = { .simd_size = simd_scalar_fp, .two_op = 1 }, > [0x1a] = { .simd_size = simd_128, .two_op = 1 }, > @@ -382,9 +382,15 @@ static const struct { > [0x2c ... 0x2d] = { .simd_size = simd_other }, > [0x2e ... 0x2f] = { .simd_size = simd_other, .to_mem = 1 }, > [0x30 ... 0x35] = { .simd_size = simd_other, .two_op = 1 }, > - [0x37 ... 0x3f] = { .simd_size = simd_packed_int }, > + [0x36 ... 0x3f] = { .simd_size = simd_packed_int }, > [0x40] = { .simd_size = simd_packed_int }, > [0x41] = { .simd_size = simd_packed_int, .two_op = 1 }, > + [0x45 ... 0x47] = { .simd_size = simd_packed_int }, > + [0x58 ... 0x59] = { .simd_size = simd_other, .two_op = 1 }, > + [0x5a] = { .simd_size = simd_128, .two_op = 1 }, > + [0x78 ... 0x79] = { .simd_size = simd_other, .two_op = 1 }, > + [0x8c] = { .simd_size = simd_other }, > + [0x8e] = { .simd_size = simd_other, .to_mem = 1 }, > [0x96 ... 0x9f] = { .simd_size = simd_packed_fp }, > [0xa6 ... 0xaf] = { .simd_size = simd_packed_fp }, > [0xb6 ... 0xbf] = { .simd_size = simd_packed_fp }, > @@ -406,6 +412,9 @@ static const struct { > uint8_t two_op:1; > uint8_t four_op:1; > } ext0f3a_table[256] = { > + [0x00] = { .simd_size = simd_packed_int, .two_op = 1 }, > + [0x01] = { .simd_size = simd_packed_fp, .two_op = 1 }, > + [0x02] = { .simd_size = simd_packed_int }, > [0x04 ... 0x05] = { .simd_size = simd_packed_fp, .two_op = 1 }, > [0x06] = { .simd_size = simd_packed_fp }, > [0x08 ... 0x09] = { .simd_size = simd_packed_fp, .two_op = 1 }, > @@ -419,9 +428,12 @@ static const struct { > [0x20] = { .simd_size = simd_none }, > [0x21] = { .simd_size = simd_other }, > [0x22] = { .simd_size = simd_none }, > + [0x38] = { .simd_size = simd_128 }, > + [0x39] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 }, > [0x40 ... 0x41] = { .simd_size = simd_packed_fp }, > [0x42] = { .simd_size = simd_packed_int }, > [0x44] = { .simd_size = simd_packed_int }, > + [0x46] = { .simd_size = simd_packed_int }, > [0x4a ... 0x4b] = { .simd_size = simd_packed_fp, .four_op = 1 }, > [0x4c] = { .simd_size = simd_packed_int, .four_op = 1 }, > [0x5c ... 0x5f] = { .simd_size = simd_packed_fp, .four_op = 1 }, > @@ -2973,7 +2985,7 @@ x86_decode( > } > break; > > - case simd_scalar_fp: > + case simd_scalar_fp: /* case simd_scalar_dq: */ I don't see this case label used, or introduced in any later patches. Is it stale? > op_bytes = 4 << (ctxt->opcode & 1); > break; > > @@ -6070,6 +6082,10 @@ x86_emulate( > case X86EMUL_OPC_VEX_66(0x0f38, 0x40): /* vpmulld > {x,y}mm/mem,{x,y}mm,{x,y}mm */ > if ( !vex.l ) > goto simd_0f_avx; > + /* fall through */ > + case X86EMUL_OPC_VEX_66(0x0f38, 0x45): /* vpsrlv{d,q} > {x,y}mm/mem,{x,y}mm,{x,y}mm */ 0x46 / vpsrav{d,q}? You add a decode for it above, but I don't see an introduced case. > + case X86EMUL_OPC_VEX_66(0x0f38, 0x47): /* vpsllv{d,q} > {x,y}mm/mem,{x,y}mm,{x,y}mm */ > + simd_0f_avx2: > host_and_vcpu_must_have(avx2); > goto simd_0f_ymm; > } > @@ -6169,7 +6185,10 @@ x86_emulate( > case X86EMUL_OPC_VEX_66(0x0f3a, 0x0f): /* vpalignr > $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ > case X86EMUL_OPC_VEX_66(0x0f3a, 0x42): /* vmpsadbw > $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */ > if ( vex.l ) > + { > + simd_0f_imm8_avx2: > host_and_vcpu_must_have(avx2); > + } > else > { > case X86EMUL_OPC_VEX_66(0x0f3a, 0x08): /* vroundps > $imm8,{x,y}mm/mem,{x,y}mm */ > @@ -7150,12 +7169,16 @@ x86_emulate( > fic.insn_bytes = PFX_BYTES + 3; > break; > > - case X86EMUL_OPC_VEX_66(0x0f38, 0x19): /* vbroadcastsd m64,ymm */ > + case X86EMUL_OPC_VEX_66(0x0f38, 0x19): /* vbroadcastsd xmm/m64,ymm */ > case X86EMUL_OPC_VEX_66(0x0f38, 0x1a): /* vbroadcastf128 m128,ymm */ > generate_exception_if(!vex.l, EXC_UD); > /* fall through */ > - case X86EMUL_OPC_VEX_66(0x0f38, 0x18): /* vbroadcastss m32,{x,y}mm */ > - generate_exception_if(ea.type != OP_MEM, EXC_UD); > + case X86EMUL_OPC_VEX_66(0x0f38, 0x18): /* vbroadcastss xmm/m32,{x,y}mm */ It would help reviewability substantially if you split bugfixes of existing code out separately from introduction of new code, especially given the quantity of new additions here. These comment changes are particularly deceptive. > + if ( ea.type != OP_MEM ) > + { > + generate_exception_if(b & 2, EXC_UD); > + host_and_vcpu_must_have(avx2); > + } > /* fall through */ > case X86EMUL_OPC_VEX_66(0x0f38, 0x0c): /* vpermilps > {x,y}mm/mem,{x,y}mm,{x,y}mm */ > case X86EMUL_OPC_VEX_66(0x0f38, 0x0d): /* vpermilpd > {x,y}mm/mem,{x,y}mm,{x,y}mm */ > @@ -7254,6 +7277,11 @@ x86_emulate( > op_bytes = 8 << vex.l; > goto simd_0f_ymm; > > + case X86EMUL_OPC_VEX_66(0x0f38, 0x16): /* vpermps ymm/m256,ymm,ymm */ > + case X86EMUL_OPC_VEX_66(0x0f38, 0x36): /* vpermd ymm/m256,ymm,ymm */ > + generate_exception_if(!vex.l || vex.w, EXC_UD); > + goto simd_0f_avx2; > + Looking at these additions, the case labels look like they need sorting again. Are you going to organise that in a later patch? > case X86EMUL_OPC_VEX_66(0x0f38, 0x20): /* vpmovsxbw xmm/mem,{x,y}mm */ > case X86EMUL_OPC_VEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,{x,y}mm */ > case X86EMUL_OPC_VEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,{x,y}mm */ > @@ -7370,6 +7398,80 @@ x86_emulate( > generate_exception_if(vex.l, EXC_UD); > goto simd_0f_avx; > > + case X86EMUL_OPC_VEX_66(0x0f38, 0x58): /* vpbroadcastd xmm/m32,{x,y}mm */ > + case X86EMUL_OPC_VEX_66(0x0f38, 0x59): /* vpbroadcastq xmm/m64,{x,y}mm */ > + case X86EMUL_OPC_VEX_66(0x0f38, 0x78): /* vpbroadcastb xmm/m8,{x,y}mm */ > + case X86EMUL_OPC_VEX_66(0x0f38, 0x79): /* vpbroadcastw xmm/m16,{x,y}mm */ > + op_bytes = 1 << ((!(b & 0x20) * 2) + (b & 1)); > + /* fall through */ > + case X86EMUL_OPC_VEX_66(0x0f38, 0x46): /* vpsravd > {x,y}mm/mem,{x,y}mm,{x,y}mm */ > + generate_exception_if(vex.w, EXC_UD); Oh - here is vpsrav{d,q}. Why is it not with its companions? The manual does curiously omit mention of the W1 encoding for VEX (unlike its companions), but all 3 have W0 and W1 mentioned for EVEX encoding. Judging by them all having identical behaviour, and this one not being declared as suffering a fault because of W, I expect that it is probably encoded as WIG. I've noticed lower down as well that you are inconsistent with vex.w handling compared to the manual as to whether you reject or ignore unspecified encodings. Is this intentional, and if so, why? ~Andrew _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.