Xen project Mailing List

[Xen-devel] [PATCH v7 46/49] x86emul: support VAES insns

To: "xen-devel" <xen-devel@xxxxxxxxxxxxxxxxxxxx>

From: "Jan Beulich" <JBeulich@xxxxxxxx>

Date: Wed, 19 Dec 2018 08:07:02 -0700

Cc: George Dunlap <George.Dunlap@xxxxxxxxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wei.liu2@xxxxxxxxxx>, Roger Pau Monne <roger.pau@xxxxxxxxxx>

Delivery-date: Wed, 19 Dec 2018 15:07:10 +0000

List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

As to the feature dependency adjustment, just like for VPCLMULQDQ while strictly speaking AVX is a sufficient prereq (to have YMM registers), 256-bit vectors of integers have got fully introduced with AVX2 only. Along the lines of AESNI, since the insns here and in particular their memory access patterns follow the usual scheme, I didn't think it was necessary to add a contrived test specifically for them, beyond the Disp8 scaling one. I've been carrying a todo item though to add both AES and SHA test cases. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- v7: New. --- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -591,6 +591,18 @@ static const struct test avx512_vpopcntd INSN(popcnt, 66, 0f38, 55, vl, dq, vl) }; +/* + * The uses of b in this table are simply (one of) the shortest form(s) of + * saying "no broadcast" without introducing a 128-bit granularity enumerator. + * Due to all of the insns being WIG, w, d_nb, and q_nb would all also fit. + */ +static const struct test vaes_all[] = { + INSN(aesdec, 66, 0f38, de, vl, b, vl), + INSN(aesdeclast, 66, 0f38, df, vl, b, vl), + INSN(aesenc, 66, 0f38, dc, vl, b, vl), + INSN(aesenclast, 66, 0f38, dd, vl, b, vl), +}; + static const struct test vpclmulqdq_all[] = { INSN(pclmulqdq, 66, 0f3a, 44, vl, q_nb, vl) }; @@ -975,6 +987,7 @@ void evex_disp8_test(void *instr, struct if ( cpu_has_avx512f ) { + RUN(vaes, all); RUN(vpclmulqdq, all); } } --- a/tools/tests/x86_emulator/x86-emulate.h +++ b/tools/tests/x86_emulator/x86-emulate.h @@ -144,6 +144,7 @@ static inline bool xcr0_mask(uint64_t ma #define cpu_has_avx512vl (cp.feat.avx512vl && xcr0_mask(0xe6)) #define cpu_has_avx512_vbmi (cp.feat.avx512_vbmi && xcr0_mask(0xe6)) #define cpu_has_avx512_vbmi2 (cp.feat.avx512_vbmi2 && xcr0_mask(0xe6)) +#define cpu_has_vaes (cp.feat.vaes && xcr0_mask(6)) #define cpu_has_vpclmulqdq (cp.feat.vpclmulqdq && xcr0_mask(6)) #define cpu_has_avx512_vnni (cp.feat.avx512_vnni && xcr0_mask(0xe6)) #define cpu_has_avx512_bitalg (cp.feat.avx512_bitalg && xcr0_mask(0xe6)) --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -541,7 +541,7 @@ static const struct ext0f38_table { [0xcc] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl }, [0xcd] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq }, [0xdb] = { .simd_size = simd_packed_int, .two_op = 1 }, - [0xdc ... 0xdf] = { .simd_size = simd_packed_int }, + [0xdc ... 0xdf] = { .simd_size = simd_packed_int, .d8s = d8s_vl }, [0xf0] = { .two_op = 1 }, [0xf1] = { .to_mem = 1, .two_op = 1 }, [0xf2 ... 0xf3] = {}, @@ -1922,6 +1922,7 @@ static bool vcpu_has( #define vcpu_has_avx512vl() vcpu_has( 7, EBX, 31, ctxt, ops) #define vcpu_has_avx512_vbmi() vcpu_has( 7, ECX, 1, ctxt, ops) #define vcpu_has_avx512_vbmi2() vcpu_has( 7, ECX, 6, ctxt, ops) +#define vcpu_has_vaes() vcpu_has( 7, ECX, 9, ctxt, ops) #define vcpu_has_vpclmulqdq() vcpu_has( 7, ECX, 10, ctxt, ops) #define vcpu_has_avx512_vnni() vcpu_has( 7, ECX, 11, ctxt, ops) #define vcpu_has_avx512_bitalg() vcpu_has( 7, ECX, 12, ctxt, ops) @@ -8924,13 +8925,9 @@ x86_emulate( case X86EMUL_OPC_66(0x0f38, 0xdb): /* aesimc xmm/m128,xmm */ case X86EMUL_OPC_VEX_66(0x0f38, 0xdb): /* vaesimc xmm/m128,xmm */ case X86EMUL_OPC_66(0x0f38, 0xdc): /* aesenc xmm/m128,xmm,xmm */ - case X86EMUL_OPC_VEX_66(0x0f38, 0xdc): /* vaesenc xmm/m128,xmm,xmm */ case X86EMUL_OPC_66(0x0f38, 0xdd): /* aesenclast xmm/m128,xmm,xmm */ - case X86EMUL_OPC_VEX_66(0x0f38, 0xdd): /* vaesenclast xmm/m128,xmm,xmm */ case X86EMUL_OPC_66(0x0f38, 0xde): /* aesdec xmm/m128,xmm,xmm */ - case X86EMUL_OPC_VEX_66(0x0f38, 0xde): /* vaesdec xmm/m128,xmm,xmm */ case X86EMUL_OPC_66(0x0f38, 0xdf): /* aesdeclast xmm/m128,xmm,xmm */ - case X86EMUL_OPC_VEX_66(0x0f38, 0xdf): /* vaesdeclast xmm/m128,xmm,xmm */ host_and_vcpu_must_have(aesni); if ( vex.opcx == vex_none ) goto simd_0f38_common; @@ -9630,6 +9627,25 @@ x86_emulate( host_and_vcpu_must_have(avx512er); goto simd_zmm_scalar_sae; + case X86EMUL_OPC_VEX_66(0x0f38, 0xdc): /* vaesenc {x,y}mm/mem,{x,y}mm,{x,y}mm */ + case X86EMUL_OPC_VEX_66(0x0f38, 0xdd): /* vaesenclast {x,y}mm/mem,{x,y}mm,{x,y}mm */ + case X86EMUL_OPC_VEX_66(0x0f38, 0xde): /* vaesdec {x,y}mm/mem,{x,y}mm,{x,y}mm */ + case X86EMUL_OPC_VEX_66(0x0f38, 0xdf): /* vaesdeclast {x,y}mm/mem,{x,y}mm,{x,y}mm */ + if ( !vex.l ) + host_and_vcpu_must_have(aesni); + else + host_and_vcpu_must_have(vaes); + goto simd_0f_avx; + + case X86EMUL_OPC_EVEX_66(0x0f38, 0xdc): /* vaesenc [xyz]mm/mem,[xyz]mm,[xyz]mm */ + case X86EMUL_OPC_EVEX_66(0x0f38, 0xdd): /* vaesenclast [xyz]mm/mem,[xyz]mm,[xyz]mm */ + case X86EMUL_OPC_EVEX_66(0x0f38, 0xde): /* vaesdec [xyz]mm/mem,[xyz]mm,[xyz]mm */ + case X86EMUL_OPC_EVEX_66(0x0f38, 0xdf): /* vaesdeclast [xyz]mm/mem,[xyz]mm,[xyz]mm */ + host_and_vcpu_must_have(vaes); + generate_exception_if(evex.brs || evex.opmsk, EXC_UD); + fault_suppression = false; + goto avx512f_no_sae; + case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */ case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */ vcpu_must_have(movbe); --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -113,6 +113,7 @@ /* CPUID level 0x00000007:0.ecx */ #define cpu_has_avx512_vbmi boot_cpu_has(X86_FEATURE_AVX512_VBMI) #define cpu_has_avx512_vbmi2 boot_cpu_has(X86_FEATURE_AVX512_VBMI2) +#define cpu_has_vaes boot_cpu_has(X86_FEATURE_VAES) #define cpu_has_vpclmulqdq boot_cpu_has(X86_FEATURE_VPCLMULQDQ) #define cpu_has_avx512_vnni boot_cpu_has(X86_FEATURE_AVX512_VNNI) #define cpu_has_avx512_bitalg boot_cpu_has(X86_FEATURE_AVX512_BITALG) --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -229,6 +229,7 @@ XEN_CPUFEATURE(UMIP, 6*32+ 2) / XEN_CPUFEATURE(PKU, 6*32+ 3) /*H Protection Keys for Userspace */ XEN_CPUFEATURE(OSPKE, 6*32+ 4) /*! OS Protection Keys Enable */ XEN_CPUFEATURE(AVX512_VBMI2, 6*32+ 6) /*A Additional AVX-512 Vector Byte Manipulation Instrs */ +XEN_CPUFEATURE(VAES, 6*32+ 9) /*A Vector AES Instrs */ XEN_CPUFEATURE(VPCLMULQDQ, 6*32+10) /*A Vector Carry-less Multiplication Instrs */ XEN_CPUFEATURE(AVX512_VNNI, 6*32+11) /*A Vector Neural Network Instrs */ XEN_CPUFEATURE(AVX512_BITALG, 6*32+12) /*A Support for VPOPCNT[B,W] and VPSHUFBITQMB */ --- a/xen/tools/gen-cpuid.py +++ b/xen/tools/gen-cpuid.py @@ -257,7 +257,7 @@ def crunch_numbers(state): # feature flags. If want to use AVX512, AVX2 must be supported and # enabled. Certain later extensions, acting on 256-bit vectors of # integers, better depend on AVX2 than AVX. - AVX2: [AVX512F, VPCLMULQDQ], + AVX2: [AVX512F, VAES, VPCLMULQDQ], # AVX512F is taken to mean hardware support for 512bit registers # (which in practice depends on the EVEX prefix to encode) as well _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.