[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2] x86: Avoid using .byte for instructions where safe to do so


  • To: "Andrew Cooper" <andrew.cooper3@xxxxxxxxxx>, Xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: "Teddy Astie" <teddy.astie@xxxxxxxxxx>
  • Date: Thu, 09 Apr 2026 12:22:04 +0000
  • Authentication-results: eu.smtp.expurgate.cloud; dkim=pass header.s=mte1 header.d=mandrillapp.com header.i="@mandrillapp.com" header.h="From:Subject:Message-Id:To:Cc:References:In-Reply-To:Feedback-ID:Date:MIME-Version:Content-Type:Content-Transfer-Encoding"; dkim=pass header.s=mte1 header.d=vates.tech header.i="teddy.astie@xxxxxxxxxx" header.h="From:Subject:Message-Id:To:Cc:References:In-Reply-To:Feedback-ID:Date:MIME-Version:Content-Type:Content-Transfer-Encoding"
  • Cc: "Jan Beulich" <jbeulich@xxxxxxxx>, "Roger Pau Monné" <roger.pau@xxxxxxxxxx>
  • Delivery-date: Thu, 09 Apr 2026 12:22:13 +0000
  • Feedback-id: 30504962:30504962.20260409:md
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

Le 09/04/2026 à 13:43, Andrew Cooper a écrit :
> The new toolchain baseline knows XGETBV, VPXOR and VPOR.
>
> For the other cases using .byte, annotate the toolchain minima.
>
> No functional change.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> ---
> CC: Jan Beulich <jbeulich@xxxxxxxx>
> CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> CC: Teddy Astie <teddy.astie@xxxxxxxxxx>
>
> v2
>   * CLZERO, {WR,RD}PKRU can't be named yet.
>
> Pull out of previous series as it's somewhat unrelated.  The XSAVE cleanup has
> other prerequisites before it can move away from .byte.
> ---
>   xen/arch/x86/arch.mk                   |  4 +++
>   xen/arch/x86/include/asm/asm-defns.h   |  1 +
>   xen/arch/x86/include/asm/msr.h         |  2 ++
>   xen/arch/x86/include/asm/prot-key.h    |  4 +--
>   xen/arch/x86/include/asm/xstate.h      |  3 +--
>   xen/arch/x86/x86_emulate/0f01.c        |  2 +-
>   xen/arch/x86/x86_emulate/x86_emulate.c | 34 ++++++++++++--------------
>   7 files changed, 27 insertions(+), 23 deletions(-)
>
> diff --git a/xen/arch/x86/arch.mk b/xen/arch/x86/arch.mk
> index 0b42e6312fac..cd0602a79aaf 100644
> --- a/xen/arch/x86/arch.mk
> +++ b/xen/arch/x86/arch.mk
> @@ -17,7 +17,11 @@ CFLAGS-$(CONFIG_CC_IS_GCC) += -malign-data=abi
>   $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
>   $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
>   $(call as-option-add,CFLAGS,CC,".equ \"x\"$(comma)1",-DHAVE_AS_QUOTED_SYM)
> +
> +# Binutils >= 2.31, Clang >= 7
>   $(call as-option-add,CFLAGS,CC,"movdiri %rax$(comma)(%rax)",-DHAVE_AS_MOVDIR)
> +
> +# Binutils >= 2.33, Clang >= 9
>   $(call as-option-add,CFLAGS,CC,"enqcmd (%rax)$(comma)%rax",-DHAVE_AS_ENQCMD)
>
>   # Check to see whether the assembler supports the .nop directive.
> diff --git a/xen/arch/x86/include/asm/asm-defns.h b/xen/arch/x86/include/asm/asm-defns.h
> index 239dc3af096c..dc9b3ce272fd 100644
> --- a/xen/arch/x86/include/asm/asm-defns.h
> +++ b/xen/arch/x86/include/asm/asm-defns.h
> @@ -1,5 +1,6 @@
>   #include <asm/page-bits.h>
>
> +/* binutils >= 2.26 or Clang >= 3.8 */
>   .macro clzero
>       .byte 0x0f, 0x01, 0xfc
>   .endm
> diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h
> index 941a7612f4ba..1377d156f4e1 100644
> --- a/xen/arch/x86/include/asm/msr.h
> +++ b/xen/arch/x86/include/asm/msr.h
> @@ -63,6 +63,8 @@ static inline void wrmsrns(uint32_t msr, uint64_t val)
>       /*
>        * WRMSR is 2 bytes.  WRMSRNS is 3 bytes.  Pad WRMSR with a redundant CS
>        * prefix to avoid a trailing NOP.
> +     *
> +     * Binutils >= 2.40, Clang >= 16
>        */
>       alternative_input(".byte 0x2e; wrmsr",
>                         ".byte 0x0f,0x01,0xc6", X86_FEATURE_WRMSRNS,
> diff --git a/xen/arch/x86/include/asm/prot-key.h b/xen/arch/x86/include/asm/prot-key.h
> index 8fb15b5c32e9..e8550e0c9203 100644
> --- a/xen/arch/x86/include/asm/prot-key.h
> +++ b/xen/arch/x86/include/asm/prot-key.h
> @@ -19,7 +19,7 @@ static inline uint32_t rdpkru(void)
>   {
>       uint32_t pkru;
>
> -    asm volatile ( ".byte 0x0f,0x01,0xee"
> +    asm volatile ( ".byte 0x0f,0x01,0xee" /* binutils >= 2.26 or Clang >= 3.8 */
>                      : "=a" (pkru) : "c" (0) : "dx" );
>
>       return pkru;
> @@ -27,7 +27,7 @@ static inline uint32_t rdpkru(void)
>
>   static inline void wrpkru(uint32_t pkru)
>   {
> -    asm volatile ( ".byte 0x0f,0x01,0xef"
> +    asm volatile ( ".byte 0x0f,0x01,0xef" /* binutils >= 2.26 or Clang >= 3.8 */
>                      :: "a" (pkru), "d" (0), "c" (0) );
>   }
>
> diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h
> index c96d75e38b25..0519379edb57 100644
> --- a/xen/arch/x86/include/asm/xstate.h
> +++ b/xen/arch/x86/include/asm/xstate.h
> @@ -118,8 +118,7 @@ static inline uint64_t xgetbv(unsigned int index)
>       uint32_t lo, hi;
>
>       ASSERT(index); /* get_xcr0() should be used instead. */
> -    asm volatile ( ".byte 0x0f,0x01,0xd0" /* xgetbv */
> -                   : "=a" (lo), "=d" (hi) : "c" (index) );
> +    asm volatile ( "xgetbv" : "=a" (lo), "=d" (hi) : "c" (index) );
>
>       return lo | ((uint64_t)hi << 32);
>   }
> diff --git a/xen/arch/x86/x86_emulate/0f01.c b/xen/arch/x86/x86_emulate/0f01.c
> index 4d36c7d289a5..87d338f0c74a 100644
> --- a/xen/arch/x86/x86_emulate/0f01.c
> +++ b/xen/arch/x86/x86_emulate/0f01.c
> @@ -122,7 +122,7 @@ int x86emul_0f01(struct x86_emulate_state *s,
>           {
>           case vex_none: /* serialize */
>               host_and_vcpu_must_have(serialize);
> -            asm volatile ( ".byte 0x0f, 0x01, 0xe8" );
> +            asm volatile ( ".byte 0x0f, 0x01, 0xe8" ); /* Binutils >= 2.34, Clang >= 11 */
>               break;
>           case vex_f2: /* xsusldtrk */
>               vcpu_must_have(tsxldtrk);
> diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
> index 11d145e17723..e58735ee9590 100644
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -4748,27 +4748,25 @@ x86_emulate(
>                    */
>                   if ( vex.l )
>                   {
> -                    /* vpxor %xmmN, %xmmN, %xmmN */
> -                    asm volatile ( ".byte 0xc5,0xf9,0xef,0xc0" );
> -                    asm volatile ( ".byte 0xc5,0xf1,0xef,0xc9" );
> -                    asm volatile ( ".byte 0xc5,0xe9,0xef,0xd2" );
> -                    asm volatile ( ".byte 0xc5,0xe1,0xef,0xdb" );
> -                    asm volatile ( ".byte 0xc5,0xd9,0xef,0xe4" );
> -                    asm volatile ( ".byte 0xc5,0xd1,0xef,0xed" );
> -                    asm volatile ( ".byte 0xc5,0xc9,0xef,0xf6" );
> -                    asm volatile ( ".byte 0xc5,0xc1,0xef,0xff" );
> +                    asm volatile ( "vpxor %xmm0, %xmm0, %xmm0" );
> +                    asm volatile ( "vpxor %xmm1, %xmm1, %xmm1" );
> +                    asm volatile ( "vpxor %xmm2, %xmm2, %xmm2" );
> +                    asm volatile ( "vpxor %xmm3, %xmm3, %xmm3" );
> +                    asm volatile ( "vpxor %xmm4, %xmm4, %xmm4" );
> +                    asm volatile ( "vpxor %xmm5, %xmm5, %xmm5" );
> +                    asm volatile ( "vpxor %xmm6, %xmm6, %xmm6" );
> +                    asm volatile ( "vpxor %xmm7, %xmm7, %xmm7" );
>                   }
>                   else
>                   {
> -                    /* vpor %xmmN, %xmmN, %xmmN */
> -                    asm volatile ( ".byte 0xc5,0xf9,0xeb,0xc0" );
> -                    asm volatile ( ".byte 0xc5,0xf1,0xeb,0xc9" );
> -                    asm volatile ( ".byte 0xc5,0xe9,0xeb,0xd2" );
> -                    asm volatile ( ".byte 0xc5,0xe1,0xeb,0xdb" );
> -                    asm volatile ( ".byte 0xc5,0xd9,0xeb,0xe4" );
> -                    asm volatile ( ".byte 0xc5,0xd1,0xeb,0xed" );
> -                    asm volatile ( ".byte 0xc5,0xc9,0xeb,0xf6" );
> -                    asm volatile ( ".byte 0xc5,0xc1,0xeb,0xff" );
> +                    asm volatile ( "vpor %xmm0, %xmm0, %xmm0" );
> +                    asm volatile ( "vpor %xmm1, %xmm1, %xmm1" );
> +                    asm volatile ( "vpor %xmm2, %xmm2, %xmm2" );
> +                    asm volatile ( "vpor %xmm3, %xmm3, %xmm3" );
> +                    asm volatile ( "vpor %xmm4, %xmm4, %xmm4" );
> +                    asm volatile ( "vpor %xmm5, %xmm5, %xmm5" );
> +                    asm volatile ( "vpor %xmm6, %xmm6, %xmm6" );
> +                    asm volatile ( "vpor %xmm7, %xmm7, %xmm7" );
>                   }
>
>                   ASSERT(!state->simd_size);

Reviewed-by: Teddy Astie <teddy.astie@xxxxxxxxxx>


--
Teddy Astie | Vates XCP-ng Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech





 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.