[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Minios-devel] [UNIKRAFT PATCH v2 4/9] plat: check for and enable extended CPU features



Hi Yuri,

On 12/17/18 5:02 PM, Yuri Volchkov wrote:
Florian Schmidt <florian.schmidt@xxxxxxxxx> writes:

But only do this if code is compiled with SSE/AVX.

Signed-off-by: Florian Schmidt <florian.schmidt@xxxxxxxxx>
---
  plat/common/include/x86/cpu_defs.h | 22 ++++++++++
  plat/kvm/x86/entry64.S             | 58 +++++++++++++++++++++----
  plat/kvm/x86/setup.c               | 15 -------
  plat/xen/x86/entry64.S             | 68 +++++++++++++++++++++++++++---
  plat/xen/x86/setup.c               | 15 -------
  5 files changed, 135 insertions(+), 43 deletions(-)

diff --git a/plat/common/include/x86/cpu_defs.h 
b/plat/common/include/x86/cpu_defs.h
index 9ecec967..78821b52 100644
--- a/plat/common/include/x86/cpu_defs.h
+++ b/plat/common/include/x86/cpu_defs.h
@@ -58,6 +58,7 @@
   */
  #define X86_CR0_MP              (1 << 1)    /* Monitor Coprocessor */
  #define X86_CR0_EM              (1 << 2)    /* Emulation */
+#define X86_CR0_TS              (1 << 2)    /* Task Switched */
Did you mean 1 << 3?

Uh... right. It didn't surface as a bug immediately because I only use it once to reset a field (to be safe) that comes out of the initial CPU state after booting as 0 already. Gonna fix that.


  #define X86_CR0_NE              (1 << 5)    /* Numeric Exception */
  #define X86_CR0_PG              (1 << 31)   /* Paging */
@@ -67,10 +68,31 @@
  #define X86_CR4_PAE             (1 << 5)    /* enable PAE */
  #define X86_CR4_OSFXSR          (1 << 9)    /* OS support for FXSAVE/FXRSTOR 
*/
  #define X86_CR4_OSXMMEXCPT      (1 << 10)   /* OS support for FP exceptions */
+#define X86_CR4_FSGSBASE        (1 << 16)   /* enable FSGSBASE*/
+#define X86_CR4_OSXSAVE         (1 << 18)   /* enable XSAVE, extended states */
/*
   * Intel CPU features in EFER
   */
  #define X86_EFER_LME            (1 << 8)    /* Long mode enable (R/W) */
+/* CPUID feature bits in ECX and EDX when EAX=1 */
+#define X86_CPUID1_ECX_XSAVE    (1 << 26)
+#define X86_CPUID1_ECX_OSXSAVE  (1 << 27)
+#define X86_CPUID1_ECX_AVX      (1 << 28)
+#define X86_CPUID1_EDX_FPU      (1 << 0)
+#define X86_CPUID1_EDX_FXSR     (1 << 24)
+#define X86_CPUID1_EDX_SSE      (1 << 25)
+/* CPUID feature bits in EBX and ECX when EAX=7 */
ECX is in charge of the sub-leaf, maybe add this in comment as you did
for the next one?

Will do.


+#define X86_CPUID7_EBX_FSGSBASE (1 << 0)
+/* CPUID feature bits when EAX=0xd, EXC=1 */
Typo EXC instead of ECX

Yep.


+#define X86_CPUIDD1_EAX_XSAVEOPT (1<<0)
+
+/*
+ * Extended Control Register 0 (XCR0)
+ */
+#define X86_XCR0_X87            (1 << 0)
+#define X86_XCR0_XMM            (1 << 1)
+#define X86_XCR0_YMM            (1 << 2)
As far as I understand, the last two are enabling SSE and AVX. Maybe we
name them respectively, instead of XMM and YMM?

I originally named them like that because they govern whether XMM and YMM registers are available and can be saved by XSAVE. But now that I double-checked, I noticed the Intel documentation names them XCR0.SSE and XCR0.AVX, so I agree we should just go with that.


+
  #endif /* __PLAT_CMN_X86_CPU_DEFS_H__ */
diff --git a/plat/kvm/x86/entry64.S b/plat/kvm/x86/entry64.S
index dc3614a2..3fde22ea 100644
--- a/plat/kvm/x86/entry64.S
+++ b/plat/kvm/x86/entry64.S
@@ -172,15 +172,57 @@ ENTRY(_libkvmplat_start64)
        movq $bootstack, %rsp
        xorq %rbp, %rbp
- /* enable FPU and SSE units */
-       movq %cr0, %rax
-       andq $(~X86_CR0_EM), %rax
-       orq $(X86_CR0_MP | X86_CR0_NE), %rax
-       movq %rax, %cr0
-       movq %cr4, %rax
-       orq $(X86_CR4_OSXMMEXCPT | X86_CR4_OSFXSR), %rax
-       movq %rax, %cr4
+       /* We will work on cr0 and cr4 multiple times.
+        * We put cr0 into rsi and cr4 into rdi, because cpuid and
+        * xgetbv/xsetbv work on eax/ebx/ecx/edx. */
+       movq %cr0, %rsi
+       movq %cr4, %rdi
+       /* FPU and SSE are part of base x86-64, so no need to check for their
+        * availability before enabling and initializing. */
+       andl $(~(X86_CR0_EM | X86_CR0_TS)), %esi
+       orl $(X86_CR0_MP | X86_CR0_NE), %esi
+       movq %rsi, %cr0
+       fninit
So the floating point is enabled always? Should it rather be a config
option? Not insisting, just asking.

We could do that and add a configuration option to unikraft that globally disables all floating-point instructions, similar to how the Linux kernel does it. In that case, we could indeed skip pretty much all this code. However, I'm a bit worried about the implications of this, because that most likely means we'll run into problems with full-fledged libc implementations such as newlib or musl. (or at least with the libm part).

I think in general, this is an interesting idea, and we could check adding such a mode to unikraft, because I could think of situations where we might not need any floating point operations. However, I would rather make this a future feature patch, so that we can continue with this series that is more of a bug fix (though a pretty large one).

However, now that I think about it, maybe instead of just putting a warning in the trap handler (in the last patch of the series), it would make sense to compile trap.c and the time.c files that contain the timer interrupt handler with how Linux does it: "-mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-fp-ret-in-387 -mskip-rax-setup" (looking at arch/x86/Makefile).

As an aside, does anyone know why it lists -mno-sse2 on top of -mno-sse? Does -mno-sse not imply -mno-sse2? And if so, why do they not also put -mno-sse3, -mno-ssse3, ... in their CFLAGS?


+#if __SSE__
+       orl $(X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT), %edi
+       movq %rdi, %cr4
        ldmxcsr (mxcsr_ptr)
+#endif /* __SSE__ */
+       /* Check capabilities subject to availability as indicated by cpuid.
+        * First, start off with "standard features" */
+       movl $0x1, %eax
+       cpuid
+#if __AVX__
+       /* ecx and edx now contain capability information, so we can now
+        * enable capabilities based on the indicated features */
+       /* OSXSAVE needs to be enabled before AVX */
+       testl $(X86_CPUID1_ECX_XSAVE), %ecx
+       jz noxsave
+       orl $(X86_CR4_OSXSAVE), %edi
+       movq %rdi, %cr4
+       /* now enable AVX. This needs to be last checking cpuid features from
+        * the eax=1 cpuid call, because it clobbers ecx */
+       testl $(X86_CPUID1_ECX_AVX), %ecx
+       jz noavx
+       xorl %ecx, %ecx
+       xgetbv
+       orl $(X86_XCR0_XMM | X86_XCR0_YMM), %eax
According to my very shallow understanding, XMM is SSE relating
thing. Should its enabling be moved to the appropriate section above?

Ah, these flags only set information in the extended capabilities register XCR0 (via the XSETBV that follows). These flags just signal which information is available and should be saved via the XSAVE instruction. That instruction itself was only added well after SSE, so the assumption here is that, if you have XSAVE available, then you can save SSE's XMM registers.

As far as I remember (but to be honest, I can't find the information right now), XSAVE was only introduced with AVX (before that, there was FXSAVE for SSE support on CPUs with XMM registers, but no YMM registers), so setting those two flags means "XSAVE, please save both SSE and AVX registers for me", and there's no point in ever using xsave for SSE only or for AVX only.

With later extensions (AVX512 especially), we get ZMM registers, and then we need to set another flag in XCR0 so that XSAVE also covers those. We don't have support for AVX512 in unikraft yet, though. One main reason is that I don't have a machine with AVX512 to test the code on. ;-)


Anyway, it's probably a good idea to add a compilation error in case
__AVX__ is defined, but __SSE__ is not.

I guess we could... However: these are flags set by the compiler based on your -mcpu / -mtune choices for CPU types, and there are no machines that have AVX but not SSE, so this seems like a very synthetic corner case where someone maliciously defined __AVX__ and undefined __SSE__ manually.


+       xsetbv
+noavx:
+noxsave:
+#endif /* __AVX__ */
+       /* Now, check for extended features. */
+       movl $0x7, %eax
+       movl $0x1, %ecx
+       cpuid
+       /* ebx, ecx, edx now contain extended capabilties information. */
+       /* check for and enable FS/GSBASE */
+       testl $(X86_CPUID7_EBX_FSGSBASE), %ebx
+       jz nofsgsbase
+       orl $(X86_CR4_FSGSBASE), %edi
+       movq %rdi, %cr4
+nofsgsbase:
+       /* done setting up CPU capabilities */
/* read multiboot info pointer */
        movq -8(%rsp), %rdi
diff --git a/plat/kvm/x86/setup.c b/plat/kvm/x86/setup.c
index e02886d1..47a78dcf 100644
--- a/plat/kvm/x86/setup.c
+++ b/plat/kvm/x86/setup.c
@@ -109,20 +109,6 @@ static inline void _mb_init_mem(struct multiboot_info *mi)
        _libkvmplat_stack_top  = (void *) (max_addr - __STACK_SIZE);
  }
-static inline void _init_cpufeatures(void)
-{
-#if __SSE__
-       unsigned long sse_status = 0x1f80;
-#endif
-
-       /* FPU */
-       asm volatile("fninit");
-
-#if __SSE__
-       asm volatile("ldmxcsr %0" : : "m"(sse_status));
-#endif
-}
-
  static void _libkvmplat_entry2(void *arg __attribute__((unused)))
  {
        ukplat_entry_argp(NULL, cmdline, sizeof(cmdline));
@@ -133,7 +119,6 @@ void _libkvmplat_entry(void *arg)
        struct multiboot_info *mi = (struct multiboot_info *)arg;
_libkvmplat_init_console();
-       _init_cpufeatures();
        traps_init();
        intctrl_init();
diff --git a/plat/xen/x86/entry64.S b/plat/xen/x86/entry64.S
index c266804a..4363ac0e 100644
--- a/plat/xen/x86/entry64.S
+++ b/plat/xen/x86/entry64.S
@@ -25,6 +25,7 @@
#include <uk/arch/types.h>
  #include <uk/arch/limits.h>
+#include <x86/cpu_defs.h>
  #include <x86/traps.h>
  #include <uk/config.h>
  #include <xen/xen.h>
@@ -60,11 +61,68 @@ _libxenplat_start:
  #include "entry_hvm.S"
#endif
-        cld
-        movq stack_start(%rip),%rsp
-        andq $(~(__STACK_SIZE-1)), %rsp
-        movq %rsi,%rdi
-        call _libxenplat_x86entry
+       cld
+       movq stack_start(%rip),%rsp
+       andq $(~(__STACK_SIZE-1)), %rsp
+       movq %rsi, %r8 /* esi contains pointer to start_info page */
+       /* We will work on cr0 and cr4 multiple times.
+        * We put cr0 into rsi and cr4 into rdi, because cpuid and
+        * xgetbv/xsetbv work on eax/ebx/ecx/edx. */
+       movq %cr0, %rsi
+       movq %cr4, %rdi
+       /* FPU and SSE are part of base x86-64, so no need to check for their
+        * availability before enabling and initializing. */
+       andl $(~(X86_CR0_EM | X86_CR0_TS)), %esi
+       orl $(X86_CR0_MP | X86_CR0_NE), %esi
+       movq %rsi, %cr0
+       fninit
+#if __SSE__
+       orl $(X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT), %edi
+       movq %rdi, %cr4
+       ldmxcsr (mxcsr_ptr)
+#endif /* __SSE__ */
+       /* Check capabilities subject to availability as indicated by cpuid.
+        * First, start off with "standard features" */
+       movl $0x1, %eax
+       cpuid
+#if __AVX__
+       /* ecx and edx now contain capability information, so we can now
+        * enable capabilities based on the indicated features */
+       /* OSXSAVE needs to be enabled before AVX */
+       testl $(X86_CPUID1_ECX_XSAVE), %ecx
+       jz noxsave
+       orl $(X86_CR4_OSXSAVE), %edi
+       movq %rdi, %cr4
+       /* now enable AVX. This needs to be last checking cpuid features from
+        * the eax=1 cpuid call, because it clobbers ecx */
+       testl $(X86_CPUID1_ECX_AVX), %ecx
+       jz noavx
+       xorl %ecx, %ecx
+       xgetbv
+       orl $(X86_XCR0_XMM | X86_XCR0_YMM), %eax
+       xsetbv
+noavx:
+noxsave:
+#endif /* __AVX__ */
+       /* Now, check for extended features. */
+       movl $0x7, %eax
+       movl $0x1, %ecx
+       cpuid
+       /* ebx, ecx, edx now contain extended capabilties information. */
+       /* check for and enable FS/GSBASE */
+       testl $(X86_CPUID7_EBX_FSGSBASE), %ebx
+       jz nofsgsbase
+       orl $(X86_CR4_FSGSBASE), %edi
+       movq %rdi, %cr4
+nofsgsbase:
+       /* Done setting up CPU capabilities, hand over to C entry point. */
+       movq %r8, %rdi /* pass pointer to start_info page to C entry */
+       call _libxenplat_x86entry
+
+.type mxcsr_ptr, @object
+mxcsr_ptr:
+       .long 0x1f80                    /* Intel SDM power-on default */
+
stack_start:
          .quad _libxenplat_bootstack + (2*__STACK_SIZE)
diff --git a/plat/xen/x86/setup.c b/plat/xen/x86/setup.c
index 35fdd35e..a41d5cb3 100644
--- a/plat/xen/x86/setup.c
+++ b/plat/xen/x86/setup.c
@@ -113,20 +113,6 @@ static inline void _init_traps(void)
        traps_init();
  }
-static inline void _init_cpufeatures(void)
-{
-#if __SSE__
-       unsigned long sse_status = 0x1f80;
-#endif
-
-       /* FPU */
-       asm volatile("fninit");
-
-#if __SSE__
-       asm volatile("ldmxcsr %0" : : "m"(sse_status));
-#endif
-}
-
  static inline void _init_shared_info(void)
  {
        int ret;
@@ -184,7 +170,6 @@ void _libxenplat_x86entry(void *start_info) __noreturn;
  void _libxenplat_x86entry(void *start_info)
  {
        _init_traps();
-       _init_cpufeatures();
        HYPERVISOR_start_info = (start_info_t *)start_info;
        _libxenplat_prepare_console(); /* enables buffering for console */
--
2.19.2



--
Dr. Florian Schmidt
フローリアン・シュミット
Research Scientist,
Systems and Machine Learning Group
NEC Laboratories Europe
Kurfürsten-Anlage 36, D-69115 Heidelberg
Tel.     +49 (0)6221 4342-265
Fax:     +49 (0)6221 4342-155
e-mail:  florian.schmidt@xxxxxxxxx
============================================================
Registered at Amtsgericht Mannheim, Germany, HRB728558

_______________________________________________
Minios-devel mailing list
Minios-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/minios-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.