|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [xen staging] x86: Always use eager-fpu
commit 70a6abd2cd17f11b8de29ca5782233e430e97d9a
Author: Ross Lagerwall <ross.lagerwall@xxxxxxxxxx>
AuthorDate: Tue Mar 24 18:19:30 2026 +0000
Commit: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CommitDate: Mon Mar 30 15:48:13 2026 +0100
x86: Always use eager-fpu
Lazy FPU avoids some work during a context switch but pushes more
expensive costs elsewhere:
* For a workload running some Windows VMs, I measured about 83% of
context switches out had used the FPU so most of the time the FPU
save/restore is not avoided, just delayed.
* A key difference between 32-bit and 64-bit OSes is that %xmm is in the
base featureset for 64-bit and thus gets ubiquitous use in userspace.
This is likely why we hit 83%.
* Setting/clearing the cr0.TS bit is serializing and reportedly slower
than the processor optimized xsave/restore.
* Linux uses PKRU so a partial xsave/restore is performed on each
context switch anyway, followed by a second xsave/restore at some
point during execution. This interferes with the 'modified'
optimisation that hardware uses to try and reduce the cost of the
following XSAVE.
There is no measurable performance benefit for using lazy FPU and it
adds unwanted complexity so remove the option and always use eager-fpu.
Signed-off-by: Ross Lagerwall <ross.lagerwall@xxxxxxxxxx>
Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
docs/misc/xen-command-line.pandoc | 7 +--
xen/arch/x86/i387.c | 2 +-
xen/arch/x86/include/asm/spec_ctrl.h | 1 -
xen/arch/x86/spec_ctrl.c | 88 ++----------------------------------
4 files changed, 6 insertions(+), 92 deletions(-)
diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
index ebdca007d2..6c77129732 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -2470,7 +2470,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
> {msr-sc,rsb,verw,{ibpb,bhb}-entry}=<bool>|{pv,hvm}=<bool>,
> bti-thunk=retpoline|lfence|jmp,bhb-seq=short|tsx|long,
> {ibrs,ibpb,ssbd,psfd,
-> eager-fpu,l1d-flush,branch-harden,srb-lock,
+> l1d-flush,branch-harden,srb-lock,
> unpriv-mmio,gds-mit,div-scrub,lock-harden,
> bhi-dis-s,bp-spec-reduce,ibpb-alt}=<bool> ]`
@@ -2574,11 +2574,6 @@ On hardware supporting IBPB (Indirect Branch Prediction Barrier), the `ibpb=`
option can be used to force (the default) or prevent Xen from issuing branch
prediction barriers on vcpu context switches.
-On all hardware, the `eager-fpu=` option can be used to force or prevent Xen
-from using fully eager FPU context switches. This is currently implemented as
-a global control. By default, Xen will choose to use fully eager context
-switches on hardware believed to speculate past #NM exceptions.
-
On hardware supporting L1D_FLUSH, the `l1d-flush=` option can be used to force
or prevent Xen from issuing an L1 data cache flush on each VMEntry.
Irrespective of Xen's setting, the feature is virtualised for HVM guests to
diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index b84cd6f7a9..954ba3b179 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -297,7 +297,7 @@ void save_fpu_enable(void)
/* Initialize FPU's context save area */
int vcpu_init_fpu(struct vcpu *v)
{
- v->arch.fully_eager_fpu = opt_eager_fpu;
+ v->arch.fully_eager_fpu = true;
return xstate_alloc_save_area(v);
}
diff --git a/xen/arch/x86/include/asm/spec_ctrl.h b/xen/arch/x86/include/asm/spec_ctrl.h
index 505e3ab863..8f82533c41 100644
--- a/xen/arch/x86/include/asm/spec_ctrl.h
+++ b/xen/arch/x86/include/asm/spec_ctrl.h
@@ -79,7 +79,6 @@ static always_inline void spec_ctrl_new_guest_context(void)
extern int8_t opt_ibpb_ctxt_switch;
extern bool opt_ssbd;
extern int8_t opt_bhi_dis_s;
-extern int8_t opt_eager_fpu;
extern int8_t opt_l1d_flush;
extern bool bsp_delay_spec_ctrl;
diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
index dd0413e1fc..bc8538a56f 100644
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -61,7 +61,6 @@ static int8_t __initdata opt_psfd = -1;
int8_t __ro_after_init opt_bhi_dis_s = -1;
int8_t __ro_after_init opt_ibpb_ctxt_switch = -1;
-int8_t __ro_after_init opt_eager_fpu = -1;
int8_t __ro_after_init opt_l1d_flush = -1;
static bool __initdata opt_branch_harden =
IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH);
@@ -104,8 +103,6 @@ static int __init cf_check parse_spec_ctrl(const char *s)
opt_msr_sc_pv = false;
opt_msr_sc_hvm = false;
- opt_eager_fpu = 0;
-
if ( opt_xpti_hwdom < 0 )
opt_xpti_hwdom = 0;
if ( opt_xpti_domu < 0 )
@@ -336,8 +333,6 @@ static int __init cf_check parse_spec_ctrl(const char *s)
/* Misc settings. */
else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 )
opt_ibpb_ctxt_switch = val;
- else if ( (val = parse_boolean("eager-fpu", s, ss)) >= 0 )
- opt_eager_fpu = val;
else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 )
opt_l1d_flush = val;
else if ( (val = parse_boolean("branch-harden", s, ss)) >= 0 )
@@ -648,32 +643,30 @@ static void __init print_details(enum ind_thunk thunk)
* mitigation support for guests.
*/
#ifdef CONFIG_HVM
- printk(" Support for HVM VMs:%s%s%s%s%s%s%s%s\n",
+ printk(" Support for HVM VMs:%s%s%s%s%s%s%s\n",
(boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ||
boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ||
boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ||
opt_bhb_entry_hvm || amd_virt_spec_ctrl ||
- opt_eager_fpu || opt_verw_hvm) ? "" : " None",
+ opt_verw_hvm) ? "" : " None",
boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "",
(boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ||
amd_virt_spec_ctrl) ? " MSR_VIRT_SPEC_CTRL" :
"",
boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "",
- opt_eager_fpu ? " EAGER_FPU" : "",
opt_verw_hvm ? " VERW" : "",
boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : "",
opt_bhb_entry_hvm ? " BHB-entry" : "");
#endif
#ifdef CONFIG_PV
- printk(" Support for PV VMs:%s%s%s%s%s%s%s\n",
+ printk(" Support for PV VMs:%s%s%s%s%s%s\n",
(boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
boot_cpu_has(X86_FEATURE_SC_RSB_PV) ||
boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ||
opt_bhb_entry_pv ||
- opt_eager_fpu || opt_verw_pv) ? "" : " None",
+ opt_verw_pv) ? "" : " None",
boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "",
boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "",
- opt_eager_fpu ? " EAGER_FPU" : "",
opt_verw_pv ? " VERW" : "",
boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : "",
opt_bhb_entry_pv ? " BHB-entry" : "");
@@ -959,75 +952,6 @@ static bool __init rsb_is_full_width(void)
return true;
}
-/* Calculate whether this CPU speculates past #NM */
-static bool __init should_use_eager_fpu(void)
-{
- /*
- * Assume all unrecognised processors are ok. This is only known to
- * affect Intel Family 6 processors.
- */
- if ( boot_cpu_data.vendor != X86_VENDOR_INTEL ||
- boot_cpu_data.family != 6 )
- return false;
-
- switch ( boot_cpu_data.model )
- {
- /*
- * Core processors since at least Nehalem are vulnerable.
- */
- case 0x1e: /* Nehalem */
- case 0x1f: /* Auburndale / Havendale */
- case 0x1a: /* Nehalem EP */
- case 0x2e: /* Nehalem EX */
- case 0x25: /* Westmere */
- case 0x2c: /* Westmere EP */
- case 0x2f: /* Westmere EX */
- case 0x2a: /* SandyBridge */
- case 0x2d: /* SandyBridge EP/EX */
- case 0x3a: /* IvyBridge */
- case 0x3e: /* IvyBridge EP/EX */
- case 0x3c: /* Haswell */
- case 0x3f: /* Haswell EX/EP */
- case 0x45: /* Haswell D */
- case 0x46: /* Haswell H */
- case 0x3d: /* Broadwell */
- case 0x47: /* Broadwell H */
- case 0x4f: /* Broadwell EP/EX */
- case 0x56: /* Broadwell D */
- case 0x4e: /* Skylake M */
- case 0x55: /* Skylake X */
- case 0x5e: /* Skylake D */
- case 0x66: /* Cannonlake */
- case 0x67: /* Cannonlake? */
- case 0x8e: /* Kabylake M */
- case 0x9e: /* Kabylake D */
- return true;
-
- /*
- * Atom processors are not vulnerable.
- */
- case 0x1c: /* Pineview */
- case 0x26: /* Lincroft */
- case 0x27: /* Penwell */
- case 0x35: /* Cloverview */
- case 0x36: /* Cedarview */
- case 0x37: /* Baytrail / Valleyview (Silvermont) */
- case 0x4d: /* Avaton / Rangely (Silvermont) */
- case 0x4c: /* Cherrytrail / Brasswell */
- case 0x4a: /* Merrifield */
- case 0x5a: /* Moorefield */
- case 0x5c: /* Goldmont */
- case 0x5f: /* Denverton */
- case 0x7a: /* Gemini Lake */
- return false;
-
- default:
- printk("Unrecognised CPU model %#x - assuming vulnerable to LazyFPU\n",
- boot_cpu_data.model);
- return true;
- }
-}
-
/*
*
https://www.amd.com/content/dam/amd/en/documents/corporate/cr/speculative-return-stack-overflow-whitepaper.pdf
*/
@@ -2221,10 +2145,6 @@ void __init init_speculation_mitigations(void)
div_calculations(hw_smt_enabled);
- /* Check whether Eager FPU should be enabled by default. */
- if ( opt_eager_fpu == -1 )
- opt_eager_fpu = should_use_eager_fpu();
-
/* (Re)init BSP state now that default_scf has been calculated. */
init_shadow_spec_ctrl_state(get_cpu_info());
--
generated by git-patchbot for /home/xen/git/xen.git#staging
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |