[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen master] x86: Clamp bits in eflags more aggressively



commit dbd936248f3846449c0d0e192d544b592c1c44d5
Author:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
AuthorDate: Tue May 27 15:58:46 2025 +0100
Commit:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CommitDate: Fri Mar 13 17:18:04 2026 +0000

    x86: Clamp bits in eflags more aggressively
    
    In FRED mode, ERET is stricter than IRET about flags.  Notably this means:
    
     * The vm86 bit (bit 17) and IOPL (bits 12,13) must be clear.
     * The sticky-1 reserved bit (bit 1) must be set, so dom0_construct() needs
       to set X86_EFLAGS_MBS in order for a PV dom0 to start.
     * All other reserved bits must be clear.
    
    Xen has been overly lax with reserved bit handling.  Adjust
    arch_set_info_guest*() and hypercall_iret() which consume flags to clamp the
    reserved bits for all guest types.
    
    This is a minor ABI change, but by the same argument as commit
    9f892f84c279 ("x86/domctl: Stop using XLAT_cpu_user_regs()"); the reserved
    bits would get clamped like this naturally by hardware when the vCPU is run.
    
    The handling of vm86 is also different.  Guests under 32bit Xen really could
    use vm86 mode, but Long Mode disallows vm86 mode and IRET simply ignores the
    bit.  Xen's behaviour for a PV32 guest trying to use vm86 mode under a 64bit
    Xen is to arrange to deliver #GP at the target of the IRET, rather than to
    fail the IRET itself.
    
    However there's no filtering in arch_set_info_guest() itself, and it can't
    arrange to queue a #GP at the target, so do the next best thing and fail the
    hypercall.  This is not expected to create an issue for PV guests, as the
    result of such an arch_set_info_guest() previously would be to run
    supposedly Real Mode code as Protected Mode code.
    
    This allows PV guests to start when Xen is using FRED mode.
    
    Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
---
 xen/arch/x86/domain.c                | 24 ++++++++++++++++++++++--
 xen/arch/x86/hvm/domain.c            |  4 ++--
 xen/arch/x86/include/asm/x86-defns.h |  7 +++++++
 xen/arch/x86/pv/dom0_build.c         |  2 +-
 xen/arch/x86/pv/iret.c               |  8 +++++---
 5 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 868c26036d..4664264b2f 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1193,6 +1193,14 @@ int arch_set_info_guest(
 
             if ( !__addr_ok(c.nat->ldt_base) )
                 return -EINVAL;
+
+            /*
+             * IRET in Long Mode discards EFLAGS.VM, but in FRED mode ERET
+             * cares that it is zero.
+             *
+             * Guests can't see FRED, so emulate IRET behaviour.
+             */
+            c.nat->user_regs.rflags &= ~X86_EFLAGS_VM;
         }
 #ifdef CONFIG_COMPAT
         else
@@ -1205,6 +1213,18 @@ int arch_set_info_guest(
 
             for ( i = 0; i < ARRAY_SIZE(c.cmp->trap_ctxt); i++ )
                 fixup_guest_code_selector(d, c.cmp->trap_ctxt[i].cs);
+
+            /*
+             * Under 32bit Xen, PV guests could really use vm86 mode.  Under
+             * 64bit Xen, vm86 mode can't be entered even by PV32 guests.
+             *
+             * For backwards compatibility, compat HYPERCALL_iret will arrange
+             * to deliver #GP at the target of the IRET rather than to fail
+             * the IRET itself, but we can't arrange for the same behaviour
+             * here.  Reject the hypercall as the next best option.
+             */
+            if ( c.cmp->user_regs.eflags & X86_EFLAGS_VM )
+                return -EINVAL;
         }
 #endif
 
@@ -1244,7 +1264,7 @@ int arch_set_info_guest(
         v->arch.user_regs.rax               = c.nat->user_regs.rax;
         v->arch.user_regs.rip               = c.nat->user_regs.rip;
         v->arch.user_regs.cs                = c.nat->user_regs.cs;
-        v->arch.user_regs.rflags            = c.nat->user_regs.rflags;
+        v->arch.user_regs.rflags            = (c.nat->user_regs.rflags & X86_EFLAGS_ALL) | X86_EFLAGS_MBS;
         v->arch.user_regs.rsp               = c.nat->user_regs.rsp;
         v->arch.user_regs.ss                = c.nat->user_regs.ss;
         v->arch.pv.es                       = c.nat->user_regs.es;
@@ -1268,7 +1288,7 @@ int arch_set_info_guest(
         v->arch.user_regs.eax               = c.cmp->user_regs.eax;
         v->arch.user_regs.eip               = c.cmp->user_regs.eip;
         v->arch.user_regs.cs                = c.cmp->user_regs.cs;
-        v->arch.user_regs.eflags            = c.cmp->user_regs.eflags;
+        v->arch.user_regs.eflags            = (c.cmp->user_regs.eflags & X86_EFLAGS_ALL) | X86_EFLAGS_MBS;
         v->arch.user_regs.esp               = c.cmp->user_regs.esp;
         v->arch.user_regs.ss                = c.cmp->user_regs.ss;
         v->arch.pv.es                       = c.cmp->user_regs.es;
diff --git a/xen/arch/x86/hvm/domain.c b/xen/arch/x86/hvm/domain.c
index 155d61db13..a0e811ea47 100644
--- a/xen/arch/x86/hvm/domain.c
+++ b/xen/arch/x86/hvm/domain.c
@@ -194,7 +194,7 @@ int arch_set_info_hvm_guest(struct vcpu *v, const struct vcpu_hvm_context *ctx)
         uregs->rsi    = regs->esi;
         uregs->rdi    = regs->edi;
         uregs->rip    = regs->eip;
-        uregs->rflags = regs->eflags;
+        uregs->rflags = (regs->eflags & X86_EFLAGS_ALL) | X86_EFLAGS_MBS;
 
         v->arch.hvm.guest_cr[0] = regs->cr0;
         v->arch.hvm.guest_cr[3] = regs->cr3;
@@ -245,7 +245,7 @@ int arch_set_info_hvm_guest(struct vcpu *v, const struct vcpu_hvm_context *ctx)
         uregs->rsi    = regs->rsi;
         uregs->rdi    = regs->rdi;
         uregs->rip    = regs->rip;
-        uregs->rflags = regs->rflags;
+        uregs->rflags = (regs->rflags & X86_EFLAGS_ALL) | X86_EFLAGS_MBS;
 
         v->arch.hvm.guest_cr[0] = regs->cr0;
         v->arch.hvm.guest_cr[3] = regs->cr3;
diff --git a/xen/arch/x86/include/asm/x86-defns.h b/xen/arch/x86/include/asm/x86-defns.h
index 0a0ba83de7..edeb0b4ff9 100644
--- a/xen/arch/x86/include/asm/x86-defns.h
+++ b/xen/arch/x86/include/asm/x86-defns.h
@@ -27,6 +27,13 @@
     (X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |   \
      X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)
 
+#define X86_EFLAGS_ALL                                          \
+    (X86_EFLAGS_ARITH_MASK | X86_EFLAGS_TF | X86_EFLAGS_IF |    \
+     X86_EFLAGS_DF | X86_EFLAGS_OF | X86_EFLAGS_IOPL |          \
+     X86_EFLAGS_NT | X86_EFLAGS_RF | X86_EFLAGS_VM |            \
+     X86_EFLAGS_AC | X86_EFLAGS_VIF | X86_EFLAGS_VIP |          \
+     X86_EFLAGS_ID)
+
 /*
  * Intel CPU flags in CR0
  */
diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index 9a11a0a16b..075a3646c2 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -1024,7 +1024,7 @@ static int __init dom0_construct(const struct boot_domain *bd)
     regs->rip = parms.virt_entry;
     regs->rsp = vstack_end;
     regs->rsi = vstartinfo_start;
-    regs->eflags = X86_EFLAGS_IF;
+    regs->eflags = X86_EFLAGS_IF | X86_EFLAGS_MBS;
 
     /*
      * We don't call arch_set_info_guest(), so some initialisation needs doing
diff --git a/xen/arch/x86/pv/iret.c b/xen/arch/x86/pv/iret.c
index d3a1fb2c68..39ce316b8d 100644
--- a/xen/arch/x86/pv/iret.c
+++ b/xen/arch/x86/pv/iret.c
@@ -80,8 +80,9 @@ long do_iret(void)
 
     regs->rip    = iret_saved.rip;
     regs->cs     = iret_saved.cs | 3; /* force guest privilege */
-    regs->rflags = ((iret_saved.rflags & ~(X86_EFLAGS_IOPL|X86_EFLAGS_VM))
-                    | X86_EFLAGS_IF);
+    regs->rflags = ((iret_saved.rflags & X86_EFLAGS_ALL &
+                     ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) |
+                    X86_EFLAGS_IF | X86_EFLAGS_MBS);
     regs->rsp    = iret_saved.rsp;
     regs->ss     = iret_saved.ss | 3; /* force guest privilege */
 
@@ -143,7 +144,8 @@ int compat_iret(void)
     if ( VM_ASSIST(v->domain, architectural_iopl) )
         v->arch.pv.iopl = eflags & X86_EFLAGS_IOPL;
 
-    regs->eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;
+    regs->eflags = ((eflags & X86_EFLAGS_ALL & ~X86_EFLAGS_IOPL) |
+                    X86_EFLAGS_IF | X86_EFLAGS_MBS);
 
     if ( unlikely(eflags & X86_EFLAGS_VM) )
     {
--
generated by git-patchbot for /home/xen/git/xen.git#master



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.