[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 06/24] x86/pv: Implement pv_inject_{event, page_fault, hw_exception}()



To help with event injection improvements for the PV uses of x86_emulate(),
implement a event injection API which matches its hvm counterpart.

This is started with taking do_guest_trap() and modifying its calling API to
pv_inject_event(), subsequentally implementing the former in terms of the
latter.

The existing propagate_page_fault() is fairly similar to
pv_inject_page_fault(), although it has a return value.  Only a single caller
makes use of the return value, and non-NULL is only returned if the passed cr2
is non-canonical.  Opencode this single case in
handle_gdt_ldt_mapping_fault(), allowing propagate_page_fault() to become
void.

The call to reserved_bit_page_fault() in propagate_page_fault() was
conceptually wrong to start with.  Complaining about reserved bits should be
part of handling the pagefault itself, not part of injecting a pagefault into
the guest.  It is therefore moved ahead of the injection call in
do_page_fault() to compensate.

The remaining #PF specific bits are moved into pv_inject_event(), and
pv_inject_page_fault() is implemented as a static inline wrapper.

No practical change from a guests point of view.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>

v3:
 * Reposition reserved_bit_page_fault() handling
v2:
 * New
---
 xen/arch/x86/mm.c               |   5 +-
 xen/arch/x86/mm/shadow/common.c |   4 +-
 xen/arch/x86/traps.c            | 147 ++++++++++++++++++++--------------------
 xen/include/asm-x86/domain.h    |  26 +++++++
 xen/include/asm-x86/mm.h        |   1 -
 5 files changed, 104 insertions(+), 79 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index d365f59..b7c7122 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5136,7 +5136,7 @@ static int ptwr_emulated_read(
     if ( !__addr_ok(addr) ||
          (rc = __copy_from_user(p_data, (void *)addr, bytes)) )
     {
-        propagate_page_fault(addr + bytes - rc, 0); /* read fault */
+        pv_inject_page_fault(0, addr + bytes - rc); /* Read fault. */
         return X86EMUL_EXCEPTION;
     }
 
@@ -5177,7 +5177,8 @@ static int ptwr_emulated_update(
         addr &= ~(sizeof(paddr_t)-1);
         if ( (rc = copy_from_user(&full, (void *)addr, sizeof(paddr_t))) != 0 )
         {
-            propagate_page_fault(addr+sizeof(paddr_t)-rc, 0); /* read fault */
+            pv_inject_page_fault(0, /* Read fault. */
+                                 addr + sizeof(paddr_t) - rc);
             return X86EMUL_EXCEPTION;
         }
         /* Mask out bits provided by caller. */
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index a4a3c4b..f07803b 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -323,7 +323,7 @@ pv_emulate_read(enum x86_segment seg,
 
     if ( (rc = copy_from_user(p_data, (void *)offset, bytes)) != 0 )
     {
-        propagate_page_fault(offset + bytes - rc, 0); /* read fault */
+        pv_inject_page_fault(0, offset + bytes - rc); /* Read fault. */
         return X86EMUL_EXCEPTION;
     }
 
@@ -1723,7 +1723,7 @@ static mfn_t emulate_gva_to_mfn(struct vcpu *v, unsigned 
long vaddr,
         if ( is_hvm_vcpu(v) )
             hvm_inject_page_fault(pfec, vaddr);
         else
-            propagate_page_fault(vaddr, pfec);
+            pv_inject_page_fault(pfec, vaddr);
         return _mfn(BAD_GVA_TO_GFN);
     }
 
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index b464211..195d590 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -625,37 +625,75 @@ void fatal_trap(const struct cpu_user_regs *regs, bool_t 
show_remote)
           (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
 }
 
-static void do_guest_trap(unsigned int trapnr,
-                          const struct cpu_user_regs *regs)
+void pv_inject_event(const struct x86_event *event)
 {
     struct vcpu *v = current;
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
     struct trap_bounce *tb;
     const struct trap_info *ti;
+    const uint8_t vector = event->vector;
     const bool use_error_code =
-        ((trapnr < 32) && (TRAP_HAVE_EC & (1u << trapnr)));
+        ((vector < 32) && (TRAP_HAVE_EC & (1u << vector)));
+    unsigned int error_code = event->error_code;
 
-    trace_pv_trap(trapnr, regs->eip, use_error_code, regs->error_code);
+    ASSERT(vector == event->vector); /* Confirm no truncation. */
+    if ( use_error_code )
+        ASSERT(error_code != X86_EVENT_NO_EC);
+    else
+        ASSERT(error_code == X86_EVENT_NO_EC);
 
     tb = &v->arch.pv_vcpu.trap_bounce;
-    ti = &v->arch.pv_vcpu.trap_ctxt[trapnr];
+    ti = &v->arch.pv_vcpu.trap_ctxt[vector];
 
     tb->flags = TBF_EXCEPTION;
     tb->cs    = ti->cs;
     tb->eip   = ti->address;
 
+    if ( vector == TRAP_page_fault )
+    {
+        v->arch.pv_vcpu.ctrlreg[2] = event->cr2;
+        arch_set_cr2(v, event->cr2);
+
+        /* Re-set error_code.user flag appropriately for the guest. */
+        error_code &= ~PFEC_user_mode;
+        if ( !guest_kernel_mode(v, regs) )
+            error_code |= PFEC_user_mode;
+
+        trace_pv_page_fault(event->cr2, error_code);
+    }
+    else
+        trace_pv_trap(vector, regs->eip, use_error_code, error_code);
+
     if ( use_error_code )
     {
         tb->flags |= TBF_EXCEPTION_ERRCODE;
-        tb->error_code = regs->error_code;
+        tb->error_code = error_code;
     }
 
     if ( TI_GET_IF(ti) )
         tb->flags |= TBF_INTERRUPT;
 
     if ( unlikely(null_trap_bounce(v, tb)) )
+    {
         gprintk(XENLOG_WARNING,
                 "Unhandled %s fault/trap [#%d, ec=%04x]\n",
-                trapstr(trapnr), trapnr, regs->error_code);
+                trapstr(vector), vector, error_code);
+
+        if ( vector == TRAP_page_fault )
+            show_page_walk(event->cr2);
+    }
+}
+
+static inline void do_guest_trap(unsigned int trapnr,
+                                 const struct cpu_user_regs *regs)
+{
+    const struct x86_event event = {
+        .vector = trapnr,
+        .error_code = (((trapnr < 32) && (TRAP_HAVE_EC & (1u << trapnr)))
+                       ? regs->error_code : X86_EVENT_NO_EC),
+    };
+
+    pv_inject_event(&event);
 }
 
 static void instruction_done(
@@ -1289,7 +1327,7 @@ static int emulate_invalid_rdtscp(struct cpu_user_regs 
*regs)
     eip = regs->eip;
     if ( (rc = copy_from_user(opcode, (char *)eip, sizeof(opcode))) != 0 )
     {
-        propagate_page_fault(eip + sizeof(opcode) - rc, 0);
+        pv_inject_page_fault(0, eip + sizeof(opcode) - rc);
         return EXCRET_fault_fixed;
     }
     if ( memcmp(opcode, "\xf\x1\xf9", sizeof(opcode)) )
@@ -1310,7 +1348,7 @@ static int emulate_forced_invalid_op(struct cpu_user_regs 
*regs)
     /* Check for forced emulation signature: ud2 ; .ascii "xen". */
     if ( (rc = copy_from_user(sig, (char *)eip, sizeof(sig))) != 0 )
     {
-        propagate_page_fault(eip + sizeof(sig) - rc, 0);
+        pv_inject_page_fault(0, eip + sizeof(sig) - rc);
         return EXCRET_fault_fixed;
     }
     if ( memcmp(sig, "\xf\xbxen", sizeof(sig)) )
@@ -1320,7 +1358,7 @@ static int emulate_forced_invalid_op(struct cpu_user_regs 
*regs)
     /* We only emulate CPUID. */
     if ( ( rc = copy_from_user(instr, (char *)eip, sizeof(instr))) != 0 )
     {
-        propagate_page_fault(eip + sizeof(instr) - rc, 0);
+        pv_inject_page_fault(0, eip + sizeof(instr) - rc);
         return EXCRET_fault_fixed;
     }
     if ( memcmp(instr, "\xf\xa2", sizeof(instr)) )
@@ -1487,53 +1525,6 @@ static void reserved_bit_page_fault(
     show_execution_state(regs);
 }
 
-struct trap_bounce *propagate_page_fault(unsigned long addr, u16 error_code)
-{
-    struct trap_info *ti;
-    struct vcpu *v = current;
-    struct trap_bounce *tb = &v->arch.pv_vcpu.trap_bounce;
-
-    if ( unlikely(!is_canonical_address(addr)) )
-    {
-        ti = &v->arch.pv_vcpu.trap_ctxt[TRAP_gp_fault];
-        tb->flags      = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
-        tb->error_code = 0;
-        tb->cs         = ti->cs;
-        tb->eip        = ti->address;
-        if ( TI_GET_IF(ti) )
-            tb->flags |= TBF_INTERRUPT;
-        return tb;
-    }
-
-    v->arch.pv_vcpu.ctrlreg[2] = addr;
-    arch_set_cr2(v, addr);
-
-    /* Re-set error_code.user flag appropriately for the guest. */
-    error_code &= ~PFEC_user_mode;
-    if ( !guest_kernel_mode(v, guest_cpu_user_regs()) )
-        error_code |= PFEC_user_mode;
-
-    trace_pv_page_fault(addr, error_code);
-
-    ti = &v->arch.pv_vcpu.trap_ctxt[TRAP_page_fault];
-    tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
-    tb->error_code = error_code;
-    tb->cs         = ti->cs;
-    tb->eip        = ti->address;
-    if ( TI_GET_IF(ti) )
-        tb->flags |= TBF_INTERRUPT;
-    if ( unlikely(null_trap_bounce(v, tb)) )
-    {
-        printk("%pv: unhandled page fault (ec=%04X)\n", v, error_code);
-        show_page_walk(addr);
-    }
-
-    if ( unlikely(error_code & PFEC_reserved_bit) )
-        reserved_bit_page_fault(addr, guest_cpu_user_regs());
-
-    return NULL;
-}
-
 static int handle_gdt_ldt_mapping_fault(
     unsigned long offset, struct cpu_user_regs *regs)
 {
@@ -1565,17 +1556,22 @@ static int handle_gdt_ldt_mapping_fault(
         }
         else
         {
-            struct trap_bounce *tb;
-
             /* In hypervisor mode? Leave it to the #PF handler to fix up. */
             if ( !guest_mode(regs) )
                 return 0;
-            /* In guest mode? Propagate fault to guest, with adjusted %cr2. */
-            tb = propagate_page_fault(curr->arch.pv_vcpu.ldt_base + offset,
-                                      regs->error_code);
-            if ( tb )
-                tb->error_code = (offset & ~(X86_XEC_EXT | X86_XEC_IDT)) |
-                                 X86_XEC_TI;
+
+            /* Access would have become non-canonical? Pass #GP[sel] back. */
+            if ( unlikely(!is_canonical_address(
+                              curr->arch.pv_vcpu.ldt_base + offset)) )
+            {
+                uint16_t ec = (offset & ~(X86_XEC_EXT | X86_XEC_IDT)) | 
X86_XEC_TI;
+
+                pv_inject_hw_exception(TRAP_gp_fault, ec);
+            }
+            else
+                /* else pass the #PF back, with adjusted %cr2. */
+                pv_inject_page_fault(regs->error_code,
+                                     curr->arch.pv_vcpu.ldt_base + offset);
         }
     }
     else
@@ -1858,7 +1854,10 @@ void do_page_fault(struct cpu_user_regs *regs)
             return;
     }
 
-    propagate_page_fault(addr, regs->error_code);
+    if ( unlikely(regs->error_code & PFEC_reserved_bit) )
+        reserved_bit_page_fault(addr, regs);
+
+    pv_inject_page_fault(regs->error_code, addr);
 }
 
 /*
@@ -2788,7 +2787,7 @@ int pv_emul_cpuid(unsigned int *eax, unsigned int *ebx, 
unsigned int *ecx,
         goto fail;                                                          \
     if ( (_rc = copy_from_user(&_x, (type *)_ptr, sizeof(_x))) != 0 )       \
     {                                                                       \
-        propagate_page_fault(_ptr + sizeof(_x) - _rc, 0);                   \
+        pv_inject_page_fault(0, _ptr + sizeof(_x) - _rc);                   \
         goto skip;                                                          \
     }                                                                       \
     (eip) += sizeof(_x); _x; })
@@ -2953,8 +2952,8 @@ static int emulate_privileged_op(struct cpu_user_regs 
*regs)
             if ( (rc = copy_to_user((void *)data_base + rd_ad(edi),
                                     &data, op_bytes)) != 0 )
             {
-                propagate_page_fault(data_base + rd_ad(edi) + op_bytes - rc,
-                                     PFEC_write_access);
+                pv_inject_page_fault(PFEC_write_access,
+                                     data_base + rd_ad(edi) + op_bytes - rc);
                 return EXCRET_fault_fixed;
             }
             wr_ad(edi, regs->edi + (int)((regs->eflags & X86_EFLAGS_DF)
@@ -2971,8 +2970,8 @@ static int emulate_privileged_op(struct cpu_user_regs 
*regs)
             if ( (rc = copy_from_user(&data, (void *)data_base + rd_ad(esi),
                                       op_bytes)) != 0 )
             {
-                propagate_page_fault(data_base + rd_ad(esi)
-                                     + op_bytes - rc, 0);
+                pv_inject_page_fault(0, data_base + rd_ad(esi)
+                                     + op_bytes - rc);
                 return EXCRET_fault_fixed;
             }
             guest_io_write(port, op_bytes, data, currd);
@@ -3529,8 +3528,8 @@ static void emulate_gate_op(struct cpu_user_regs *regs)
             rc = __put_user(item, stkp); \
             if ( rc ) \
             { \
-                propagate_page_fault((unsigned long)(stkp + 1) - rc, \
-                                     PFEC_write_access); \
+                pv_inject_page_fault(PFEC_write_access, \
+                                     (unsigned long)(stkp + 1) - rc); \
                 return; \
             } \
         } while ( 0 )
@@ -3597,7 +3596,7 @@ static void emulate_gate_op(struct cpu_user_regs *regs)
                     rc = __get_user(parm, ustkp);
                     if ( rc )
                     {
-                        propagate_page_fault((unsigned long)(ustkp + 1) - rc, 
0);
+                        pv_inject_page_fault(0, (unsigned long)(ustkp + 1) - 
rc);
                         return;
                     }
                     push(parm);
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index f6a40eb..39cc658 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -8,6 +8,7 @@
 #include <asm/hvm/domain.h>
 #include <asm/e820.h>
 #include <asm/mce.h>
+#include <asm/x86_emulate.h>
 #include <public/vcpu.h>
 #include <public/hvm/hvm_info_table.h>
 
@@ -632,6 +633,31 @@ static inline void free_vcpu_guest_context(struct 
vcpu_guest_context *vgc)
 struct vcpu_hvm_context;
 int arch_set_info_hvm_guest(struct vcpu *v, const struct vcpu_hvm_context 
*ctx);
 
+void pv_inject_event(const struct x86_event *event);
+
+static inline void pv_inject_hw_exception(unsigned int vector, int errcode)
+{
+    const struct x86_event event = {
+        .vector = vector,
+        .type = X86_EVENTTYPE_HW_EXCEPTION,
+        .error_code = errcode,
+    };
+
+    pv_inject_event(&event);
+}
+
+static inline void pv_inject_page_fault(int errcode, unsigned long cr2)
+{
+    const struct x86_event event = {
+        .vector = TRAP_page_fault,
+        .type = X86_EVENTTYPE_HW_EXCEPTION,
+        .error_code = errcode,
+        .cr2 = cr2,
+    };
+
+    pv_inject_event(&event);
+}
+
 #endif /* __ASM_DOMAIN_H__ */
 
 /*
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 1b4d1c3..a15029c 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -539,7 +539,6 @@ int new_guest_cr3(unsigned long pfn);
 void make_cr3(struct vcpu *v, unsigned long mfn);
 void update_cr3(struct vcpu *v);
 int vcpu_destroy_pagetables(struct vcpu *);
-struct trap_bounce *propagate_page_fault(unsigned long addr, u16 error_code);
 void *do_page_walk(struct vcpu *v, unsigned long addr);
 
 int __sync_local_execstate(void);
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.