[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] x86: emulate I/O port access breakpoints



Emulate the trapping on I/O port accesses when emulating IN/OUT.

Also allow 8-byte breakpoints on x86-64 (and on i686 if the hardware
supports them), and tighten the condition for loading debug registers
during context switch.

This patch depends on the prior single step injection patch.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: 2007-11-13/xen/arch/x86/domain.c
===================================================================
--- 2007-11-13.orig/xen/arch/x86/domain.c       2007-11-12 08:47:42.000000000 +0100
+++ 2007-11-13/xen/arch/x86/domain.c    2007-11-21 09:44:48.000000000 +0100
@@ -42,6 +42,7 @@
 #include <asm/hypercall.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
+#include <asm/debugreg.h>
 #include <asm/msr.h>
 #include <asm/nmi.h>
 #include <asm/iommu.h>
@@ -1219,7 +1220,7 @@ static void paravirt_ctxt_switch_from(st
      * inside Xen, before we get a chance to reload DR7, and this cannot always
      * safely be handled.
      */
-    if ( unlikely(v->arch.guest_context.debugreg[7]) )
+    if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
         write_debugreg(7, 0);
 }
 
@@ -1234,7 +1235,7 @@ static void paravirt_ctxt_switch_to(stru
     if ( unlikely(cr4 != read_cr4()) )
         write_cr4(cr4);
 
-    if ( unlikely(v->arch.guest_context.debugreg[7]) )
+    if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
     {
         write_debugreg(0, v->arch.guest_context.debugreg[0]);
         write_debugreg(1, v->arch.guest_context.debugreg[1]);
Index: 2007-11-13/xen/arch/x86/domctl.c
===================================================================
--- 2007-11-13.orig/xen/arch/x86/domctl.c       2007-11-12 08:47:42.000000000 +0100
+++ 2007-11-13/xen/arch/x86/domctl.c    2007-11-21 12:37:04.000000000 +0100
@@ -825,12 +825,18 @@ void arch_get_info_guest(struct vcpu *v,
                 c.nat->ctrlreg[1] = xen_pfn_to_cr3(
                     pagetable_get_pfn(v->arch.guest_table_user));
 #endif
+
+            c.nat->debugreg[7] |= c.nat->debugreg[5];
+            c.nat->debugreg[5] = 0;
         }
 #ifdef CONFIG_COMPAT
         else
         {
             l4_pgentry_t *l4e = __va(pagetable_get_paddr(v->arch.guest_table));
             c.cmp->ctrlreg[3] = compat_pfn_to_cr3(l4e_get_pfn(*l4e));
+
+            c.cmp->debugreg[7] |= c.cmp->debugreg[5];
+            c.cmp->debugreg[5] = 0;
         }
 #endif
 
Index: 2007-11-13/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- 2007-11-13.orig/xen/arch/x86/hvm/svm/svm.c  2007-11-20 16:46:55.000000000 +0100
+++ 2007-11-13/xen/arch/x86/hvm/svm/svm.c       2007-11-21 09:47:01.000000000 +0100
@@ -34,6 +34,7 @@
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
 #include <asm/types.h>
+#include <asm/debugreg.h>
 #include <asm/msr.h>
 #include <asm/spinlock.h>
 #include <asm/hvm/hvm.h>
@@ -189,8 +190,6 @@ static void __restore_debug_registers(st
  * if one of the breakpoints is enabled.  So mask out all bits that don't
  * enable some breakpoint functionality.
  */
-#define DR7_ACTIVE_MASK 0xff
-
 static void svm_restore_dr(struct vcpu *v)
 {
     if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
Index: 2007-11-13/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- 2007-11-13.orig/xen/arch/x86/hvm/vmx/vmx.c  2007-11-20 16:32:57.000000000 +0100
+++ 2007-11-13/xen/arch/x86/hvm/vmx/vmx.c       2007-11-21 09:47:07.000000000 +0100
@@ -33,6 +33,7 @@
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
 #include <asm/types.h>
+#include <asm/debugreg.h>
 #include <asm/msr.h>
 #include <asm/spinlock.h>
 #include <asm/paging.h>
@@ -434,8 +435,6 @@ static void __restore_debug_registers(st
  * if one of the breakpoints is enabled.  So mask out all bits that don't
  * enable some breakpoint functionality.
  */
-#define DR7_ACTIVE_MASK 0xff
-
 static void vmx_restore_dr(struct vcpu *v)
 {
     /* NB. __vmread() is not usable here, so we cannot read from the VMCS. */
Index: 2007-11-13/xen/arch/x86/traps.c
===================================================================
--- 2007-11-13.orig/xen/arch/x86/traps.c        2007-11-20 15:46:19.000000000 +0100
+++ 2007-11-13/xen/arch/x86/traps.c     2007-11-21 11:29:05.000000000 +0100
@@ -412,17 +412,51 @@ static int do_guest_trap(
     return 0;
 }
 
-static void instruction_done(struct cpu_user_regs *regs, unsigned long eip)
+static void instruction_done(struct cpu_user_regs *regs,
+    unsigned long eip, unsigned int bpmatch)
 {
     regs->eip = eip;
     regs->eflags &= ~X86_EFLAGS_RF;
-    if ( regs->eflags & X86_EFLAGS_TF )
+    if ( bpmatch || (regs->eflags & X86_EFLAGS_TF) )
     {
-        current->arch.guest_context.debugreg[6] |= 0xffff4ff0;
+        current->arch.guest_context.debugreg[6] |= bpmatch | 0xffff0ff0;
+        if ( regs->eflags & X86_EFLAGS_TF )
+            current->arch.guest_context.debugreg[6] |= 0x4000;
         do_guest_trap(TRAP_debug, regs, 0);
     }
 }
 
+static unsigned int check_guest_io_breakpoint(struct vcpu *v,
+    unsigned int port, unsigned int len)
+{
+    unsigned int match = 0;
+
+    if ( unlikely(v->arch.guest_context.ctrlreg[4] & X86_CR4_DE) )
+    {
+        unsigned int i;
+
+        for ( i = 0; i < 4; ++i )
+            if ( v->arch.guest_context.debugreg[5] &
+                 (3 << (i * DR_ENABLE_SIZE)) )
+            {
+               unsigned long start = v->arch.guest_context.debugreg[i];
+               unsigned int width = 0;
+
+               switch ( (v->arch.guest_context.debugreg[7] >>
+                         (DR_CONTROL_SHIFT + i * DR_CONTROL_SIZE)) & 0xc )
+               {
+               case DR_LEN_1: width = 1; break;
+               case DR_LEN_2: width = 2; break;
+               case DR_LEN_4: width = 4; break;
+               case DR_LEN_8: width = 8; break;
+               }
+               if ( start < port + len && start + width > port )
+                   match |= 1 << i;
+            }
+    }
+    return match;
+}
+
 /*
  * Called from asm to set up the NMI trapbounce info.
  * Returns 0 if no callback is set up, else 1.
@@ -639,7 +673,6 @@ static int emulate_forced_invalid_op(str
     {
         /* Modify Feature Information. */
         clear_bit(X86_FEATURE_VME, &d);
-        clear_bit(X86_FEATURE_DE,  &d);
         clear_bit(X86_FEATURE_PSE, &d);
         clear_bit(X86_FEATURE_PGE, &d);
         if ( !cpu_has_sep )
@@ -668,7 +701,7 @@ static int emulate_forced_invalid_op(str
     regs->ebx = b;
     regs->ecx = c;
     regs->edx = d;
-    instruction_done(regs, eip);
+    instruction_done(regs, eip, 0);
 
     trace_trap_one_addr(TRC_PV_FORCED_INVALID_OP, regs->eip);
 
@@ -1325,7 +1358,7 @@ static int emulate_privileged_op(struct 
     unsigned long *reg, eip = regs->eip, res;
     u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0, lock = 0, rex = 0;
     enum { lm_seg_none, lm_seg_fs, lm_seg_gs } lm_ovr = lm_seg_none;
-    unsigned int port, i, data_sel, ar, data, rc;
+    unsigned int port, i, data_sel, ar, data, rc, bpmatch = 0;
     unsigned int op_bytes, op_default, ad_bytes, ad_default;
 #define rd_ad(reg) (ad_bytes >= sizeof(regs->reg) \
                     ? regs->reg \
@@ -1475,6 +1508,8 @@ static int emulate_privileged_op(struct 
         }
 #endif
 
+        port = (u16)regs->edx;
+
     continue_io_string:
         switch ( opcode )
         {
@@ -1483,9 +1518,8 @@ static int emulate_privileged_op(struct 
         case 0x6d: /* INSW/INSL */
             if ( data_limit < op_bytes - 1 ||
                  rd_ad(edi) > data_limit - (op_bytes - 1) ||
-                 !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
+                 !guest_io_okay(port, op_bytes, v, regs) )
                 goto fail;
-            port = (u16)regs->edx;
             switch ( op_bytes )
             {
             case 1:
@@ -1515,7 +1549,7 @@ static int emulate_privileged_op(struct 
         case 0x6f: /* OUTSW/OUTSL */
             if ( data_limit < op_bytes - 1 ||
                  rd_ad(esi) > data_limit - (op_bytes - 1) ||
-                 !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
+                 !guest_io_okay(port, op_bytes, v, regs) )
                 goto fail;
             rc = copy_from_user(&data, (void *)data_base + rd_ad(esi), op_bytes);
             if ( rc != 0 )
@@ -1523,7 +1557,6 @@ static int emulate_privileged_op(struct 
                 propagate_page_fault(data_base + rd_ad(esi) + op_bytes - rc, 0);
                 return EXCRET_fault_fixed;
             }
-            port = (u16)regs->edx;
             switch ( op_bytes )
             {
             case 1:
@@ -1549,9 +1582,11 @@ static int emulate_privileged_op(struct 
             break;
         }
 
+        bpmatch = check_guest_io_breakpoint(v, port, op_bytes);
+
         if ( rep_prefix && (wr_ad(ecx, regs->ecx - 1) != 0) )
         {
-            if ( !hypercall_preempt_check() )
+            if ( !bpmatch && !hypercall_preempt_check() )
                 goto continue_io_string;
             eip = regs->eip;
         }
@@ -1630,6 +1665,7 @@ static int emulate_privileged_op(struct 
                 regs->eax = (u32)~0;
             break;
         }
+        bpmatch = check_guest_io_breakpoint(v, port, op_bytes);
         goto done;
 
     case 0xec: /* IN %dx,%al */
@@ -1667,6 +1703,7 @@ static int emulate_privileged_op(struct 
                 io_emul(regs);
             break;
         }
+        bpmatch = check_guest_io_breakpoint(v, port, op_bytes);
         goto done;
 
     case 0xee: /* OUT %al,%dx */
@@ -1960,7 +1997,7 @@ static int emulate_privileged_op(struct 
 #undef rd_ad
 
  done:
-    instruction_done(regs, eip);
+    instruction_done(regs, eip, bpmatch);
     return EXCRET_fault_fixed;
 
  fail:
@@ -2330,7 +2367,7 @@ static int emulate_gate_op(struct cpu_u
         sel |= (regs->cs & 3);
 
     regs->cs = sel;
-    instruction_done(regs, off);
+    instruction_done(regs, off, 0);
 #endif
 
     return 0;
@@ -2842,25 +2879,44 @@ long set_debugreg(struct vcpu *v, int re
         /*
          * DR7: Bit 10 reserved (set to 1).
          *      Bits 11-12,14-15 reserved (set to 0).
+         */
+        value &= ~DR_CONTROL_RESERVED_ZERO; /* reserved bits => 0 */
+        value |=  DR_CONTROL_RESERVED_ONE;  /* reserved bits => 1 */
+        /*
          * Privileged bits:
          *      GD (bit 13): must be 0.
-         *      R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
-         *      LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
          */
-        /* DR7 == 0 => debugging disabled for this domain. */
-        if ( value != 0 )
+        if ( value & DR_GENERAL_DETECT )
+            return -EPERM;
+        /* DR7.{G,L}E = 0 => debugging disabled for this domain. */
+        if ( value & DR7_ACTIVE_MASK )
         {
-            value &= 0xffff27ff; /* reserved bits => 0 */
-            value |= 0x00000400; /* reserved bits => 1 */
-            if ( (value & (1<<13)) != 0 ) return -EPERM;
-            for ( i = 0; i < 16; i += 2 )
-                if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
+            unsigned int io_enable = 0;
+
+            for ( i = DR_CONTROL_SHIFT; i < 32; i += DR_CONTROL_SIZE )
+            {
+                if ( ((value >> i) & 3) == DR_IO )
+                {
+                    if ( !(v->arch.guest_context.ctrlreg[4] & X86_CR4_DE) )
+                        return -EPERM;
+                    io_enable |= value & (3 << ((i - 16) >> 1));
+                }
+#ifdef __i386__
+                if ( (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+                      !boot_cpu_has(X86_FEATURE_LM)) &&
+                     ((value >> i) & 0xc) == DR_LEN_8 )
+                    return -EPERM;
+#endif
+            }
+            v->arch.guest_context.debugreg[5] = io_enable;
+            value &= ~io_enable;
             /*
              * If DR7 was previously clear then we need to load all other
              * debug registers at this point as they were not restored during
              * context switch.
              */
-            if ( (v == curr) && (v->arch.guest_context.debugreg[7] == 0) )
+            if ( (v == curr) &&
+                 !(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
             {
                 write_debugreg(0, v->arch.guest_context.debugreg[0]);
                 write_debugreg(1, v->arch.guest_context.debugreg[1]);
@@ -2868,9 +2924,9 @@ long set_debugreg(struct vcpu *v, int re
                 write_debugreg(3, v->arch.guest_context.debugreg[3]);
                 write_debugreg(6, v->arch.guest_context.debugreg[6]);
             }
+            if ( v == curr )
+                write_debugreg(7, value);
         }
-        if ( v == curr ) 
-            write_debugreg(7, value);
         break;
     default:
         return -EINVAL;
@@ -2887,8 +2943,19 @@ long do_set_debugreg(int reg, unsigned l
 
 unsigned long do_get_debugreg(int reg)
 {
-    if ( (reg < 0) || (reg > 7) ) return -EINVAL;
-    return current->arch.guest_context.debugreg[reg];
+    switch ( reg )
+    {
+    case 0 ... 3:
+    case 6:
+        return current->arch.guest_context.debugreg[reg];
+    case 7:
+        return current->arch.guest_context.debugreg[7] |
+               current->arch.guest_context.debugreg[5];
+    case 4 ... 5:
+        return current->arch.guest_context.ctrlreg[4] & X86_CR4_DE ?
+               current->arch.guest_context.debugreg[reg + 2] : 0;
+    }
+    return -EINVAL;
 }
 
 /*
Index: 2007-11-13/xen/include/asm-x86/debugreg.h
===================================================================
--- 2007-11-13.orig/xen/include/asm-x86/debugreg.h      2005-11-17 15:51:06.000000000 +0100
+++ 2007-11-13/xen/include/asm-x86/debugreg.h   2007-11-21 09:39:32.000000000 +0100
@@ -33,11 +33,13 @@
 
 #define DR_RW_EXECUTE (0x0)   /* Settings for the access types to trap on */
 #define DR_RW_WRITE (0x1)
+#define DR_IO (0x2)
 #define DR_RW_READ (0x3)
 
 #define DR_LEN_1 (0x0) /* Settings for data length to trap on */
 #define DR_LEN_2 (0x4)
 #define DR_LEN_4 (0xC)
+#define DR_LEN_8 (0x8)
 
 /* The low byte to the control register determine which registers are
    enabled.  There are 4 fields of two bits.  One bit is "local", meaning
@@ -53,12 +55,16 @@
 #define DR_LOCAL_ENABLE_MASK (0x55)  /* Set  local bits for all 4 regs */
 #define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
 
+#define DR7_ACTIVE_MASK (DR_LOCAL_ENABLE_MASK|DR_GLOBAL_ENABLE_MASK)
+
 /* The second byte to the control register has a few special things.
    We can slow the instruction pipeline for instructions coming via the
    gdt or the ldt if we want to.  I am not sure why this is an advantage */
 
-#define DR_CONTROL_RESERVED (~0xFFFF03FFUL) /* Reserved by Intel */
-#define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
-#define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
+#define DR_CONTROL_RESERVED_ZERO (~0xFFFF23FFUL) /* Reserved, read as zero */
+#define DR_CONTROL_RESERVED_ONE  ( 0x00000400  ) /* Reserved, read as one */
+#define DR_LOCAL_EXACT_ENABLE    ( 0x00000100  ) /* Local exact enable */
+#define DR_GLOBAL_EXACT_ENABLE   ( 0x00000200  ) /* Global exact enable */
+#define DR_GENERAL_DETECT        ( 0x00002000  ) /* General detect enable */
 
 #endif /* _X86_DEBUGREG_H */



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.