[Xen-devel] [PATCH v2 RFC 2/4] x86/PV: support data breakpoint extension registers



Introduce an extension to XEN_DOMCTL_[gs]et_ext_vcpucontext, similar to
the generic MSR save/restore logic recently added for HVM, to save and
restore the AMD data breakpoint extension (DRn address mask) MSRs for
PV guests.

This also moves some debug register related declarations/definitions to
the header intended for them.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v2: libxc adjustment put in place (depending on the separately posted
    http://lists.xenproject.org/archives/html/xen-devel/2014-03/msg03059.html)
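
Illustration only (not part of the patch): the two-pass protocol a
libxc caller is expected to follow with the extended domctl, assuming
the libxc-internal helpers (DECLARE_DOMCTL etc.) used in the files
below. The first XEN_DOMCTL_get_ext_vcpucontext, issued without a
buffer, fails with -ENOBUFS but fills in msr_count; the caller then
sizes a hypercall buffer accordingly and retries. Error handling
trimmed; get_ext_vcpucontext_sketch is a hypothetical name.

static int get_ext_vcpucontext_sketch(xc_interface *xch, uint32_t dom,
                                      uint32_t vcpu)
{
    DECLARE_DOMCTL;
    DECLARE_HYPERCALL_BUFFER(xen_domctl_ext_vcpu_msr_t, msrs);
    int rc;

    domctl.cmd = XEN_DOMCTL_get_ext_vcpucontext;
    domctl.domain = dom;
    memset(&domctl.u, 0, sizeof(domctl.u));
    domctl.u.ext_vcpucontext.vcpu = vcpu;

    /* First pass: no buffer; the hypervisor reports the needed count. */
    rc = xc_domctl(xch, &domctl);
    if ( rc < 0 && errno == ENOBUFS && domctl.u.ext_vcpucontext.msr_count )
    {
        /* Second pass: provide room for the msr_count entries reported. */
        msrs = xc_hypercall_buffer_alloc(xch, msrs,
                                         domctl.u.ext_vcpucontext.msr_count *
                                         sizeof(*msrs));
        if ( !msrs )
            return -1;
        set_xen_guest_handle(domctl.u.ext_vcpucontext.msrs, msrs);
        rc = xc_domctl(xch, &domctl);
        /* ... consume msrs[0 .. msr_count-1] here ... */
        xc_hypercall_buffer_free(xch, msrs);
    }

    return rc;
}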

--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -590,8 +590,13 @@ static int buffer_tail_pv(xc_interface *
                           uint32_t vcpuextstate_size)
 {
     unsigned int i;
-    size_t pfnlen, vcpulen;
+    size_t pfnlen, vcpulen, total;
+    int alloc = 0;
     struct domain_info_context *dinfo = &ctx->dinfo;
+    union {
+        const unsigned char *raw;
+        const xen_domctl_ext_vcpucontext_t *evc;
+    } ptr;
 
     /* TODO: handle changing pfntab and vcpu counts */
     /* PFN tab */
@@ -634,11 +639,36 @@ static int buffer_tail_pv(xc_interface *
             ERROR("Error allocating VCPU ctxt tail buffer");
             goto free_pfntab;
         }
+        alloc = 1;
     }
     // DPRINTF("Reading VCPUS: %d bytes\n", vcpulen);
-    if ( RDEXACT(fd, buf->vcpubuf, vcpulen) ) {
-        PERROR("Error when reading ctxt");
-        goto free_vcpus;
+    for (total = i = 0, ptr.raw = buf->vcpubuf; ext_vcpucontext; ) {
+        if ( RDEXACT(fd, buf->vcpubuf + total, vcpulen) ) {
+            PERROR("Error when reading ctxt");
+            goto free_vcpus;
+        }
+        total += vcpulen;
+        for (vcpulen = 0; i < buf->vcpucount; ++i) {
+            size_t msrlen;
+
+            if ((const unsigned char *)(ptr.evc + 1) > buf->vcpubuf + total)
+                break;
+            msrlen = ptr.evc->msr_count * sizeof(xen_domctl_ext_vcpu_msr_t);
+            vcpulen += msrlen;
+            ptr.raw += 128 + msrlen + vcpuextstate_size;
+        }
+        if (!vcpulen)
+            break;
+        if (alloc) {
+            void *nbuf = realloc(buf->vcpubuf, total + vcpulen);
+
+            if (!nbuf) {
+                ERROR("Error growing VCPU ctxt tail buffer");
+                goto free_vcpus;
+            }
+            ptr.raw = nbuf + (ptr.raw - buf->vcpubuf);
+            buf->vcpubuf = nbuf;
+        }
     }
 
     /* load shared_info_page */
@@ -1996,6 +2026,8 @@ int xc_domain_restore(xc_interface *xch,
     vcpup = tailbuf.u.pv.vcpubuf;
     for ( i = 0; i <= max_vcpu_id; i++ )
     {
+        DECLARE_HYPERCALL_BUFFER(xen_domctl_ext_vcpu_msr_t, msrs);
+
         if ( !(vcpumap[i/64] & (1ULL << (i%64))) )
             continue;
 
@@ -2130,9 +2162,25 @@ int xc_domain_restore(xc_interface *xch,
             goto vcpu_ext_state_restore;
         memcpy(&domctl.u.ext_vcpucontext, vcpup, 128);
         vcpup += 128;
+        if ( domctl.u.ext_vcpucontext.msr_count )
+        {
+            size_t sz = domctl.u.ext_vcpucontext.msr_count * sizeof(*msrs);
+
+            msrs = xc_hypercall_buffer_alloc(xch, msrs, sz);
+            if ( !msrs )
+            {
+                PERROR("No memory for vcpu%d MSRs", i);
+                goto out;
+            }
+            memcpy(msrs, vcpup, sz);
+            vcpup += sz;
+            set_xen_guest_handle(domctl.u.ext_vcpucontext.msrs, msrs);
+        }
         domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
         domctl.domain = dom;
         frc = xc_domctl(xch, &domctl);
+        if ( msrs )
+            xc_hypercall_buffer_free(xch, msrs);
         if ( frc != 0 )
         {
             PERROR("Couldn't set extended vcpu%d info", i);
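
An aside on the restore-side loop in buffer_tail_pv() above: each
vCPU's tail record is variable-length, and its size only becomes known
once the fixed 128-byte ext_vcpucontext header (which carries
msr_count) has been read -- hence the incremental read/realloc. A
sketch of the record layout (illustration only, not part of the patch;
the 128 matches the memcpy/wrexact size used throughout):

struct pv_vcpu_tail_record {                  /* hypothetical name */
    unsigned char ext_vcpucontext[128];       /* fixed; holds msr_count */
 /* xen_domctl_ext_vcpu_msr_t msrs[msr_count];       variable-length */
 /* unsigned char xstate[vcpuextstate_size];         variable-length */
};
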
--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c
@@ -836,6 +836,9 @@ int xc_domain_save(xc_interface *xch, in
     /* base of the region in which domain memory is mapped */
     unsigned char *region_base = NULL;
 
+    /* MSR extensions to xen_domctl_ext_vcpucontext_t */
+    DECLARE_HYPERCALL_BUFFER(xen_domctl_ext_vcpu_msr_t, msrs);
+
     /* A copy of the CPU eXtended States of the guest. */
     DECLARE_HYPERCALL_BUFFER(void, buffer);
 
@@ -1960,16 +1963,36 @@ int xc_domain_save(xc_interface *xch, in
         domctl.domain = dom;
         memset(&domctl.u, 0, sizeof(domctl.u));
         domctl.u.ext_vcpucontext.vcpu = i;
-        if ( xc_domctl(xch, &domctl) < 0 )
+        frc = xc_domctl(xch, &domctl);
+        if ( frc < 0 && errno == ENOBUFS && domctl.u.ext_vcpucontext.msr_count )
+        {
+            msrs = xc_hypercall_buffer_alloc(xch, msrs,
+                                             domctl.u.ext_vcpucontext.msr_count *
+                                             sizeof(*msrs));
+            set_xen_guest_handle(domctl.u.ext_vcpucontext.msrs, msrs);
+            frc = msrs ? xc_domctl(xch, &domctl) : -1;
+        }
+        if ( frc < 0 )
         {
             PERROR("No extended context for VCPU%d", i);
             goto out;
         }
         if ( wrexact(io_fd, &domctl.u.ext_vcpucontext, 128) )
         {
-            PERROR("Error when writing to state file (2)");
+            PERROR("Error when writing to state file (ext ctxt)");
             goto out;
         }
+        if ( msrs )
+        {
+            if ( wrexact(io_fd, msrs,
+                         domctl.u.ext_vcpucontext.msr_count * sizeof(*msrs)) )
+            {
+                PERROR("Error when writing to state file (MSRs)");
+                goto out;
+            }
+            xc_hypercall_buffer_free(xch, msrs);
+            msrs = NULL;
+        }
 
         /* Start to fetch CPU eXtended States */
         /* Get buffer size first */
@@ -2134,6 +2157,8 @@ int xc_domain_save(xc_interface *xch, in
 
     xc_hypercall_buffer_free_pages(xch, to_send, NRPAGES(bitmap_size(dinfo->p2m_size)));
     xc_hypercall_buffer_free_pages(xch, to_skip, NRPAGES(bitmap_size(dinfo->p2m_size)));
+    if (msrs)
+        xc_hypercall_buffer_free(xch, msrs);
 
     free(pfn_type);
     free(pfn_batch);
--- a/xen/arch/x86/acpi/suspend.c
+++ b/xen/arch/x86/acpi/suspend.c
@@ -9,6 +9,7 @@
 #include <xen/smp.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
+#include <asm/debugreg.h>
 #include <asm/flushtlb.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1316,14 +1316,7 @@ static void paravirt_ctxt_switch_to(stru
         write_cr4(cr4);
 
     if ( unlikely(v->arch.debugreg[7] & DR7_ACTIVE_MASK) )
-    {
-        write_debugreg(0, v->arch.debugreg[0]);
-        write_debugreg(1, v->arch.debugreg[1]);
-        write_debugreg(2, v->arch.debugreg[2]);
-        write_debugreg(3, v->arch.debugreg[3]);
-        write_debugreg(6, v->arch.debugreg[6]);
-        write_debugreg(7, v->arch.debugreg[7]);
-    }
+        activate_debugregs(v);
 
     if ( (v->domain->arch.tsc_mode ==  TSC_MODE_PVRDTSCP) &&
          boot_cpu_has(X86_FEATURE_RDTSCP) )
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -52,6 +52,7 @@ long arch_do_domctl(
 {
     long ret = 0;
     bool_t copyback = 0;
+    unsigned long i;
 
     switch ( domctl->cmd )
     {
@@ -319,7 +320,6 @@ long arch_do_domctl(
 
     case XEN_DOMCTL_getmemlist:
     {
-        int i;
         unsigned long max_pfns = domctl->u.getmemlist.max_pfns;
         uint64_t mfn;
         struct page_info *page;
@@ -645,7 +645,6 @@ long arch_do_domctl(
         unsigned long mfn = domctl->u.memory_mapping.first_mfn;
         unsigned long nr_mfns = domctl->u.memory_mapping.nr_mfns;
         int add = domctl->u.memory_mapping.add_mapping;
-        unsigned long i;
 
         ret = -EINVAL;
         if ( (mfn + nr_mfns - 1) < mfn || /* wrap? */
@@ -809,6 +808,7 @@ long arch_do_domctl(
     {
         struct xen_domctl_ext_vcpucontext *evc;
         struct vcpu *v;
+        struct xen_domctl_ext_vcpu_msr msr;
 
         evc = &domctl->u.ext_vcpucontext;
 
@@ -854,7 +854,42 @@ long arch_do_domctl(
             evc->vmce.mci_ctl2_bank0 = v->arch.vmce.bank[0].mci_ctl2;
             evc->vmce.mci_ctl2_bank1 = v->arch.vmce.bank[1].mci_ctl2;
 
-            ret = 0;
+            i = ret = 0;
+            if ( boot_cpu_has(X86_FEATURE_DBEXT) )
+            {
+                unsigned int j;
+
+                if ( v->arch.pv_vcpu.dr_mask[0] )
+                {
+                    if ( i < evc->msr_count && !ret )
+                    {
+                        msr.index = MSR_AMD64_DR0_ADDRESS_MASK;
+                        msr.reserved = 0;
+                        msr.value = v->arch.pv_vcpu.dr_mask[0];
+                        if ( copy_to_guest_offset(evc->msrs, i, &msr, 1) )
+                            ret = -EFAULT;
+                    }
+                    ++i;
+                }
+                for ( j = 0; j < 3; ++j )
+                {
+                    if ( !v->arch.pv_vcpu.dr_mask[1 + j] )
+                        continue;
+                    if ( i < evc->msr_count && !ret )
+                    {
+                        msr.index = MSR_AMD64_DR1_ADDRESS_MASK + j;
+                        msr.reserved = 0;
+                        msr.value = v->arch.pv_vcpu.dr_mask[1 + j];
+                        if ( copy_to_guest_offset(evc->msrs, i, &msr, 1) )
+                            ret = -EFAULT;
+                    }
+                    ++i;
+                }
+            }
+            if ( i > evc->msr_count && !ret )
+                ret = -ENOBUFS;
+            evc->msr_count = i;
+
             vcpu_unpause(v);
             copyback = 1;
         }
@@ -909,9 +944,49 @@ long arch_do_domctl(
 
                 ret = vmce_restore_vcpu(v, &vmce);
             }
+            else if ( evc->size > offsetof(typeof(*evc), vmce) )
+                ret = -EINVAL;
             else
                 ret = 0;
 
+            if ( ret || evc->size <= offsetof(typeof(*evc), msrs) )
+                /* nothing */;
+            else if ( evc->size < offsetof(typeof(*evc), msrs) +
+                                  sizeof(evc->msrs) )
+                ret = -EINVAL;
+            else
+            {
+                for ( i = 0; i < evc->msr_count; ++i )
+                {
+                    ret = -EFAULT;
+                    if ( copy_from_guest_offset(&msr, evc->msrs, i, 1) )
+                        break;
+                    ret = -EINVAL;
+                    if ( msr.reserved )
+                        break;
+                    switch ( msr.index )
+                    {
+                    case MSR_AMD64_DR0_ADDRESS_MASK:
+                        if ( !boot_cpu_has(X86_FEATURE_DBEXT) ||
+                             (msr.value >> 32) )
+                            break;
+                        v->arch.pv_vcpu.dr_mask[0] = msr.value;
+                        continue;
+                    case MSR_AMD64_DR1_ADDRESS_MASK ...
+                         MSR_AMD64_DR3_ADDRESS_MASK:
+                        if ( !boot_cpu_has(X86_FEATURE_DBEXT) ||
+                             (msr.value >> 32) )
+                            break;
+                        msr.index -= MSR_AMD64_DR1_ADDRESS_MASK - 1;
+                        v->arch.pv_vcpu.dr_mask[msr.index] = msr.value;
+                        continue;
+                    }
+                    break;
+                }
+                if ( i == evc->msr_count )
+                    ret = 0;
+            }
+
             domain_unpause(d);
         }
     }
@@ -921,7 +996,6 @@ long arch_do_domctl(
     {
         xen_domctl_cpuid_t *ctl = &domctl->u.cpuid;
         cpuid_input_t *cpuid = NULL; 
-        int i;
 
         for ( i = 0; i < MAX_CPUID_INPUT; i++ )
         {
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2498,6 +2498,23 @@ static int emulate_privileged_op(struct 
             if ( wrmsr_safe(regs->ecx, msr_content) != 0 )
                 goto fail;
             break;
+
+        case MSR_AMD64_DR0_ADDRESS_MASK:
+            if ( !boot_cpu_has(X86_FEATURE_DBEXT) || (msr_content >> 32) )
+                goto fail;
+            v->arch.pv_vcpu.dr_mask[0] = msr_content;
+            if ( v->arch.debugreg[7] & DR7_ACTIVE_MASK )
+                wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, msr_content);
+            break;
+        case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
+            if ( !boot_cpu_has(X86_FEATURE_DBEXT) || (msr_content >> 32) )
+                goto fail;
+            v->arch.pv_vcpu.dr_mask
+                [regs->_ecx - MSR_AMD64_DR1_ADDRESS_MASK + 1] = msr_content;
+            if ( v->arch.debugreg[7] & DR7_ACTIVE_MASK )
+                wrmsrl(regs->_ecx, msr_content);
+            break;
+
         default:
             if ( wrmsr_hypervisor_regs(regs->ecx, msr_content) == 1 )
                 break;
@@ -2585,6 +2602,21 @@ static int emulate_privileged_op(struct 
             regs->eax = (uint32_t)msr_content;
             regs->edx = (uint32_t)(msr_content >> 32);
             break;
+
+        case MSR_AMD64_DR0_ADDRESS_MASK:
+            if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
+                goto fail;
+            regs->eax = v->arch.pv_vcpu.dr_mask[0];
+            regs->edx = 0;
+            break;
+        case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
+            if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
+                goto fail;
+            regs->eax = v->arch.pv_vcpu.dr_mask
+                            [regs->_ecx - MSR_AMD64_DR1_ADDRESS_MASK + 1];
+            regs->edx = 0;
+            break;
+
         default:
             if ( rdmsr_hypervisor_regs(regs->ecx, &val) )
             {
@@ -3628,7 +3660,27 @@ long do_set_trap_table(XEN_GUEST_HANDLE_
     return rc;
 }
 
-long set_debugreg(struct vcpu *v, int reg, unsigned long value)
+void activate_debugregs(const struct vcpu *curr)
+{
+    ASSERT(curr == current);
+
+    write_debugreg(0, curr->arch.debugreg[0]);
+    write_debugreg(1, curr->arch.debugreg[1]);
+    write_debugreg(2, curr->arch.debugreg[2]);
+    write_debugreg(3, curr->arch.debugreg[3]);
+    write_debugreg(6, curr->arch.debugreg[6]);
+    write_debugreg(7, curr->arch.debugreg[7]);
+
+    if ( boot_cpu_has(X86_FEATURE_DBEXT) )
+    {
+        wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[0]);
+        wrmsrl(MSR_AMD64_DR1_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[1]);
+        wrmsrl(MSR_AMD64_DR2_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[2]);
+        wrmsrl(MSR_AMD64_DR3_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[3]);
+    }
+}
+
+long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
 {
     int i;
     struct vcpu *curr = current;
@@ -3709,11 +3761,8 @@ long set_debugreg(struct vcpu *v, int re
             if ( (v == curr) &&
                  !(v->arch.debugreg[7] & DR7_ACTIVE_MASK) )
             {
-                write_debugreg(0, v->arch.debugreg[0]);
-                write_debugreg(1, v->arch.debugreg[1]);
-                write_debugreg(2, v->arch.debugreg[2]);
-                write_debugreg(3, v->arch.debugreg[3]);
-                write_debugreg(6, v->arch.debugreg[6]);
+                activate_debugregs(curr);
+                break;
             }
         }
         if ( v == curr )
--- a/xen/include/asm-x86/debugreg.h
+++ b/xen/include/asm-x86/debugreg.h
@@ -64,4 +64,16 @@
 #define DR_GLOBAL_EXACT_ENABLE   (0x00000200ul) /* Global exact enable */
 #define DR_GENERAL_DETECT        (0x00002000ul) /* General detect enable */
 
+#define write_debugreg(reg, val) do {                       \
+    unsigned long __val = val;                              \
+    asm volatile ( "mov %0,%%db" #reg : : "r" (__val) );    \
+} while (0)
+#define read_debugreg(reg) ({                               \
+    unsigned long __val;                                    \
+    asm volatile ( "mov %%db" #reg ",%0" : "=r" (__val) );  \
+    __val;                                                  \
+})
+long set_debugreg(struct vcpu *, unsigned int reg, unsigned long value);
+void activate_debugregs(const struct vcpu *);
+
 #endif /* _X86_DEBUGREG_H */
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -374,6 +374,9 @@ struct pv_vcpu
     unsigned long shadow_ldt_mapcnt;
     spinlock_t shadow_ldt_lock;
 
+    /* data breakpoint extension MSRs */
+    uint32_t dr_mask[4];
+
     /* Deferred VA-based update state. */
     bool_t need_update_runstate_area;
     struct vcpu_time_info pending_system_time;
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -462,17 +462,6 @@ long set_gdt(struct vcpu *d, 
              unsigned long *frames, 
              unsigned int entries);
 
-#define write_debugreg(reg, val) do {                       \
-    unsigned long __val = val;                              \
-    asm volatile ( "mov %0,%%db" #reg : : "r" (__val) );    \
-} while (0)
-#define read_debugreg(reg) ({                               \
-    unsigned long __val;                                    \
-    asm volatile ( "mov %%db" #reg ",%0" : "=r" (__val) );  \
-    __val;                                                  \
-})
-long set_debugreg(struct vcpu *p, int reg, unsigned long value);
-
 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
 static always_inline void rep_nop(void)
 {
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -36,7 +36,7 @@
 #include "grant_table.h"
 #include "hvm/save.h"
 
-#define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
+#define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a
 
 /*
  * NB. xen_domctl.domain is an IN/OUT parameter for this operation.
@@ -563,6 +563,16 @@ typedef struct xen_domctl_pin_mem_cachea
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t);
 
 
+#if defined(__i386__) || defined(__x86_64__)
+struct xen_domctl_ext_vcpu_msr {
+    uint32_t         index;
+    uint32_t         reserved;
+    uint64_aligned_t value;
+};
+typedef struct xen_domctl_ext_vcpu_msr xen_domctl_ext_vcpu_msr_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpu_msr_t);
+#endif
+
 /* XEN_DOMCTL_set_ext_vcpucontext */
 /* XEN_DOMCTL_get_ext_vcpucontext */
 struct xen_domctl_ext_vcpucontext {
@@ -582,6 +592,7 @@ struct xen_domctl_ext_vcpucontext {
     uint16_t         sysenter_callback_cs;
     uint8_t          syscall32_disables_events;
     uint8_t          sysenter_disables_events;
+    uint16_t         msr_count;
 #if defined(__GNUC__)
     union {
         uint64_aligned_t mcg_cap;
@@ -590,6 +601,7 @@ struct xen_domctl_ext_vcpucontext {
 #else
     struct hvm_vmce_vcpu vmce;
 #endif
+    XEN_GUEST_HANDLE_64(xen_domctl_ext_vcpu_msr_t) msrs;
 #endif
 };
 typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t;
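
To illustrate the new public structure (example only, not part of the
patch): a single data breakpoint extension mask travels as one slot
like the one below. The set path in arch_do_domctl() above rejects
entries with a non-zero reserved field or with any bits set above
bit 31, failing the domctl with -EINVAL.

xen_domctl_ext_vcpu_msr_t msr = {
    .index    = MSR_AMD64_DR0_ADDRESS_MASK,
    .reserved = 0,       /* must be zero, or the domctl fails */
    .value    = 0xffful, /* only the low 32 bits may be set */
};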

