[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] x86: cpuid configuration for PV guest



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1213180535 -3600
# Node ID 7c771f0a24f37cbb3ab56db79f3a82106b42f4d8
# Parent  f292689f48ba62050f1e019dfad1da0bb6444034
x86: cpuid configuration for PV guest

Add PV guest support for cpuid configuration and checking. This
feature only works for cpuid requests coming from the guest's
kernel; a user process can still execute the cpuid instruction directly.

A new policy for PV guests has been created in libxc. Dom0 cpuid
emulation is left hardcoded in Xen.

Signed-off-by: Jean Guyader <jean.guyader@xxxxxxxxxxxxx>
Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
---
 tools/libxc/xc_cpuid_x86.c     |  177 +++++++++++++++++++++++++++++++----------
 tools/python/xen/xend/image.py |   78 ++++++++++--------
 xen/arch/x86/traps.c           |   69 +++++++++------
 3 files changed, 222 insertions(+), 102 deletions(-)

diff -r f292689f48ba -r 7c771f0a24f3 tools/libxc/xc_cpuid_x86.c
--- a/tools/libxc/xc_cpuid_x86.c        Wed Jun 11 09:44:42 2008 +0100
+++ b/tools/libxc/xc_cpuid_x86.c        Wed Jun 11 11:35:35 2008 +0100
@@ -25,8 +25,8 @@
 #include <xen/hvm/params.h>
 
 #define bitmaskof(idx)      (1u << ((idx) & 31))
-#define clear_bit(idx, dst) ((dst) &= ~(1u << (idx)))
-#define set_bit(idx, dst)   ((dst) |= (1u << (idx)))
+#define clear_bit(idx, dst) ((dst) &= ~(1u << ((idx) & 31)))
+#define set_bit(idx, dst)   ((dst) |= (1u << ((idx) & 31)))
 
 #define DEF_MAX_BASE 0x00000004u
 #define DEF_MAX_EXT  0x80000008u
@@ -36,6 +36,33 @@ static int hypervisor_is_64bit(int xc)
     xen_capabilities_info_t xen_caps = "";
     return ((xc_version(xc, XENVER_capabilities, &xen_caps) == 0) &&
             (strstr(xen_caps, "x86_64") != NULL));
+}
+
+static void cpuid(const unsigned int *input, unsigned int *regs)
+{
+    unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 0 : input[1];
+    asm (
+#ifdef __i386__
+        "push %%ebx; cpuid; mov %%ebx,%1; pop %%ebx"
+#else
+        "push %%rbx; cpuid; mov %%ebx,%1; pop %%rbx"
+#endif
+        : "=a" (regs[0]), "=r" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
+        : "0" (input[0]), "2" (count) );
+}
+
+/* Get the manufacturer brand name of the host processor. */
+static void xc_cpuid_brand_get(char *str)
+{
+    unsigned int input[2] = { 0, 0 };
+    unsigned int regs[4];
+
+    cpuid(input, regs);
+
+    *(uint32_t *)(str + 0) = regs[1];
+    *(uint32_t *)(str + 4) = regs[3];
+    *(uint32_t *)(str + 8) = regs[2];
+    str[12] = '\0';
 }
 
 static void amd_xc_cpuid_policy(
@@ -60,8 +87,8 @@ static void amd_xc_cpuid_policy(
         int is_64bit = hypervisor_is_64bit(xc) && is_pae;
 
         if ( !is_pae )
-            clear_bit(X86_FEATURE_PAE & 31, regs[3]);
-        clear_bit(X86_FEATURE_PSE36 & 31, regs[3]);
+            clear_bit(X86_FEATURE_PAE, regs[3]);
+        clear_bit(X86_FEATURE_PSE36, regs[3]);
 
         /* Filter all other features according to a whitelist. */
         regs[2] &= ((is_64bit ? bitmaskof(X86_FEATURE_LAHF_LM) : 0) |
@@ -113,42 +140,17 @@ static void intel_xc_cpuid_policy(
     }
 }
 
-static void cpuid(const unsigned int *input, unsigned int *regs)
-{
-    unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 0 : input[1];
-    asm (
-#ifdef __i386__
-        "push %%ebx; cpuid; mov %%ebx,%1; pop %%ebx"
-#else
-        "push %%rbx; cpuid; mov %%ebx,%1; pop %%rbx"
-#endif
-        : "=a" (regs[0]), "=r" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
-        : "0" (input[0]), "2" (count) );
-}
-
-/* Get the manufacturer brand name of the host processor. */
-static void xc_cpuid_brand_get(char *str)
-{
-    unsigned int input[2] = { 0, 0 };
-    unsigned int regs[4];
-
-    cpuid(input, regs);
-
-    *(uint32_t *)(str + 0) = regs[1];
-    *(uint32_t *)(str + 4) = regs[3];
-    *(uint32_t *)(str + 8) = regs[2];
-    str[12] = '\0';
-}
-
-static void xc_cpuid_policy(
+static void xc_cpuid_hvm_policy(
     int xc, domid_t domid, const unsigned int *input, unsigned int *regs)
 {
     char brand[13];
     unsigned long pae;
+    int is_pae;
 
     xc_get_hvm_param(xc, domid, HVM_PARAM_PAE_ENABLED, &pae);
-
-    switch( input[0] )
+    is_pae = !!pae;
+
+    switch ( input[0] )
     {
     case 0x00000000:
         if ( regs[0] > DEF_MAX_BASE )
@@ -188,8 +190,8 @@ static void xc_cpuid_policy(
         /* We always support MTRR MSRs. */
         regs[3] |= bitmaskof(X86_FEATURE_MTRR);
 
-        if ( !pae )
-            clear_bit(X86_FEATURE_PAE & 31, regs[3]);
+        if ( !is_pae )
+            clear_bit(X86_FEATURE_PAE, regs[3]);
         break;
 
     case 0x80000000:
@@ -198,8 +200,8 @@ static void xc_cpuid_policy(
         break;
 
     case 0x80000001:
-        if ( !pae )
-            clear_bit(X86_FEATURE_NX & 31, regs[3]);
+        if ( !is_pae )
+            clear_bit(X86_FEATURE_NX, regs[3]);
         break;
 
 
@@ -223,9 +225,104 @@ static void xc_cpuid_policy(
 
     xc_cpuid_brand_get(brand);
     if ( strstr(brand, "AMD") )
-        amd_xc_cpuid_policy(xc, domid, input, regs, !!pae);
+        amd_xc_cpuid_policy(xc, domid, input, regs, is_pae);
     else
-        intel_xc_cpuid_policy(xc, domid, input, regs, !!pae);
+        intel_xc_cpuid_policy(xc, domid, input, regs, is_pae);
+
+}
+
+static void xc_cpuid_pv_policy(
+    int xc, domid_t domid, const unsigned int *input, unsigned int *regs)
+{
+    DECLARE_DOMCTL;
+    int guest_64bit, xen_64bit = hypervisor_is_64bit(xc);
+    char brand[13];
+
+    xc_cpuid_brand_get(brand);
+
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.domain = domid;
+    domctl.cmd = XEN_DOMCTL_get_address_size;
+    do_domctl(xc, &domctl);
+    guest_64bit = (domctl.u.address_size.size == 64);
+
+    if ( (input[0] & 0x7fffffff) == 1 )
+    {
+        clear_bit(X86_FEATURE_VME, regs[3]);
+        clear_bit(X86_FEATURE_PSE, regs[3]);
+        clear_bit(X86_FEATURE_PGE, regs[3]);
+        clear_bit(X86_FEATURE_MCE, regs[3]);
+        clear_bit(X86_FEATURE_MCA, regs[3]);
+        clear_bit(X86_FEATURE_MTRR, regs[3]);
+        clear_bit(X86_FEATURE_PSE36, regs[3]);
+    }
+
+    switch ( input[0] )
+    {
+    case 1:
+        if ( !xen_64bit || strstr(brand, "AMD") )
+            clear_bit(X86_FEATURE_SEP, regs[3]);
+        clear_bit(X86_FEATURE_DS, regs[3]);
+        clear_bit(X86_FEATURE_ACC, regs[3]);
+        clear_bit(X86_FEATURE_PBE, regs[3]);
+
+        clear_bit(X86_FEATURE_DTES64, regs[2]);
+        clear_bit(X86_FEATURE_MWAIT, regs[2]);
+        clear_bit(X86_FEATURE_DSCPL, regs[2]);
+        clear_bit(X86_FEATURE_VMXE, regs[2]);
+        clear_bit(X86_FEATURE_SMXE, regs[2]);
+        clear_bit(X86_FEATURE_EST, regs[2]);
+        clear_bit(X86_FEATURE_TM2, regs[2]);
+        if ( !guest_64bit )
+            clear_bit(X86_FEATURE_CX16, regs[2]);
+        clear_bit(X86_FEATURE_XTPR, regs[2]);
+        clear_bit(X86_FEATURE_PDCM, regs[2]);
+        clear_bit(X86_FEATURE_DCA, regs[2]);
+        break;
+    case 0x80000001:
+        if ( !guest_64bit )
+        {
+            clear_bit(X86_FEATURE_LM, regs[3]);
+            clear_bit(X86_FEATURE_LAHF_LM, regs[2]);
+            if ( !strstr(brand, "AMD") )
+                clear_bit(X86_FEATURE_SYSCALL, regs[3]);
+        }
+        else
+        {
+            set_bit(X86_FEATURE_SYSCALL, regs[3]);
+        }
+        clear_bit(X86_FEATURE_PAGE1GB, regs[3]);
+        clear_bit(X86_FEATURE_RDTSCP, regs[3]);
+
+        clear_bit(X86_FEATURE_SVME, regs[2]);
+        clear_bit(X86_FEATURE_OSVW, regs[2]);
+        clear_bit(X86_FEATURE_IBS, regs[2]);
+        clear_bit(X86_FEATURE_SKINIT, regs[2]);
+        clear_bit(X86_FEATURE_WDT, regs[2]);
+        break;
+    case 5: /* MONITOR/MWAIT */
+    case 0xa: /* Architectural Performance Monitor Features */
+    case 0x8000000a: /* SVM revision and features */
+    case 0x8000001b: /* Instruction Based Sampling */
+        regs[0] = regs[1] = regs[2] = regs[3] = 0;
+        break;
+    }
+}
+
+static int xc_cpuid_policy(
+    int xc, domid_t domid, const unsigned int *input, unsigned int *regs)
+{
+    xc_dominfo_t        info;
+
+    if ( xc_domain_getinfo(xc, domid, 1, &info) == 0 )
+        return -EINVAL;
+
+    if ( info.hvm )
+        xc_cpuid_hvm_policy(xc, domid, input, regs);
+    else
+        xc_cpuid_pv_policy(xc, domid, input, regs);
+
+    return 0;
 }
 
 static int xc_cpuid_do_domctl(
diff -r f292689f48ba -r 7c771f0a24f3 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Wed Jun 11 09:44:42 2008 +0100
+++ b/tools/python/xen/xend/image.py    Wed Jun 11 11:35:35 2008 +0100
@@ -103,6 +103,12 @@ class ImageHandler:
         if rtc_timeoffset is not None:
             xc.domain_set_time_offset(self.vm.getDomid(), int(rtc_timeoffset))
 
+        self.cpuid = None
+        self.cpuid_check = None
+        if 'cpuid' in vmConfig:
+            self.cpuid = vmConfig['cpuid'];
+        if 'cpuid_check' in vmConfig:
+            self.cpuid_check = vmConfig['cpuid_check']
 
     def cleanupBootloading(self):
         if self.bootloader:
@@ -454,6 +460,37 @@ class ImageHandler:
             except:
                 pass
 
+    def setCpuid(self):
+        xc.domain_set_policy_cpuid(self.vm.getDomid())
+
+        if self.cpuid is not None:
+            cpuid = self.cpuid
+            transformed = {}
+            for sinput, regs in cpuid.iteritems():
+                inputs = sinput.split(',')
+                input = long(inputs[0])
+                sub_input = None
+                if len(inputs) == 2:
+                    sub_input = long(inputs[1])
+                t = xc.domain_set_cpuid(self.vm.getDomid(),
+                                        input, sub_input, regs)
+                transformed[sinput] = t
+            self.cpuid = transformed
+
+        if self.cpuid_check is not None:
+            cpuid_check = self.cpuid_check
+            transformed = {}
+            for sinput, regs_check in cpuid_check.iteritems():
+                inputs = sinput.split(',')
+                input = long(inputs[0])
+                sub_input = None
+                if len(inputs) == 2:
+                    sub_input = long(inputs[1])
+                t = xc.domain_check_cpuid(input, sub_input, regs_check)
+                transformed[sinput] = t
+            self.cpuid_check = transformed
+
+
 
 class LinuxImageHandler(ImageHandler):
 
@@ -536,38 +573,7 @@ class HVMImageHandler(ImageHandler):
         self.apic = int(vmConfig['platform'].get('apic', 0))
         self.acpi = int(vmConfig['platform'].get('acpi', 0))
         self.guest_os_type = vmConfig['platform'].get('guest_os_type')
-
-        self.vmConfig = vmConfig
            
-    def setCpuid(self):
-        xc.domain_set_policy_cpuid(self.vm.getDomid())
-
-        if 'cpuid' in self.vmConfig:
-            cpuid = self.vmConfig['cpuid']
-            transformed = {}
-            for sinput, regs in cpuid.iteritems():
-                inputs = sinput.split(',')
-                input = long(inputs[0])
-                sub_input = None
-                if len(inputs) == 2:
-                    sub_input = long(inputs[1])
-                t = xc.domain_set_cpuid(self.vm.getDomid(),
-                                        input, sub_input, regs)
-                transformed[sinput] = t
-            self.vmConfig['cpuid'] = transformed
-
-        if 'cpuid_check' in self.vmConfig:
-            cpuid_check = self.vmConfig['cpuid_check']
-            transformed = {}
-            for sinput, regs_check in cpuid_check.iteritems():
-                inputs = sinput.split(',')
-                input = long(inputs[0])
-                sub_input = None
-                if len(inputs) == 2:
-                    sub_input = long(inputs[1])
-                t = xc.domain_check_cpuid(input, sub_input, regs_check)
-                transformed[sinput] = t
-            self.vmConfig['cpuid_check'] = transformed
 
     # Return a list of cmd line args to the device models based on the
     # xm config file
@@ -730,6 +736,9 @@ class IA64_Linux_ImageHandler(LinuxImage
         LinuxImageHandler.configure(self, vmConfig)
         self.vhpt = int(vmConfig['platform'].get('vhpt',  0))
 
+    def setCpuid(self):
+        # Guest CPUID configuration is not implemented yet.
+        return
 
 class X86_HVM_ImageHandler(HVMImageHandler):
 
@@ -739,8 +748,9 @@ class X86_HVM_ImageHandler(HVMImageHandl
 
     def buildDomain(self):
         xc.hvm_set_param(self.vm.getDomid(), HVM_PARAM_PAE_ENABLED, self.pae)
+        rc = HVMImageHandler.buildDomain(self)
         self.setCpuid()
-        return HVMImageHandler.buildDomain(self)
+        return rc
 
     def getRequiredAvailableMemory(self, mem_kb):
         # Add 8 MiB overhead for QEMU's video RAM.
@@ -769,7 +779,9 @@ class X86_Linux_ImageHandler(LinuxImageH
         # add an 8MB slack to balance backend allocations.
         mem_kb = self.getRequiredMaximumReservation() + (8 * 1024)
         xc.domain_set_memmap_limit(self.vm.getDomid(), mem_kb)
-        return LinuxImageHandler.buildDomain(self)
+        rc = LinuxImageHandler.buildDomain(self)
+        self.setCpuid()
+        return rc
 
 _handlers = {
     "ia64": {
diff -r f292689f48ba -r 7c771f0a24f3 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Wed Jun 11 09:44:42 2008 +0100
+++ b/xen/arch/x86/traps.c      Wed Jun 11 11:35:35 2008 +0100
@@ -649,37 +649,21 @@ int cpuid_hypervisor_leaves(
     return 1;
 }
 
-static int emulate_forced_invalid_op(struct cpu_user_regs *regs)
-{
-    char sig[5], instr[2];
+static void pv_cpuid(struct cpu_user_regs *regs)
+{
     uint32_t a, b, c, d;
-    unsigned long eip, rc;
 
     a = regs->eax;
     b = regs->ebx;
     c = regs->ecx;
     d = regs->edx;
-    eip = regs->eip;
-
-    /* Check for forced emulation signature: ud2 ; .ascii "xen". */
-    if ( (rc = copy_from_user(sig, (char *)eip, sizeof(sig))) != 0 )
-    {
-        propagate_page_fault(eip + sizeof(sig) - rc, 0);
-        return EXCRET_fault_fixed;
-    }
-    if ( memcmp(sig, "\xf\xbxen", sizeof(sig)) )
-        return 0;
-    eip += sizeof(sig);
-
-    /* We only emulate CPUID. */
-    if ( ( rc = copy_from_user(instr, (char *)eip, sizeof(instr))) != 0 )
-    {
-        propagate_page_fault(eip + sizeof(instr) - rc, 0);
-        return EXCRET_fault_fixed;
-    }
-    if ( memcmp(instr, "\xf\xa2", sizeof(instr)) )
-        return 0;
-    eip += sizeof(instr);
+
+    if ( current->domain->domain_id != 0 )
+    {
+        if ( !cpuid_hypervisor_leaves(a, &a, &b, &c, &d) )
+            domain_cpuid(current->domain, a, b, &a, &b, &c, &d);
+        goto out;
+    }
 
     asm ( 
         "cpuid"
@@ -694,8 +678,6 @@ static int emulate_forced_invalid_op(str
         __clear_bit(X86_FEATURE_PGE, &d);
         __clear_bit(X86_FEATURE_MCE, &d);
         __clear_bit(X86_FEATURE_MCA, &d);
-        if ( !IS_PRIV(current->domain) )
-            __clear_bit(X86_FEATURE_MTRR, &d);
         __clear_bit(X86_FEATURE_PSE36, &d);
     }
     switch ( (uint32_t)regs->eax )
@@ -717,8 +699,6 @@ static int emulate_forced_invalid_op(str
         __clear_bit(X86_FEATURE_DSCPL % 32, &c);
         __clear_bit(X86_FEATURE_VMXE % 32, &c);
         __clear_bit(X86_FEATURE_SMXE % 32, &c);
-        if ( !IS_PRIV(current->domain) )
-            __clear_bit(X86_FEATURE_EST % 32, &c);
         __clear_bit(X86_FEATURE_TM2 % 32, &c);
         if ( is_pv_32bit_vcpu(current) )
             __clear_bit(X86_FEATURE_CX16 % 32, &c);
@@ -758,10 +738,41 @@ static int emulate_forced_invalid_op(str
         break;
     }
 
+ out:
     regs->eax = a;
     regs->ebx = b;
     regs->ecx = c;
     regs->edx = d;
+}
+
+static int emulate_forced_invalid_op(struct cpu_user_regs *regs)
+{
+    char sig[5], instr[2];
+    unsigned long eip, rc;
+
+    eip = regs->eip;
+
+    /* Check for forced emulation signature: ud2 ; .ascii "xen". */
+    if ( (rc = copy_from_user(sig, (char *)eip, sizeof(sig))) != 0 )
+    {
+        propagate_page_fault(eip + sizeof(sig) - rc, 0);
+        return EXCRET_fault_fixed;
+    }
+    if ( memcmp(sig, "\xf\xbxen", sizeof(sig)) )
+        return 0;
+    eip += sizeof(sig);
+
+    /* We only emulate CPUID. */
+    if ( ( rc = copy_from_user(instr, (char *)eip, sizeof(instr))) != 0 )
+    {
+        propagate_page_fault(eip + sizeof(instr) - rc, 0);
+        return EXCRET_fault_fixed;
+    }
+    if ( memcmp(instr, "\xf\xa2", sizeof(instr)) )
+        return 0;
+    eip += sizeof(instr);
+
+    pv_cpuid(regs);
 
     instruction_done(regs, eip, 0);
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.