[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] cpuid configuration for PV guest



Hi,

This patch adds PV guest support for cpuid configuration and
checking. The feature only works for cpuid requests that come from
the guest's kernel; a process can still execute cpuid directly.

A new policy for PV guests has been created in libxc. The dom0 cpuid
emulation is left hardcoded in Xen.

Signed-off-by: Jean Guyader <jean.guyader@xxxxxxxxxxxxx>

--
Jean Guyader

diff -r 5603534c62f9 tools/libxc/xc_cpuid_x86.c
--- a/tools/libxc/xc_cpuid_x86.c        Tue Jun 03 09:41:19 2008 +0100
+++ b/tools/libxc/xc_cpuid_x86.c        Wed Jun 04 11:31:19 2008 +0100
@@ -140,14 +140,66 @@ static void xc_cpuid_brand_get(char *str
     str[12] = '\0';
 }
 
-static void xc_cpuid_policy(
+static void xc_cpuid_pv_policy(
     int xc, domid_t domid, const unsigned int *input, unsigned int *regs)
 {
-    char brand[13];
-    unsigned long pae;
+    switch ( input[0] )
+    {
+    case 0x00000000:
+        if ( regs[0] > DEF_MAX_BASE )
+            regs[0] = DEF_MAX_BASE;
+        break;
 
-    xc_get_hvm_param(xc, domid, HVM_PARAM_PAE_ENABLED, &pae);
+    case 1:
+        clear_bit(X86_FEATURE_DTES64 % 32, regs[2]);
+        clear_bit(X86_FEATURE_MWAIT % 32, regs[2]);
+        clear_bit(X86_FEATURE_DSCPL % 32, regs[2]);
+        clear_bit(X86_FEATURE_VMXE % 32, regs[2]);
+        clear_bit(X86_FEATURE_SMXE % 32, regs[2]);
+        clear_bit(X86_FEATURE_EST % 32, regs[2]);
+        clear_bit(X86_FEATURE_TM2 % 32, regs[2]);
+        clear_bit(X86_FEATURE_CX16 % 32, regs[2]);
+        clear_bit(X86_FEATURE_XTPR % 32, regs[2]);
+        clear_bit(X86_FEATURE_PDCM % 32, regs[2]);
+        clear_bit(X86_FEATURE_DCA % 32, regs[2]);
+        
+        clear_bit(X86_FEATURE_VME % 32, regs[3]);
+        clear_bit(X86_FEATURE_PSE % 32, regs[3]);
+        clear_bit(X86_FEATURE_PGE % 32, regs[3]);
+        clear_bit(X86_FEATURE_MCE % 32, regs[3]);
+        clear_bit(X86_FEATURE_MCA % 32, regs[3]);
+        clear_bit(X86_FEATURE_MTRR % 32, regs[3]);
+        clear_bit(X86_FEATURE_DS % 32, regs[3]);
+        clear_bit(X86_FEATURE_ACC % 32, regs[3]);
+        clear_bit(X86_FEATURE_PBE % 32, regs[3]);
+        break;
 
+    case 0x80000000:
+        if ( regs[0] > DEF_MAX_EXT )
+            regs[0] = DEF_MAX_EXT;
+        break;
+ 
+    case 0x80000001:
+        clear_bit(X86_FEATURE_NX & 31, regs[3]);
+        break;
+
+    case 0x00000002:
+    case 0x00000004:
+    case 0x80000002:
+    case 0x80000003:
+    case 0x80000004:
+    case 0x80000006:
+        break;
+
+    default:
+        regs[0] = regs[1] = regs[2] = regs[3] = 0;
+    }
+}
+
+static void xc_cpuid_hvm_policy(
+    int xc, domid_t domid, const unsigned int *input, unsigned int *regs,
+    int is_pae)
+{
     switch( input[0] )
     {
     case 0x00000000:
@@ -188,7 +240,7 @@ static void xc_cpuid_policy(
         /* We always support MTRR MSRs. */
         regs[3] |= bitmaskof(X86_FEATURE_MTRR);
 
-        if ( !pae )
+        if ( !is_pae )
             clear_bit(X86_FEATURE_PAE & 31, regs[3]);
         break;
 
@@ -198,7 +250,7 @@ static void xc_cpuid_policy(
         break;
 
     case 0x80000001:
-        if ( !pae )
+        if ( !is_pae )
             clear_bit(X86_FEATURE_NX & 31, regs[3]);
         break;
 
@@ -221,11 +273,32 @@ static void xc_cpuid_policy(
         break;
     }
 
+}
+
+static int xc_cpuid_policy(
+    int xc, domid_t domid, const unsigned int *input, unsigned int *regs)
+{
+    xc_dominfo_t        info;
+    unsigned long       pae = 1;
+    char                brand[13];
+
+    if ( xc_domain_getinfo(xc, domid, 1, &info) == 0 )
+        return -EINVAL;
+    if ( info.hvm )
+    {
+        xc_get_hvm_param(xc, domid, HVM_PARAM_PAE_ENABLED, &pae);
+        xc_cpuid_hvm_policy(xc, domid, input, regs, !!pae);
+    }
+    else
+        xc_cpuid_pv_policy(xc, domid, input, regs);
+
     xc_cpuid_brand_get(brand);
     if ( strstr(brand, "AMD") )
-        amd_xc_cpuid_policy(xc, domid, input, regs, !!pae);
+        amd_xc_cpuid_policy(xc, domid, input, regs, pae);
     else
-        intel_xc_cpuid_policy(xc, domid, input, regs, !!pae);
+        intel_xc_cpuid_policy(xc, domid, input, regs, pae);
+
+    return 0;
 }
 
 static int xc_cpuid_do_domctl(
diff -r 5603534c62f9 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Tue Jun 03 09:41:19 2008 +0100
+++ b/tools/python/xen/xend/image.py    Wed Jun 04 11:31:19 2008 +0100
@@ -103,6 +103,12 @@ class ImageHandler:
         if rtc_timeoffset is not None:
             xc.domain_set_time_offset(self.vm.getDomid(), int(rtc_timeoffset))
 
+        self.cpuid = None
+        self.cpuid_check = None
+        if 'cpuid' in vmConfig:
+            self.cpuid = vmConfig['cpuid'];
+        if 'cpuid_check' in vmConfig:
+            self.cpuid_check = vmConfig['cpuid_check']
 
     def cleanupBootloading(self):
         if self.bootloader:
@@ -454,6 +460,37 @@ class ImageHandler:
             except:
                 pass
 
+    def setCpuid(self):
+        xc.domain_set_policy_cpuid(self.vm.getDomid())
+
+        if self.cpuid is not None:
+            cpuid = self.cpuid
+            transformed = {}
+            for sinput, regs in cpuid.iteritems():
+                inputs = sinput.split(',')
+                input = long(inputs[0])
+                sub_input = None
+                if len(inputs) == 2:
+                    sub_input = long(inputs[1])
+                t = xc.domain_set_cpuid(self.vm.getDomid(),
+                                        input, sub_input, regs)
+                transformed[sinput] = t
+            self.cpuid = transformed
+
+        if self.cpuid_check is not None:
+            cpuid_check = self.cpuid_check
+            transformed = {}
+            for sinput, regs_check in cpuid_check.iteritems():
+                inputs = sinput.split(',')
+                input = long(inputs[0])
+                sub_input = None
+                if len(inputs) == 2:
+                    sub_input = long(inputs[1])
+                t = xc.domain_check_cpuid(input, sub_input, regs_check)
+                transformed[sinput] = t
+            self.cpuid_check = transformed
+
+
 
 class LinuxImageHandler(ImageHandler):
 
@@ -536,38 +573,7 @@ class HVMImageHandler(ImageHandler):
         self.apic = int(vmConfig['platform'].get('apic', 0))
         self.acpi = int(vmConfig['platform'].get('acpi', 0))
         self.guest_os_type = vmConfig['platform'].get('guest_os_type')
-
-        self.vmConfig = vmConfig
            
-    def setCpuid(self):
-        xc.domain_set_policy_cpuid(self.vm.getDomid())
-
-        if 'cpuid' in self.vmConfig:
-            cpuid = self.vmConfig['cpuid']
-            transformed = {}
-            for sinput, regs in cpuid.iteritems():
-                inputs = sinput.split(',')
-                input = long(inputs[0])
-                sub_input = None
-                if len(inputs) == 2:
-                    sub_input = long(inputs[1])
-                t = xc.domain_set_cpuid(self.vm.getDomid(),
-                                        input, sub_input, regs)
-                transformed[sinput] = t
-            self.vmConfig['cpuid'] = transformed
-
-        if 'cpuid_check' in self.vmConfig:
-            cpuid_check = self.vmConfig['cpuid_check']
-            transformed = {}
-            for sinput, regs_check in cpuid_check.iteritems():
-                inputs = sinput.split(',')
-                input = long(inputs[0])
-                sub_input = None
-                if len(inputs) == 2:
-                    sub_input = long(inputs[1])
-                t = xc.domain_check_cpuid(input, sub_input, regs_check)
-                transformed[sinput] = t
-            self.vmConfig['cpuid_check'] = transformed
 
     # Return a list of cmd line args to the device models based on the
     # xm config file
@@ -730,6 +736,9 @@ class IA64_Linux_ImageHandler(LinuxImage
         LinuxImageHandler.configure(self, vmConfig)
         self.vhpt = int(vmConfig['platform'].get('vhpt',  0))
 
+    def setCpuid(self):
+        # Guest CPUID configuration is not implemented yet.
+        return
 
 class X86_HVM_ImageHandler(HVMImageHandler):
 
@@ -769,6 +778,7 @@ class X86_Linux_ImageHandler(LinuxImageH
         # add an 8MB slack to balance backend allocations.
         mem_kb = self.getRequiredMaximumReservation() + (8 * 1024)
         xc.domain_set_memmap_limit(self.vm.getDomid(), mem_kb)
+        self.setCpuid()
         return LinuxImageHandler.buildDomain(self)
 
 _handlers = {
diff -r 5603534c62f9 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Tue Jun 03 09:41:19 2008 +0100
+++ b/xen/arch/x86/traps.c      Wed Jun 04 11:31:19 2008 +0100
@@ -654,6 +654,7 @@ static int emulate_forced_invalid_op(str
     char sig[5], instr[2];
     uint32_t a, b, c, d;
     unsigned long eip, rc;
+    struct segment_register cs;
 
     a = regs->eax;
     b = regs->ebx;
@@ -681,81 +682,103 @@ static int emulate_forced_invalid_op(str
         return 0;
     eip += sizeof(instr);
 
-    asm ( 
-        "cpuid"
-        : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
-        : "0" (a), "1" (b), "2" (c), "3" (d) );
+    if (!IS_PRIV(current->domain))
+    {
+        if ( !cpuid_hypervisor_leaves(regs->eax, &a, &b, &c, &d) )
+            domain_cpuid(current->domain, regs->eax, regs->ebx,
+                         &a, &b, &c, &d);
+        
+        switch ( regs->eax )
+        {
+            case 0x80000001:    
+                /* SYSCALL is visible if running in long mode. */
+                memcpy(&cs, &regs->cs, sizeof (cs));
+                if ( cs.attr.fields.l ||
+                     boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
+                    __set_bit(X86_FEATURE_SYSCALL, &d);
+                else
+                    __clear_bit(X86_FEATURE_SYSCALL, &d);
+                break;
+        }
+    }
+    else
+    {
+        asm ( 
+            "cpuid"
+            : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
+            : "0" (a), "1" (b), "2" (c), "3" (d) );
 
-    if ( (regs->eax & 0x7fffffff) == 1 )
-    {
-        /* Modify Feature Information. */
-        __clear_bit(X86_FEATURE_VME, &d);
-        __clear_bit(X86_FEATURE_PSE, &d);
-        __clear_bit(X86_FEATURE_PGE, &d);
-        __clear_bit(X86_FEATURE_MCE, &d);
-        __clear_bit(X86_FEATURE_MCA, &d);
-        if ( !IS_PRIV(current->domain) )
-            __clear_bit(X86_FEATURE_MTRR, &d);
-        __clear_bit(X86_FEATURE_PSE36, &d);
-    }
-    switch ( (uint32_t)regs->eax )
-    {
-    case 1:
-        /* Modify Feature Information. */
-        if ( !cpu_has_sep )
-            __clear_bit(X86_FEATURE_SEP, &d);
+        if ( (regs->eax & 0x7fffffff) == 1 )
+        {
+            /* Modify Feature Information. */
+            __clear_bit(X86_FEATURE_VME, &d);
+            __clear_bit(X86_FEATURE_PSE, &d);
+            __clear_bit(X86_FEATURE_PGE, &d);
+            __clear_bit(X86_FEATURE_MCE, &d);
+            __clear_bit(X86_FEATURE_MCA, &d);
+            if ( !IS_PRIV(current->domain) )
+                __clear_bit(X86_FEATURE_MTRR, &d);
+            __clear_bit(X86_FEATURE_PSE36, &d);
+        }
+        switch ( (uint32_t)regs->eax )
+        {
+        case 1:
+            /* Modify Feature Information. */
+            if ( !cpu_has_sep )
+                __clear_bit(X86_FEATURE_SEP, &d);
 #ifdef __i386__
-        if ( !supervisor_mode_kernel )
-            __clear_bit(X86_FEATURE_SEP, &d);
+            if ( !supervisor_mode_kernel )
+                __clear_bit(X86_FEATURE_SEP, &d);
 #endif
-        __clear_bit(X86_FEATURE_DS, &d);
-        __clear_bit(X86_FEATURE_ACC, &d);
-        __clear_bit(X86_FEATURE_PBE, &d);
+            __clear_bit(X86_FEATURE_DS, &d);
+            __clear_bit(X86_FEATURE_ACC, &d);
+            __clear_bit(X86_FEATURE_PBE, &d);
 
-        __clear_bit(X86_FEATURE_DTES64 % 32, &c);
-        __clear_bit(X86_FEATURE_MWAIT % 32, &c);
-        __clear_bit(X86_FEATURE_DSCPL % 32, &c);
-        __clear_bit(X86_FEATURE_VMXE % 32, &c);
-        __clear_bit(X86_FEATURE_SMXE % 32, &c);
-        if ( !IS_PRIV(current->domain) )
-            __clear_bit(X86_FEATURE_EST % 32, &c);
-        __clear_bit(X86_FEATURE_TM2 % 32, &c);
-        if ( is_pv_32bit_vcpu(current) )
-            __clear_bit(X86_FEATURE_CX16 % 32, &c);
-        __clear_bit(X86_FEATURE_XTPR % 32, &c);
-        __clear_bit(X86_FEATURE_PDCM % 32, &c);
-        __clear_bit(X86_FEATURE_DCA % 32, &c);
-        break;
-    case 0x80000001:
-        /* Modify Feature Information. */
-        if ( is_pv_32bit_vcpu(current) )
-        {
-            __clear_bit(X86_FEATURE_LM % 32, &d);
-            __clear_bit(X86_FEATURE_LAHF_LM % 32, &c);
+            __clear_bit(X86_FEATURE_DTES64 % 32, &c);
+            __clear_bit(X86_FEATURE_MWAIT % 32, &c);
+            __clear_bit(X86_FEATURE_DSCPL % 32, &c);
+            __clear_bit(X86_FEATURE_VMXE % 32, &c);
+            __clear_bit(X86_FEATURE_SMXE % 32, &c);
+            if ( !IS_PRIV(current->domain) )
+                __clear_bit(X86_FEATURE_EST % 32, &c);
+            __clear_bit(X86_FEATURE_TM2 % 32, &c);
+            if ( is_pv_32bit_vcpu(current) )
+                __clear_bit(X86_FEATURE_CX16 % 32, &c);
+            __clear_bit(X86_FEATURE_XTPR % 32, &c);
+            __clear_bit(X86_FEATURE_PDCM % 32, &c);
+            __clear_bit(X86_FEATURE_DCA % 32, &c);
+            break;
+        case 0x80000001:
+            /* Modify Feature Information. */
+            if ( is_pv_32bit_vcpu(current) )
+            {
+                __clear_bit(X86_FEATURE_LM % 32, &d);
+                __clear_bit(X86_FEATURE_LAHF_LM % 32, &c);
+            }
+#ifndef __i386__
+            if ( is_pv_32on64_vcpu(current) &&
+                 boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
+#endif
+                __clear_bit(X86_FEATURE_SYSCALL % 32, &d);
+            __clear_bit(X86_FEATURE_PAGE1GB % 32, &d);
+            __clear_bit(X86_FEATURE_RDTSCP % 32, &d);
+
+            __clear_bit(X86_FEATURE_SVME % 32, &c);
+            __clear_bit(X86_FEATURE_OSVW % 32, &c);
+            __clear_bit(X86_FEATURE_IBS % 32, &c);
+            __clear_bit(X86_FEATURE_SKINIT % 32, &c);
+            __clear_bit(X86_FEATURE_WDT % 32, &c);
+            break;
+        case 5: /* MONITOR/MWAIT */
+        case 0xa: /* Architectural Performance Monitor Features */
+        case 0x8000000a: /* SVM revision and features */
+        case 0x8000001b: /* Instruction Based Sampling */
+            a = b = c = d = 0;
+            break;
+        default:
+            (void)cpuid_hypervisor_leaves(regs->eax, &a, &b, &c, &d);
+            break;
         }
-#ifndef __i386__
-        if ( is_pv_32on64_vcpu(current) &&
-             boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
-#endif
-            __clear_bit(X86_FEATURE_SYSCALL % 32, &d);
-        __clear_bit(X86_FEATURE_PAGE1GB % 32, &d);
-        __clear_bit(X86_FEATURE_RDTSCP % 32, &d);
-
-        __clear_bit(X86_FEATURE_SVME % 32, &c);
-        __clear_bit(X86_FEATURE_OSVW % 32, &c);
-        __clear_bit(X86_FEATURE_IBS % 32, &c);
-        __clear_bit(X86_FEATURE_SKINIT % 32, &c);
-        __clear_bit(X86_FEATURE_WDT % 32, &c);
-        break;
-    case 5: /* MONITOR/MWAIT */
-    case 0xa: /* Architectural Performance Monitor Features */
-    case 0x8000000a: /* SVM revision and features */
-    case 0x8000001b: /* Instruction Based Sampling */
-        a = b = c = d = 0;
-        break;
-    default:
-        (void)cpuid_hypervisor_leaves(regs->eax, &a, &b, &c, &d);
-        break;
     }
 
     regs->eax = a;

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.