[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] Configure hvm's cpuid



Hi,

This patch makes the CPUID replies seen by HVM guests configurable. The replies are now pre-calculated in libxc and passed to the hypervisor via a new domctl.

There are two parts to this patch:
  - We can override the CPUID replies.
  - We can check that a VM is compatible with the host's processor.

Best regards,

Signed-off-by: Jean Guyader <jean.guyader@xxxxxxxxxxxxx>

--
Jean Guyader
diff -r 4e6577dec729 tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm      Thu Apr 24 10:14:43 2008 +0100
+++ b/tools/examples/xmexample.hvm      Thu Apr 24 18:05:55 2008 +0100
@@ -219,3 +219,27 @@ serial='pty'
 #-----------------------------------------------------------------------------
 #   Set keyboard layout, default is en-us keyboard. 
 #keymap='ja'
+
+#-----------------------------------------------------------------------------
+#   Configure guest CPUID responses:
+#cpuid=[ '1:ecx=xxxxxxxxxxxxxxxxxxxxxxxxxx1xxxxx,
+#           eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
+# - Set the VMX feature flag in the guest (CPUID_1:ECX:5)
+# - Default behaviour for all other bits in the ECX and EAX registers.
+# 
+# Each successive character represents a less-significant bit:
+#  '1' -> force the corresponding bit to 1
+#  '0' -> force to 0
+#  'x' -> we don't care (default behaviour)
+#  'k' -> pass through the host bit value
+#  's' -> as 'k' but preserve across save/restore and migration
+#
+#   Configure host CPUID consistency checks, which must be satisfied for this
+#   VM to be allowed to run on this host's processor:
+#cpuid_check=[ '1:ecx=xxxxxxxxxxxxxxxxxxxxxxxxxx1xxxxx' ]
+#
+# The format is similar to the above for 'cpuid':
+#  '1' -> the bit must be '1'
+#  '0' -> the bit must be '0'
+#  'x' -> we don't care (do not check)
+#  's' -> the bit must be the same as the host that started this VM
diff -r 4e6577dec729 tools/libxc/Makefile
--- a/tools/libxc/Makefile      Thu Apr 24 10:14:43 2008 +0100
+++ b/tools/libxc/Makefile      Thu Apr 24 18:05:55 2008 +0100
@@ -53,6 +53,7 @@ GUEST_SRCS-y                 += xc_dom_c
 GUEST_SRCS-y                 += xc_dom_compat_linux.c
 
 GUEST_SRCS-$(CONFIG_X86)     += xc_dom_x86.c
+GUEST_SRCS-$(CONFIG_X86)     += xc_cpuid_x86.c
 GUEST_SRCS-$(CONFIG_IA64)    += xc_dom_ia64.c
 GUEST_SRCS-$(CONFIG_POWERPC) += xc_dom_powerpc.c
 endif
diff -r 4e6577dec729 tools/libxc/xc_cpufeature.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_cpufeature.h       Thu Apr 24 18:05:55 2008 +0100
@@ -0,0 +1,115 @@
+#ifndef __LIBXC_CPUFEATURE_H
+#define __LIBXC_CPUFEATURE_H
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+#define X86_FEATURE_FPU                (0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME                (0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE         (0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE        (0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC                (0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR                (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
+#define X86_FEATURE_PAE                (0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE                (0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_CX8                (0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC       (0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP                (0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR       (0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE                (0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA                (0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV       (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
+#define X86_FEATURE_PAT                (0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36      (0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN         (0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLSH     (0*32+19) /* Supports the CLFLUSH instruction */
+#define X86_FEATURE_DS         (0*32+21) /* Debug Store */
+#define X86_FEATURE_ACPI       (0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX                (0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR       (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
+                                         /* of FPU context), and CR4.OSFXSR available */
+#define X86_FEATURE_XMM                (0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2       (0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_SELFSNOOP  (0*32+27) /* CPU self snoop */
+#define X86_FEATURE_HT         (0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC                (0*32+29) /* Automatic clock control */
+#define X86_FEATURE_IA64       (0*32+30) /* IA-64 processor */
+#define X86_FEATURE_PBE                (0*32+31) /* Pending Break Enable */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL    (1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP         (1*32+19) /* MP Capable. */
+#define X86_FEATURE_NX         (1*32+20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT     (1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_FFXSR       (1*32+25) /* FFXSR instruction optimizations */
+#define X86_FEATURE_PAGE1GB    (1*32+26) /* 1Gb large page support */
+#define X86_FEATURE_RDTSCP     (1*32+27) /* RDTSCP */
+#define X86_FEATURE_LM         (1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT   (1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW      (1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY   (2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN    (2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI       (2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX      (3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR    (3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR  (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR        (3*32+ 3) /* Centaur MCRs (= MTRRs) */
+/* cpu types for specific tunings: */
+#define X86_FEATURE_K8         (3*32+ 4) /* Opteron, Athlon64 */
+#define X86_FEATURE_K7         (3*32+ 5) /* Athlon */
+#define X86_FEATURE_P3         (3*32+ 6) /* P3 */
+#define X86_FEATURE_P4         (3*32+ 7) /* P4 */
+#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+#define X86_FEATURE_XMM3       (4*32+ 0) /* Streaming SIMD Extensions-3 */
+#define X86_FEATURE_DTES64     (4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT      (4*32+ 3) /* Monitor/Mwait support */
+#define X86_FEATURE_DSCPL      (4*32+ 4) /* CPL Qualified Debug Store */
+#define X86_FEATURE_VMXE       (4*32+ 5) /* Virtual Machine Extensions */
+#define X86_FEATURE_SMXE       (4*32+ 6) /* Safer Mode Extensions */
+#define X86_FEATURE_EST                (4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2                (4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3      (4*32+ 9) /* Supplemental Streaming SIMD Extensions-3 */
+#define X86_FEATURE_CID                (4*32+10) /* Context ID */
+#define X86_FEATURE_CX16        (4*32+13) /* CMPXCHG16B */
+#define X86_FEATURE_XTPR       (4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM       (4*32+15) /* Perf/Debug Capability MSR */
+#define X86_FEATURE_DCA                (4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_SSE4_1     (4*32+19) /* Streaming SIMD Extensions 4.1 */
+#define X86_FEATURE_SSE4_2     (4*32+20) /* Streaming SIMD Extensions 4.2 */
+#define X86_FEATURE_POPCNT     (4*32+23) /* POPCNT instruction */
+
+/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+#define X86_FEATURE_XSTORE     (5*32+ 2) /* on-CPU RNG present (xstore insn) */
+#define X86_FEATURE_XSTORE_EN  (5*32+ 3) /* on-CPU RNG enabled */
+#define X86_FEATURE_XCRYPT     (5*32+ 6) /* on-CPU crypto (xcrypt insn) */
+#define X86_FEATURE_XCRYPT_EN  (5*32+ 7) /* on-CPU crypto enabled */
+#define X86_FEATURE_ACE2       (5*32+ 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN    (5*32+ 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE                (5*32+ 10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN     (5*32+ 11) /* PHE enabled */
+#define X86_FEATURE_PMM                (5*32+ 12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN     (5*32+ 13) /* PMM enabled */
+
+/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
+#define X86_FEATURE_LAHF_LM    (6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVME        (6*32+ 2) /* Secure Virtual Machine */
+#define X86_FEATURE_EXTAPICSPACE (6*32+ 3) /* Extended APIC space */
+#define X86_FEATURE_ALTMOVCR   (6*32+ 4) /* LOCK MOV CR accesses CR+8 */
+#define X86_FEATURE_ABM                (6*32+ 5) /* Advanced Bit Manipulation */
+#define X86_FEATURE_SSE4A      (6*32+ 6) /* AMD Streaming SIMD Extensions-4a */
+#define X86_FEATURE_MISALIGNSSE        (6*32+ 7) /* Misaligned SSE Access */
+#define X86_FEATURE_3DNOWPF    (6*32+ 8) /* 3DNow! Prefetch */
+#define X86_FEATURE_OSVW       (6*32+ 9) /* OS Visible Workaround */
+#define X86_FEATURE_IBS                (6*32+ 10) /* Instruction Based Sampling */
+#define X86_FEATURE_SSE5       (6*32+ 11) /* AMD Streaming SIMD Extensions-5 */
+#define X86_FEATURE_SKINIT     (6*32+ 12) /* SKINIT, STGI/CLGI, DEV */
+#define X86_FEATURE_WDT                (6*32+ 13) /* Watchdog Timer */
+
+#endif /* __LIBXC_CPUFEATURE_H */
diff -r 4e6577dec729 tools/libxc/xc_cpuid_x86.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_cpuid_x86.c        Thu Apr 24 18:05:55 2008 +0100
@@ -0,0 +1,430 @@
+/******************************************************************************
+ * xc_cpuid_x86.c 
+ *
+ * Compute cpuid of a domain.
+ *
+ * Copyright (c) 2008, Citrix Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <stdlib.h>
+#include "xc_private.h"
+#include "xc_cpufeature.h"
+#include <xen/hvm/params.h>
+
+#define bitmaskof(idx)      (1u << ((idx) & 31))
+#define clear_bit(idx, dst) ((dst) &= ~(1u << (idx)))
+#define set_bit(idx, dst)   ((dst) |= (1u << (idx)))
+
+#define DEF_MAX_BASE 0x00000004u
+#define DEF_MAX_EXT  0x80000008u
+
+static void amd_xc_cpuid_policy(
+    int xc, domid_t domid, const unsigned int *input, unsigned int *regs)
+{
+    unsigned long pae = 0;
+
+    xc_get_hvm_param(xc, domid, HVM_PARAM_PAE_ENABLED, &pae);
+
+    switch ( input[0] )
+    {
+    case 0x00000001:
+        /* Mask Intel-only features. */
+        regs[2] &= ~(bitmaskof(X86_FEATURE_SSSE3) |
+                     bitmaskof(X86_FEATURE_SSE4_1) |
+                     bitmaskof(X86_FEATURE_SSE4_2));
+        break;
+
+    case 0x00000002:
+    case 0x00000004:
+        regs[0] = regs[1] = regs[2] = 0;
+        break;
+
+    case 0x80000001:
+        if ( !pae )
+            clear_bit(X86_FEATURE_PAE & 31, regs[3]);
+        clear_bit(X86_FEATURE_PSE36 & 31, regs[3]);
+
+        /* Filter all other features according to a whitelist. */
+        regs[2] &= (bitmaskof(X86_FEATURE_LAHF_LM) |
+                    bitmaskof(X86_FEATURE_ALTMOVCR) |
+                    bitmaskof(X86_FEATURE_ABM) |
+                    bitmaskof(X86_FEATURE_SSE4A) |
+                    bitmaskof(X86_FEATURE_MISALIGNSSE) |
+                    bitmaskof(X86_FEATURE_3DNOWPF));
+        regs[3] &= (0x0183f3ff | /* features shared with 0x00000001:EDX */
+                    bitmaskof(X86_FEATURE_NX) |
+                    bitmaskof(X86_FEATURE_LM) |
+                    bitmaskof(X86_FEATURE_SYSCALL) |
+                    bitmaskof(X86_FEATURE_MP) |
+                    bitmaskof(X86_FEATURE_MMXEXT) |
+                    bitmaskof(X86_FEATURE_FFXSR) |
+                    bitmaskof(X86_FEATURE_3DNOW) |
+                    bitmaskof(X86_FEATURE_3DNOWEXT));
+        break;
+    }
+}
+
+static void intel_xc_cpuid_policy(
+    int xc, domid_t domid, const unsigned int *input, unsigned int *regs)
+{
+    switch ( input[0] )
+    {
+    case 0x00000001:
+        /* Mask AMD-only features. */
+        regs[2] &= ~(bitmaskof(X86_FEATURE_POPCNT));
+        break;
+
+    case 0x00000004:
+        regs[0] &= 0x3FF;
+        regs[3] &= 0x3FF;
+        break;
+
+    case 0x80000001:
+        /* Only a few features are advertised in Intel's 0x80000001. */
+        regs[2] &= (bitmaskof(X86_FEATURE_LAHF_LM));
+        regs[3] &= (bitmaskof(X86_FEATURE_NX) |
+                    bitmaskof(X86_FEATURE_LM) |
+                    bitmaskof(X86_FEATURE_SYSCALL));
+        break;
+    }
+}
+
+static void cpuid(const unsigned int *input, unsigned int *regs)
+{
+    unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 0 : input[1];
+    unsigned int bx_temp;
+    asm ( "mov %%ebx,%4; cpuid; mov %%ebx,%1; mov %4,%%ebx"
+          : "=a" (regs[0]), "=r" (regs[1]),
+          "=c" (regs[2]), "=d" (regs[3]), "=m" (bx_temp)
+          : "0" (input[0]), "2" (count) );
+}
+
+/* Get the manufacturer brand name of the host processor. */
+static void xc_cpuid_brand_get(char *str)
+{
+    unsigned int input[2] = { 0, 0 };
+    unsigned int regs[4];
+
+    cpuid(input, regs);
+
+    *(uint32_t *)(str + 0) = regs[1];
+    *(uint32_t *)(str + 4) = regs[3];
+    *(uint32_t *)(str + 8) = regs[2];
+    str[12] = '\0';
+}
+
+static void xc_cpuid_policy(
+    int xc, domid_t domid, const unsigned int *input, unsigned int *regs)
+{
+    char brand[13];
+    unsigned long pae;
+
+    xc_get_hvm_param(xc, domid, HVM_PARAM_PAE_ENABLED, &pae);
+
+    switch( input[0] )
+    {
+    case 0x00000000:
+        if ( regs[0] > DEF_MAX_BASE )
+            regs[0] = DEF_MAX_BASE;
+        break;
+
+    case 0x00000001:
+        regs[2] &= (bitmaskof(X86_FEATURE_XMM3) |
+                    bitmaskof(X86_FEATURE_SSSE3) |
+                    bitmaskof(X86_FEATURE_CX16) |
+                    bitmaskof(X86_FEATURE_SSE4_1) |
+                    bitmaskof(X86_FEATURE_SSE4_2) |
+                    bitmaskof(X86_FEATURE_POPCNT));
+
+        regs[3] &= (bitmaskof(X86_FEATURE_FPU) |
+                    bitmaskof(X86_FEATURE_VME) |
+                    bitmaskof(X86_FEATURE_DE) |
+                    bitmaskof(X86_FEATURE_PSE) |
+                    bitmaskof(X86_FEATURE_TSC) |
+                    bitmaskof(X86_FEATURE_MSR) |
+                    bitmaskof(X86_FEATURE_PAE) |
+                    bitmaskof(X86_FEATURE_MCE) |
+                    bitmaskof(X86_FEATURE_CX8) |
+                    bitmaskof(X86_FEATURE_APIC) |
+                    bitmaskof(X86_FEATURE_SEP) |
+                    bitmaskof(X86_FEATURE_MTRR) |
+                    bitmaskof(X86_FEATURE_PGE) |
+                    bitmaskof(X86_FEATURE_MCA) |
+                    bitmaskof(X86_FEATURE_CMOV) |
+                    bitmaskof(X86_FEATURE_PAT) |
+                    bitmaskof(X86_FEATURE_CLFLSH) |
+                    bitmaskof(X86_FEATURE_MMX) |
+                    bitmaskof(X86_FEATURE_FXSR) |
+                    bitmaskof(X86_FEATURE_XMM) |
+                    bitmaskof(X86_FEATURE_XMM2));
+            
+        /* We always support MTRR MSRs. */
+        regs[3] |= bitmaskof(X86_FEATURE_MTRR);
+
+        if ( !pae )
+            clear_bit(X86_FEATURE_PAE & 31, regs[3]);
+        break;
+
+    case 0x80000000:
+        if ( regs[0] > DEF_MAX_EXT )
+            regs[0] = DEF_MAX_EXT;
+        break;
+
+    case 0x80000001:
+        if ( !pae )
+            clear_bit(X86_FEATURE_NX & 31, regs[3]);
+        break;
+
+
+    case 0x80000008:
+        regs[0] &= 0x0000ffffu;
+        regs[1] = regs[2] = regs[3] = 0;
+        break;
+
+    case 0x00000002:
+    case 0x00000004:
+    case 0x80000002:
+    case 0x80000003:
+    case 0x80000004:
+    case 0x80000006:
+        break;
+
+    default:
+        regs[0] = regs[1] = regs[2] = regs[3] = 0;
+        break;
+    }
+
+    xc_cpuid_brand_get(brand);
+    if ( strstr(brand, "AMD") )
+        amd_xc_cpuid_policy(xc, domid, input, regs);
+    else
+        intel_xc_cpuid_policy(xc, domid, input, regs);
+}
+
+static int xc_cpuid_do_domctl(
+    int xc, domid_t domid,
+    const unsigned int *input, const unsigned int *regs)
+{
+    DECLARE_DOMCTL;
+
+    memset(&domctl, 0, sizeof (domctl));
+    domctl.domain = domid;
+    domctl.cmd = XEN_DOMCTL_set_cpuid;
+    domctl.u.cpuid.input[0] = input[0];
+    domctl.u.cpuid.input[1] = input[1];
+    domctl.u.cpuid.eax = regs[0];
+    domctl.u.cpuid.ebx = regs[1];
+    domctl.u.cpuid.ecx = regs[2];
+    domctl.u.cpuid.edx = regs[3];
+
+    return do_domctl(xc, &domctl);
+}
+
+static char *alloc_str(void)
+{
+    char *s = malloc(33);
+    memset(s, 0, 33);
+    return s;
+}
+
+void xc_cpuid_to_str(const unsigned int *regs, char **strs)
+{
+    int i, j;
+
+    for ( i = 0; i < 4; i++ )
+    {
+        strs[i] = alloc_str();
+        for ( j = 0; j < 32; j++ )
+            strs[i][j] = !!((regs[i] & (1U << (31 - j)))) ? '1' : '0';
+    }
+}
+
+int xc_cpuid_apply_policy(int xc, domid_t domid)
+{
+    unsigned int input[2] = { 0, 0 }, regs[4];
+    unsigned int base_max, ext_max;
+    int rc;
+
+    cpuid(input, regs);
+    base_max = (regs[0] <= DEF_MAX_BASE) ? regs[0] : DEF_MAX_BASE;
+    input[0] = 0x80000000;
+    cpuid(input, regs);
+    ext_max = (regs[0] <= DEF_MAX_EXT) ? regs[0] : DEF_MAX_EXT;
+
+    input[0] = 0;
+    input[1] = XEN_CPUID_INPUT_UNUSED;
+    for ( ; ; )
+    {
+        cpuid(input, regs);
+        xc_cpuid_policy(xc, domid, input, regs);
+
+        if ( regs[0] || regs[1] || regs[2] || regs[3] )
+        {
+            rc = xc_cpuid_do_domctl(xc, domid, input, regs);
+            if ( rc )
+                return rc;
+
+            /* Intel cache descriptor leaves. */
+            if ( input[0] == 4 )
+            {
+                input[1]++;
+                /* More to do? Then loop keeping %eax==0x00000004. */
+                if ( (regs[0] & 0x1f) != 0 )
+                    continue;
+            }
+        }
+
+        input[0]++;
+        input[1] = (input[0] == 4) ? 0 : XEN_CPUID_INPUT_UNUSED;
+        if ( !(input[0] & 0x80000000u) && (input[0] > base_max ) )
+            input[0] = 0x80000000u;
+
+        if ( (input[0] & 0x80000000u) && (input[0] > ext_max) )
+            break;
+    }
+
+    return 0;
+}
+
+/*
+ * Check whether a VM is allowed to launch on this host's processor type.
+ *
+ * @config format is similar to that of xc_cpuid_set():
+ *  '1' -> the bit must be set to 1
+ *  '0' -> must be 0
+ *  'x' -> we don't care
+ *  's' -> (same) must be the same
+ */
+int xc_cpuid_check(
+    int xc, const unsigned int *input,
+    const char **config,
+    char **config_transformed)
+{
+    int i, j;
+    unsigned int regs[4];
+
+    memset(config_transformed, 0, 4 * sizeof(*config_transformed));
+
+    cpuid(input, regs);
+
+    for ( i = 0; i < 4; i++ )
+    {
+        if ( config[i] == NULL )
+            continue;
+        config_transformed[i] = alloc_str();
+        for ( j = 0; j < 32; j++ )
+        {
+            unsigned char val = !!((regs[i] & (1U << (31 - j))));
+            if ( !strchr("10xs", config[i][j]) ||
+                 ((config[i][j] == '1') && !val) ||
+                 ((config[i][j] == '0') && val) )
+                goto fail;
+            config_transformed[i][j] = config[i][j];
+            if ( config[i][j] == 's' )
+                config_transformed[i][j] = '0' + val;
+        }
+    }
+
+    return 0;
+
+ fail:
+    for ( i = 0; i < 4; i++ )
+    {
+        free(config_transformed[i]);
+        config_transformed[i] = NULL;
+    }
+    return -EPERM;
+}
+
+/*
+ * Configure a single input with the information from config.
+ *
+ * Config is an array of strings:
+ *   config[0] = eax
+ *   config[1] = ebx
+ *   config[2] = ecx
+ *   config[3] = edx
+ *
+ * The format of the string is the following:
+ *   '1' -> force to 1
+ *   '0' -> force to 0
+ *   'x' -> we don't care (use default)
+ *   'k' -> pass through host value
+ *   's' -> pass through the first time and then keep the same value
+ *          across save/restore and migration.
+ * 
+ * For 's' the transformed configuration holds the value actually applied.
+ */
+int xc_cpuid_set(
+    int xc, domid_t domid, const unsigned int *input,
+    const char **config, char **config_transformed)
+{
+    int rc;
+    unsigned int i, j, regs[4], polregs[4];
+
+    memset(config_transformed, 0, 4 * sizeof(*config_transformed));
+
+    cpuid(input, regs);
+
+    memcpy(polregs, regs, sizeof(regs));
+    xc_cpuid_policy(xc, domid, input, polregs);
+
+    for ( i = 0; i < 4; i++ )
+    {
+        if ( config[i] == NULL )
+            continue;
+        
+        config_transformed[i] = alloc_str();
+
+        for ( j = 0; j < 32; j++ )
+        {
+            unsigned char val = !!((regs[i] & (1U << (31 - j))));
+            unsigned char polval = !!((polregs[i] & (1U << (31 - j))));
+
+            rc = -EINVAL;
+            if ( !strchr("10xks", config[i][j]) )
+                goto fail;
+
+            if ( config[i][j] == '1' )
+                val = 1;
+            else if ( config[i][j] == '0' )
+                val = 0;
+            else if ( config[i][j] == 'x' )
+                val = polval;
+
+            if ( val )
+                set_bit(31 - j, regs[i]);
+            else
+                clear_bit(31 - j, regs[i]);
+
+            config_transformed[i][j] = config[i][j];
+            if ( config[i][j] == 's' )
+                config_transformed[i][j] = '0' + val;
+        }
+    }
+
+    rc = xc_cpuid_do_domctl(xc, domid, input, regs);
+    if ( rc == 0 )
+        return 0;
+
+ fail:
+    for ( i = 0; i < 4; i++ )
+    {
+        free(config_transformed[i]);
+        config_transformed[i] = NULL;
+    }
+    return rc;
+}
diff -r 4e6577dec729 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Thu Apr 24 10:14:43 2008 +0100
+++ b/tools/libxc/xenctrl.h     Thu Apr 24 18:05:55 2008 +0100
@@ -983,4 +983,20 @@ int xc_domain_set_target(int xc_handle,
                          uint32_t domid,
                          uint32_t target);
 
+#if defined(__i386__) || defined(__x86_64__)
+int xc_cpuid_check(int xc,
+                   const unsigned int *input,
+                   const char **config,
+                   char **config_transformed);
+int xc_cpuid_set(int xc,
+                 domid_t domid,
+                 const unsigned int *input,
+                 const char **config,
+                 char **config_transformed);
+int xc_cpuid_apply_policy(int xc,
+                          domid_t domid);
+void xc_cpuid_to_str(const unsigned int *regs,
+                     char **strs);
+#endif
+
 #endif /* XENCTRL_H */
diff -r 4e6577dec729 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Thu Apr 24 10:14:43 2008 +0100
+++ b/tools/python/xen/lowlevel/xc/xc.c Thu Apr 24 18:05:55 2008 +0100
@@ -610,6 +610,110 @@ static PyObject *pyxc_set_os_type(XcObje
     return zero;
 }
 #endif /* __ia64__ */
+
+
+#if defined(__i386__) || defined(__x86_64__)
+static void pyxc_dom_extract_cpuid(PyObject *config,
+                                  char **regs)
+{
+    const char *regs_extract[4] = { "eax", "ebx", "ecx", "edx" };
+    PyObject *obj;
+    int i;
+
+    memset(regs, 0, 4*sizeof(*regs));
+
+    if ( !PyDict_Check(config) )
+        return;
+
+    for ( i = 0; i < 4; i++ )
+        if ( (obj = PyDict_GetItemString(config, regs_extract[i])) != NULL )
+            regs[i] = PyString_AS_STRING(obj);
+}
+
+static PyObject *pyxc_create_cpuid_dict(char **regs)
+{
+   const char *regs_extract[4] = { "eax", "ebx", "ecx", "edx" };
+   PyObject *dict;
+   int i;
+
+   dict = PyDict_New();
+   for ( i = 0; i < 4; i++ )
+   {
+       if ( regs[i] == NULL )
+           continue;
+       PyDict_SetItemString(dict, regs_extract[i],
+                            PyString_FromString(regs[i]));
+       free(regs[i]);
+       regs[i] = NULL;
+   }
+   return dict;
+}
+
+static PyObject *pyxc_dom_check_cpuid(XcObject *self,
+                                      PyObject *args)
+{
+    PyObject *sub_input, *config;
+    unsigned int input[2];
+    char *regs[4], *regs_transform[4];
+
+    if ( !PyArg_ParseTuple(args, "iOO", &input[0], &sub_input, &config) )
+        return NULL;
+
+    pyxc_dom_extract_cpuid(config, regs);
+
+    input[1] = XEN_CPUID_INPUT_UNUSED;
+    if ( PyLong_Check(sub_input) )
+        input[1] = PyLong_AsUnsignedLong(sub_input);
+
+    if ( xc_cpuid_check(self->xc_handle, input,
+                        (const char **)regs, regs_transform) )
+        return pyxc_error_to_exception();
+
+    return pyxc_create_cpuid_dict(regs_transform);
+}
+
+static PyObject *pyxc_dom_set_policy_cpuid(XcObject *self,
+                                           PyObject *args)
+{
+    domid_t domid;
+
+    if ( !PyArg_ParseTuple(args, "i", &domid) )
+        return NULL;
+
+    if ( xc_cpuid_apply_policy(self->xc_handle, domid) )
+        return pyxc_error_to_exception();
+
+    Py_INCREF(zero);
+    return zero;
+}
+
+
+static PyObject *pyxc_dom_set_cpuid(XcObject *self,
+                                    PyObject *args)
+{
+    domid_t domid;
+    PyObject *sub_input, *config;
+    unsigned int input[2];
+    char *regs[4], *regs_transform[4];
+
+    if ( !PyArg_ParseTuple(args, "iiOO", &domid,
+                           &input[0], &sub_input, &config) )
+        return NULL;
+
+    pyxc_dom_extract_cpuid(config, regs);
+
+    input[1] = XEN_CPUID_INPUT_UNUSED;
+    if ( PyLong_Check(sub_input) )
+        input[1] = PyLong_AsUnsignedLong(sub_input);
+
+    if ( xc_cpuid_set(self->xc_handle, domid, input, (const char **)regs,
+                      regs_transform) )
+        return pyxc_error_to_exception();
+
+    return pyxc_create_cpuid_dict(regs_transform);
+}
+
+#endif /* __i386__ || __x86_64__ */
 
 static PyObject *pyxc_hvm_build(XcObject *self,
                                 PyObject *args,
@@ -1635,6 +1739,37 @@ static PyMethodDef pyxc_methods[] = {
       " log [int]: Specifies the area's size.\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 #endif /* __powerpc */
+  
+#if defined(__i386__) || defined(__x86_64__)
+    { "domain_check_cpuid", 
+      (PyCFunction)pyxc_dom_check_cpuid, 
+      METH_VARARGS, "\n"
+      "Apply checks to host CPUID.\n"
+      " input [long]: Input for cpuid instruction (eax)\n"
+      " sub_input [long]: Second input (optional, may be None) for cpuid "
+      "                     instruction (ecx)\n"
+      " config [dict]: Dictionary of register\n"
+      " config [dict]: Dictionary of register, use for checking\n\n"
+      "Returns: [int] 0 on success; exception on error.\n" },
+    
+    { "domain_set_cpuid", 
+      (PyCFunction)pyxc_dom_set_cpuid, 
+      METH_VARARGS, "\n"
+      "Set cpuid response for an input and a domain.\n"
+      " dom [int]: Identifier of domain.\n"
+      " input [long]: Input for cpuid instruction (eax)\n"
+      " sub_input [long]: Second input (optional, may be None) for cpuid "
+      "                     instruction (ecx)\n"
+      " config [dict]: Dictionary of register\n\n"
+      "Returns: [int] 0 on success; exception on error.\n" },
+
+    { "domain_set_policy_cpuid", 
+      (PyCFunction)pyxc_dom_set_policy_cpuid, 
+      METH_VARARGS, "\n"
+      "Set the default cpuid policy for a domain.\n"
+      " dom [int]: Identifier of domain.\n\n"
+      "Returns: [int] 0 on success; exception on error.\n" },
+#endif
 
     { NULL, NULL, 0, NULL }
 };
diff -r 4e6577dec729 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Thu Apr 24 10:14:43 2008 +0100
+++ b/tools/python/xen/xend/XendCheckpoint.py   Thu Apr 24 18:05:55 2008 +0100
@@ -309,6 +309,7 @@ def restore(xd, fd, dominfo = None, paus
                 else:
                     break
             os.close(qemu_fd)
+            restore_image.setCpuid()
 
 
         os.read(fd, 1)           # Wait for source to close connection
diff -r 4e6577dec729 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Thu Apr 24 10:14:43 2008 +0100
+++ b/tools/python/xen/xend/XendConfig.py       Thu Apr 24 18:05:55 2008 +0100
@@ -203,6 +203,8 @@ XENAPI_CFG_TYPES = {
     'target': int,
     'security_label': str,
     'pci': str,
+    'cpuid' : dict,
+    'cpuid_check' : dict,
 }
 
 # List of legacy configuration keys that have no equivalent in the
@@ -497,6 +499,32 @@ class XendConfig(dict):
         if 'handle' in dominfo:
             self['uuid'] = uuid.toString(dominfo['handle'])
             
+    def parse_cpuid(self, cfg, field):
+       def int2bin(n, count=32):
+           return "".join([str((n >> y) & 1) for y in range(count-1, -1, -1)])
+
+       for input, regs in cfg[field].iteritems():
+           if not regs is dict:
+               cfg[field][input] = dict(regs)
+
+       cpuid = {}
+       for input in cfg[field]:
+           inputs = input.split(',')
+           if inputs[0][0:2] == '0x':
+               inputs[0] = str(int(inputs[0], 16))
+           if len(inputs) == 2:
+               if inputs[1][0:2] == '0x':
+                   inputs[1] = str(int(inputs[1], 16))
+           new_input = ','.join(inputs)
+           cpuid[new_input] = {} # new input
+           for reg in cfg[field][input]:
+               val = cfg[field][input][reg]
+               if val[0:2] == '0x':
+                   cpuid[new_input][reg] = int2bin(int(val, 16))
+               else:
+                   cpuid[new_input][reg] = val
+       cfg[field] = cpuid
+
     def _parse_sxp(self, sxp_cfg):
         """ Populate this XendConfig using the parsed SXP.
 
@@ -653,6 +681,12 @@ class XendConfig(dict):
                 except ValueError, e:
                     raise XendConfigError('cpus = %s: %s' % (cfg['cpus'], e))
 
+        # Parse cpuid
+        if 'cpuid' in cfg:
+            self.parse_cpuid(cfg, 'cpuid')
+        if 'cpuid_check' in cfg:
+            self.parse_cpuid(cfg, 'cpuid_check')
+
         import xen.util.xsm.xsm as security
         if security.on():
             from xen.util.acmpolicy import ACM_LABEL_UNLABELED
@@ -901,6 +935,16 @@ class XendConfig(dict):
             int(self['vcpus_params'].get('weight', 256))
         self['vcpus_params']['cap'] = int(self['vcpus_params'].get('cap', 0))
 
+    def cpuid_to_sxp(self, sxpr, field):
+        regs_list = []
+        for input, regs in self[field].iteritems():
+            reg_list = []
+            for reg, val in regs.iteritems():
+                reg_list.append([reg, val])
+            regs_list.append([input, reg_list])
+        sxpr.append([field, regs_list])
+
+
     def to_sxp(self, domain = None, ignore_devices = False, ignore = [],
                legacy_only = True):
         """ Get SXP representation of this config object.
@@ -1011,6 +1055,13 @@ class XendConfig(dict):
             except:
                 txn.abort()
                 raise
+
+        if 'cpuid' in self:
+            self.cpuid_to_sxp(sxpr, 'cpuid')
+        if 'cpuid_check' in self:
+            self.cpuid_to_sxp(sxpr, 'cpuid_check')
+
+        log.debug(sxpr)
 
         return sxpr    
     
diff -r 4e6577dec729 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Thu Apr 24 10:14:43 2008 +0100
+++ b/tools/python/xen/xend/image.py    Thu Apr 24 18:05:55 2008 +0100
@@ -551,6 +551,38 @@ class HVMImageHandler(ImageHandler):
         self.acpi = int(vmConfig['platform'].get('acpi', 0))
         self.guest_os_type = vmConfig['platform'].get('guest_os_type')
 
+        self.vmConfig = vmConfig
+           
+    def setCpuid(self):
+        """Push the CPUID settings held in self.vmConfig through xc.
+
+        xc.domain_set_policy_cpuid is called first.  Each 'cpuid' entry is
+        then handed to xc.domain_set_cpuid and each 'cpuid_check' entry to
+        xc.domain_check_cpuid; both sections are finally replaced by the
+        values those calls returned, keyed by the original input string.
+        """
+        def split_input(key):
+            # 'IN' -> (IN, None); 'IN,SIN' -> (IN, SIN), both as longs.
+            parts = key.split(',')
+            if len(parts) == 2:
+                return long(parts[0]), long(parts[1])
+            return long(parts[0]), None
+
+        xc.domain_set_policy_cpuid(self.vm.getDomid())
+        if 'cpuid' in self.vmConfig:
+            applied = {}
+            for key, regs in self.vmConfig['cpuid'].iteritems():
+                leaf, subleaf = split_input(key)
+                applied[key] = xc.domain_set_cpuid(self.vm.getDomid(),
+                                                   leaf, subleaf, regs)
+            self.vmConfig['cpuid'] = applied
+        if 'cpuid_check' in self.vmConfig:
+            checked = {}
+            for key, regs in self.vmConfig['cpuid_check'].iteritems():
+                leaf, subleaf = split_input(key)
+                checked[key] = xc.domain_check_cpuid(leaf, subleaf, regs)
+            self.vmConfig['cpuid_check'] = checked
+
     # Return a list of cmd line args to the device models based on the
     # xm config file
     def parseDeviceModelArgs(self, vmConfig):
@@ -718,6 +750,7 @@ class X86_HVM_ImageHandler(HVMImageHandl
 
     def buildDomain(self):
         xc.hvm_set_param(self.vm.getDomid(), HVM_PARAM_PAE_ENABLED, self.pae)
+        self.setCpuid()
         return HVMImageHandler.buildDomain(self)
 
     def getRequiredAvailableMemory(self, mem_kb):
diff -r 4e6577dec729 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Thu Apr 24 10:14:43 2008 +0100
+++ b/tools/python/xen/xm/create.py     Thu Apr 24 18:05:55 2008 +0100
@@ -549,6 +549,14 @@ gopts.var('hap', val='HAP',
           use="""Hap status (0=hap is disabled;
           1=hap is enabled.""")
 
+gopts.var('cpuid', val="IN[,SIN]:eax=EAX,ebx=EBX,ecx=ECX,edx=EDX",
+          fn=append_value, default=[],
+          use="""Cpuid description.""")
+
+gopts.var('cpuid_check', val="IN[,SIN]:eax=EAX,ebx=EBX,ecx=ECX,edx=EDX",
+          fn=append_value, default=[],
+          use="""Cpuid check description.""")
+
 def err(msg):
     """Print an error to stderr and exit.
     """
@@ -755,7 +763,7 @@ def configure_hvm(config_image, vals):
              'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten',
              'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor',
              'acpi', 'apic', 'usb', 'usbdevice', 'keymap', 'pci', 'hpet',
-             'guest_os_type', 'hap', 'opengl']
+             'guest_os_type', 'hap', 'opengl', 'cpuid', 'cpuid_check']
 
     for a in args:
         if a in vals.__dict__ and vals.__dict__[a] is not None:
@@ -779,7 +787,8 @@ def make_config(vals):
     map(add_conf, ['name', 'memory', 'maxmem', 'shadow_memory',
                    'restart', 'on_poweroff',
                    'on_reboot', 'on_crash', 'vcpus', 'vcpu_avail', 'features',
-                   'on_xend_start', 'on_xend_stop', 'target'])
+                   'on_xend_start', 'on_xend_stop', 'target', 'cpuid',
+                   'cpuid_check'])
 
     if vals.uuid is not None:
         config.append(['uuid', vals.uuid])
@@ -842,6 +851,24 @@ def preprocess_disk(vals):
             err('Invalid disk specifier: ' + v)
         disk.append(d)
     vals.disk = disk
+
+def preprocess_cpuid(vals, attr_name):
+    """Parse vals.<attr_name> 'IN[,SIN]:reg=val,...' specs into a dict."""
+    if not getattr(vals, attr_name): return  # test this option, not 'cpuid'
+    cpuid, leaf = {}, r"(0x)?[0-9A-Fa-f]+"
+    spec_re = r"(?P<input>%s(,%s)?):(?P<regs>.*)" % (leaf, leaf)
+    for cpuid_input in getattr(vals, attr_name):
+        cpuid_match = re.match(spec_re, cpuid_input)
+        if cpuid_match is None:
+            err("cpuid's syntax is IN[,SIN]:(eax|ebx|ecx|edx)=value,...")
+        input = cpuid_match.group('input')
+        cpuid[input] = {} # New input
+        for reg in cpuid_match.group('regs').split(','):
+            reg_match = re.match(r"(?P<reg>eax|ebx|ecx|edx)=(?P<val>.*)", reg)
+            if reg_match is None:
+                err("cpuid's syntax is (eax|ebx|ecx|edx)=value")
+            cpuid[input][reg_match.group('reg')] = reg_match.group('val')
+    setattr(vals, attr_name, cpuid)
 
 def preprocess_pci(vals):
     if not vals.pci: return
@@ -1047,6 +1074,8 @@ def preprocess(vals):
     preprocess_vnc(vals)
     preprocess_vtpm(vals)
     preprocess_access_control(vals)
+    preprocess_cpuid(vals, 'cpuid')
+    preprocess_cpuid(vals, 'cpuid_check')
 
 
 def comma_sep_kv_to_dict(c):
diff -r 4e6577dec729 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu Apr 24 10:14:43 2008 +0100
+++ b/xen/arch/x86/domain.c     Thu Apr 24 18:05:55 2008 +0100
@@ -440,10 +440,9 @@ int arch_domain_create(struct domain *d,
 {
 #ifdef __x86_64__
     struct page_info *pg;
-    int i;
 #endif
     l1_pgentry_t gdt_l1e;
-    int vcpuid, pdpt_order, paging_initialised = 0;
+    int i, vcpuid, pdpt_order, paging_initialised = 0;
     int rc = -ENOMEM;
 
     d->arch.hvm_domain.hap_enabled =
@@ -539,6 +538,13 @@ int arch_domain_create(struct domain *d,
         /* 32-bit PV guest by default only if Xen is not 64-bit. */
         d->arch.is_32bit_pv = d->arch.has_32bit_shinfo =
             (CONFIG_PAGING_LEVELS != 4);
+    }
+
+    memset(d->arch.cpuids, 0, sizeof(d->arch.cpuids));
+    for ( i = 0; i < MAX_CPUID_INPUT; i++ )
+    {
+        d->arch.cpuids[i].input[0] = XEN_CPUID_INPUT_UNUSED;
+        d->arch.cpuids[i].input[1] = XEN_CPUID_INPUT_UNUSED;
     }
 
     return 0;
@@ -1910,6 +1916,37 @@ void arch_dump_vcpu_info(struct vcpu *v)
     paging_dump_vcpu_info(v);
 }
 
+/* Look up d's CPUID table: a stored XEN_CPUID_INPUT_UNUSED sub-input matches
+ * any sub_input; all four outputs are zeroed when no entry matches. */
+void domain_cpuid(
+    struct domain *d,
+    unsigned int  input,
+    unsigned int  sub_input,
+    unsigned int  *eax,
+    unsigned int  *ebx,
+    unsigned int  *ecx,
+    unsigned int  *edx)
+{
+    const cpuid_input_t *entry = d->arch.cpuids;
+    const cpuid_input_t *end = entry + MAX_CPUID_INPUT;
+
+    for ( ; entry != end; entry++ )
+    {
+        if ( entry->input[0] != input )
+            continue;
+        if ( (entry->input[1] != XEN_CPUID_INPUT_UNUSED) &&
+             (entry->input[1] != sub_input) )
+            continue;
+        *eax = entry->eax;
+        *ebx = entry->ebx;
+        *ecx = entry->ecx;
+        *edx = entry->edx;
+        return;
+    }
+
+    *eax = *ebx = *ecx = *edx = 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff -r 4e6577dec729 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Thu Apr 24 10:14:43 2008 +0100
+++ b/xen/arch/x86/domctl.c     Thu Apr 24 18:05:55 2008 +0100
@@ -842,6 +842,45 @@ long arch_do_domctl(
     }
     break;
 
+    case XEN_DOMCTL_set_cpuid:
+    {
+        /*
+         * Store one CPUID reply in the domain's table.  The slot used is
+         * the first that is still unused, or that already holds the same
+         * input (a stored XEN_CPUID_INPUT_UNUSED sub-input matches any
+         * sub-input).  -ESRCH: domain lookup failed; -ENOENT: table full.
+         */
+        struct domain *d;
+        xen_domctl_cpuid_t *ctl = &domctl->u.cpuid;
+        cpuid_input_t *slot = NULL;
+        int idx = 0;
+
+        ret = -ESRCH;
+        d = rcu_lock_domain_by_id(domctl->domain);
+        if ( d == NULL )
+            break;
+
+        while ( idx < MAX_CPUID_INPUT )
+        {
+            slot = &d->arch.cpuids[idx];
+            if ( (slot->input[0] == XEN_CPUID_INPUT_UNUSED) ||
+                 ((slot->input[0] == ctl->input[0]) &&
+                  ((slot->input[1] == XEN_CPUID_INPUT_UNUSED) ||
+                   (slot->input[1] == ctl->input[1]))) )
+                break;
+            idx++;
+        }
+        ret = -ENOENT;
+        if ( idx < MAX_CPUID_INPUT )
+        {
+            memcpy(slot, ctl, sizeof(cpuid_input_t));
+            ret = 0;
+        }
+
+        rcu_unlock_domain(d);
+    }
+    break;
+
     default:
         ret = -ENOSYS;
         break;
diff -r 4e6577dec729 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Thu Apr 24 10:14:43 2008 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Thu Apr 24 18:05:55 2008 +0100
@@ -1594,66 +1594,15 @@ void hvm_cpuid(unsigned int input, unsig
     if ( cpuid_hypervisor_leaves(input, eax, ebx, ecx, edx) )
         return;
 
-    cpuid(input, eax, ebx, ecx, edx);
+    domain_cpuid(v->domain, input, *ecx, eax, ebx, ecx, edx);
 
-    switch ( input )
+    if ( input == 0x00000001 )
     {
-    case 0x00000001:
-        /* Clear #threads count and poke initial VLAPIC ID. */
-        *ebx &= 0x0000FFFFu;
-        *ebx |= (current->vcpu_id * 2) << 24;
-
-        /* We always support MTRR MSRs. */
-        *edx |= bitmaskof(X86_FEATURE_MTRR);
-
-        *ecx &= (bitmaskof(X86_FEATURE_XMM3) |
-                 bitmaskof(X86_FEATURE_SSSE3) |
-                 bitmaskof(X86_FEATURE_CX16) |
-                 bitmaskof(X86_FEATURE_SSE4_1) |
-                 bitmaskof(X86_FEATURE_SSE4_2) |
-                 bitmaskof(X86_FEATURE_POPCNT));
-
-        *edx &= (bitmaskof(X86_FEATURE_FPU) |
-                 bitmaskof(X86_FEATURE_VME) |
-                 bitmaskof(X86_FEATURE_DE) |
-                 bitmaskof(X86_FEATURE_PSE) |
-                 bitmaskof(X86_FEATURE_TSC) |
-                 bitmaskof(X86_FEATURE_MSR) |
-                 bitmaskof(X86_FEATURE_PAE) |
-                 bitmaskof(X86_FEATURE_MCE) |
-                 bitmaskof(X86_FEATURE_CX8) |
-                 bitmaskof(X86_FEATURE_APIC) |
-                 bitmaskof(X86_FEATURE_SEP) |
-                 bitmaskof(X86_FEATURE_MTRR) |
-                 bitmaskof(X86_FEATURE_PGE) |
-                 bitmaskof(X86_FEATURE_MCA) |
-                 bitmaskof(X86_FEATURE_CMOV) |
-                 bitmaskof(X86_FEATURE_PAT) |
-                 bitmaskof(X86_FEATURE_CLFLSH) |
-                 bitmaskof(X86_FEATURE_MMX) |
-                 bitmaskof(X86_FEATURE_FXSR) |
-                 bitmaskof(X86_FEATURE_XMM) |
-                 bitmaskof(X86_FEATURE_XMM2));
+        /* Fix up VLAPIC details. */
+        *ebx &= 0x00FFFFFFu;
+        *ebx |= (v->vcpu_id * 2) << 24;
         if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
-            __clear_bit(X86_FEATURE_APIC & 31, edx);
-#if CONFIG_PAGING_LEVELS >= 3
-        if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
-#endif
-            __clear_bit(X86_FEATURE_PAE & 31, edx);
-        break;
-
-    case 0x80000001:
-#if CONFIG_PAGING_LEVELS >= 3
-        if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
-#endif
-            __clear_bit(X86_FEATURE_NX & 31, edx);
-#ifdef __i386__
-        /* Mask feature for Intel ia32e or AMD long mode. */
-        __clear_bit(X86_FEATURE_LAHF_LM & 31, ecx);
-        __clear_bit(X86_FEATURE_LM & 31, edx);
-        __clear_bit(X86_FEATURE_SYSCALL & 31, edx);
-#endif
-        break;
+            __clear_bit(X86_FEATURE_APIC & 31, edx); /* CPUID.1:EDX[9] */
     }
 }
 
@@ -1663,10 +1612,14 @@ int hvm_msr_read_intercept(struct cpu_us
     uint64_t msr_content = 0;
     struct vcpu *v = current;
     uint64_t *var_range_base, *fixed_range_base;
-    int index;
+    int index, mtrr;
+    uint32_t cpuid[4];
 
     var_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.var_ranges;
     fixed_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.fixed_ranges;
+
+    hvm_cpuid(1, &cpuid[0], &cpuid[1], &cpuid[2], &cpuid[3]);
+    mtrr = !!(cpuid[3] & bitmaskof(X86_FEATURE_MTRR));
 
     switch ( ecx )
     {
@@ -1695,25 +1648,37 @@ int hvm_msr_read_intercept(struct cpu_us
         break;
 
     case MSR_MTRRcap:
+        if ( !mtrr )
+            goto gp_fault;
         msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap;
         break;
     case MSR_MTRRdefType:
+        if ( !mtrr )
+            goto gp_fault;
         msr_content = v->arch.hvm_vcpu.mtrr.def_type
                         | (v->arch.hvm_vcpu.mtrr.enabled << 10);
         break;
     case MSR_MTRRfix64K_00000:
+        if ( !mtrr )
+            goto gp_fault;
         msr_content = fixed_range_base[0];
         break;
     case MSR_MTRRfix16K_80000:
     case MSR_MTRRfix16K_A0000:
+        if ( !mtrr )
+            goto gp_fault;
         index = regs->ecx - MSR_MTRRfix16K_80000;
         msr_content = fixed_range_base[index + 1];
         break;
     case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
+        if ( !mtrr )
+            goto gp_fault;
         index = regs->ecx - MSR_MTRRfix4K_C0000;
         msr_content = fixed_range_base[index + 3];
         break;
     case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
+        if ( !mtrr )
+            goto gp_fault;
         index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0;
         msr_content = var_range_base[index];
         break;
@@ -1725,6 +1690,10 @@ int hvm_msr_read_intercept(struct cpu_us
     regs->eax = (uint32_t)msr_content;
     regs->edx = (uint32_t)(msr_content >> 32);
     return X86EMUL_OKAY;
+
+gp_fault:
+    hvm_inject_exception(TRAP_gp_fault, 0, 0);
+    return X86EMUL_EXCEPTION;
 }
 
 int hvm_msr_write_intercept(struct cpu_user_regs *regs)
@@ -1739,7 +1708,11 @@ int hvm_msr_write_intercept(struct cpu_u
     uint32_t ecx = regs->ecx;
     uint64_t msr_content = (uint32_t)regs->eax | ((uint64_t)regs->edx << 32);
     struct vcpu *v = current;
-    int index;
+    int index, mtrr;
+    uint32_t cpuid[4];
+
+    hvm_cpuid(1, &cpuid[0], &cpuid[1], &cpuid[2], &cpuid[3]);
+    mtrr = !!(cpuid[3] & bitmaskof(X86_FEATURE_MTRR));
 
     switch ( ecx )
     {
@@ -1758,29 +1731,41 @@ int hvm_msr_write_intercept(struct cpu_u
         break;
 
     case MSR_MTRRcap:
+        /* Writes to MTRRcap always fault, whatever the guest's MTRR
+         * CPUID bit says, so no feature check is needed here. */
         goto gp_fault;
     case MSR_MTRRdefType:
+        if ( !mtrr )
+            goto gp_fault;
         if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) )
            goto gp_fault;
         break;
     case MSR_MTRRfix64K_00000:
+        if ( !mtrr )
+            goto gp_fault;
         if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) )
             goto gp_fault;
         break;
     case MSR_MTRRfix16K_80000:
     case MSR_MTRRfix16K_A0000:
+        if ( !mtrr )
+            goto gp_fault;
         index = regs->ecx - MSR_MTRRfix16K_80000 + 1;
         if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
                                      index, msr_content) )
             goto gp_fault;
         break;
     case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
+        if ( !mtrr )
+            goto gp_fault;
         index = regs->ecx - MSR_MTRRfix4K_C0000 + 3;
         if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
                                      index, msr_content) )
             goto gp_fault;
         break;
     case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
+        if ( !mtrr )
+            goto gp_fault;
         if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr,
                                      regs->ecx, msr_content) )
             goto gp_fault;
diff -r 4e6577dec729 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Thu Apr 24 10:14:43 2008 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c        Thu Apr 24 18:05:55 2008 +0100
@@ -892,56 +892,11 @@ static void svm_cpuid_intercept(
 
     hvm_cpuid(input, eax, ebx, ecx, edx);
 
-    switch ( input )
+    if ( input == 0x80000001 )
     {
-    case 0x00000001:
-        /* Mask Intel-only features. */
-        *ecx &= ~(bitmaskof(X86_FEATURE_SSSE3) |
-                  bitmaskof(X86_FEATURE_SSE4_1) |
-                  bitmaskof(X86_FEATURE_SSE4_2));
-        break;
-
-    case 0x80000001:
-        /* Filter features which are shared with 0x00000001:EDX. */
+        /* Fix up VLAPIC details. */
         if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
             __clear_bit(X86_FEATURE_APIC & 31, edx);
-#if CONFIG_PAGING_LEVELS >= 3
-        if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
-#endif
-            __clear_bit(X86_FEATURE_PAE & 31, edx);
-        __clear_bit(X86_FEATURE_PSE36 & 31, edx);
-
-        /* We always support MTRR MSRs. */
-        *edx |= bitmaskof(X86_FEATURE_MTRR);
-
-        /* Filter all other features according to a whitelist. */
-        *ecx &= (bitmaskof(X86_FEATURE_LAHF_LM) |
-                 bitmaskof(X86_FEATURE_ALTMOVCR) |
-                 bitmaskof(X86_FEATURE_ABM) |
-                 bitmaskof(X86_FEATURE_SSE4A) |
-                 bitmaskof(X86_FEATURE_MISALIGNSSE) |
-                 bitmaskof(X86_FEATURE_3DNOWPF));
-        *edx &= (0x0183f3ff | /* features shared with 0x00000001:EDX */
-                 bitmaskof(X86_FEATURE_NX) |
-                 bitmaskof(X86_FEATURE_LM) |
-                 bitmaskof(X86_FEATURE_SYSCALL) |
-                 bitmaskof(X86_FEATURE_MP) |
-                 bitmaskof(X86_FEATURE_MMXEXT) |
-                 bitmaskof(X86_FEATURE_FFXSR) |
-                 bitmaskof(X86_FEATURE_3DNOW) |
-                 bitmaskof(X86_FEATURE_3DNOWEXT));
-        break;
-
-    case 0x80000007:
-    case 0x8000000A:
-        /* Mask out features of power management and SVM extension. */
-        *eax = *ebx = *ecx = *edx = 0;
-        break;
-
-    case 0x80000008:
-        /* Make sure Number of CPU core is 1 when HTT=0 */
-        *ecx &= 0xFFFFFF00;
-        break;
     }
 
     HVMTRACE_3D(CPUID, v, input,
diff -r 4e6577dec729 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Thu Apr 24 10:14:43 2008 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Thu Apr 24 18:05:55 2008 +0100
@@ -1311,34 +1311,21 @@ static void vmx_cpuid_intercept(
     unsigned int *ecx, unsigned int *edx)
 {
     unsigned int input = *eax;
-    unsigned int count = *ecx;
+    struct segment_register cs;
+    struct vcpu *v = current;
 
     hvm_cpuid(input, eax, ebx, ecx, edx);
 
     switch ( input )
     {
-    case 0x00000001:
-        /* Mask AMD-only features. */
-        *ecx &= ~(bitmaskof(X86_FEATURE_POPCNT));
-        break;
-
-    case 0x00000004:
-        cpuid_count(input, count, eax, ebx, ecx, edx);
-        *eax &= 0x3FFF; /* one core */
-        break;
-
-    case 0x00000006:
-    case 0x00000009:
-        *eax = *ebx = *ecx = *edx = 0;
-        break;
-
-    case 0x80000001:
-        /* Only a few features are advertised in Intel's 0x80000001. */
-        *ecx &= (bitmaskof(X86_FEATURE_LAHF_LM));
-        *edx &= (bitmaskof(X86_FEATURE_NX) |
-                 bitmaskof(X86_FEATURE_LM) |
-                 bitmaskof(X86_FEATURE_SYSCALL));
-        break;
+        case 0x80000001:
+            /* SYSCALL is visible iff running in long mode. */
+            hvm_get_segment_register(v, x86_seg_cs, &cs);
+            if ( cs.attr.fields.l )
+                *edx |= bitmaskof(X86_FEATURE_SYSCALL);
+            else
+                *edx &= ~(bitmaskof(X86_FEATURE_SYSCALL));
+            break;
     }
 
     HVMTRACE_3D(CPUID, current, input,
diff -r 4e6577dec729 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Thu Apr 24 10:14:43 2008 +0100
+++ b/xen/include/asm-x86/domain.h      Thu Apr 24 18:05:55 2008 +0100
@@ -187,6 +187,9 @@ struct paging_vcpu {
     struct shadow_vcpu shadow;
 };
 
+#define MAX_CPUID_INPUT 40
+typedef xen_domctl_cpuid_t cpuid_input_t;
+
 struct p2m_domain;
 
 struct arch_domain
@@ -243,6 +246,8 @@ struct arch_domain
         RELMEM_done,
     } relmem;
     struct list_head relmem_list;
+
+    cpuid_input_t cpuids[MAX_CPUID_INPUT];
 } __cacheline_aligned;
 
 #ifdef CONFIG_X86_PAE
@@ -353,6 +358,14 @@ unsigned long pv_guest_cr4_fixup(unsigne
 #define real_cr4_to_pv_guest_cr4(c) \
     ((c) & ~(X86_CR4_PGE | X86_CR4_PSE))
 
+void domain_cpuid(struct domain *d,
+                  unsigned int  input,
+                  unsigned int  sub_input,
+                  unsigned int  *eax,
+                  unsigned int  *ebx,
+                  unsigned int  *ecx,
+                  unsigned int  *edx);
+
 #endif /* __ASM_DOMAIN_H__ */
 
 /*
diff -r 4e6577dec729 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Thu Apr 24 10:14:43 2008 +0100
+++ b/xen/include/public/domctl.h       Thu Apr 24 18:05:55 2008 +0100
@@ -571,6 +571,19 @@ typedef struct xen_domctl_set_target xen
 typedef struct xen_domctl_set_target xen_domctl_set_target_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_target_t);
 
+#if defined(__i386__) || defined(__x86_64__)
+# define XEN_CPUID_INPUT_UNUSED  0xFFFFFFFF /* unset input / any sub-input */
+# define XEN_DOMCTL_set_cpuid 49
+struct xen_domctl_cpuid {
+  unsigned int  input[2]; /* [0]: CPUID input; [1]: sub-input or UNUSED */
+  unsigned int  eax;      /* register values reported for this input */
+  unsigned int  ebx;
+  unsigned int  ecx;
+  unsigned int  edx;
+};
+typedef struct xen_domctl_cpuid xen_domctl_cpuid_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t);
+#endif
 
 struct xen_domctl {
     uint32_t cmd;
@@ -609,6 +622,9 @@ struct xen_domctl {
         struct xen_domctl_ext_vcpucontext   ext_vcpucontext;
         struct xen_domctl_set_opt_feature   set_opt_feature;
         struct xen_domctl_set_target        set_target;
+#if defined(__i386__) || defined(__x86_64__)
+        struct xen_domctl_cpuid             cpuid;
+#endif
         uint8_t                             pad[128];
     } u;
 };
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.