[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg



# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1218633741 -32400
# Node ID da236d7f59b963585800e7471f8a0451b83ae569
# Parent  fa8be8a6cb74976d5a96f830a9a2238abf622822
# Parent  c6402709acc8122e3f8f92a885750afb4061ac61
merge with xen-unstable.hg
---
 .hgtags                             |    1 
 docs/misc/kexec_and_kdump.txt       |  213 ++++++++++++++++++++++++++++++++++++
 extras/mini-os/include/lwipopts.h   |    1 
 tools/Makefile                      |    7 -
 tools/cross-install                 |    8 +
 tools/ioemu/hw/pass-through.h       |    1 
 tools/ioemu/hw/pt-msi.c             |   24 +---
 tools/libxc/xc_physdev.c            |   10 -
 tools/libxc/xenctrl.h               |    2 
 tools/misc/xend                     |   16 +-
 tools/python/xen/xend/XendAPI.py    |    3 
 tools/python/xen/xend/XendConfig.py |    2 
 tools/python/xen/xend/XendPIF.py    |   20 +++
 xen/Makefile                        |    2 
 xen/arch/x86/cpu/mcheck/mce.h       |    2 
 xen/arch/x86/mm/shadow/common.c     |   40 +++++-
 xen/arch/x86/mm/shadow/multi.c      |    7 -
 xen/arch/x86/mm/shadow/private.h    |    9 -
 xen/arch/x86/msi.c                  |   82 +++++--------
 xen/arch/x86/oprofile/nmi_int.c     |   40 ++++--
 xen/arch/x86/physdev.c              |   15 +-
 xen/common/page_alloc.c             |   13 ++
 xen/drivers/passthrough/io.c        |    3 
 xen/drivers/passthrough/vtd/iommu.c |    3 
 xen/include/asm-x86/event.h         |    7 -
 xen/include/asm-x86/msi.h           |   10 +
 xen/include/public/physdev.h        |   11 +
 27 files changed, 417 insertions(+), 135 deletions(-)

diff -r fa8be8a6cb74 -r da236d7f59b9 .hgtags
--- a/.hgtags   Wed Aug 13 13:18:06 2008 +0900
+++ b/.hgtags   Wed Aug 13 22:22:21 2008 +0900
@@ -28,3 +28,4 @@ c3494402098e26507fc61a6579832c0149351d6a
 c3494402098e26507fc61a6579832c0149351d6a 3.3.0-rc1
 dde12ff94c96331668fe38a7b09506fa94d03c34 3.3.0-rc2
 57fca3648f25dcc085ee380954342960a7979987 3.3.0-rc3
+96d0a48e87ee46ba7b73e8c906a7e2e0baf60e2e 3.3.0-rc4
diff -r fa8be8a6cb74 -r da236d7f59b9 docs/misc/kexec_and_kdump.txt
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/docs/misc/kexec_and_kdump.txt     Wed Aug 13 22:22:21 2008 +0900
@@ -0,0 +1,213 @@
+
+=======================
+Kexec and Kdump for Xen
+=======================
+
+This is a brief guide to using Kexec and Kdump in conjunction with Xen.
+This functionality works at the level of the hypervisor and dom0 kernel,
+and will thus affect all guests running on a machine.
+
+At this stage it does not work in conjunction with domU kernels.
+
+This document should be read in conjunction with
+Documentation/kdump/kdump.txt from the Linux kernel source.
+Some of the information in this document has been
+sourced from that document.
+
+
+Kexec
+=====
+
+It is possible to kexec from Xen or Linux to either Xen or Linux.
+
+Pattern        | Before Kexec       | After Kexec
+---------------+--------------------+--------------------
+Xen -> Xen     | first hypervisor & | second hypervisor &
+               | dom0 kernel        | dom0 kernel
+---------------+--------------------+--------------------
+Xen   -> Linux | first hypervisor & | second kernel
+               | dom0 kernel        |
+---------------+--------------------+--------------------
+Linux -> Xen   | first kernel       | second hypervisor &
+               |                    | dom0 kernel
+---------------+--------------------+--------------------
+Linux -> Linux | first kernel       | second kernel
+
+If you are kexecing to Xen then you will also need to prepare the second
+hypervisor and dom0 kernel that will run after kexec. These may be the same
+as the first hypervisor and dom0 kernel that are used before kexec if you
+are kexecing from Xen to Xen.
+
+If you are kexecing to Linux then you will need to prepare the second Linux
+kernel that will run after kexec. In the case that you are kexecing from
+Linux, it may be the same as the first kernel image that runs before
+kexec.
+
+Regardless of which kexec pattern you wish to run, you will
+need to have kexec-tools installed. This provides the kexec command.
+
+1. Load
+-------
+
+Before kexecing, the second kernel or hypervisor & dom0 kernel
+need to be loaded into the running hypervisor or kernel using
+the kexec command.
+
+  a. To kexec to Xen (Xen->Xen or Linux->Xen)
+
+  kexec -l --append="XEN_ARGS -- DOM0_ARGS" \
+       --vmm="XEN_IMAGE" "DOM0_IMAGE" KEXEC_ARGS
+
+  where:
+    XEN_ARGS: command line arguments to the xen hypervisor
+              On x86 the no-real-mode argument should be included
+    DOM0_ARGS: command line arguments to the dom0 kernel
+    XEN_IMAGE: xen hypervisor image
+    DOM0_IMAGE: dom0 kernel image
+    KEXEC_ARGS: additional kexec-tools command line arguments
+
+  e.g. kexec -l --append "no-real-mode" --vmm="/boot/xen.gz" /boot/vmlinuz.gz
+
+  OR
+
+  b. To kexec to Linux (Xen->Linux or Linux->Linux)
+
+  kexec -l LINUX_IMAGE --append "$LINUX_ARGS" KEXEC_ARGS
+
+  where:
+    LINUX_IMAGE: the second linux kernel image
+    LINUX_ARGS: command line arguments to the second linux kernel
+    KEXEC_ARGS: additional kexec-tools command line arguments
+
+  e.g. kexec -l /boot/second-vmlinuz.gz
+
+2. Execute
+----------
+
+Once the second kernel is loaded, it can be executed at any time.
+If you don't see the second kernel booting within a second or so,
+you are in trouble :(
+
+   kexec -e
+
+Kdump
+=====
+
+It is possible to kdump from Xen or Linux to a Linux crash kernel.
+It is not possible to use xen as a crash kernel.
+
+Pattern        | Before Kexec       | After Kexec
+---------------+--------------------+--------------------
+Xen -> Linux   | first hypervisor & | crash kernel
+               | dom0 kernel        |
+---------------+--------------------+--------------------
+Linux -> Linux | first kernel       | crash kernel
+
+Regardless of whether you are kdumping from Xen or Linux you will need to
+prepare a linux crash kernel.  You will also need to have kexec-tools
+installed. This provides the kexec command.
+
+0. Set-Up The Crash Kernel Region
+---------------------------------
+
+In order to use kdump an area of memory has to be reserved at boot time.
+This is the area of memory that the crash kernel will use, thus allowing it
+to run without disrupting the memory used by the first kernel. This area is
+called the crash kernel region and is reserved using the crashkernel
+command line parameter to the Xen hypervisor. It has two forms:
+
+  i) crashkernel=size
+
+     This is the simplest and recommended way to reserve the crash kernel
+     region. Just specify how large the region should be and the hypervisor
+     will find a good location for it. A good size to start with is 128MB.
+
+     e.g.
+
+     crashkernel=128M
+
+  ii) crashkernel=size@base
+
+      In this form the base address is provided in addition to
+      the size. Use this if auto-placement doesn't work for some reason.
+      It is strongly recommended that the base address be aligned
+      to 64MB, else memory below the alignment point will not
+      be usable.
+
+      e.g. crashkernel=128M@256M
+
+   Regardless of which of the two forms of the crashkernel command line you
+   use, the crash kernel region should appear in /proc/iomem on x86 or
+   /proc/iomem_machine on ia64. If it doesn't then either the crashkernel
+   parameter is missing, or for some reason the region couldn't be placed -
+   for instance because it is too large.
+
+   # cat /proc/iomem
+   ...
+   00100000-07feffff : System RAM
+     00100000-00bfffff : Hypervisor code and data
+     0533f000-0733efff : Crash kernel
+   ...
+
+
+1. Load
+-------
+
+Once you are running in a kexec-enabled hypervisor and dom0,
+you can prepare to kdump by loading the crash kernel into the
+running kernel.
+
+  kexec -p CRASH_KERNEL_IMAGE --append "$CRASH_KERNEL_ARGS" KEXEC_ARGS
+
+  where:
+    CRASH_KERNEL_IMAGE: the crash kernel image
+    CRASH_KERNEL_ARGS: command line arguments to the crash kernel
+                      init 1 is strongly recommended
+                      irqpoll is strongly recommended
+                      maxcpus=1 is required if the crash kernel is SMP
+                      reset_devices is strongly recommended
+    KEXEC_ARGS: additional kexec-tools command line arguments
+                On x86 --args-linux should be supplied if an uncompressed
+               vmlinux image is used as the crash kernel
+
+  e.g. kexec -p /boot/crash-vmlinuz \
+        --append "init 1 irqpoll maxcpus=1 reset_devices" --args-linux
+
+On x86 systems the crash kernel may be either
+- An uncompressed vmlinux image if the kernel is not relocatable
+- A compressed bzImage or vmlinuz image if the kernel is relocatable
+- Relocatability is controlled by the CONFIG_RELOCATABLE kernel
+  compile configuration parameter. This option may not be available
+  depending on the kernel version
+On ia64
+  Either a vmlinuz or vmlinux.gz image may be used
+
+
+2. Execute
+----------
+
+Once the second kernel is loaded, the crash kernel will be executed if the
+hypervisor panics. It will also be executed if dom0 panics or if dom0
+oopses and /proc/sys/kernel/panic_on_oops is set to a non-zero value:
+
+echo 1 > /proc/sys/kernel/panic_on_oops
+
+Kdump may also be triggered (for testing)
+
+  a. From Domain 0
+
+  echo c > /proc/sysrq-trigger
+
+  b. From Xen
+
+     Enter the xen console
+
+     ctrl^a ctrl^a  (may be bound to a different key, this is the default)
+
+     Select C for "trigger a crashdump"
+
+     C
+
+If you don't see the crash kernel booting within a second or so,
+you are in trouble :(
+
diff -r fa8be8a6cb74 -r da236d7f59b9 extras/mini-os/include/lwipopts.h
--- a/extras/mini-os/include/lwipopts.h Wed Aug 13 13:18:06 2008 +0900
+++ b/extras/mini-os/include/lwipopts.h Wed Aug 13 22:22:21 2008 +0900
@@ -15,6 +15,7 @@
 #define LWIP_DHCP 1
 #define LWIP_COMPAT_SOCKETS 0
 #define LWIP_IGMP 1
+#define LWIP_USE_HEAP_FROM_INTERRUPT 1
 #define MEMP_NUM_SYS_TIMEOUT 10
 #define TCP_SND_BUF 3000
 #define TCP_MSS 1500
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/Makefile
--- a/tools/Makefile    Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/Makefile    Wed Aug 13 22:22:21 2008 +0900
@@ -38,8 +38,10 @@ endif
 
 # For the sake of linking, set the sys-root
 ifneq ($(CROSS_COMPILE),)
+CROSS_BIN_PATH ?= /usr/$(CROSS_COMPILE:-=)/bin
 CROSS_SYS_ROOT ?= /usr/$(CROSS_COMPILE:-=)/sys-root
-export CROSS_SYS_ROOT
+export CROSS_SYS_ROOT # exported for check/funcs.sh
+export CROSS_BIN_PATH # exported for cross-install.sh
 endif
 
 .PHONY: all
@@ -57,7 +59,8 @@ ifneq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_
 ifneq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
 IOEMU_CONFIGURE_CROSS ?= --cpu=$(XEN_TARGET_ARCH) \
                         --cross-prefix=$(CROSS_COMPILE) \
-                        --interp-prefix=$(CROSS_SYS_ROOT)
+                        --interp-prefix=$(CROSS_SYS_ROOT) \
+                        --install=$(CURDIR)/cross-install
 endif
 
 ioemu/config-host.mak:
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/cross-install
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cross-install       Wed Aug 13 22:22:21 2008 +0900
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+# prepend CROSS_BIN_PATH to find the right "strip"
+if [ -n "$CROSS_BIN_PATH" ]; then
+    PATH="$CROSS_BIN_PATH:$PATH"
+fi
+
+exec install "$@"
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/ioemu/hw/pass-through.h
--- a/tools/ioemu/hw/pass-through.h     Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/ioemu/hw/pass-through.h     Wed Aug 13 22:22:21 2008 +0900
@@ -120,6 +120,7 @@ struct pt_msix_info {
     int enabled;
     int total_entries;
     int bar_index;
+    uint64_t table_base;
     uint32_t table_off;
     uint64_t mmio_base_addr;
     int mmio_index;
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/ioemu/hw/pt-msi.c
--- a/tools/ioemu/hw/pt-msi.c   Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/ioemu/hw/pt-msi.c   Wed Aug 13 22:22:21 2008 +0900
@@ -38,8 +38,8 @@ int pt_msi_setup(struct pt_dev *dev)
     }
 
     if ( xc_physdev_map_pirq_msi(xc_handle, domid, AUTO_ASSIGN, &pirq,
-                                                       dev->pci_dev->dev << 3 | dev->pci_dev->func,
-                                                       dev->pci_dev->bus, 0, 1) )
+                                 dev->pci_dev->dev << 3 | dev->pci_dev->func,
+                                 dev->pci_dev->bus, 0, 0) )
     {
         PT_LOG("error map msi\n");
         return -1;
@@ -121,7 +121,8 @@ static int pt_msix_update_one(struct pt_
     {
         ret = xc_physdev_map_pirq_msi(xc_handle, domid, AUTO_ASSIGN, &pirq,
                                 dev->pci_dev->dev << 3 | dev->pci_dev->func,
-                                dev->pci_dev->bus, entry_nr, 0);
+                                dev->pci_dev->bus, entry_nr,
+                                dev->msix->table_base);
         if ( ret )
         {
             PT_LOG("error map msix entry %x\n", entry_nr);
@@ -183,7 +184,7 @@ static void pci_msix_writel(void *opaque
     entry = &msix->msix_entry[entry_nr];
     offset = ((addr - msix->mmio_base_addr) % 16) / 4;
 
-    if ( offset != 3 && msix->enabled && entry->io_mem[3] & 0x1 )
+    if ( offset != 3 && msix->enabled && !(entry->io_mem[3] & 0x1) )
     {
         PT_LOG("can not update msix entry %d since MSI-X is already \
                 function now.\n", entry_nr);
@@ -196,7 +197,7 @@ static void pci_msix_writel(void *opaque
 
     if ( offset == 3 )
     {
-        if ( !(val & 0x1) )
+        if ( msix->enabled && !(val & 0x1) )
             pt_msix_update_one(dev, entry_nr);
         mask_physical_msix_entry(dev, entry_nr, entry->io_mem[3] & 0x1);
     }
@@ -280,7 +281,6 @@ int pt_msix_init(struct pt_dev *dev, int
     uint8_t id;
     uint16_t control;
     int i, total_entries, table_off, bar_index;
-    uint64_t bar_base;
     struct pci_dev *pd = dev->pci_dev;
 
     id = pci_read_byte(pd, pos + PCI_CAP_LIST_ID);
@@ -314,18 +314,14 @@ int pt_msix_init(struct pt_dev *dev, int
     table_off = pci_read_long(pd, pos + PCI_MSIX_TABLE);
     bar_index = dev->msix->bar_index = table_off & PCI_MSIX_BIR;
     table_off &= table_off & ~PCI_MSIX_BIR;
-    bar_base = pci_read_long(pd, 0x10 + 4 * bar_index);
-    if ( (bar_base & 0x6) == 0x4 )
-    {
-        bar_base &= ~0xf;
-        bar_base += (uint64_t)pci_read_long(pd, 0x10 + 4 * (bar_index + 1)) << 32;
-    }
-    PT_LOG("get MSI-X table bar base %lx\n", bar_base);
+    dev->msix->table_base = dev->pci_dev->base_addr[bar_index];
+    PT_LOG("get MSI-X table bar base %llx\n",
+           (unsigned long long)dev->msix->table_base);
 
     dev->msix->fd = open("/dev/mem", O_RDWR);
     dev->msix->phys_iomem_base = mmap(0, total_entries * 16,
                           PROT_WRITE | PROT_READ, MAP_SHARED | MAP_LOCKED,
-                          dev->msix->fd, bar_base + table_off);
+                          dev->msix->fd, dev->msix->table_base + table_off);
     PT_LOG("mapping physical MSI-X table to %lx\n",
            (unsigned long)dev->msix->phys_iomem_base);
     return 0;
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/libxc/xc_physdev.c
--- a/tools/libxc/xc_physdev.c  Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/libxc/xc_physdev.c  Wed Aug 13 22:22:21 2008 +0900
@@ -51,7 +51,7 @@ int xc_physdev_map_pirq_msi(int xc_handl
                             int devfn,
                             int bus,
                             int entry_nr,
-                            int msi_type)
+                            uint64_t table_base)
 {
     int rc;
     struct physdev_map_pirq map;
@@ -63,10 +63,10 @@ int xc_physdev_map_pirq_msi(int xc_handl
     map.type = MAP_PIRQ_TYPE_MSI;
     map.index = index;
     map.pirq = *pirq;
-    map.msi_info.devfn = devfn;
-    map.msi_info.bus = bus;
-    map.msi_info.entry_nr = entry_nr;
-    map.msi_info.msi = msi_type;
+    map.bus = bus;
+    map.devfn = devfn;
+    map.entry_nr = entry_nr;
+    map.table_base = table_base;
 
     rc = do_physdev_op(xc_handle, PHYSDEVOP_map_pirq, &map);
 
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/libxc/xenctrl.h     Wed Aug 13 22:22:21 2008 +0900
@@ -917,7 +917,7 @@ int xc_physdev_map_pirq_msi(int xc_handl
                             int devfn,
                             int bus,
                             int entry_nr,
-                            int msi_type);
+                            uint64_t table_base);
 
 int xc_physdev_unmap_pirq(int xc_handle,
                           int domid,
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/misc/xend
--- a/tools/misc/xend   Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/misc/xend   Wed Aug 13 22:22:21 2008 +0900
@@ -77,6 +77,10 @@ def check_user():
         hline()
         raise CheckError("invalid user")
 
+def start_daemon(daemon, *args):
+    if os.fork() == 0:
+        os.execvp(daemon, (daemon,) + args)
+
 def start_xenstored():
     pidfname = "/var/run/xenstore.pid"
     try:
@@ -102,13 +106,15 @@ def start_xenstored():
     s,o = commands.getstatusoutput(cmd)
 
 def start_consoled():
-    if os.fork() == 0:
-        os.execvp('xenconsoled', ['xenconsoled'])
+    XENCONSOLED_TRACE = os.getenv("XENCONSOLED_TRACE")
+    args = ""
+    if XENCONSOLED_TRACE:
+        args += "--log=" + XENCONSOLED_TRACE
+    start_daemon("xenconsoled", args)
 
 def start_blktapctrl():
-    if os.fork() == 0:
-        os.execvp('blktapctrl', ['blktapctrl'])
-            
+    start_daemon("blktapctrl", "")
+
 def main():
     try:
         check_logging()
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/python/xen/xend/XendAPI.py
--- a/tools/python/xen/xend/XendAPI.py  Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/python/xen/xend/XendAPI.py  Wed Aug 13 22:22:21 2008 +0900
@@ -2265,7 +2265,8 @@ class XendAPI(object):
             'type': image.type,
             'sharable': image.sharable,
             'read_only': image.read_only,
-            'other_config': image.other_config
+            'other_config': image.other_config,
+            'security_label' : image.get_security_label()
             })
 
     # Class Functions    
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/python/xen/xend/XendConfig.py       Wed Aug 13 22:22:21 2008 +0900
@@ -448,7 +448,7 @@ class XendConfig(dict):
                 self['platform']['hpet'] = 0
             if 'loader' not in self['platform']:
                 # Old configs may have hvmloader set as PV_kernel param
-                if self.has_key('PV_kernel') and re.search('hvmloader', self['PV_kernel']):
+                if self.has_key('PV_kernel') and self['PV_kernel'] != '':
                     self['platform']['loader'] = self['PV_kernel']
                     self['PV_kernel'] = ''
                 else:
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/python/xen/xend/XendPIF.py
--- a/tools/python/xen/xend/XendPIF.py  Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/python/xen/xend/XendPIF.py  Wed Aug 13 22:22:21 2008 +0900
@@ -95,6 +95,22 @@ def linux_set_mtu(iface, mtu):
     except ValueError:
         return False
 
+def linux_get_mtu(device):
+    return _linux_get_pif_param(device, 'mtu')
+
+def linux_get_mac(device):
+    return _linux_get_pif_param(device, 'link/ether')
+
+def _linux_get_pif_param(device, param_name):
+    ip_get_dev_data = 'ip link show %s' % device
+    rc, output = commands.getstatusoutput(ip_get_dev_data)
+    if rc == 0:
+        params = output.split(' ')
+        for i in xrange(len(params)):
+            if params[i] == param_name:
+                return params[i+1]
+    return ''
+
 def _create_VLAN(dev, vlan):
     rc, _ = commands.getstatusoutput('vconfig add %s %d' %
                                      (dev, vlan))
@@ -259,8 +275,8 @@ class XendPIF(XendBase):
         # Create the record
         record = {
             "device":  device,
-            "MAC":     '',
-            "MTU":     '',
+            "MAC":     linux_get_mac('%s.%d' % (device, vlan)),
+            "MTU":     linux_get_mtu('%s.%d' % (device, vlan)),
             "network": network_uuid,
             "VLAN":    vlan
             }
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/Makefile
--- a/xen/Makefile      Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/Makefile      Wed Aug 13 22:22:21 2008 +0900
@@ -2,7 +2,7 @@
 # All other places this is stored (eg. compile.h) should be autogenerated.
 export XEN_VERSION       = 3
 export XEN_SUBVERSION    = 3
-export XEN_EXTRAVERSION ?= .0-rc4-pre$(XEN_VENDORVERSION)
+export XEN_EXTRAVERSION ?= .0-rc5-pre$(XEN_VENDORVERSION)
 export XEN_FULLVERSION   = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
 -include xen-version
 
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h     Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce.h     Wed Aug 13 22:22:21 2008 +0900
@@ -26,5 +26,5 @@ void x86_mcinfo_dump(struct mc_info *mi)
 void x86_mcinfo_dump(struct mc_info *mi);
 
 /* Global variables */
-extern int mce_disabled __initdata;
+extern int mce_disabled;
 extern unsigned int nr_mce_banks;
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/mm/shadow/common.c   Wed Aug 13 22:22:21 2008 +0900
@@ -3357,23 +3357,45 @@ shadow_write_p2m_entry(struct vcpu *v, u
         }
     }
 
-    /* If we're removing a superpage mapping from the p2m, remove all the
-     * MFNs covered by it from the shadows too. */
+    /* If we're removing a superpage mapping from the p2m, we need to check 
+     * all the pages covered by it.  If they're still there in the new 
+     * scheme, that's OK, but otherwise they must be unshadowed. */
     if ( level == 2 && (l1e_get_flags(*p) & _PAGE_PRESENT) &&
          (l1e_get_flags(*p) & _PAGE_PSE) )
     {
         unsigned int i;
-        mfn_t mfn = _mfn(l1e_get_pfn(*p));
+        cpumask_t flushmask;
+        mfn_t omfn = _mfn(l1e_get_pfn(*p));
+        mfn_t nmfn = _mfn(l1e_get_pfn(new));
+        l1_pgentry_t *npte = NULL;
         p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
-        if ( p2m_is_valid(p2mt) && mfn_valid(mfn) )
-        {
+        if ( p2m_is_valid(p2mt) && mfn_valid(omfn) )
+        {
+            cpus_clear(flushmask);
+
+            /* If we're replacing a superpage with a normal L1 page, map it */
+            if ( (l1e_get_flags(new) & _PAGE_PRESENT)
+                 && !(l1e_get_flags(new) & _PAGE_PSE) 
+                 && mfn_valid(nmfn) )
+                npte = map_domain_page(mfn_x(nmfn));
+            
             for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
             {
-                sh_remove_all_shadows_and_parents(v, mfn);
-                if ( sh_remove_all_mappings(v, mfn) )
-                    flush_tlb_mask(d->domain_dirty_cpumask);
-                mfn = _mfn(mfn_x(mfn) + 1);
+                if ( !npte 
+                     || !p2m_is_ram(p2m_flags_to_type(l1e_get_flags(npte[i])))
+                     || l1e_get_pfn(npte[i]) != mfn_x(omfn) )
+                {
+                    /* This GFN->MFN mapping has gone away */
+                    sh_remove_all_shadows_and_parents(v, omfn);
+                    if ( sh_remove_all_mappings(v, omfn) )
+                        cpus_or(flushmask, flushmask, d->domain_dirty_cpumask);
+                }
+                omfn = _mfn(mfn_x(omfn) + 1);
             }
+            flush_tlb_mask(flushmask);
+            
+            if ( npte )
+                unmap_domain_page(npte);
         }
     }
 
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/mm/shadow/multi.c    Wed Aug 13 22:22:21 2008 +0900
@@ -3181,14 +3181,9 @@ static int sh_page_fault(struct vcpu *v,
     rc = guest_walk_tables(v, va, &gw, regs->error_code);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+    regs->error_code &= ~PFEC_page_present;
     if ( !(rc & _PAGE_PRESENT) )
         regs->error_code |= PFEC_page_present;
-    else if ( regs->error_code & PFEC_page_present )
-    {
-            SHADOW_ERROR("OOS paranoia: Something is wrong in guest TLB"
-                         " flushing. Have fun debugging it.\n");
-            regs->error_code &= ~PFEC_page_present;
-    }
 #endif
 
     if ( rc != 0 )
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/mm/shadow/private.h  Wed Aug 13 22:22:21 2008 +0900
@@ -213,15 +213,14 @@ struct shadow_page_info
     };
 };
 
-/* The structure above *must* be the same size as a struct page_info
+/* The structure above *must* be no larger than a struct page_info
  * from mm.h, since we'll be using the same space in the frametable. 
  * Also, the mbz field must line up with the owner field of normal 
  * pages, so they look properly like anonymous/xen pages. */
 static inline void shadow_check_page_struct_offsets(void) {
-    BUILD_BUG_ON(sizeof (struct shadow_page_info) 
-                 != sizeof (struct page_info));
-    BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) 
-                 != offsetof(struct page_info, u.inuse._domain));
+    BUILD_BUG_ON(sizeof (struct shadow_page_info) > sizeof (struct page_info));
+    BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) !=
+                 offsetof(struct page_info, u.inuse._domain));
 };
 
 /* Shadow type codes */
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c        Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/msi.c        Wed Aug 13 22:22:21 2008 +0900
@@ -490,28 +490,6 @@ static int msi_capability_init(struct pc
     return 0;
 }
 
-static u64 pci_resource_start(struct pci_dev *dev, u8 bar_index)
-{
-    u64 bar_base;
-    u32 reg_val;
-    u8 bus = dev->bus;
-    u8 slot = PCI_SLOT(dev->devfn);
-    u8 func = PCI_FUNC(dev->devfn);
-
-    reg_val = pci_conf_read32(bus, slot, func,
-                              PCI_BASE_ADDRESS_0 + 4 * bar_index);
-    bar_base = reg_val & PCI_BASE_ADDRESS_MEM_MASK;
-    if ( ( reg_val & PCI_BASE_ADDRESS_MEM_TYPE_MASK ) ==
-         PCI_BASE_ADDRESS_MEM_TYPE_64 )
-    {
-        reg_val = pci_conf_read32(bus, slot, func,
-                                  PCI_BASE_ADDRESS_0 + 4 * (bar_index + 1));
-        bar_base |= ((u64)reg_val) << 32;
-    }
-
-    return bar_base;
-}
-
 /**
  * msix_capability_init - configure device's MSI-X capability
  * @dev: pointer to the pci_dev data structure of MSI-X device function
@@ -522,7 +500,7 @@ static u64 pci_resource_start(struct pci
  * single MSI-X irq. A return of zero indicates the successful setup of
  * requested MSI-X entries with allocated irqs or non-zero for otherwise.
  **/
-static int msix_capability_init(struct pci_dev *dev, int vector, int entry_nr)
+static int msix_capability_init(struct pci_dev *dev, struct msi_info *msi)
 {
     struct msi_desc *entry;
     int pos;
@@ -549,7 +527,7 @@ static int msix_capability_init(struct p
     table_offset = pci_conf_read32(bus, slot, func, msix_table_offset_reg(pos));
     bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
     table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
-    phys_addr = pci_resource_start(dev, bir) + table_offset;
+    phys_addr = msi->table_base + table_offset;
     idx = msix_fixmap_alloc();
     if ( idx < 0 )
     {
@@ -561,11 +539,11 @@ static int msix_capability_init(struct p
 
     entry->msi_attrib.type = PCI_CAP_ID_MSIX;
     entry->msi_attrib.is_64 = 1;
-    entry->msi_attrib.entry_nr = entry_nr;
+    entry->msi_attrib.entry_nr = msi->entry_nr;
     entry->msi_attrib.maskbit = 1;
     entry->msi_attrib.masked = 1;
     entry->msi_attrib.pos = pos;
-    entry->vector = vector;
+    entry->vector = msi->vector;
     entry->dev = dev;
     entry->mask_base = base;
 
@@ -589,24 +567,25 @@ static int msix_capability_init(struct p
  * indicates the successful setup of an entry zero with the new MSI
  * irq or non-zero for otherwise.
  **/
-static int __pci_enable_msi(u8 bus, u8 devfn, int vector)
+static int __pci_enable_msi(struct msi_info *msi)
 {
     int status;
     struct pci_dev *pdev;
 
-    pdev = pci_lock_pdev(bus, devfn);
+    pdev = pci_lock_pdev(msi->bus, msi->devfn);
     if ( !pdev )
        return -ENODEV;
 
-    if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSI) )
+    if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSI) )
     {
        spin_unlock(&pdev->lock);
-        dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on device \
-            %02x:%02x.%01x.\n", vector, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+        dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on "
+            "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
+            PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
         return 0;
     }
 
-    status = msi_capability_init(pdev, vector);
+    status = msi_capability_init(pdev, msi->vector);
     spin_unlock(&pdev->lock);
     return status;
 }
@@ -659,37 +638,37 @@ static void __pci_disable_msi(int vector
  * of irqs available. Driver should use the returned value to re-send
  * its request.
  **/
-static int __pci_enable_msix(u8 bus, u8 devfn, int vector, int entry_nr)
+static int __pci_enable_msix(struct msi_info *msi)
 {
     int status, pos, nr_entries;
     struct pci_dev *pdev;
     u16 control;
-    u8 slot = PCI_SLOT(devfn);
-    u8 func = PCI_FUNC(devfn);
-
-    pdev = pci_lock_pdev(bus, devfn);
+    u8 slot = PCI_SLOT(msi->devfn);
+    u8 func = PCI_FUNC(msi->devfn);
+
+    pdev = pci_lock_pdev(msi->bus, msi->devfn);
     if ( !pdev )
        return -ENODEV;
 
-    pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
-    control = pci_conf_read16(bus, slot, func, msi_control_reg(pos));
+    pos = pci_find_cap_offset(msi->bus, slot, func, PCI_CAP_ID_MSIX);
+    control = pci_conf_read16(msi->bus, slot, func, msi_control_reg(pos));
     nr_entries = multi_msix_capable(control);
-    if (entry_nr > nr_entries)
+    if (msi->entry_nr > nr_entries)
     {
        spin_unlock(&pdev->lock);
         return -EINVAL;
     }
 
-    if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSIX) )
+    if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSIX) )
     {
        spin_unlock(&pdev->lock);
-        dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on \
-                device %02x:%02x.%01x.\n", vector, bus,
-                PCI_SLOT(devfn), PCI_FUNC(devfn));
+        dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on "
+                "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
+                PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
         return 0;
     }
 
-    status = msix_capability_init(pdev, vector, entry_nr);
+    status = msix_capability_init(pdev, msi);
     spin_unlock(&pdev->lock);
     return status;
 }
@@ -727,13 +706,12 @@ static void __pci_disable_msix(int vecto
     spin_unlock(&dev->lock);
 }
 
-int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi)
-{
-    ASSERT(spin_is_locked(&irq_desc[vector].lock));
-    if ( msi )
-        return __pci_enable_msi(bus, devfn, vector);
-    else
-        return __pci_enable_msix(bus, devfn, vector, entry_nr);
+int pci_enable_msi(struct msi_info *msi)
+{
+    ASSERT(spin_is_locked(&irq_desc[msi->vector].lock));
+
+    return  msi->table_base ? __pci_enable_msix(msi) :
+                              __pci_enable_msi(msi);
 }
 
 void pci_disable_msi(int vector)
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/oprofile/nmi_int.c
--- a/xen/arch/x86/oprofile/nmi_int.c   Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/oprofile/nmi_int.c   Wed Aug 13 22:22:21 2008 +0900
@@ -296,24 +296,40 @@ static int __init ppro_init(char ** cpu_
 {
        __u8 cpu_model = current_cpu_data.x86_model;
 
-       if (cpu_model == 15 || cpu_model == 23) {
+       switch (cpu_model) {
+       case 0 ... 2:
+               *cpu_type = "i386/ppro";
+               break;
+       case 3 ... 5:
+               *cpu_type = "i386/pii";
+               break;
+       case 6 ... 8:
+               *cpu_type = "i386/piii";
+               break;
+       case 9:
+               *cpu_type = "i386/p6_mobile";
+               break;
+       case 10 ... 13:
+               *cpu_type = "i386/p6";
+               break;
+       case 14:
+               *cpu_type = "i386/core";
+               break;
+       case 15: case 23:
                *cpu_type = "i386/core_2";
                ppro_has_global_ctrl = 1;
-       } else if (cpu_model == 14)
-               *cpu_type = "i386/core";
-       else if (cpu_model > 13) {
+               break;
+       case 26:
+               *cpu_type = "i386/core_2";
+               ppro_has_global_ctrl = 1;
+               break;
+       default:
+               /* Unknown */
                printk("xenoprof: Initialization failed. "
                       "Intel processor model %d for P6 class family is not "
                       "supported\n", cpu_model);
                return 0;
-       } else if (cpu_model == 9)
-               *cpu_type = "i386/p6_mobile";
-       else if (cpu_model > 5)
-               *cpu_type = "i386/piii";
-       else if (cpu_model > 2)
-               *cpu_type = "i386/pii";
-       else
-               *cpu_type = "i386/ppro";
+       }
 
        model = &op_ppro_spec;
        return 1;
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/physdev.c    Wed Aug 13 22:22:21 2008 +0900
@@ -66,6 +66,7 @@ static int map_domain_pirq(struct domain
 {
     int ret = 0;
     int old_vector, old_pirq;
+    struct msi_info msi;
 
     if ( d == NULL )
         return -EINVAL;
@@ -115,10 +116,14 @@ static int map_domain_pirq(struct domain
                      vector);
         desc->handler = &pci_msi_type;
 
-        ret = pci_enable_msi(map->msi_info.bus,
-                                    map->msi_info.devfn, vector,
-                                                        map->msi_info.entry_nr,
-                                                        map->msi_info.msi);
+        msi.bus = map->bus;
+        msi.devfn = map->devfn;
+        msi.entry_nr = map->entry_nr;
+        msi.table_base = map->table_base;
+        msi.vector = vector;
+
+        ret = pci_enable_msi(&msi);
+
         spin_unlock_irqrestore(&desc->lock, flags);
         if ( ret )
             goto done;
@@ -139,7 +144,7 @@ static int unmap_domain_pirq(struct doma
     int ret = 0;
     int vector;
 
-    if ( d == NULL || pirq < 0 || pirq > NR_PIRQS )
+    if ( d == NULL || pirq < 0 || pirq >= NR_PIRQS )
         return -EINVAL;
 
     if ( !IS_PRIV(current->domain) )
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/common/page_alloc.c   Wed Aug 13 22:22:21 2008 +0900
@@ -950,6 +950,14 @@ static void page_scrub_softirq(void)
     void             *p;
     int               i;
     s_time_t          start = NOW();
+    static spinlock_t serialise_lock = SPIN_LOCK_UNLOCKED;
+
+    /* free_heap_pages() does not parallelise well. Serialise this function. */
+    if ( !spin_trylock(&serialise_lock) )
+    {
+        set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(1));
+        return;
+    }
 
     /* Aim to do 1ms of work every 10ms. */
     do {
@@ -958,7 +966,7 @@ static void page_scrub_softirq(void)
         if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
         {
             spin_unlock(&page_scrub_lock);
-            return;
+            goto out;
         }
         
         /* Peel up to 16 pages from the list. */
@@ -989,6 +997,9 @@ static void page_scrub_softirq(void)
     } while ( (NOW() - start) < MILLISECS(1) );
 
     set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(10));
+
+ out:
+    spin_unlock(&serialise_lock);
 }
 
 static void page_scrub_timer_fn(void *unused)
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/drivers/passthrough/io.c
--- a/xen/drivers/passthrough/io.c      Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/drivers/passthrough/io.c      Wed Aug 13 22:22:21 2008 +0900
@@ -74,6 +74,9 @@ int pt_irq_create_bind_vtd(
     if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI )
     {
         int pirq = pt_irq_bind->machine_irq;
+
+        if ( pirq < 0 || pirq >= NR_IRQS )
+            return -EINVAL;
 
         if ( !(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_VALID ) )
         {
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.c       Wed Aug 13 22:22:21 2008 +0900
@@ -1789,7 +1789,8 @@ int intel_vtd_setup(void)
     memset(domid_bitmap, 0, domid_bitmap_size / 8);
     set_bit(0, domid_bitmap);
 
-    init_vtd_hw();
+    if ( init_vtd_hw() )
+        goto error;
 
     register_keyhandler('V', dump_iommu_info, "dump iommu info");
 
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/include/asm-x86/event.h
--- a/xen/include/asm-x86/event.h       Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/include/asm-x86/event.h       Wed Aug 13 22:22:21 2008 +0900
@@ -69,12 +69,7 @@ static inline void local_event_delivery_
 /* No arch specific virq definition now. Default to global. */
 static inline int arch_virq_is_global(int virq)
 {
-    switch (virq) {
-    case VIRQ_MCA:
-        return 1;
-    default:
-        return 1;
-    }
+    return 1;
 }
 
 #endif
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/include/asm-x86/msi.h
--- a/xen/include/asm-x86/msi.h Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/include/asm-x86/msi.h Wed Aug 13 22:22:21 2008 +0900
@@ -53,6 +53,14 @@
 #else
 #define MAX_MSIX_PAGES              32
 #endif
+
+struct msi_info {
+    int bus;
+    int devfn;
+    int vector;
+    int entry_nr;
+    uint64_t table_base;
+};
 
 struct msi_msg {
        u32     address_lo;     /* low 32 bits of msi message address */
@@ -64,7 +72,7 @@ extern void mask_msi_irq(unsigned int ir
 extern void mask_msi_irq(unsigned int irq);
 extern void unmask_msi_irq(unsigned int irq);
 extern void set_msi_irq_affinity(unsigned int irq, cpumask_t mask);
-extern int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi);
+extern int pci_enable_msi(struct msi_info *msi);
 extern void pci_disable_msi(int vector);
 extern void pci_cleanup_msi(struct pci_dev *pdev);
 
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/include/public/physdev.h
--- a/xen/include/public/physdev.h      Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/include/public/physdev.h      Wed Aug 13 22:22:21 2008 +0900
@@ -136,10 +136,13 @@ struct physdev_map_pirq {
     /* IN or OUT */
     int pirq;
     /* IN */
-    struct {
-        int bus, devfn, entry_nr;
-               int msi;  /* 0 - MSIX    1 - MSI */
-    } msi_info;
+    int bus;
+    /* IN */
+    int devfn;
+    /* IN */
+    int entry_nr;
+    /* IN */
+    uint64_t table_base;
 };
 typedef struct physdev_map_pirq physdev_map_pirq_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with Rackspace, which monitors our
servers 24x7x365 and backs them with Rackspace's Fanatical Support®.