[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User Isaku Yamahata <yamahata@xxxxxxxxxxxxx> # Date 1218633741 -32400 # Node ID da236d7f59b963585800e7471f8a0451b83ae569 # Parent fa8be8a6cb74976d5a96f830a9a2238abf622822 # Parent c6402709acc8122e3f8f92a885750afb4061ac61 merge with xen-unstable.hg --- .hgtags | 1 docs/misc/kexec_and_kdump.txt | 213 ++++++++++++++++++++++++++++++++++++ extras/mini-os/include/lwipopts.h | 1 tools/Makefile | 7 - tools/cross-install | 8 + tools/ioemu/hw/pass-through.h | 1 tools/ioemu/hw/pt-msi.c | 24 +--- tools/libxc/xc_physdev.c | 10 - tools/libxc/xenctrl.h | 2 tools/misc/xend | 16 +- tools/python/xen/xend/XendAPI.py | 3 tools/python/xen/xend/XendConfig.py | 2 tools/python/xen/xend/XendPIF.py | 20 +++ xen/Makefile | 2 xen/arch/x86/cpu/mcheck/mce.h | 2 xen/arch/x86/mm/shadow/common.c | 40 +++++- xen/arch/x86/mm/shadow/multi.c | 7 - xen/arch/x86/mm/shadow/private.h | 9 - xen/arch/x86/msi.c | 82 +++++-------- xen/arch/x86/oprofile/nmi_int.c | 40 ++++-- xen/arch/x86/physdev.c | 15 +- xen/common/page_alloc.c | 13 ++ xen/drivers/passthrough/io.c | 3 xen/drivers/passthrough/vtd/iommu.c | 3 xen/include/asm-x86/event.h | 7 - xen/include/asm-x86/msi.h | 10 + xen/include/public/physdev.h | 11 + 27 files changed, 417 insertions(+), 135 deletions(-) diff -r fa8be8a6cb74 -r da236d7f59b9 .hgtags --- a/.hgtags Wed Aug 13 13:18:06 2008 +0900 +++ b/.hgtags Wed Aug 13 22:22:21 2008 +0900 @@ -28,3 +28,4 @@ c3494402098e26507fc61a6579832c0149351d6a c3494402098e26507fc61a6579832c0149351d6a 3.3.0-rc1 dde12ff94c96331668fe38a7b09506fa94d03c34 3.3.0-rc2 57fca3648f25dcc085ee380954342960a7979987 3.3.0-rc3 +96d0a48e87ee46ba7b73e8c906a7e2e0baf60e2e 3.3.0-rc4 diff -r fa8be8a6cb74 -r da236d7f59b9 docs/misc/kexec_and_kdump.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docs/misc/kexec_and_kdump.txt Wed Aug 13 22:22:21 2008 +0900 @@ -0,0 +1,213 @@ + +======================= +Kexec and Kdump for Xen +======================= + +This is a brief guide to using Kexec and Kdump in conjunction 
with Xen. +This functionality works at the level of the hypervisor and dom0 kernel, +and will thus affect all guests running on a machine. + +At this stage it does not work in conjunction with domU kernels. + +This document should be read in conjunction with +Documentation/kdump/kdump.txt from the Linux kernel source. +Some of the information in this document has been +sourced from that document. + + +Kexec +===== + +It is possible to kexec from Xen or Linux to either Xen or Linux. + +Pattern | Before Kexec | After Kexec +---------------+--------------------+-------------------- +Xen -> Xen | first hypervisor & | second hypervisor & + | dom0 kernel | dom0 kernel +---------------+--------------------+-------------------- +Xen -> Linux | first hypervisor & | second kernel + | dom0 kernel | +---------------+--------------------+-------------------- +Linux -> Xen | first kernel | second hypervisor & + | | dom0 kernel +---------------+--------------------+-------------------- +Linux -> Linux | first kernel | second kernel + +If you are kexecing to Xen then you will also need to prepare the second +hypervisor and dom0 kernel that will run after kexec. These may be the same +as the first hypervisor and dom0 kernel that are used before kexec if you +are kexecing from Xen to Xen. + +If you are kexecing to Linux then you will need to prepare the second Linux +kernel that will run after kexec. In the case that you are kexecing from +Linux, it may be the same as the first kernel image that runs before +kexec. + +Regardless of which kexec pattern you wish to run, you will +need to have kexec-tools installed. This provides the kexec command. + +1. Load +------- + +Before kexecing, the second kernel or hypervisor & dom0 kernel +need to be loaded into the running hypervisor or kernel using +the kexec command. + + a. 
To kexec to Xen (Xen->Xen or Linux->Xen) + + kexec -l --append="XEN_ARGS -- DOM0_ARGS" \ + --vmm="XEN_IMAGE" "DOM0_IMAGE" KEXEC_ARGS + + where: + XEN_ARGS: command line arguments to the xen hypervisor + On x86 the no-real-mode argument should be included + DOM0_ARGS: command line arguments to the dom0 kernel + XEN_IMAGE: xen hypervisor image + DOM0_IMAGE: dom0 kernel image + KEXEC_ARGS: additional kexec-tools command line arguments + + e.g. kexec -l --append "no-real-mode" --vmm="/boot/xen.gz" /boot/vmlinuz.gz + + OR + + b. To kexec to Linux (Xen->Linux or Linux->Linux) + + kexec -l LINUX_IMAGE --append "$LINUX_ARGS" KEXEC_ARGS + + where: + LINUX_IMAGE: the second linux kernel image + LINUX_ARGS: command line arguments to the second linux kernel + KEXEC_ARGS: additional kexec-tools command line arguments + + e.g. kexec -l /boot/second-vmlinuz.gz + +2. Execute +---------- + +Once the second kernel is loaded, it can be executed at any time. +If you don't see the second kernel booting within a second or so, +you are in trouble :( + + kexec -e + +Kdump +===== + +It is possible to kdump from Xen or Linux to a Linux crash kernel. +It is not possible to use Xen as a crash kernel. + +Pattern | Before Kexec | After Kexec +---------------+--------------------+-------------------- +Xen -> Linux | first hypervisor & | crash kernel + | dom0 kernel | +---------------+--------------------+-------------------- +Linux -> Linux | first kernel | crash kernel + +Regardless of whether you are kdumping from Xen or Linux you will need to +prepare a Linux crash kernel. You will also need to have kexec-tools +installed. This provides the kexec command. + +0. Set-Up The Crash Kernel Region +--------------------------------- + +In order to use kdump an area of memory has to be reserved at boot time. +This is the area of memory that the crash kernel will use, thus allowing it +to run without disrupting the memory used by the first kernel. 
This area is +called the crash kernel region and is reserved using the crashkernel +command line parameter to the Xen hypervisor. It has two forms: + + i) crashkernel=size + + This is the simplest and recommended way to reserve the crash kernel + region. Just specify how large the region should be and the hypervisor + will find a good location for it. A good size to start with is 128MB + + e.g. + + crashkernel=128M + + ii) crashkernel=size@base + + In this form the base address is provided in addition to + the size. Use this if auto-placement doesn't work for some reason. + It is strongly recommended that the base address be aligned + to 64MB, else memory below the alignment point will not + be usable. + + e.g. crashkernel=128M@256M + + Regardless of which of the two forms of the crashkernel command line you + use, the crash kernel region should appear in /proc/iomem on x86 or + /proc/iomem_machine on ia64. If it doesn't then either the crashkernel + parameter is missing, or for some reason the region couldn't be placed - + for instance because it is too large. + + # cat /proc/iomem + ... + 00100000-07feffff : System RAM + 00100000-00bfffff : Hypervisor code and data + 0533f000-0733efff : Crash kernel + ... + + +1. Load +------- + +Once you are running in a kexec-enabled hypervisor and dom0, +you can prepare to kdump by loading the crash kernel into the +running kernel. + + kexec -p CRASH_KERNEL_IMAGE --append "$CRASH_KERNEL_ARGS" KEXEC_ARGS + + where: + CRASH_KERNEL_IMAGE: the crash kernel image + CRASH_KERNEL_ARGS: command line arguments to the crash kernel + init 1 is strongly recommended + irqpoll is strongly recommended + maxcpus=1 is required if the crash kernel is SMP + reset_devices is strongly recommended + KEXEC_ARGS: additional kexec-tools command line arguments + On x86 --args-linux should be supplied if an uncompressed + vmlinux image is used as the crash kernel + + e.g. 
kexec -p /boot/crash-vmlinuz \ + --append "init 1 irqpoll maxcpus=1 reset_devices" --args-linux + +On x86 systems the crash kernel may be either +- An uncompressed vmlinux image if the kernel is not relocatable +- A compressed bzImage or vmlinuz image if the kernel is relocatable +- Relocatability is controlled by the CONFIG_RELOCATABLE kernel + compile configuration parameter. This option may not be available + depending on the kernel version +On ia64 + Either a vmlinuz or vmlinux.gz image may be used + + +2. Execute +---------- + +Once the second kernel is loaded, the crash kernel will be executed if the +hypervisor panics. It will also be executed if dom0 panics or if dom0 +oopses and /proc/sys/kernel/panic_on_oops is set to a non-zero value + +echo 1 > /proc/sys/kernel/panic_on_oops + +Kdump may also be triggered (for testing) + + a. From Domain 0 + + echo c > /proc/sysrq-trigger + + b. From Xen + + Enter the xen console + + ctrl^a ctrl^a (may be bound to a different key, this is the default) + + Select C for "trigger a crashdump" + + C + +If you don't see the crash kernel booting within a second or so, +you are in trouble :( + diff -r fa8be8a6cb74 -r da236d7f59b9 extras/mini-os/include/lwipopts.h --- a/extras/mini-os/include/lwipopts.h Wed Aug 13 13:18:06 2008 +0900 +++ b/extras/mini-os/include/lwipopts.h Wed Aug 13 22:22:21 2008 +0900 @@ -15,6 +15,7 @@ #define LWIP_DHCP 1 #define LWIP_COMPAT_SOCKETS 0 #define LWIP_IGMP 1 +#define LWIP_USE_HEAP_FROM_INTERRUPT 1 #define MEMP_NUM_SYS_TIMEOUT 10 #define TCP_SND_BUF 3000 #define TCP_MSS 1500 diff -r fa8be8a6cb74 -r da236d7f59b9 tools/Makefile --- a/tools/Makefile Wed Aug 13 13:18:06 2008 +0900 +++ b/tools/Makefile Wed Aug 13 22:22:21 2008 +0900 @@ -38,8 +38,10 @@ endif # For the sake of linking, set the sys-root ifneq ($(CROSS_COMPILE),) +CROSS_BIN_PATH ?= /usr/$(CROSS_COMPILE:-=)/bin CROSS_SYS_ROOT ?= /usr/$(CROSS_COMPILE:-=)/sys-root -export CROSS_SYS_ROOT +export CROSS_SYS_ROOT # exported for check/funcs.sh 
+export CROSS_BIN_PATH # exported for cross-install.sh endif .PHONY: all @@ -57,7 +59,8 @@ ifneq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ ifneq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH)) IOEMU_CONFIGURE_CROSS ?= --cpu=$(XEN_TARGET_ARCH) \ --cross-prefix=$(CROSS_COMPILE) \ - --interp-prefix=$(CROSS_SYS_ROOT) + --interp-prefix=$(CROSS_SYS_ROOT) \ + --install=$(CURDIR)/cross-install endif ioemu/config-host.mak: diff -r fa8be8a6cb74 -r da236d7f59b9 tools/cross-install --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/cross-install Wed Aug 13 22:22:21 2008 +0900 @@ -0,0 +1,8 @@ +#!/bin/sh + +# prepend CROSS_BIN_PATH to find the right "strip" +if [ -n "$CROSS_BIN_PATH" ]; then + PATH="$CROSS_BIN_PATH:$PATH" +fi + +exec install "$@" diff -r fa8be8a6cb74 -r da236d7f59b9 tools/ioemu/hw/pass-through.h --- a/tools/ioemu/hw/pass-through.h Wed Aug 13 13:18:06 2008 +0900 +++ b/tools/ioemu/hw/pass-through.h Wed Aug 13 22:22:21 2008 +0900 @@ -120,6 +120,7 @@ struct pt_msix_info { int enabled; int total_entries; int bar_index; + uint64_t table_base; uint32_t table_off; uint64_t mmio_base_addr; int mmio_index; diff -r fa8be8a6cb74 -r da236d7f59b9 tools/ioemu/hw/pt-msi.c --- a/tools/ioemu/hw/pt-msi.c Wed Aug 13 13:18:06 2008 +0900 +++ b/tools/ioemu/hw/pt-msi.c Wed Aug 13 22:22:21 2008 +0900 @@ -38,8 +38,8 @@ int pt_msi_setup(struct pt_dev *dev) } if ( xc_physdev_map_pirq_msi(xc_handle, domid, AUTO_ASSIGN, &pirq, - dev->pci_dev->dev << 3 | dev->pci_dev->func, - dev->pci_dev->bus, 0, 1) ) + dev->pci_dev->dev << 3 | dev->pci_dev->func, + dev->pci_dev->bus, 0, 0) ) { PT_LOG("error map msi\n"); return -1; @@ -121,7 +121,8 @@ static int pt_msix_update_one(struct pt_ { ret = xc_physdev_map_pirq_msi(xc_handle, domid, AUTO_ASSIGN, &pirq, dev->pci_dev->dev << 3 | dev->pci_dev->func, - dev->pci_dev->bus, entry_nr, 0); + dev->pci_dev->bus, entry_nr, + dev->msix->table_base); if ( ret ) { PT_LOG("error map msix entry %x\n", entry_nr); @@ -183,7 +184,7 @@ static void pci_msix_writel(void *opaque 
entry = &msix->msix_entry[entry_nr]; offset = ((addr - msix->mmio_base_addr) % 16) / 4; - if ( offset != 3 && msix->enabled && entry->io_mem[3] & 0x1 ) + if ( offset != 3 && msix->enabled && !(entry->io_mem[3] & 0x1) ) { PT_LOG("can not update msix entry %d since MSI-X is already \ function now.\n", entry_nr); @@ -196,7 +197,7 @@ static void pci_msix_writel(void *opaque if ( offset == 3 ) { - if ( !(val & 0x1) ) + if ( msix->enabled && !(val & 0x1) ) pt_msix_update_one(dev, entry_nr); mask_physical_msix_entry(dev, entry_nr, entry->io_mem[3] & 0x1); } @@ -280,7 +281,6 @@ int pt_msix_init(struct pt_dev *dev, int uint8_t id; uint16_t control; int i, total_entries, table_off, bar_index; - uint64_t bar_base; struct pci_dev *pd = dev->pci_dev; id = pci_read_byte(pd, pos + PCI_CAP_LIST_ID); @@ -314,18 +314,14 @@ int pt_msix_init(struct pt_dev *dev, int table_off = pci_read_long(pd, pos + PCI_MSIX_TABLE); bar_index = dev->msix->bar_index = table_off & PCI_MSIX_BIR; table_off &= table_off & ~PCI_MSIX_BIR; - bar_base = pci_read_long(pd, 0x10 + 4 * bar_index); - if ( (bar_base & 0x6) == 0x4 ) - { - bar_base &= ~0xf; - bar_base += (uint64_t)pci_read_long(pd, 0x10 + 4 * (bar_index + 1)) << 32; - } - PT_LOG("get MSI-X table bar base %lx\n", bar_base); + dev->msix->table_base = dev->pci_dev->base_addr[bar_index]; + PT_LOG("get MSI-X table bar base %llx\n", + (unsigned long long)dev->msix->table_base); dev->msix->fd = open("/dev/mem", O_RDWR); dev->msix->phys_iomem_base = mmap(0, total_entries * 16, PROT_WRITE | PROT_READ, MAP_SHARED | MAP_LOCKED, - dev->msix->fd, bar_base + table_off); + dev->msix->fd, dev->msix->table_base + table_off); PT_LOG("mapping physical MSI-X table to %lx\n", (unsigned long)dev->msix->phys_iomem_base); return 0; diff -r fa8be8a6cb74 -r da236d7f59b9 tools/libxc/xc_physdev.c --- a/tools/libxc/xc_physdev.c Wed Aug 13 13:18:06 2008 +0900 +++ b/tools/libxc/xc_physdev.c Wed Aug 13 22:22:21 2008 +0900 @@ -51,7 +51,7 @@ int xc_physdev_map_pirq_msi(int xc_handl 
int devfn, int bus, int entry_nr, - int msi_type) + uint64_t table_base) { int rc; struct physdev_map_pirq map; @@ -63,10 +63,10 @@ int xc_physdev_map_pirq_msi(int xc_handl map.type = MAP_PIRQ_TYPE_MSI; map.index = index; map.pirq = *pirq; - map.msi_info.devfn = devfn; - map.msi_info.bus = bus; - map.msi_info.entry_nr = entry_nr; - map.msi_info.msi = msi_type; + map.bus = bus; + map.devfn = devfn; + map.entry_nr = entry_nr; + map.table_base = table_base; rc = do_physdev_op(xc_handle, PHYSDEVOP_map_pirq, &map); diff -r fa8be8a6cb74 -r da236d7f59b9 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Wed Aug 13 13:18:06 2008 +0900 +++ b/tools/libxc/xenctrl.h Wed Aug 13 22:22:21 2008 +0900 @@ -917,7 +917,7 @@ int xc_physdev_map_pirq_msi(int xc_handl int devfn, int bus, int entry_nr, - int msi_type); + uint64_t table_base); int xc_physdev_unmap_pirq(int xc_handle, int domid, diff -r fa8be8a6cb74 -r da236d7f59b9 tools/misc/xend --- a/tools/misc/xend Wed Aug 13 13:18:06 2008 +0900 +++ b/tools/misc/xend Wed Aug 13 22:22:21 2008 +0900 @@ -77,6 +77,10 @@ def check_user(): hline() raise CheckError("invalid user") +def start_daemon(daemon, *args): + if os.fork() == 0: + os.execvp(daemon, (daemon,) + args) + def start_xenstored(): pidfname = "/var/run/xenstore.pid" try: @@ -102,13 +106,15 @@ def start_xenstored(): s,o = commands.getstatusoutput(cmd) def start_consoled(): - if os.fork() == 0: - os.execvp('xenconsoled', ['xenconsoled']) + XENCONSOLED_TRACE = os.getenv("XENCONSOLED_TRACE") + args = "" + if XENCONSOLED_TRACE: + args += "--log=" + XENCONSOLED_TRACE + start_daemon("xenconsoled", args) def start_blktapctrl(): - if os.fork() == 0: - os.execvp('blktapctrl', ['blktapctrl']) - + start_daemon("blktapctrl", "") + def main(): try: check_logging() diff -r fa8be8a6cb74 -r da236d7f59b9 tools/python/xen/xend/XendAPI.py --- a/tools/python/xen/xend/XendAPI.py Wed Aug 13 13:18:06 2008 +0900 +++ b/tools/python/xen/xend/XendAPI.py Wed Aug 13 22:22:21 2008 +0900 @@ -2265,7 +2265,8 @@ 
class XendAPI(object): 'type': image.type, 'sharable': image.sharable, 'read_only': image.read_only, - 'other_config': image.other_config + 'other_config': image.other_config, + 'security_label' : image.get_security_label() }) # Class Functions diff -r fa8be8a6cb74 -r da236d7f59b9 tools/python/xen/xend/XendConfig.py --- a/tools/python/xen/xend/XendConfig.py Wed Aug 13 13:18:06 2008 +0900 +++ b/tools/python/xen/xend/XendConfig.py Wed Aug 13 22:22:21 2008 +0900 @@ -448,7 +448,7 @@ class XendConfig(dict): self['platform']['hpet'] = 0 if 'loader' not in self['platform']: # Old configs may have hvmloader set as PV_kernel param - if self.has_key('PV_kernel') and re.search('hvmloader', self['PV_kernel']): + if self.has_key('PV_kernel') and self['PV_kernel'] != '': self['platform']['loader'] = self['PV_kernel'] self['PV_kernel'] = '' else: diff -r fa8be8a6cb74 -r da236d7f59b9 tools/python/xen/xend/XendPIF.py --- a/tools/python/xen/xend/XendPIF.py Wed Aug 13 13:18:06 2008 +0900 +++ b/tools/python/xen/xend/XendPIF.py Wed Aug 13 22:22:21 2008 +0900 @@ -95,6 +95,22 @@ def linux_set_mtu(iface, mtu): except ValueError: return False +def linux_get_mtu(device): + return _linux_get_pif_param(device, 'mtu') + +def linux_get_mac(device): + return _linux_get_pif_param(device, 'link/ether') + +def _linux_get_pif_parm(device, param_name): + ip_get_dev_data = 'ip link show %s' % device + rc, output = commands.getstatusoutput(ip_get_dev_data) + if rc == 0: + params = output.split(' ') + for i in xrange(len(params)): + if params[i] == param_name: + return params[i+1] + return '' + def _create_VLAN(dev, vlan): rc, _ = commands.getstatusoutput('vconfig add %s %d' % (dev, vlan)) @@ -259,8 +275,8 @@ class XendPIF(XendBase): # Create the record record = { "device": device, - "MAC": '', - "MTU": '', + "MAC": linux_get_mac('%s.%d' % (device, vlan)), + "MTU": linux_get_mtu('%s.%d' % (device, vlan)), "network": network_uuid, "VLAN": vlan } diff -r fa8be8a6cb74 -r da236d7f59b9 xen/Makefile --- 
a/xen/Makefile Wed Aug 13 13:18:06 2008 +0900 +++ b/xen/Makefile Wed Aug 13 22:22:21 2008 +0900 @@ -2,7 +2,7 @@ # All other places this is stored (eg. compile.h) should be autogenerated. export XEN_VERSION = 3 export XEN_SUBVERSION = 3 -export XEN_EXTRAVERSION ?= .0-rc4-pre$(XEN_VENDORVERSION) +export XEN_EXTRAVERSION ?= .0-rc5-pre$(XEN_VENDORVERSION) export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) -include xen-version diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/cpu/mcheck/mce.h --- a/xen/arch/x86/cpu/mcheck/mce.h Wed Aug 13 13:18:06 2008 +0900 +++ b/xen/arch/x86/cpu/mcheck/mce.h Wed Aug 13 22:22:21 2008 +0900 @@ -26,5 +26,5 @@ void x86_mcinfo_dump(struct mc_info *mi) void x86_mcinfo_dump(struct mc_info *mi); /* Global variables */ -extern int mce_disabled __initdata; +extern int mce_disabled; extern unsigned int nr_mce_banks; diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Wed Aug 13 13:18:06 2008 +0900 +++ b/xen/arch/x86/mm/shadow/common.c Wed Aug 13 22:22:21 2008 +0900 @@ -3357,23 +3357,45 @@ shadow_write_p2m_entry(struct vcpu *v, u } } - /* If we're removing a superpage mapping from the p2m, remove all the - * MFNs covered by it from the shadows too. */ + /* If we're removing a superpage mapping from the p2m, we need to check + * all the pages covered by it. If they're still there in the new + * scheme, that's OK, but otherwise they must be unshadowed. 
*/ if ( level == 2 && (l1e_get_flags(*p) & _PAGE_PRESENT) && (l1e_get_flags(*p) & _PAGE_PSE) ) { unsigned int i; - mfn_t mfn = _mfn(l1e_get_pfn(*p)); + cpumask_t flushmask; + mfn_t omfn = _mfn(l1e_get_pfn(*p)); + mfn_t nmfn = _mfn(l1e_get_pfn(new)); + l1_pgentry_t *npte = NULL; p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p)); - if ( p2m_is_valid(p2mt) && mfn_valid(mfn) ) - { + if ( p2m_is_valid(p2mt) && mfn_valid(omfn) ) + { + cpus_clear(flushmask); + + /* If we're replacing a superpage with a normal L1 page, map it */ + if ( (l1e_get_flags(new) & _PAGE_PRESENT) + && !(l1e_get_flags(new) & _PAGE_PSE) + && mfn_valid(nmfn) ) + npte = map_domain_page(mfn_x(nmfn)); + for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) { - sh_remove_all_shadows_and_parents(v, mfn); - if ( sh_remove_all_mappings(v, mfn) ) - flush_tlb_mask(d->domain_dirty_cpumask); - mfn = _mfn(mfn_x(mfn) + 1); + if ( !npte + || !p2m_is_ram(p2m_flags_to_type(l1e_get_flags(npte[i]))) + || l1e_get_pfn(npte[i]) != mfn_x(omfn) ) + { + /* This GFN->MFN mapping has gone away */ + sh_remove_all_shadows_and_parents(v, omfn); + if ( sh_remove_all_mappings(v, omfn) ) + cpus_or(flushmask, flushmask, d->domain_dirty_cpumask); + } + omfn = _mfn(mfn_x(omfn) + 1); } + flush_tlb_mask(flushmask); + + if ( npte ) + unmap_domain_page(npte); } } diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Wed Aug 13 13:18:06 2008 +0900 +++ b/xen/arch/x86/mm/shadow/multi.c Wed Aug 13 22:22:21 2008 +0900 @@ -3181,14 +3181,9 @@ static int sh_page_fault(struct vcpu *v, rc = guest_walk_tables(v, va, &gw, regs->error_code); #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) + regs->error_code &= ~PFEC_page_present; if ( !(rc & _PAGE_PRESENT) ) regs->error_code |= PFEC_page_present; - else if ( regs->error_code & PFEC_page_present ) - { - SHADOW_ERROR("OOS paranoia: Something is wrong in guest TLB" - " flushing. 
Have fun debugging it.\n"); - regs->error_code &= ~PFEC_page_present; - } #endif if ( rc != 0 ) diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/mm/shadow/private.h --- a/xen/arch/x86/mm/shadow/private.h Wed Aug 13 13:18:06 2008 +0900 +++ b/xen/arch/x86/mm/shadow/private.h Wed Aug 13 22:22:21 2008 +0900 @@ -213,15 +213,14 @@ struct shadow_page_info }; }; -/* The structure above *must* be the same size as a struct page_info +/* The structure above *must* be no larger than a struct page_info * from mm.h, since we'll be using the same space in the frametable. * Also, the mbz field must line up with the owner field of normal * pages, so they look properly like anonymous/xen pages. */ static inline void shadow_check_page_struct_offsets(void) { - BUILD_BUG_ON(sizeof (struct shadow_page_info) - != sizeof (struct page_info)); - BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) - != offsetof(struct page_info, u.inuse._domain)); + BUILD_BUG_ON(sizeof (struct shadow_page_info) > sizeof (struct page_info)); + BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) != + offsetof(struct page_info, u.inuse._domain)); }; /* Shadow type codes */ diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/msi.c --- a/xen/arch/x86/msi.c Wed Aug 13 13:18:06 2008 +0900 +++ b/xen/arch/x86/msi.c Wed Aug 13 22:22:21 2008 +0900 @@ -490,28 +490,6 @@ static int msi_capability_init(struct pc return 0; } -static u64 pci_resource_start(struct pci_dev *dev, u8 bar_index) -{ - u64 bar_base; - u32 reg_val; - u8 bus = dev->bus; - u8 slot = PCI_SLOT(dev->devfn); - u8 func = PCI_FUNC(dev->devfn); - - reg_val = pci_conf_read32(bus, slot, func, - PCI_BASE_ADDRESS_0 + 4 * bar_index); - bar_base = reg_val & PCI_BASE_ADDRESS_MEM_MASK; - if ( ( reg_val & PCI_BASE_ADDRESS_MEM_TYPE_MASK ) == - PCI_BASE_ADDRESS_MEM_TYPE_64 ) - { - reg_val = pci_conf_read32(bus, slot, func, - PCI_BASE_ADDRESS_0 + 4 * (bar_index + 1)); - bar_base |= ((u64)reg_val) << 32; - } - - return bar_base; -} - /** * msix_capability_init - configure 
device's MSI-X capability * @dev: pointer to the pci_dev data structure of MSI-X device function @@ -522,7 +500,7 @@ static u64 pci_resource_start(struct pci * single MSI-X irq. A return of zero indicates the successful setup of * requested MSI-X entries with allocated irqs or non-zero for otherwise. **/ -static int msix_capability_init(struct pci_dev *dev, int vector, int entry_nr) +static int msix_capability_init(struct pci_dev *dev, struct msi_info *msi) { struct msi_desc *entry; int pos; @@ -549,7 +527,7 @@ static int msix_capability_init(struct p table_offset = pci_conf_read32(bus, slot, func, msix_table_offset_reg(pos)); bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK); table_offset &= ~PCI_MSIX_FLAGS_BIRMASK; - phys_addr = pci_resource_start(dev, bir) + table_offset; + phys_addr = msi->table_base + table_offset; idx = msix_fixmap_alloc(); if ( idx < 0 ) { @@ -561,11 +539,11 @@ static int msix_capability_init(struct p entry->msi_attrib.type = PCI_CAP_ID_MSIX; entry->msi_attrib.is_64 = 1; - entry->msi_attrib.entry_nr = entry_nr; + entry->msi_attrib.entry_nr = msi->entry_nr; entry->msi_attrib.maskbit = 1; entry->msi_attrib.masked = 1; entry->msi_attrib.pos = pos; - entry->vector = vector; + entry->vector = msi->vector; entry->dev = dev; entry->mask_base = base; @@ -589,24 +567,25 @@ static int msix_capability_init(struct p * indicates the successful setup of an entry zero with the new MSI * irq or non-zero for otherwise. 
**/ -static int __pci_enable_msi(u8 bus, u8 devfn, int vector) +static int __pci_enable_msi(struct msi_info *msi) { int status; struct pci_dev *pdev; - pdev = pci_lock_pdev(bus, devfn); + pdev = pci_lock_pdev(msi->bus, msi->devfn); if ( !pdev ) return -ENODEV; - if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSI) ) + if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSI) ) { spin_unlock(&pdev->lock); - dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on device \ - %02x:%02x.%01x.\n", vector, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on " + "device %02x:%02x.%01x.\n", msi->vector, msi->bus, + PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); return 0; } - status = msi_capability_init(pdev, vector); + status = msi_capability_init(pdev, msi->vector); spin_unlock(&pdev->lock); return status; } @@ -659,37 +638,37 @@ static void __pci_disable_msi(int vector * of irqs available. Driver should use the returned value to re-send * its request. 
**/ -static int __pci_enable_msix(u8 bus, u8 devfn, int vector, int entry_nr) +static int __pci_enable_msix(struct msi_info *msi) { int status, pos, nr_entries; struct pci_dev *pdev; u16 control; - u8 slot = PCI_SLOT(devfn); - u8 func = PCI_FUNC(devfn); - - pdev = pci_lock_pdev(bus, devfn); + u8 slot = PCI_SLOT(msi->devfn); + u8 func = PCI_FUNC(msi->devfn); + + pdev = pci_lock_pdev(msi->bus, msi->devfn); if ( !pdev ) return -ENODEV; - pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX); - control = pci_conf_read16(bus, slot, func, msi_control_reg(pos)); + pos = pci_find_cap_offset(msi->bus, slot, func, PCI_CAP_ID_MSIX); + control = pci_conf_read16(msi->bus, slot, func, msi_control_reg(pos)); nr_entries = multi_msix_capable(control); - if (entry_nr > nr_entries) + if (msi->entry_nr > nr_entries) { spin_unlock(&pdev->lock); return -EINVAL; } - if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSIX) ) + if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSIX) ) { spin_unlock(&pdev->lock); - dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on \ - device %02x:%02x.%01x.\n", vector, bus, - PCI_SLOT(devfn), PCI_FUNC(devfn)); + dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on " + "device %02x:%02x.%01x.\n", msi->vector, msi->bus, + PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); return 0; } - status = msix_capability_init(pdev, vector, entry_nr); + status = msix_capability_init(pdev, msi); spin_unlock(&pdev->lock); return status; } @@ -727,13 +706,12 @@ static void __pci_disable_msix(int vecto spin_unlock(&dev->lock); } -int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi) -{ - ASSERT(spin_is_locked(&irq_desc[vector].lock)); - if ( msi ) - return __pci_enable_msi(bus, devfn, vector); - else - return __pci_enable_msix(bus, devfn, vector, entry_nr); +int pci_enable_msi(struct msi_info *msi) +{ + ASSERT(spin_is_locked(&irq_desc[msi->vector].lock)); + + return msi->table_base ? 
__pci_enable_msix(msi) : + __pci_enable_msi(msi); } void pci_disable_msi(int vector) diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/oprofile/nmi_int.c --- a/xen/arch/x86/oprofile/nmi_int.c Wed Aug 13 13:18:06 2008 +0900 +++ b/xen/arch/x86/oprofile/nmi_int.c Wed Aug 13 22:22:21 2008 +0900 @@ -296,24 +296,40 @@ static int __init ppro_init(char ** cpu_ { __u8 cpu_model = current_cpu_data.x86_model; - if (cpu_model == 15 || cpu_model == 23) { + switch (cpu_model) { + case 0 ... 2: + *cpu_type = "i386/ppro"; + break; + case 3 ... 5: + *cpu_type = "i386/pii"; + break; + case 6 ... 8: + *cpu_type = "i386/piii"; + break; + case 9: + *cpu_type = "i386/p6_mobile"; + break; + case 10 ... 13: + *cpu_type = "i386/p6"; + break; + case 14: + *cpu_type = "i386/core"; + break; + case 15: case 23: *cpu_type = "i386/core_2"; ppro_has_global_ctrl = 1; - } else if (cpu_model == 14) - *cpu_type = "i386/core"; - else if (cpu_model > 13) { + break; + case 26: + *cpu_type = "i386/core_2"; + ppro_has_global_ctrl = 1; + break; + default: + /* Unknown */ printk("xenoprof: Initialization failed. 
" "Intel processor model %d for P6 class family is not " "supported\n", cpu_model); return 0; - } else if (cpu_model == 9) - *cpu_type = "i386/p6_mobile"; - else if (cpu_model > 5) - *cpu_type = "i386/piii"; - else if (cpu_model > 2) - *cpu_type = "i386/pii"; - else - *cpu_type = "i386/ppro"; + } model = &op_ppro_spec; return 1; diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/physdev.c --- a/xen/arch/x86/physdev.c Wed Aug 13 13:18:06 2008 +0900 +++ b/xen/arch/x86/physdev.c Wed Aug 13 22:22:21 2008 +0900 @@ -66,6 +66,7 @@ static int map_domain_pirq(struct domain { int ret = 0; int old_vector, old_pirq; + struct msi_info msi; if ( d == NULL ) return -EINVAL; @@ -115,10 +116,14 @@ static int map_domain_pirq(struct domain vector); desc->handler = &pci_msi_type; - ret = pci_enable_msi(map->msi_info.bus, - map->msi_info.devfn, vector, - map->msi_info.entry_nr, - map->msi_info.msi); + msi.bus = map->bus; + msi.devfn = map->devfn; + msi.entry_nr = map->entry_nr; + msi.table_base = map->table_base; + msi.vector = vector; + + ret = pci_enable_msi(&msi); + spin_unlock_irqrestore(&desc->lock, flags); if ( ret ) goto done; @@ -139,7 +144,7 @@ static int unmap_domain_pirq(struct doma int ret = 0; int vector; - if ( d == NULL || pirq < 0 || pirq > NR_PIRQS ) + if ( d == NULL || pirq < 0 || pirq >= NR_PIRQS ) return -EINVAL; if ( !IS_PRIV(current->domain) ) diff -r fa8be8a6cb74 -r da236d7f59b9 xen/common/page_alloc.c --- a/xen/common/page_alloc.c Wed Aug 13 13:18:06 2008 +0900 +++ b/xen/common/page_alloc.c Wed Aug 13 22:22:21 2008 +0900 @@ -950,6 +950,14 @@ static void page_scrub_softirq(void) void *p; int i; s_time_t start = NOW(); + static spinlock_t serialise_lock = SPIN_LOCK_UNLOCKED; + + /* free_heap_pages() does not parallelise well. Serialise this function. */ + if ( !spin_trylock(&serialise_lock) ) + { + set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(1)); + return; + } /* Aim to do 1ms of work every 10ms. 
*/ do { @@ -958,7 +966,7 @@ static void page_scrub_softirq(void) if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) ) { spin_unlock(&page_scrub_lock); - return; + goto out; } /* Peel up to 16 pages from the list. */ @@ -989,6 +997,9 @@ static void page_scrub_softirq(void) } while ( (NOW() - start) < MILLISECS(1) ); set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(10)); + + out: + spin_unlock(&serialise_lock); } static void page_scrub_timer_fn(void *unused) diff -r fa8be8a6cb74 -r da236d7f59b9 xen/drivers/passthrough/io.c --- a/xen/drivers/passthrough/io.c Wed Aug 13 13:18:06 2008 +0900 +++ b/xen/drivers/passthrough/io.c Wed Aug 13 22:22:21 2008 +0900 @@ -74,6 +74,9 @@ int pt_irq_create_bind_vtd( if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI ) { int pirq = pt_irq_bind->machine_irq; + + if ( pirq < 0 || pirq >= NR_IRQS ) + return -EINVAL; if ( !(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_VALID ) ) { diff -r fa8be8a6cb74 -r da236d7f59b9 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Wed Aug 13 13:18:06 2008 +0900 +++ b/xen/drivers/passthrough/vtd/iommu.c Wed Aug 13 22:22:21 2008 +0900 @@ -1789,7 +1789,8 @@ int intel_vtd_setup(void) memset(domid_bitmap, 0, domid_bitmap_size / 8); set_bit(0, domid_bitmap); - init_vtd_hw(); + if ( init_vtd_hw() ) + goto error; register_keyhandler('V', dump_iommu_info, "dump iommu info"); diff -r fa8be8a6cb74 -r da236d7f59b9 xen/include/asm-x86/event.h --- a/xen/include/asm-x86/event.h Wed Aug 13 13:18:06 2008 +0900 +++ b/xen/include/asm-x86/event.h Wed Aug 13 22:22:21 2008 +0900 @@ -69,12 +69,7 @@ static inline void local_event_delivery_ /* No arch specific virq definition now. Default to global. 
*/ static inline int arch_virq_is_global(int virq) { - switch (virq) { - case VIRQ_MCA: - return 1; - default: - return 1; - } + return 1; } #endif diff -r fa8be8a6cb74 -r da236d7f59b9 xen/include/asm-x86/msi.h --- a/xen/include/asm-x86/msi.h Wed Aug 13 13:18:06 2008 +0900 +++ b/xen/include/asm-x86/msi.h Wed Aug 13 22:22:21 2008 +0900 @@ -53,6 +53,14 @@ #else #define MAX_MSIX_PAGES 32 #endif + +struct msi_info { + int bus; + int devfn; + int vector; + int entry_nr; + uint64_t table_base; +}; struct msi_msg { u32 address_lo; /* low 32 bits of msi message address */ @@ -64,7 +72,7 @@ extern void mask_msi_irq(unsigned int ir extern void mask_msi_irq(unsigned int irq); extern void unmask_msi_irq(unsigned int irq); extern void set_msi_irq_affinity(unsigned int irq, cpumask_t mask); -extern int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi); +extern int pci_enable_msi(struct msi_info *msi); extern void pci_disable_msi(int vector); extern void pci_cleanup_msi(struct pci_dev *pdev); diff -r fa8be8a6cb74 -r da236d7f59b9 xen/include/public/physdev.h --- a/xen/include/public/physdev.h Wed Aug 13 13:18:06 2008 +0900 +++ b/xen/include/public/physdev.h Wed Aug 13 22:22:21 2008 +0900 @@ -136,10 +136,13 @@ struct physdev_map_pirq { /* IN or OUT */ int pirq; /* IN */ - struct { - int bus, devfn, entry_nr; - int msi; /* 0 - MSIX 1 - MSI */ - } msi_info; + int bus; + /* IN */ + int devfn; + /* IN */ + int entry_nr; + /* IN */ + uint64_t table_base; }; typedef struct physdev_map_pirq physdev_map_pirq_t; DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |