[Xen-changelog] [xen-unstable] merge with xen-unstable.hg
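One of the changes folded into this merge adds xc_domain_hvm_getcontext_partial() to libxc (see tools/libxc/xc_domain.c and tools/libxc/xenctrl.h in the diff below), used by xc_pagetab.c and xenctx.c to fetch a single HVM save record instead of the whole context blob. A minimal caller sketch, assuming an xc_handle obtained from xc_interface_open() and an HVM domid; the helper name dump_vcpu_regs is illustrative only:

    #include <stdio.h>
    #include <inttypes.h>
    #include "xenctrl.h"
    #include <xen/hvm/save.h>

    static int dump_vcpu_regs(int xc_handle, uint32_t domid, uint16_t vcpu)
    {
        struct hvm_hw_cpu cpu_ctx;

        /* Fetch only the CPU save record for one VCPU, rather than the
         * full HVM context as xc_domain_hvm_getcontext() would return.
         * Returns 0 on success, -1 on failure (per the xenctrl.h comment). */
        if ( xc_domain_hvm_getcontext_partial(xc_handle, domid,
                                              HVM_SAVE_CODE(CPU), vcpu,
                                              &cpu_ctx, sizeof cpu_ctx) != 0 )
            return -1;

        /* cr0/cr3/cr4/msr_efer are the fields the new xc_pagetab.c code
         * consults to work out the guest's paging mode. */
        printf("vcpu%u: cr0=%" PRIx64 " cr3=%" PRIx64 " efer=%" PRIx64 "\n",
               vcpu, cpu_ctx.cr0, cpu_ctx.cr3, cpu_ctx.msr_efer);
        return 0;
    }
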
# HG changeset patch # User Isaku Yamahata <yamahata@xxxxxxxxxxxxx> # Date 1234491748 -32400 # Node ID c7cba853583da45ee4478237047fdd5d6bed68cd # Parent af992824b5cfa3b81dbe68293216a5df3ec0bdf4 # Parent 32b15413749255e0cd518f25d9202759586dcb27 merge with xen-unstable.hg --- .hgignore | 10 Config.mk | 4 docs/man/xm.pod.1 | 2 extras/mini-os/arch/x86/mm.c | 6 tools/blktap/drivers/Makefile | 2 tools/firmware/rombios/rombios.c | 15 - tools/libxc/xc_domain.c | 58 ++++ tools/libxc/xc_pagetab.c | 36 +- tools/libxc/xenctrl.h | 25 + tools/python/xen/xend/XendAPIStore.py | 3 tools/python/xen/xend/image.py | 34 +- tools/python/xen/xend/server/pciquirk.py | 3 tools/python/xen/xm/create.py | 2 tools/python/xen/xm/main.py | 8 tools/xentrace/xenctx.c | 58 +++- xen/arch/ia64/linux-xen/iosapic.c | 10 xen/arch/ia64/linux-xen/irq_ia64.c | 26 +- xen/arch/ia64/linux-xen/mca.c | 9 xen/arch/ia64/xen/hypercall.c | 2 xen/arch/ia64/xen/irq.c | 24 - xen/arch/x86/Makefile | 1 xen/arch/x86/acpi/cpufreq/cpufreq.c | 16 - xen/arch/x86/acpi/suspend.c | 3 xen/arch/x86/boot/Makefile | 6 xen/arch/x86/boot/build32.mk | 24 + xen/arch/x86/boot/head.S | 10 xen/arch/x86/boot/reloc.c | 89 ++++++ xen/arch/x86/cpu/mcheck/amd_k8.c | 14 - xen/arch/x86/cpu/mcheck/amd_nonfatal.c | 13 - xen/arch/x86/cpu/mcheck/mce.c | 130 ++++++++++ xen/arch/x86/cpu/mcheck/mce.h | 3 xen/arch/x86/cpu/mcheck/mce_intel.c | 8 xen/arch/x86/domain.c | 38 +- xen/arch/x86/domain_build.c | 2 xen/arch/x86/domctl.c | 42 ++- xen/arch/x86/e820.c | 2 xen/arch/x86/i8259.c | 4 xen/arch/x86/io_apic.c | 54 ---- xen/arch/x86/irq.c | 77 +++++ xen/arch/x86/mm.c | 158 ++++++------ xen/arch/x86/mm/hap/hap.c | 23 - xen/arch/x86/mm/hap/p2m-ept.c | 88 ++++-- xen/arch/x86/mm/p2m.c | 95 ++----- xen/arch/x86/mm/paging.c | 6 xen/arch/x86/mm/shadow/common.c | 296 +++++++++++------------ xen/arch/x86/mm/shadow/multi.c | 131 ++++------ xen/arch/x86/mm/shadow/private.h | 123 ++------- xen/arch/x86/numa.c | 2 xen/arch/x86/physdev.c | 4 xen/arch/x86/setup.c | 16 - xen/arch/x86/smpboot.c | 4 xen/arch/x86/x86_32/xen.lds.S | 1 xen/arch/x86/x86_64/entry.S | 1 xen/arch/x86/x86_64/xen.lds.S | 1 xen/common/domain.c | 18 - xen/common/grant_table.c | 2 xen/common/hvm/save.c | 48 +++ xen/common/memory.c | 32 -- xen/common/page_alloc.c | 158 +++++++----- xen/drivers/char/serial.c | 2 xen/drivers/cpufreq/cpufreq.c | 73 +++++ xen/drivers/cpufreq/cpufreq_misc_governors.c | 25 + xen/drivers/cpufreq/cpufreq_ondemand.c | 95 +++---- xen/drivers/passthrough/amd/iommu_init.c | 23 - xen/drivers/passthrough/amd/iommu_map.c | 22 - xen/drivers/passthrough/amd/pci_amd_iommu.c | 1 xen/drivers/passthrough/io.c | 2 xen/drivers/passthrough/iommu.c | 9 xen/drivers/passthrough/vtd/dmar.c | 18 - xen/drivers/passthrough/vtd/ia64/vtd.c | 13 - xen/drivers/passthrough/vtd/intremap.c | 2 xen/drivers/passthrough/vtd/iommu.c | 61 +++- xen/drivers/passthrough/vtd/iommu.h | 4 xen/drivers/passthrough/vtd/qinval.c | 4 xen/drivers/passthrough/vtd/vtd.h | 4 xen/drivers/passthrough/vtd/x86/vtd.c | 9 xen/include/acpi/cpufreq/cpufreq.h | 1 xen/include/asm-ia64/hardirq.h | 1 xen/include/asm-ia64/hvm/iommu.h | 1 xen/include/asm-ia64/hvm/irq.h | 14 - xen/include/asm-ia64/linux-xen/asm/smp.h | 1 xen/include/asm-ia64/linux-xen/linux/interrupt.h | 4 xen/include/asm-ia64/linux/asm/hw_irq.h | 2 xen/include/asm-ia64/mm.h | 12 xen/include/asm-ia64/tlbflush.h | 3 xen/include/asm-x86/domain.h | 13 - xen/include/asm-x86/hvm/vmx/vmx.h | 6 xen/include/asm-x86/iocap.h | 5 xen/include/asm-x86/irq.h | 3 xen/include/asm-x86/mm.h | 84 +++++- 
xen/include/asm-x86/p2m.h | 4 xen/include/asm-x86/page.h | 38 ++ xen/include/asm-x86/perfc.h | 1 xen/include/asm-x86/processor.h | 1 xen/include/public/arch-ia64/hvm/save.h | 4 xen/include/public/arch-x86/hvm/save.h | 4 xen/include/public/arch-x86/xen-mca.h | 48 +++ xen/include/public/domctl.h | 12 xen/include/public/io/pciif.h | 2 xen/include/xen/hvm/save.h | 2 xen/include/xen/iocap.h | 3 xen/include/xen/irq.h | 20 + xen/include/xen/mm.h | 186 +++++++++++++- xen/include/xen/sched.h | 5 xen/xsm/flask/hooks.c | 1 105 files changed, 1880 insertions(+), 1048 deletions(-) diff -r af992824b5cf -r c7cba853583d .hgignore --- a/.hgignore Fri Feb 13 10:56:01 2009 +0900 +++ b/.hgignore Fri Feb 13 11:22:28 2009 +0900 @@ -256,6 +256,7 @@ ^xen/arch/x86/asm-offsets\.s$ ^xen/arch/x86/boot/mkelf32$ ^xen/arch/x86/xen\.lds$ +^xen/arch/x86/boot/reloc.S$ ^xen/ddb/.*$ ^xen/include/asm$ ^xen/include/asm-.*/asm-offsets\.h$ @@ -279,15 +280,6 @@ ^xen/arch/ia64/asm-xsi-offsets\.s$ ^xen/arch/ia64/map\.out$ ^xen/arch/ia64/xen\.lds\.s$ -^xen/arch/powerpc/dom0\.bin$ -^xen/arch/powerpc/asm-offsets\.s$ -^xen/arch/powerpc/firmware$ -^xen/arch/powerpc/firmware.dbg$ -^xen/arch/powerpc/firmware_image.bin$ -^xen/arch/powerpc/xen\.lds$ -^xen/arch/powerpc/\.xen-syms$ -^xen/arch/powerpc/xen-syms\.S$ -^xen/arch/powerpc/cmdline.dep$ ^unmodified_drivers/linux-2.6/\.tmp_versions ^unmodified_drivers/linux-2.6/.*\.cmd$ ^unmodified_drivers/linux-2.6/.*\.ko$ diff -r af992824b5cf -r c7cba853583d Config.mk --- a/Config.mk Fri Feb 13 10:56:01 2009 +0900 +++ b/Config.mk Fri Feb 13 11:22:28 2009 +0900 @@ -1,7 +1,7 @@ # -*- mode: Makefile; -*- -# A debug build of Xen and tools? -debug ?= y ## TEMPORARILY ENABLED +# A debug build of Xen and tools? TEMPORARILY ENABLED +debug ?= y XEN_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/ \ -e s/i86pc/x86_32/ -e s/amd64/x86_64/) diff -r af992824b5cf -r c7cba853583d docs/man/xm.pod.1 --- a/docs/man/xm.pod.1 Fri Feb 13 10:56:01 2009 +0900 +++ b/docs/man/xm.pod.1 Fri Feb 13 11:22:28 2009 +0900 @@ -66,6 +66,8 @@ The attached console will perform much l The attached console will perform much like a standard serial console, so running curses based interfaces over the console B<is not advised>. Vi tends to get very odd when using it over this interface. + +Use the key combination Ctrl+] to detach the domain console. =item B<create> I<configfile> [I<OPTIONS>] [I<vars>].. diff -r af992824b5cf -r c7cba853583d extras/mini-os/arch/x86/mm.c --- a/extras/mini-os/arch/x86/mm.c Fri Feb 13 10:56:01 2009 +0900 +++ b/extras/mini-os/arch/x86/mm.c Fri Feb 13 11:22:28 2009 +0900 @@ -550,9 +550,15 @@ static void clear_bootstrap(void) void arch_init_p2m(unsigned long max_pfn) { +#ifdef __x86_64__ #define L1_P2M_SHIFT 9 #define L2_P2M_SHIFT 18 #define L3_P2M_SHIFT 27 +#else +#define L1_P2M_SHIFT 10 +#define L2_P2M_SHIFT 20 +#define L3_P2M_SHIFT 30 +#endif #define L1_P2M_ENTRIES (1 << L1_P2M_SHIFT) #define L2_P2M_ENTRIES (1 << (L2_P2M_SHIFT - L1_P2M_SHIFT)) #define L3_P2M_ENTRIES (1 << (L3_P2M_SHIFT - L2_P2M_SHIFT)) diff -r af992824b5cf -r c7cba853583d tools/blktap/drivers/Makefile --- a/tools/blktap/drivers/Makefile Fri Feb 13 10:56:01 2009 +0900 +++ b/tools/blktap/drivers/Makefile Fri Feb 13 11:22:28 2009 +0900 @@ -13,7 +13,7 @@ CFLAGS += -I $(LIBAIO_DIR) CFLAGS += -I $(LIBAIO_DIR) CFLAGS += -D_GNU_SOURCE -ifeq ($(shell . ./check_gcrypt),"yes") +ifeq ($(shell . 
./check_gcrypt $(CC)),yes) CFLAGS += -DUSE_GCRYPT CRYPT_LIB := -lgcrypt else diff -r af992824b5cf -r c7cba853583d tools/firmware/rombios/rombios.c --- a/tools/firmware/rombios/rombios.c Fri Feb 13 10:56:01 2009 +0900 +++ b/tools/firmware/rombios/rombios.c Fri Feb 13 11:22:28 2009 +0900 @@ -4609,6 +4609,10 @@ int15_function32(regs, ES, DS, FLAGS) { Bit32u extended_memory_size=0; // 64bits long Bit16u CX,DX; +#ifdef HVMASSIST + Bit16u off, e820_table_size; + Bit32u base, type, size; +#endif BX_DEBUG_INT15("int15 AX=%04x\n",regs.u.r16.ax); @@ -4625,8 +4629,10 @@ ASM_START ;; Get the count in eax mov bx, sp +SEG SS mov ax, _int15_function32.CX [bx] shl eax, #16 +SEG SS mov ax, _int15_function32.DX [bx] ;; convert to numbers of 15usec ticks @@ -4660,8 +4666,7 @@ ASM_END { #ifdef HVMASSIST case 0x20: { - Bit16u e820_table_size = - read_word(E820_SEG, E820_NR_OFFSET) * 0x14; + e820_table_size = read_word(E820_SEG, E820_NR_OFFSET) * 0x14; if (regs.u.r32.edx != 0x534D4150) /* SMAP */ goto int15_unimplemented; @@ -4674,8 +4679,6 @@ ASM_END if ((regs.u.r32.ebx + 0x14 - 1) > e820_table_size) regs.u.r32.ebx = 0; } else if (regs.u.r16.bx == 1) { - Bit32u base, type; - Bit16u off; for (off = 0; off < e820_table_size; off += 0x14) { base = read_dword(E820_SEG, E820_OFFSET + off); type = read_dword(E820_SEG, E820_OFFSET + 0x10 + off); @@ -4699,9 +4702,7 @@ ASM_END } case 0x01: { - Bit16u off, e820_table_size = - read_word(E820_SEG, E820_NR_OFFSET) * 0x14; - Bit32u base, type, size; + e820_table_size = read_word(E820_SEG, E820_NR_OFFSET) * 0x14; // do we have any reason to fail here ? CLEAR_CF(); diff -r af992824b5cf -r c7cba853583d tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Fri Feb 13 10:56:01 2009 +0900 +++ b/tools/libxc/xc_domain.c Fri Feb 13 11:22:28 2009 +0900 @@ -269,6 +269,38 @@ int xc_domain_hvm_getcontext(int xc_hand unlock_pages(ctxt_buf, size); return (ret < 0 ? -1 : domctl.u.hvmcontext.size); +} + +/* Get just one element of the HVM guest context. + * size must be >= HVM_SAVE_LENGTH(type) */ +int xc_domain_hvm_getcontext_partial(int xc_handle, + uint32_t domid, + uint16_t typecode, + uint16_t instance, + void *ctxt_buf, + uint32_t size) +{ + int ret; + DECLARE_DOMCTL; + + if ( !ctxt_buf ) + return -EINVAL; + + domctl.cmd = XEN_DOMCTL_gethvmcontext_partial; + domctl.domain = (domid_t) domid; + domctl.u.hvmcontext_partial.type = typecode; + domctl.u.hvmcontext_partial.instance = instance; + set_xen_guest_handle(domctl.u.hvmcontext_partial.buffer, ctxt_buf); + + if ( (ret = lock_pages(ctxt_buf, size)) != 0 ) + return ret; + + ret = do_domctl(xc_handle, &domctl); + + if ( ctxt_buf ) + unlock_pages(ctxt_buf, size); + + return ret ? 
-1 : 0; } /* set info to hvm guest for restore */ @@ -909,6 +941,32 @@ int xc_domain_update_msi_irq( return rc; } +int xc_domain_unbind_msi_irq( + int xc_handle, + uint32_t domid, + uint32_t gvec, + uint32_t pirq, + uint32_t gflags) +{ + int rc; + xen_domctl_bind_pt_irq_t *bind; + + DECLARE_DOMCTL; + + domctl.cmd = XEN_DOMCTL_unbind_pt_irq; + domctl.domain = (domid_t)domid; + + bind = &(domctl.u.bind_pt_irq); + bind->hvm_domid = domid; + bind->irq_type = PT_IRQ_TYPE_MSI; + bind->machine_irq = pirq; + bind->u.msi.gvec = gvec; + bind->u.msi.gflags = gflags; + + rc = do_domctl(xc_handle, &domctl); + return rc; +} + /* Pass-through: binds machine irq to guests irq */ int xc_domain_bind_pt_irq( int xc_handle, diff -r af992824b5cf -r c7cba853583d tools/libxc/xc_pagetab.c --- a/tools/libxc/xc_pagetab.c Fri Feb 13 10:56:01 2009 +0900 +++ b/tools/libxc/xc_pagetab.c Fri Feb 13 11:22:28 2009 +0900 @@ -4,50 +4,42 @@ * Function to translate virtual to physical addresses. */ #include "xc_private.h" +#include <xen/hvm/save.h> #define CR0_PG 0x80000000 #define CR4_PAE 0x20 #define PTE_PSE 0x80 +#define EFER_LMA 0x400 + unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom, int vcpu, unsigned long long virt) { xc_dominfo_t dominfo; - vcpu_guest_context_any_t ctx; uint64_t paddr, mask, pte = 0; int size, level, pt_levels = 2; void *map; if (xc_domain_getinfo(xc_handle, dom, 1, &dominfo) != 1 - || dominfo.domid != dom - || xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0) + || dominfo.domid != dom) return 0; /* What kind of paging are we dealing with? */ if (dominfo.hvm) { - unsigned long cr0, cr3, cr4; - xen_capabilities_info_t xen_caps = ""; - if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0) + struct hvm_hw_cpu ctx; + if (xc_domain_hvm_getcontext_partial(xc_handle, dom, + HVM_SAVE_CODE(CPU), vcpu, + &ctx, sizeof ctx) != 0) return 0; - /* HVM context records are always host-sized */ - if (strstr(xen_caps, "xen-3.0-x86_64")) { - cr0 = ctx.x64.ctrlreg[0]; - cr3 = ctx.x64.ctrlreg[3]; - cr4 = ctx.x64.ctrlreg[4]; - } else { - cr0 = ctx.x32.ctrlreg[0]; - cr3 = ctx.x32.ctrlreg[3]; - cr4 = ctx.x32.ctrlreg[4]; - } - if (!(cr0 & CR0_PG)) + if (!(ctx.cr0 & CR0_PG)) return virt; - if (0 /* XXX how to get EFER.LMA? */) - pt_levels = 4; - else - pt_levels = (cr4 & CR4_PAE) ? 3 : 2; - paddr = cr3 & ((pt_levels == 3) ? ~0x1full : ~0xfffull); + pt_levels = (ctx.msr_efer&EFER_LMA) ? 4 : (ctx.cr4&CR4_PAE) ? 3 : 2; + paddr = ctx.cr3 & ((pt_levels == 3) ? 
~0x1full : ~0xfffull); } else { DECLARE_DOMCTL; + vcpu_guest_context_any_t ctx; + if (xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0) + return 0; domctl.domain = dom; domctl.cmd = XEN_DOMCTL_get_address_size; if ( do_domctl(xc_handle, &domctl) != 0 ) diff -r af992824b5cf -r c7cba853583d tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Fri Feb 13 10:56:01 2009 +0900 +++ b/tools/libxc/xenctrl.h Fri Feb 13 11:22:28 2009 +0900 @@ -375,6 +375,25 @@ int xc_domain_hvm_getcontext(int xc_hand uint8_t *ctxt_buf, uint32_t size); + +/** + * This function returns one element of the context of a hvm domain + * @parm xc_handle a handle to an open hypervisor interface + * @parm domid the domain to get information from + * @parm typecode which type of elemnt required + * @parm instance which instance of the type + * @parm ctxt_buf a pointer to a structure to store the execution context of + * the hvm domain + * @parm size the size of ctxt_buf (must be >= HVM_SAVE_LENGTH(typecode)) + * @return 0 on success, -1 on failure + */ +int xc_domain_hvm_getcontext_partial(int xc_handle, + uint32_t domid, + uint16_t typecode, + uint16_t instance, + void *ctxt_buf, + uint32_t size); + /** * This function will set the context for hvm domain * @@ -1074,6 +1093,12 @@ int xc_domain_update_msi_irq( uint32_t gvec, uint32_t pirq, uint32_t gflags); + +int xc_domain_unbind_msi_irq(int xc_handle, + uint32_t domid, + uint32_t gvec, + uint32_t pirq, + uint32_t gflags); int xc_domain_bind_pt_irq(int xc_handle, uint32_t domid, diff -r af992824b5cf -r c7cba853583d tools/python/xen/xend/XendAPIStore.py --- a/tools/python/xen/xend/XendAPIStore.py Fri Feb 13 10:56:01 2009 +0900 +++ b/tools/python/xen/xend/XendAPIStore.py Fri Feb 13 11:22:28 2009 +0900 @@ -33,7 +33,8 @@ def register(uuid, type, inst): def deregister(uuid, type): old = get(uuid, type) - del __classes[(uuid, type)] + if old is not None: + del __classes[(uuid, type)] return old def get(uuid, type): diff -r af992824b5cf -r c7cba853583d tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Fri Feb 13 10:56:01 2009 +0900 +++ b/tools/python/xen/xend/image.py Fri Feb 13 11:22:28 2009 +0900 @@ -372,8 +372,6 @@ class ImageHandler: env['DISPLAY'] = self.display if self.xauthority: env['XAUTHORITY'] = self.xauthority - if self.vncconsole: - args = args + ([ "-vncviewer" ]) unique_id = "%i-%i" % (self.vm.getDomid(), time.time()) sentinel_path = sentinel_path_prefix + unique_id sentinel_path_fifo = sentinel_path + '.fifo' @@ -558,24 +556,30 @@ class ImageHandler: os.kill(self.pid, signal.SIGHUP) except OSError, exn: log.exception(exn) - try: - # Try to reap the child every 100ms for 10s. Then SIGKILL it. - for i in xrange(100): + # Try to reap the child every 100ms for 10s. Then SIGKILL it. + for i in xrange(100): + try: (p, rv) = os.waitpid(self.pid, os.WNOHANG) if p == self.pid: break - time.sleep(0.1) - else: - log.warning("DeviceModel %d took more than 10s " - "to terminate: sending SIGKILL" % self.pid) + except OSError: + # This is expected if Xend has been restarted within + # the life of this domain. In this case, we can kill + # the process, but we can't wait for it because it's + # not our child. We continue this loop, and after it is + # terminated make really sure the process is going away + # (SIGKILL). 
+ pass + time.sleep(0.1) + else: + log.warning("DeviceModel %d took more than 10s " + "to terminate: sending SIGKILL" % self.pid) + try: os.kill(self.pid, signal.SIGKILL) os.waitpid(self.pid, 0) - except OSError, exn: - # This is expected if Xend has been restarted within the - # life of this domain. In this case, we can kill the process, - # but we can't wait for it because it's not our child. - # We just make really sure it's going away (SIGKILL) first. - os.kill(self.pid, signal.SIGKILL) + except OSError: + # This happens if the process doesn't exist. + pass state = xstransact.Remove("/local/domain/0/device-model/%i" % self.vm.getDomid()) finally: diff -r af992824b5cf -r c7cba853583d tools/python/xen/xend/server/pciquirk.py --- a/tools/python/xen/xend/server/pciquirk.py Fri Feb 13 10:56:01 2009 +0900 +++ b/tools/python/xen/xend/server/pciquirk.py Fri Feb 13 11:22:28 2009 +0900 @@ -123,7 +123,8 @@ class PCIQuirk: log.info("Config file does not exist: %s" % PERMISSIVE_CONFIG_FILE) self.pci_perm_dev_config = ['xend-pci-perm-devs'] - devices = child_at(child(pci_perm_dev_config, 'unconstrained_dev_ids'),0) + devices = child_at(child(self.pci_perm_dev_config, + 'unconstrained_dev_ids'),0) if self.__matchPCIdev( devices ): log.debug("Permissive mode enabled for PCI device [%s]" % self.devid) diff -r af992824b5cf -r c7cba853583d tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Fri Feb 13 10:56:01 2009 +0900 +++ b/tools/python/xen/xm/create.py Fri Feb 13 11:22:28 2009 +0900 @@ -1337,7 +1337,7 @@ def main(argv): elif not opts.is_xml: dom = make_domain(opts, config) - if opts.vals.vncviewer: + if opts.vals.vncconsole: domid = domain_name_to_domid(sxp.child_value(config, 'name', -1)) vncviewer_autopass = getattr(opts.vals,'vncviewer-autopass', False) console.runVncViewer(domid, vncviewer_autopass, True) diff -r af992824b5cf -r c7cba853583d tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Fri Feb 13 10:56:01 2009 +0900 +++ b/tools/python/xen/xm/main.py Fri Feb 13 11:22:28 2009 +0900 @@ -59,7 +59,11 @@ import XenAPI import XenAPI import xen.lowlevel.xc -xc = xen.lowlevel.xc.xc() +try: + xc = xen.lowlevel.xc.xc() +except Exception, ex: + print >>sys.stderr, ("Is xen kernel running?") + sys.exit(1) import inspect from xen.xend import XendOptions @@ -735,7 +739,7 @@ def xm_save(args): (options, params) = getopt.gnu_getopt(args, 'c', ['checkpoint']) except getopt.GetoptError, opterr: err(opterr) - sys.exit(1) + usage('save') checkpoint = False for (k, v) in options: diff -r af992824b5cf -r c7cba853583d tools/xentrace/xenctx.c --- a/tools/xentrace/xenctx.c Fri Feb 13 10:56:01 2009 +0900 +++ b/tools/xentrace/xenctx.c Fri Feb 13 11:22:28 2009 +0900 @@ -26,6 +26,7 @@ #include "xenctrl.h" #include <xen/foreign/x86_32.h> #include <xen/foreign/x86_64.h> +#include <xen/hvm/save.h> int xc_handle = 0; int domid = 0; @@ -287,6 +288,35 @@ static void print_ctx_32(vcpu_guest_cont } } +static void print_ctx_32on64(vcpu_guest_context_x86_64_t *ctx) +{ + struct cpu_user_regs_x86_64 *regs = &ctx->user_regs; + + printf("cs:eip: %04x:%08x ", regs->cs, (uint32_t)regs->eip); + print_symbol((uint32_t)regs->eip); + print_flags((uint32_t)regs->eflags); + printf("ss:esp: %04x:%08x\n", regs->ss, (uint32_t)regs->esp); + + printf("eax: %08x\t", (uint32_t)regs->eax); + printf("ebx: %08x\t", (uint32_t)regs->ebx); + printf("ecx: %08x\t", (uint32_t)regs->ecx); + printf("edx: %08x\n", (uint32_t)regs->edx); + + printf("esi: %08x\t", (uint32_t)regs->esi); + printf("edi: %08x\t", (uint32_t)regs->edi); + 
printf("ebp: %08x\n", (uint32_t)regs->ebp); + + printf(" ds: %04x\t", regs->ds); + printf(" es: %04x\t", regs->es); + printf(" fs: %04x\t", regs->fs); + printf(" gs: %04x\n", regs->gs); + + if (disp_all) { + print_special(ctx->ctrlreg, "cr", 0x1d, 4); + print_special(ctx->debugreg, "dr", 0xcf, 4); + } +} + static void print_ctx_64(vcpu_guest_context_x86_64_t *ctx) { struct cpu_user_regs_x86_64 *regs = &ctx->user_regs; @@ -335,6 +365,8 @@ static void print_ctx(vcpu_guest_context { if (ctxt_word_size == 4) print_ctx_32(&ctx->x32); + else if (guest_word_size == 4) + print_ctx_32on64(&ctx->x64); else print_ctx_64(&ctx->x64); } @@ -788,23 +820,29 @@ static void dump_ctx(int vcpu) #if defined(__i386__) || defined(__x86_64__) { - struct xen_domctl domctl; - memset(&domctl, 0, sizeof domctl); - domctl.domain = domid; - domctl.cmd = XEN_DOMCTL_get_address_size; - if (xc_domctl(xc_handle, &domctl) == 0) - ctxt_word_size = guest_word_size = domctl.u.address_size.size / 8; if (dominfo.hvm) { + struct hvm_hw_cpu cpuctx; xen_capabilities_info_t xen_caps = ""; + if (xc_domain_hvm_getcontext_partial( + xc_handle, domid, HVM_SAVE_CODE(CPU), + vcpu, &cpuctx, sizeof cpuctx) != 0) { + perror("xc_domain_hvm_getcontext_partial"); + exit(-1); + } + guest_word_size = (cpuctx.msr_efer & 0x400) ? 8 : 4; + /* HVM guest context records are always host-sized */ if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0) { perror("xc_version"); exit(-1); } - /* HVM guest context records are always host-sized */ ctxt_word_size = (strstr(xen_caps, "xen-3.0-x86_64")) ? 8 : 4; - /* XXX For now we can't tell whether a HVM guest is in long - * XXX mode; eventually fix this here and in xc_pagetab.c */ - guest_word_size = 4; + } else { + struct xen_domctl domctl; + memset(&domctl, 0, sizeof domctl); + domctl.domain = domid; + domctl.cmd = XEN_DOMCTL_get_address_size; + if (xc_domctl(xc_handle, &domctl) == 0) + ctxt_word_size = guest_word_size = domctl.u.address_size.size / 8; } } #endif diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/linux-xen/iosapic.c --- a/xen/arch/ia64/linux-xen/iosapic.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/ia64/linux-xen/iosapic.c Fri Feb 13 11:22:28 2009 +0900 @@ -93,6 +93,16 @@ #include <asm/ptrace.h> #include <asm/system.h> +#ifdef XEN +static inline int iosapic_irq_to_vector (int irq) +{ + return irq; +} + +#undef irq_to_vector +#define irq_to_vector(irq) iosapic_irq_to_vector(irq) +#define AUTO_ASSIGN AUTO_ASSIGN_IRQ +#endif #undef DEBUG_INTERRUPT_ROUTING diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/linux-xen/irq_ia64.c --- a/xen/arch/ia64/linux-xen/irq_ia64.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/ia64/linux-xen/irq_ia64.c Fri Feb 13 11:22:28 2009 +0900 @@ -250,6 +250,7 @@ register_percpu_irq (ia64_vector vec, st register_percpu_irq (ia64_vector vec, struct irqaction *action) { irq_desc_t *desc; +#ifndef XEN unsigned int irq; for (irq = 0; irq < NR_IRQS; ++irq) @@ -258,16 +259,19 @@ register_percpu_irq (ia64_vector vec, st desc->status |= IRQ_PER_CPU; desc->handler = &irq_type_ia64_lsapic; if (action) -#ifdef XEN - setup_vector(irq, action); -#else setup_irq(irq, action); -#endif - } -} - -#ifdef XEN -int request_irq(unsigned int irq, + } +#else + desc = irq_descp(vec); + desc->status |= IRQ_PER_CPU; + desc->handler = &irq_type_ia64_lsapic; + if (action) + setup_vector(vec, action); +#endif +} + +#ifdef XEN +int request_irq_vector(unsigned int vector, void (*handler)(int, void *, struct cpu_user_regs *), unsigned long irqflags, const char * devname, void *dev_id) { @@ 
-279,7 +283,7 @@ int request_irq(unsigned int irq, * otherwise we'll have trouble later trying to figure out * which interrupt is which (messes up the interrupt freeing logic etc). * */ - if (irq >= NR_IRQS) + if (vector >= NR_VECTORS) return -EINVAL; if (!handler) return -EINVAL; @@ -291,7 +295,7 @@ int request_irq(unsigned int irq, action->handler = handler; action->name = devname; action->dev_id = dev_id; - setup_vector(irq, action); + setup_vector(vector, action); if (retval) xfree(action); diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/linux-xen/mca.c --- a/xen/arch/ia64/linux-xen/mca.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/ia64/linux-xen/mca.c Fri Feb 13 11:22:28 2009 +0900 @@ -114,7 +114,6 @@ extern void ia64_slave_init_handler (v extern void ia64_slave_init_handler (void); #ifdef XEN extern void setup_vector (unsigned int vec, struct irqaction *action); -#define setup_irq(irq, action) setup_vector(irq, action) #endif static ia64_mc_info_t ia64_mc_info; @@ -1931,12 +1930,18 @@ ia64_mca_late_init(void) if (cpe_vector >= 0) { /* If platform supports CPEI, enable the irq. */ cpe_poll_enabled = 0; +#ifndef XEN for (irq = 0; irq < NR_IRQS; ++irq) if (irq_to_vector(irq) == cpe_vector) { desc = irq_descp(irq); desc->status |= IRQ_PER_CPU; - setup_irq(irq, &mca_cpe_irqaction); + setup_vector(irq, &mca_cpe_irqaction); } +#else + desc = irq_descp(cpe_vector); + desc->status |= IRQ_PER_CPU; + setup_vector(cpe_vector, &mca_cpe_irqaction); +#endif ia64_mca_register_cpev(cpe_vector); IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__); } else { diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/xen/hypercall.c --- a/xen/arch/ia64/xen/hypercall.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/ia64/xen/hypercall.c Fri Feb 13 11:22:28 2009 +0900 @@ -543,7 +543,7 @@ long do_physdev_op(int cmd, XEN_GUEST_HA break; irq_status_query.flags = 0; /* Edge-triggered interrupts don't need an explicit unmask downcall. */ - if ( !strstr(irq_desc[irq_to_vector(irq)].handler->typename, "edge") ) + if ( !strstr(irq_descp(irq)->handler->typename, "edge") ) irq_status_query.flags |= XENIRQSTAT_needs_eoi; ret = copy_to_guest(arg, &irq_status_query, 1) ? -EFAULT : 0; break; diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/xen/irq.c --- a/xen/arch/ia64/xen/irq.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/ia64/xen/irq.c Fri Feb 13 11:22:28 2009 +0900 @@ -228,11 +228,11 @@ out: * disabled. */ -int setup_vector(unsigned int irq, struct irqaction * new) +int setup_vector(unsigned int vector, struct irqaction * new) { unsigned long flags; struct irqaction *old, **p; - irq_desc_t *desc = irq_descp(irq); + irq_desc_t *desc = irq_descp(vector); /* * The following block of code has to be executed atomically @@ -248,8 +248,8 @@ int setup_vector(unsigned int irq, struc desc->depth = 0; desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_GUEST); - desc->handler->startup(irq); - desc->handler->enable(irq); + desc->handler->startup(vector); + desc->handler->enable(vector); spin_unlock_irqrestore(&desc->lock,flags); return 0; @@ -258,13 +258,11 @@ int setup_vector(unsigned int irq, struc /* Vectors reserved by xen (and thus not sharable with domains). */ unsigned long ia64_xen_vector[BITS_TO_LONGS(NR_IRQS)]; -int setup_irq(unsigned int irq, struct irqaction * new) -{ - unsigned int vec; +int setup_irq_vector(unsigned int vec, struct irqaction * new) +{ int res; - /* Get vector for IRQ. 
*/ - if (acpi_gsi_to_irq (irq, &vec) < 0) + if ( vec == IA64_INVALID_VECTOR ) return -ENOSYS; /* Reserve the vector (and thus the irq). */ if (test_and_set_bit(vec, ia64_xen_vector)) @@ -273,14 +271,12 @@ int setup_irq(unsigned int irq, struct i return res; } -void free_irq(unsigned int irq) -{ - unsigned int vec; +void release_irq_vector(unsigned int vec) +{ unsigned long flags; irq_desc_t *desc; - /* Get vector for IRQ. */ - if (acpi_gsi_to_irq(irq, &vec) < 0) + if ( vec == IA64_INVALID_VECTOR ) return; desc = irq_descp(vec); diff -r af992824b5cf -r c7cba853583d xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/Makefile Fri Feb 13 11:22:28 2009 +0900 @@ -92,3 +92,4 @@ clean:: clean:: rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32 rm -f $(BASEDIR)/.xen-syms.[0-9]* boot/.*.d + rm -f boot/reloc.S boot/reloc.lnk boot/reloc.bin diff -r af992824b5cf -r c7cba853583d xen/arch/x86/acpi/cpufreq/cpufreq.c --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c Fri Feb 13 11:22:28 2009 +0900 @@ -58,6 +58,9 @@ static struct acpi_cpufreq_data *drv_dat static struct cpufreq_driver acpi_cpufreq_driver; +static unsigned int __read_mostly acpi_pstate_strict; +integer_param("acpi_pstate_strict", acpi_pstate_strict); + static int check_est_cpu(unsigned int cpuid) { struct cpuinfo_x86 *cpu = &cpu_data[cpuid]; @@ -180,7 +183,7 @@ static void drv_read(struct drv_cmd *cmd ASSERT(cpus_weight(cmd->mask) == 1); /* to reduce IPI for the sake of performance */ - if (cpu_isset(smp_processor_id(), cmd->mask)) + if (likely(cpu_isset(smp_processor_id(), cmd->mask))) do_drv_read((void *)cmd); else on_selected_cpus( cmd->mask, do_drv_read, (void *)cmd, 0, 1); @@ -196,15 +199,16 @@ static u32 get_cur_val(cpumask_t mask) struct cpufreq_policy *policy; struct processor_performance *perf; struct drv_cmd cmd; - unsigned int cpu; + unsigned int cpu = smp_processor_id(); if (unlikely(cpus_empty(mask))) return 0; - cpu = first_cpu(mask); + if (!cpu_isset(cpu, mask)) + cpu = first_cpu(mask); policy = cpufreq_cpu_policy[cpu]; - if (!policy) + if (cpu >= NR_CPUS || !policy || !drv_data[policy->cpu]) return 0; switch (drv_data[policy->cpu]->cpu_feature) { @@ -214,7 +218,7 @@ static u32 get_cur_val(cpumask_t mask) break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; - perf = drv_data[first_cpu(mask)]->acpi_data; + perf = drv_data[policy->cpu]->acpi_data; cmd.addr.io.port = perf->control_register.address; cmd.addr.io.bit_width = perf->control_register.bit_width; break; @@ -393,7 +397,7 @@ static int acpi_cpufreq_target(struct cp drv_write(&cmd); - if (!check_freqs(cmd.mask, freqs.new, data)) { + if (acpi_pstate_strict && !check_freqs(cmd.mask, freqs.new, data)) { printk(KERN_WARNING "Fail transfer to new freq %d\n", freqs.new); return -EAGAIN; } diff -r af992824b5cf -r c7cba853583d xen/arch/x86/acpi/suspend.c --- a/xen/arch/x86/acpi/suspend.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/acpi/suspend.c Fri Feb 13 11:22:28 2009 +0900 @@ -65,6 +65,9 @@ void restore_rest_processor_state(void) /* Reload FPU state on next FPU use. 
*/ stts(); + if (cpu_has_pat) + wrmsrl(MSR_IA32_CR_PAT, host_pat); + mtrr_ap_init(); mcheck_init(&boot_cpu_data); } diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/Makefile --- a/xen/arch/x86/boot/Makefile Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/boot/Makefile Fri Feb 13 11:22:28 2009 +0900 @@ -1,1 +1,7 @@ obj-y += head.o obj-y += head.o + +head.o: reloc.S + +# NB. BOOT_TRAMPOLINE == 0x8c000 +%.S: %.c + RELOC=0x8c000 $(MAKE) -f build32.mk $@ diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/build32.mk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/boot/build32.mk Fri Feb 13 11:22:28 2009 +0900 @@ -0,0 +1,24 @@ +XEN_ROOT=../../../.. +override XEN_TARGET_ARCH=x86_32 +CFLAGS = +include $(XEN_ROOT)/Config.mk + +# Disable PIE/SSP if GCC supports them. They can break us. +$(call cc-option-add,CFLAGS,CC,-nopie) +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector) +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all) + +CFLAGS += -Werror -fno-builtin -msoft-float + +%.S: %.bin + (od -v -t x $< | head -n -1 | \ + sed 's/ /,0x/g' | sed 's/^[0-9]*,/ .long /') >$@ + +%.bin: %.lnk + $(OBJCOPY) -O binary $< $@ + +%.lnk: %.o + $(LD) $(LDFLAGS_DIRECT) -N -Ttext 0x8c000 -o $@ $< + +%.o: %.c + $(CC) $(CFLAGS) -c $< -o $@ diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/head.S --- a/xen/arch/x86/boot/head.S Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/boot/head.S Fri Feb 13 11:22:28 2009 +0900 @@ -79,8 +79,11 @@ __start: cmp $0x2BADB002,%eax jne not_multiboot - /* Save the Multiboot info structure for later use. */ - mov %ebx,sym_phys(multiboot_ptr) + /* Save the Multiboot info struct (after relocation) for later use. */ + mov $sym_phys(cpu0_stack)+1024,%esp + push %ebx + call reloc + mov %eax,sym_phys(multiboot_ptr) /* Initialize BSS (no nasty surprises!) */ mov $sym_phys(__bss_start),%edi @@ -192,6 +195,9 @@ 2: cmp $L1_PAGETABLE_ENTRIES,%e #include "cmdline.S" +reloc: +#include "reloc.S" + .align 16 .globl trampoline_start, trampoline_end trampoline_start: diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/reloc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/boot/reloc.c Fri Feb 13 11:22:28 2009 +0900 @@ -0,0 +1,89 @@ +/****************************************************************************** + * reloc.c + * + * 32-bit flat memory-map routines for relocating Multiboot structures + * and modules. This is most easily done early with paging disabled. + * + * Copyright (c) 2009, Citrix Systems, Inc. 
+ * + * Authors: + * Keir Fraser <keir.fraser@xxxxxxxxxx> + */ + +asm ( + " .text \n" + " .globl _start \n" + "_start: \n" + " mov $_start,%edi \n" + " call 1f \n" + "1: pop %esi \n" + " sub $1b-_start,%esi \n" + " mov $__bss_start-_start,%ecx \n" + " rep movsb \n" + " xor %eax,%eax \n" + " mov $_end,%ecx \n" + " sub %edi,%ecx \n" + " rep stosb \n" + " mov $reloc,%eax \n" + " jmp *%eax \n" + ); + +typedef unsigned int u32; +#include "../../../include/xen/multiboot.h" + +extern char _start[]; + +static void *memcpy(void *dest, const void *src, unsigned int n) +{ + char *s = (char *)src, *d = dest; + while ( n-- ) + *d++ = *s++; + return dest; +} + +static void *reloc_mbi_struct(void *old, unsigned int bytes) +{ + static void *alloc = &_start; + alloc = (void *)(((unsigned long)alloc - bytes) & ~15ul); + return memcpy(alloc, old, bytes); +} + +static char *reloc_mbi_string(char *old) +{ + char *p; + for ( p = old; *p != '\0'; p++ ) + continue; + return reloc_mbi_struct(old, p - old + 1); +} + +multiboot_info_t *reloc(multiboot_info_t *mbi_old) +{ + multiboot_info_t *mbi = reloc_mbi_struct(mbi_old, sizeof(*mbi)); + int i; + + if ( mbi->flags & MBI_CMDLINE ) + mbi->cmdline = (u32)reloc_mbi_string((char *)mbi->cmdline); + + if ( mbi->flags & MBI_MODULES ) + { + module_t *mods = reloc_mbi_struct( + (module_t *)mbi->mods_addr, mbi->mods_count * sizeof(module_t)); + mbi->mods_addr = (u32)mods; + for ( i = 0; i < mbi->mods_count; i++ ) + if ( mods[i].string ) + mods[i].string = (u32)reloc_mbi_string((char *)mods[i].string); + } + + if ( mbi->flags & MBI_MEMMAP ) + mbi->mmap_addr = (u32)reloc_mbi_struct( + (memory_map_t *)mbi->mmap_addr, mbi->mmap_length); + + /* Mask features we don't understand or don't relocate. */ + mbi->flags &= (MBI_MEMLIMITS | + MBI_DRIVES | + MBI_CMDLINE | + MBI_MODULES | + MBI_MEMMAP); + + return mbi; +} diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/amd_k8.c --- a/xen/arch/x86/cpu/mcheck/amd_k8.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/cpu/mcheck/amd_k8.c Fri Feb 13 11:22:28 2009 +0900 @@ -99,6 +99,8 @@ void k8_machine_check(struct cpu_user_re mc_data = x86_mcinfo_getptr(); cpu_nr = smp_processor_id(); + BUG_ON(cpu_nr != vcpu->processor); + curdom = vcpu->domain; memset(&mc_global, 0, sizeof(mc_global)); @@ -106,14 +108,12 @@ void k8_machine_check(struct cpu_user_re mc_global.common.size = sizeof(mc_global); mc_global.mc_domid = curdom->domain_id; /* impacted domain */ - mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */ - BUG_ON(cpu_nr != vcpu->processor); - mc_global.mc_core_threadid = 0; + + x86_mc_get_cpu_info(cpu_nr, &mc_global.mc_socketid, + &mc_global.mc_coreid, &mc_global.mc_core_threadid, + &mc_global.mc_apicid, NULL, NULL, NULL); + mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */ -#if 0 /* TODO: on which socket is this physical core? - It's not clear to me how to figure this out. 
*/ - mc_global.mc_socketid = ???; -#endif mc_global.mc_flags |= MC_FLAG_UNCORRECTABLE; rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus); diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/amd_nonfatal.c --- a/xen/arch/x86/cpu/mcheck/amd_nonfatal.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/cpu/mcheck/amd_nonfatal.c Fri Feb 13 11:22:28 2009 +0900 @@ -95,6 +95,7 @@ void mce_amd_checkregs(void *info) mc_data = NULL; cpu_nr = smp_processor_id(); + BUG_ON(cpu_nr != vcpu->processor); event_enabled = guest_enabled_event(dom0->vcpu[0], VIRQ_MCA); error_found = 0; @@ -103,14 +104,12 @@ void mce_amd_checkregs(void *info) mc_global.common.size = sizeof(mc_global); mc_global.mc_domid = vcpu->domain->domain_id; /* impacted domain */ - mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */ - BUG_ON(cpu_nr != vcpu->processor); - mc_global.mc_core_threadid = 0; mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */ -#if 0 /* TODO: on which socket is this physical core? - It's not clear to me how to figure this out. */ - mc_global.mc_socketid = ???; -#endif + + x86_mc_get_cpu_info(cpu_nr, &mc_global.mc_socketid, + &mc_global.mc_coreid, &mc_global.mc_core_threadid, + &mc_global.mc_apicid, NULL, NULL, NULL); + mc_global.mc_flags |= MC_FLAG_CORRECTABLE; rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus); diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/mce.c --- a/xen/arch/x86/cpu/mcheck/mce.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/cpu/mcheck/mce.c Fri Feb 13 11:22:28 2009 +0900 @@ -443,6 +443,96 @@ next: +static void do_mc_get_cpu_info(void *v) +{ + int cpu = smp_processor_id(); + int cindex, cpn; + struct cpuinfo_x86 *c; + xen_mc_logical_cpu_t *log_cpus, *xcp; + uint32_t junk, ebx; + + log_cpus = v; + c = &cpu_data[cpu]; + cindex = 0; + cpn = cpu - 1; + + /* + * Deal with sparse masks, condensed into a contig array. + */ + while (cpn >= 0) { + if (cpu_isset(cpn, cpu_online_map)) + cindex++; + cpn--; + } + + xcp = &log_cpus[cindex]; + c = &cpu_data[cpu]; + xcp->mc_cpunr = cpu; + x86_mc_get_cpu_info(cpu, &xcp->mc_chipid, + &xcp->mc_coreid, &xcp->mc_threadid, + &xcp->mc_apicid, &xcp->mc_ncores, + &xcp->mc_ncores_active, &xcp->mc_nthreads); + xcp->mc_cpuid_level = c->cpuid_level; + xcp->mc_family = c->x86; + xcp->mc_vendor = c->x86_vendor; + xcp->mc_model = c->x86_model; + xcp->mc_step = c->x86_mask; + xcp->mc_cache_size = c->x86_cache_size; + xcp->mc_cache_alignment = c->x86_cache_alignment; + memcpy(xcp->mc_vendorid, c->x86_vendor_id, sizeof xcp->mc_vendorid); + memcpy(xcp->mc_brandid, c->x86_model_id, sizeof xcp->mc_brandid); + memcpy(xcp->mc_cpu_caps, c->x86_capability, sizeof xcp->mc_cpu_caps); + + /* + * This part needs to run on the CPU itself. 
+ */ + xcp->mc_nmsrvals = __MC_NMSRS; + xcp->mc_msrvalues[0].reg = MSR_IA32_MCG_CAP; + rdmsrl(MSR_IA32_MCG_CAP, xcp->mc_msrvalues[0].value); + + if (c->cpuid_level >= 1) { + cpuid(1, &junk, &ebx, &junk, &junk); + xcp->mc_clusterid = (ebx >> 24) & 0xff; + } else + xcp->mc_clusterid = hard_smp_processor_id(); +} + + +void x86_mc_get_cpu_info(unsigned cpu, uint32_t *chipid, uint16_t *coreid, + uint16_t *threadid, uint32_t *apicid, + unsigned *ncores, unsigned *ncores_active, + unsigned *nthreads) +{ + struct cpuinfo_x86 *c; + + *apicid = cpu_physical_id(cpu); + c = &cpu_data[cpu]; + if (c->apicid == BAD_APICID) { + *chipid = cpu; + *coreid = 0; + *threadid = 0; + if (ncores != NULL) + *ncores = 1; + if (ncores_active != NULL) + *ncores_active = 1; + if (nthreads != NULL) + *nthreads = 1; + } else { + *chipid = phys_proc_id[cpu]; + if (c->x86_max_cores > 1) + *coreid = cpu_core_id[cpu]; + else + *coreid = 0; + *threadid = c->apicid & ((1 << (c->x86_num_siblings - 1)) - 1); + if (ncores != NULL) + *ncores = c->x86_max_cores; + if (ncores_active != NULL) + *ncores_active = c->booted_cores; + if (nthreads != NULL) + *nthreads = c->x86_num_siblings; + } +} + /* Machine Check Architecture Hypercall */ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc) { @@ -452,6 +542,7 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u struct domain *domU; struct xen_mc_fetch *mc_fetch; struct xen_mc_notifydomain *mc_notifydomain; + struct xen_mc_physcpuinfo *mc_physcpuinfo; struct mc_info *mi; uint32_t flags; uint32_t fetch_idx; @@ -460,6 +551,8 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u * a DomU to fetch mc data while Dom0 notifies another DomU. */ static DEFINE_SPINLOCK(mc_lock); static DEFINE_SPINLOCK(mc_notify_lock); + int nlcpu; + xen_mc_logical_cpu_t *log_cpus = NULL; if ( copy_from_guest(op, u_xen_mc, 1) ) return -EFAULT; @@ -580,6 +673,43 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u spin_unlock(&mc_notify_lock); break; + + case XEN_MC_physcpuinfo: + if ( !IS_PRIV(v->domain) ) + return -EPERM; + + mc_physcpuinfo = &op->u.mc_physcpuinfo; + nlcpu = num_online_cpus(); + + if (!guest_handle_is_null(mc_physcpuinfo->info)) { + if (mc_physcpuinfo->ncpus <= 0) + return -EINVAL; + nlcpu = min(nlcpu, (int)mc_physcpuinfo->ncpus); + log_cpus = xmalloc_array(xen_mc_logical_cpu_t, nlcpu); + if (log_cpus == NULL) + return -ENOMEM; + + if (on_each_cpu(do_mc_get_cpu_info, log_cpus, + 1, 1) != 0) { + xfree(log_cpus); + return -EIO; + } + } + + mc_physcpuinfo->ncpus = nlcpu; + + if (copy_to_guest(u_xen_mc, op, 1)) { + if (log_cpus != NULL) + xfree(log_cpus); + return -EFAULT; + } + + if (!guest_handle_is_null(mc_physcpuinfo->info)) { + if (copy_to_guest(mc_physcpuinfo->info, + log_cpus, nlcpu)) + ret = -EFAULT; + xfree(log_cpus); + } } return ret; diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/mce.h --- a/xen/arch/x86/cpu/mcheck/mce.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/cpu/mcheck/mce.h Fri Feb 13 11:22:28 2009 +0900 @@ -34,4 +34,5 @@ int x86_mcinfo_add(struct mc_info *mi, v int x86_mcinfo_add(struct mc_info *mi, void *mcinfo); void x86_mcinfo_dump(struct mc_info *mi); void mc_panic(char *s); - +void x86_mc_get_cpu_info(unsigned, uint32_t *, uint16_t *, uint16_t *, + uint32_t *, uint32_t *, uint32_t *, uint32_t *); diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/mce_intel.c --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 13 11:22:28 2009 +0900 @@ -182,11 +182,9 @@ static struct mc_info *machine_check_pol mcg.mc_flags = 
MC_FLAG_POLLED; else if (calltype == MC_FLAG_CMCI) mcg.mc_flags = MC_FLAG_CMCI; - mcg.mc_socketid = phys_proc_id[cpu]; - mcg.mc_coreid = cpu_core_id[cpu]; - mcg.mc_apicid = cpu_physical_id(cpu); - mcg.mc_core_threadid = - mcg.mc_apicid & ( 1 << (cpu_data[cpu].x86_num_siblings - 1)); + x86_mc_get_cpu_info( + cpu, &mcg.mc_socketid, &mcg.mc_coreid, + &mcg.mc_core_threadid, &mcg.mc_apicid, NULL, NULL, NULL); rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus); for ( i = 0; i < nr_mce_banks; i++ ) { diff -r af992824b5cf -r c7cba853583d xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/domain.c Fri Feb 13 11:22:28 2009 +0900 @@ -141,7 +141,7 @@ void dump_pageframe_info(struct domain * } else { - list_for_each_entry ( page, &d->page_list, list ) + page_list_for_each ( page, &d->page_list ) { printk(" DomPage %p: caf=%08lx, taf=%" PRtype_info "\n", _p(page_to_mfn(page)), @@ -154,7 +154,7 @@ void dump_pageframe_info(struct domain * p2m_pod_dump_data(d); } - list_for_each_entry ( page, &d->xenpage_list, list ) + page_list_for_each ( page, &d->xenpage_list ) { printk(" XenPage %p: caf=%08lx, taf=%" PRtype_info "\n", _p(page_to_mfn(page)), @@ -352,6 +352,8 @@ int vcpu_initialise(struct vcpu *v) v->arch.perdomain_ptes = d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT); + spin_lock_init(&v->arch.shadow_ldt_lock); + return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0); } @@ -380,7 +382,7 @@ int arch_domain_create(struct domain *d, INIT_LIST_HEAD(&d->arch.pdev_list); d->arch.relmem = RELMEM_not_started; - INIT_LIST_HEAD(&d->arch.relmem_list); + INIT_PAGE_LIST_HEAD(&d->arch.relmem_list); pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t)); d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order, 0); @@ -1655,9 +1657,8 @@ int hypercall_xlat_continuation(unsigned #endif static int relinquish_memory( - struct domain *d, struct list_head *list, unsigned long type) -{ - struct list_head *ent; + struct domain *d, struct page_list_head *list, unsigned long type) +{ struct page_info *page; unsigned long x, y; int ret = 0; @@ -1665,17 +1666,13 @@ static int relinquish_memory( /* Use a recursive lock, as we may enter 'free_domheap_page'. */ spin_lock_recursive(&d->page_alloc_lock); - ent = list->next; - while ( ent != list ) - { - page = list_entry(ent, struct page_info, list); - + while ( (page = page_list_remove_head(list)) ) + { /* Grab a reference to the page so it won't disappear from under us. */ if ( unlikely(!get_page(page, d)) ) { /* Couldn't get a reference -- someone is freeing this page. */ - ent = ent->next; - list_move_tail(&page->list, &d->arch.relmem_list); + page_list_add_tail(page, &d->arch.relmem_list); continue; } @@ -1687,6 +1684,7 @@ static int relinquish_memory( break; case -EAGAIN: case -EINTR: + page_list_add(page, list); set_bit(_PGT_pinned, &page->u.inuse.type_info); put_page(page); goto out; @@ -1723,6 +1721,7 @@ static int relinquish_memory( case 0: break; case -EINTR: + page_list_add(page, list); page->u.inuse.type_info |= PGT_validated; if ( x & PGT_partial ) put_page(page); @@ -1730,6 +1729,7 @@ static int relinquish_memory( ret = -EAGAIN; goto out; case -EAGAIN: + page_list_add(page, list); page->u.inuse.type_info |= PGT_partial; if ( x & PGT_partial ) put_page(page); @@ -1746,9 +1746,8 @@ static int relinquish_memory( } } - /* Follow the list chain and /then/ potentially free the page. 
*/ - ent = ent->next; - list_move_tail(&page->list, &d->arch.relmem_list); + /* Put the page on the list and /then/ potentially free it. */ + page_list_add_tail(page, &d->arch.relmem_list); put_page(page); if ( hypercall_preempt_check() ) @@ -1758,7 +1757,12 @@ static int relinquish_memory( } } - list_splice_init(&d->arch.relmem_list, list); + /* list is empty at this point. */ + if ( !page_list_empty(&d->arch.relmem_list) ) + { + *list = d->arch.relmem_list; + INIT_PAGE_LIST_HEAD(&d->arch.relmem_list); + } out: spin_unlock_recursive(&d->page_alloc_lock); diff -r af992824b5cf -r c7cba853583d xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/domain_build.c Fri Feb 13 11:22:28 2009 +0900 @@ -880,7 +880,7 @@ int __init construct_dom0( } si->first_p2m_pfn = pfn; si->nr_p2m_frames = d->tot_pages - count; - list_for_each_entry ( page, &d->page_list, list ) + page_list_for_each ( page, &d->page_list ) { mfn = page_to_mfn(page); if ( get_gpfn_from_mfn(mfn) >= count ) diff -r af992824b5cf -r c7cba853583d xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/domctl.c Fri Feb 13 11:22:28 2009 +0900 @@ -240,7 +240,7 @@ long arch_do_domctl( struct domain *d = rcu_lock_domain_by_id(domctl->domain); unsigned long max_pfns = domctl->u.getmemlist.max_pfns; uint64_t mfn; - struct list_head *list_ent; + struct page_info *page; ret = -EINVAL; if ( d != NULL ) @@ -259,19 +259,19 @@ long arch_do_domctl( goto getmemlist_out; } - ret = 0; - list_ent = d->page_list.next; - for ( i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++ ) + ret = i = 0; + page_list_for_each(page, &d->page_list) { - mfn = page_to_mfn(list_entry( - list_ent, struct page_info, list)); + if ( i >= max_pfns ) + break; + mfn = page_to_mfn(page); if ( copy_to_guest_offset(domctl->u.getmemlist.buffer, i, &mfn, 1) ) { ret = -EFAULT; break; } - list_ent = mfn_to_page(mfn)->list.next; + ++i; } spin_unlock(&d->page_alloc_lock); @@ -416,6 +416,34 @@ long arch_do_domctl( rcu_unlock_domain(d); } break; + + case XEN_DOMCTL_gethvmcontext_partial: + { + struct domain *d; + + ret = -ESRCH; + if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL ) + break; + + ret = xsm_hvmcontext(d, domctl->cmd); + if ( ret ) + goto gethvmcontext_partial_out; + + ret = -EINVAL; + if ( !is_hvm_domain(d) ) + goto gethvmcontext_partial_out; + + domain_pause(d); + ret = hvm_save_one(d, domctl->u.hvmcontext_partial.type, + domctl->u.hvmcontext_partial.instance, + domctl->u.hvmcontext_partial.buffer); + domain_unpause(d); + + gethvmcontext_partial_out: + rcu_unlock_domain(d); + } + break; + case XEN_DOMCTL_set_address_size: { diff -r af992824b5cf -r c7cba853583d xen/arch/x86/e820.c --- a/xen/arch/x86/e820.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/e820.c Fri Feb 13 11:22:28 2009 +0900 @@ -1,10 +1,10 @@ #include <xen/config.h> #include <xen/init.h> #include <xen/lib.h> +#include <xen/mm.h> #include <xen/compat.h> #include <xen/dmi.h> #include <asm/e820.h> -#include <asm/mm.h> #include <asm/page.h> /* opt_mem: Limit of physical RAM. Any RAM beyond this point is ignored. */ diff -r af992824b5cf -r c7cba853583d xen/arch/x86/i8259.c --- a/xen/arch/x86/i8259.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/i8259.c Fri Feb 13 11:22:28 2009 +0900 @@ -410,8 +410,8 @@ void __init init_IRQ(void) } /* Never allocate the hypercall vector or Linux/BSD fast-trap vector. 
*/ - vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN; - vector_irq[0x80] = NEVER_ASSIGN; + vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN_IRQ; + vector_irq[0x80] = NEVER_ASSIGN_IRQ; apic_intr_init(); diff -r af992824b5cf -r c7cba853583d xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/io_apic.c Fri Feb 13 11:22:28 2009 +0900 @@ -49,7 +49,6 @@ static struct { int pin, apic; } ioapic_ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); int skip_ioapic_setup; @@ -88,9 +87,6 @@ static struct irq_pin_list { [0 ... PIN_MAP_SIZE-1].pin = -1 }; static int irq_2_pin_free_entry = NR_IRQS; - -int vector_irq[NR_VECTORS] __read_mostly = { - [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN}; /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are @@ -668,56 +664,6 @@ static inline int IO_APIC_irq_trigger(in /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ u8 irq_vector[NR_IRQS] __read_mostly; - -int free_irq_vector(int vector) -{ - int irq; - - BUG_ON((vector > LAST_DYNAMIC_VECTOR) || (vector < FIRST_DYNAMIC_VECTOR)); - - spin_lock(&vector_lock); - if ((irq = vector_irq[vector]) == AUTO_ASSIGN) - vector_irq[vector] = FREE_TO_ASSIGN; - spin_unlock(&vector_lock); - - return (irq == AUTO_ASSIGN) ? 0 : -EINVAL; -} - -int assign_irq_vector(int irq) -{ - static unsigned current_vector = FIRST_DYNAMIC_VECTOR; - unsigned vector; - - BUG_ON(irq >= NR_IRQS); - - spin_lock(&vector_lock); - - if ((irq != AUTO_ASSIGN) && (IO_APIC_VECTOR(irq) > 0)) { - spin_unlock(&vector_lock); - return IO_APIC_VECTOR(irq); - } - - vector = current_vector; - while (vector_irq[vector] != FREE_TO_ASSIGN) { - vector += 8; - if (vector > LAST_DYNAMIC_VECTOR) - vector = FIRST_DYNAMIC_VECTOR + ((vector + 1) & 7); - - if (vector == current_vector) { - spin_unlock(&vector_lock); - return -ENOSPC; - } - } - - current_vector = vector; - vector_irq[vector] = irq; - if (irq != AUTO_ASSIGN) - IO_APIC_VECTOR(irq) = vector; - - spin_unlock(&vector_lock); - - return vector; -} static struct hw_interrupt_type ioapic_level_type; static struct hw_interrupt_type ioapic_edge_type; diff -r af992824b5cf -r c7cba853583d xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/irq.c Fri Feb 13 11:22:28 2009 +0900 @@ -27,6 +27,11 @@ boolean_param("noirqbalance", opt_noirqb irq_desc_t irq_desc[NR_VECTORS]; +static DEFINE_SPINLOCK(vector_lock); +int vector_irq[NR_VECTORS] __read_mostly = { + [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN_IRQ +}; + static void __do_IRQ_guest(int vector); void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs) { } @@ -54,6 +59,56 @@ struct hw_interrupt_type no_irq_type = { atomic_t irq_err_count; +int free_irq_vector(int vector) +{ + int irq; + + BUG_ON((vector > LAST_DYNAMIC_VECTOR) || (vector < FIRST_DYNAMIC_VECTOR)); + + spin_lock(&vector_lock); + if ((irq = vector_irq[vector]) == AUTO_ASSIGN_IRQ) + vector_irq[vector] = FREE_TO_ASSIGN_IRQ; + spin_unlock(&vector_lock); + + return (irq == AUTO_ASSIGN_IRQ) ? 
0 : -EINVAL; +} + +int assign_irq_vector(int irq) +{ + static unsigned current_vector = FIRST_DYNAMIC_VECTOR; + unsigned vector; + + BUG_ON(irq >= NR_IRQS); + + spin_lock(&vector_lock); + + if ((irq != AUTO_ASSIGN_IRQ) && (IO_APIC_VECTOR(irq) > 0)) { + spin_unlock(&vector_lock); + return IO_APIC_VECTOR(irq); + } + + vector = current_vector; + while (vector_irq[vector] != FREE_TO_ASSIGN_IRQ) { + vector += 8; + if (vector > LAST_DYNAMIC_VECTOR) + vector = FIRST_DYNAMIC_VECTOR + ((vector + 1) & 7); + + if (vector == current_vector) { + spin_unlock(&vector_lock); + return -ENOSPC; + } + } + + current_vector = vector; + vector_irq[vector] = irq; + if (irq != AUTO_ASSIGN_IRQ) + IO_APIC_VECTOR(irq) = vector; + + spin_unlock(&vector_lock); + + return vector; +} + asmlinkage void do_IRQ(struct cpu_user_regs *regs) { unsigned int vector = regs->entry_vector; @@ -104,7 +159,7 @@ asmlinkage void do_IRQ(struct cpu_user_r spin_unlock(&desc->lock); } -int request_irq(unsigned int irq, +int request_irq_vector(unsigned int vector, void (*handler)(int, void *, struct cpu_user_regs *), unsigned long irqflags, const char * devname, void *dev_id) { @@ -117,7 +172,7 @@ int request_irq(unsigned int irq, * which interrupt is which (messes up the interrupt freeing * logic etc). */ - if (irq >= NR_IRQS) + if (vector >= NR_VECTORS) return -EINVAL; if (!handler) return -EINVAL; @@ -130,34 +185,32 @@ int request_irq(unsigned int irq, action->name = devname; action->dev_id = dev_id; - retval = setup_irq(irq, action); + retval = setup_irq_vector(vector, action); if (retval) xfree(action); return retval; } -void free_irq(unsigned int irq) -{ - unsigned int vector = irq_to_vector(irq); - irq_desc_t *desc = &irq_desc[vector]; +void release_irq_vector(unsigned int vector) +{ + irq_desc_t *desc = &irq_desc[vector]; unsigned long flags; spin_lock_irqsave(&desc->lock,flags); desc->action = NULL; desc->depth = 1; desc->status |= IRQ_DISABLED; - desc->handler->shutdown(irq); + desc->handler->shutdown(vector); spin_unlock_irqrestore(&desc->lock,flags); /* Wait to make sure it's not being used on another CPU */ do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS ); } -int setup_irq(unsigned int irq, struct irqaction *new) -{ - unsigned int vector = irq_to_vector(irq); - irq_desc_t *desc = &irq_desc[vector]; +int setup_irq_vector(unsigned int vector, struct irqaction *new) +{ + irq_desc_t *desc = &irq_desc[vector]; unsigned long flags; spin_lock_irqsave(&desc->lock,flags); diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/mm.c Fri Feb 13 11:22:28 2009 +0900 @@ -179,12 +179,6 @@ l2_pgentry_t *compat_idle_pg_table_l2 = #define l3_disallow_mask(d) L3_DISALLOW_MASK #endif -static void queue_deferred_ops(struct domain *d, unsigned int ops) -{ - ASSERT(d == current->domain); - this_cpu(percpu_mm_info).deferred_ops |= ops; -} - void __init init_frametable(void) { unsigned long nr_pages, page_step, i, mfn; @@ -333,7 +327,7 @@ void share_xen_page_with_guest( page->count_info |= PGC_allocated | 1; if ( unlikely(d->xenheap_pages++ == 0) ) get_knownalive_domain(d); - list_add_tail(&page->list, &d->xenpage_list); + page_list_add_tail(page, &d->xenpage_list); } spin_unlock(&d->page_alloc_lock); @@ -464,14 +458,18 @@ void update_cr3(struct vcpu *v) } -static void invalidate_shadow_ldt(struct vcpu *v) +static void invalidate_shadow_ldt(struct vcpu *v, int flush) { int i; unsigned long pfn; struct page_info *page; - + + BUG_ON(unlikely(in_irq())); + + 
spin_lock(&v->arch.shadow_ldt_lock); + if ( v->arch.shadow_ldt_mapcnt == 0 ) - return; + goto out; v->arch.shadow_ldt_mapcnt = 0; @@ -486,11 +484,12 @@ static void invalidate_shadow_ldt(struct put_page_and_type(page); } - /* Dispose of the (now possibly invalid) mappings from the TLB. */ - if ( v == current ) - queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT); - else - flush_tlb_mask(v->domain->domain_dirty_cpumask); + /* Rid TLBs of stale mappings (guest mappings and shadow mappings). */ + if ( flush ) + flush_tlb_mask(v->vcpu_dirty_cpumask); + + out: + spin_unlock(&v->arch.shadow_ldt_lock); } @@ -541,8 +540,10 @@ int map_ldt_shadow_page(unsigned int off nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW); + spin_lock(&v->arch.shadow_ldt_lock); l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e); v->arch.shadow_ldt_mapcnt++; + spin_unlock(&v->arch.shadow_ldt_lock); return 1; } @@ -989,7 +990,7 @@ void put_page_from_l1e(l1_pgentry_t l1e, (d == e) ) { for_each_vcpu ( d, v ) - invalidate_shadow_ldt(v); + invalidate_shadow_ldt(v, 1); } put_page(page); } @@ -2023,30 +2024,17 @@ int free_page_type(struct page_info *pag unsigned long gmfn; int rc; - if ( likely(owner != NULL) ) - { - /* - * We have to flush before the next use of the linear mapping - * (e.g., update_va_mapping()) or we could end up modifying a page - * that is no longer a page table (and hence screw up ref counts). - */ - if ( current->domain == owner ) - queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS); - else - flush_tlb_mask(owner->domain_dirty_cpumask); - - if ( unlikely(paging_mode_enabled(owner)) ) - { - /* A page table is dirtied when its type count becomes zero. */ - paging_mark_dirty(owner, page_to_mfn(page)); - - if ( shadow_mode_refcounts(owner) ) - return 0; - - gmfn = mfn_to_gmfn(owner, page_to_mfn(page)); - ASSERT(VALID_M2P(gmfn)); - shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn)); - } + if ( likely(owner != NULL) && unlikely(paging_mode_enabled(owner)) ) + { + /* A page table is dirtied when its type count becomes zero. */ + paging_mark_dirty(owner, page_to_mfn(page)); + + if ( shadow_mode_refcounts(owner) ) + return 0; + + gmfn = mfn_to_gmfn(owner, page_to_mfn(page)); + ASSERT(VALID_M2P(gmfn)); + shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn)); } if ( !(type & PGT_partial) ) @@ -2366,8 +2354,8 @@ void cleanup_page_cacheattr(struct page_ int new_guest_cr3(unsigned long mfn) { - struct vcpu *v = current; - struct domain *d = v->domain; + struct vcpu *curr = current; + struct domain *d = curr->domain; int okay; unsigned long old_base_mfn; @@ -2377,19 +2365,19 @@ int new_guest_cr3(unsigned long mfn) okay = paging_mode_refcounts(d) ? 0 /* Old code was broken, but what should it be? 
*/ : mod_l4_entry( - __va(pagetable_get_paddr(v->arch.guest_table)), + __va(pagetable_get_paddr(curr->arch.guest_table)), l4e_from_pfn( mfn, (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)), - pagetable_get_pfn(v->arch.guest_table), 0, 0) == 0; + pagetable_get_pfn(curr->arch.guest_table), 0, 0) == 0; if ( unlikely(!okay) ) { MEM_LOG("Error while installing new compat baseptr %lx", mfn); return 0; } - invalidate_shadow_ldt(v); - write_ptbase(v); + invalidate_shadow_ldt(curr, 0); + write_ptbase(curr); return 1; } @@ -2403,14 +2391,14 @@ int new_guest_cr3(unsigned long mfn) return 0; } - invalidate_shadow_ldt(v); - - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - - v->arch.guest_table = pagetable_from_pfn(mfn); - update_cr3(v); - - write_ptbase(v); + invalidate_shadow_ldt(curr, 0); + + old_base_mfn = pagetable_get_pfn(curr->arch.guest_table); + + curr->arch.guest_table = pagetable_from_pfn(mfn); + update_cr3(curr); + + write_ptbase(curr); if ( likely(old_base_mfn != 0) ) { @@ -2440,6 +2428,10 @@ static void process_deferred_ops(void) flush_tlb_local(); } + /* + * Do this after flushing TLBs, to ensure we see fresh LDT mappings + * via the linear pagetable mapping. + */ if ( deferred_ops & DOP_RELOAD_LDT ) (void)map_ldt_shadow_page(0); @@ -2565,8 +2557,8 @@ int do_mmuext_op( unsigned long mfn = 0, gmfn = 0, type; unsigned int done = 0; struct page_info *page; - struct vcpu *v = current; - struct domain *d = v->domain; + struct vcpu *curr = current; + struct domain *d = curr->domain; if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) { @@ -2729,8 +2721,8 @@ int do_mmuext_op( } } - old_mfn = pagetable_get_pfn(v->arch.guest_table_user); - v->arch.guest_table_user = pagetable_from_pfn(mfn); + old_mfn = pagetable_get_pfn(curr->arch.guest_table_user); + curr->arch.guest_table_user = pagetable_from_pfn(mfn); if ( old_mfn != 0 ) { @@ -2750,7 +2742,7 @@ int do_mmuext_op( case MMUEXT_INVLPG_LOCAL: if ( !paging_mode_enabled(d) - || paging_invlpg(v, op.arg1.linear_addr) != 0 ) + || paging_invlpg(curr, op.arg1.linear_addr) != 0 ) flush_tlb_one_local(op.arg1.linear_addr); break; @@ -2773,7 +2765,7 @@ int do_mmuext_op( } case MMUEXT_TLB_FLUSH_ALL: - flush_tlb_mask(d->domain_dirty_cpumask); + this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS; break; case MMUEXT_INVLPG_ALL: @@ -2809,13 +2801,14 @@ int do_mmuext_op( okay = 0; MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents); } - else if ( (v->arch.guest_context.ldt_ents != ents) || - (v->arch.guest_context.ldt_base != ptr) ) + else if ( (curr->arch.guest_context.ldt_ents != ents) || + (curr->arch.guest_context.ldt_base != ptr) ) { - invalidate_shadow_ldt(v); - v->arch.guest_context.ldt_base = ptr; - v->arch.guest_context.ldt_ents = ents; - load_LDT(v); + invalidate_shadow_ldt(curr, 0); + this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB; + curr->arch.guest_context.ldt_base = ptr; + curr->arch.guest_context.ldt_ents = ents; + load_LDT(curr); this_cpu(percpu_mm_info).deferred_ops &= ~DOP_RELOAD_LDT; if ( ents != 0 ) this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT; @@ -2931,8 +2924,7 @@ int do_mmu_update( struct page_info *page; int rc = 0, okay = 1, i = 0; unsigned int cmd, done = 0; - struct vcpu *v = current; - struct domain *d = v->domain; + struct domain *d = current->domain; struct domain_mmap_cache mapcache; if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) @@ -3042,7 +3034,8 @@ int do_mmu_update( #endif case PGT_writable_page: perfc_incr(writable_mmu_updates); - okay = paging_write_guest_entry(v, va, req.val, 
_mfn(mfn)); + okay = paging_write_guest_entry( + current, va, req.val, _mfn(mfn)); break; } page_unlock(page); @@ -3052,7 +3045,8 @@ int do_mmu_update( else if ( get_page_type(page, PGT_writable_page) ) { perfc_incr(writable_mmu_updates); - okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn)); + okay = paging_write_guest_entry( + current, va, req.val, _mfn(mfn)); put_page_type(page); } @@ -3508,7 +3502,7 @@ int steal_page( /* Unlink from original owner. */ if ( !(memflags & MEMF_no_refcount) ) d->tot_pages--; - list_del(&page->list); + page_list_del(page, &d->page_list); spin_unlock(&d->page_alloc_lock); return 0; @@ -3567,34 +3561,40 @@ int do_update_va_mapping(unsigned long v if ( pl1e ) guest_unmap_l1e(v, pl1e); - process_deferred_ops(); - switch ( flags & UVMF_FLUSHTYPE_MASK ) { case UVMF_TLB_FLUSH: switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) { case UVMF_LOCAL: - flush_tlb_local(); + this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB; break; case UVMF_ALL: - flush_tlb_mask(d->domain_dirty_cpumask); + this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS; break; default: + if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_ALL_TLBS ) + break; if ( unlikely(!is_pv_32on64_domain(d) ? get_user(vmask, (unsigned long *)bmap_ptr) : get_user(vmask, (unsigned int *)bmap_ptr)) ) - rc = -EFAULT; + rc = -EFAULT, vmask = 0; pmask = vcpumask_to_pcpumask(d, vmask); + if ( cpu_isset(smp_processor_id(), pmask) ) + this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB; flush_tlb_mask(pmask); break; } break; case UVMF_INVLPG: + if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_ALL_TLBS ) + break; switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) { case UVMF_LOCAL: + if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB ) + break; if ( !paging_mode_enabled(d) || (paging_invlpg(v, va) != 0) ) flush_tlb_one_local(va); @@ -3606,13 +3606,17 @@ int do_update_va_mapping(unsigned long v if ( unlikely(!is_pv_32on64_domain(d) ? 
get_user(vmask, (unsigned long *)bmap_ptr) : get_user(vmask, (unsigned int *)bmap_ptr)) ) - rc = -EFAULT; + rc = -EFAULT, vmask = 0; pmask = vcpumask_to_pcpumask(d, vmask); + if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB ) + cpu_clear(smp_processor_id(), pmask); flush_tlb_one_mask(pmask, va); break; } break; } + + process_deferred_ops(); return rc; } diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/hap/hap.c --- a/xen/arch/x86/mm/hap/hap.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/mm/hap/hap.c Fri Feb 13 11:22:28 2009 +0900 @@ -45,11 +45,11 @@ /* Override macros from asm/page.h to make them work with mfn_t */ #undef mfn_to_page -#define mfn_to_page(_m) (frame_table + mfn_x(_m)) +#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m)) #undef mfn_valid -#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) +#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn)) #undef page_to_mfn -#define page_to_mfn(_pg) (_mfn((_pg) - frame_table)) +#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg)) /************************************************/ /* HAP LOG DIRTY SUPPORT */ @@ -96,11 +96,10 @@ static struct page_info *hap_alloc(struc ASSERT(hap_locked_by_me(d)); - if ( unlikely(list_empty(&d->arch.paging.hap.freelist)) ) + pg = page_list_remove_head(&d->arch.paging.hap.freelist); + if ( unlikely(!pg) ) return NULL; - pg = list_entry(d->arch.paging.hap.freelist.next, struct page_info, list); - list_del(&pg->list); d->arch.paging.hap.free_pages--; p = hap_map_domain_page(page_to_mfn(pg)); @@ -118,7 +117,7 @@ static void hap_free(struct domain *d, m ASSERT(hap_locked_by_me(d)); d->arch.paging.hap.free_pages++; - list_add_tail(&pg->list, &d->arch.paging.hap.freelist); + page_list_add_tail(pg, &d->arch.paging.hap.freelist); } static struct page_info *hap_alloc_p2m_page(struct domain *d) @@ -210,15 +209,13 @@ hap_set_allocation(struct domain *d, uns } d->arch.paging.hap.free_pages++; d->arch.paging.hap.total_pages++; - list_add_tail(&pg->list, &d->arch.paging.hap.freelist); + page_list_add_tail(pg, &d->arch.paging.hap.freelist); } else if ( d->arch.paging.hap.total_pages > pages ) { /* Need to return memory to domheap */ - ASSERT(!list_empty(&d->arch.paging.hap.freelist)); - pg = list_entry(d->arch.paging.hap.freelist.next, - struct page_info, list); - list_del(&pg->list); + pg = page_list_remove_head(&d->arch.paging.hap.freelist); + ASSERT(pg); d->arch.paging.hap.free_pages--; d->arch.paging.hap.total_pages--; pg->count_info = 0; @@ -393,7 +390,7 @@ void hap_domain_init(struct domain *d) void hap_domain_init(struct domain *d) { hap_lock_init(d); - INIT_LIST_HEAD(&d->arch.paging.hap.freelist); + INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist); /* This domain will use HAP for log-dirty mode */ paging_log_dirty_init(d, hap_enable_log_dirty, hap_disable_log_dirty, diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/hap/p2m-ept.c --- a/xen/arch/x86/mm/hap/p2m-ept.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/mm/hap/p2m-ept.c Fri Feb 13 11:22:28 2009 +0900 @@ -63,7 +63,7 @@ static int ept_set_middle_entry(struct d pg->count_info = 1; pg->u.inuse.type_info = 1 | PGT_validated; - list_add_tail(&pg->list, &d->arch.p2m->pages); + page_list_add_tail(pg, &d->arch.p2m->pages); ept_entry->emt = 0; ept_entry->igmt = 0; @@ -116,12 +116,12 @@ static int ept_next_level(struct domain } /* - * TODO: ept_set_entry() computes 'need_modify_vtd_table' for itself, + * ept_set_entry() computes 'need_modify_vtd_table' for itself, * by observing whether any gfn->mfn translations are modified. 
*/ static int -_ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, - unsigned int order, p2m_type_t p2mt, int need_modify_vtd_table) +ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, + unsigned int order, p2m_type_t p2mt) { ept_entry_t *table = NULL; unsigned long gfn_remainder = gfn, offset = 0; @@ -131,6 +131,7 @@ _ept_set_entry(struct domain *d, unsigne int walk_level = order / EPT_TABLE_ORDER; int direct_mmio = (p2mt == p2m_mmio_direct); uint8_t igmt = 0; + int need_modify_vtd_table = 1; /* we only support 4k and 2m pages now */ @@ -171,14 +172,23 @@ _ept_set_entry(struct domain *d, unsigne if ( ret == GUEST_TABLE_SUPER_PAGE ) { - ept_entry->mfn = mfn_x(mfn) - offset; + if ( ept_entry->mfn == (mfn_x(mfn) - offset) ) + need_modify_vtd_table = 0; + else + ept_entry->mfn = mfn_x(mfn) - offset; + if ( ept_entry->avail1 == p2m_ram_logdirty && p2mt == p2m_ram_rw ) for ( i = 0; i < 512; i++ ) paging_mark_dirty(d, mfn_x(mfn)-offset+i); } else - ept_entry->mfn = mfn_x(mfn); + { + if ( ept_entry->mfn == mfn_x(mfn) ) + need_modify_vtd_table = 0; + else + ept_entry->mfn = mfn_x(mfn); + } ept_entry->avail1 = p2mt; ept_entry->rsvd = 0; @@ -239,7 +249,10 @@ _ept_set_entry(struct domain *d, unsigne &igmt, direct_mmio); split_ept_entry->igmt = igmt; - split_ept_entry->mfn = mfn_x(mfn); + if ( split_ept_entry->mfn == mfn_x(mfn) ) + need_modify_vtd_table = 0; + else + split_ept_entry->mfn = mfn_x(mfn); split_ept_entry->avail1 = p2mt; ept_p2m_type_to_flags(split_ept_entry, p2mt); @@ -287,17 +300,6 @@ out: } return rv; -} - -static int -ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, - unsigned int order, p2m_type_t p2mt) -{ - /* ept_set_entry() are called from set_entry(), - * We should always create VT-d page table acording - * to the gfn to mfn translations changes. - */ - return _ept_set_entry(d, gfn, mfn, order, p2mt, 1); } /* Read ept p2m entries */ @@ -393,6 +395,21 @@ static mfn_t ept_get_entry_current(unsig return ept_get_entry(current->domain, gfn, t, q); } +/* To test if the new emt type is the same with old, + * return 1 to not to reset ept entry. + */ +static int need_modify_ept_entry(struct domain *d, unsigned long gfn, + unsigned long mfn, uint8_t o_igmt, + uint8_t o_emt, p2m_type_t p2mt) +{ + uint8_t igmt, emt; + emt = epte_get_entry_emt(d, gfn, mfn, &igmt, + (p2mt == p2m_mmio_direct)); + if ( (emt == o_emt) && (igmt == o_igmt) ) + return 0; + return 1; +} + void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn, unsigned long end_gfn) { @@ -401,6 +418,7 @@ void ept_change_entry_emt_with_range(str uint64_t epte; int order = 0; unsigned long mfn; + uint8_t o_igmt, o_emt; for ( gfn = start_gfn; gfn <= end_gfn; gfn++ ) { @@ -410,7 +428,9 @@ void ept_change_entry_emt_with_range(str mfn = (epte & EPTE_MFN_MASK) >> PAGE_SHIFT; if ( !mfn_valid(mfn) ) continue; - p2mt = (epte & EPTE_AVAIL1_MASK) >> 8; + p2mt = (epte & EPTE_AVAIL1_MASK) >> EPTE_AVAIL1_SHIFT; + o_igmt = (epte & EPTE_IGMT_MASK) >> EPTE_IGMT_SHIFT; + o_emt = (epte & EPTE_EMT_MASK) >> EPTE_EMT_SHIFT; order = 0; if ( epte & EPTE_SUPER_PAGE_MASK ) @@ -422,30 +442,26 @@ void ept_change_entry_emt_with_range(str * Set emt for super page. */ order = EPT_TABLE_ORDER; - /* vmx_set_uc_mode() dont' touch the gfn to mfn - * translations, only modify the emt field of the EPT entries. - * so we need not modify the current VT-d page tables. 
- */ - _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0); + if ( need_modify_ept_entry(d, gfn, mfn, + o_igmt, o_emt, p2mt) ) + ept_set_entry(d, gfn, _mfn(mfn), order, p2mt); gfn += 0x1FF; } else { - /* 1)change emt for partial entries of the 2m area. - * 2)vmx_set_uc_mode() dont' touch the gfn to mfn - * translations, only modify the emt field of the EPT entries. - * so we need not modify the current VT-d page tables. - */ - _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt,0); + /* change emt for partial entries of the 2m area. */ + if ( need_modify_ept_entry(d, gfn, mfn, + o_igmt, o_emt, p2mt) ) + ept_set_entry(d, gfn, _mfn(mfn), order, p2mt); gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF; } } - else /* 1)gfn assigned with 4k - * 2)vmx_set_uc_mode() dont' touch the gfn to mfn - * translations, only modify the emt field of the EPT entries. - * so we need not modify the current VT-d page tables. - */ - _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0); + else /* gfn assigned with 4k */ + { + if ( need_modify_ept_entry(d, gfn, mfn, + o_igmt, o_emt, p2mt) ) + ept_set_entry(d, gfn, _mfn(mfn), order, p2mt); + } } } diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/mm/p2m.c Fri Feb 13 11:22:28 2009 +0900 @@ -89,11 +89,11 @@ /* Override macros from asm/page.h to make them work with mfn_t */ #undef mfn_to_page -#define mfn_to_page(_m) (frame_table + mfn_x(_m)) +#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m)) #undef mfn_valid -#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) +#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn)) #undef page_to_mfn -#define page_to_mfn(_pg) (_mfn((_pg) - frame_table)) +#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg)) /* PTE flags for the various types of p2m entry */ @@ -175,7 +175,7 @@ p2m_next_level(struct domain *d, mfn_t * struct page_info *pg = d->arch.p2m->alloc_page(d); if ( pg == NULL ) return 0; - list_add_tail(&pg->list, &d->arch.p2m->pages); + page_list_add_tail(pg, &d->arch.p2m->pages); pg->u.inuse.type_info = type | 1 | PGT_validated; pg->count_info = 1; @@ -214,7 +214,7 @@ p2m_next_level(struct domain *d, mfn_t * struct page_info *pg = d->arch.p2m->alloc_page(d); if ( pg == NULL ) return 0; - list_add_tail(&pg->list, &d->arch.p2m->pages); + page_list_add_tail(pg, &d->arch.p2m->pages); pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated; pg->count_info = 1; @@ -300,18 +300,18 @@ p2m_pod_cache_add(struct domain *d, for(i=0; i < 1 << order ; i++) { p = page + i; - list_del(&p->list); + page_list_del(p, &d->page_list); } /* Then add the first one to the appropriate populate-on-demand list */ switch(order) { case 9: - list_add_tail(&page->list, &p2md->pod.super); /* lock: page_alloc */ + page_list_add_tail(page, &p2md->pod.super); /* lock: page_alloc */ p2md->pod.count += 1 << order; break; case 0: - list_add_tail(&page->list, &p2md->pod.single); /* lock: page_alloc */ + page_list_add_tail(page, &p2md->pod.single); /* lock: page_alloc */ p2md->pod.count += 1 ; break; default: @@ -334,54 +334,51 @@ static struct page_info * p2m_pod_cache_ struct page_info *p = NULL; int i; - if ( order == 9 && list_empty(&p2md->pod.super) ) + if ( order == 9 && page_list_empty(&p2md->pod.super) ) { return NULL; } - else if ( order == 0 && list_empty(&p2md->pod.single) ) + else if ( order == 0 && page_list_empty(&p2md->pod.single) ) { unsigned long mfn; struct page_info *q; - BUG_ON( list_empty(&p2md->pod.super) ); + BUG_ON( page_list_empty(&p2md->pod.super) ); /* Break up 
a superpage to make single pages. NB count doesn't * need to be adjusted. */ printk("%s: Breaking up superpage.\n", __func__); - p = list_entry(p2md->pod.super.next, struct page_info, list); - list_del(&p->list); + p = page_list_remove_head(&p2md->pod.super); mfn = mfn_x(page_to_mfn(p)); for ( i=0; i<(1<<9); i++ ) { q = mfn_to_page(_mfn(mfn+i)); - list_add_tail(&q->list, &p2md->pod.single); + page_list_add_tail(q, &p2md->pod.single); } } switch ( order ) { case 9: - BUG_ON( list_empty(&p2md->pod.super) ); - p = list_entry(p2md->pod.super.next, struct page_info, list); + BUG_ON( page_list_empty(&p2md->pod.super) ); + p = page_list_remove_head(&p2md->pod.super); p2md->pod.count -= 1 << order; /* Lock: page_alloc */ break; case 0: - BUG_ON( list_empty(&p2md->pod.single) ); - p = list_entry(p2md->pod.single.next, struct page_info, list); + BUG_ON( page_list_empty(&p2md->pod.single) ); + p = page_list_remove_head(&p2md->pod.single); p2md->pod.count -= 1; break; default: BUG(); } - list_del(&p->list); - /* Put the pages back on the domain page_list */ for ( i = 0 ; i < (1 << order) ; i++ ) { BUG_ON(page_get_owner(p + i) != d); - list_add_tail(&p[i].list, &d->page_list); + page_list_add_tail(p + i, &d->page_list); } return p; @@ -425,7 +422,7 @@ p2m_pod_set_cache_target(struct domain * spin_lock(&d->page_alloc_lock); if ( (p2md->pod.count - pod_target) > (1>>9) - && !list_empty(&p2md->pod.super) ) + && !page_list_empty(&p2md->pod.super) ) order = 9; else order = 0; @@ -535,38 +532,27 @@ p2m_pod_empty_cache(struct domain *d) p2m_pod_empty_cache(struct domain *d) { struct p2m_domain *p2md = d->arch.p2m; - struct list_head *q, *p; + struct page_info *page; spin_lock(&d->page_alloc_lock); - list_for_each_safe(p, q, &p2md->pod.super) /* lock: page_alloc */ + while ( (page = page_list_remove_head(&p2md->pod.super)) ) { int i; - struct page_info *page; - list_del(p); - - page = list_entry(p, struct page_info, list); - for ( i = 0 ; i < (1 << 9) ; i++ ) { BUG_ON(page_get_owner(page + i) != d); - list_add_tail(&page[i].list, &d->page_list); + page_list_add_tail(page + i, &d->page_list); } p2md->pod.count -= 1<<9; } - list_for_each_safe(p, q, &p2md->pod.single) - { - struct page_info *page; - - list_del(p); - - page = list_entry(p, struct page_info, list); - + while ( (page = page_list_remove_head(&p2md->pod.single)) ) + { BUG_ON(page_get_owner(page) != d); - list_add_tail(&page->list, &d->page_list); + page_list_add_tail(page, &d->page_list); p2md->pod.count -= 1; } @@ -952,7 +938,7 @@ p2m_pod_emergency_sweep_super(struct dom * NB that this is a zero-sum game; we're increasing our cache size * by increasing our 'debt'. Since we hold the p2m lock, * (entry_count - count) must remain the same. 
*/ - if ( !list_empty(&p2md->pod.super) && i < limit ) + if ( !page_list_empty(&p2md->pod.super) && i < limit ) break; } @@ -1035,12 +1021,12 @@ p2m_pod_demand_populate(struct domain *d } /* If we're low, start a sweep */ - if ( order == 9 && list_empty(&p2md->pod.super) ) + if ( order == 9 && page_list_empty(&p2md->pod.super) ) p2m_pod_emergency_sweep_super(d); - if ( list_empty(&p2md->pod.single) && + if ( page_list_empty(&p2md->pod.single) && ( ( order == 0 ) - || (order == 9 && list_empty(&p2md->pod.super) ) ) ) + || (order == 9 && page_list_empty(&p2md->pod.super) ) ) ) p2m_pod_emergency_sweep(d); /* Keep track of the highest gfn demand-populated by a guest fault */ @@ -1477,9 +1463,9 @@ int p2m_init(struct domain *d) memset(p2m, 0, sizeof(*p2m)); p2m_lock_init(p2m); - INIT_LIST_HEAD(&p2m->pages); - INIT_LIST_HEAD(&p2m->pod.super); - INIT_LIST_HEAD(&p2m->pod.single); + INIT_PAGE_LIST_HEAD(&p2m->pages); + INIT_PAGE_LIST_HEAD(&p2m->pod.super); + INIT_PAGE_LIST_HEAD(&p2m->pod.single); p2m->set_entry = p2m_set_entry; p2m->get_entry = p2m_gfn_to_mfn; @@ -1540,7 +1526,6 @@ int p2m_alloc_table(struct domain *d, { mfn_t mfn = _mfn(INVALID_MFN); - struct list_head *entry; struct page_info *page, *p2m_top; unsigned int page_count = 0; unsigned long gfn = -1UL; @@ -1566,7 +1551,7 @@ int p2m_alloc_table(struct domain *d, p2m_unlock(p2m); return -ENOMEM; } - list_add_tail(&p2m_top->list, &p2m->pages); + page_list_add_tail(p2m_top, &p2m->pages); p2m_top->count_info = 1; p2m_top->u.inuse.type_info = @@ -1587,11 +1572,8 @@ int p2m_alloc_table(struct domain *d, goto error; /* Copy all existing mappings from the page list and m2p */ - for ( entry = d->page_list.next; - entry != &d->page_list; - entry = entry->next ) - { - page = list_entry(entry, struct page_info, list); + page_list_for_each(page, &d->page_list) + { mfn = page_to_mfn(page); gfn = get_gpfn_from_mfn(mfn_x(mfn)); page_count++; @@ -1621,19 +1603,14 @@ void p2m_teardown(struct domain *d) /* Return all the p2m pages to Xen. * We know we don't have any extra mappings to these pages */ { - struct list_head *entry, *n; struct page_info *pg; struct p2m_domain *p2m = d->arch.p2m; p2m_lock(p2m); d->arch.phys_table = pagetable_null(); - list_for_each_safe(entry, n, &p2m->pages) - { - pg = list_entry(entry, struct page_info, list); - list_del(entry); + while ( (pg = page_list_remove_head(&p2m->pages)) ) p2m->free_page(d, pg); - } p2m_unlock(p2m); } diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/paging.c --- a/xen/arch/x86/mm/paging.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/mm/paging.c Fri Feb 13 11:22:28 2009 +0900 @@ -47,11 +47,11 @@ /************************************************/ /* Override macros from asm/page.h to make them work with mfn_t */ #undef mfn_to_page -#define mfn_to_page(_m) (frame_table + mfn_x(_m)) +#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m)) #undef mfn_valid -#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) +#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn)) #undef page_to_mfn -#define page_to_mfn(_pg) (_mfn((_pg) - frame_table)) +#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg)) /* The log-dirty lock. This protects the log-dirty bitmap from * concurrent accesses (and teardowns, etc). 
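[Illustrative sketch, not part of the changeset.] A large share of the memory-management hunks above (mm.c, hap.c, p2m.c, paging.c) are a mechanical conversion from open-coded struct list_head handling of struct page_info to the page_list_* helpers: INIT_PAGE_LIST_HEAD(), page_list_add_tail(), page_list_del(), page_list_for_each() and page_list_remove_head(). The standalone sketch below only models the calling convention being adopted; the struct layout and helper bodies are stand-ins (the real definitions belong to the asm-x86/mm.h changes listed in the diffstat). The practical effect is visible in the hap_alloc() and p2m_pod_empty_cache() hunks: the old empty-check plus list_entry() plus list_del() sequence collapses into one page_list_remove_head() call whose NULL return doubles as the emptiness test.

/* Stand-in types and helpers that mimic the page_list_* calling
 * convention used above; not Xen's real implementation. */
#include <stdio.h>
#include <stddef.h>

struct page {                         /* stand-in for struct page_info */
    struct page *next, *prev;         /* intrusive linkage */
    unsigned long id;
};

struct page_list_head { struct page *head, *tail; };

static void page_list_add_tail(struct page *pg, struct page_list_head *l)
{
    pg->next = NULL;
    pg->prev = l->tail;
    if (l->tail)
        l->tail->next = pg;
    else
        l->head = pg;
    l->tail = pg;
}

/* Returns NULL when the list is empty, so callers need no separate test. */
static struct page *page_list_remove_head(struct page_list_head *l)
{
    struct page *pg = l->head;

    if (!pg)
        return NULL;
    l->head = pg->next;
    if (l->head)
        l->head->prev = NULL;
    else
        l->tail = NULL;
    return pg;
}

int main(void)
{
    struct page_list_head freelist = { NULL, NULL };
    struct page pages[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };
    struct page *pg;

    for (int i = 0; i < 3; i++)
        page_list_add_tail(&pages[i], &freelist);

    while ((pg = page_list_remove_head(&freelist)) != NULL)
        printf("removed page %lu\n", pg->id);
    return 0;
}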
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/mm/shadow/common.c Fri Feb 13 11:22:28 2009 +0900 @@ -48,9 +48,9 @@ void shadow_domain_init(struct domain *d int i; shadow_lock_init(d); for ( i = 0; i <= SHADOW_MAX_ORDER; i++ ) - INIT_LIST_HEAD(&d->arch.paging.shadow.freelists[i]); - INIT_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist); - INIT_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows); + INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.freelists[i]); + INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist); + INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows); /* Use shadow pagetables for log-dirty support */ paging_log_dirty_init(d, shadow_enable_log_dirty, @@ -1291,9 +1291,9 @@ static inline int space_is_available( for ( ; order <= shadow_max_order(d); ++order ) { unsigned int n = count; - const struct list_head *p; - - list_for_each ( p, &d->arch.paging.shadow.freelists[order] ) + const struct page_info *sp; + + page_list_for_each ( sp, &d->arch.paging.shadow.freelists[order] ) if ( --n == 0 ) return 1; count = (count + 1) >> 1; @@ -1306,8 +1306,8 @@ static inline int space_is_available( * non-Xen mappings in this top-level shadow mfn */ static void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn) { - struct shadow_page_info *sp = mfn_to_shadow_page(smfn); - switch ( sp->type ) + struct page_info *sp = mfn_to_page(smfn); + switch ( sp->u.sh.type ) { case SH_type_l2_32_shadow: SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, 2)(v,smfn); @@ -1322,7 +1322,7 @@ static void shadow_unhook_mappings(struc break; #endif default: - SHADOW_ERROR("top-level shadow has bad type %08x\n", sp->type); + SHADOW_ERROR("top-level shadow has bad type %08x\n", sp->u.sh.type); BUG(); } } @@ -1334,7 +1334,7 @@ static inline void trace_shadow_prealloc /* Convert smfn to gfn */ unsigned long gfn; ASSERT(mfn_valid(smfn)); - gfn = mfn_to_gfn(d, _mfn(mfn_to_shadow_page(smfn)->backpointer)); + gfn = mfn_to_gfn(d, _mfn(mfn_to_page(smfn)->v.sh.back)); __trace_var(TRC_SHADOW_PREALLOC_UNPIN, 0/*!tsc*/, sizeof(gfn), (unsigned char*)&gfn); } @@ -1350,8 +1350,7 @@ static void _shadow_prealloc( /* Need a vpcu for calling unpins; for now, since we don't have * per-vcpu shadows, any will do */ struct vcpu *v, *v2; - struct list_head *l, *t; - struct shadow_page_info *sp; + struct page_info *sp, *t; mfn_t smfn; int i; @@ -1365,10 +1364,9 @@ static void _shadow_prealloc( /* Stage one: walk the list of pinned pages, unpinning them */ perfc_incr(shadow_prealloc_1); - list_for_each_backwards_safe(l, t, &d->arch.paging.shadow.pinned_shadows) - { - sp = list_entry(l, struct shadow_page_info, list); - smfn = shadow_page_to_mfn(sp); + page_list_for_each_safe_reverse(sp, t, &d->arch.paging.shadow.pinned_shadows) + { + smfn = page_to_mfn(sp); /* Unpin this top-level shadow */ trace_shadow_prealloc_unpin(d, smfn); @@ -1427,8 +1425,7 @@ void shadow_prealloc(struct domain *d, u * this domain's shadows */ static void shadow_blow_tables(struct domain *d) { - struct list_head *l, *t; - struct shadow_page_info *sp; + struct page_info *sp, *t; struct vcpu *v = d->vcpu[0]; mfn_t smfn; int i; @@ -1436,10 +1433,9 @@ static void shadow_blow_tables(struct do ASSERT(v != NULL); /* Pass one: unpin all pinned pages */ - list_for_each_backwards_safe(l,t, &d->arch.paging.shadow.pinned_shadows) - { - sp = list_entry(l, struct shadow_page_info, list); - smfn = shadow_page_to_mfn(sp); + page_list_for_each_safe_reverse(sp, t, 
&d->arch.paging.shadow.pinned_shadows) + { + smfn = page_to_mfn(sp); sh_unpin(v, smfn); } @@ -1493,6 +1489,18 @@ __initcall(shadow_blow_tables_keyhandler __initcall(shadow_blow_tables_keyhandler_init); #endif /* !NDEBUG */ +static inline struct page_info * +next_shadow(const struct page_info *sp) +{ + return sp->next_shadow ? mfn_to_page(_mfn(sp->next_shadow)) : NULL; +} + +static inline void +set_next_shadow(struct page_info *sp, struct page_info *next) +{ + sp->next_shadow = next ? mfn_x(page_to_mfn(next)) : 0; +} + /* Allocate another shadow's worth of (contiguous, aligned) pages, * and fill in the type and backpointer fields of their page_infos. * Never fails to allocate. */ @@ -1500,7 +1508,7 @@ mfn_t shadow_alloc(struct domain *d, u32 shadow_type, unsigned long backpointer) { - struct shadow_page_info *sp = NULL; + struct page_info *sp = NULL; unsigned int order = shadow_order(shadow_type); cpumask_t mask; void *p; @@ -1515,7 +1523,7 @@ mfn_t shadow_alloc(struct domain *d, /* Find smallest order which can satisfy the request. */ for ( i = order; i <= SHADOW_MAX_ORDER; i++ ) - if ( !list_empty(&d->arch.paging.shadow.freelists[i]) ) + if ( (sp = page_list_remove_head(&d->arch.paging.shadow.freelists[i])) ) goto found; /* If we get here, we failed to allocate. This should never happen. @@ -1526,16 +1534,12 @@ mfn_t shadow_alloc(struct domain *d, BUG(); found: - sp = list_entry(d->arch.paging.shadow.freelists[i].next, - struct shadow_page_info, list); - list_del(&sp->list); - /* We may have to halve the chunk a number of times. */ while ( i != order ) { i--; - sp->order = i; - list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[i]); + sp->v.free.order = i; + page_list_add_tail(sp, &d->arch.paging.shadow.freelists[i]); sp += 1 << i; } d->arch.paging.shadow.free_pages -= 1 << order; @@ -1553,26 +1557,26 @@ mfn_t shadow_alloc(struct domain *d, flush_tlb_mask(mask); } /* Now safe to clear the page for reuse */ - p = sh_map_domain_page(shadow_page_to_mfn(sp+i)); + p = sh_map_domain_page(page_to_mfn(sp+i)); ASSERT(p != NULL); clear_page(p); sh_unmap_domain_page(p); - INIT_LIST_HEAD(&sp[i].list); - sp[i].type = shadow_type; - sp[i].pinned = 0; - sp[i].count = 0; - sp[i].backpointer = backpointer; - sp[i].next_shadow = NULL; + INIT_PAGE_LIST_ENTRY(&sp[i].list); + sp[i].u.sh.type = shadow_type; + sp[i].u.sh.pinned = 0; + sp[i].u.sh.count = 0; + sp[i].v.sh.back = backpointer; + set_next_shadow(&sp[i], NULL); perfc_incr(shadow_alloc_count); } - return shadow_page_to_mfn(sp); + return page_to_mfn(sp); } /* Return some shadow pages to the pool. */ void shadow_free(struct domain *d, mfn_t smfn) { - struct shadow_page_info *sp = mfn_to_shadow_page(smfn); + struct page_info *sp = mfn_to_page(smfn); u32 shadow_type; unsigned long order; unsigned long mask; @@ -1581,7 +1585,7 @@ void shadow_free(struct domain *d, mfn_t ASSERT(shadow_locked_by_me(d)); perfc_incr(shadow_free); - shadow_type = sp->type; + shadow_type = sp->u.sh.type; ASSERT(shadow_type != SH_type_none); ASSERT(shadow_type != SH_type_p2m_table); order = shadow_order(shadow_type); @@ -1605,7 +1609,7 @@ void shadow_free(struct domain *d, mfn_t } #endif /* Strip out the type: this is now a free shadow page */ - sp[i].type = 0; + sp[i].u.sh.type = 0; /* Remember the TLB timestamp so we will know whether to flush * TLBs when we reuse the page. 
Because the destructors leave the * contents of the pages in place, we can delay TLB flushes until @@ -1618,22 +1622,24 @@ void shadow_free(struct domain *d, mfn_t for ( ; order < shadow_max_order(d); ++order ) { mask = 1 << order; - if ( (mfn_x(shadow_page_to_mfn(sp)) & mask) ) { + if ( (mfn_x(page_to_mfn(sp)) & mask) ) { /* Merge with predecessor block? */ - if ( ((sp-mask)->type != PGT_none) || ((sp-mask)->order != order) ) + if ( ((sp-mask)->u.sh.type != PGT_none) || + ((sp-mask)->v.free.order != order) ) break; - list_del(&(sp-mask)->list); sp -= mask; + page_list_del(sp, &d->arch.paging.shadow.freelists[order]); } else { /* Merge with successor block? */ - if ( ((sp+mask)->type != PGT_none) || ((sp+mask)->order != order) ) + if ( ((sp+mask)->u.sh.type != PGT_none) || + ((sp+mask)->v.free.order != order) ) break; - list_del(&(sp+mask)->list); - } - } - - sp->order = order; - list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]); + page_list_del(sp + mask, &d->arch.paging.shadow.freelists[order]); + } + } + + sp->v.free.order = order; + page_list_add_tail(sp, &d->arch.paging.shadow.freelists[order]); } /* Divert some memory from the pool to be used by the p2m mapping. @@ -1672,7 +1678,7 @@ sh_alloc_p2m_pages(struct domain *d) */ page_set_owner(&pg[i], d); pg[i].count_info = 1; - list_add_tail(&pg[i].list, &d->arch.paging.shadow.p2m_freelist); + page_list_add_tail(&pg[i], &d->arch.paging.shadow.p2m_freelist); } return 1; } @@ -1681,25 +1687,22 @@ static struct page_info * static struct page_info * shadow_alloc_p2m_page(struct domain *d) { - struct list_head *entry; struct page_info *pg; mfn_t mfn; void *p; shadow_lock(d); - if ( list_empty(&d->arch.paging.shadow.p2m_freelist) && + if ( page_list_empty(&d->arch.paging.shadow.p2m_freelist) && !sh_alloc_p2m_pages(d) ) { shadow_unlock(d); return NULL; } - entry = d->arch.paging.shadow.p2m_freelist.next; - list_del(entry); + pg = page_list_remove_head(&d->arch.paging.shadow.p2m_freelist); shadow_unlock(d); - pg = list_entry(entry, struct page_info, list); mfn = page_to_mfn(pg); p = sh_map_domain_page(mfn); clear_page(p); @@ -1780,7 +1783,7 @@ static unsigned int sh_set_allocation(st unsigned int pages, int *preempted) { - struct shadow_page_info *sp; + struct page_info *sp; unsigned int lower_bound; unsigned int j, order = shadow_max_order(d); @@ -1802,7 +1805,7 @@ static unsigned int sh_set_allocation(st if ( d->arch.paging.shadow.total_pages < pages ) { /* Need to allocate more memory from domheap */ - sp = (struct shadow_page_info *) + sp = (struct page_info *) alloc_domheap_pages(NULL, order, MEMF_node(domain_to_node(d))); if ( sp == NULL ) { @@ -1813,23 +1816,26 @@ static unsigned int sh_set_allocation(st d->arch.paging.shadow.total_pages += 1 << order; for ( j = 0; j < 1U << order; j++ ) { - sp[j].type = 0; - sp[j].pinned = 0; - sp[j].count = 0; - sp[j].mbz = 0; + sp[j].u.sh.type = 0; + sp[j].u.sh.pinned = 0; + sp[j].u.sh.count = 0; sp[j].tlbflush_timestamp = 0; /* Not in any TLB */ } - sp->order = order; - list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]); + sp->v.free.order = order; + page_list_add_tail(sp, &d->arch.paging.shadow.freelists[order]); } else if ( d->arch.paging.shadow.total_pages > pages ) { /* Need to return memory to domheap */ _shadow_prealloc(d, order, 1); - ASSERT(!list_empty(&d->arch.paging.shadow.freelists[order])); - sp = list_entry(d->arch.paging.shadow.freelists[order].next, - struct shadow_page_info, list); - list_del(&sp->list); + sp = 
page_list_remove_head(&d->arch.paging.shadow.freelists[order]); + ASSERT(sp); + /* + * The pages were allocated anonymously, but the owner field + * gets overwritten normally, so need to clear it here. + */ + for ( j = 0; j < 1U << order; j++ ) + page_set_owner(&((struct page_info *)sp)[j], NULL); d->arch.paging.shadow.free_pages -= 1 << order; d->arch.paging.shadow.total_pages -= 1 << order; free_domheap_pages((struct page_info *)sp, order); @@ -1880,7 +1886,7 @@ static void sh_hash_audit_bucket(struct static void sh_hash_audit_bucket(struct domain *d, int bucket) /* Audit one bucket of the hash table */ { - struct shadow_page_info *sp, *x; + struct page_info *sp, *x; if ( !(SHADOW_AUDIT_ENABLE) ) return; @@ -1889,38 +1895,39 @@ static void sh_hash_audit_bucket(struct while ( sp ) { /* Not a shadow? */ - BUG_ON( sp->mbz != 0 ); + BUG_ON( sp->count_info != 0 ); /* Bogus type? */ - BUG_ON( sp->type == 0 ); - BUG_ON( sp->type > SH_type_max_shadow ); + BUG_ON( sp->u.sh.type == 0 ); + BUG_ON( sp->u.sh.type > SH_type_max_shadow ); /* Wrong bucket? */ - BUG_ON( sh_hash(sp->backpointer, sp->type) != bucket ); + BUG_ON( sh_hash(sp->v.sh.back, sp->u.sh.type) != bucket ); /* Duplicate entry? */ - for ( x = sp->next_shadow; x; x = x->next_shadow ) - BUG_ON( x->backpointer == sp->backpointer && x->type == sp->type ); + for ( x = next_shadow(sp); x; x = next_shadow(x) ) + BUG_ON( x->v.sh.back == sp->v.sh.back && + x->u.sh.type == sp->u.sh.type ); /* Follow the backpointer to the guest pagetable */ - if ( sp->type != SH_type_fl1_32_shadow - && sp->type != SH_type_fl1_pae_shadow - && sp->type != SH_type_fl1_64_shadow ) - { - struct page_info *gpg = mfn_to_page(_mfn(sp->backpointer)); + if ( sp->u.sh.type != SH_type_fl1_32_shadow + && sp->u.sh.type != SH_type_fl1_pae_shadow + && sp->u.sh.type != SH_type_fl1_64_shadow ) + { + struct page_info *gpg = mfn_to_page(_mfn(sp->v.sh.back)); /* Bad shadow flags on guest page? */ - BUG_ON( !(gpg->shadow_flags & (1<<sp->type)) ); + BUG_ON( !(gpg->shadow_flags & (1<<sp->u.sh.type)) ); /* Bad type count on guest page? */ #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) - if ( sp->type == SH_type_l1_32_shadow - || sp->type == SH_type_l1_pae_shadow - || sp->type == SH_type_l1_64_shadow ) + if ( sp->u.sh.type == SH_type_l1_32_shadow + || sp->u.sh.type == SH_type_l1_pae_shadow + || sp->u.sh.type == SH_type_l1_64_shadow ) { if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page && (gpg->u.inuse.type_info & PGT_count_mask) != 0 ) { if ( !page_is_out_of_sync(gpg) ) { - SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")" + SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")" " and not OOS but has typecount %#lx\n", - sp->backpointer, - mfn_x(shadow_page_to_mfn(sp)), + sp->v.sh.back, + mfn_x(page_to_mfn(sp)), gpg->u.inuse.type_info); BUG(); } @@ -1931,15 +1938,15 @@ static void sh_hash_audit_bucket(struct if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page && (gpg->u.inuse.type_info & PGT_count_mask) != 0 ) { - SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")" + SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")" " but has typecount %#lx\n", - sp->backpointer, mfn_x(shadow_page_to_mfn(sp)), + sp->v.sh.back, mfn_x(page_to_mfn(sp)), gpg->u.inuse.type_info); BUG(); } } /* That entry was OK; on we go */ - sp = sp->next_shadow; + sp = next_shadow(sp); } } @@ -1972,15 +1979,15 @@ static void sh_hash_audit(struct domain * Returns 0 for success, 1 for error. 
*/ static int shadow_hash_alloc(struct domain *d) { - struct shadow_page_info **table; + struct page_info **table; ASSERT(shadow_locked_by_me(d)); ASSERT(!d->arch.paging.shadow.hash_table); - table = xmalloc_array(struct shadow_page_info *, SHADOW_HASH_BUCKETS); + table = xmalloc_array(struct page_info *, SHADOW_HASH_BUCKETS); if ( !table ) return 1; memset(table, 0, - SHADOW_HASH_BUCKETS * sizeof (struct shadow_page_info *)); + SHADOW_HASH_BUCKETS * sizeof (struct page_info *)); d->arch.paging.shadow.hash_table = table; return 0; } @@ -2002,7 +2009,7 @@ mfn_t shadow_hash_lookup(struct vcpu *v, * or INVALID_MFN if it doesn't exist */ { struct domain *d = v->domain; - struct shadow_page_info *sp, *prev; + struct page_info *sp, *prev; key_t key; ASSERT(shadow_locked_by_me(d)); @@ -2019,21 +2026,21 @@ mfn_t shadow_hash_lookup(struct vcpu *v, prev = NULL; while(sp) { - if ( sp->backpointer == n && sp->type == t ) + if ( sp->v.sh.back == n && sp->u.sh.type == t ) { /* Pull-to-front if 'sp' isn't already the head item */ if ( unlikely(sp != d->arch.paging.shadow.hash_table[key]) ) { if ( unlikely(d->arch.paging.shadow.hash_walking != 0) ) /* Can't reorder: someone is walking the hash chains */ - return shadow_page_to_mfn(sp); + return page_to_mfn(sp); else { ASSERT(prev); /* Delete sp from the list */ prev->next_shadow = sp->next_shadow; /* Re-insert it at the head of the list */ - sp->next_shadow = d->arch.paging.shadow.hash_table[key]; + set_next_shadow(sp, d->arch.paging.shadow.hash_table[key]); d->arch.paging.shadow.hash_table[key] = sp; } } @@ -2041,10 +2048,10 @@ mfn_t shadow_hash_lookup(struct vcpu *v, { perfc_incr(shadow_hash_lookup_head); } - return shadow_page_to_mfn(sp); + return page_to_mfn(sp); } prev = sp; - sp = sp->next_shadow; + sp = next_shadow(sp); } perfc_incr(shadow_hash_lookup_miss); @@ -2056,7 +2063,7 @@ void shadow_hash_insert(struct vcpu *v, /* Put a mapping (n,t)->smfn into the hash table */ { struct domain *d = v->domain; - struct shadow_page_info *sp; + struct page_info *sp; key_t key; ASSERT(shadow_locked_by_me(d)); @@ -2070,8 +2077,8 @@ void shadow_hash_insert(struct vcpu *v, sh_hash_audit_bucket(d, key); /* Insert this shadow at the top of the bucket */ - sp = mfn_to_shadow_page(smfn); - sp->next_shadow = d->arch.paging.shadow.hash_table[key]; + sp = mfn_to_page(smfn); + set_next_shadow(sp, d->arch.paging.shadow.hash_table[key]); d->arch.paging.shadow.hash_table[key] = sp; sh_hash_audit_bucket(d, key); @@ -2082,7 +2089,7 @@ void shadow_hash_delete(struct vcpu *v, /* Excise the mapping (n,t)->smfn from the hash table */ { struct domain *d = v->domain; - struct shadow_page_info *sp, *x; + struct page_info *sp, *x; key_t key; ASSERT(shadow_locked_by_me(d)); @@ -2095,10 +2102,10 @@ void shadow_hash_delete(struct vcpu *v, key = sh_hash(n, t); sh_hash_audit_bucket(d, key); - sp = mfn_to_shadow_page(smfn); + sp = mfn_to_page(smfn); if ( d->arch.paging.shadow.hash_table[key] == sp ) /* Easy case: we're deleting the head item. */ - d->arch.paging.shadow.hash_table[key] = sp->next_shadow; + d->arch.paging.shadow.hash_table[key] = next_shadow(sp); else { /* Need to search for the one we want */ @@ -2107,15 +2114,15 @@ void shadow_hash_delete(struct vcpu *v, { ASSERT(x); /* We can't have hit the end, since our target is * still in the chain somehwere... 
*/ - if ( x->next_shadow == sp ) + if ( next_shadow(x) == sp ) { x->next_shadow = sp->next_shadow; break; } - x = x->next_shadow; - } - } - sp->next_shadow = NULL; + x = next_shadow(x); + } + } + set_next_shadow(sp, NULL); sh_hash_audit_bucket(d, key); } @@ -2137,7 +2144,7 @@ static void hash_foreach(struct vcpu *v, { int i, done = 0; struct domain *d = v->domain; - struct shadow_page_info *x; + struct page_info *x; /* Say we're here, to stop hash-lookups reordering the chains */ ASSERT(shadow_locked_by_me(d)); @@ -2149,14 +2156,14 @@ static void hash_foreach(struct vcpu *v, /* WARNING: This is not safe against changes to the hash table. * The callback *must* return non-zero if it has inserted or * deleted anything from the hash (lookups are OK, though). */ - for ( x = d->arch.paging.shadow.hash_table[i]; x; x = x->next_shadow ) - { - if ( callback_mask & (1 << x->type) ) + for ( x = d->arch.paging.shadow.hash_table[i]; x; x = next_shadow(x) ) + { + if ( callback_mask & (1 << x->u.sh.type) ) { - ASSERT(x->type <= 15); - ASSERT(callbacks[x->type] != NULL); - done = callbacks[x->type](v, shadow_page_to_mfn(x), - callback_mfn); + ASSERT(x->u.sh.type <= 15); + ASSERT(callbacks[x->u.sh.type] != NULL); + done = callbacks[x->u.sh.type](v, page_to_mfn(x), + callback_mfn); if ( done ) break; } } @@ -2173,8 +2180,8 @@ static void hash_foreach(struct vcpu *v, void sh_destroy_shadow(struct vcpu *v, mfn_t smfn) { - struct shadow_page_info *sp = mfn_to_shadow_page(smfn); - unsigned int t = sp->type; + struct page_info *sp = mfn_to_page(smfn); + unsigned int t = sp->u.sh.type; SHADOW_PRINTK("smfn=%#lx\n", mfn_x(smfn)); @@ -2186,7 +2193,7 @@ void sh_destroy_shadow(struct vcpu *v, m t == SH_type_fl1_64_shadow || t == SH_type_monitor_table || (is_pv_32on64_vcpu(v) && t == SH_type_l4_64_shadow) || - (page_get_owner(mfn_to_page(_mfn(sp->backpointer))) + (page_get_owner(mfn_to_page(_mfn(sp->v.sh.back))) == v->domain)); /* The down-shifts here are so that the switch statement is on nice @@ -2438,7 +2445,7 @@ int sh_remove_write_access(struct vcpu * { unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask); mfn_t last_smfn = _mfn(v->arch.paging.shadow.last_writeable_pte_smfn); - int shtype = mfn_to_shadow_page(last_smfn)->type; + int shtype = mfn_to_page(last_smfn)->u.sh.type; if ( callbacks[shtype] ) callbacks[shtype](v, last_smfn, gmfn); @@ -2481,25 +2488,25 @@ int sh_remove_write_access_from_sl1p(str int sh_remove_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn, mfn_t smfn, unsigned long off) { - struct shadow_page_info *sp = mfn_to_shadow_page(smfn); + struct page_info *sp = mfn_to_page(smfn); ASSERT(mfn_valid(smfn)); ASSERT(mfn_valid(gmfn)); - if ( sp->type == SH_type_l1_32_shadow - || sp->type == SH_type_fl1_32_shadow ) + if ( sp->u.sh.type == SH_type_l1_32_shadow + || sp->u.sh.type == SH_type_fl1_32_shadow ) { return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,2) (v, gmfn, smfn, off); } #if CONFIG_PAGING_LEVELS >= 3 - else if ( sp->type == SH_type_l1_pae_shadow - || sp->type == SH_type_fl1_pae_shadow ) + else if ( sp->u.sh.type == SH_type_l1_pae_shadow + || sp->u.sh.type == SH_type_fl1_pae_shadow ) return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,3) (v, gmfn, smfn, off); #if CONFIG_PAGING_LEVELS >= 4 - else if ( sp->type == SH_type_l1_64_shadow - || sp->type == SH_type_fl1_64_shadow ) + else if ( sp->u.sh.type == SH_type_l1_64_shadow + || sp->u.sh.type == SH_type_fl1_64_shadow ) return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,4) (v, gmfn, smfn, off); #endif @@ -2601,17 
+2608,17 @@ static int sh_remove_shadow_via_pointer( /* Follow this shadow's up-pointer, if it has one, and remove the reference * found there. Returns 1 if that was the only reference to this shadow */ { - struct shadow_page_info *sp = mfn_to_shadow_page(smfn); + struct page_info *sp = mfn_to_page(smfn); mfn_t pmfn; void *vaddr; int rc; - ASSERT(sp->type > 0); - ASSERT(sp->type < SH_type_max_shadow); - ASSERT(sp->type != SH_type_l2_32_shadow); - ASSERT(sp->type != SH_type_l2_pae_shadow); - ASSERT(sp->type != SH_type_l2h_pae_shadow); - ASSERT(sp->type != SH_type_l4_64_shadow); + ASSERT(sp->u.sh.type > 0); + ASSERT(sp->u.sh.type < SH_type_max_shadow); + ASSERT(sp->u.sh.type != SH_type_l2_32_shadow); + ASSERT(sp->u.sh.type != SH_type_l2_pae_shadow); + ASSERT(sp->u.sh.type != SH_type_l2h_pae_shadow); + ASSERT(sp->u.sh.type != SH_type_l4_64_shadow); if (sp->up == 0) return 0; pmfn = _mfn(sp->up >> PAGE_SHIFT); @@ -2622,10 +2629,10 @@ static int sh_remove_shadow_via_pointer( ASSERT(l1e_get_pfn(*(l1_pgentry_t *)vaddr) == mfn_x(smfn)); /* Is this the only reference to this shadow? */ - rc = (sp->count == 1) ? 1 : 0; + rc = (sp->u.sh.count == 1) ? 1 : 0; /* Blank the offending entry */ - switch (sp->type) + switch (sp->u.sh.type) { case SH_type_l1_32_shadow: case SH_type_l2_32_shadow: @@ -3156,7 +3163,6 @@ void shadow_teardown(struct domain *d) { struct vcpu *v; mfn_t mfn; - struct list_head *entry, *n; struct page_info *pg; ASSERT(d->is_dying); @@ -3208,12 +3214,8 @@ void shadow_teardown(struct domain *d) } #endif /* (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC)) */ - list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist) - { - list_del(entry); - pg = list_entry(entry, struct page_info, list); + while ( (pg = page_list_remove_head(&d->arch.paging.shadow.p2m_freelist)) ) shadow_free_p2m_page(d, pg); - } if ( d->arch.paging.shadow.total_pages != 0 ) { @@ -3657,7 +3659,6 @@ int shadow_track_dirty_vram(struct domai for ( i = 0; i < nr; i++ ) { mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t); struct page_info *page; - u32 count_info; int dirty = 0; paddr_t sl1ma = d->dirty_vram->sl1ma[i]; @@ -3668,8 +3669,7 @@ int shadow_track_dirty_vram(struct domai else { page = mfn_to_page(mfn); - count_info = page->u.inuse.type_info & PGT_count_mask; - switch (count_info) + switch (page->u.inuse.type_info & PGT_count_mask) { case 0: /* No guest reference, nothing to track. */ diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/mm/shadow/multi.c Fri Feb 13 11:22:28 2009 +0900 @@ -973,13 +973,13 @@ static int shadow_set_l2e(struct vcpu *v } #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) { - struct shadow_page_info *sp = mfn_to_shadow_page(sl1mfn); - mfn_t gl1mfn = _mfn(sp->backpointer); + struct page_info *sp = mfn_to_page(sl1mfn); + mfn_t gl1mfn = _mfn(sp->v.sh.back); /* If the shadow is a fl1 then the backpointer contains the GFN instead of the GMFN, and it's definitely not OOS. 
*/ - if ( (sp->type != SH_type_fl1_shadow) && mfn_valid(gl1mfn) + if ( (sp->u.sh.type != SH_type_fl1_shadow) && mfn_valid(gl1mfn) && mfn_is_out_of_sync(gl1mfn) ) sh_resync(v, gl1mfn); } @@ -1036,9 +1036,8 @@ static inline void shadow_vram_get_l1e(s if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) { unsigned long i = gfn - d->dirty_vram->begin_pfn; struct page_info *page = mfn_to_page(mfn); - u32 count_info = page->u.inuse.type_info & PGT_count_mask; - if ( count_info == 1 ) + if ( (page->u.inuse.type_info & PGT_count_mask) == 1 ) /* Initial guest reference, record it */ d->dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK); @@ -1064,12 +1063,11 @@ static inline void shadow_vram_put_l1e(s if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) { unsigned long i = gfn - d->dirty_vram->begin_pfn; struct page_info *page = mfn_to_page(mfn); - u32 count_info = page->u.inuse.type_info & PGT_count_mask; int dirty = 0; paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) | ((unsigned long)sl1e & ~PAGE_MASK); - if ( count_info == 1 ) { + if ( (page->u.inuse.type_info & PGT_count_mask) == 1 ) { /* Last reference */ if ( d->dirty_vram->sl1ma[i] == INVALID_PADDR ) { /* We didn't know it was that one, let's say it is dirty */ @@ -1194,8 +1192,8 @@ do { do { \ int _i; \ shadow_l1e_t *_sp = sh_map_domain_page((_sl1mfn)); \ - ASSERT(mfn_to_shadow_page(_sl1mfn)->type == SH_type_l1_shadow \ - || mfn_to_shadow_page(_sl1mfn)->type == SH_type_fl1_shadow); \ + ASSERT(mfn_to_page(_sl1mfn)->u.sh.type == SH_type_l1_shadow \ + || mfn_to_page(_sl1mfn)->u.sh.type == SH_type_fl1_shadow);\ for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ ) \ { \ (_sl1e) = _sp + _i; \ @@ -1232,7 +1230,7 @@ do { do { \ int _i, _j, __done = 0; \ int _xen = !shadow_mode_external(_dom); \ - ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_32_shadow); \ + ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_32_shadow);\ for ( _j = 0; _j < 4 && !__done; _j++ ) \ { \ shadow_l2e_t *_sp = sh_map_domain_page(_sl2mfn); \ @@ -1260,11 +1258,11 @@ do { int _i; \ int _xen = !shadow_mode_external(_dom); \ shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn)); \ - ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_pae_shadow \ - || mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_pae_shadow);\ + ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_pae_shadow \ + || mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow);\ for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \ if ( (!(_xen)) \ - || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_pae_shadow\ + || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_pae_shadow\ || ((_i + (3 * SHADOW_L2_PAGETABLE_ENTRIES)) \ < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \ { \ @@ -1285,13 +1283,13 @@ do { int _i; \ int _xen = !shadow_mode_external(_dom); \ shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn)); \ - ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_64_shadow || \ - mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_64_shadow); \ + ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_64_shadow ||\ + mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_64_shadow);\ for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \ { \ if ( (!(_xen)) \ || !is_pv_32on64_domain(_dom) \ - || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_64_shadow \ + || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_64_shadow\ || (_i < COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(_dom)) ) \ { \ (_sl2e) = _sp + _i; \ @@ -1313,7 +1311,7 @@ do { do { 
\ int _i; \ shadow_l3e_t *_sp = sh_map_domain_page((_sl3mfn)); \ - ASSERT(mfn_to_shadow_page(_sl3mfn)->type == SH_type_l3_64_shadow); \ + ASSERT(mfn_to_page(_sl3mfn)->u.sh.type == SH_type_l3_64_shadow);\ for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ ) \ { \ (_sl3e) = _sp + _i; \ @@ -1331,7 +1329,7 @@ do { shadow_l4e_t *_sp = sh_map_domain_page((_sl4mfn)); \ int _xen = !shadow_mode_external(_dom); \ int _i; \ - ASSERT(mfn_to_shadow_page(_sl4mfn)->type == SH_type_l4_64_shadow); \ + ASSERT(mfn_to_page(_sl4mfn)->u.sh.type == SH_type_l4_64_shadow);\ for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ ) \ { \ if ( (!(_xen)) || is_guest_l4_slot(_dom, _i) ) \ @@ -1506,7 +1504,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf && shadow_type != SH_type_l2h_pae_shadow && shadow_type != SH_type_l4_64_shadow ) /* Lower-level shadow, not yet linked form a higher level */ - mfn_to_shadow_page(smfn)->up = 0; + mfn_to_page(smfn)->up = 0; #if GUEST_PAGING_LEVELS == 4 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) @@ -1519,14 +1517,12 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf * of them, decide that this isn't an old linux guest, and stop * pinning l3es. This is not very quick but it doesn't happen * very often. */ - struct list_head *l, *t; - struct shadow_page_info *sp; + struct page_info *sp, *t; struct vcpu *v2; int l4count = 0, vcpus = 0; - list_for_each(l, &v->domain->arch.paging.shadow.pinned_shadows) - { - sp = list_entry(l, struct shadow_page_info, list); - if ( sp->type == SH_type_l4_64_shadow ) + page_list_for_each(sp, &v->domain->arch.paging.shadow.pinned_shadows) + { + if ( sp->u.sh.type == SH_type_l4_64_shadow ) l4count++; } for_each_vcpu ( v->domain, v2 ) @@ -1534,11 +1530,10 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf if ( l4count > 2 * vcpus ) { /* Unpin all the pinned l3 tables, and don't pin any more. 
*/ - list_for_each_safe(l, t, &v->domain->arch.paging.shadow.pinned_shadows) + page_list_for_each_safe(sp, t, &v->domain->arch.paging.shadow.pinned_shadows) { - sp = list_entry(l, struct shadow_page_info, list); - if ( sp->type == SH_type_l3_64_shadow ) - sh_unpin(v, shadow_page_to_mfn(sp)); + if ( sp->u.sh.type == SH_type_l3_64_shadow ) + sh_unpin(v, page_to_mfn(sp)); } v->domain->arch.paging.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL; } @@ -1921,7 +1916,7 @@ void sh_destroy_l4_shadow(struct vcpu *v void sh_destroy_l4_shadow(struct vcpu *v, mfn_t smfn) { shadow_l4e_t *sl4e; - u32 t = mfn_to_shadow_page(smfn)->type; + u32 t = mfn_to_page(smfn)->u.sh.type; mfn_t gmfn, sl4mfn; SHADOW_DEBUG(DESTROY_SHADOW, @@ -1929,7 +1924,7 @@ void sh_destroy_l4_shadow(struct vcpu *v ASSERT(t == SH_type_l4_shadow); /* Record that the guest page isn't shadowed any more (in this type) */ - gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer); + gmfn = _mfn(mfn_to_page(smfn)->v.sh.back); delete_shadow_status(v, gmfn, t, smfn); shadow_demote(v, gmfn, t); /* Decrement refcounts of all the old entries */ @@ -1950,7 +1945,7 @@ void sh_destroy_l3_shadow(struct vcpu *v void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn) { shadow_l3e_t *sl3e; - u32 t = mfn_to_shadow_page(smfn)->type; + u32 t = mfn_to_page(smfn)->u.sh.type; mfn_t gmfn, sl3mfn; SHADOW_DEBUG(DESTROY_SHADOW, @@ -1958,7 +1953,7 @@ void sh_destroy_l3_shadow(struct vcpu *v ASSERT(t == SH_type_l3_shadow); /* Record that the guest page isn't shadowed any more (in this type) */ - gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer); + gmfn = _mfn(mfn_to_page(smfn)->v.sh.back); delete_shadow_status(v, gmfn, t, smfn); shadow_demote(v, gmfn, t); @@ -1980,7 +1975,7 @@ void sh_destroy_l2_shadow(struct vcpu *v void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn) { shadow_l2e_t *sl2e; - u32 t = mfn_to_shadow_page(smfn)->type; + u32 t = mfn_to_page(smfn)->u.sh.type; mfn_t gmfn, sl2mfn; SHADOW_DEBUG(DESTROY_SHADOW, @@ -1993,7 +1988,7 @@ void sh_destroy_l2_shadow(struct vcpu *v #endif /* Record that the guest page isn't shadowed any more (in this type) */ - gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer); + gmfn = _mfn(mfn_to_page(smfn)->v.sh.back); delete_shadow_status(v, gmfn, t, smfn); shadow_demote(v, gmfn, t); @@ -2014,7 +2009,7 @@ void sh_destroy_l1_shadow(struct vcpu *v { struct domain *d = v->domain; shadow_l1e_t *sl1e; - u32 t = mfn_to_shadow_page(smfn)->type; + u32 t = mfn_to_page(smfn)->u.sh.type; SHADOW_DEBUG(DESTROY_SHADOW, "%s(%05lx)\n", __func__, mfn_x(smfn)); @@ -2023,12 +2018,12 @@ void sh_destroy_l1_shadow(struct vcpu *v /* Record that the guest page isn't shadowed any more (in this type) */ if ( t == SH_type_fl1_shadow ) { - gfn_t gfn = _gfn(mfn_to_shadow_page(smfn)->backpointer); + gfn_t gfn = _gfn(mfn_to_page(smfn)->v.sh.back); delete_fl1_shadow_status(v, gfn, smfn); } else { - mfn_t gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer); + mfn_t gmfn = _mfn(mfn_to_page(smfn)->v.sh.back); delete_shadow_status(v, gmfn, t, smfn); shadow_demote(v, gmfn, t); } @@ -2054,7 +2049,7 @@ void sh_destroy_monitor_table(struct vcp void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn) { struct domain *d = v->domain; - ASSERT(mfn_to_shadow_page(mmfn)->type == SH_type_monitor_table); + ASSERT(mfn_to_page(mmfn)->u.sh.type == SH_type_monitor_table); #if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4) { @@ -2298,7 +2293,7 @@ static int validate_gl2e(struct vcpu *v, #if SHADOW_PAGING_LEVELS == 3 reserved_xen_slot = - ((mfn_to_shadow_page(sl2mfn)->type == 
SH_type_l2h_pae_shadow) && + ((mfn_to_page(sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow) && (shadow_index >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)))); #else /* SHADOW_PAGING_LEVELS == 2 */ @@ -2352,7 +2347,7 @@ static int validate_gl1e(struct vcpu *v, result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn); #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) - gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer); + gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back); if ( mfn_valid(gl1mfn) && mfn_is_out_of_sync(gl1mfn) ) { @@ -2429,30 +2424,30 @@ void sh_resync_l1(struct vcpu *v, mfn_t * called in the *mode* of the vcpu that unsynced it. Clear? Good. */ int sh_safe_not_to_sync(struct vcpu *v, mfn_t gl1mfn) { - struct shadow_page_info *sp; + struct page_info *sp; mfn_t smfn; smfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow); ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */ /* Up to l2 */ - sp = mfn_to_shadow_page(smfn); - if ( sp->count != 1 || !sp->up ) + sp = mfn_to_page(smfn); + if ( sp->u.sh.count != 1 || !sp->up ) return 0; smfn = _mfn(sp->up >> PAGE_SHIFT); ASSERT(mfn_valid(smfn)); #if (SHADOW_PAGING_LEVELS == 4) /* up to l3 */ - sp = mfn_to_shadow_page(smfn); - if ( sp->count != 1 || !sp->up ) + sp = mfn_to_page(smfn); + if ( sp->u.sh.count != 1 || !sp->up ) return 0; smfn = _mfn(sp->up >> PAGE_SHIFT); ASSERT(mfn_valid(smfn)); /* up to l4 */ - sp = mfn_to_shadow_page(smfn); - if ( sp->count != 1 + sp = mfn_to_page(smfn); + if ( sp->u.sh.count != 1 || sh_type_is_pinnable(v, SH_type_l3_64_shadow) || !sp->up ) return 0; smfn = _mfn(sp->up >> PAGE_SHIFT); @@ -2970,8 +2965,8 @@ static int sh_page_fault(struct vcpu *v, + shadow_l2_linear_offset(va)), sizeof(sl2e)) != 0) || !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT) - || !mfn_valid(gl1mfn = _mfn(mfn_to_shadow_page( - shadow_l2e_get_mfn(sl2e))->backpointer)) + || !mfn_valid(gl1mfn = _mfn(mfn_to_page( + shadow_l2e_get_mfn(sl2e))->v.sh.back)) || unlikely(mfn_is_out_of_sync(gl1mfn)) ) { /* Hit the slow path as if there had been no @@ -3523,7 +3518,7 @@ sh_invlpg(struct vcpu *v, unsigned long // easier than invalidating all of the individual 4K pages). // sl1mfn = shadow_l2e_get_mfn(sl2e); - if ( mfn_to_shadow_page(sl1mfn)->type + if ( mfn_to_page(sl1mfn)->u.sh.type == SH_type_fl1_shadow ) { flush_tlb_local(); @@ -3533,7 +3528,7 @@ sh_invlpg(struct vcpu *v, unsigned long #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) /* Check to see if the SL1 is out of sync. */ { - mfn_t gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer); + mfn_t gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back); struct page_info *pg = mfn_to_page(gl1mfn); if ( mfn_valid(gl1mfn) && page_is_out_of_sync(pg) ) @@ -3563,7 +3558,7 @@ sh_invlpg(struct vcpu *v, unsigned long } sl1mfn = shadow_l2e_get_mfn(sl2e); - gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer); + gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back); pg = mfn_to_page(gl1mfn); if ( likely(sh_mfn_is_a_page_table(gl1mfn) @@ -3968,7 +3963,7 @@ sh_set_toplevel_shadow(struct vcpu *v, /* Need to repin the old toplevel shadow if it's been unpinned * by shadow_prealloc(): in PV mode we're still running on this * shadow and it's not safe to free it yet. 
*/ - if ( !mfn_to_shadow_page(old_smfn)->pinned && !sh_pin(v, old_smfn) ) + if ( !mfn_to_page(old_smfn)->u.sh.pinned && !sh_pin(v, old_smfn) ) { SHADOW_ERROR("can't re-pin %#lx\n", mfn_x(old_smfn)); domain_crash(v->domain); @@ -4262,16 +4257,16 @@ int sh_rm_write_access_from_sl1p(struct { int r; shadow_l1e_t *sl1p, sl1e; - struct shadow_page_info *sp; + struct page_info *sp; ASSERT(mfn_valid(gmfn)); ASSERT(mfn_valid(smfn)); - sp = mfn_to_shadow_page(smfn); - - if ( sp->mbz != 0 - || (sp->type != SH_type_l1_shadow - && sp->type != SH_type_fl1_shadow) ) + sp = mfn_to_page(smfn); + + if ( sp->count_info != 0 + || (sp->u.sh.type != SH_type_l1_shadow + && sp->u.sh.type != SH_type_fl1_shadow) ) goto fail; sl1p = sh_map_domain_page(smfn); @@ -4410,7 +4405,7 @@ void sh_clear_shadow_entry(struct vcpu * void sh_clear_shadow_entry(struct vcpu *v, void *ep, mfn_t smfn) /* Blank out a single shadow entry */ { - switch ( mfn_to_shadow_page(smfn)->type ) + switch ( mfn_to_page(smfn)->u.sh.type ) { case SH_type_l1_shadow: (void) shadow_set_l1e(v, ep, shadow_l1e_empty(), smfn); break; @@ -4443,7 +4438,7 @@ int sh_remove_l1_shadow(struct vcpu *v, && (mfn_x(shadow_l2e_get_mfn(*sl2e)) == mfn_x(sl1mfn)) ) { (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn); - if ( mfn_to_shadow_page(sl1mfn)->type == 0 ) + if ( mfn_to_page(sl1mfn)->u.sh.type == 0 ) /* This breaks us cleanly out of the FOREACH macro */ done = 1; } @@ -4466,7 +4461,7 @@ int sh_remove_l2_shadow(struct vcpu *v, && (mfn_x(shadow_l3e_get_mfn(*sl3e)) == mfn_x(sl2mfn)) ) { (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn); - if ( mfn_to_shadow_page(sl2mfn)->type == 0 ) + if ( mfn_to_page(sl2mfn)->u.sh.type == 0 ) /* This breaks us cleanly out of the FOREACH macro */ done = 1; } @@ -4488,7 +4483,7 @@ int sh_remove_l3_shadow(struct vcpu *v, && (mfn_x(shadow_l4e_get_mfn(*sl4e)) == mfn_x(sl3mfn)) ) { (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn); - if ( mfn_to_shadow_page(sl3mfn)->type == 0 ) + if ( mfn_to_page(sl3mfn)->u.sh.type == 0 ) /* This breaks us cleanly out of the FOREACH macro */ done = 1; } @@ -4890,7 +4885,7 @@ int sh_audit_l1_table(struct vcpu *v, mf int done = 0; /* Follow the backpointer */ - gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer); + gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back); #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) /* Out-of-sync l1 shadows can contain anything: just check the OOS hash */ @@ -4980,7 +4975,7 @@ int sh_audit_l2_table(struct vcpu *v, mf int done = 0; /* Follow the backpointer */ - gl2mfn = _mfn(mfn_to_shadow_page(sl2mfn)->backpointer); + gl2mfn = _mfn(mfn_to_page(sl2mfn)->v.sh.back); #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) /* Only L1's may be out of sync. */ @@ -5029,7 +5024,7 @@ int sh_audit_l3_table(struct vcpu *v, mf int done = 0; /* Follow the backpointer */ - gl3mfn = _mfn(mfn_to_shadow_page(sl3mfn)->backpointer); + gl3mfn = _mfn(mfn_to_page(sl3mfn)->v.sh.back); #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) /* Only L1's may be out of sync. */ @@ -5076,7 +5071,7 @@ int sh_audit_l4_table(struct vcpu *v, mf int done = 0; /* Follow the backpointer */ - gl4mfn = _mfn(mfn_to_shadow_page(sl4mfn)->backpointer); + gl4mfn = _mfn(mfn_to_page(sl4mfn)->v.sh.back); #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) /* Only L1's may be out of sync. 
*/ diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/shadow/private.h --- a/xen/arch/x86/mm/shadow/private.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/mm/shadow/private.h Fri Feb 13 11:22:28 2009 +0900 @@ -220,60 +220,6 @@ extern void shadow_audit_tables(struct v #undef GUEST_LEVELS #endif /* CONFIG_PAGING_LEVELS == 4 */ -/****************************************************************************** - * Page metadata for shadow pages. - */ - -struct shadow_page_info -{ - union { - /* Ensures that shadow_page_info is same size as page_info. */ - struct page_info page_info; - - struct { - union { - /* When in use, guest page we're a shadow of */ - unsigned long backpointer; - /* When free, order of the freelist we're on */ - unsigned int order; - }; - union { - /* When in use, next shadow in this hash chain */ - struct shadow_page_info *next_shadow; - /* When free, TLB flush time when freed */ - u32 tlbflush_timestamp; - }; - struct { - unsigned long mbz; /* Must be zero: count_info is here. */ - unsigned long type:5; /* What kind of shadow is this? */ - unsigned long pinned:1; /* Is the shadow pinned? */ - unsigned long count:26; /* Reference count */ - } __attribute__((packed)); - union { - /* For unused shadow pages, a list of pages of this order; for - * pinnable shadows, if pinned, a list of other pinned shadows - * (see sh_type_is_pinnable() below for the definition of - * "pinnable" shadow types). */ - struct list_head list; - /* For non-pinnable shadows, a higher entry that points - * at us. */ - paddr_t up; - }; - }; - }; -}; - -/* The structure above *must* be no larger than a struct page_info - * from mm.h, since we'll be using the same space in the frametable. - * Also, the mbz field must line up with the count_info field of normal - * pages, so they cannot be successfully get_page()d. */ -static inline void shadow_check_page_struct_offsets(void) { - BUILD_BUG_ON(sizeof (struct shadow_page_info) != - sizeof (struct page_info)); - BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) != - offsetof(struct page_info, count_info)); -}; - /* Shadow type codes */ #define SH_type_none (0U) /* on the shadow free list */ #define SH_type_min_shadow (1U) @@ -528,22 +474,13 @@ mfn_t oos_snapshot_lookup(struct vcpu *v * MFN/page-info handling */ -// Override mfn_to_page from asm/page.h, which was #include'd above, -// in order to make it work with our mfn type. +/* Override macros from asm/page.h to make them work with mfn_t */ #undef mfn_to_page -#define mfn_to_page(_m) (frame_table + mfn_x(_m)) -#define mfn_to_shadow_page(_m) ((struct shadow_page_info *)mfn_to_page(_m)) - -// Override page_to_mfn from asm/page.h, which was #include'd above, -// in order to make it work with our mfn type. +#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m)) +#undef mfn_valid +#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn)) #undef page_to_mfn -#define page_to_mfn(_pg) (_mfn((_pg) - frame_table)) -#define shadow_page_to_mfn(_spg) (page_to_mfn((struct page_info *)_spg)) - -// Override mfn_valid from asm/page.h, which was #include'd above, -// in order to make it work with our mfn type. 
-#undef mfn_valid -#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) +#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg)) /* Override pagetable_t <-> struct page_info conversions to work with mfn_t */ #undef pagetable_get_page @@ -675,26 +612,26 @@ static inline int sh_get_ref(struct vcpu static inline int sh_get_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa) { u32 x, nx; - struct shadow_page_info *sp = mfn_to_shadow_page(smfn); + struct page_info *sp = mfn_to_page(smfn); ASSERT(mfn_valid(smfn)); - x = sp->count; + x = sp->u.sh.count; nx = x + 1; if ( unlikely(nx >= 1U<<26) ) { - SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRtype_info " smfn=%lx\n", - sp->backpointer, mfn_x(smfn)); + SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRpgmfn " smfn=%lx\n", + sp->v.sh.back, mfn_x(smfn)); return 0; } /* Guarded by the shadow lock, so no need for atomic update */ - sp->count = nx; + sp->u.sh.count = nx; /* We remember the first shadow entry that points to each shadow. */ if ( entry_pa != 0 - && !sh_type_is_pinnable(v, sp->type) + && !sh_type_is_pinnable(v, sp->u.sh.type) && sp->up == 0 ) sp->up = entry_pa; @@ -707,29 +644,29 @@ static inline void sh_put_ref(struct vcp static inline void sh_put_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa) { u32 x, nx; - struct shadow_page_info *sp = mfn_to_shadow_page(smfn); + struct page_info *sp = mfn_to_page(smfn); ASSERT(mfn_valid(smfn)); - ASSERT(sp->mbz == 0); + ASSERT(sp->count_info == 0); /* If this is the entry in the up-pointer, remove it */ if ( entry_pa != 0 - && !sh_type_is_pinnable(v, sp->type) + && !sh_type_is_pinnable(v, sp->u.sh.type) && sp->up == entry_pa ) sp->up = 0; - x = sp->count; + x = sp->u.sh.count; nx = x - 1; if ( unlikely(x == 0) ) { SHADOW_ERROR("shadow ref underflow, smfn=%lx oc=%08x t=%#x\n", - mfn_x(smfn), sp->count, sp->type); + mfn_x(smfn), sp->u.sh.count, sp->u.sh.type); BUG(); } /* Guarded by the shadow lock, so no need for atomic update */ - sp->count = nx; + sp->u.sh.count = nx; if ( unlikely(nx == 0) ) sh_destroy_shadow(v, smfn); @@ -741,26 +678,26 @@ static inline void sh_put_ref(struct vcp * Returns 0 for failure, 1 for success. */ static inline int sh_pin(struct vcpu *v, mfn_t smfn) { - struct shadow_page_info *sp; + struct page_info *sp; ASSERT(mfn_valid(smfn)); - sp = mfn_to_shadow_page(smfn); - ASSERT(sh_type_is_pinnable(v, sp->type)); - if ( sp->pinned ) + sp = mfn_to_page(smfn); + ASSERT(sh_type_is_pinnable(v, sp->u.sh.type)); + if ( sp->u.sh.pinned ) { /* Already pinned: take it out of the pinned-list so it can go * at the front */ - list_del(&sp->list); + page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows); } else { /* Not pinned: pin it! */ if ( !sh_get_ref(v, smfn, 0) ) return 0; - sp->pinned = 1; + sp->u.sh.pinned = 1; } /* Put it at the head of the list of pinned shadows */ - list_add(&sp->list, &v->domain->arch.paging.shadow.pinned_shadows); + page_list_add(sp, &v->domain->arch.paging.shadow.pinned_shadows); return 1; } @@ -768,15 +705,15 @@ static inline int sh_pin(struct vcpu *v, * of pinned shadows, and release the extra ref. 
*/ static inline void sh_unpin(struct vcpu *v, mfn_t smfn) { - struct shadow_page_info *sp; + struct page_info *sp; ASSERT(mfn_valid(smfn)); - sp = mfn_to_shadow_page(smfn); - ASSERT(sh_type_is_pinnable(v, sp->type)); - if ( sp->pinned ) + sp = mfn_to_page(smfn); + ASSERT(sh_type_is_pinnable(v, sp->u.sh.type)); + if ( sp->u.sh.pinned ) { - sp->pinned = 0; - list_del(&sp->list); + sp->u.sh.pinned = 0; + page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows); sp->up = 0; /* in case this stops being a pinnable type in future */ sh_put_ref(v, smfn, 0); } diff -r af992824b5cf -r c7cba853583d xen/arch/x86/numa.c --- a/xen/arch/x86/numa.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/numa.c Fri Feb 13 11:22:28 2009 +0900 @@ -312,7 +312,7 @@ static void dump_numa(unsigned char key) for_each_online_node(i) page_num_node[i] = 0; - list_for_each_entry(page, &d->page_list, list) + page_list_for_each(page, &d->page_list) { i = phys_to_nid(page_to_mfn(page) << PAGE_SHIFT); page_num_node[i]++; diff -r af992824b5cf -r c7cba853583d xen/arch/x86/physdev.c --- a/xen/arch/x86/physdev.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/physdev.c Fri Feb 13 11:22:28 2009 +0900 @@ -62,7 +62,7 @@ static int physdev_map_pirq(struct physd ret = -EINVAL; goto free_domain; } - vector = IO_APIC_VECTOR(map->index); + vector = domain_irq_to_vector(current->domain, map->index); if ( !vector ) { dprintk(XENLOG_G_ERR, "dom%d: map irq with no vector %d\n", @@ -75,7 +75,7 @@ static int physdev_map_pirq(struct physd case MAP_PIRQ_TYPE_MSI: vector = map->index; if ( vector == -1 ) - vector = assign_irq_vector(AUTO_ASSIGN); + vector = assign_irq_vector(AUTO_ASSIGN_IRQ); if ( vector < 0 || vector >= NR_VECTORS ) { diff -r af992824b5cf -r c7cba853583d xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/setup.c Fri Feb 13 11:22:28 2009 +0900 @@ -417,7 +417,7 @@ void __init __start_xen(unsigned long mb unsigned int initrdidx = 1; multiboot_info_t *mbi = __va(mbi_p); module_t *mod = (module_t *)__va(mbi->mods_addr); - unsigned long nr_pages, modules_length, modules_headroom = -1; + unsigned long nr_pages, modules_length, modules_headroom; unsigned long allocator_bitmap_end; int i, e820_warn = 0, bytes = 0; struct ns16550_defaults ns16550 = { @@ -618,6 +618,12 @@ void __init __start_xen(unsigned long mb */ modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start; + /* ensure mod[0] is mapped before parsing */ + bootstrap_map(mod[0].mod_start, mod[0].mod_end); + modules_headroom = bzimage_headroom( + (char *)(unsigned long)mod[0].mod_start, + (unsigned long)(mod[0].mod_end - mod[0].mod_start)); + for ( i = boot_e820.nr_map-1; i >= 0; i-- ) { uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1; @@ -636,7 +642,8 @@ void __init __start_xen(unsigned long mb s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR); #if defined(CONFIG_X86_64) -#define reloc_size ((__pa(&_end) + mask) & ~mask) +/* Relocate Xen image, allocation bitmap, and one page of padding. */ +#define reloc_size ((__pa(&_end) + max_page/8 + PAGE_SIZE + mask) & ~mask) /* Is the region suitable for relocating Xen? */ if ( !xen_phys_start && ((e-s) >= reloc_size) ) { @@ -721,11 +728,6 @@ void __init __start_xen(unsigned long mb } #endif - if ( modules_headroom == -1 ) - modules_headroom = bzimage_headroom( - (char *)(unsigned long)mod[0].mod_start, - (unsigned long)(mod[0].mod_end - mod[0].mod_start)); - /* Is the region suitable for relocating the multiboot modules? 
*/ if ( !initial_images_start && (s < e) && ((e-s) >= (modules_length+modules_headroom)) ) diff -r af992824b5cf -r c7cba853583d xen/arch/x86/smpboot.c --- a/xen/arch/x86/smpboot.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/smpboot.c Fri Feb 13 11:22:28 2009 +0900 @@ -1256,8 +1256,6 @@ int __cpu_disable(void) mdelay(1); local_irq_disable(); - cpufreq_del_cpu(cpu); - time_suspend(); cpu_mcheck_disable(); @@ -1320,6 +1318,8 @@ int cpu_down(unsigned int cpu) } printk("Prepare to bring CPU%d down...\n", cpu); + + cpufreq_del_cpu(cpu); err = stop_machine_run(take_cpu_down, NULL, cpu); if (err < 0) diff -r af992824b5cf -r c7cba853583d xen/arch/x86/x86_32/xen.lds.S --- a/xen/arch/x86/x86_32/xen.lds.S Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/x86_32/xen.lds.S Fri Feb 13 11:22:28 2009 +0900 @@ -91,6 +91,7 @@ SECTIONS *(.exit.text) *(.exit.data) *(.exitcall.exit) + *(.eh_frame) } /* Stabs debugging sections. */ diff -r af992824b5cf -r c7cba853583d xen/arch/x86/x86_64/entry.S --- a/xen/arch/x86/x86_64/entry.S Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/x86_64/entry.S Fri Feb 13 11:22:28 2009 +0900 @@ -739,7 +739,6 @@ ENTRY(hypercall_args_table) .byte 1 /* do_sysctl */ /* 35 */ .byte 1 /* do_domctl */ .byte 2 /* do_kexec */ - .byte 1 /* do_xsm_op */ .rept __HYPERVISOR_arch_0-(.-hypercall_args_table) .byte 0 /* do_ni_hypercall */ .endr diff -r af992824b5cf -r c7cba853583d xen/arch/x86/x86_64/xen.lds.S --- a/xen/arch/x86/x86_64/xen.lds.S Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/arch/x86/x86_64/xen.lds.S Fri Feb 13 11:22:28 2009 +0900 @@ -89,6 +89,7 @@ SECTIONS *(.exit.text) *(.exit.data) *(.exitcall.exit) + *(.eh_frame) } /* Stabs debugging sections. */ diff -r af992824b5cf -r c7cba853583d xen/common/domain.c --- a/xen/common/domain.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/common/domain.c Fri Feb 13 11:22:28 2009 +0900 @@ -41,7 +41,6 @@ boolean_param("dom0_vcpus_pin", opt_dom0 /* set xen as default cpufreq */ enum cpufreq_controller cpufreq_controller = FREQCTL_xen; -struct cpufreq_governor *cpufreq_opt_governor; static void __init setup_cpufreq_option(char *str) { @@ -70,19 +69,6 @@ static void __init setup_cpufreq_option( cpufreq_cmdline_parse(arg); } custom_param("cpufreq", setup_cpufreq_option); - -static void __init setup_cpufreq_gov_option(char *str) -{ - if ( !strcmp(str, "userspace") ) - cpufreq_opt_governor = &cpufreq_gov_userspace; - else if ( !strcmp(str, "performance") ) - cpufreq_opt_governor = &cpufreq_gov_performance; - else if ( !strcmp(str, "powersave") ) - cpufreq_opt_governor = &cpufreq_gov_powersave; - else if ( !strcmp(str, "ondemand") ) - cpufreq_opt_governor = &cpufreq_gov_dbs; -} -custom_param("cpufreq_governor", setup_cpufreq_gov_option); /* Protect updates/reads (resp.) of domain_list and domain_hash. */ DEFINE_SPINLOCK(domlist_update_lock); @@ -233,8 +219,8 @@ struct domain *domain_create( spin_lock_init(&d->page_alloc_lock); spin_lock_init(&d->shutdown_lock); spin_lock_init(&d->hypercall_deadlock_mutex); - INIT_LIST_HEAD(&d->page_list); - INIT_LIST_HEAD(&d->xenpage_list); + INIT_PAGE_LIST_HEAD(&d->page_list); + INIT_PAGE_LIST_HEAD(&d->xenpage_list); if ( domcr_flags & DOMCRF_hvm ) d->is_hvm = 1; diff -r af992824b5cf -r c7cba853583d xen/common/grant_table.c --- a/xen/common/grant_table.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/common/grant_table.c Fri Feb 13 11:22:28 2009 +0900 @@ -1192,7 +1192,7 @@ gnttab_transfer( /* Okay, add the page to 'e'. 
*/ if ( unlikely(e->tot_pages++ == 0) ) get_knownalive_domain(e); - list_add_tail(&page->list, &e->page_list); + page_list_add_tail(page, &e->page_list); page_set_owner(page, e); spin_unlock(&e->page_alloc_lock); diff -r af992824b5cf -r c7cba853583d xen/common/hvm/save.c --- a/xen/common/hvm/save.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/common/hvm/save.c Fri Feb 13 11:22:28 2009 +0900 @@ -26,6 +26,7 @@ #include <xen/version.h> #include <public/version.h> #include <xen/sched.h> +#include <xen/guest_access.h> #include <asm/hvm/support.h> @@ -75,6 +76,53 @@ size_t hvm_save_size(struct domain *d) return sz; } +/* Extract a single instance of a save record, by marshalling all + * records of that type and copying out the one we need. */ +int hvm_save_one(struct domain *d, uint16_t typecode, uint16_t instance, + XEN_GUEST_HANDLE_64(uint8) handle) +{ + int rv = 0; + size_t sz = 0; + struct vcpu *v; + hvm_domain_context_t ctxt = { 0, }; + + if ( d->is_dying + || typecode > HVM_SAVE_CODE_MAX + || hvm_sr_handlers[typecode].size < sizeof(struct hvm_save_descriptor) + || hvm_sr_handlers[typecode].save == NULL ) + return -EINVAL; + + if ( hvm_sr_handlers[typecode].kind == HVMSR_PER_VCPU ) + for_each_vcpu(d, v) + sz += hvm_sr_handlers[typecode].size; + else + sz = hvm_sr_handlers[typecode].size; + + if ( (instance + 1) * hvm_sr_handlers[typecode].size > sz ) + return -EINVAL; + + ctxt.size = sz; + ctxt.data = xmalloc_bytes(sz); + if ( !ctxt.data ) + return -ENOMEM; + + if ( hvm_sr_handlers[typecode].save(d, &ctxt) != 0 ) + { + gdprintk(XENLOG_ERR, + "HVM save: failed to save type %"PRIu16"\n", typecode); + rv = -EFAULT; + } + else if ( copy_to_guest(handle, + ctxt.data + + (instance * hvm_sr_handlers[typecode].size) + + sizeof (struct hvm_save_descriptor), + hvm_sr_handlers[typecode].size + - sizeof (struct hvm_save_descriptor)) ) + rv = -EFAULT; + + xfree(ctxt.data); + return rv; +} int hvm_save(struct domain *d, hvm_domain_context_t *h) { diff -r af992824b5cf -r c7cba853583d xen/common/memory.c --- a/xen/common/memory.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/common/memory.c Fri Feb 13 11:22:28 2009 +0900 @@ -218,8 +218,8 @@ static long memory_exchange(XEN_GUEST_HA static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg) { struct xen_memory_exchange exch; - LIST_HEAD(in_chunk_list); - LIST_HEAD(out_chunk_list); + PAGE_LIST_HEAD(in_chunk_list); + PAGE_LIST_HEAD(out_chunk_list); unsigned long in_chunk_order, out_chunk_order; xen_pfn_t gpfn, gmfn, mfn; unsigned long i, j, k; @@ -325,7 +325,7 @@ static long memory_exchange(XEN_GUEST_HA goto fail; } - list_add(&page->list, &in_chunk_list); + page_list_add(page, &in_chunk_list); } } @@ -339,7 +339,7 @@ static long memory_exchange(XEN_GUEST_HA goto fail; } - list_add(&page->list, &out_chunk_list); + page_list_add(page, &out_chunk_list); } /* @@ -347,10 +347,8 @@ static long memory_exchange(XEN_GUEST_HA */ /* Destroy final reference to each input page. */ - while ( !list_empty(&in_chunk_list) ) - { - page = list_entry(in_chunk_list.next, struct page_info, list); - list_del(&page->list); + while ( (page = page_list_remove_head(&in_chunk_list)) ) + { if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) ) BUG(); mfn = page_to_mfn(page); @@ -360,10 +358,8 @@ static long memory_exchange(XEN_GUEST_HA /* Assign each output page to the domain. 
*/ j = 0; - while ( !list_empty(&out_chunk_list) ) - { - page = list_entry(out_chunk_list.next, struct page_info, list); - list_del(&page->list); + while ( (page = page_list_remove_head(&out_chunk_list)) ) + { if ( assign_pages(d, page, exch.out.extent_order, MEMF_no_refcount) ) BUG(); @@ -399,21 +395,13 @@ static long memory_exchange(XEN_GUEST_HA */ fail: /* Reassign any input pages we managed to steal. */ - while ( !list_empty(&in_chunk_list) ) - { - page = list_entry(in_chunk_list.next, struct page_info, list); - list_del(&page->list); + while ( (page = page_list_remove_head(&in_chunk_list)) ) if ( assign_pages(d, page, 0, MEMF_no_refcount) ) BUG(); - } /* Free any output pages we managed to allocate. */ - while ( !list_empty(&out_chunk_list) ) - { - page = list_entry(out_chunk_list.next, struct page_info, list); - list_del(&page->list); + while ( (page = page_list_remove_head(&out_chunk_list)) ) free_domheap_pages(page, exch.out.extent_order); - } exch.nr_exchanged = i << in_chunk_order; diff -r af992824b5cf -r c7cba853583d xen/common/page_alloc.c --- a/xen/common/page_alloc.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/common/page_alloc.c Fri Feb 13 11:22:28 2009 +0900 @@ -71,7 +71,7 @@ integer_param("dma_bits", dma_bitsize); #endif static DEFINE_SPINLOCK(page_scrub_lock); -LIST_HEAD(page_scrub_list); +PAGE_LIST_HEAD(page_scrub_list); static unsigned long scrub_pages; /********************* @@ -264,7 +264,7 @@ unsigned long __init alloc_boot_pages( #define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN : \ (fls(page_to_mfn(pg)) - 1)) -typedef struct list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1]; +typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1]; static heap_by_zone_and_order_t *_heap[MAX_NUMNODES]; #define heap(node, zone, order) ((*_heap[node])[zone][order]) @@ -272,13 +272,16 @@ static unsigned long *avail[MAX_NUMNODES static DEFINE_SPINLOCK(heap_lock); -static void init_node_heap(int node) +static unsigned long init_node_heap(int node, unsigned long mfn, + unsigned long nr) { /* First node to be discovered has its heap metadata statically alloced. */ static heap_by_zone_and_order_t _heap_static; static unsigned long avail_static[NR_ZONES]; static int first_node_initialised; - + unsigned long needed = (sizeof(**_heap) + + sizeof(**avail) * NR_ZONES + + PAGE_SIZE - 1) >> PAGE_SHIFT; int i, j; if ( !first_node_initialised ) @@ -286,19 +289,40 @@ static void init_node_heap(int node) _heap[node] = &_heap_static; avail[node] = avail_static; first_node_initialised = 1; + needed = 0; + } +#ifdef DIRECTMAP_VIRT_END + else if ( nr >= needed && + mfn + needed <= virt_to_mfn(DIRECTMAP_VIRT_END) ) + { + _heap[node] = mfn_to_virt(mfn); + avail[node] = mfn_to_virt(mfn + needed) - sizeof(**avail) * NR_ZONES; + } +#endif + else if ( get_order_from_bytes(sizeof(**_heap)) == + get_order_from_pages(needed) ) + { + _heap[node] = alloc_xenheap_pages(get_order_from_pages(needed), 0); + BUG_ON(!_heap[node]); + avail[node] = (void *)_heap[node] + (needed << PAGE_SHIFT) - + sizeof(**avail) * NR_ZONES; + needed = 0; } else { _heap[node] = xmalloc(heap_by_zone_and_order_t); avail[node] = xmalloc_array(unsigned long, NR_ZONES); BUG_ON(!_heap[node] || !avail[node]); + needed = 0; } memset(avail[node], 0, NR_ZONES * sizeof(long)); for ( i = 0; i < NR_ZONES; i++ ) for ( j = 0; j <= MAX_ORDER; j++ ) - INIT_LIST_HEAD(&(*_heap[node])[i][j]); + INIT_PAGE_LIST_HEAD(&(*_heap[node])[i][j]); + + return needed; } /* Allocate 2^@order contiguous pages. 
*/ @@ -340,7 +364,7 @@ static struct page_info *alloc_heap_page /* Find smallest order which can satisfy the request. */ for ( j = order; j <= MAX_ORDER; j++ ) - if ( !list_empty(&heap(node, zone, j)) ) + if ( (pg = page_list_remove_head(&heap(node, zone, j))) ) goto found; } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */ @@ -354,14 +378,11 @@ static struct page_info *alloc_heap_page return NULL; found: - pg = list_entry(heap(node, zone, j).next, struct page_info, list); - list_del(&pg->list); - /* We may have to halve the chunk a number of times. */ while ( j != order ) { PFN_ORDER(pg) = --j; - list_add_tail(&pg->list, &heap(node, zone, j)); + page_list_add_tail(pg, &heap(node, zone, j)); pg += 1 << j; } @@ -378,10 +399,13 @@ static struct page_info *alloc_heap_page /* Reference count must continuously be zero for free pages. */ BUG_ON(pg[i].count_info != 0); - /* Add in any extra CPUs that need flushing because of this page. */ - cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask); - tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp); - cpus_or(mask, mask, extra_cpus_mask); + if ( pg[i].u.free.need_tlbflush ) + { + /* Add in extra CPUs that need flushing because of this page. */ + cpus_andnot(extra_cpus_mask, cpu_online_map, mask); + tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp); + cpus_or(mask, mask, extra_cpus_mask); + } /* Initialise fields which have other uses for free pages. */ pg[i].u.inuse.type_info = 0; @@ -404,7 +428,6 @@ static void free_heap_pages( unsigned long mask; unsigned int i, node = phys_to_nid(page_to_maddr(pg)); unsigned int zone = page_to_zone(pg); - struct domain *d; ASSERT(order <= MAX_ORDER); ASSERT(node >= 0); @@ -425,15 +448,10 @@ static void free_heap_pages( */ pg[i].count_info = 0; - if ( (d = page_get_owner(&pg[i])) != NULL ) - { + /* If a page has no owner it will need no safety TLB flush. */ + pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL); + if ( pg[i].u.free.need_tlbflush ) pg[i].tlbflush_timestamp = tlbflush_current_time(); - pg[i].u.free.cpumask = d->domain_dirty_cpumask; - } - else - { - cpus_clear(pg[i].u.free.cpumask); - } } spin_lock(&heap_lock); @@ -452,8 +470,8 @@ static void free_heap_pages( if ( allocated_in_map(page_to_mfn(pg)-mask) || (PFN_ORDER(pg-mask) != order) ) break; - list_del(&(pg-mask)->list); pg -= mask; + page_list_del(pg, &heap(node, zone, order)); } else { @@ -461,7 +479,7 @@ static void free_heap_pages( if ( allocated_in_map(page_to_mfn(pg)+mask) || (PFN_ORDER(pg+mask) != order) ) break; - list_del(&(pg+mask)->list); + page_list_del(pg + mask, &heap(node, zone, order)); } order++; @@ -471,7 +489,7 @@ static void free_heap_pages( } PFN_ORDER(pg) = order; - list_add_tail(&pg->list, &heap(node, zone, order)); + page_list_add_tail(pg, &heap(node, zone, order)); spin_unlock(&heap_lock); } @@ -482,7 +500,6 @@ static void free_heap_pages( * latter is not on a MAX_ORDER boundary, then we reserve the page by * not freeing it to the buddy allocator. 
*/ -#define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER)) static void init_heap_pages( struct page_info *pg, unsigned long nr_pages) { @@ -491,25 +508,33 @@ static void init_heap_pages( nid_prev = phys_to_nid(page_to_maddr(pg-1)); - for ( i = 0; i < nr_pages; i++ ) + for ( i = 0; i < nr_pages; nid_prev = nid_curr, i++ ) { nid_curr = phys_to_nid(page_to_maddr(pg+i)); if ( unlikely(!avail[nid_curr]) ) - init_node_heap(nid_curr); + { + unsigned long n; + + n = init_node_heap(nid_curr, page_to_mfn(pg+i), nr_pages - i); + if ( n ) + { + BUG_ON(i + n > nr_pages); + i += n - 1; + continue; + } + } /* - * free pages of the same node, or if they differ, but are on a - * MAX_ORDER alignement boundary (which already get reserved) + * Free pages of the same node, or if they differ, but are on a + * MAX_ORDER alignment boundary (which already get reserved). */ - if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) & - MAX_ORDER_ALIGNED) ) - free_heap_pages(pg+i, 0); - else - printk("Reserving non-aligned node boundary @ mfn %lu\n", - page_to_mfn(pg+i)); - - nid_prev = nid_curr; + if ( (nid_curr == nid_prev) || + !(page_to_mfn(pg+i) & ((1UL << MAX_ORDER) - 1)) ) + free_heap_pages(pg+i, 0); + else + printk("Reserving non-aligned node boundary @ mfn %#lx\n", + page_to_mfn(pg+i)); } } @@ -537,7 +562,7 @@ static unsigned long avail_heap_pages( #define avail_for_domheap(mfn) !(allocated_in_map(mfn) || is_xen_heap_mfn(mfn)) void __init end_boot_allocator(void) { - unsigned long i; + unsigned long i, nr = 0; int curr_free, next_free; /* Pages that are free now go to the domain sub-allocator. */ @@ -550,8 +575,15 @@ void __init end_boot_allocator(void) if ( next_free ) map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */ if ( curr_free ) - init_heap_pages(mfn_to_page(i), 1); - } + ++nr; + else if ( nr ) + { + init_heap_pages(mfn_to_page(i - nr), nr); + nr = 0; + } + } + if ( nr ) + init_heap_pages(mfn_to_page(i - nr), nr); if ( !dma_bitsize && (num_online_nodes() > 1) ) { @@ -786,7 +818,7 @@ int assign_pages( page_set_owner(&pg[i], d); wmb(); /* Domain pointer must be visible before updating refcnt. */ pg[i].count_info = PGC_allocated | 1; - list_add_tail(&pg[i].list, &d->page_list); + page_list_add_tail(&pg[i], &d->page_list); } spin_unlock(&d->page_alloc_lock); @@ -844,7 +876,7 @@ void free_domheap_pages(struct page_info spin_lock_recursive(&d->page_alloc_lock); for ( i = 0; i < (1 << order); i++ ) - list_del(&pg[i].list); + page_list_del2(&pg[i], &d->xenpage_list, &d->arch.relmem_list); d->xenheap_pages -= 1 << order; drop_dom_ref = (d->xenheap_pages == 0); @@ -859,7 +891,7 @@ void free_domheap_pages(struct page_info for ( i = 0; i < (1 << order); i++ ) { BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0); - list_del(&pg[i].list); + page_list_del2(&pg[i], &d->page_list, &d->arch.relmem_list); } d->tot_pages -= 1 << order; @@ -882,7 +914,7 @@ void free_domheap_pages(struct page_info { page_set_owner(&pg[i], NULL); spin_lock(&page_scrub_lock); - list_add(&pg[i].list, &page_scrub_list); + page_list_add(&pg[i], &page_scrub_list); scrub_pages++; spin_unlock(&page_scrub_lock); } @@ -965,7 +997,7 @@ static DEFINE_PER_CPU(struct timer, page static void page_scrub_softirq(void) { - struct list_head *ent; + PAGE_LIST_HEAD(list); struct page_info *pg; void *p; int i; @@ -983,32 +1015,26 @@ static void page_scrub_softirq(void) do { spin_lock(&page_scrub_lock); - if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) ) + /* Peel up to 16 pages from the list. 
*/ + for ( i = 0; i < 16; i++ ) + { + if ( !(pg = page_list_remove_head(&page_scrub_list)) ) + break; + page_list_add_tail(pg, &list); + } + + if ( unlikely(i == 0) ) { spin_unlock(&page_scrub_lock); goto out; } - - /* Peel up to 16 pages from the list. */ - for ( i = 0; i < 16; i++ ) - { - if ( ent->next == &page_scrub_list ) - break; - ent = ent->next; - } - - /* Remove peeled pages from the list. */ - ent->next->prev = &page_scrub_list; - page_scrub_list.next = ent->next; - scrub_pages -= (i+1); + + scrub_pages -= i; spin_unlock(&page_scrub_lock); - /* Working backwards, scrub each page in turn. */ - while ( ent != &page_scrub_list ) - { - pg = list_entry(ent, struct page_info, list); - ent = ent->prev; + /* Scrub each page in turn. */ + while ( (pg = page_list_remove_head(&list)) ) { p = map_domain_page(page_to_mfn(pg)); scrub_page(p); unmap_domain_page(p); diff -r af992824b5cf -r c7cba853583d xen/drivers/char/serial.c --- a/xen/drivers/char/serial.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/char/serial.c Fri Feb 13 11:22:28 2009 +0900 @@ -471,7 +471,7 @@ void serial_suspend(void) int i, irq; for ( i = 0; i < ARRAY_SIZE(com); i++ ) if ( (irq = serial_irq(i)) >= 0 ) - free_irq(irq); + release_irq(irq); } void serial_resume(void) diff -r af992824b5cf -r c7cba853583d xen/drivers/cpufreq/cpufreq.c --- a/xen/drivers/cpufreq/cpufreq.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/cpufreq/cpufreq.c Fri Feb 13 11:22:28 2009 +0900 @@ -46,6 +46,9 @@ #include <acpi/acpi.h> #include <acpi/cpufreq/cpufreq.h> +static unsigned int usr_max_freq, usr_min_freq; +static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy); + struct cpufreq_dom { unsigned int dom; cpumask_t map; @@ -53,6 +56,7 @@ struct cpufreq_dom { }; static LIST_HEAD(cpufreq_dom_list_head); +struct cpufreq_governor *cpufreq_opt_governor; LIST_HEAD(cpufreq_governor_list); struct cpufreq_governor *__find_governor(const char *governor) @@ -213,6 +217,9 @@ int cpufreq_add_cpu(unsigned int cpu) perf->domain_info.num_processors) { memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); policy->governor = NULL; + + cpufreq_cmdline_common_para(&new_policy); + ret = __cpufreq_set_policy(policy, &new_policy); if (ret) { if (new_policy.governor == CPUFREQ_DEFAULT_GOVERNOR) @@ -467,3 +474,69 @@ out: return ret; } +static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy) +{ + if (usr_max_freq) + new_policy->max = usr_max_freq; + if (usr_min_freq) + new_policy->min = usr_min_freq; +} + +static int __init cpufreq_handle_common_option(const char *name, const char *val) +{ + if (!strcmp(name, "maxfreq") && val) { + usr_max_freq = simple_strtoul(val, NULL, 0); + return 1; + } + + if (!strcmp(name, "minfreq") && val) { + usr_min_freq = simple_strtoul(val, NULL, 0); + return 1; + } + + return 0; +} + +void __init cpufreq_cmdline_parse(char *str) +{ + static struct cpufreq_governor *__initdata cpufreq_governors[] = + { + &cpufreq_gov_userspace, + &cpufreq_gov_dbs, + &cpufreq_gov_performance, + &cpufreq_gov_powersave + }; + unsigned int gov_index = 0; + + do { + char *val, *end = strchr(str, ','); + unsigned int i; + + if (end) + *end++ = '\0'; + val = strchr(str, '='); + if (val) + *val++ = '\0'; + + if (!cpufreq_opt_governor) { + if (!val) { + for (i = 0; i < ARRAY_SIZE(cpufreq_governors); ++i) { + if (!strcmp(str, cpufreq_governors[i]->name)) { + cpufreq_opt_governor = cpufreq_governors[i]; + gov_index = i; + str = NULL; + break; + } + } + } else { + cpufreq_opt_governor = CPUFREQ_DEFAULT_GOVERNOR; + } + } + 
+ if (str && !cpufreq_handle_common_option(str, val) && + cpufreq_governors[gov_index]->handle_option) + cpufreq_governors[gov_index]->handle_option(str, val); + + str = end; + } while (str); +} diff -r af992824b5cf -r c7cba853583d xen/drivers/cpufreq/cpufreq_misc_governors.c --- a/xen/drivers/cpufreq/cpufreq_misc_governors.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/cpufreq/cpufreq_misc_governors.c Fri Feb 13 11:22:28 2009 +0900 @@ -18,6 +18,7 @@ #include <xen/sched.h> #include <acpi/cpufreq/cpufreq.h> +static unsigned int usr_speed; /* * cpufreq userspace governor @@ -26,6 +27,7 @@ static int cpufreq_governor_userspace(st unsigned int event) { int ret = 0; + unsigned int freq; if (!policy) return -EINVAL; @@ -35,12 +37,17 @@ static int cpufreq_governor_userspace(st case CPUFREQ_GOV_STOP: break; case CPUFREQ_GOV_LIMITS: - if (policy->max < policy->cur) + freq = usr_speed ? : policy->cur; + if (policy->max < freq) ret = __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); - else if (policy->min > policy->cur) + else if (policy->min > freq) ret = __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); + else if (usr_speed) + ret = __cpufreq_driver_target(policy, freq, + CPUFREQ_RELATION_L); + break; default: ret = -EINVAL; @@ -50,9 +57,17 @@ static int cpufreq_governor_userspace(st return ret; } +static void __init +cpufreq_userspace_handle_option(const char *name, const char *val) +{ + if (!strcmp(name, "speed") && val) + usr_speed = simple_strtoul(val, NULL, 0); +} + struct cpufreq_governor cpufreq_gov_userspace = { .name = "userspace", .governor = cpufreq_governor_userspace, + .handle_option = cpufreq_userspace_handle_option }; static int __init cpufreq_gov_userspace_init(void) @@ -61,7 +76,7 @@ static int __init cpufreq_gov_userspace_ } __initcall(cpufreq_gov_userspace_init); -static void cpufreq_gov_userspace_exit(void) +static void __exit cpufreq_gov_userspace_exit(void) { cpufreq_unregister_governor(&cpufreq_gov_userspace); } @@ -106,7 +121,7 @@ static int __init cpufreq_gov_performanc } __initcall(cpufreq_gov_performance_init); -static void cpufreq_gov_performance_exit(void) +static void __exit cpufreq_gov_performance_exit(void) { cpufreq_unregister_governor(&cpufreq_gov_performance); } @@ -151,7 +166,7 @@ static int __init cpufreq_gov_powersave_ } __initcall(cpufreq_gov_powersave_init); -static void cpufreq_gov_powersave_exit(void) +static void __exit cpufreq_gov_powersave_exit(void) { cpufreq_unregister_governor(&cpufreq_gov_powersave); } diff -r af992824b5cf -r c7cba853583d xen/drivers/cpufreq/cpufreq_ondemand.c --- a/xen/drivers/cpufreq/cpufreq_ondemand.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/cpufreq/cpufreq_ondemand.c Fri Feb 13 11:22:28 2009 +0900 @@ -281,9 +281,50 @@ int cpufreq_governor_dbs(struct cpufreq_ return 0; } +static void __init cpufreq_dbs_handle_option(const char *name, const char *val) +{ + if ( !strcmp(name, "rate") && val ) + { + usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1); + } + else if ( !strcmp(name, "up_threshold") && val ) + { + unsigned long tmp = simple_strtoul(val, NULL, 0); + + if ( tmp < MIN_FREQUENCY_UP_THRESHOLD ) + { + printk(XENLOG_WARNING "cpufreq/ondemand: " + "specified threshold too low, using %d\n", + MIN_FREQUENCY_UP_THRESHOLD); + tmp = MIN_FREQUENCY_UP_THRESHOLD; + } + else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD ) + { + printk(XENLOG_WARNING "cpufreq/ondemand: " + "specified threshold too high, using %d\n", + MAX_FREQUENCY_UP_THRESHOLD); + tmp = MAX_FREQUENCY_UP_THRESHOLD; + } + 
dbs_tuners_ins.up_threshold = tmp; + } + else if ( !strcmp(name, "bias") && val ) + { + unsigned long tmp = simple_strtoul(val, NULL, 0); + + if ( tmp > 1000 ) + { + printk(XENLOG_WARNING "cpufreq/ondemand: " + "specified bias too high, using 1000\n"); + tmp = 1000; + } + dbs_tuners_ins.powersave_bias = tmp; + } +} + struct cpufreq_governor cpufreq_gov_dbs = { .name = "ondemand", .governor = cpufreq_governor_dbs, + .handle_option = cpufreq_dbs_handle_option }; static int __init cpufreq_gov_dbs_init(void) @@ -292,60 +333,8 @@ static int __init cpufreq_gov_dbs_init(v } __initcall(cpufreq_gov_dbs_init); -static void cpufreq_gov_dbs_exit(void) +static void __exit cpufreq_gov_dbs_exit(void) { cpufreq_unregister_governor(&cpufreq_gov_dbs); } __exitcall(cpufreq_gov_dbs_exit); - -void __init cpufreq_cmdline_parse(char *str) -{ - do { - char *val, *end = strchr(str, ','); - - if ( end ) - *end++ = '\0'; - val = strchr(str, '='); - if ( val ) - *val++ = '\0'; - - if ( !strcmp(str, "rate") && val ) - { - usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1); - } - else if ( !strcmp(str, "threshold") && val ) - { - unsigned long tmp = simple_strtoul(val, NULL, 0); - - if ( tmp < MIN_FREQUENCY_UP_THRESHOLD ) - { - printk(XENLOG_WARNING "cpufreq/ondemand: " - "specified threshold too low, using %d\n", - MIN_FREQUENCY_UP_THRESHOLD); - tmp = MIN_FREQUENCY_UP_THRESHOLD; - } - else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD ) - { - printk(XENLOG_WARNING "cpufreq/ondemand: " - "specified threshold too high, using %d\n", - MAX_FREQUENCY_UP_THRESHOLD); - tmp = MAX_FREQUENCY_UP_THRESHOLD; - } - dbs_tuners_ins.up_threshold = tmp; - } - else if ( !strcmp(str, "bias") && val ) - { - unsigned long tmp = simple_strtoul(val, NULL, 0); - - if ( tmp > 1000 ) - { - printk(XENLOG_WARNING "cpufreq/ondemand: " - "specified bias too high, using 1000\n"); - tmp = 1000; - } - dbs_tuners_ins.powersave_bias = tmp; - } - - str = end; - } while ( str ); -} diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/passthrough/amd/iommu_init.c Fri Feb 13 11:22:28 2009 +0900 @@ -479,26 +479,27 @@ static int set_iommu_interrupt_handler(s { int vector, ret; - vector = assign_irq_vector(AUTO_ASSIGN); - vector_to_iommu[vector] = iommu; - - /* make irq == vector */ - irq_vector[vector] = vector; - vector_irq[vector] = vector; - - if ( !vector ) - { - amd_iov_error("no vectors\n"); + vector = assign_irq_vector(AUTO_ASSIGN_IRQ); + if ( vector <= 0 ) + { + gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n"); return 0; } irq_desc[vector].handler = &iommu_msi_type; - ret = request_irq(vector, amd_iommu_page_fault, 0, "amd_iommu", iommu); + ret = request_irq_vector(vector, amd_iommu_page_fault, 0, + "amd_iommu", iommu); if ( ret ) { + irq_desc[vector].handler = &no_irq_type; + free_irq_vector(vector); amd_iov_error("can't request irq\n"); return 0; } + + /* Make sure that vector is never re-used. 
*/ + vector_irq[vector] = NEVER_ASSIGN_IRQ; + vector_to_iommu[vector] = iommu; iommu->vector = vector; return vector; } diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/amd/iommu_map.c --- a/xen/drivers/passthrough/amd/iommu_map.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/passthrough/amd/iommu_map.c Fri Feb 13 11:22:28 2009 +0900 @@ -461,8 +461,8 @@ int amd_iommu_map_page(struct domain *d, iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn); if ( iommu_l2e == 0 ) { + spin_unlock_irqrestore(&hd->mapping_lock, flags); amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn); - spin_unlock_irqrestore(&hd->mapping_lock, flags); return -EFAULT; } set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir); @@ -493,8 +493,8 @@ int amd_iommu_unmap_page(struct domain * if ( iommu_l2e == 0 ) { + spin_unlock_irqrestore(&hd->mapping_lock, flags); amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn); - spin_unlock_irqrestore(&hd->mapping_lock, flags); return -EFAULT; } @@ -533,9 +533,9 @@ int amd_iommu_reserve_domain_unity_map( if ( iommu_l2e == 0 ) { - amd_iov_error( - "Invalid IO pagetable entry phys_addr = %lx\n", phys_addr); spin_unlock_irqrestore(&hd->mapping_lock, flags); + amd_iov_error("Invalid IO pagetable entry phys_addr = %lx\n", + phys_addr); return -EFAULT; } @@ -552,7 +552,6 @@ int amd_iommu_sync_p2m(struct domain *d) { unsigned long mfn, gfn, flags; u64 iommu_l2e; - struct list_head *entry; struct page_info *page; struct hvm_iommu *hd; int iw = IOMMU_IO_WRITE_ENABLED; @@ -568,10 +567,10 @@ int amd_iommu_sync_p2m(struct domain *d) if ( hd->p2m_synchronized ) goto out; - for ( entry = d->page_list.next; entry != &d->page_list; - entry = entry->next ) - { - page = list_entry(entry, struct page_info, list); + spin_lock(&d->page_alloc_lock); + + page_list_for_each ( page, &d->page_list ) + { mfn = page_to_mfn(page); gfn = get_gpfn_from_mfn(mfn); @@ -582,13 +581,16 @@ int amd_iommu_sync_p2m(struct domain *d) if ( iommu_l2e == 0 ) { + spin_unlock(&d->page_alloc_lock); + spin_unlock_irqrestore(&hd->mapping_lock, flags); amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn); - spin_unlock_irqrestore(&hd->mapping_lock, flags); return -EFAULT; } set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir); } + + spin_unlock(&d->page_alloc_lock); hd->p2m_synchronized = 1; diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Feb 13 11:22:28 2009 +0900 @@ -23,7 +23,6 @@ #include <xen/pci_regs.h> #include <asm/amd-iommu.h> #include <asm/hvm/svm/amd-iommu-proto.h> -#include <asm/mm.h> extern unsigned short ivrs_bdf_entries; extern struct ivrs_mappings *ivrs_mappings; diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/io.c --- a/xen/drivers/passthrough/io.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/passthrough/io.c Fri Feb 13 11:22:28 2009 +0900 @@ -87,8 +87,8 @@ int pt_irq_create_bind_vtd( if ( domain_set_irq_dpci(d, hvm_irq_dpci) == 0 ) { + spin_unlock(&d->event_lock); xfree(hvm_irq_dpci); - spin_unlock(&d->event_lock); return -EINVAL; } } diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/passthrough/iommu.c Fri Feb 13 11:22:28 2009 +0900 @@ -33,6 +33,8 @@ int amd_iov_detect(void); * no-pv Disable IOMMU for PV domains (default) * force|required 
Don't boot unless IOMMU is enabled * passthrough Bypass VT-d translation for Dom0 + * snoop Utilize the snoop control for IOMMU (default) + * no-snoop Dont utilize the snoop control for IOMMU */ custom_param("iommu", parse_iommu_param); int iommu_enabled = 0; @@ -45,6 +47,7 @@ static void __init parse_iommu_param(cha { char *ss; iommu_enabled = 1; + iommu_snoop = 1; do { ss = strchr(s, ','); @@ -62,6 +65,10 @@ static void __init parse_iommu_param(cha force_iommu = 1; else if ( !strcmp(s, "passthrough") ) iommu_passthrough = 1; + else if ( !strcmp(s, "snoop") ) + iommu_snoop = 1; + else if ( !strcmp(s, "no-snoop") ) + iommu_snoop = 0; s = ss + 1; } while ( ss ); @@ -141,7 +148,7 @@ static int iommu_populate_page_table(str spin_lock(&d->page_alloc_lock); - list_for_each_entry ( page, &d->page_list, list ) + page_list_for_each ( page, &d->page_list ) { if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page ) { diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/dmar.c --- a/xen/drivers/passthrough/vtd/dmar.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/passthrough/vtd/dmar.c Fri Feb 13 11:22:28 2009 +0900 @@ -21,6 +21,7 @@ #include <xen/init.h> #include <xen/bitmap.h> +#include <xen/errno.h> #include <xen/kernel.h> #include <xen/acpi.h> #include <xen/mm.h> @@ -518,8 +519,6 @@ int acpi_dmar_init(void) int acpi_dmar_init(void) { int rc; - struct acpi_drhd_unit *drhd; - struct iommu *iommu; rc = -ENODEV; if ( force_iommu ) @@ -536,20 +535,7 @@ int acpi_dmar_init(void) if ( list_empty(&acpi_drhd_units) ) goto fail; - /* Giving that all devices within guest use same io page table, - * enable snoop control only if all VT-d engines support it. - */ - iommu_snoop = 1; - for_each_drhd_unit ( drhd ) - { - iommu = drhd->iommu; - if ( !ecap_snp_ctl(iommu->ecap) ) { - iommu_snoop = 0; - break; - } - } - - printk("Intel VT-d has been enabled, snoop_control=%d.\n", iommu_snoop); + printk("Intel VT-d has been enabled\n"); return 0; diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/ia64/vtd.c --- a/xen/drivers/passthrough/vtd/ia64/vtd.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/passthrough/vtd/ia64/vtd.c Fri Feb 13 11:22:28 2009 +0900 @@ -29,7 +29,9 @@ #include "../vtd.h" -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; +int vector_irq[NR_VECTORS] __read_mostly = { + [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN_IRQ +}; /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ u8 irq_vector[NR_IRQS] __read_mostly; @@ -45,18 +47,19 @@ void unmap_vtd_domain_page(void *va) } /* Allocate page table, return its machine address */ -u64 alloc_pgtable_maddr(struct domain *d) +u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages) { struct page_info *pg; u64 *vaddr; - pg = alloc_domheap_page(NULL, d ? MEMF_node(domain_to_node(d)) : 0); + pg = alloc_domheap_pages(NULL, get_order_from_pages(npages), + d ? 
MEMF_node(domain_to_node(d)) : 0); vaddr = map_domain_page(page_to_mfn(pg)); if ( !vaddr ) return 0; - memset(vaddr, 0, PAGE_SIZE); + memset(vaddr, 0, PAGE_SIZE * npages); - iommu_flush_cache_page(vaddr); + iommu_flush_cache_page(vaddr, npages); unmap_domain_page(vaddr); return page_to_maddr(pg); diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/intremap.c --- a/xen/drivers/passthrough/vtd/intremap.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/passthrough/vtd/intremap.c Fri Feb 13 11:22:28 2009 +0900 @@ -502,7 +502,7 @@ int intremap_setup(struct iommu *iommu) ir_ctrl = iommu_ir_ctrl(iommu); if ( ir_ctrl->iremap_maddr == 0 ) { - ir_ctrl->iremap_maddr = alloc_pgtable_maddr(NULL); + ir_ctrl->iremap_maddr = alloc_pgtable_maddr(NULL, 1); if ( ir_ctrl->iremap_maddr == 0 ) { dprintk(XENLOG_WARNING VTDPREFIX, diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/passthrough/vtd/iommu.c Fri Feb 13 11:22:28 2009 +0900 @@ -129,9 +129,9 @@ void iommu_flush_cache_entry(void *addr) __iommu_flush_cache(addr, 8); } -void iommu_flush_cache_page(void *addr) -{ - __iommu_flush_cache(addr, PAGE_SIZE_4K); +void iommu_flush_cache_page(void *addr, unsigned long npages) +{ + __iommu_flush_cache(addr, PAGE_SIZE_4K * npages); } int nr_iommus; @@ -146,7 +146,7 @@ static u64 bus_to_context_maddr(struct i root = &root_entries[bus]; if ( !root_present(*root) ) { - maddr = alloc_pgtable_maddr(NULL); + maddr = alloc_pgtable_maddr(NULL, 1); if ( maddr == 0 ) { unmap_vtd_domain_page(root_entries); @@ -174,7 +174,7 @@ static u64 addr_to_dma_page_maddr(struct addr &= (((u64)1) << addr_width) - 1; ASSERT(spin_is_locked(&hd->mapping_lock)); if ( hd->pgd_maddr == 0 ) - if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain)) == 0) ) + if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain, 1)) == 0) ) goto out; parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr); @@ -187,7 +187,7 @@ static u64 addr_to_dma_page_maddr(struct { if ( !alloc ) break; - maddr = alloc_pgtable_maddr(domain); + maddr = alloc_pgtable_maddr(domain, 1); if ( !maddr ) break; dma_set_pte_addr(*pte, maddr); @@ -577,7 +577,7 @@ static int iommu_set_root_entry(struct i spin_lock(&iommu->lock); if ( iommu->root_maddr == 0 ) - iommu->root_maddr = alloc_pgtable_maddr(NULL); + iommu->root_maddr = alloc_pgtable_maddr(NULL, 1); if ( iommu->root_maddr == 0 ) { spin_unlock(&iommu->lock); @@ -874,23 +874,27 @@ int iommu_set_interrupt(struct iommu *io { int vector, ret; - vector = assign_irq_vector(AUTO_ASSIGN); - vector_to_iommu[vector] = iommu; - - /* VT-d fault is a MSI, make irq == vector */ - irq_vector[vector] = vector; - vector_irq[vector] = vector; - - if ( !vector ) + vector = assign_irq_vector(AUTO_ASSIGN_IRQ); + if ( vector <= 0 ) { gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n"); return -EINVAL; } irq_desc[vector].handler = &dma_msi_type; - ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu); + ret = request_irq_vector(vector, iommu_page_fault, 0, "dmar", iommu); if ( ret ) + { + irq_desc[vector].handler = &no_irq_type; + free_irq_vector(vector); gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n"); + return ret; + } + + /* Make sure that vector is never re-used. 
*/ + vector_irq[vector] = NEVER_ASSIGN_IRQ; + vector_to_iommu[vector] = iommu; + return vector; } @@ -966,7 +970,7 @@ static void iommu_free(struct acpi_drhd_ iounmap(iommu->reg); free_intel_iommu(iommu->intel); - free_irq(iommu->vector); + release_irq_vector(iommu->vector); xfree(iommu); drhd->iommu = NULL; @@ -1677,6 +1681,11 @@ static int init_vtd_hw(void) } vector = iommu_set_interrupt(iommu); + if ( vector < 0 ) + { + gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: interrupt setup failed\n"); + return vector; + } dma_msi_data_init(iommu, vector); dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map))); iommu->vector = vector; @@ -1756,6 +1765,23 @@ int intel_vtd_setup(void) if ( init_vtd_hw() ) goto error; + /* Giving that all devices within guest use same io page table, + * enable snoop control only if all VT-d engines support it. + */ + + if ( iommu_snoop ) + { + for_each_drhd_unit ( drhd ) + { + iommu = drhd->iommu; + if ( !ecap_snp_ctl(iommu->ecap) ) { + iommu_snoop = 0; + break; + } + } + } + + printk("Intel VT-d snoop control %sabled\n", iommu_snoop ? "en" : "dis"); register_keyhandler('V', dump_iommu_info, "dump iommu info"); return 0; @@ -1764,6 +1790,7 @@ int intel_vtd_setup(void) for_each_drhd_unit ( drhd ) iommu_free(drhd); vtd_enabled = 0; + iommu_snoop = 0; return -ENOMEM; } diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/iommu.h --- a/xen/drivers/passthrough/vtd/iommu.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/passthrough/vtd/iommu.h Fri Feb 13 11:22:28 2009 +0900 @@ -397,7 +397,9 @@ struct poll_info { u32 udata; }; -#define QINVAL_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct qinval_entry)) +#define MAX_QINVAL_PAGES 8 +#define NUM_QINVAL_PAGES 1 +#define QINVAL_ENTRY_NR (PAGE_SIZE_4K*NUM_QINVAL_PAGES/sizeof(struct qinval_entry)) #define qinval_present(v) ((v).lo & 1) #define qinval_fault_disable(v) (((v).lo >> 1) & 1) diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/qinval.c --- a/xen/drivers/passthrough/vtd/qinval.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/passthrough/vtd/qinval.c Fri Feb 13 11:22:28 2009 +0900 @@ -427,7 +427,7 @@ int qinval_setup(struct iommu *iommu) if ( qi_ctrl->qinval_maddr == 0 ) { - qi_ctrl->qinval_maddr = alloc_pgtable_maddr(NULL); + qi_ctrl->qinval_maddr = alloc_pgtable_maddr(NULL, NUM_QINVAL_PAGES); if ( qi_ctrl->qinval_maddr == 0 ) { dprintk(XENLOG_WARNING VTDPREFIX, @@ -445,6 +445,8 @@ int qinval_setup(struct iommu *iommu) * registers are automatically reset to 0 with write * to IQA register. 
*/ + if ( NUM_QINVAL_PAGES <= MAX_QINVAL_PAGES ) + qi_ctrl->qinval_maddr |= NUM_QINVAL_PAGES - 1; dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr); /* enable queued invalidation hardware */ diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/vtd.h --- a/xen/drivers/passthrough/vtd/vtd.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/passthrough/vtd/vtd.h Fri Feb 13 11:22:28 2009 +0900 @@ -101,12 +101,12 @@ void cacheline_flush(char *); void cacheline_flush(char *); void flush_all_cache(void); void *map_to_nocache_virt(int nr_iommus, u64 maddr); -u64 alloc_pgtable_maddr(struct domain *d); +u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages); void free_pgtable_maddr(u64 maddr); void *map_vtd_domain_page(u64 maddr); void unmap_vtd_domain_page(void *va); void iommu_flush_cache_entry(void *addr); -void iommu_flush_cache_page(void *addr); +void iommu_flush_cache_page(void *addr, unsigned long npages); #endif // _VTD_H_ diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/x86/vtd.c --- a/xen/drivers/passthrough/vtd/x86/vtd.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/drivers/passthrough/vtd/x86/vtd.c Fri Feb 13 11:22:28 2009 +0900 @@ -38,20 +38,21 @@ void unmap_vtd_domain_page(void *va) } /* Allocate page table, return its machine address */ -u64 alloc_pgtable_maddr(struct domain *d) +u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages) { struct page_info *pg; u64 *vaddr; unsigned long mfn; - pg = alloc_domheap_page(NULL, d ? MEMF_node(domain_to_node(d)) : 0); + pg = alloc_domheap_pages(NULL, get_order_from_pages(npages), + d ? MEMF_node(domain_to_node(d)) : 0); if ( !pg ) return 0; mfn = page_to_mfn(pg); vaddr = map_domain_page(mfn); - memset(vaddr, 0, PAGE_SIZE); + memset(vaddr, 0, PAGE_SIZE * npages); - iommu_flush_cache_page(vaddr); + iommu_flush_cache_page(vaddr, npages); unmap_domain_page(vaddr); return (u64)mfn << PAGE_SHIFT_4K; diff -r af992824b5cf -r c7cba853583d xen/include/acpi/cpufreq/cpufreq.h --- a/xen/include/acpi/cpufreq/cpufreq.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/acpi/cpufreq/cpufreq.h Fri Feb 13 11:22:28 2009 +0900 @@ -87,6 +87,7 @@ struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; int (*governor)(struct cpufreq_policy *policy, unsigned int event); + void (*handle_option)(const char *name, const char *value); struct list_head governor_list; }; diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/hardirq.h --- a/xen/include/asm-ia64/hardirq.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-ia64/hardirq.h Fri Feb 13 11:22:28 2009 +0900 @@ -4,6 +4,7 @@ #define __ARCH_IRQ_STAT 1 #define HARDIRQ_BITS 14 #include <linux/hardirq.h> +#include <xen/sched.h> #define local_softirq_pending() (local_cpu_data->softirq_pending) diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/hvm/iommu.h --- a/xen/include/asm-ia64/hvm/iommu.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-ia64/hvm/iommu.h Fri Feb 13 11:22:28 2009 +0900 @@ -28,7 +28,6 @@ static inline void pci_cleanup_msi(struc /* TODO */ } -#define AUTO_ASSIGN -1 extern int assign_irq_vector (int irq); diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/hvm/irq.h --- a/xen/include/asm-ia64/hvm/irq.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-ia64/hvm/irq.h Fri Feb 13 11:22:28 2009 +0900 @@ -90,13 +90,17 @@ struct hvm_irq { #define hvm_pci_intx_link(dev, intx) \ (((dev) + (intx)) & 3) -/* Extract the IA-64 vector that corresponds to IRQ. 
*/ -static inline int -irq_to_vector (int irq) +#define IA64_INVALID_VECTOR ((unsigned int)((int)-1)) +static inline unsigned int irq_to_vector(int irq) { - return irq; + int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); + unsigned int vector; + + if ( acpi_gsi_to_irq(irq, &vector) < 0) + return 0; + + return vector; } - extern u8 irq_vector[NR_IRQS]; extern int vector_irq[NR_VECTORS]; diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/linux-xen/asm/smp.h --- a/xen/include/asm-ia64/linux-xen/asm/smp.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-ia64/linux-xen/asm/smp.h Fri Feb 13 11:22:28 2009 +0900 @@ -47,7 +47,6 @@ ia64_get_lid (void) #define SMP_IPI_REDIRECTION (1 << 1) #ifdef XEN -#include <xen/sched.h> #define raw_smp_processor_id() (current->processor) #else #define raw_smp_processor_id() (current_thread_info()->cpu) diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/linux-xen/linux/interrupt.h --- a/xen/include/asm-ia64/linux-xen/linux/interrupt.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-ia64/linux-xen/linux/interrupt.h Fri Feb 13 11:22:28 2009 +0900 @@ -52,10 +52,10 @@ struct irqaction { }; extern irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs); -extern int request_irq(unsigned int, +extern int request_irq_vector(unsigned int, irqreturn_t (*handler)(int, void *, struct pt_regs *), unsigned long, const char *, void *); -extern void free_irq(unsigned int, void *); +extern void release_irq_vector(unsigned int, void *); #endif diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/linux/asm/hw_irq.h --- a/xen/include/asm-ia64/linux/asm/hw_irq.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-ia64/linux/asm/hw_irq.h Fri Feb 13 11:22:28 2009 +0900 @@ -34,7 +34,7 @@ typedef u8 ia64_vector; #define IA64_MAX_VECTORED_IRQ 255 #define IA64_NUM_VECTORS 256 -#define AUTO_ASSIGN -1 +#define AUTO_ASSIGN_IRQ (-1) #define IA64_SPURIOUS_INT_VECTOR 0x0f diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/mm.h --- a/xen/include/asm-ia64/mm.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-ia64/mm.h Fri Feb 13 11:22:28 2009 +0900 @@ -13,7 +13,6 @@ #include <xen/list.h> #include <xen/spinlock.h> #include <xen/perfc.h> -#include <xen/sched.h> #include <asm/processor.h> #include <asm/atomic.h> @@ -63,21 +62,14 @@ struct page_info struct { /* Order-size of the free chunk this page is the head of. */ u32 order; - /* Mask of possibly-tainted TLBs. */ - cpumask_t cpumask; + /* Do TLBs need flushing for safety before next page use? */ + bool_t need_tlbflush; } free; } u; /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */ u32 tlbflush_timestamp; - -#if 0 -// following added for Linux compiling - page_flags_t flags; - atomic_t _count; - struct list_head lru; // is this the same as above "list"? -#endif }; #define set_page_count(p,v) atomic_set(&(p)->_count, v - 1) diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/tlbflush.h --- a/xen/include/asm-ia64/tlbflush.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-ia64/tlbflush.h Fri Feb 13 11:22:28 2009 +0900 @@ -1,7 +1,8 @@ #ifndef __FLUSHTLB_H__ #define __FLUSHTLB_H__ -#include <xen/sched.h> +struct vcpu; +struct domain; /* TLB flushes can be either local (current vcpu only) or domain wide (on all vcpus). 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-x86/domain.h Fri Feb 13 11:22:28 2009 +0900 @@ -79,11 +79,11 @@ struct shadow_domain { int locker; /* processor which holds the lock */ const char *locker_function; /* Func that took it */ unsigned int opt_flags; /* runtime tunable optimizations on/off */ - struct list_head pinned_shadows; + struct page_list_head pinned_shadows; /* Memory allocation */ - struct list_head freelists[SHADOW_MAX_ORDER + 1]; - struct list_head p2m_freelist; + struct page_list_head freelists[SHADOW_MAX_ORDER + 1]; + struct page_list_head p2m_freelist; unsigned int total_pages; /* number of pages allocated */ unsigned int free_pages; /* number of pages on freelists */ unsigned int p2m_pages; /* number of pages allocates to p2m */ @@ -92,7 +92,7 @@ struct shadow_domain { pagetable_t unpaged_pagetable; /* Shadow hashtable */ - struct shadow_page_info **hash_table; + struct page_info **hash_table; int hash_walking; /* Some function is walking the hash table */ /* Fast MMIO path heuristic */ @@ -143,7 +143,7 @@ struct hap_domain { int locker; const char *locker_function; - struct list_head freelist; + struct page_list_head freelist; unsigned int total_pages; /* number of pages allocated */ unsigned int free_pages; /* number of pages on freelists */ unsigned int p2m_pages; /* number of pages allocates to p2m */ @@ -265,7 +265,7 @@ struct arch_domain RELMEM_l2, RELMEM_done, } relmem; - struct list_head relmem_list; + struct page_list_head relmem_list; cpuid_input_t cpuids[MAX_CPUID_INPUT]; } __cacheline_aligned; @@ -352,6 +352,7 @@ struct arch_vcpu /* Current LDT details. */ unsigned long shadow_ldt_mapcnt; + spinlock_t shadow_ldt_lock; struct paging_vcpu paging; diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Fri Feb 13 11:22:28 2009 +0900 @@ -48,7 +48,11 @@ typedef union { #define EPTE_SUPER_PAGE_MASK 0x80 #define EPTE_MFN_MASK 0x1fffffffffff000 #define EPTE_AVAIL1_MASK 0xF00 -#define EPTE_EMT_MASK 0x78 +#define EPTE_EMT_MASK 0x38 +#define EPTE_IGMT_MASK 0x40 +#define EPTE_AVAIL1_SHIFT 8 +#define EPTE_EMT_SHIFT 3 +#define EPTE_IGMT_SHIFT 6 void vmx_asm_vmexit_handler(struct cpu_user_regs); void vmx_asm_do_vmentry(void); diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/iocap.h --- a/xen/include/asm-x86/iocap.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-x86/iocap.h Fri Feb 13 11:22:28 2009 +0900 @@ -14,7 +14,8 @@ #define ioports_access_permitted(d, s, e) \ rangeset_contains_range((d)->arch.ioport_caps, s, e) -#define cache_flush_permitted(d) \ - (!rangeset_is_empty((d)->iomem_caps)) +#define cache_flush_permitted(d) \ + (!rangeset_is_empty((d)->iomem_caps) || \ + !rangeset_is_empty((d)->arch.ioport_caps)) #endif /* __X86_IOCAP_H__ */ diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/irq.h --- a/xen/include/asm-x86/irq.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-x86/irq.h Fri Feb 13 11:22:28 2009 +0900 @@ -19,9 +19,6 @@ extern int vector_irq[NR_VECTORS]; extern u8 irq_vector[NR_IRQS]; -#define AUTO_ASSIGN -1 -#define NEVER_ASSIGN -2 -#define FREE_TO_ASSIGN -3 #define platform_legacy_irq(irq) ((irq) < 16) diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-x86/mm.h Fri Feb 13 11:22:28 2009 +0900 @@ -12,15 
+12,40 @@ * Per-page-frame information. * * Every architecture must ensure the following: - * 1. 'struct page_info' contains a 'struct list_head list'. + * 1. 'struct page_info' contains a 'struct page_list_entry list'. * 2. Provide a PFN_ORDER() macro for accessing the order of a free page. */ -#define PFN_ORDER(_pfn) ((_pfn)->u.free.order) +#define PFN_ORDER(_pfn) ((_pfn)->v.free.order) + +/* + * This definition is solely for the use in struct page_info (and + * struct page_list_head), intended to allow easy adjustment once x86-64 + * wants to support more than 16TB. + * 'unsigned long' should be used for MFNs everywhere else. + */ +#define __mfn_t unsigned int +#define PRpgmfn "08x" + +#undef page_list_entry +struct page_list_entry +{ + __mfn_t next, prev; +}; struct page_info { - /* Each frame can be threaded onto a doubly-linked list. */ - struct list_head list; + union { + /* Each frame can be threaded onto a doubly-linked list. + * + * For unused shadow pages, a list of pages of this order; for + * pinnable shadows, if pinned, a list of other pinned shadows + * (see sh_type_is_pinnable() below for the definition of + * "pinnable" shadow types). + */ + struct page_list_entry list; + /* For non-pinnable shadows, a higher entry that points at us. */ + paddr_t up; + }; /* Reference count and various PGC_xxx flags and fields. */ unsigned long count_info; @@ -30,21 +55,46 @@ struct page_info /* Page is in use: ((count_info & PGC_count_mask) != 0). */ struct { - /* Owner of this page (NULL if page is anonymous). */ - u32 _domain; /* pickled format */ /* Type reference count and various PGT_xxx flags and fields. */ unsigned long type_info; } inuse; + /* Page is in use as a shadow: count_info == 0. */ + struct { + unsigned long type:5; /* What kind of shadow is this? */ + unsigned long pinned:1; /* Is the shadow pinned? */ + unsigned long count:26; /* Reference count */ + } sh; + /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */ struct { + /* Do TLBs need flushing for safety before next page use? */ + bool_t need_tlbflush; + } free; + + } u; + + union { + + /* Page is in use, but not as a shadow. */ + struct { + /* Owner of this page (NULL if page is anonymous). */ + u32 _domain; /* pickled format */ + } inuse; + + /* Page is in use as a shadow. */ + struct { + /* GMFN of guest page we're a shadow of. */ + __mfn_t back; + } sh; + + /* Page is on a free list (including shadow code free lists). */ + struct { /* Order-size of the free chunk this page is the head of. */ - u32 order; - /* Mask of possibly-tainted TLBs. */ - cpumask_t cpumask; + unsigned int order; } free; - } u; + } v; union { /* @@ -95,8 +145,13 @@ struct page_info * tracked for TLB-flush avoidance when a guest runs in shadow mode. */ u32 shadow_flags; + + /* When in use as a shadow, next shadow in this hash chain. */ + __mfn_t next_shadow; }; }; + +#undef __mfn_t #define PG_shift(idx) (BITS_PER_LONG - (idx)) #define PG_mask(x, idx) (x ## UL << PG_shift(idx)) @@ -155,7 +210,8 @@ struct page_info }) #else #define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap) -#define is_xen_heap_mfn(mfn) is_xen_heap_page(&frame_table[mfn]) +#define is_xen_heap_mfn(mfn) \ + (__mfn_valid(mfn) && is_xen_heap_page(__mfn_to_page(mfn))) #endif #if defined(__i386__) @@ -174,10 +230,10 @@ struct page_info #define SHADOW_OOS_FIXUPS 2 #define page_get_owner(_p) \ - ((struct domain *)((_p)->u.inuse._domain ? \ - mfn_to_virt((_p)->u.inuse._domain) : NULL)) + ((struct domain *)((_p)->v.inuse._domain ? 
\ + mfn_to_virt((_p)->v.inuse._domain) : NULL)) #define page_set_owner(_p,_d) \ - ((_p)->u.inuse._domain = (_d) ? virt_to_mfn(_d) : 0) + ((_p)->v.inuse._domain = (_d) ? virt_to_mfn(_d) : 0) #define maddr_get_owner(ma) (page_get_owner(maddr_to_page((ma)))) #define vaddr_get_owner(va) (page_get_owner(virt_to_page((va)))) diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/p2m.h --- a/xen/include/asm-x86/p2m.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-x86/p2m.h Fri Feb 13 11:22:28 2009 +0900 @@ -110,7 +110,7 @@ struct p2m_domain { const char *locker_function; /* Func that took it */ /* Pages used to construct the p2m */ - struct list_head pages; + struct page_list_head pages; /* Functions to call to get or free pages for the p2m */ struct page_info * (*alloc_page )(struct domain *d); @@ -148,7 +148,7 @@ struct p2m_domain { * protect moving stuff from the PoD cache to the domain page list. */ struct { - struct list_head super, /* List of superpages */ + struct page_list_head super, /* List of superpages */ single; /* Non-super lists */ int count, /* # of pages in cache lists */ entry_count; /* # of pages in p2m marked pod */ diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/page.h --- a/xen/include/asm-x86/page.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-x86/page.h Fri Feb 13 11:22:28 2009 +0900 @@ -220,31 +220,47 @@ void copy_page_sse2(void *, const void * copy_page_sse2(_t, _f) : \ (void)memcpy(_t, _f, PAGE_SIZE)) -#define mfn_valid(mfn) ((mfn) < max_page) +#define __mfn_valid(mfn) ((mfn) < max_page) /* Convert between Xen-heap virtual addresses and machine addresses. */ #define __pa(x) (virt_to_maddr(x)) #define __va(x) (maddr_to_virt(x)) /* Convert between Xen-heap virtual addresses and machine frame numbers. */ -#define virt_to_mfn(va) (virt_to_maddr(va) >> PAGE_SHIFT) -#define mfn_to_virt(mfn) (maddr_to_virt((paddr_t)(mfn) << PAGE_SHIFT)) +#define __virt_to_mfn(va) (virt_to_maddr(va) >> PAGE_SHIFT) +#define __mfn_to_virt(mfn) (maddr_to_virt((paddr_t)(mfn) << PAGE_SHIFT)) /* Convert between machine frame numbers and page-info structures. */ -#define mfn_to_page(mfn) (frame_table + (mfn)) -#define page_to_mfn(pg) ((unsigned long)((pg) - frame_table)) +#define __mfn_to_page(mfn) (frame_table + (mfn)) +#define __page_to_mfn(pg) ((unsigned long)((pg) - frame_table)) /* Convert between machine addresses and page-info structures. */ -#define maddr_to_page(ma) (frame_table + ((ma) >> PAGE_SHIFT)) -#define page_to_maddr(pg) ((paddr_t)((pg) - frame_table) << PAGE_SHIFT) +#define __maddr_to_page(ma) (frame_table + ((ma) >> PAGE_SHIFT)) +#define __page_to_maddr(pg) ((paddr_t)((pg) - frame_table) << PAGE_SHIFT) /* Convert between Xen-heap virtual addresses and page-info structures. */ -#define virt_to_page(va) (frame_table + (__pa(va) >> PAGE_SHIFT)) -#define page_to_virt(pg) (maddr_to_virt(page_to_maddr(pg))) +#define __virt_to_page(va) (frame_table + (__pa(va) >> PAGE_SHIFT)) +#define __page_to_virt(pg) (maddr_to_virt(page_to_maddr(pg))) /* Convert between frame number and address formats. */ -#define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT) -#define paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT)) +#define __pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT) +#define __paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT)) + +/* + * We define non-underscored wrappers for above conversion functions. These are + * overridden in various source files while underscored versions remain intact. 
+ */ +#define mfn_valid(mfn) __mfn_valid(mfn) +#define virt_to_mfn(va) __virt_to_mfn(va) +#define mfn_to_virt(mfn) __mfn_to_virt(mfn) +#define mfn_to_page(mfn) __mfn_to_page(mfn) +#define page_to_mfn(pg) __page_to_mfn(pg) +#define maddr_to_page(ma) __maddr_to_page(ma) +#define page_to_maddr(pg) __page_to_maddr(pg) +#define virt_to_page(va) __virt_to_page(va) +#define page_to_virt(pg) __page_to_virt(pg) +#define pfn_to_paddr(pfn) __pfn_to_paddr(pfn) +#define paddr_to_pfn(pa) __paddr_to_pfn(pa) #endif /* !defined(__ASSEMBLY__) */ diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/perfc.h --- a/xen/include/asm-x86/perfc.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-x86/perfc.h Fri Feb 13 11:22:28 2009 +0900 @@ -1,6 +1,5 @@ #ifndef __ASM_PERFC_H__ #define __ASM_PERFC_H__ -#include <asm/mm.h> static inline void arch_perfc_printall(void) { diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/processor.h --- a/xen/include/asm-x86/processor.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/asm-x86/processor.h Fri Feb 13 11:22:28 2009 +0900 @@ -188,6 +188,7 @@ extern struct cpuinfo_x86 cpu_data[]; #define current_cpu_data boot_cpu_data #endif +extern u64 host_pat; extern int phys_proc_id[NR_CPUS]; extern int cpu_core_id[NR_CPUS]; diff -r af992824b5cf -r c7cba853583d xen/include/public/arch-ia64/hvm/save.h --- a/xen/include/public/arch-ia64/hvm/save.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/public/arch-ia64/hvm/save.h Fri Feb 13 11:22:28 2009 +0900 @@ -23,8 +23,8 @@ #ifndef __XEN_PUBLIC_HVM_SAVE_IA64_H__ #define __XEN_PUBLIC_HVM_SAVE_IA64_H__ -#include <public/hvm/save.h> -#include <public/arch-ia64.h> +#include "../../hvm/save.h" +#include "../../arch-ia64.h" /* * Save/restore header: general info about the save file. diff -r af992824b5cf -r c7cba853583d xen/include/public/arch-x86/hvm/save.h --- a/xen/include/public/arch-x86/hvm/save.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/public/arch-x86/hvm/save.h Fri Feb 13 11:22:28 2009 +0900 @@ -287,7 +287,7 @@ struct hvm_hw_pci_irqs { * Indexed by: device*4 + INTx#. */ union { - DECLARE_BITMAP(i, 32*4); + unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */ uint64_t pad[2]; }; }; @@ -300,7 +300,7 @@ struct hvm_hw_isa_irqs { * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing). */ union { - DECLARE_BITMAP(i, 16); + unsigned long i[1]; /* DECLARE_BITMAP(i, 16); */ uint64_t pad[1]; }; }; diff -r af992824b5cf -r c7cba853583d xen/include/public/arch-x86/xen-mca.h --- a/xen/include/public/arch-x86/xen-mca.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/public/arch-x86/xen-mca.h Fri Feb 13 11:22:28 2009 +0900 @@ -56,7 +56,7 @@ /* Hypercall */ #define __HYPERVISOR_mca __HYPERVISOR_arch_0 -#define XEN_MCA_INTERFACE_VERSION 0x03000001 +#define XEN_MCA_INTERFACE_VERSION 0x03000002 /* IN: Dom0 calls hypercall from MC event handler. 
*/ #define XEN_MC_CORRECTABLE 0x0 @@ -118,7 +118,7 @@ struct mcinfo_global { uint16_t mc_domid; uint32_t mc_socketid; /* physical socket of the physical core */ uint16_t mc_coreid; /* physical impacted core */ - uint8_t mc_apicid; + uint32_t mc_apicid; uint16_t mc_core_threadid; /* core thread of physical core */ uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ uint64_t mc_gstatus; /* global status */ @@ -175,6 +175,41 @@ struct mc_info { }; typedef struct mc_info mc_info_t; +#define __MC_MSR_ARRAYSIZE 8 +#define __MC_NMSRS 1 +#define MC_NCAPS 7 /* 7 CPU feature flag words */ +#define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */ +#define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */ +#define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */ +#define MC_CAPS_LINUX 3 /* Linux-defined */ +#define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */ +#define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */ +#define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */ + +typedef struct mcinfo_logical_cpu { + uint32_t mc_cpunr; + uint32_t mc_chipid; + uint16_t mc_coreid; + uint16_t mc_threadid; + uint32_t mc_apicid; + uint32_t mc_clusterid; + uint32_t mc_ncores; + uint32_t mc_ncores_active; + uint32_t mc_nthreads; + int32_t mc_cpuid_level; + uint32_t mc_family; + uint32_t mc_vendor; + uint32_t mc_model; + uint32_t mc_step; + char mc_vendorid[16]; + char mc_brandid[64]; + uint32_t mc_cpu_caps[MC_NCAPS]; + uint32_t mc_cache_size; + uint32_t mc_cache_alignment; + int32_t mc_nmsrvals; + struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE]; +} xen_mc_logical_cpu_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t); /* @@ -272,6 +307,14 @@ typedef struct xen_mc_notifydomain xen_m typedef struct xen_mc_notifydomain xen_mc_notifydomain_t; DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t); +#define XEN_MC_physcpuinfo 3 +struct xen_mc_physcpuinfo { + /* IN/OUT */ + uint32_t ncpus; + uint32_t pad0; + /* OUT */ + XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info; +}; struct xen_mc { uint32_t cmd; @@ -279,6 +322,7 @@ struct xen_mc { union { struct xen_mc_fetch mc_fetch; struct xen_mc_notifydomain mc_notifydomain; + struct xen_mc_physcpuinfo mc_physcpuinfo; uint8_t pad[MCINFO_HYPERCALLSIZE]; } u; }; diff -r af992824b5cf -r c7cba853583d xen/include/public/domctl.h --- a/xen/include/public/domctl.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/public/domctl.h Fri Feb 13 11:22:28 2009 +0900 @@ -630,6 +630,17 @@ typedef struct xen_domctl_debug_op xen_d typedef struct xen_domctl_debug_op xen_domctl_debug_op_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t); +/* + * Request a particular record from the HVM context + */ +#define XEN_DOMCTL_gethvmcontext_partial 55 +typedef struct xen_domctl_hvmcontext_partial { + uint32_t type; /* IN: Type of record required */ + uint32_t instance; /* IN: Instance of that type */ + XEN_GUEST_HANDLE_64(uint8) buffer; /* OUT: buffer to write record into */ +} xen_domctl_hvmcontext_partial_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t); + struct xen_domctl { uint32_t cmd; @@ -658,6 +669,7 @@ struct xen_domctl { struct xen_domctl_settimeoffset settimeoffset; struct xen_domctl_real_mode_area real_mode_area; struct xen_domctl_hvmcontext hvmcontext; + struct xen_domctl_hvmcontext_partial hvmcontext_partial; struct xen_domctl_address_size address_size; struct xen_domctl_sendtrigger sendtrigger; struct xen_domctl_get_device_group get_device_group; diff -r af992824b5cf -r c7cba853583d xen/include/public/io/pciif.h --- a/xen/include/public/io/pciif.h Fri Feb 13 
10:56:01 2009 +0900 +++ b/xen/include/public/io/pciif.h Fri Feb 13 11:22:28 2009 +0900 @@ -29,7 +29,7 @@ /* xen_pci_sharedinfo flags */ #define _XEN_PCIF_active (0) -#define XEN_PCIF_active (1<<_XEN_PCI_active) +#define XEN_PCIF_active (1<<_XEN_PCIF_active) #define _XEN_PCIB_AERHANDLER (1) #define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER) #define _XEN_PCIB_active (2) diff -r af992824b5cf -r c7cba853583d xen/include/xen/hvm/save.h --- a/xen/include/xen/hvm/save.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/xen/hvm/save.h Fri Feb 13 11:22:28 2009 +0900 @@ -152,6 +152,8 @@ __initcall(__hvm_register_##_x##_save_an /* Entry points for saving and restoring HVM domain state */ size_t hvm_save_size(struct domain *d); int hvm_save(struct domain *d, hvm_domain_context_t *h); +int hvm_save_one(struct domain *d, uint16_t typecode, uint16_t instance, + XEN_GUEST_HANDLE_64(uint8) handle); int hvm_load(struct domain *d, hvm_domain_context_t *h); /* Arch-specific definitions. */ diff -r af992824b5cf -r c7cba853583d xen/include/xen/iocap.h --- a/xen/include/xen/iocap.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/xen/iocap.h Fri Feb 13 11:22:28 2009 +0900 @@ -29,6 +29,7 @@ rangeset_contains_singleton((d)->irq_caps, i) #define multipage_allocation_permitted(d) \ - (!rangeset_is_empty((d)->iomem_caps)) + (!rangeset_is_empty((d)->iomem_caps) || \ + !rangeset_is_empty((d)->arch.ioport_caps)) #endif /* __XEN_IOCAP_H__ */ diff -r af992824b5cf -r c7cba853583d xen/include/xen/irq.h --- a/xen/include/xen/irq.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/xen/irq.h Fri Feb 13 11:22:28 2009 +0900 @@ -24,6 +24,11 @@ struct irqaction #define IRQ_GUEST 16 /* IRQ is handled by guest OS(es) */ #define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */ #define IRQ_PER_CPU 256 /* IRQ is per CPU */ + +/* Special IRQ numbers. */ +#define AUTO_ASSIGN_IRQ (-1) +#define NEVER_ASSIGN_IRQ (-2) +#define FREE_TO_ASSIGN_IRQ (-3) /* * Interrupt controller descriptor. This is all we need @@ -64,11 +69,20 @@ typedef struct { extern irq_desc_t irq_desc[NR_VECTORS]; -extern int setup_irq(unsigned int, struct irqaction *); -extern void free_irq(unsigned int); -extern int request_irq(unsigned int irq, +extern int setup_irq_vector(unsigned int, struct irqaction *); +extern void release_irq_vector(unsigned int); +extern int request_irq_vector(unsigned int vector, void (*handler)(int, void *, struct cpu_user_regs *), unsigned long irqflags, const char * devname, void *dev_id); + +#define setup_irq(irq, action) \ + setup_irq_vector(irq_to_vector(irq), action) + +#define release_irq(irq) \ + release_irq_vector(irq_to_vector(irq)) + +#define request_irq(irq, handler, irqflags, devname, devid) \ + request_irq_vector(irq_to_vector(irq), handler, irqflags, defname, devid) extern hw_irq_controller no_irq_type; extern void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs); diff -r af992824b5cf -r c7cba853583d xen/include/xen/mm.h --- a/xen/include/xen/mm.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/xen/mm.h Fri Feb 13 11:22:28 2009 +0900 @@ -85,22 +85,192 @@ int assign_pages( #define MAX_ORDER 20 /* 2^20 contiguous pages */ #endif +#define page_list_entry list_head + +#include <asm/mm.h> + +#ifndef page_list_entry +struct page_list_head +{ + struct page_info *next, *tail; +}; +/* These must only have instances in struct page_info. 
*/ +# define page_list_entry + +# define PAGE_LIST_HEAD_INIT(name) { NULL, NULL } +# define PAGE_LIST_HEAD(name) \ + struct page_list_head name = PAGE_LIST_HEAD_INIT(name) +# define INIT_PAGE_LIST_HEAD(head) ((head)->tail = (head)->next = NULL) +# define INIT_PAGE_LIST_ENTRY(ent) ((ent)->prev = (ent)->next = ~0) + +static inline int +page_list_empty(const struct page_list_head *head) +{ + return !head->next; +} +static inline struct page_info * +page_list_first(const struct page_list_head *head) +{ + return head->next; +} +static inline struct page_info * +page_list_next(const struct page_info *page, + const struct page_list_head *head) +{ + return page != head->tail ? mfn_to_page(page->list.next) : NULL; +} +static inline struct page_info * +page_list_prev(const struct page_info *page, + const struct page_list_head *head) +{ + return page != head->next ? mfn_to_page(page->list.prev) : NULL; +} +static inline void +page_list_add(struct page_info *page, struct page_list_head *head) +{ + if ( head->next ) + { + page->list.next = page_to_mfn(head->next); + head->next->list.prev = page_to_mfn(page); + } + else + { + head->tail = page; + page->list.next = ~0; + } + page->list.prev = ~0; + head->next = page; +} +static inline void +page_list_add_tail(struct page_info *page, struct page_list_head *head) +{ + page->list.next = ~0; + if ( head->next ) + { + page->list.prev = page_to_mfn(head->tail); + head->tail->list.next = page_to_mfn(page); + } + else + { + page->list.prev = ~0; + head->next = page; + } + head->tail = page; +} +static inline bool_t +__page_list_del_head(struct page_info *page, struct page_list_head *head, + struct page_info *next, struct page_info *prev) +{ + if ( head->next == page ) + { + if ( head->tail != page ) + { + next->list.prev = ~0; + head->next = next; + } + else + head->tail = head->next = NULL; + return 1; + } + + if ( head->tail == page ) + { + prev->list.next = ~0; + head->tail = prev; + return 1; + } + + return 0; +} +static inline void +page_list_del(struct page_info *page, struct page_list_head *head) +{ + struct page_info *next = mfn_to_page(page->list.next); + struct page_info *prev = mfn_to_page(page->list.prev); + + if ( !__page_list_del_head(page, head, next, prev) ) + { + next->list.prev = page->list.prev; + prev->list.next = page->list.next; + } +} +static inline void +page_list_del2(struct page_info *page, struct page_list_head *head1, + struct page_list_head *head2) +{ + struct page_info *next = mfn_to_page(page->list.next); + struct page_info *prev = mfn_to_page(page->list.prev); + + if ( !__page_list_del_head(page, head1, next, prev) && + !__page_list_del_head(page, head2, next, prev) ) + { + next->list.prev = page->list.prev; + prev->list.next = page->list.next; + } +} +static inline struct page_info * +page_list_remove_head(struct page_list_head *head) +{ + struct page_info *page = head->next; + + if ( page ) + page_list_del(page, head); + + return page; +} + +#define page_list_for_each(pos, head) \ + for ( pos = (head)->next; pos; pos = page_list_next(pos, head) ) +#define page_list_for_each_safe(pos, tmp, head) \ + for ( pos = (head)->next; \ + pos ? (tmp = page_list_next(pos, head), 1) : 0; \ + pos = tmp ) +#define page_list_for_each_safe_reverse(pos, tmp, head) \ + for ( pos = (head)->tail; \ + pos ? 
(tmp = page_list_prev(pos, head), 1) : 0; \ + pos = tmp ) +#else +# define page_list_head list_head +# define PAGE_LIST_HEAD_INIT LIST_HEAD_INIT +# define PAGE_LIST_HEAD LIST_HEAD +# define INIT_PAGE_LIST_HEAD INIT_LIST_HEAD +# define INIT_PAGE_LIST_ENTRY INIT_LIST_HEAD +# define page_list_empty list_empty +# define page_list_first(hd) list_entry((hd)->next, \ + struct page_info, list) +# define page_list_next(pg, hd) list_entry((pg)->list.next, \ + struct page_info, list) +# define page_list_add(pg, hd) list_add(&(pg)->list, hd) +# define page_list_add_tail(pg, hd) list_add_tail(&(pg)->list, hd) +# define page_list_del(pg, hd) list_del(&(pg)->list) +# define page_list_del2(pg, hd1, hd2) list_del(&(pg)->list) +# define page_list_remove_head(hd) (!page_list_empty(hd) ? \ + ({ \ + struct page_info *__pg = page_list_first(hd); \ + list_del(&__pg->list); \ + __pg; \ + }) : NULL) +# define page_list_for_each(pos, head) list_for_each_entry(pos, head, list) +# define page_list_for_each_safe(pos, tmp, head) \ + list_for_each_entry_safe(pos, tmp, head, list) +# define page_list_for_each_safe_reverse(pos, tmp, head) \ + list_for_each_entry_safe_reverse(pos, tmp, head, list) +#endif + /* Automatic page scrubbing for dead domains. */ -extern struct list_head page_scrub_list; -#define page_scrub_schedule_work() \ - do { \ - if ( !list_empty(&page_scrub_list) ) \ - raise_softirq(PAGE_SCRUB_SOFTIRQ); \ +extern struct page_list_head page_scrub_list; +#define page_scrub_schedule_work() \ + do { \ + if ( !page_list_empty(&page_scrub_list) ) \ + raise_softirq(PAGE_SCRUB_SOFTIRQ); \ } while ( 0 ) #define page_scrub_kick() \ do { \ - if ( !list_empty(&page_scrub_list) ) \ + if ( !page_list_empty(&page_scrub_list) ) \ cpumask_raise_softirq(cpu_online_map, PAGE_SCRUB_SOFTIRQ); \ } while ( 0 ) unsigned long avail_scrub_pages(void); -#include <asm/mm.h> - int guest_remove_page(struct domain *d, unsigned long gmfn); /* Returns TRUE if the whole page at @mfn is ordinary RAM. */ diff -r af992824b5cf -r c7cba853583d xen/include/xen/sched.h --- a/xen/include/xen/sched.h Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/include/xen/sched.h Fri Feb 13 11:22:28 2009 +0900 @@ -19,6 +19,7 @@ #include <xen/xenoprof.h> #include <xen/rcupdate.h> #include <xen/irq.h> +#include <xen/mm.h> #ifdef CONFIG_COMPAT #include <compat/vcpu.h> @@ -171,8 +172,8 @@ struct domain spinlock_t domain_lock; spinlock_t page_alloc_lock; /* protects all the following fields */ - struct list_head page_list; /* linked list, of size tot_pages */ - struct list_head xenpage_list; /* linked list, of size xenheap_pages */ + struct page_list_head page_list; /* linked list, of size tot_pages */ + struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */ unsigned int tot_pages; /* number of pages currently possesed */ unsigned int max_pages; /* maximum value for tot_pages */ unsigned int xenheap_pages; /* # pages allocated from Xen heap */ diff -r af992824b5cf -r c7cba853583d xen/xsm/flask/hooks.c --- a/xen/xsm/flask/hooks.c Fri Feb 13 10:56:01 2009 +0900 +++ b/xen/xsm/flask/hooks.c Fri Feb 13 11:22:28 2009 +0900 @@ -820,6 +820,7 @@ static int flask_hvmcontext(struct domai perm = HVM__SETHVMC; break; case XEN_DOMCTL_gethvmcontext: + case XEN_DOMCTL_gethvmcontext_partial: perm = HVM__GETHVMC; break; default: _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
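
The bulk of the xen/mm.h change above threads page lists through the frame table with 32-bit MFN links (terminated by ~0) instead of struct list_head pointers. A minimal standalone sketch of that idea, with hypothetical names and simplified so the list head also stores indices rather than page_info pointers:

/* Illustrative sketch, not the Xen code: a doubly-linked list threaded
 * through an array by 32-bit indices instead of pointers. */
#include <stdio.h>
#include <stdint.h>

#define NIL ((uint32_t)~0u)      /* end-of-list marker, like ~0 in the patch */
#define NFRAMES 8

struct frame {
    uint32_t next, prev;         /* indices into frames[], not pointers */
};

struct frame_list {
    uint32_t head, tail;
};

static struct frame frames[NFRAMES];

static void list_init(struct frame_list *l)
{
    l->head = l->tail = NIL;
}

static void list_add_tail(struct frame_list *l, uint32_t idx)
{
    frames[idx].next = NIL;
    frames[idx].prev = l->tail;
    if ( l->tail != NIL )
        frames[l->tail].next = idx;
    else
        l->head = idx;
    l->tail = idx;
}

int main(void)
{
    struct frame_list free_list;
    uint32_t i;

    list_init(&free_list);
    for ( i = 0; i < 4; i++ )
        list_add_tail(&free_list, i);

    for ( i = free_list.head; i != NIL; i = frames[i].next )
        printf("frame %u\n", i);
    return 0;
}
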