[Xen-changelog] Merged.
# HG changeset patch # User emellor@xxxxxxxxxxxxxxxxxxxxxx # Node ID cf89e8f0831b3a483c1b8c614d1b8eb02180548b # Parent 516cf6553011ac4263814c7f7c0cf358f629a906 # Parent 1ecb7f1ddc7900f34a7d7d962e5be3df3d850d61 Merged. diff -r 516cf6553011 -r cf89e8f0831b buildconfigs/mk.linux-2.6-xen --- a/buildconfigs/mk.linux-2.6-xen Mon Mar 20 08:56:46 2006 +++ b/buildconfigs/mk.linux-2.6-xen Mon Mar 20 08:56:54 2006 @@ -2,8 +2,8 @@ OS = linux LINUX_SERIES = 2.6 -LINUX_VER = 2.6.16-rc5 -LINUX_SRCS = linux-2.6.15.tar.bz2 patch-2.6.16-rc5.bz2 +LINUX_VER = 2.6.16-rc6 +LINUX_SRCS = linux-2.6.15.tar.bz2 patch-2.6.16-rc6.bz2 LINUX_PDIR = linux-$(LINUX_VER) EXTRAVERSION ?= xen @@ -34,7 +34,7 @@ touch $(@D)/.hgskip touch $@ -pristine-linux-%.16-rc5/.valid-pristine: pristine-$(LINUX_PDIR)/.valid-srcs +pristine-linux-%.16-rc6/.valid-pristine: pristine-$(LINUX_PDIR)/.valid-srcs touch $@ # update timestamp to avoid rebuild $(LINUX_DIR)/include/linux/autoconf.h: ref-$(OS)-$(LINUX_VER)/.valid-ref diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/arch/i386/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/i386/kernel/Makefile Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/Makefile Mon Mar 20 08:56:54 2006 @@ -11,7 +11,7 @@ obj-y += cpu/ obj-y += timers/ -obj-$(CONFIG_ACPI) += acpi/ +obj-y += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o obj-$(CONFIG_X86_MSR) += msr.o diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/arch/i386/kernel/acpi/Makefile --- a/linux-2.6-xen-sparse/arch/i386/kernel/acpi/Makefile Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/acpi/Makefile Mon Mar 20 08:56:54 2006 @@ -1,4 +1,4 @@ -obj-y := boot.o +obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c Mon Mar 20 08:56:54 2006 @@ -44,6 +44,9 @@ extern int gsi_irq_sharing(int gsi); #include <asm/proto.h> +static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } + + #else /* X86 */ #ifdef CONFIG_X86_LOCAL_APIC @@ -1111,9 +1114,6 @@ disable_acpi(); return error; } -#ifdef __i386__ - check_acpi_pci(); -#endif acpi_table_parse(ACPI_BOOT, acpi_parse_sbf); diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c Mon Mar 20 08:56:54 2006 @@ -283,10 +283,10 @@ c->x86_capability[4] = excap; c->x86 = (tfms >> 8) & 15; c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) { + if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; - } c->x86_mask = tfms & 15; } else { /* Have CPUID level 0 only - unheard of */ diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c Mon Mar 20 08:56:54 2006 @@ -92,6 +92,8 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); + +int timer_over_8254 __initdata = 1; /* * Is the SiS APIC rmw bug present ? 
@@ -2329,7 +2331,8 @@ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); timer_ack = 1; - enable_8259A_irq(0); + if (timer_over_8254 > 0) + enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2459,6 +2462,20 @@ print_IO_APIC(); } +static int __init setup_disable_8254_timer(char *s) +{ + timer_over_8254 = -1; + return 1; +} +static int __init setup_enable_8254_timer(char *s) +{ + timer_over_8254 = 2; + return 1; +} + +__setup("disable_8254_timer", setup_disable_8254_timer); +__setup("enable_8254_timer", setup_enable_8254_timer); + /* * Called after all the initialization is done. If we didnt find any * APIC bugs then we can allow the modify fast path diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c Mon Mar 20 08:56:54 2006 @@ -25,6 +25,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/sched.h> +#include <linux/cpumask.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/vmalloc.h> diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Mon Mar 20 08:56:54 2006 @@ -1819,6 +1819,10 @@ op.u.set_iopl.iopl = 1; HYPERVISOR_physdev_op(&op); +#ifdef CONFIG_X86_IO_APIC + check_acpi_pci(); /* Checks more than just ACPI actually */ +#endif + #ifdef CONFIG_ACPI if (!(xen_start_info->flags & SIF_INITDOMAIN)) { printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c Mon Mar 20 08:56:54 2006 @@ -840,9 +840,9 @@ write_seqlock_irqsave(&xtime_lock, flags); xtime.tv_sec = sec; xtime.tv_nsec = 0; + jiffies_64 += sleep_length; + wall_jiffies += sleep_length; write_sequnlock_irqrestore(&xtime_lock, flags); - jiffies += sleep_length; - wall_jiffies += sleep_length; if (last_timer->resume) last_timer->resume(); cur_timer = last_timer; diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S Mon Mar 20 08:56:54 2006 @@ -254,7 +254,6 @@ st8 [r11]=r10 ;; br.ret.sptk.many rp - ;; END(xen_set_rr) GLOBAL_ENTRY(xen_fc) @@ -264,7 +263,16 @@ (p7) fc r32;; (p7) br.ret.sptk.many rp ;; - ptc.e r96 // this is a "privified" fc r32 + movl r9=XSI_PSR_IC + mov r8=r32 + ;; + ld8 r10=[r9] + ;; + st8 [r9]=r0 + ;; + XEN_HYPER_FC + ;; + st8 [r9]=r10 ;; br.ret.sptk.many rp END(xen_fc) @@ -276,7 +284,16 @@ (p7) mov r8=cpuid[r32];; (p7) br.ret.sptk.many rp ;; - mov r72=rr[r32] // this is a "privified" mov r8=cpuid[r32] + movl r9=XSI_PSR_IC + mov r8=r32 + ;; + ld8 r10=[r9] + ;; + st8 [r9]=r0 + ;; + XEN_HYPER_GET_CPUID + ;; + st8 [r9]=r10 ;; br.ret.sptk.many rp END(xen_get_cpuid) @@ -288,7 +305,16 @@ (p7) mov r8=pmd[r32];; (p7) br.ret.sptk.many rp ;; - mov r72=pmc[r32] // this is a "privified" mov r8=pmd[r32] + movl r9=XSI_PSR_IC + mov r8=r32 + ;; + ld8 r10=[r9] + ;; + st8 [r9]=r0 + ;; + XEN_HYPER_GET_PMD + ;; + st8 [r9]=r10 ;; br.ret.sptk.many rp END(xen_get_pmd) @@ -301,10 +327,20 @@ (p7) mov r8=ar24;; 
(p7) br.ret.sptk.many rp ;; - mov ar24=r72 // this is a "privified" mov r8=ar.eflg + movl r9=XSI_PSR_IC + mov r8=r32 + ;; + ld8 r10=[r9] + ;; + st8 [r9]=r0 + ;; + XEN_HYPER_GET_EFLAG + ;; + st8 [r9]=r10 ;; br.ret.sptk.many rp END(xen_get_eflag) + // some bits aren't set if pl!=0, see SDM vol1 3.1.8 GLOBAL_ENTRY(xen_set_eflag) movl r8=running_on_xen;; @@ -313,11 +349,17 @@ (p7) mov ar24=r32 (p7) br.ret.sptk.many rp ;; - // FIXME: this remains no-op'd because it generates - // a privileged register (general exception) trap rather than - // a privileged operation fault - //mov ar24=r32 - ;; - br.ret.sptk.many rp -END(xen_get_eflag) + movl r9=XSI_PSR_IC + mov r8=r32 + ;; + ld8 r10=[r9] + ;; + st8 [r9]=r0 + ;; + XEN_HYPER_SET_EFLAG + ;; + st8 [r9]=r10 + ;; + br.ret.sptk.many rp +END(xen_set_eflag) #endif diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S Mon Mar 20 08:56:54 2006 @@ -723,16 +723,12 @@ movl r30=1f // load continuation point in case of nested fault ;; #ifdef CONFIG_XEN -#if 1 mov r18=r8; mov r8=r16; XEN_HYPER_THASH;; mov r17=r8; mov r8=r18;; #else - tak r17=r80 // "privified" thash -#endif -#else thash r17=r16 // compute virtual address of L3 PTE #endif mov r29=b0 // save b0 in case of nested fault @@ -812,16 +808,12 @@ #endif /* CONFIG_ITANIUM */ ;; #ifdef CONFIG_XEN -#if 1 mov r18=r8; mov r8=r16; XEN_HYPER_THASH;; mov r17=r8; mov r8=r18;; #else - tak r17=r80 // "privified" thash -#endif -#else thash r17=r16 // compute virtual address of L3 PTE #endif mov r29=b0 // save b0 in case of nested fault) @@ -898,15 +890,11 @@ movl r30=1f // load continuation point in case of nested fault ;; #ifdef CONFIG_XEN -#if 1 mov r18=r8; mov r8=r16; XEN_HYPER_THASH;; mov r17=r8; mov r8=r18;; -#else - tak r17=r80 // "privified" thash -#endif #else thash r17=r16 // compute virtual address of L3 PTE #endif diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Mon Mar 20 08:56:54 2006 @@ -818,7 +818,7 @@ jmp error_exit #ifdef CONFIG_X86_LOCAL_APIC -ENTRY(nmi) +KPROBE_ENTRY(nmi) zeroentry do_nmi_callback ENTRY(do_nmi_callback) addq $8, %rsp @@ -828,6 +828,7 @@ XEN_BLOCK_EVENTS(%rsi) GET_THREAD_INFO(%rcx) jmp retint_restore_args + .previous .text #endif ALIGN diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/drivers/char/tty_io.c --- a/linux-2.6-xen-sparse/drivers/char/tty_io.c Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c Mon Mar 20 08:56:54 2006 @@ -305,7 +305,7 @@ t->commit = 0; t->read = 0; /* DEBUG ONLY */ - memset(t->data, '*', size); +/* memset(t->data, '*', size); */ /* printk("Flip recycle %p\n", t); */ return t; } diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Mon Mar 20 08:56:54 2006 @@ -1049,6 +1049,8 @@ if (xsd_port_intf) xsd_port_intf->read_proc = xsd_port_read; } + else + xenstored_ready = 1; /* Initialize the interface to xenstore. 
*/ err = xs_init(); @@ -1058,10 +1060,8 @@ return err; } - if (!dom0) { - xenstored_ready = 1; + if (!dom0) xenbus_probe(NULL); - } return 0; } diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/include/asm-i386/apic.h --- a/linux-2.6-xen-sparse/include/asm-i386/apic.h Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/apic.h Mon Mar 20 08:56:54 2006 @@ -139,6 +139,8 @@ #define ARCH_APICTIMER_STOPS_ON_C3 1 #endif +extern int timer_over_8254; + #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/include/asm-ia64/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Mon Mar 20 08:56:54 2006 @@ -32,6 +32,7 @@ #include <xen/interface/xen.h> #include <xen/interface/sched.h> +#include <xen/interface/dom0_ops.h> #include <linux/errno.h> /* FIXME: temp place to hold these page related macros */ diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Mon Mar 20 08:56:54 2006 @@ -44,6 +44,8 @@ void force_evtchn_callback(void); +int xen_init(void); + /* Turn jiffies into Xen system time. XXX Implement me. */ #define jiffies_to_st(j) 0 diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h --- a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Mon Mar 20 08:56:54 2006 @@ -33,6 +33,11 @@ #define XEN_HYPER_GET_RR break 0x10 #define XEN_HYPER_SET_RR break 0x11 #define XEN_HYPER_SET_KR break 0x12 +#define XEN_HYPER_FC break 0x13 +#define XEN_HYPER_GET_CPUID break 0x14 +#define XEN_HYPER_GET_PMD break 0x15 +#define XEN_HYPER_GET_EFLAG break 0x16 +#define XEN_HYPER_SET_EFLAG break 0x17 #endif #ifndef __ASSEMBLY__ diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/include/linux/gfp.h --- a/linux-2.6-xen-sparse/include/linux/gfp.h Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/include/linux/gfp.h Mon Mar 20 08:56:54 2006 @@ -161,9 +161,9 @@ void page_alloc_init(void); #ifdef CONFIG_NUMA -void drain_remote_pages(void); +void drain_node_pages(int node); #else -static inline void drain_remote_pages(void) { }; +static inline void drain_node_pages(int node) { }; #endif #endif /* __LINUX_GFP_H */ diff -r 516cf6553011 -r cf89e8f0831b linux-2.6-xen-sparse/mm/page_alloc.c --- a/linux-2.6-xen-sparse/mm/page_alloc.c Mon Mar 20 08:56:46 2006 +++ b/linux-2.6-xen-sparse/mm/page_alloc.c Mon Mar 20 08:56:54 2006 @@ -591,20 +591,19 @@ } #ifdef CONFIG_NUMA -/* Called from the slab reaper to drain remote pagesets */ -void drain_remote_pages(void) -{ - struct zone *zone; - int i; +/* + * Called from the slab reaper to drain pagesets on a particular node that + * belong to the currently executing processor. 
+ */ +void drain_node_pages(int nodeid) +{ + int i, z; unsigned long flags; local_irq_save(flags); - for_each_zone(zone) { + for (z = 0; z < MAX_NR_ZONES; z++) { + struct zone *zone = NODE_DATA(nodeid)->node_zones + z; struct per_cpu_pageset *pset; - - /* Do not drain local pagesets */ - if (zone->zone_pgdat->node_id == numa_node_id()) - continue; pset = zone_pcp(zone, smp_processor_id()); for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { diff -r 516cf6553011 -r cf89e8f0831b tools/examples/xmexample.vti --- a/tools/examples/xmexample.vti Mon Mar 20 08:56:46 2006 +++ b/tools/examples/xmexample.vti Mon Mar 20 08:56:54 2006 @@ -14,8 +14,8 @@ # Kernel image file. kernel = "/boot/Flash.fd" -# The domain build function. VMX domain uses 'vmx'. -builder='vmx' +# The domain build function. VTI domain uses 'hvm'. +builder='hvm' # Initial memory allocation (in megabytes) for the new domain. memory = 256 diff -r 516cf6553011 -r cf89e8f0831b tools/firmware/rombios/apmbios.S --- a/tools/firmware/rombios/apmbios.S Mon Mar 20 08:56:46 2006 +++ b/tools/firmware/rombios/apmbios.S Mon Mar 20 08:56:54 2006 @@ -217,14 +217,22 @@ ; APM interface disconnect APMSYM(04): cmp al, #0x04 + jne APMSYM(05) + jmp APMSYM(ok) + +;----------------- +; APM cpu idle +APMSYM(05): + cmp al, #0x05 jne APMSYM(07) + hlt jmp APMSYM(ok) ;----------------- ; APM Set Power State APMSYM(07): cmp al, #0x07 - jne APMSYM(0a) + jne APMSYM(08) cmp bx, #1 jne APMSYM(ok) @@ -265,6 +273,14 @@ mov ax, #APMSYM(07_standby_str) call APMSYM(out_str) pop edx + jmp APMSYM(ok) + +;----------------- +; APM Enable / Disable +APMSYM(08): + cmp al, #0x08 + jne APMSYM(0a) + jmp APMSYM(ok) ;----------------- @@ -297,11 +313,30 @@ ; APM Driver Version APMSYM(0e): cmp al, #0x0e - jne APMSYM(unimplemented) + jne APMSYM(0f) mov ah, #1 mov al, #2 + jmp APMSYM(ok) + +;----------------- +; APM Engage / Disengage +APMSYM(0f): + cmp al, #0x0f + jne APMSYM(10) + + jmp APMSYM(ok) + +;----------------- +; APM Get Capabilities +APMSYM(10): + cmp al, #0x10 + jne APMSYM(unimplemented) + + mov bl, #0 + mov cx, #0 + jmp APMSYM(ok) ;----------------- diff -r 516cf6553011 -r cf89e8f0831b tools/firmware/rombios/rombios.c --- a/tools/firmware/rombios/rombios.c Mon Mar 20 08:56:46 2006 +++ b/tools/firmware/rombios/rombios.c Mon Mar 20 08:56:54 2006 @@ -149,7 +149,7 @@ #define BX_SUPPORT_FLOPPY 1 #define BX_FLOPPY_ON_CNT 37 /* 2 seconds */ #define BX_PCIBIOS 1 -#define BX_APM 0 +#define BX_APM 1 #define BX_USE_ATADRV 1 #define BX_ELTORITO_BOOT 1 diff -r 516cf6553011 -r cf89e8f0831b tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Mon Mar 20 08:56:46 2006 +++ b/tools/libxc/xc_hvm_build.c Mon Mar 20 08:56:54 2006 @@ -51,7 +51,7 @@ char *elfbase, int xch, uint32_t dom, unsigned long *parray, struct domain_setup_info *dsi); -static unsigned char build_e820map(void *e820_page, unsigned long mem_size) +static unsigned char build_e820map(void *e820_page, unsigned long long mem_size) { struct e820entry *e820entry = (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET); @@ -81,11 +81,17 @@ #define STATIC_PAGES 2 /* for ioreq_t and store_mfn */ /* Most of the ram goes here */ e820entry[nr_map].addr = 0x100000; - e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES*PAGE_SIZE; + e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES * PAGE_SIZE; e820entry[nr_map].type = E820_RAM; nr_map++; /* Statically allocated special pages */ + + /* For xenstore */ + e820entry[nr_map].addr = mem_size - 2 * PAGE_SIZE; + e820entry[nr_map].size = PAGE_SIZE; + 
e820entry[nr_map].type = E820_XENSTORE; + nr_map++; /* Shared ioreq_t page */ e820entry[nr_map].addr = mem_size - PAGE_SIZE; @@ -93,12 +99,6 @@ e820entry[nr_map].type = E820_SHARED_PAGE; nr_map++; - /* For xenstore */ - e820entry[nr_map].addr = mem_size - 2*PAGE_SIZE; - e820entry[nr_map].size = PAGE_SIZE; - e820entry[nr_map].type = E820_XENSTORE; - nr_map++; - e820entry[nr_map].addr = mem_size; e820entry[nr_map].size = 0x3 * PAGE_SIZE; e820entry[nr_map].type = E820_NVS; @@ -117,8 +117,7 @@ return (*(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map); } -static void -set_hvm_info_checksum(struct hvm_info_table *t) +static void set_hvm_info_checksum(struct hvm_info_table *t) { uint8_t *ptr = (uint8_t *)t, sum = 0; unsigned int i; @@ -142,19 +141,16 @@ char *va_map; struct hvm_info_table *va_hvm; - - va_map = xc_map_foreign_range( - xc_handle, - dom, - PAGE_SIZE, - PROT_READ|PROT_WRITE, - pfn_list[HVM_INFO_PFN]); + va_map = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ | PROT_WRITE, + pfn_list[HVM_INFO_PFN]); if ( va_map == NULL ) return -1; va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET); memset(va_hvm, 0, sizeof(*va_hvm)); + strncpy(va_hvm->signature, "HVM INFO", 8); va_hvm->length = sizeof(struct hvm_info_table); va_hvm->acpi_enabled = acpi; @@ -183,58 +179,59 @@ unsigned long *store_mfn) { unsigned long *page_array = NULL; - unsigned long count, i; + unsigned long long ptr; + xc_mmu_t *mmu = NULL; + shared_info_t *shared_info; void *e820_page; unsigned char e820_map_nr; - xc_mmu_t *mmu = NULL; - int rc; struct domain_setup_info dsi; - unsigned long v_end; + unsigned long long v_end; unsigned long shared_page_frame = 0; shared_iopage_t *sp; memset(&dsi, 0, sizeof(struct domain_setup_info)); - if ( (rc = parseelfimage(image, image_size, &dsi)) != 0 ) - goto error_out; - - if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 ) + if ( (parseelfimage(image, image_size, &dsi)) != 0 ) + goto error_out; + + if ( (dsi.v_kernstart & (PAGE_SIZE - 1)) != 0 ) { PERROR("Guest OS must load to a page boundary.\n"); goto error_out; } /* memsize is in megabytes */ - v_end = (unsigned long)memsize << 20; + v_end = (unsigned long long)memsize << 20; printf("VIRTUAL MEMORY ARRANGEMENT:\n" - " Loaded HVM loader: %08lx->%08lx\n" - " TOTAL: %08lx->%08lx\n", + " Loaded HVM loader: %08lx->%08lx\n" + " TOTAL: %08lx->%016llx\n", dsi.v_kernstart, dsi.v_kernend, dsi.v_start, v_end); - printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry); - - if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) ) - { - ERROR("Initial guest OS requires too much space\n" - "(%luMB is greater than %luMB limit)\n", - (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20); + printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry); + + if ( (v_end - dsi.v_start) > ((unsigned long long)nr_pages << PAGE_SHIFT) ) + { + PERROR("Initial guest OS requires too much space: " + "(%lluMB is greater than %lluMB limit)\n", + (unsigned long long)(v_end - dsi.v_start) >> 20, + ((unsigned long long)nr_pages << PAGE_SHIFT) >> 20); goto error_out; } if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ) { - PERROR("Could not allocate memory"); + PERROR("Could not allocate memory.\n"); goto error_out; } if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages ) { - PERROR("Could not get the page frame list"); + PERROR("Could not get the page frame list.\n"); goto error_out; } @@ -246,20 +243,21 @@ /* Write the machine->phys table entries. 
*/ for ( count = 0; count < nr_pages; count++ ) { + ptr = (unsigned long long)page_array[count] << PAGE_SHIFT; if ( xc_add_mmu_update(xc_handle, mmu, - (page_array[count] << PAGE_SHIFT) | - MMU_MACHPHYS_UPDATE, count) ) + ptr | MMU_MACHPHYS_UPDATE, count) ) goto error_out; } - if ( set_hvm_info(xc_handle, dom, page_array, vcpus, pae, acpi, apic) ) { - fprintf(stderr, "Couldn't set hvm info for HVM guest.\n"); + if ( set_hvm_info(xc_handle, dom, page_array, vcpus, pae, acpi, apic) ) + { + ERROR("Couldn't set hvm info for HVM guest.\n"); goto error_out; } if ( (e820_page = xc_map_foreign_range( - xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, - page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 ) + xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, + page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 ) goto error_out; memset(e820_page, 0, PAGE_SIZE); e820_map_nr = build_e820map(e820_page, v_end); @@ -267,8 +265,8 @@ /* shared_info page starts its life empty. */ if ( (shared_info = xc_map_foreign_range( - xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, - shared_info_frame)) == 0 ) + xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, + shared_info_frame)) == 0 ) goto error_out; memset(shared_info, 0, sizeof(shared_info_t)); /* Mask all upcalls... */ @@ -279,8 +277,8 @@ /* Populate the event channel port in the shared page */ shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1]; if ( (sp = (shared_iopage_t *) xc_map_foreign_range( - xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, - shared_page_frame)) == 0 ) + xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, + shared_page_frame)) == 0 ) goto error_out; memset(sp, 0, PAGE_SIZE); @@ -290,7 +288,7 @@ vp_eport = xc_evtchn_alloc_unbound(xc_handle, dom, 0); if ( vp_eport < 0 ) { - fprintf(stderr, "Couldn't get unbound port from VMX guest.\n"); + PERROR("Couldn't get unbound port from VMX guest.\n"); goto error_out; } sp->vcpu_iodata[i].vp_eport = vp_eport; diff -r 516cf6553011 -r cf89e8f0831b tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Mon Mar 20 08:56:46 2006 +++ b/tools/libxc/xc_linux_build.c Mon Mar 20 08:56:54 2006 @@ -536,8 +536,12 @@ *store_mfn = page_array[1]; *console_mfn = page_array[2]; - printf("store_mfn: 0x%lx, console_mfn: 0x%lx\n", - (uint64_t)store_mfn, (uint64_t)console_mfn); + printf("start_info: 0x%lx at 0x%lx, " + "store_mfn: 0x%lx at 0x%lx, " + "console_mfn: 0x%lx at 0x%lx\n", + page_array[0], nr_pages, + *store_mfn, nr_pages - 2, + *console_mfn, nr_pages - 1); start_info = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[0]); diff -r 516cf6553011 -r cf89e8f0831b xen/Makefile --- a/xen/Makefile Mon Mar 20 08:56:46 2006 +++ b/xen/Makefile Mon Mar 20 08:56:54 2006 @@ -61,6 +61,7 @@ $(MAKE) -C acm endif $(MAKE) -C arch/$(TARGET_ARCH) + $(MAKE) -C arch/$(TARGET_ARCH) $(TARGET) # drivers/char/console.o contains static banner/compile info. Blow it away. # Don't refresh these files during e.g., 'sudo make install' diff -r 516cf6553011 -r cf89e8f0831b xen/Rules.mk --- a/xen/Rules.mk Mon Mar 20 08:56:46 2006 +++ b/xen/Rules.mk Mon Mar 20 08:56:54 2006 @@ -7,6 +7,15 @@ perfc ?= n perfc_arrays?= n crash_debug ?= n + +# Hardcoded configuration implications and dependencies. +# Do this is a neater way if it becomes unwieldy. +ifeq ($(debug),y) +verbose := y +endif +ifeq ($(perfc_arrays),y) +perfc := y +endif XEN_ROOT=$(BASEDIR)/.. 
include $(XEN_ROOT)/Config.mk @@ -27,41 +36,23 @@ HDRS := $(subst $(BASEDIR)/include/xen/banner.h,,$(HDRS)) HDRS := $(subst $(BASEDIR)/include/xen/compile.h,,$(HDRS)) -C_SRCS := $(wildcard *.c) -S_SRCS := $(wildcard *.S) -OBJS := $(patsubst %.S,%.o,$(S_SRCS)) -OBJS += $(patsubst %.c,%.o,$(C_SRCS)) +include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk # Note that link order matters! -ALL_OBJS := $(BASEDIR)/common/common.o -ALL_OBJS += $(BASEDIR)/drivers/char/driver.o -ALL_OBJS += $(BASEDIR)/drivers/acpi/driver.o -ifeq ($(ACM_SECURITY),y) -ALL_OBJS += $(BASEDIR)/acm/acm.o -CFLAGS += -DACM_SECURITY -endif -ALL_OBJS += $(BASEDIR)/arch/$(TARGET_ARCH)/arch.o +ALL_OBJS-y += $(BASEDIR)/common/built_in.o +ALL_OBJS-y += $(BASEDIR)/drivers/built_in.o +ALL_OBJS-$(ACM_SECURITY) += $(BASEDIR)/acm/built_in.o +ALL_OBJS-y += $(BASEDIR)/arch/$(TARGET_ARCH)/built_in.o -include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk +CFLAGS-y += -g -D__XEN__ +CFLAGS-$(ACM_SECURITY) += -DACM_SECURITY +CFLAGS-$(verbose) += -DVERBOSE +CFLAGS-$(crash_debug) += -DCRASH_DEBUG +CFLAGS-$(perfc) += -DPERF_COUNTERS +CFLAGS-$(perfc_arrays) += -DPERF_ARRAYS -CFLAGS += -g -D__XEN__ - -ifneq ($(debug)$(verbose),nn) -CFLAGS += -DVERBOSE -endif - -ifeq ($(crash_debug),y) -CFLAGS += -DCRASH_DEBUG -endif - -ifeq ($(perfc),y) -CFLAGS += -DPERF_COUNTERS -ifeq ($(perfc_arrays),y) -CFLAGS += -DPERF_ARRAYS -endif -endif - -CFLAGS := $(strip $(CFLAGS)) +ALL_OBJS := $(ALL_OBJS-y) +CFLAGS := $(strip $(CFLAGS) $(CFLAGS-y)) %.o: %.c $(HDRS) Makefile $(CC) $(CFLAGS) -c $< -o $@ diff -r 516cf6553011 -r cf89e8f0831b xen/acm/Makefile --- a/xen/acm/Makefile Mon Mar 20 08:56:46 2006 +++ b/xen/acm/Makefile Mon Mar 20 08:56:54 2006 @@ -1,15 +1,9 @@ +include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk -OBJS = acm_core.o -OBJS += acm_policy.o -OBJS += acm_simple_type_enforcement_hooks.o -OBJS += acm_chinesewall_hooks.o -OBJS += acm_null_hooks.o +obj-y += acm_core.o +obj-y += acm_policy.o +obj-y += acm_simple_type_enforcement_hooks.o +obj-y += acm_chinesewall_hooks.o +obj-y += acm_null_hooks.o -default: acm.o - -acm.o: $(OBJS) - $(LD) $(LDFLAGS) -r -o acm.o $(OBJS) - -clean: - rm -f *.o *~ core +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/Rules.mk --- a/xen/arch/ia64/Rules.mk Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/Rules.mk Mon Mar 20 08:56:54 2006 @@ -1,11 +1,13 @@ ######################################## # ia64-specific definitions +HAS_ACPI := y VALIDATE_VT ?= n ifneq ($(COMPILE_ARCH),$(TARGET_ARCH)) CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux- endif -AFLAGS += -D__ASSEMBLY__ +AFLAGS += -D__ASSEMBLY__ -nostdinc $(CPPFLAGS) +AFLAGS += -mconstant-gp CPPFLAGS += -I$(BASEDIR)/include -I$(BASEDIR)/include/asm-ia64 \ -I$(BASEDIR)/include/asm-ia64/linux \ -I$(BASEDIR)/include/asm-ia64/linux-xen \ @@ -13,6 +15,7 @@ -I$(BASEDIR)/arch/ia64/linux -I$(BASEDIR)/arch/ia64/linux-xen CFLAGS += -nostdinc -fno-builtin -fno-common -fno-strict-aliasing +CFLAGS += -mconstant-gp #CFLAGS += -O3 # -O3 over-inlines making debugging tough! CFLAGS += -O2 # but no optimization causes compile errors! 
#CFLAGS += -iwithprefix include -Wall -DMONITOR_BASE=$(MONITOR_BASE) diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/asm-offsets.c --- a/xen/arch/ia64/asm-offsets.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/asm-offsets.c Mon Mar 20 08:56:54 2006 @@ -84,7 +84,6 @@ //DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand)); //DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal)); //DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid)); - DEFINE(IA64_PGD, offsetof(struct domain, arch.mm)); DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct vcpu, arch._thread.ksp)); DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct vcpu, arch._thread.on_ustack)); diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/linux-xen/efi.c --- a/xen/arch/ia64/linux-xen/efi.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/linux-xen/efi.c Mon Mar 20 08:56:54 2006 @@ -37,11 +37,17 @@ #define EFI_DEBUG 0 extern efi_status_t efi_call_phys (void *, ...); +extern unsigned long long memparse (char *ptr, char **retptr); struct efi efi; EXPORT_SYMBOL(efi); static efi_runtime_services_t *runtime; +#ifdef XEN +// this is a temporary hack to avoid CONFIG_VIRTUAL_MEM_MAP +static unsigned long mem_limit = ~0UL, max_addr = 0x100000000; +#else static unsigned long mem_limit = ~0UL, max_addr = ~0UL; +#endif #define efi_call_virt(f, args...) (*(f))(args) @@ -328,8 +334,6 @@ if (running_on_sim && md->type != EFI_CONVENTIONAL_MEMORY) continue; } -// this is a temporary hack to avoid CONFIG_VIRTUAL_MEM_MAP - if (md->phys_addr >= 0x100000000) continue; #endif /* * granule_addr is the base of md's first granule. diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/linux-xen/setup.c --- a/xen/arch/ia64/linux-xen/setup.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/linux-xen/setup.c Mon Mar 20 08:56:54 2006 @@ -384,6 +384,9 @@ *cmdline_p = __va(ia64_boot_param->command_line); #ifndef XEN strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE); +#else + early_cmdline_parse(cmdline_p); + cmdline_parse(*cmdline_p); #endif efi_init(); @@ -414,10 +417,6 @@ } #endif -#ifdef XEN - early_cmdline_parse(cmdline_p); - cmdline_parse(*cmdline_p); -#endif if (early_console_setup(*cmdline_p) == 0) mark_bsp_online(); @@ -842,7 +841,9 @@ | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC)); #endif atomic_inc(&init_mm.mm_count); +#ifndef XEN current->active_mm = &init_mm; +#endif #ifdef XEN if (current->domain->arch.mm) #else diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/linux-xen/smp.c --- a/xen/arch/ia64/linux-xen/smp.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/linux-xen/smp.c Mon Mar 20 08:56:54 2006 @@ -296,7 +296,12 @@ { preempt_disable(); /* this happens for the common case of a single-threaded fork(): */ +#ifdef XEN + if (likely(mm == current->domain->arch.mm + && atomic_read(&mm->mm_users) == 1)) +#else if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1)) +#endif { local_finish_flush_tlb_mm(mm); preempt_enable(); diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/linux-xen/smpboot.c --- a/xen/arch/ia64/linux-xen/smpboot.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/linux-xen/smpboot.c Mon Mar 20 08:56:54 2006 @@ -72,8 +72,7 @@ #ifdef CONFIG_SMP /* ifdef XEN */ -//#define SMP_DEBUG 0 -#define SMP_DEBUG 1 +#define SMP_DEBUG 0 #if SMP_DEBUG #define Dprintk(x...) 
printk(x) @@ -482,16 +481,14 @@ do_rest: task_for_booting_cpu = c_idle.idle; #else - struct domain *idle; struct vcpu *v; - void *stack; v = idle_vcpu[cpu] = alloc_vcpu(idle_vcpu[0]->domain, cpu, cpu); BUG_ON(v == NULL); - printf ("do_boot_cpu: cpu=%d, domain=%p, vcpu=%p\n", cpu, idle, v); - - task_for_booting_cpu = v; + //printf ("do_boot_cpu: cpu=%d, domain=%p, vcpu=%p\n", cpu, idle, v); + + task_for_booting_cpu = (task_t *)v; /* Set cpu number. */ get_thread_info(v)->cpu = cpu; @@ -522,6 +519,7 @@ return 0; } +#ifndef XEN static int __init decay (char *str) { @@ -531,6 +529,7 @@ } __setup("decay=", decay); +#endif /* * Initialize the logical CPU number to SAPICID mapping diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/vmx/mmio.c --- a/xen/arch/ia64/vmx/mmio.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/vmx/mmio.c Mon Mar 20 08:56:54 2006 @@ -33,6 +33,7 @@ #include <asm/mm.h> #include <asm/vmx.h> #include <public/event_channel.h> +#include <linux/event.h> /* struct mmio_list *lookup_mmio(u64 gpa, struct mmio_list *mio_base) diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/vmx/pal_emul.c --- a/xen/arch/ia64/vmx/pal_emul.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/vmx/pal_emul.c Mon Mar 20 08:56:54 2006 @@ -20,6 +20,7 @@ #include <asm/vmx_vcpu.h> #include <asm/pal.h> +#include <asm/sal.h> static void get_pal_parameters (VCPU *vcpu, UINT64 *gr29, @@ -68,7 +69,6 @@ static struct ia64_pal_retval pal_vm_tr_read (VCPU *vcpu ) { -#warning pal_vm_tr_read: to be implemented struct ia64_pal_retval result; result.status= -1; //unimplemented @@ -101,7 +101,6 @@ static struct ia64_pal_retval pal_halt (VCPU *vcpu) { -#warning pal_halt: to be implemented //bugbug: to be implement. struct ia64_pal_retval result; @@ -140,12 +139,18 @@ static struct ia64_pal_retval pal_bus_get_features(VCPU *vcpu){ - + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } static struct ia64_pal_retval pal_cache_summary(VCPU *vcpu){ - + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } static struct ia64_pal_retval @@ -157,22 +162,34 @@ static struct ia64_pal_retval pal_cache_info(VCPU *vcpu){ + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } static struct ia64_pal_retval pal_cache_prot_info(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_cache_shared_info(VCPU *vcpu){ + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } static struct ia64_pal_retval pal_mem_attrib(VCPU *vcpu){ + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } static struct ia64_pal_retval pal_debug_info(VCPU *vcpu){ + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } static struct ia64_pal_retval @@ -182,8 +199,16 @@ static struct ia64_pal_retval pal_freq_base(VCPU *vcpu){ struct ia64_pal_retval result; + struct ia64_sal_retval isrv; PAL_CALL(result,PAL_FREQ_BASE, 0, 0, 0); + if(result.v0 == 0){ //PAL_FREQ_BASE may not be implemented in some platforms, call SAL instead. 
+ SAL_CALL(isrv, SAL_FREQ_BASE, + SAL_FREQ_BASE_PLATFORM, 0, 0, 0, 0, 0, 0); + result.status = isrv.status; + result.v0 = isrv.v0; + result.v1 = result.v2 =0; + } return result; } @@ -197,46 +222,89 @@ static struct ia64_pal_retval pal_halt_info(VCPU *vcpu){ + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } static struct ia64_pal_retval pal_logical_to_physica(VCPU *vcpu){ + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } static struct ia64_pal_retval pal_perf_mon_info(VCPU *vcpu){ + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } static struct ia64_pal_retval pal_proc_get_features(VCPU *vcpu){ + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } static struct ia64_pal_retval pal_ptce_info(VCPU *vcpu){ + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } static struct ia64_pal_retval pal_register_info(VCPU *vcpu){ + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } static struct ia64_pal_retval pal_rse_info(VCPU *vcpu){ -} - + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; +} static struct ia64_pal_retval pal_test_info(VCPU *vcpu){ + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } static struct ia64_pal_retval pal_vm_summary(VCPU *vcpu){ + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } static struct ia64_pal_retval pal_vm_info(VCPU *vcpu){ + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } static struct ia64_pal_retval pal_vm_page_size(VCPU *vcpu){ + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + return result; } void pal_emul( VCPU *vcpu) { @@ -285,6 +353,82 @@ case PAL_FREQ_BASE: result = pal_freq_base (vcpu); + break; + + case PAL_BUS_GET_FEATURES : + result = pal_bus_get_features (vcpu); + break; + + case PAL_CACHE_SUMMARY : + result = pal_cache_summary (vcpu); + break; + + case PAL_CACHE_INIT : + result = pal_cache_init(vcpu); + break; + + case PAL_CACHE_INFO : + result = pal_cache_info(vcpu); + break; + + case PAL_CACHE_PROT_INFO : + result = pal_cache_prot_info(vcpu); + break; + + case PAL_MEM_ATTRIB : + result = pal_mem_attrib(vcpu); + break; + + case PAL_DEBUG_INFO : + result = pal_debug_info(vcpu); + break; + + case PAL_FIXED_ADDR : + result = pal_fixed_addr(vcpu); + break; + + case PAL_HALT_INFO : + result = pal_halt_info(vcpu); + break; + + case PAL_LOGICAL_TO_PHYSICAL : + result = pal_logical_to_physica(vcpu); + break; + + case PAL_PERF_MON_INFO : + result = pal_perf_mon_info(vcpu); + break; + + case PAL_PROC_GET_FEATURES: + result = pal_proc_get_features(vcpu); + break; + + case PAL_PTCE_INFO : + result = pal_ptce_info(vcpu); + break; + + case PAL_REGISTER_INFO : + result = pal_register_info(vcpu); + break; + + case PAL_RSE_INFO : + result = pal_rse_info(vcpu); + break; + + case PAL_TEST_PROC : + result = pal_test_info(vcpu); + break; + + case PAL_VM_SUMMARY : + result = pal_vm_summary(vcpu); + break; + + case PAL_VM_INFO : + result = pal_vm_info(vcpu); + break; + + case PAL_VM_PAGE_SIZE : + result = pal_vm_page_size(vcpu); break; default: diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/vmx/vlsapic.c --- a/xen/arch/ia64/vmx/vlsapic.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/vmx/vlsapic.c Mon Mar 20 08:56:54 2006 @@ -97,16 +97,15 @@ { vtime_t *vtm; VCPU 
*vcpu = data; - u64 cur_itc,vitm; - - UINT64 vec; - - vec = VCPU(vcpu, itv) & 0xff; - vmx_vcpu_pend_interrupt(vcpu, vec); - + u64 cur_itc,vitv; + + vitv = VCPU(vcpu, itv); + if ( !ITV_IRQ_MASK(vitv) ){ + vmx_vcpu_pend_interrupt(vcpu, vitv & 0xff); + } vtm=&(vcpu->arch.arch_vmx.vtm); cur_itc = now_itc(vtm); - vitm =VCPU(vcpu, itm); + // vitm =VCPU(vcpu, itm); //fire_itc2 = cur_itc; //fire_itm2 = vitm; update_last_itc(vtm,cur_itc); // pseudo read to update vITC @@ -131,55 +130,76 @@ */ uint64_t vtm_get_itc(VCPU *vcpu) { - uint64_t guest_itc, spsr; + uint64_t guest_itc; vtime_t *vtm; vtm=&(vcpu->arch.arch_vmx.vtm); - // FIXME: should use local_irq_disable & local_irq_enable ?? - local_irq_save(spsr); guest_itc = now_itc(vtm); -// update_last_itc(vtm, guest_itc); - - local_irq_restore(spsr); return guest_itc; } + + + void vtm_set_itc(VCPU *vcpu, uint64_t new_itc) { - uint64_t spsr; + uint64_t vitm, vitv; vtime_t *vtm; - + vitm = VCPU(vcpu,itm); + vitv = VCPU(vcpu,itv); vtm=&(vcpu->arch.arch_vmx.vtm); - local_irq_save(spsr); vtm->vtm_offset = new_itc - ia64_get_itc(); vtm->last_itc = new_itc; - vtm_interruption_update(vcpu, vtm); - local_irq_restore(spsr); -} - -void vtm_set_itv(VCPU *vcpu) -{ - uint64_t spsr,itv; - vtime_t *vtm; - + if(vitm < new_itc){ + clear_bit(ITV_VECTOR(vitv), &VCPU(vcpu, irr[0])); + stop_timer(&vtm->vtm_timer); + } +} + + +#define TIMER_SLOP (50*1000) /* ns */ /* copy from timer.c */ +extern u64 cycle_to_ns(u64 cyle); + + +void vtm_set_itm(VCPU *vcpu, uint64_t val) +{ + vtime_t *vtm; + uint64_t vitv, cur_itc, expires; + vitv = VCPU(vcpu, itv); vtm=&(vcpu->arch.arch_vmx.vtm); - local_irq_save(spsr); - itv = VCPU(vcpu, itv); - if ( ITV_IRQ_MASK(itv) ) + // TODO; need to handle VHPI in future + clear_bit(ITV_VECTOR(vitv), &VCPU(vcpu, irr[0])); + VCPU(vcpu,itm)=val; + cur_itc =now_itc(vtm); + if(val > vtm->last_itc){ + expires = NOW() + cycle_to_ns(val-cur_itc) + TIMER_SLOP; + set_timer(&vtm->vtm_timer, expires); + }else{ stop_timer(&vtm->vtm_timer); - vtm_interruption_update(vcpu, vtm); - local_irq_restore(spsr); -} - - -/* - * Update interrupt or hook the vtm timer for fire + } +} + + +void vtm_set_itv(VCPU *vcpu, uint64_t val) +{ + uint64_t olditv; + olditv = VCPU(vcpu, itv); + VCPU(vcpu, itv) = val; + if(ITV_IRQ_MASK(val)){ + clear_bit(ITV_VECTOR(olditv), &VCPU(vcpu, irr[0])); + }else if(ITV_VECTOR(olditv)!=ITV_VECTOR(val)){ + if(test_and_clear_bit(ITV_VECTOR(olditv), &VCPU(vcpu, irr[0]))) + set_bit(ITV_VECTOR(val), &VCPU(vcpu, irr[0])); + } +} + + +/* + * Update interrupt or hook the vtm timer for fire * At this point vtm_timer should be removed if itv is masked. */ /* Interrupt must be disabled at this point */ - -extern u64 cycle_to_ns(u64 cyle); -#define TIMER_SLOP (50*1000) /* ns */ /* copy from timer.c */ +/* void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm) { uint64_t cur_itc,vitm,vitv; @@ -197,8 +217,7 @@ cur_itc =now_itc(vtm); diff_last = vtm->last_itc - vitm; diff_now = cur_itc - vitm; - update_last_itc (vtm,cur_itc); - + if ( diff_last >= 0 ) { // interrupt already fired. stop_timer(&vtm->vtm_timer); @@ -207,28 +226,32 @@ // ITV is fired. vmx_vcpu_pend_interrupt(vcpu, vitv&0xff); } +*/ /* Both last_itc & cur_itc < itm, wait for fire condition */ - else { +/* else { expires = NOW() + cycle_to_ns(0-diff_now) + TIMER_SLOP; set_timer(&vtm->vtm_timer, expires); } local_irq_restore(spsr); } + */ /* * Action for vtm when the domain is scheduled out. * Remove the timer for vtm. 
*/ +/* void vtm_domain_out(VCPU *vcpu) { if(!is_idle_domain(vcpu->domain)) stop_timer(&vcpu->arch.arch_vmx.vtm.vtm_timer); } - + */ /* * Action for vtm when the domain is scheduled in. * Fire vtm IRQ or add the timer for vtm. */ +/* void vtm_domain_in(VCPU *vcpu) { vtime_t *vtm; @@ -238,6 +261,7 @@ vtm_interruption_update(vcpu, vtm); } } + */ /* * Next for vLSapic @@ -413,11 +437,12 @@ { return ( (pending >> 4) > mic ); } - +#if 0 static int is_invalid_irq(int vec) { return (vec == 1 || ((vec <= 14 && vec >= 3))); } +#endif //shadow it due to no use currently #define IRQ_NO_MASKED 0 #define IRQ_MASKED_BY_VTPR 1 diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/vmx/vmmu.c --- a/xen/arch/ia64/vmx/vmmu.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/vmx/vmmu.c Mon Mar 20 08:56:54 2006 @@ -31,6 +31,7 @@ #include <asm/hw_irq.h> #include <asm/vmx_pal_vsa.h> #include <asm/kregs.h> +#include <asm/vcpu.h> #include <xen/irq.h> /* @@ -68,14 +69,14 @@ /* * The VRN bits of va stand for which rr to get. */ -ia64_rr vmmu_get_rr(VCPU *vcpu, u64 va) -{ - ia64_rr vrr; - vmx_vcpu_get_rr(vcpu, va, &vrr.rrval); - return vrr; -} - - +//ia64_rr vmmu_get_rr(struct vcpu *vcpu, u64 va) +//{ +// ia64_rr vrr; +// vcpu_get_rr(vcpu, va, &vrr.rrval); +// return vrr; +//} + +/* void recycle_message(thash_cb_t *hcb, u64 para) { if(hcb->ht == THASH_VHPT) @@ -84,7 +85,7 @@ } printk("hcb=%p recycled with %lx\n",hcb,para); } - + */ /* * Purge all guest TCs in logical processor. @@ -102,7 +103,6 @@ u32 stride1,stride2; u32 i,j; u64 psr; - result = ia64_pal_call_static(PAL_PTCE_INFO,0,0,0, 0); if ( result.status != 0 ) { @@ -113,7 +113,7 @@ count2 = LOW_32BITS (result.v1); stride1 = HIGH_32BITS(result.v2); stride2 = LOW_32BITS (result.v2); - + local_irq_save(psr); for (i=0; i<count1; i++) { for (j=0; j<count2; j++) { @@ -133,24 +133,10 @@ // struct page_info *page; thash_cb_t *vhpt; PTA pta_value; -/* - page = alloc_domheap_pages (NULL, VCPU_VHPT_ORDER, 0); - if ( page == NULL ) { - panic("No enough contiguous memory for init_domain_mm\n"); - } - vbase = page_to_virt(page); - printk("Allocate domain vhpt at 0x%lx\n", (u64)vbase); - memset(vbase, 0, VCPU_VHPT_SIZE); - */ -// vcur = (void*)((u64)vbase + VCPU_VHPT_SIZE); vcur -= sizeof (thash_cb_t); vhpt = vcur; vhpt->ht = THASH_VHPT; vhpt->vcpu = d; -// vhpt->hash_func = machine_thash; -// vcur -= sizeof (vhpt_special); -// vs = vcur; - /* Setup guest pta */ pta_value.val = 0; pta_value.ve = 1; @@ -159,14 +145,10 @@ pta_value.base = ((u64)vbase) >> PTA_BASE_SHIFT; d->arch.arch_vmx.mpta = pta_value.val; -// vhpt->vs = vs; -// vhpt->vs->get_mfn = __gpfn_to_mfn_foreign; -// vhpt->vs->tag_func = machine_ttag; vhpt->hash = vbase; vhpt->hash_sz = VCPU_VHPT_SIZE/2; vhpt->cch_buf = (void *)(vbase + vhpt->hash_sz); vhpt->cch_sz = (u64)vcur - (u64)vhpt->cch_buf; -// vhpt->recycle_notifier = recycle_message; thash_init(vhpt,VCPU_VHPT_SHIFT-1); return vhpt; } @@ -177,9 +159,8 @@ { struct page_info *page; void *vbase, *vhptbase, *vcur; - tlb_special_t *ts; thash_cb_t *tlb; - + page = alloc_domheap_pages (NULL, VCPU_VHPT_ORDER, 0); if ( page == NULL ) { panic("No enough contiguous memory for init_domain_mm\n"); @@ -193,10 +174,7 @@ tlb = vcur; tlb->ht = THASH_TLB; tlb->vcpu = d; - vcur -= sizeof (tlb_special_t); - ts = vcur; - tlb->ts = ts; - tlb->ts->vhpt = init_domain_vhpt(d,vhptbase,vbase); + tlb->vhpt = init_domain_vhpt(d,vhptbase,vbase); // tlb->hash_func = machine_thash; tlb->hash = vbase; tlb->hash_sz = VCPU_VTLB_SIZE/2; @@ -207,27 +185,6 @@ return tlb; } -/* Allocate physical to 
machine mapping table for domN - * FIXME: Later this interface may be removed, if that table is provided - * by control panel. Dom0 has gpfn identical to mfn, which doesn't need - * this interface at all. - */ -#if 0 -void -alloc_pmt(struct domain *d) -{ - struct page_info *page; - - /* Only called once */ - ASSERT(d->arch.pmt); - - page = alloc_domheap_pages(NULL, get_order(d->max_pages), 0); - ASSERT(page); - - d->arch.pmt = page_to_virt(page); - memset(d->arch.pmt, 0x55, d->max_pages * 8); -} -#endif /* * Insert guest TLB to machine TLB. * data: In TLB format @@ -240,7 +197,6 @@ unsigned long mtlb_ppn; mtlb.ifa = tlb->vadr; mtlb.itir = tlb->itir & ~ITIR_RV_MASK; - //vmx_vcpu_get_rr(d, mtlb.ifa, &vrr.value); mtlb.page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK; mtlb.ppn = get_mfn(d->domain,tlb->ppn); mtlb_ppn=mtlb.ppn; @@ -311,7 +267,7 @@ IA64_PSR vpsr; vpsr.val = vmx_vcpu_get_psr(vcpu); - vrr = vmx_vcpu_rr(vcpu, vadr); + vcpu_get_rr(vcpu, vadr, &vrr.rrval); vmx_vcpu_get_pta(vcpu,&vpta.val); if ( vrr.ve & vpta.ve ) { @@ -355,21 +311,18 @@ u64 *vpa; thash_data_t *tlb; thash_cb_t *hcb; - ia64_rr vrr; u64 mfn; if ( !(VCPU(vcpu, vpsr) & IA64_PSR_IT) ) { // I-side physical mode gpip = gip; } else { - vmx_vcpu_get_rr(vcpu, gip, &vrr.rrval); - hcb = vmx_vcpu_get_vtlb(vcpu); - tlb = vtlb_lookup_ex (hcb, vrr.rid, gip, ISIDE_TLB ); - if( tlb == NULL ) - tlb = vtlb_lookup_ex (hcb, - vrr.rid, gip, DSIDE_TLB ); - if (tlb) - gpip = (tlb->ppn << 12) | ( gip & (PSIZE(tlb->ps)-1) ); + hcb = vmx_vcpu_get_vtlb(vcpu); + tlb = vtlb_lookup(hcb, gip, ISIDE_TLB); +// if( tlb == NULL ) +// tlb = vtlb_lookup(hcb, gip, DSIDE_TLB ); + if (tlb) + gpip = (tlb->ppn >>(tlb->ps-12)<<tlb->ps) | ( gip & (PSIZE(tlb->ps)-1) ); } if( gpip){ mfn = gmfn_to_mfn(vcpu->domain, gpip >>PAGE_SHIFT); @@ -388,236 +341,146 @@ IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) { - - thash_data_t data, *ovl; - thash_cb_t *hcb; - search_section_t sections; - ia64_rr vrr; - - hcb = vmx_vcpu_get_vtlb(vcpu); - data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; - data.itir=itir; - data.vadr=PAGEALIGN(ifa,data.ps); - data.tc = 1; - data.cl=ISIDE_TLB; - vmx_vcpu_get_rr(vcpu, ifa, (UINT64 *)&vrr); - data.rid = vrr.rid; - - sections.tr = 1; - sections.tc = 0; - - ovl = vtr_find_overlap(hcb, &data, ISIDE_TLB); - while (ovl) { + int slot; + u64 ps, va; + thash_cb_t *hcb; + + ps = itir_ps(itir); + va = PAGEALIGN(ifa, ps); + slot = vtr_find_overlap(vcpu, va, ps, ISIDE_TLB); + if (slot >=0) { // generate MCA. panic("Tlb conflict!!"); return IA64_FAULT; } - thash_purge_and_insert(hcb, &data, ifa); - return IA64_NO_FAULT; -} - - - + hcb = vmx_vcpu_get_vtlb(vcpu); + thash_purge_and_insert(hcb, pte, itir, ifa); + return IA64_NO_FAULT; +} IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) { - - thash_data_t data, *ovl; - thash_cb_t *hcb; - search_section_t sections; - ia64_rr vrr; - - hcb = vmx_vcpu_get_vtlb(vcpu); - data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; - data.itir=itir; - data.vadr=PAGEALIGN(ifa,data.ps); - data.tc = 1; - data.cl=DSIDE_TLB; - vmx_vcpu_get_rr(vcpu, ifa,(UINT64 *)&vrr); - data.rid = vrr.rid; - sections.tr = 1; - sections.tc = 0; - - ovl = vtr_find_overlap(hcb, &data, DSIDE_TLB); - if (ovl) { - // generate MCA. 
- panic("Tlb conflict!!"); - return IA64_FAULT; - } - thash_purge_and_insert(hcb, &data, ifa); - return IA64_NO_FAULT; -} - -/* - * Return TRUE/FALSE for success of lock operation - */ - -/* -int vmx_lock_guest_dtc (VCPU *vcpu, UINT64 va, int lock) -{ - - thash_cb_t *hcb; - ia64_rr vrr; - u64 preferred_size; - - vmx_vcpu_get_rr(vcpu, va, &vrr); - hcb = vmx_vcpu_get_vtlb(vcpu); - va = PAGEALIGN(va,vrr.ps); - preferred_size = PSIZE(vrr.ps); - return thash_lock_tc(hcb, va, preferred_size, vrr.rid, DSIDE_TLB, lock); -} - */ - - - -IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx) -{ - - thash_data_t data, *ovl; - thash_cb_t *hcb; - search_section_t sections; - ia64_rr vrr; - /* u64 mfn,psr; */ - - hcb = vmx_vcpu_get_vtlb(vcpu); - data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; - data.itir=itir; - data.vadr=PAGEALIGN(ifa,data.ps); - data.tc = 0; - data.cl=ISIDE_TLB; - vmx_vcpu_get_rr(vcpu, ifa, (UINT64 *)&vrr); - data.rid = vrr.rid; - sections.tr = 1; - sections.tc = 0; - - - ovl = vtr_find_overlap(hcb, &data, ISIDE_TLB); - if (ovl) { + int slot; + u64 ps, va, gpfn; + thash_cb_t *hcb; + + ps = itir_ps(itir); + va = PAGEALIGN(ifa, ps); + slot = vtr_find_overlap(vcpu, va, ps, DSIDE_TLB); + if (slot >=0) { // generate MCA. panic("Tlb conflict!!"); return IA64_FAULT; } - sections.tr = 0; - sections.tc = 1; - thash_purge_entries(hcb, &data, sections); -/* if((idx==IA64_TR_KERNEL)&&(data.ps == KERNEL_TR_PAGE_SHIFT)){ - data.contiguous=1; - } - */ - thash_tr_insert(hcb, &data, ifa, idx); -/* - if((idx==IA64_TR_KERNEL)&&(data.ps == KERNEL_TR_PAGE_SHIFT)){ - mfn = __gpfn_to_mfn_foreign(vcpu->domain,arch_to_xen_ppn(data.ppn)); - data.page_flags=pte&~PAGE_FLAGS_RV_MASK; - data.ppn = xen_to_arch_ppn(mfn); - psr = ia64_clear_ic(); - ia64_itr(0x1, IA64_ITR_GUEST_KERNEL, data.vadr, data.page_flags, data.ps); - ia64_set_psr(psr); // restore psr - ia64_srlz_i(); -// return IA64_NO_FAULT; - } -*/ - return IA64_NO_FAULT; -} - -IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx) -{ - - thash_data_t data, *ovl; - thash_cb_t *hcb; - search_section_t sections; - ia64_rr vrr; - /* u64 mfn,psr; */ - - hcb = vmx_vcpu_get_vtlb(vcpu); - data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; - data.itir=itir; - data.vadr=PAGEALIGN(ifa,data.ps); - data.tc = 0; - data.cl=DSIDE_TLB; - vmx_vcpu_get_rr(vcpu, ifa,(UINT64 *)&vrr); - data.rid = vrr.rid; - sections.tr = 1; - sections.tc = 0; - - ovl = vtr_find_overlap(hcb, &data, DSIDE_TLB); - while (ovl) { + hcb = vmx_vcpu_get_vtlb(vcpu); + gpfn = (pte & _PAGE_PPN_MASK)>> PAGE_SHIFT; + if(__gpfn_is_io(vcpu->domain,gpfn)) + pte |= VTLB_PTE_IO; + thash_purge_and_insert(hcb, pte, itir, ifa); + return IA64_NO_FAULT; + +} + + + + +IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) +{ + int index; + u64 ps, va, rid; + thash_cb_t *hcb; + + ps = itir_ps(itir); + va = PAGEALIGN(ifa, ps); + index = vtr_find_overlap(vcpu, va, ps, ISIDE_TLB); + if (index >=0) { // generate MCA. 
panic("Tlb conflict!!"); return IA64_FAULT; } - sections.tr = 0; - sections.tc = 1; - thash_purge_entries(hcb, &data, sections); -/* - if((idx==IA64_TR_KERNEL)&&(data.ps == KERNEL_TR_PAGE_SHIFT)){ - data.contiguous=1; - } - */ - thash_tr_insert(hcb, &data, ifa, idx); -/* - if((idx==IA64_TR_KERNEL)&&(data.ps == KERNEL_TR_PAGE_SHIFT)){ - mfn = __gpfn_to_mfn_foreign(vcpu->domain,arch_to_xen_ppn(data.ppn)); - data.page_flags=pte&~PAGE_FLAGS_RV_MASK; - data.ppn = xen_to_arch_ppn(mfn); - psr = ia64_clear_ic(); - ia64_itr(0x2,IA64_DTR_GUEST_KERNEL , data.vadr, data.page_flags, data.ps); - ia64_set_psr(psr); // restore psr - ia64_srlz_i(); -// return IA64_NO_FAULT; - } -*/ - - return IA64_NO_FAULT; -} - - - -IA64FAULT vmx_vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 ps) -{ - thash_cb_t *hcb; - ia64_rr rr; - search_section_t sections; - - hcb = vmx_vcpu_get_vtlb(vcpu); - rr=vmx_vcpu_rr(vcpu,vadr); - sections.tr = 1; - sections.tc = 1; - thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,DSIDE_TLB); - return IA64_NO_FAULT; -} - -IA64FAULT vmx_vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 ps) -{ - thash_cb_t *hcb; - ia64_rr rr; - search_section_t sections; - hcb = vmx_vcpu_get_vtlb(vcpu); - rr=vmx_vcpu_rr(vcpu,vadr); - sections.tr = 1; - sections.tc = 1; - thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,ISIDE_TLB); - return IA64_NO_FAULT; -} - -IA64FAULT vmx_vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 ps) -{ - thash_cb_t *hcb; - ia64_rr vrr; - search_section_t sections; - hcb = vmx_vcpu_get_vtlb(vcpu); - vrr=vmx_vcpu_rr(vcpu,vadr); - sections.tr = 0; - sections.tc = 1; - vadr = PAGEALIGN(vadr, ps); - - thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,DSIDE_TLB); - thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,ISIDE_TLB); - return IA64_NO_FAULT; -} - - -IA64FAULT vmx_vcpu_ptc_e(VCPU *vcpu, UINT64 vadr) + hcb = vmx_vcpu_get_vtlb(vcpu); + thash_purge_entries(hcb, va, ps); + vcpu_get_rr(vcpu, va, &rid); + rid = rid& RR_RID_MASK; + vmx_vcpu_set_tr((thash_data_t *)&vcpu->arch.itrs[slot], pte, itir, va, rid); + vcpu_quick_region_set(PSCBX(vcpu,itr_regions),va); + return IA64_NO_FAULT; +} + + +IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) +{ + int index; + u64 ps, va, gpfn, rid; + thash_cb_t *hcb; + + ps = itir_ps(itir); + va = PAGEALIGN(ifa, ps); + index = vtr_find_overlap(vcpu, va, ps, DSIDE_TLB); + if (index>=0) { + // generate MCA. 
+ panic("Tlb conflict!!"); + return IA64_FAULT; + } + hcb = vmx_vcpu_get_vtlb(vcpu); + thash_purge_entries(hcb, va, ps); + gpfn = (pte & _PAGE_PPN_MASK)>> PAGE_SHIFT; + if(__gpfn_is_io(vcpu->domain,gpfn)) + pte |= VTLB_PTE_IO; + vcpu_get_rr(vcpu, va, &rid); + rid = rid& RR_RID_MASK; + vmx_vcpu_set_tr((thash_data_t *)&vcpu->arch.dtrs[slot], pte, itir, va, rid); + vcpu_quick_region_set(PSCBX(vcpu,dtr_regions),va); + return IA64_NO_FAULT; +} + + + +IA64FAULT vmx_vcpu_ptr_d(VCPU *vcpu,UINT64 ifa,UINT64 ps) +{ + int index; + u64 va; + thash_cb_t *hcb; + + va = PAGEALIGN(ifa, ps); + index = vtr_find_overlap(vcpu, va, ps, DSIDE_TLB); + if (index>=0) { + vcpu->arch.dtrs[index].p=0; + index = vtr_find_overlap(vcpu, va, ps, DSIDE_TLB); + } + hcb = vmx_vcpu_get_vtlb(vcpu); + thash_purge_entries(hcb, va, ps); + return IA64_NO_FAULT; +} + +IA64FAULT vmx_vcpu_ptr_i(VCPU *vcpu,UINT64 ifa,UINT64 ps) +{ + int index; + u64 va; + thash_cb_t *hcb; + + va = PAGEALIGN(ifa, ps); + index = vtr_find_overlap(vcpu, va, ps, ISIDE_TLB); + if (index>=0) { + vcpu->arch.itrs[index].p=0; + index = vtr_find_overlap(vcpu, va, ps, ISIDE_TLB); + } + hcb = vmx_vcpu_get_vtlb(vcpu); + thash_purge_entries(hcb, va, ps); + return IA64_NO_FAULT; +} + +IA64FAULT vmx_vcpu_ptc_l(VCPU *vcpu, UINT64 va, UINT64 ps) +{ + thash_cb_t *hcb; + va = PAGEALIGN(va, ps); + hcb = vmx_vcpu_get_vtlb(vcpu); + thash_purge_entries(hcb, va, ps); + return IA64_NO_FAULT; +} + + +IA64FAULT vmx_vcpu_ptc_e(VCPU *vcpu, UINT64 va) { thash_cb_t *hcb; hcb = vmx_vcpu_get_vtlb(vcpu); @@ -625,15 +488,15 @@ return IA64_NO_FAULT; } -IA64FAULT vmx_vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 ps) -{ - vmx_vcpu_ptc_l(vcpu, vadr, ps); +IA64FAULT vmx_vcpu_ptc_g(VCPU *vcpu, UINT64 va, UINT64 ps) +{ + vmx_vcpu_ptc_l(vcpu, va, ps); return IA64_ILLOP_FAULT; } -IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 ps) -{ - vmx_vcpu_ptc_l(vcpu, vadr, ps); +IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu,UINT64 va,UINT64 ps) +{ + vmx_vcpu_ptc_l(vcpu, va, ps); return IA64_NO_FAULT; } @@ -644,7 +507,7 @@ ia64_rr vrr; u64 vhpt_offset; vmx_vcpu_get_pta(vcpu, &vpta.val); - vrr=vmx_vcpu_rr(vcpu, vadr); + vcpu_get_rr(vcpu, vadr, &vrr.rrval); if(vpta.vf){ panic("THASH,Don't support long format VHPT"); *pval = ia64_call_vsa(PAL_VPS_THASH,vadr,vrr.rrval,vpta.val,0,0,0,0); @@ -663,7 +526,7 @@ ia64_rr vrr; PTA vpta; vmx_vcpu_get_pta(vcpu, &vpta.val); - vrr=vmx_vcpu_rr(vcpu, vadr); + vcpu_get_rr(vcpu, vadr, &vrr.rrval); if(vpta.vf){ panic("THASH,Don't support long format VHPT"); *pval = ia64_call_vsa(PAL_VPS_TTAG,vadr,vrr.rrval,0,0,0,0,0); @@ -679,13 +542,11 @@ { thash_data_t *data; thash_cb_t *hcb; - ia64_rr vrr; ISR visr,pt_isr; REGS *regs; u64 vhpt_adr; IA64_PSR vpsr; hcb = vmx_vcpu_get_vtlb(vcpu); - vrr=vmx_vcpu_rr(vcpu,vadr); regs=vcpu_regs(vcpu); pt_isr.val=VMX(vcpu,cr_isr); visr.val=0; @@ -696,7 +557,7 @@ visr.ni=1; } visr.na=1; - data = vtlb_lookup_ex(hcb, vrr.rid, vadr, DSIDE_TLB); + data = vtlb_lookup(hcb, vadr, DSIDE_TLB); if(data){ if(data->p==0){ visr.na=1; @@ -744,8 +605,7 @@ } else{ vmx_vcpu_thash(vcpu, vadr, &vhpt_adr); - vrr=vmx_vcpu_rr(vcpu,vhpt_adr); - data = vtlb_lookup_ex(hcb, vrr.rid, vhpt_adr, DSIDE_TLB); + data = vtlb_lookup(hcb, vhpt_adr, DSIDE_TLB); if(data){ if(vpsr.ic){ vcpu_set_isr(vcpu, visr.val); @@ -776,7 +636,6 @@ { thash_data_t *data; thash_cb_t *hcb; - ia64_rr rr; PTA vpta; vmx_vcpu_get_pta(vcpu, &vpta.val); if(vpta.vf==0 || unimplemented_gva(vcpu, vadr)){ @@ -784,8 +643,7 @@ return IA64_NO_FAULT; } hcb = vmx_vcpu_get_vtlb(vcpu); - rr=vmx_vcpu_rr(vcpu,vadr); - data = 
vtlb_lookup_ex(hcb, rr.rid, vadr, DSIDE_TLB); + data = vtlb_lookup(hcb, vadr, DSIDE_TLB); if(!data||!data->p){ *key=1; }else{ @@ -821,11 +679,9 @@ unsigned long end; /* end of the area mapped by current entry */ thash_data_t *entry; struct vcpu *v = current; - ia64_rr vrr; vtlb = vmx_vcpu_get_vtlb(v); - vrr = vmx_vcpu_rr(v, va); - entry = vtlb_lookup_ex(vtlb, vrr.rid, va, DSIDE_TLB); + entry = vtlb_lookup(vtlb, va, DSIDE_TLB); if (entry == NULL) return -EFAULT; diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/vmx/vmx_hypercall.c --- a/xen/arch/ia64/vmx/vmx_hypercall.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/vmx/vmx_hypercall.c Mon Mar 20 08:56:54 2006 @@ -35,6 +35,9 @@ #include <asm/dom_fw.h> #include <xen/domain.h> +extern long do_sched_op(int cmd, unsigned long arg); +extern unsigned long domain_mpa_to_imva(struct domain *,unsigned long mpaddr); + void hyper_not_support(void) { VCPU *vcpu=current; @@ -123,7 +126,7 @@ vcpu_set_gr(vcpu, 8, ret, 0); vmx_vcpu_increment_iip(vcpu); } - +/* static int do_lock_page(VCPU *vcpu, u64 va, u64 lock) { ia64_rr rr; @@ -132,7 +135,7 @@ rr = vmx_vcpu_rr(vcpu, va); return thash_lock_tc(hcb, va ,1U<<rr.ps, rr.rid, DSIDE_TLB, lock); } - + */ /* * Lock guest page in vTLB, so that it's not relinquished by recycle * session when HV is servicing that hypercall. diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/vmx/vmx_init.c --- a/xen/arch/ia64/vmx/vmx_init.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/vmx/vmx_init.c Mon Mar 20 08:56:54 2006 @@ -50,6 +50,8 @@ #include <public/arch-ia64.h> #include <asm/hvm/vioapic.h> #include <public/event_channel.h> +#include <xen/event.h> +#include <asm/vlsapic.h> /* Global flag to identify whether Intel vmx feature is on */ u32 vmx_enabled = 0; @@ -96,7 +98,7 @@ if (!(vp_env_info & VP_OPCODE)) printk("WARNING: no opcode provided from hardware(%lx)!!!\n", vp_env_info); vm_order = get_order(buffer_size); - printk("vm buffer size: %ld, order: %ld\n", buffer_size, vm_order); + printk("vm buffer size: %ld, order: %d\n", buffer_size, vm_order); vmx_enabled = 1; no_vti: @@ -161,7 +163,7 @@ return NULL; } - printk("vpd base: 0x%lx, vpd size:%d\n", vpd, sizeof(vpd_t)); + printk("vpd base: 0x%p, vpd size:%ld\n", vpd, sizeof(vpd_t)); memset(vpd, 0, VPD_SIZE); /* CPUID init */ for (i = 0; i < 5; i++) @@ -234,7 +236,7 @@ { u64 status; - status = ia64_pal_vp_restore(v->arch.privregs, 0); + status = ia64_pal_vp_restore((u64 *)v->arch.privregs, 0); if (status != PAL_STATUS_SUCCESS) panic("Restore vp status failed\n"); @@ -307,7 +309,6 @@ int vmx_alloc_contig_pages(struct domain *d) { - unsigned int order; unsigned long i, j, start,tmp, end, pgnr, conf_nr; struct page_info *page; struct vcpu *v = d->vcpu[0]; diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/vmx/vmx_interrupt.c --- a/xen/arch/ia64/vmx/vmx_interrupt.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/vmx/vmx_interrupt.c Mon Mar 20 08:56:54 2006 @@ -334,12 +334,13 @@ * @ Nat Consumption Vector * Refer to SDM Vol2 Table 5-6 & 8-1 */ - +#if 0 static void ir_nat_page_consumption (VCPU *vcpu, u64 vadr) { _nat_consumption_fault(vcpu, vadr, DATA); } +#endif //shadow it due to no use currently /* * Instruction Nat Page Consumption Fault diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/vmx/vmx_irq_ia64.c --- a/xen/arch/ia64/vmx/vmx_irq_ia64.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/vmx/vmx_irq_ia64.c Mon Mar 20 08:56:54 2006 @@ -128,6 +128,6 @@ * come through until ia64_eoi() has been done. 
*/ vmx_irq_exit(); - if (current && wake_dom0 != dom0 ) + if (wake_dom0 && current->domain != dom0 ) vcpu_wake(dom0->vcpu[0]); } diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/vmx/vmx_phy_mode.c --- a/xen/arch/ia64/vmx/vmx_phy_mode.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/vmx/vmx_phy_mode.c Mon Mar 20 08:56:54 2006 @@ -104,57 +104,51 @@ vcpu->arch.mode_flags = GUEST_IN_PHY; } -extern u64 get_mfn(struct domain *d, u64 gpfn); extern void vmx_switch_rr7(unsigned long ,shared_info_t*,void *,void *,void *); -void -physical_itlb_miss_dom0(VCPU *vcpu, u64 vadr) +/*void +physical_itlb_miss(VCPU *vcpu, u64 vadr) { u64 psr; IA64_PSR vpsr; - u64 mppn,gppn; + u64 xen_mppn,xen_gppn; vpsr.val=vmx_vcpu_get_psr(vcpu); - gppn=(vadr<<1)>>13; - mppn = get_mfn(vcpu->domain,gppn); - mppn=(mppn<<12)|(vpsr.cpl<<7); -// if(vadr>>63) -// mppn |= PHY_PAGE_UC; -// else - mppn |= PHY_PAGE_WB; + xen_gppn=(vadr<<1)>>(PAGE_SHIFT+1); + xen_mppn = gmfn_to_mfn(vcpu->domain, xen_gppn); + xen_mppn=(xen_mppn<<PAGE_SHIFT)|(vpsr.cpl<<7); + if(vadr>>63) + xen_mppn |= PHY_PAGE_UC; + else + xen_mppn |= PHY_PAGE_WB; psr=ia64_clear_ic(); - ia64_itc(1,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT); + ia64_itc(1,vadr&PAGE_MASK,xen_mppn,PAGE_SHIFT); ia64_set_psr(psr); ia64_srlz_i(); return; } - -void -physical_itlb_miss(VCPU *vcpu, u64 vadr) -{ - physical_itlb_miss_dom0(vcpu, vadr); -} - - -void -physical_dtlb_miss(VCPU *vcpu, u64 vadr) +*/ +/* + * vec=1, itlb miss + * vec=2, dtlb miss + */ +void +physical_tlb_miss(VCPU *vcpu, u64 vadr, u64 vec) { u64 psr; IA64_PSR vpsr; - u64 mppn,gppn; -// if(vcpu->domain!=dom0) -// panic("dom n physical dtlb miss happen\n"); + u64 xen_mppn,xen_gppn; vpsr.val=vmx_vcpu_get_psr(vcpu); - gppn=(vadr<<1)>>13; - mppn = get_mfn(vcpu->domain, gppn); - mppn=(mppn<<12)|(vpsr.cpl<<7); + xen_gppn=(vadr<<1)>>(PAGE_SHIFT+1); + xen_mppn = gmfn_to_mfn(vcpu->domain, xen_gppn); + xen_mppn=(xen_mppn<<PAGE_SHIFT)|(vpsr.cpl<<7); if(vadr>>63) - mppn |= PHY_PAGE_UC; + xen_mppn |= PHY_PAGE_UC; else - mppn |= PHY_PAGE_WB; + xen_mppn |= PHY_PAGE_WB; psr=ia64_clear_ic(); - ia64_itc(2,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT); + ia64_itc(vec,vadr&PAGE_MASK,xen_mppn,PAGE_SHIFT); ia64_set_psr(psr); ia64_srlz_i(); return; @@ -193,13 +187,13 @@ if (is_physical_mode(vcpu)) { if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) panic("Unexpected domain switch in phy emul\n"); - phy_rr.rrval = vcpu->domain->arch.metaphysical_rr0; - phy_rr.ps = EMUL_PHY_PAGE_SHIFT; + phy_rr.rrval = vcpu->arch.metaphysical_rr0; + // phy_rr.ps = PAGE_SHIFT; phy_rr.ve = 1; ia64_set_rr((VRN0 << VRN_SHIFT), phy_rr.rrval); - phy_rr.rrval = vcpu->domain->arch.metaphysical_rr4; - phy_rr.ps = EMUL_PHY_PAGE_SHIFT; + phy_rr.rrval = vcpu->arch.metaphysical_rr4; +// phy_rr.ps = PAGE_SHIFT; phy_rr.ve = 1; ia64_set_rr((VRN4 << VRN_SHIFT), phy_rr.rrval); @@ -224,7 +218,7 @@ extern void * pal_vaddr; vmx_switch_rr7(vmx_vrrtomrr(vcpu,VMX(vcpu, vrr[VRN7])),(void *)vcpu->domain->shared_info, (void *)vcpu->arch.privregs, - ( void *)vcpu->arch.vtlb->ts->vhpt->hash, pal_vaddr ); + (void *)vcpu->arch.vtlb->vhpt->hash, pal_vaddr ); ia64_set_pta(vcpu->arch.arch_vmx.mpta); ia64_srlz_d(); @@ -242,12 +236,12 @@ /* Save original virtual mode rr[0] and rr[4] */ psr=ia64_clear_ic(); phy_rr.rrval = vcpu->domain->arch.metaphysical_rr0; - phy_rr.ps = EMUL_PHY_PAGE_SHIFT; +// phy_rr.ps = EMUL_PHY_PAGE_SHIFT; phy_rr.ve = 1; ia64_set_rr(VRN0<<VRN_SHIFT, phy_rr.rrval); ia64_srlz_d(); phy_rr.rrval = vcpu->domain->arch.metaphysical_rr4; - phy_rr.ps = EMUL_PHY_PAGE_SHIFT; +// phy_rr.ps = 
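[Note: the vmx_irq_ia64.c one-liner above is a real bug fix. The intent is "wake dom0's vcpu if asked, unless we are already running as dom0", which the old test (comparing the wake_dom0 flag against the dom0 domain pointer) never expressed. The larger vmx_phy_mode.c change folds the two nearly identical miss handlers into one physical_tlb_miss(vcpu, vadr, vec), with vec (1 = ITLB, 2 = DTLB) passed straight through to ia64_itc. Condensing the inserted lines: bit 63 of vadr is a cacheability hint, not part of the address, so it is shifted out before the gpfn-to-mfn translation and folded back in as the memory attribute:

    xen_gppn = (vadr << 1) >> (PAGE_SHIFT + 1);  /* drop bit 63, then page number */
    xen_mppn = gmfn_to_mfn(vcpu->domain, xen_gppn);
    xen_mppn = (xen_mppn << PAGE_SHIFT) | (vpsr.cpl << 7)    /* PL field   */
             | ((vadr >> 63) ? PHY_PAGE_UC : PHY_PAGE_WB);   /* attribute  */
    ia64_itc(vec, vadr & PAGE_MASK, xen_mppn, PAGE_SHIFT);
]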
EMUL_PHY_PAGE_SHIFT; phy_rr.ve = 1; ia64_set_rr(VRN4<<VRN_SHIFT, phy_rr.rrval); ia64_srlz_d(); @@ -266,10 +260,10 @@ psr=ia64_clear_ic(); - mrr=vmx_vcpu_rr(vcpu,VRN0<<VRN_SHIFT); + vcpu_get_rr(vcpu,VRN0<<VRN_SHIFT,&mrr.rrval); ia64_set_rr(VRN0<<VRN_SHIFT, vmx_vrrtomrr(vcpu, mrr.rrval)); ia64_srlz_d(); - mrr=vmx_vcpu_rr(vcpu,VRN4<<VRN_SHIFT); + vcpu_get_rr(vcpu,VRN4<<VRN_SHIFT,&mrr.rrval); ia64_set_rr(VRN4<<VRN_SHIFT, vmx_vrrtomrr(vcpu, mrr.rrval)); ia64_srlz_d(); ia64_set_psr(psr); diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/vmx/vmx_process.c --- a/xen/arch/ia64/vmx/vmx_process.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/vmx/vmx_process.c Mon Mar 20 08:56:54 2006 @@ -58,6 +58,11 @@ extern void die_if_kernel(char *str, struct pt_regs *regs, long err); extern void rnat_consumption (VCPU *vcpu); +extern unsigned long translate_domain_mpaddr(unsigned long mpaddr); +extern void alt_itlb (VCPU *vcpu, u64 vadr); +extern void itlb_fault (VCPU *vcpu, u64 vadr); +extern void ivhpt_fault (VCPU *vcpu, u64 vadr); + #define DOMN_PAL_REQUEST 0x110000 static UINT64 vec2off[68] = {0x0,0x400,0x800,0xc00,0x1000, 0x1400,0x1800, @@ -292,10 +297,9 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* regs) { IA64_PSR vpsr; - CACHE_LINE_TYPE type=ISIDE_TLB; + int type=ISIDE_TLB; u64 vhpt_adr, gppa; ISR misr; - ia64_rr vrr; // REGS *regs; thash_cb_t *vtlb; thash_data_t *data; @@ -315,34 +319,32 @@ return; } */ - if(vadr == 0x1ea18c00 ){ +/* if(vadr == 0x1ea18c00 ){ ia64_clear_ic(); while(1); } + */ if(is_physical_mode(v)&&(!(vadr<<1>>62))){ - if(vec==1){ - physical_itlb_miss(v, vadr); - return IA64_FAULT; - } if(vec==2){ if(v->domain!=dom0&&__gpfn_is_io(v->domain,(vadr<<1)>>(PAGE_SHIFT+1))){ emulate_io_inst(v,((vadr<<1)>>1),4); // UC - }else{ - physical_dtlb_miss(v, vadr); + return IA64_FAULT; } - return IA64_FAULT; } - } - vrr = vmx_vcpu_rr(v, vadr); + physical_tlb_miss(v, vadr, vec); + return IA64_FAULT; + } if(vec == 1) type = ISIDE_TLB; else if(vec == 2) type = DSIDE_TLB; else panic("wrong vec\n"); // prepare_if_physical_mode(v); - if((data=vtlb_lookup_ex(vtlb, vrr.rid, vadr,type))!=0){ - gppa = (vadr&((1UL<<data->ps)-1))+(data->ppn>>(data->ps-12)<<data->ps); - if(v->domain!=dom0&&type==DSIDE_TLB && __gpfn_is_io(v->domain,gppa>>PAGE_SHIFT)){ + if((data=vtlb_lookup(vtlb, vadr,type))!=0){ +// gppa = (vadr&((1UL<<data->ps)-1))+(data->ppn>>(data->ps-12)<<data->ps); +// if(v->domain!=dom0&&type==DSIDE_TLB && __gpfn_is_io(v->domain,gppa>>PAGE_SHIFT)){ + if(v->domain!=dom0 && data->io && type==DSIDE_TLB ){ + gppa = (vadr&((1UL<<data->ps)-1))+(data->ppn>>(data->ps-12)<<data->ps); emulate_io_inst(v, gppa, data->ma); return IA64_FAULT; } @@ -356,7 +358,7 @@ } else{ */ - thash_vhpt_insert(vtlb->ts->vhpt,data,vadr); + thash_vhpt_insert(vtlb->vhpt,data->page_flags, data->itir ,vadr); // } // } }else if(type == DSIDE_TLB){ @@ -377,8 +379,7 @@ } } else{ vmx_vcpu_thash(v, vadr, &vhpt_adr); - vrr=vmx_vcpu_rr(v,vhpt_adr); - if(vhpt_lookup(vhpt_adr) || vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB)){ + if(vhpt_lookup(vhpt_adr) || vtlb_lookup(vtlb, vhpt_adr, DSIDE_TLB)){ if(vpsr.ic){ vcpu_set_isr(v, misr.val); dtlb_fault(v, vadr); @@ -420,8 +421,7 @@ return IA64_FAULT; } else{ vmx_vcpu_thash(v, vadr, &vhpt_adr); - vrr=vmx_vcpu_rr(v,vhpt_adr); - if(vhpt_lookup(vhpt_adr) || vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB)){ + if(vhpt_lookup(vhpt_adr) || vtlb_lookup(vtlb, vhpt_adr, DSIDE_TLB)){ if(!vpsr.ic){ misr.ni=1; } diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/vmx/vmx_vcpu.c --- a/xen/arch/ia64/vmx/vmx_vcpu.c Mon Mar 
20 08:56:46 2006 +++ b/xen/arch/ia64/vmx/vmx_vcpu.c Mon Mar 20 08:56:54 2006 @@ -204,32 +204,24 @@ } -ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr) -{ - return (ia64_rr)VMX(vcpu,vrr[vadr>>61]); -} - IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val) { ia64_rr oldrr,newrr; thash_cb_t *hcb; extern void * pal_vaddr; - oldrr=vmx_vcpu_rr(vcpu,reg); + vcpu_get_rr(vcpu, reg, &oldrr.rrval); newrr.rrval=val; -#if 1 if(oldrr.ps!=newrr.ps){ hcb = vmx_vcpu_get_vtlb(vcpu); thash_purge_all(hcb); } -#endif VMX(vcpu,vrr[reg>>61]) = val; - switch((u64)(reg>>61)) { case VRN7: - vmx_switch_rr7(vmx_vrrtomrr(vcpu,val),vcpu->domain->shared_info, + vmx_switch_rr7(vmx_vrrtomrr(vcpu,val),vcpu->domain->shared_info, (void *)vcpu->arch.privregs, - ( void *)vcpu->arch.vtlb->ts->vhpt->hash, pal_vaddr ); + (void *)vcpu->arch.vtlb->vhpt->hash, pal_vaddr ); break; default: ia64_set_rr(reg,vmx_vrrtomrr(vcpu,val)); @@ -275,7 +267,7 @@ u64 vmx_vcpu_get_itir_on_fault(VCPU *vcpu, u64 ifa) { ia64_rr rr,rr1; - rr=vmx_vcpu_rr(vcpu,ifa); + vcpu_get_rr(vcpu,ifa,&rr.rrval); rr1.rrval=0; rr1.ps=rr.ps; rr1.rid=rr.rid; diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/vmx/vmx_virt.c --- a/xen/arch/ia64/vmx/vmx_virt.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/vmx/vmx_virt.c Mon Mar 20 08:56:54 2006 @@ -34,6 +34,7 @@ #include <asm/virt_event.h> #include <asm/vmx_phy_mode.h> extern UINT64 privop_trace; +extern void vhpi_detection(VCPU *vcpu);//temporarily place here,need a header file. void ia64_priv_decoder(IA64_SLOT_TYPE slot_type, INST64 inst, UINT64 * cause) @@ -572,7 +573,7 @@ } #endif // VMAL_NO_FAULT_CHECK - return (vmx_vcpu_itr_d(vcpu,pte,itir,ifa,slot)); + return (vmx_vcpu_itr_d(vcpu,slot,pte,itir,ifa)); } IA64FAULT vmx_emul_itr_i(VCPU *vcpu, INST64 inst) @@ -631,7 +632,7 @@ } #endif // VMAL_NO_FAULT_CHECK - return (vmx_vcpu_itr_i(vcpu,pte,itir,ifa,slot)); + return (vmx_vcpu_itr_i(vcpu,slot,pte,itir,ifa)); } IA64FAULT itc_fault_check(VCPU *vcpu, INST64 inst, u64 *itir, u64 *ifa,u64 *pte) @@ -972,7 +973,7 @@ rsv_reg_field(vcpu); } #endif //CHECK_FAULT - vmx_vcpu_get_rr(vcpu,r3,&r1); + vcpu_get_rr(vcpu,r3,&r1); return vcpu_set_gr(vcpu, inst.M43.r1, r1,0); } diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/vmx/vtlb.c --- a/xen/arch/ia64/vmx/vtlb.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/vmx/vtlb.c Mon Mar 20 08:56:54 2006 @@ -32,7 +32,7 @@ #include <asm/tlbflush.h> #define MAX_CCH_LENGTH 40 -thash_data_t *__alloc_chain(thash_cb_t *, thash_data_t *); +thash_data_t *__alloc_chain(thash_cb_t *); static void cch_mem_init(thash_cb_t *hcb) { @@ -71,126 +71,31 @@ * Check to see if the address rid:va is translated by the TLB */ -static int __is_tr_translated(thash_data_t *tlb, u64 rid, u64 va, CACHE_LINE_TYPE cl) -{ - u64 size; - size = PSIZE(tlb->ps); - if(tlb->vadr&(size-1)) - while(1); - if ((tlb->rid == rid) && ((va-tlb->vadr)<size)) - return 1; - else +static inline int __is_tr_translated(thash_data_t *trp, u64 rid, u64 va) +{ + return ((trp->p) && (trp->rid == rid) && ((va-trp->vadr)<PSIZE(trp->ps))); +} + +/* + * Only for GUEST TR format. + */ +static int +__is_tr_overlap(thash_data_t *trp, u64 rid, u64 sva, u64 eva) +{ + uint64_t sa1, ea1; + + if (!trp->p || trp->rid != rid ) { return 0; -} - -/* - * Only for GUEST TR format. 
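[Note: with vmx_vcpu_rr() deleted above, every remaining reader goes through the common vcpu_get_rr(vcpu, va, &rrval) accessor and unpacks the raw value itself. For orientation, the layout being unpacked is the architectural region-register format; this union is a sketch of what the ia64_rr type provides, not code from this patch:

    typedef union {
        u64 rrval;
        struct {
            u64 ve   :  1;   /* VHPT walker enable for this region */
            u64 rsv0 :  1;
            u64 ps   :  6;   /* preferred page size, log2 */
            u64 rid  : 24;   /* region identifier */
            u64 rsv1 : 32;
        };
    } ia64_rr;

This is also why the TR code masks with RR_RID_MASK rather than shifting: the RID field is apparently kept in place, so stored and looked-up values compare directly.]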
- */ -static int -__is_tr_overlap(thash_cb_t *hcb,thash_data_t *entry,int rid, char cl, u64 sva, u64 eva) -{ - uint64_t size, sa1, ea1; - -// if ( entry->invalid || entry->rid != rid || (entry->cl != cl ) ) { - if ( entry->invalid || entry->rid != rid ) { - return 0; - } - size = PSIZE(entry->ps); - sa1 = entry->vadr; - ea1 = sa1 + size -1; + } + sa1 = trp->vadr; + ea1 = sa1 + PSIZE(trp->ps) -1; eva -= 1; - if(sa1&(size-1)) - while(1); if ( (sva>ea1) || (sa1>eva) ) return 0; else return 1; } - -static void __rem_tr (thash_cb_t *hcb, thash_data_t *tr) -{ -/* - if ( hcb->remove_notifier ) { - (hcb->remove_notifier)(hcb,tr); - } -*/ - tr->invalid = 1; -} - -static inline void __set_tr (thash_data_t *tr, thash_data_t *data, int idx) -{ - *tr = *data; - tr->tr_idx = idx; -} - - -static void __init_tr(thash_cb_t *hcb) -{ - int i; - thash_data_t *tr; - - for ( i=0, tr = &ITR(hcb,0); i<NITRS; i++ ) { - tr[i].invalid = 1; - } - for ( i=0, tr = &DTR(hcb,0); i<NDTRS; i++ ) { - tr[i].invalid = 1; - } -} - -/* - * Replace TR entry. - */ -static void rep_tr(thash_cb_t *hcb,thash_data_t *insert, int idx) -{ - thash_data_t *tr; - - if ( insert->cl == ISIDE_TLB ) { - tr = &ITR(hcb,idx); - } - else { - tr = &DTR(hcb,idx); - } - if ( !INVALID_TR(tr) ) { - __rem_tr(hcb, tr); - } - __set_tr (tr, insert, idx); -} - -/* - * remove TR entry. - */ -/* -static void rem_tr(thash_cb_t *hcb,CACHE_LINE_TYPE cl, int idx) -{ - thash_data_t *tr; - - if ( cl == ISIDE_TLB ) { - tr = &ITR(hcb,idx); - } - else { - tr = &DTR(hcb,idx); - } - if ( !INVALID_TR(tr) ) { - __rem_tr(hcb, tr); - } -} - */ -/* - * Delete an thash entry in collision chain. - * prev: the previous entry. - * rem: the removed entry. - */ -/* -static void __rem_chain(thash_cb_t *hcb, thash_data_t *prev, thash_data_t *rem) -{ - //prev->next = rem->next; - if ( hcb->remove_notifier ) { - (hcb->remove_notifier)(hcb,rem); - } - cch_free (hcb, rem); -} - */ /* * Delete an thash entry leading collision chain. @@ -212,69 +117,35 @@ } } -thash_data_t *__vtr_lookup(thash_cb_t *hcb, - u64 rid, u64 va, - CACHE_LINE_TYPE cl) -{ - thash_data_t *tr; - int num,i; - - if ( cl == ISIDE_TLB ) { - tr = &ITR(hcb,0); - num = NITRS; +thash_data_t *__vtr_lookup(VCPU *vcpu, u64 va, int is_data) +{ + + thash_data_t *trp; + int i; + u64 rid; + vcpu_get_rr(vcpu, va, &rid); + rid = rid&RR_RID_MASK;; + if (is_data) { + if (vcpu_quick_region_check(vcpu->arch.dtr_regions,va)) { + for (trp =(thash_data_t *) vcpu->arch.dtrs,i=0; i<NDTRS; i++, trp++) { + if (__is_tr_translated(trp, rid, va)) { + return trp; + } + } + } } else { - tr = &DTR(hcb,0); - num = NDTRS; - } - for ( i=0; i<num; i++ ) { - if ( !INVALID_TR(&tr[i]) && - __is_tr_translated(&tr[i], rid, va, cl) ) - return &tr[i]; + if (vcpu_quick_region_check(vcpu->arch.itr_regions,va)) { + for (trp =(thash_data_t *) vcpu->arch.itrs,i=0; i<NITRS; i++, trp++) { + if (__is_tr_translated(trp, rid, va)) { + return trp; + } + } + } } return NULL; } - -/* - * Find overlap VHPT entry within current collision chain - * base on internal priv info. - */ -/* -static inline thash_data_t* _vhpt_next_overlap_in_chain(thash_cb_t *hcb) -{ - thash_data_t *cch; - thash_internal_t *priv = &hcb->priv; - - - for (cch=priv->cur_cch; cch; cch = cch->next) { - if ( priv->tag == cch->etag ) { - return cch; - } - } - return NULL; -} -*/ -/* - * Find overlap TLB/VHPT entry within current collision chain - * base on internal priv info. 
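[Note: the rewritten TR predicates above are tighter than what they replace. The hang-forever debug traps (while(1) on a misaligned vadr) are gone, and the containment test rides on unsigned wrap-around, so a single compare covers both bounds. A minimal illustration (in_range is a hypothetical name; the expression is the one used above):

    /* true iff base <= va < base + size; if va < base the u64
     * subtraction wraps to a huge value and the compare fails */
    static inline int in_range(u64 va, u64 base, u64 size)
    {
        return (va - base) < size;
    }
]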
- */ -/* -static thash_data_t *_vtlb_next_overlap_in_chain(thash_cb_t *hcb) -{ - thash_data_t *cch; - thash_internal_t *priv = &hcb->priv; - - // Find overlap TLB entry - for (cch=priv->cur_cch; cch; cch = cch->next) { - if ( ( cch->tc ? priv->s_sect.tc : priv->s_sect.tr ) && - __is_translated( cch, priv->rid, priv->_curva, priv->cl)) { - return cch; - } - } - return NULL; -} - */ /* * Get the machine format of VHPT entry. @@ -292,24 +163,16 @@ * 0/1: means successful or fail. * */ -int __tlb_to_vhpt(thash_cb_t *hcb, - thash_data_t *tlb, u64 va, - thash_data_t *vhpt) +int __tlb_to_vhpt(thash_cb_t *hcb, thash_data_t *vhpt, u64 va) { u64 padr,pte; -// ia64_rr vrr; ASSERT ( hcb->ht == THASH_VHPT ); -// vrr = (hcb->get_rr_fn)(hcb->vcpu,va); - padr = tlb->ppn >>(tlb->ps-ARCH_PAGE_SHIFT)<<tlb->ps; - padr += va&((1UL<<tlb->ps)-1); + padr = vhpt->ppn >>(vhpt->ps-ARCH_PAGE_SHIFT)<<vhpt->ps; + padr += va&((1UL<<vhpt->ps)-1); pte=lookup_domain_mpa(current->domain,padr); if((pte>>56)) return 0; - // TODO with machine discontinuous address space issue. vhpt->etag = ia64_ttag(va); - //vhpt->ti = 0; - vhpt->itir = tlb->itir & ~ITIR_RV_MASK; - vhpt->page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK; vhpt->ps = PAGE_SHIFT; vhpt->ppn = (pte&((1UL<<IA64_MAX_PHYS_BITS)-(1UL<<PAGE_SHIFT)))>>ARCH_PAGE_SHIFT; vhpt->next = 0; @@ -331,17 +194,20 @@ /* vhpt only has entries with PAGE_SIZE page size */ -void thash_vhpt_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va) +void thash_vhpt_insert(thash_cb_t *hcb, u64 pte, u64 itir, u64 ifa) { thash_data_t vhpt_entry, *hash_table, *cch; + vhpt_entry.page_flags = pte & ~PAGE_FLAGS_RV_MASK; + vhpt_entry.itir=itir; + // ia64_rr vrr; - if ( !__tlb_to_vhpt(hcb, entry, va, &vhpt_entry) ) { + if ( !__tlb_to_vhpt(hcb, &vhpt_entry, ifa) ) { return; //panic("Can't convert to machine VHPT entry\n"); } - hash_table = (thash_data_t *)ia64_thash(va); + hash_table = (thash_data_t *)ia64_thash(ifa); if( INVALID_VHPT(hash_table) ) { *hash_table = vhpt_entry; hash_table->next = 0; @@ -358,6 +224,7 @@ } cch = cch->next; } + if(hash_table->len>=MAX_CCN_DEPTH){ thash_remove_cch(hcb, hash_table); cch = cch_alloc(hcb); @@ -367,9 +234,9 @@ hash_table->next = cch; return; } - + // TODO: Add collision chain length limitation. - cch = __alloc_chain(hcb,entry); + cch = __alloc_chain(hcb); if(cch == NULL){ *hash_table = vhpt_entry; hash_table->next = 0; @@ -377,10 +244,8 @@ *cch = *hash_table; *hash_table = vhpt_entry; hash_table->next = cch; - hash_table->len = cch->len + 1; - cch->len = 0; -// if(hash_table->tag==hash_table->next->tag) -// while(1); + hash_table->len = cch->len + 1; + cch->len = 0; } return /*hash_table*/; @@ -414,7 +279,7 @@ thash_data_t *hash_table, *prev, *next; u64 start, end, size, tag, rid; ia64_rr vrr; - vrr=vmx_vcpu_rr(current, va); + vcpu_get_rr(current, va, &vrr.rrval); rid = vrr.rid; size = PSIZE(ps); start = va & (-size); @@ -480,36 +345,6 @@ } machine_tlb_purge(va, ps); } -/* - * Insert an entry to hash table. - * NOTES: - * 1: TLB entry may be TR, TC or Foreign Map. For TR entry, - * itr[]/dtr[] need to be updated too. - * 2: Inserting to collision chain may trigger recycling if - * the buffer for collision chain is empty. - * 3: The new entry is inserted at the next of hash table. - * (I.e. head of the collision chain) - * 4: The buffer holding the entry is allocated internally - * from cch_buf or just in the hash table. - * 5: Return the entry in hash table or collision chain. - * 6: Input parameter, entry, should be in TLB format. - * I.e. Has va, rid, ps... 
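[Note: __tlb_to_vhpt() above now works on the VHPT entry in place and enforces the invariant that the machine VHPT only ever holds PAGE_SHIFT-sized entries. Condensed from the hunk: compute the guest-physical address covered by the faulting va, translate it to machine via lookup_domain_mpa(), and bail out when the result carries a marker in its top byte (apparently the invalid-translation signal):

    padr  = (vhpt->ppn >> (vhpt->ps - ARCH_PAGE_SHIFT)) << vhpt->ps;
    padr += va & ((1UL << vhpt->ps) - 1);   /* offset within the guest page */
    pte = lookup_domain_mpa(current->domain, padr);
    if (pte >> 56)
        return 0;                           /* not translatable: skip VHPT */
    vhpt->etag = ia64_ttag(va);
    vhpt->ps   = PAGE_SHIFT;                /* VHPT entries are one page */
]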
- * 7: This API is invoked by emulating ITC/ITR and tlb_miss. - * - */ - -void thash_tr_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va, int idx) -{ - if ( hcb->ht != THASH_TLB || entry->tc ) { - panic("wrong parameter\n"); - } - entry->vadr = PAGEALIGN(entry->vadr,entry->ps); - entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12); - rep_tr(hcb, entry, idx); -// thash_vhpt_insert(hcb->ts->vhpt, entry, va); - return ; -} - /* * Recycle all collisions chain in VTLB or VHPT. @@ -525,30 +360,13 @@ thash_remove_cch(hcb,hash_table); } } -/* -thash_data_t *vtlb_alloc_chain(thash_cb_t *hcb,thash_data_t *entry) + +thash_data_t *__alloc_chain(thash_cb_t *hcb) { thash_data_t *cch; cch = cch_alloc(hcb); if(cch == NULL){ - thash_recycle_cch(hcb); - cch = cch_alloc(hcb); - } - return cch; -} -*/ - -thash_data_t *__alloc_chain(thash_cb_t *hcb,thash_data_t *entry) -{ - thash_data_t *cch; - - cch = cch_alloc(hcb); - if(cch == NULL){ - // recycle -// if ( hcb->recycle_notifier ) { -// hcb->recycle_notifier(hcb,(u64)entry); -// } thash_recycle_cch(hcb); cch = cch_alloc(hcb); } @@ -564,474 +382,117 @@ * 3: The caller need to make sure the new entry will not overlap * with any existed entry. */ -void vtlb_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va) +void vtlb_insert(thash_cb_t *hcb, u64 pte, u64 itir, u64 va) { thash_data_t *hash_table, *cch; /* int flag; */ ia64_rr vrr; /* u64 gppn, ppns, ppne; */ - u64 tag; - vrr=vmx_vcpu_rr(current, va); - if (vrr.ps != entry->ps) { + u64 tag, ps; + ps = itir_ps(itir); + vcpu_get_rr(current, va, &vrr.rrval); + if (vrr.ps != ps) { // machine_tlb_insert(hcb->vcpu, entry); panic("not preferred ps with va: 0x%lx\n", va); return; } - entry->vadr = PAGEALIGN(entry->vadr,entry->ps); - entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12); hash_table = vsa_thash(hcb->pta, va, vrr.rrval, &tag); - entry->etag = tag; if( INVALID_TLB(hash_table) ) { - *hash_table = *entry; + hash_table->page_flags = pte; + hash_table->itir=itir; + hash_table->etag=tag; hash_table->next = 0; } else if (hash_table->len>=MAX_CCN_DEPTH){ thash_remove_cch(hcb, hash_table); cch = cch_alloc(hcb); *cch = *hash_table; - *hash_table = *entry; + hash_table->page_flags = pte; + hash_table->itir=itir; + hash_table->etag=tag; hash_table->len = 1; hash_table->next = cch; } + else { // TODO: Add collision chain length limitation. 
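[Note: vtlb_insert() now takes the raw (pte, itir, va) triple and writes page_flags/itir/etag into the hash head directly instead of copying a whole thash_data_t. The insertion itself is a head-of-chain pattern with two escape hatches: chains longer than MAX_CCN_DEPTH are recycled, and if no chain node can be allocated the head is simply overwritten. The chain length is tracked in the head entry only. Roughly, with set_entry a hypothetical shorthand for the three field assignments above:

    head = vsa_thash(hcb->pta, va, vrr.rrval, &tag);
    if (INVALID_TLB(head)) {
        set_entry(head, pte, itir, tag);     /* empty bucket: fill in place */
        head->next = 0;
    } else if ((cch = __alloc_chain(hcb)) != NULL) {
        *cch = *head;                        /* old head moves down the chain */
        set_entry(head, pte, itir, tag);
        head->next = cch;
        head->len  = cch->len + 1;           /* length lives in the head */
        cch->len   = 0;
    } else {
        set_entry(head, pte, itir, tag);     /* worst case: evict in place */
        head->next = 0;
    }
]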
- cch = __alloc_chain(hcb,entry); + cch = __alloc_chain(hcb); if(cch == NULL){ - *hash_table = *entry; + hash_table->page_flags = pte; + hash_table->itir=itir; + hash_table->etag=tag; hash_table->next = 0; }else{ *cch = *hash_table; - *hash_table = *entry; + hash_table->page_flags = pte; + hash_table->itir=itir; + hash_table->etag=tag; hash_table->next = cch; hash_table->len = cch->len + 1; cch->len = 0; } } -#if 0 - if(hcb->vcpu->domain->domain_id==0){ - thash_insert(hcb->ts->vhpt, entry, va); - return; - } -#endif -/* - flag = 1; - gppn = (POFFSET(va,entry->ps)|PAGEALIGN((entry->ppn<<12),entry->ps))>>PAGE_SHIFT; - ppns = PAGEALIGN((entry->ppn<<12),entry->ps); - ppne = ppns + PSIZE(entry->ps); - if(((ppns<=0xa0000)&&(ppne>0xa0000))||((ppne>0xc0000)&&(ppns<=0xc0000))) - flag = 0; - if((__gpfn_is_mem(hcb->vcpu->domain, gppn)&&flag)) - thash_insert(hcb->ts->vhpt, entry, va); -*/ return ; } -/* -void thash_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va) -{ - thash_data_t *hash_table; - ia64_rr vrr; - - vrr = vmx_vcpu_rr(hcb->vcpu,entry->vadr); - if ( entry->ps != vrr.ps && entry->tc ) { - panic("Not support for multiple page size now\n"); - } - entry->vadr = PAGEALIGN(entry->vadr,entry->ps); - entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12); - (hcb->ins_hash)(hcb, entry, va); - -} -*/ -/* -static void rem_thash(thash_cb_t *hcb, thash_data_t *entry) -{ - thash_data_t *hash_table, *p, *q; - thash_internal_t *priv = &hcb->priv; - int idx; - - hash_table = priv->hash_base; - if ( hash_table == entry ) { -// if ( PURGABLE_ENTRY(hcb, entry) ) { - __rem_hash_head (hcb, entry); -// } - return ; - } - // remove from collision chain - p = hash_table; - for ( q=p->next; q; q = p->next ) { - if ( q == entry ){ -// if ( PURGABLE_ENTRY(hcb,q ) ) { - p->next = q->next; - __rem_chain(hcb, entry); - hash_table->len--; -// } - return ; - } - p = q; - } - panic("Entry not existed or bad sequence\n"); -} -*/ -/* -static void rem_vtlb(thash_cb_t *hcb, thash_data_t *entry) -{ - thash_data_t *hash_table, *p, *q; - thash_internal_t *priv = &hcb->priv; - int idx; - - if ( !entry->tc ) { - return rem_tr(hcb, entry->cl, entry->tr_idx); - } - rem_thash(hcb, entry); -} -*/ -int cch_depth=0; -/* - * Purge the collision chain starting from cch. - * NOTE: - * For those UN-Purgable entries(FM), this function will return - * the head of left collision chain. - */ -/* -static thash_data_t *thash_rem_cch(thash_cb_t *hcb, thash_data_t *cch) -{ - thash_data_t *next; - -// if ( ++cch_depth > MAX_CCH_LENGTH ) { -// printf ("cch length > MAX_CCH_LENGTH, exceed the expected length\n"); -// while(1); -// } - if ( cch -> next ) { - next = thash_rem_cch(hcb, cch->next); +int vtr_find_overlap(VCPU *vcpu, u64 va, u64 ps, int is_data) +{ + thash_data_t *trp; + int i; + u64 end, rid; + vcpu_get_rr(vcpu, va, &rid); + rid = rid&RR_RID_MASK;; + end = va + PSIZE(ps); + if (is_data) { + if (vcpu_quick_region_check(vcpu->arch.dtr_regions,va)) { + for (trp =(thash_data_t *) vcpu->arch.dtrs,i=0; i<NDTRS; i++, trp++) { + if (__is_tr_overlap(trp, rid, va, end )) { + return i; + } + } + } } else { - next = NULL; - } - if ( PURGABLE_ENTRY(hcb, cch) ) { - __rem_chain(hcb, cch); - return next; - } - else { - cch->next = next; - return cch; - } -} - */ - -/* - * Purge one hash line (include the entry in hash table). - * Can only be called by thash_purge_all. 
- * Input: - * hash: The head of collision chain (hash table) - * - */ -/* -static void thash_rem_line(thash_cb_t *hcb, thash_data_t *hash) -{ - if ( INVALID_ENTRY(hcb, hash) ) return; - - if ( hash->next ) { - cch_depth = 0; - hash->next = thash_rem_cch(hcb, hash->next); - } - // Then hash table itself. - if ( PURGABLE_ENTRY(hcb, hash) ) { - __rem_hash_head(hcb, hash); - } -} - */ - -/* - * Find an overlap entry in hash table and its collision chain. - * Refer to SDM2 4.1.1.4 for overlap definition. - * PARAS: - * 1: in: TLB format entry, rid:ps must be same with vrr[]. - * va & ps identify the address space for overlap lookup - * 2: section can be combination of TR, TC and FM. (THASH_SECTION_XX) - * 3: cl means I side or D side. - * RETURNS: - * NULL to indicate the end of findings. - * NOTES: - * - */ - -/* -thash_data_t *thash_find_overlap(thash_cb_t *hcb, - thash_data_t *in, search_section_t s_sect) -{ - return (hcb->find_overlap)(hcb, in->vadr, - PSIZE(in->ps), in->rid, in->cl, s_sect); -} -*/ - -/* -static thash_data_t *vtlb_find_overlap(thash_cb_t *hcb, - u64 va, u64 size, int rid, char cl, search_section_t s_sect) -{ - thash_data_t *hash_table; - thash_internal_t *priv = &hcb->priv; - u64 tag; - ia64_rr vrr; - - priv->_curva = va & ~(size-1); - priv->_eva = priv->_curva + size; - priv->rid = rid; - vrr = vmx_vcpu_rr(hcb->vcpu,va); - priv->ps = vrr.ps; - hash_table = vsa_thash(hcb->pta, priv->_curva, vrr.rrval, &tag); - priv->s_sect = s_sect; - priv->cl = cl; - priv->_tr_idx = 0; - priv->hash_base = hash_table; - priv->cur_cch = hash_table; - return (hcb->next_overlap)(hcb); -} -*/ - -/* -static thash_data_t *vhpt_find_overlap(thash_cb_t *hcb, - u64 va, u64 size, int rid, char cl, search_section_t s_sect) -{ - thash_data_t *hash_table; - thash_internal_t *priv = &hcb->priv; - u64 tag; - ia64_rr vrr; - - priv->_curva = va & ~(size-1); - priv->_eva = priv->_curva + size; - priv->rid = rid; - vrr = vmx_vcpu_rr(hcb->vcpu,va); - priv->ps = vrr.ps; - hash_table = ia64_thash(priv->_curva); - tag = ia64_ttag(priv->_curva); - priv->tag = tag; - priv->hash_base = hash_table; - priv->cur_cch = hash_table; - return (hcb->next_overlap)(hcb); -} -*/ - - -thash_data_t *vtr_find_overlap(thash_cb_t *hcb, thash_data_t *data, char cl) -{ - thash_data_t *tr; - int i,num; - u64 end; - - if (cl == ISIDE_TLB ) { - num = NITRS; - tr = &ITR(hcb,0); - } - else { - num = NDTRS; - tr = &DTR(hcb,0); - } - end=data->vadr + PSIZE(data->ps); - for (i=0; i<num; i++ ) { - if ( __is_tr_overlap(hcb, &tr[i], data->rid, cl, data->vadr, end )) { - return &tr[i]; - } - } - return NULL; -} - - -/* -static thash_data_t *vtr_find_next_overlap(thash_cb_t *hcb) -{ - thash_data_t *tr; - thash_internal_t *priv = &hcb->priv; - int num; - - if ( priv->cl == ISIDE_TLB ) { - num = NITRS; - tr = &ITR(hcb,0); - } - else { - num = NDTRS; - tr = &DTR(hcb,0); - } - for (; priv->_tr_idx < num; priv->_tr_idx ++ ) { - if ( __is_tr_overlap(hcb, &tr[priv->_tr_idx], - priv->rid, priv->cl, - priv->_curva, priv->_eva) ) { - return &tr[priv->_tr_idx++]; - } - } - return NULL; -} -*/ - -/* - * Similar with vtlb_next_overlap but find next entry. - * NOTES: - * Intermediate position information is stored in hcb->priv. 
- */ -/* -static thash_data_t *vtlb_next_overlap(thash_cb_t *hcb) -{ - thash_data_t *ovl; - thash_internal_t *priv = &hcb->priv; - u64 addr,rr_psize,tag; - ia64_rr vrr; - - if ( priv->s_sect.tr ) { - ovl = vtr_find_next_overlap (hcb); - if ( ovl ) return ovl; - priv->s_sect.tr = 0; - } - if ( priv->s_sect.v == 0 ) return NULL; - vrr = vmx_vcpu_rr(hcb->vcpu,priv->_curva); - rr_psize = PSIZE(vrr.ps); - - while ( priv->_curva < priv->_eva ) { - if ( !INVALID_ENTRY(hcb, priv->hash_base) ) { - ovl = _vtlb_next_overlap_in_chain(hcb); - if ( ovl ) { - priv->cur_cch = ovl->next; - return ovl; + if (vcpu_quick_region_check(vcpu->arch.itr_regions,va)) { + for (trp =(thash_data_t *) vcpu->arch.itrs,i=0; i<NITRS; i++, trp++) { + if (__is_tr_overlap(trp, rid, va, end )) { + return i; + } } } - priv->_curva += rr_psize; - priv->hash_base = vsa_thash( hcb->pta, priv->_curva, vrr.rrval, &tag); - priv->cur_cch = priv->hash_base; - } - return NULL; -} - */ - - -/* -static thash_data_t *vhpt_next_overlap(thash_cb_t *hcb) -{ - thash_data_t *ovl; - thash_internal_t *priv = &hcb->priv; - u64 addr,rr_psize; - ia64_rr vrr; - - vrr = vmx_vcpu_rr(hcb->vcpu,priv->_curva); - rr_psize = PSIZE(vrr.ps); - - while ( priv->_curva < priv->_eva ) { - if ( !INVALID_ENTRY(hcb, priv->hash_base) ) { - ovl = _vhpt_next_overlap_in_chain(hcb); - if ( ovl ) { - priv->cur_cch = ovl->next; - return ovl; - } - } - priv->_curva += rr_psize; - priv->hash_base = ia64_thash(priv->_curva); - priv->tag = ia64_ttag(priv->_curva); - priv->cur_cch = priv->hash_base; - } - return NULL; -} -*/ - -/* - * Find and purge overlap entries in hash table and its collision chain. - * PARAS: - * 1: in: TLB format entry, rid:ps must be same with vrr[]. - * rid, va & ps identify the address space for purge - * 2: section can be combination of TR, TC and FM. (thash_SECTION_XX) - * 3: cl means I side or D side. - * NOTES: - * - */ -void thash_purge_entries(thash_cb_t *hcb, - thash_data_t *in, search_section_t p_sect) -{ - return thash_purge_entries_ex(hcb, in->rid, in->vadr, - in->ps, p_sect, in->cl); -} - -void thash_purge_entries_ex(thash_cb_t *hcb, - u64 rid, u64 va, u64 ps, - search_section_t p_sect, - CACHE_LINE_TYPE cl) -{ -/* - thash_data_t *ovl; - - ovl = (hcb->find_overlap)(hcb, va, PSIZE(ps), rid, cl, p_sect); - while ( ovl != NULL ) { - (hcb->rem_hash)(hcb, ovl); - ovl = (hcb->next_overlap)(hcb); - }; - */ + } + return -1; +} + +/* + * Purge entries in VTLB and VHPT + */ +void thash_purge_entries(thash_cb_t *hcb, u64 va, u64 ps) +{ vtlb_purge(hcb, va, ps); - vhpt_purge(hcb->ts->vhpt, va, ps); -} + vhpt_purge(hcb->vhpt, va, ps); +} + /* * Purge overlap TCs and then insert the new entry to emulate itc ops. * Notes: Only TC entry can purge and insert. 
*/ -void thash_purge_and_insert(thash_cb_t *hcb, thash_data_t *in, u64 va) -{ - /* thash_data_t *ovl; */ - search_section_t sections; - -#ifdef XEN_DEBUGGER - vrr = vmx_vcpu_rr(hcb->vcpu,in->vadr); - if ( in->ps != vrr.ps || hcb->ht != THASH_TLB || !in->tc ) { - panic ("Oops, wrong call for purge_and_insert\n"); - return; - } -#endif - in->vadr = PAGEALIGN(in->vadr,in->ps); - in->ppn = PAGEALIGN(in->ppn, in->ps-12); - sections.tr = 0; - sections.tc = 1; -/* - ovl = (hcb->find_overlap)(hcb, in->vadr, PSIZE(in->ps), - in->rid, in->cl, sections); - if(ovl) - (hcb->rem_hash)(hcb, ovl); - */ - vtlb_purge(hcb, va, in->ps); - vhpt_purge(hcb->ts->vhpt, va, in->ps); -#ifdef XEN_DEBUGGER - ovl = (hcb->next_overlap)(hcb); - if ( ovl ) { - panic ("Oops, 2+ overlaps for purge_and_insert\n"); - return; - } -#endif - if(in->ps!=PAGE_SHIFT) - vtlb_insert(hcb, in, va); - thash_vhpt_insert(hcb->ts->vhpt, in, va); -} -/* - * Purge one hash line (include the entry in hash table). - * Can only be called by thash_purge_all. - * Input: - * hash: The head of collision chain (hash table) - * - */ -/* -static void thash_purge_line(thash_cb_t *hcb, thash_data_t *hash) -{ - if ( INVALID_ENTRY(hcb, hash) ) return; - thash_data_t *prev, *next; - next=hash->next; - while ( next ) { - prev=next; - next=next->next; - cch_free(hcb, prev); - } - // Then hash table itself. - INVALIDATE_HASH(hcb, hash); -} -*/ - - - - - - +void thash_purge_and_insert(thash_cb_t *hcb, u64 pte, u64 itir, u64 ifa) +{ + u64 ps, va; + ps = itir_ps(itir); + va = PAGEALIGN(ifa,ps); + vtlb_purge(hcb, va, ps); + vhpt_purge(hcb->vhpt, va, ps); + if((ps!=PAGE_SHIFT)||(pte&VTLB_PTE_IO)) + vtlb_insert(hcb, pte, itir, va); + if(!(pte&VTLB_PTE_IO)){ + va = PAGEALIGN(ifa,PAGE_SHIFT); + thash_vhpt_insert(hcb->vhpt, pte, itir, va); + } +} @@ -1064,27 +525,12 @@ } cch_mem_init (hcb); - vhpt = hcb->ts->vhpt; + vhpt = hcb->vhpt; hash_table = (thash_data_t*)((u64)vhpt->hash + vhpt->hash_sz); for (--hash_table;(u64)hash_table >= (u64)vhpt->hash;hash_table--) { INVALIDATE_VHPT_HEADER(hash_table); } cch_mem_init (vhpt); - -/* - entry = &hcb->ts->itr[0]; - for(i=0; i< (NITRS+NDTRS); i++){ - if(!INVALID_TLB(entry)){ - start=entry->vadr & (-PSIZE(entry->ps)); - end = start + PSIZE(entry->ps); - while(start<end){ - thash_vhpt_insert(vhpt, entry, start); - start += PAGE_SIZE; - } - } - entry++; - } -*/ local_flush_tlb_all(); } @@ -1096,100 +542,32 @@ * INPUT: * in: TLB format for both VHPT & TLB. */ -thash_data_t *vtlb_lookup(thash_cb_t *hcb, - thash_data_t *in) -{ - return vtlb_lookup_ex(hcb, in->rid, in->vadr, in->cl); -} - -thash_data_t *vtlb_lookup_ex(thash_cb_t *hcb, - u64 rid, u64 va, - CACHE_LINE_TYPE cl) + +thash_data_t *vtlb_lookup(thash_cb_t *hcb, u64 va,int is_data) { thash_data_t *hash_table, *cch; u64 tag; ia64_rr vrr; - + ASSERT ( hcb->ht == THASH_TLB ); - - cch = __vtr_lookup(hcb, rid, va, cl);; + + cch = __vtr_lookup(hcb->vcpu, va, is_data);; if ( cch ) return cch; - vrr = vmx_vcpu_rr(hcb->vcpu,va); + vcpu_get_rr(hcb->vcpu,va,&vrr.rrval); hash_table = vsa_thash( hcb->pta, va, vrr.rrval, &tag); if ( INVALID_ENTRY(hcb, hash_table ) ) return NULL; - + for (cch=hash_table; cch; cch = cch->next) { -// if ( __is_translated(cch, rid, va, cl) ) if(cch->etag == tag) return cch; } return NULL; } -/* - * Lock/Unlock TC if found. - * NOTES: Only the page in prefered size can be handled. 
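[Note: the new thash_purge_and_insert() above encodes the placement policy in two tests; itir_ps() extracts the page-size field (ITIR bits 7:2). Translations the machine VHPT cannot represent, meaning anything not PAGE_SHIFT-sized plus anything tagged VTLB_PTE_IO, go into the software vTLB; IO mappings are additionally kept out of the VHPT entirely so guest accesses keep faulting into the emulator:

    ps = itir_ps(itir);
    va = PAGEALIGN(ifa, ps);
    vtlb_purge(hcb, va, ps);                   /* drop any overlap first */
    vhpt_purge(hcb->vhpt, va, ps);
    if (ps != PAGE_SHIFT || (pte & VTLB_PTE_IO))
        vtlb_insert(hcb, pte, itir, va);       /* VHPT cannot hold these */
    if (!(pte & VTLB_PTE_IO))
        thash_vhpt_insert(hcb->vhpt, pte, itir,
                          PAGEALIGN(ifa, PAGE_SHIFT));
]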
- * return: - * 1: failure - * 0: success - */ -/* -int thash_lock_tc(thash_cb_t *hcb, u64 va, u64 size, int rid, char cl, int lock) -{ - thash_data_t *ovl; - search_section_t sections; - - sections.tr = 1; - sections.tc = 1; - ovl = (hcb->find_overlap)(hcb, va, size, rid, cl, sections); - if ( ovl ) { - if ( !ovl->tc ) { -// panic("Oops, TR for lock\n"); - return 0; - } - else if ( lock ) { - if ( ovl->locked ) { - DPRINTK("Oops, already locked entry\n"); - } - ovl->locked = 1; - } - else if ( !lock ) { - if ( !ovl->locked ) { - DPRINTK("Oops, already unlocked entry\n"); - } - ovl->locked = 0; - } - return 0; - } - return 1; -} -*/ - -/* - * Notifier when TLB is deleted from hash table and its collision chain. - * NOTES: - * The typical situation is that TLB remove needs to inform - * VHPT to remove too. - * PARAS: - * 1: hcb is TLB object. - * 2: The format of entry is always in TLB. - * - */ -//void tlb_remove_notifier(thash_cb_t *hcb, thash_data_t *entry) -//{ -// vhpt_purge(hcb->ts->vhpt,entry->vadr,entry->ps); -// thash_cb_t *vhpt; - -// search_section_t s_sect; - -// s_sect.v = 0; -// thash_purge_entries(hcb->ts->vhpt, entry, s_sect); -// machine_tlb_purge(entry->vadr, entry->ps); -//} /* * Initialize internal control data before service. @@ -1206,28 +584,15 @@ hcb->pta.size = sz; // hcb->get_rr_fn = vmmu_get_rr; ASSERT ( hcb->hash_sz % sizeof(thash_data_t) == 0 ); - if ( hcb->ht == THASH_TLB ) { -// hcb->remove_notifier = NULL; //tlb_remove_notifier; -// hcb->find_overlap = vtlb_find_overlap; -// hcb->next_overlap = vtlb_next_overlap; -// hcb->rem_hash = rem_vtlb; -// hcb->ins_hash = vtlb_insert; - __init_tr(hcb); - } - else { -// hcb->remove_notifier = NULL; -// hcb->find_overlap = vhpt_find_overlap; -// hcb->next_overlap = vhpt_next_overlap; -// hcb->rem_hash = rem_thash; -// hcb->ins_hash = thash_vhpt_insert; - } hash_table = (thash_data_t*)((u64)hcb->hash + hcb->hash_sz); for (--hash_table;(u64)hash_table >= (u64)hcb->hash;hash_table--) { INVALIDATE_HASH_HEADER(hcb,hash_table); } } + #ifdef VTLB_DEBUG +/* static u64 cch_length_statistics[MAX_CCH_LENGTH+1]; u64 sanity_check=0; u64 vtlb_chain_sanity(thash_cb_t *vtlb, thash_cb_t *vhpt, thash_data_t *hash) @@ -1264,7 +629,7 @@ thash_data_t *hash, *cch; thash_data_t *ovl; search_section_t s_sect; - thash_cb_t *vhpt = vtlb->ts->vhpt; + thash_cb_t *vhpt = vtlb->vhpt; u64 invalid_ratio; if ( sanity_check == 0 ) return; @@ -1403,4 +768,5 @@ } printf("End of vTLB dump\n"); } +*/ #endif diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/xen/dom_fw.c --- a/xen/arch/ia64/xen/dom_fw.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/xen/dom_fw.c Mon Mar 20 08:56:54 2006 @@ -47,7 +47,7 @@ } // builds a hypercall bundle at domain physical address -void dom_efi_hypercall_patch(struct domain *d, unsigned long paddr, unsigned long hypercall) +static void dom_efi_hypercall_patch(struct domain *d, unsigned long paddr, unsigned long hypercall) { unsigned long *imva; @@ -96,122 +96,13 @@ # define NUM_MEM_DESCS 5 -#define SECS_PER_HOUR (60 * 60) -#define SECS_PER_DAY (SECS_PER_HOUR * 24) - -/* Compute the `struct tm' representation of *T, - offset OFFSET seconds east of UTC, - and store year, yday, mon, mday, wday, hour, min, sec into *TP. - Return nonzero if successful. */ -int -offtime (unsigned long t, efi_time_t *tp) -{ - const unsigned short int __mon_yday[2][13] = - { - /* Normal years. */ - { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, - /* Leap years. 
*/ - { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } - }; - long int days, rem, y; - const unsigned short int *ip; - - days = t / SECS_PER_DAY; - rem = t % SECS_PER_DAY; - while (rem < 0) { - rem += SECS_PER_DAY; - --days; - } - while (rem >= SECS_PER_DAY) { - rem -= SECS_PER_DAY; - ++days; - } - tp->hour = rem / SECS_PER_HOUR; - rem %= SECS_PER_HOUR; - tp->minute = rem / 60; - tp->second = rem % 60; - /* January 1, 1970 was a Thursday. */ - y = 1970; - -# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0)) -# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400)) -# define __isleap(year) \ - ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0)) - - while (days < 0 || days >= (__isleap (y) ? 366 : 365)) { - /* Guess a corrected year, assuming 365 days per year. */ - long int yg = y + days / 365 - (days % 365 < 0); - - /* Adjust DAYS and Y to match the guessed year. */ - days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1) - - LEAPS_THRU_END_OF (y - 1)); - y = yg; - } - tp->year = y; - ip = __mon_yday[__isleap(y)]; - for (y = 11; days < (long int) ip[y]; --y) - continue; - days -= ip[y]; - tp->month = y + 1; - tp->day = days + 1; - return 1; -} - -/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. */ - -#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3) - -#define REG_OFFSET(addr) (0x00000000000000FF & (addr)) -#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr)) -#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr)) - -#ifndef XEN -static efi_status_t -fw_efi_get_time (efi_time_t *tm, efi_time_cap_t *tc) -{ -#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC) - struct { - int tv_sec; /* must be 32bits to work */ - int tv_usec; - } tv32bits; - - ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD); - - memset(tm, 0, sizeof(*tm)); - offtime(tv32bits.tv_sec, tm); - - if (tc) - memset(tc, 0, sizeof(*tc)); -#else -# error Not implemented yet... -#endif - return EFI_SUCCESS; -} - -static void -efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data) -{ -#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC) - ssc(status, 0, 0, 0, SSC_EXIT); -#else -# error Not implemented yet... 
-#endif -} - -static efi_status_t -efi_unimplemented (void) -{ - return EFI_UNSUPPORTED; -} -#endif /* !XEN */ - struct sal_ret_values sal_emulator (long index, unsigned long in1, unsigned long in2, unsigned long in3, unsigned long in4, unsigned long in5, unsigned long in6, unsigned long in7) { - long r9 = 0; - long r10 = 0; + unsigned long r9 = 0; + unsigned long r10 = 0; long r11 = 0; long status; @@ -285,12 +176,11 @@ } struct ia64_pal_retval -xen_pal_emulator(unsigned long index, unsigned long in1, - unsigned long in2, unsigned long in3) -{ - long r9 = 0; - long r10 = 0; - long r11 = 0; +xen_pal_emulator(unsigned long index, u64 in1, u64 in2, u64 in3) +{ + unsigned long r9 = 0; + unsigned long r10 = 0; + unsigned long r11 = 0; long status = -1; if (running_on_sim) return pal_emulator_static(index); @@ -364,7 +254,7 @@ &r10); break; case PAL_REGISTER_INFO: - status = ia64_pal_register_info(in1,&r9,&r10); + status = ia64_pal_register_info(in1, &r9, &r10); break; case PAL_CACHE_FLUSH: /* FIXME */ @@ -434,7 +324,7 @@ #define NFUNCPTRS 20 -void print_md(efi_memory_desc_t *md) +static void print_md(efi_memory_desc_t *md) { #if 1 printk("domain mem: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n", @@ -496,7 +386,7 @@ } /* base is physical address of acpi table */ -void touch_acpi_table(void) +static void touch_acpi_table(void) { if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_update_lsapic, 0) < 0) printk("Error parsing MADT - no LAPIC entires\n"); @@ -514,15 +404,15 @@ struct acpi_table_header dsdt; u8 aml[16]; struct acpi_table_madt madt; - struct acpi_table_lsapic lsapic; + struct acpi_table_lsapic lsapic[MAX_VIRT_CPUS]; u8 pm1a_evt_blk[4]; u8 pm1a_cnt_blk[1]; u8 pm_tmr_blk[4]; }; /* Create enough of an ACPI structure to make the guest OS ACPI happy. */ -void -dom_fw_fake_acpi(struct fake_acpi_tables *tables) +static void +dom_fw_fake_acpi(struct domain *d, struct fake_acpi_tables *tables) { struct acpi20_table_rsdp *rsdp = &tables->rsdp; struct xsdt_descriptor_rev2 *xsdt = &tables->xsdt; @@ -530,7 +420,8 @@ struct facs_descriptor_rev2 *facs = &tables->facs; struct acpi_table_header *dsdt = &tables->dsdt; struct acpi_table_madt *madt = &tables->madt; - struct acpi_table_lsapic *lsapic = &tables->lsapic; + struct acpi_table_lsapic *lsapic = tables->lsapic; + int i; memset(tables, 0, sizeof(struct fake_acpi_tables)); @@ -608,13 +499,13 @@ /* Trivial namespace, avoids ACPI CA complaints */ tables->aml[0] = 0x10; /* Scope */ tables->aml[1] = 0x12; /* length/offset to next object */ - strncpy(&tables->aml[2], "_SB_", 4); + strncpy((char *)&tables->aml[2], "_SB_", 4); /* The processor object isn't absolutely necessary, revist for SMP */ tables->aml[6] = 0x5b; /* processor object */ tables->aml[7] = 0x83; tables->aml[8] = 0x0b; /* next */ - strncpy(&tables->aml[9], "CPU0", 4); + strncpy((char *)&tables->aml[9], "CPU0", 4); dsdt->checksum = generate_acpi_checksum(dsdt, dsdt->length); @@ -622,16 +513,20 @@ strncpy(madt->header.signature, APIC_SIG, 4); madt->header.revision = 2; madt->header.length = sizeof(struct acpi_table_madt) + - sizeof(struct acpi_table_lsapic); + MAX_VIRT_CPUS * sizeof(struct acpi_table_lsapic); strcpy(madt->header.oem_id, "XEN"); strcpy(madt->header.oem_table_id, "Xen/ia64"); strcpy(madt->header.asl_compiler_id, "XEN"); madt->header.asl_compiler_revision = (XEN_VERSION<<16)|(XEN_SUBVERSION); - /* A single LSAPIC entry describes the CPU. 
Revisit for SMP guests */ - lsapic->header.type = ACPI_MADT_LSAPIC; - lsapic->header.length = sizeof(struct acpi_table_lsapic); - lsapic->flags.enabled = 1; + /* An LSAPIC entry describes a CPU. */ + for (i = 0; i < MAX_VIRT_CPUS; i++) { + lsapic[i].header.type = ACPI_MADT_LSAPIC; + lsapic[i].header.length = sizeof(struct acpi_table_lsapic); + lsapic[i].id = i; + lsapic[i].eid = 0; + lsapic[i].flags.enabled = (d->vcpu[i] != NULL); + } madt->header.checksum = generate_acpi_checksum(madt, madt->header.length); @@ -785,7 +680,7 @@ acpi_tables = (void *)cp; cp += sizeof(struct fake_acpi_tables); - dom_fw_fake_acpi(acpi_tables); + dom_fw_fake_acpi(d, acpi_tables); efi_tables[i].guid = ACPI_20_TABLE_GUID; efi_tables[i].table = dom_pa((unsigned long) acpi_tables); @@ -801,8 +696,8 @@ sal_systab->sal_rev_major = 0; sal_systab->entry_count = 1; - strcpy(sal_systab->oem_id, "Xen/ia64"); - strcpy(sal_systab->product_id, "Xen/ia64"); + strcpy((char *)sal_systab->oem_id, "Xen/ia64"); + strcpy((char *)sal_systab->product_id, "Xen/ia64"); /* fill in an entry point: */ sal_ed->type = SAL_DESC_ENTRY_POINT; @@ -861,7 +756,10 @@ /* hypercall patches live here, masquerade as reserved PAL memory */ MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 1); MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem, 1); - MAKE_MD(EFI_RESERVED_TYPE,0,0,0,0); + /* Create a dummy entry for IO ports, so that IO accesses are + trapped by Xen. */ + MAKE_MD(EFI_MEMORY_MAPPED_IO_PORT_SPACE,EFI_MEMORY_UC, + 0x00000ffffc000000, 0x00000fffffffffff, 1); MAKE_MD(EFI_RESERVED_TYPE,0,0,0,0); } diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/xen/domain.c Mon Mar 20 08:56:54 2006 @@ -26,6 +26,7 @@ #include <asm/processor.h> #include <asm/desc.h> #include <asm/hw_irq.h> +#include <asm/setup.h> //#include <asm/mpspec.h> #include <xen/irq.h> #include <xen/event.h> @@ -36,7 +37,6 @@ #include <xen/elf.h> //#include <asm/page.h> #include <asm/pgalloc.h> -#include <asm/dma.h> /* for MAX_DMA_ADDRESS */ #include <asm/asm-offsets.h> /* for IA64_THREAD_INFO_SIZE */ @@ -49,6 +49,9 @@ #include <asm/pal.h> #include <asm/vhpt.h> #include <public/hvm/ioreq.h> +#include <public/arch-ia64.h> +#include <asm/tlbflush.h> +#include <asm/regionreg.h> #define CONFIG_DOMAIN0_CONTIGUOUS unsigned long dom0_start = -1L; @@ -69,10 +72,7 @@ /* FIXME: where these declarations should be there ? 
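[Note: two guest-visible firmware changes land above. First, the fake MADT grows from one hard-wired LSAPIC entry to a MAX_VIRT_CPUS array, so the guest's ACPI parser discovers exactly the vcpus the domain has:

    /* one LSAPIC entry per potential vcpu; only existing vcpus are enabled */
    for (i = 0; i < MAX_VIRT_CPUS; i++) {
        lsapic[i].header.type   = ACPI_MADT_LSAPIC;
        lsapic[i].header.length = sizeof(struct acpi_table_lsapic);
        lsapic[i].id            = i;    /* vcpu number doubles as SAPIC id */
        lsapic[i].eid           = 0;
        lsapic[i].flags.enabled = (d->vcpu[i] != NULL);
    }

(the MADT header length is widened to match). Second, the new EFI memory descriptor maps the architectural IO-port space UC instead of leaving a reserved placeholder, so every guest port access faults into Xen and can be trapped. The strcpy/strncpy casts and the unsigned long r9/r10 in the SAL/PAL emulators are warning fixes; the latter presumably also avoids sign-extending large register values through a signed long.]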
*/ extern void domain_pend_keyboard_interrupt(int); extern long platform_is_hp_ski(void); -extern unsigned long allocate_metaphysical_rr(void); -extern int allocate_rid_range(struct domain *, unsigned long); extern void sync_split_caches(void); -extern void init_all_rr(struct vcpu *); extern void serial_input_init(void); static void init_switch_stack(struct vcpu *v); @@ -80,9 +80,33 @@ /* this belongs in include/asm, but there doesn't seem to be a suitable place */ void arch_domain_destroy(struct domain *d) { - printf("arch_domain_destroy: not implemented\n"); - //free_page((unsigned long)d->mm.perdomain_pt); - free_xenheap_page(d->shared_info); + struct page_info *page; + struct list_head *ent, *prev; + + if (d->arch.mm->pgd != NULL) + { + list_for_each ( ent, &d->arch.mm->pt_list ) + { + page = list_entry(ent, struct page_info, list); + prev = ent->prev; + list_del(ent); + free_xenheap_page(page_to_virt(page)); + ent = prev; + } + pgd_free(d->arch.mm->pgd); + } + if (d->arch.mm != NULL) + xfree(d->arch.mm); + if (d->shared_info != NULL) + free_xenheap_page(d->shared_info); + + deallocate_rid_range(d); + + /* It is really good in this? */ + flush_tlb_all(); + + /* It is really good in this? */ + vhpt_flush(); } static void default_idle(void) @@ -115,23 +139,9 @@ void startup_cpu_idle_loop(void) { - int cpu = smp_processor_id (); /* Just some sanity to ensure that the scheduler is set up okay. */ ASSERT(current->domain == IDLE_DOMAIN_ID); - printf ("idle%dA\n", cpu); raise_softirq(SCHEDULE_SOFTIRQ); -#if 0 /* All this work is done within continue_cpu_idle_loop */ - printf ("idle%dB\n", cpu); - asm volatile ("mov ar.k2=r0"); - do_softirq(); - printf ("idle%dC\n", cpu); - - /* - * Declares CPU setup done to the boot processor. - * Therefore memory barrier to ensure state is visible. - */ - smp_mb(); -#endif #if 0 //do we have to ensure the idle task has a shared page so that, for example, //region registers can be loaded from it. Apparently not... @@ -201,6 +211,8 @@ void free_vcpu_struct(struct vcpu *v) { + if (v->arch.privregs != NULL) + free_xenheap_pages(v->arch.privregs, get_order(sizeof(mapped_regs_t))); free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER); } @@ -253,6 +265,7 @@ if ((d->arch.mm = xmalloc(struct mm_struct)) == NULL) goto fail_nomem; memset(d->arch.mm, 0, sizeof(*d->arch.mm)); + INIT_LIST_HEAD(&d->arch.mm->pt_list); if ((d->arch.mm->pgd = pgd_alloc(d->arch.mm)) == NULL) goto fail_nomem; @@ -324,10 +337,74 @@ return 0; } +static void relinquish_memory(struct domain *d, struct list_head *list) +{ + struct list_head *ent; + struct page_info *page; +#ifndef __ia64__ + unsigned long x, y; +#endif + + /* Use a recursive lock, as we may enter 'free_domheap_page'. */ + spin_lock_recursive(&d->page_alloc_lock); + ent = list->next; + while ( ent != list ) + { + page = list_entry(ent, struct page_info, list); + /* Grab a reference to the page so it won't disappear from under us. */ + if ( unlikely(!get_page(page, d)) ) + { + /* Couldn't get a reference -- someone is freeing this page. */ + ent = ent->next; + continue; + } + + if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) + put_page_and_type(page); + + if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) + put_page(page); + +#ifndef __ia64__ + /* + * Forcibly invalidate base page tables at this point to break circular + * 'linear page table' references. This is okay because MMU structures + * are not shared across domains and this domain is now dead. 
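[Note: arch_domain_destroy() and domain_relinquish_resources() above replace the old "not implemented" stubs with the standard Xen teardown pattern. relinquish_memory() walks the domain's page list under a recursive page_alloc lock, taking a temporary reference so a page cannot vanish mid-iteration, dropping the pin and allocation references, and only then releasing its own reference; the circular-linear-pagetable fix-up is compiled out on ia64 (#ifndef __ia64__). The core loop, condensed:

    spin_lock_recursive(&d->page_alloc_lock);
    for (ent = list->next; ent != list; ) {
        page = list_entry(ent, struct page_info, list);
        if (!get_page(page, d)) {        /* someone is freeing it already */
            ent = ent->next;
            continue;
        }
        if (test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info))
            put_page_and_type(page);
        if (test_and_clear_bit(_PGC_allocated, &page->count_info))
            put_page(page);
        ent = ent->next;                 /* step first ...               */
        put_page(page);                  /* ... then drop our temp ref   */
    }
    spin_unlock_recursive(&d->page_alloc_lock);
]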
Thus base + * tables are not in use so a non-zero count means circular reference. + */ + y = page->u.inuse.type_info; + for ( ; ; ) + { + x = y; + if ( likely((x & (PGT_type_mask|PGT_validated)) != + (PGT_base_page_table|PGT_validated)) ) + break; + + y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated); + if ( likely(y == x) ) + { + free_page_type(page, PGT_base_page_table); + break; + } + } +#endif + + /* Follow the list chain and /then/ potentially free the page. */ + ent = ent->next; + put_page(page); + } + + spin_unlock_recursive(&d->page_alloc_lock); +} + void domain_relinquish_resources(struct domain *d) { - /* FIXME */ - printf("domain_relinquish_resources: not implemented\n"); + /* Relinquish every page of memory. */ + + /* xenheap_list is not used in ia64. */ + BUG_ON(!list_empty(&d->xenpage_list)); + + relinquish_memory(d, &d->page_list); } // heavily leveraged from linux/arch/ia64/kernel/process.c:copy_thread() @@ -339,7 +416,7 @@ { struct domain *d = v->domain; struct pt_regs *regs; - extern char saved_command_line[]; + extern char dom0_command_line[]; #ifdef CONFIG_DOMAIN0_CONTIGUOUS if (d == dom0) start_pc += dom0_start; @@ -351,8 +428,9 @@ regs->cr_ipsr = 0x501008826008; /* Need to be expanded as macro */ } else { regs->cr_ipsr = ia64_getreg(_IA64_REG_PSR) - | IA64_PSR_BITS_TO_SET | IA64_PSR_BN - & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS); + | IA64_PSR_BITS_TO_SET | IA64_PSR_BN; + regs->cr_ipsr &= ~(IA64_PSR_BITS_TO_CLEAR + | IA64_PSR_RI | IA64_PSR_IS); regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2 } regs->cr_iip = start_pc; @@ -362,24 +440,27 @@ if (VMX_DOMAIN(v)) { vmx_init_all_rr(v); if (d == dom0) -// VCPU(v,vgr[12]) = dom_fw_setup(d,saved_command_line,256L); - regs->r28 = dom_fw_setup(d,saved_command_line,256L); + regs->r28 = dom_fw_setup(d,dom0_command_line, + COMMAND_LINE_SIZE); /* Virtual processor context setup */ VCPU(v, vpsr) = IA64_PSR_BN; VCPU(v, dcr) = 0; } else { init_all_rr(v); if (d == dom0) - regs->r28 = dom_fw_setup(d,saved_command_line,256L); + regs->r28 = dom_fw_setup(d,dom0_command_line, + COMMAND_LINE_SIZE); else { regs->ar_rsc |= (2 << 2); /* force PL2/3 */ if (*d->arch.cmdline == '\0') { #define DEFAULT_CMDLINE "nomca nosmp xencons=tty0 console=tty0 root=/dev/hda1" - regs->r28 = dom_fw_setup(d,DEFAULT_CMDLINE,256L); + regs->r28 = dom_fw_setup(d,DEFAULT_CMDLINE, + sizeof (DEFAULT_CMDLINE)); printf("domU command line defaulted to" DEFAULT_CMDLINE "\n"); } - else regs->r28 = dom_fw_setup(d,d->arch.cmdline,256L); + else regs->r28 = dom_fw_setup(d,d->arch.cmdline, + IA64_COMMAND_LINE_SIZE); } VCPU(v, banknum) = 1; VCPU(v, metaphysical_mode) = 1; @@ -387,7 +468,7 @@ } } -static struct page * assign_new_domain0_page(unsigned long mpaddr) +static struct page_info * assign_new_domain0_page(unsigned long mpaddr) { if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { printk("assign_new_domain0_page: bad domain0 mpaddr 0x%lx!\n",mpaddr); @@ -399,10 +480,10 @@ } /* allocate new page for domain and map it to the specified metaphysical addr */ -struct page * assign_new_domain_page(struct domain *d, unsigned long mpaddr) +struct page_info * assign_new_domain_page(struct domain *d, unsigned long mpaddr) { struct mm_struct *mm = d->arch.mm; - struct page *p = (struct page *)0; + struct page_info *pt, *p = (struct page_info *)0; pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -414,16 +495,28 @@ } pgd = pgd_offset(mm,mpaddr); if (pgd_none(*pgd)) + { pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr)); + pt = 
maddr_to_page(pgd_val(*pgd)); + list_add_tail(&pt->list, &d->arch.mm->pt_list); + } pud = pud_offset(pgd, mpaddr); if (pud_none(*pud)) + { pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr)); + pt = maddr_to_page(pud_val(*pud)); + list_add_tail(&pt->list, &d->arch.mm->pt_list); + } pmd = pmd_offset(pud, mpaddr); if (pmd_none(*pmd)) + { pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr)); // pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr)); + pt = maddr_to_page(pmd_val(*pmd)); + list_add_tail(&pt->list, &d->arch.mm->pt_list); + } pte = pte_offset_map(pmd, mpaddr); if (pte_none(*pte)) { @@ -456,6 +549,7 @@ void assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr) { struct mm_struct *mm = d->arch.mm; + struct page_info *pt; pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -467,16 +561,28 @@ } pgd = pgd_offset(mm,mpaddr); if (pgd_none(*pgd)) + { pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr)); + pt = maddr_to_page(pgd_val(*pgd)); + list_add_tail(&pt->list, &d->arch.mm->pt_list); + } pud = pud_offset(pgd, mpaddr); if (pud_none(*pud)) + { pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr)); + pt = maddr_to_page(pud_val(*pud)); + list_add_tail(&pt->list, &d->arch.mm->pt_list); + } pmd = pmd_offset(pud, mpaddr); if (pmd_none(*pmd)) + { pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr)); // pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr)); + pt = maddr_to_page(pmd_val(*pmd)); + list_add_tail(&pt->list, &d->arch.mm->pt_list); + } pte = pte_offset_map(pmd, mpaddr); if (pte_none(*pte)) { @@ -543,12 +649,13 @@ #ifdef CONFIG_DOMAIN0_CONTIGUOUS if (d == dom0) { + pte_t pteval; if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { //printk("lookup_domain_mpa: bad dom0 mpaddr 0x%lx!\n",mpaddr); //printk("lookup_domain_mpa: start=0x%lx,end=0x%lx!\n",dom0_start,dom0_start+dom0_size); mpafoo(mpaddr); } - pte_t pteval = pfn_pte(mpaddr >> PAGE_SHIFT, + pteval = pfn_pte(mpaddr >> PAGE_SHIFT, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)); pte = &pteval; return *(unsigned long *)pte; @@ -639,7 +746,7 @@ Elf_Phdr phdr; int h, filesz, memsz; unsigned long elfaddr, dom_mpaddr, dom_imva; - struct page *p; + struct page_info *p; copy_memory(&ehdr, (void *) image_start, sizeof(Elf_Ehdr)); for ( h = 0; h < ehdr.e_phnum; h++ ) { diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/xen/gdbstub.c --- a/xen/arch/ia64/xen/gdbstub.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/xen/gdbstub.c Mon Mar 20 08:56:54 2006 @@ -32,6 +32,7 @@ #include <xen/lib.h> +#include <xen/mm.h> #include <asm/byteorder.h> #include <asm/debugger.h> #include <asm/uaccess.h> diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/xen/irq.c --- a/xen/arch/ia64/xen/irq.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/xen/irq.c Mon Mar 20 08:56:54 2006 @@ -1338,6 +1338,7 @@ struct domain *guest[IRQ_MAX_GUESTS]; } irq_guest_action_t; +/* static void __do_IRQ_guest(int irq) { irq_desc_t *desc = &irq_desc[irq]; @@ -1353,7 +1354,7 @@ send_guest_pirq(d, irq); } } - + */ int pirq_guest_unmask(struct domain *d) { irq_desc_t *desc; diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/xen/mm_init.c --- a/xen/arch/ia64/xen/mm_init.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/xen/mm_init.c Mon Mar 20 08:56:54 2006 @@ -60,16 +60,16 @@ #ifdef CONFIG_VIRTUAL_MEM_MAP unsigned long vmalloc_end = VMALLOC_END_INIT; EXPORT_SYMBOL(vmalloc_end); -struct page *vmem_map; +struct page_info *vmem_map; EXPORT_SYMBOL(vmem_map); #endif // static int pgt_cache_water[2] = { 25, 50 }; -struct page *zero_page_memmap_ptr; /* map entry for zero page 
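[Note: both mapping paths above (assign_new_domain_page and assign_domain_page) now append every freshly allocated page-table page to d->arch.mm->pt_list immediately after populating the level that uses it. This is what lets the new arch_domain_destroy(), earlier in this patch, free all page-table pages by walking a single list instead of leaking them. The idiom, repeated verbatim at the pgd, pud and pmd levels:

    if (pgd_none(*pgd)) {
        pgd_populate(mm, pgd, pud_alloc_one(mm, mpaddr));
        pt = maddr_to_page(pgd_val(*pgd));   /* the page just installed */
        list_add_tail(&pt->list, &d->arch.mm->pt_list);
    }
]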
*/ +#ifndef XEN +struct page_info *zero_page_memmap_ptr; /* map entry for zero page */ EXPORT_SYMBOL(zero_page_memmap_ptr); -#ifdef XEN void *high_memory; EXPORT_SYMBOL(high_memory); @@ -172,7 +172,7 @@ pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { if (!pmd_present(*pmd)) { - struct page *new; + struct page_info *new; spin_unlock(&mm->page_table_lock); new = pte_alloc_one(mm, address); @@ -202,7 +202,7 @@ update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte) { unsigned long addr; - struct page *page; + struct page_info *page; if (!pte_exec(pte)) return; /* not an executable page... */ @@ -386,7 +386,7 @@ create_mem_map_page_table (u64 start, u64 end, void *arg) { unsigned long address, start_page, end_page; - struct page *map_start, *map_end; + struct page_info *map_start, *map_end; int node; pgd_t *pgd; pmd_t *pmd; @@ -417,8 +417,8 @@ } struct memmap_init_callback_data { - struct page *start; - struct page *end; + struct page_info *start; + struct page_info *end; int nid; unsigned long zone; }; @@ -427,7 +427,7 @@ virtual_memmap_init (u64 start, u64 end, void *arg) { struct memmap_init_callback_data *args; - struct page *map_start, *map_end; + struct page_info *map_start, *map_end; args = (struct memmap_init_callback_data *) arg; @@ -440,13 +440,13 @@ map_end = args->end; /* - * We have to initialize "out of bounds" struct page elements that fit completely + * We have to initialize "out of bounds" struct page_info elements that fit completely * on the same pages that were allocated for the "in bounds" elements because they * may be referenced later (and found to be "reserved"). */ - map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / sizeof(struct page); + map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / sizeof(struct page_info); map_end += ((PAGE_ALIGN((unsigned long) map_end) - (unsigned long) map_end) - / sizeof(struct page)); + / sizeof(struct page_info)); if (map_start < map_end) memmap_init_zone(map_start, (unsigned long) (map_end - map_start), @@ -455,7 +455,7 @@ } void -memmap_init (struct page *start, unsigned long size, int nid, +memmap_init (struct page_info *start, unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { if (!vmem_map) @@ -476,7 +476,7 @@ ia64_mfn_valid (unsigned long pfn) { char byte; - struct page *pg = mfn_to_page(pfn); + struct page_info *pg = mfn_to_page(pfn); return (__get_user(byte, (char *) pg) == 0) && ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK)) diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/xen/privop.c --- a/xen/arch/ia64/xen/privop.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/xen/privop.c Mon Mar 20 08:56:54 2006 @@ -19,6 +19,9 @@ extern void zero_reflect_counts(void); long priv_verbose=0; + +/* Set to 1 to handle privified instructions from the privify tool. 
*/ +static const int privify_en = 0; /************************************************************************** Hypercall bundle creation @@ -131,7 +134,8 @@ UINT src = inst.M28.r3; // NOTE: ptc_e with source gr > 63 is emulated as a fc r(y-64) - if (src > 63) return(vcpu_fc(vcpu,vcpu_get_gr(vcpu,src - 64))); + if (privify_en && src > 63) + return(vcpu_fc(vcpu,vcpu_get_gr(vcpu,src - 64))); return vcpu_ptc_e(vcpu,vcpu_get_gr(vcpu,src)); } @@ -178,7 +182,7 @@ UINT src = inst.M46.r3; // NOTE: tpa with source gr > 63 is emulated as a ttag rx=r(y-64) - if (src > 63) + if (privify_en && src > 63) fault = vcpu_ttag(vcpu,vcpu_get_gr(vcpu,src-64),&padr); else fault = vcpu_tpa(vcpu,vcpu_get_gr(vcpu,src),&padr); if (fault == IA64_NO_FAULT) @@ -193,7 +197,7 @@ UINT src = inst.M46.r3; // NOTE: tak with source gr > 63 is emulated as a thash rx=r(y-64) - if (src > 63) + if (privify_en && src > 63) fault = vcpu_thash(vcpu,vcpu_get_gr(vcpu,src-64),&key); else fault = vcpu_tak(vcpu,vcpu_get_gr(vcpu,src),&key); if (fault == IA64_NO_FAULT) @@ -280,7 +284,8 @@ // I26 and M29 are identical for these fields UINT64 ar3 = inst.M29.ar3; - if (inst.M29.r2 > 63 && inst.M29.ar3 < 8) { // privified mov from kr + if (privify_en && inst.M29.r2 > 63 && inst.M29.ar3 < 8) { + // privified mov from kr UINT64 val; if (vcpu_get_ar(vcpu,ar3,&val) != IA64_ILLOP_FAULT) return vcpu_set_gr(vcpu, inst.M29.r2-64, val,0); @@ -404,14 +409,17 @@ { UINT64 val; IA64FAULT fault; + int reg; - if (inst.M43.r1 > 63) { // privified mov from cpuid - fault = vcpu_get_cpuid(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); + reg = vcpu_get_gr(vcpu,inst.M43.r3); + if (privify_en && inst.M43.r1 > 63) { + // privified mov from cpuid + fault = vcpu_get_cpuid(vcpu,reg,&val); if (fault == IA64_NO_FAULT) return vcpu_set_gr(vcpu, inst.M43.r1-64, val, 0); } else { - fault = vcpu_get_rr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); + fault = vcpu_get_rr(vcpu,reg,&val); if (fault == IA64_NO_FAULT) return vcpu_set_gr(vcpu, inst.M43.r1, val, 0); } @@ -455,14 +463,17 @@ { UINT64 val; IA64FAULT fault; + int reg; - if (inst.M43.r1 > 63) { // privified mov from pmd - fault = vcpu_get_pmd(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); + reg = vcpu_get_gr(vcpu,inst.M43.r3); + if (privify_en && inst.M43.r1 > 63) { + // privified mov from pmd + fault = vcpu_get_pmd(vcpu,reg,&val); if (fault == IA64_NO_FAULT) return vcpu_set_gr(vcpu, inst.M43.r1-64, val, 0); } else { - fault = vcpu_get_pmc(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); + fault = vcpu_get_pmc(vcpu,reg,&val); if (fault == IA64_NO_FAULT) return vcpu_set_gr(vcpu, inst.M43.r1, val, 0); } @@ -666,7 +677,7 @@ else if (inst.generic.major != 1) break; x6 = inst.M29.x6; if (x6 == 0x2a) { - if (inst.M29.r2 > 63 && inst.M29.ar3 < 8) + if (privify_en && inst.M29.r2 > 63 && inst.M29.ar3 < 8) privcnt.mov_from_ar++; // privified mov from kr else privcnt.mov_to_ar_reg++; return priv_mov_to_ar_reg(vcpu,inst); @@ -674,14 +685,14 @@ if (inst.M29.x3 != 0) break; if (!(pfunc = Mpriv_funcs[x6])) break; if (x6 == 0x1e || x6 == 0x1f) { // tpa or tak are "special" - if (inst.M46.r3 > 63) { + if (privify_en && inst.M46.r3 > 63) { if (x6 == 0x1e) x6 = 0x1b; else x6 = 0x1a; } } - if (x6 == 52 && inst.M28.r3 > 63) + if (privify_en && x6 == 52 && inst.M28.r3 > 63) privcnt.fc++; - else if (x6 == 16 && inst.M43.r3 > 63) + else if (privify_en && x6 == 16 && inst.M43.r3 > 63) privcnt.cpuid++; else privcnt.Mpriv_cnt[x6]++; return (*pfunc)(vcpu,inst); @@ -718,7 +729,7 @@ #endif if (inst.I26.x3 != 0) break; // I26.x3 == I27.x3 if (inst.I26.x6 == 0x2a) { - if 
(inst.I26.r2 > 63 && inst.I26.ar3 < 8) + if (privify_en && inst.I26.r2 > 63 && inst.I26.ar3 < 8) privcnt.mov_from_ar++; // privified mov from kr else privcnt.mov_to_ar_reg++; return priv_mov_to_ar_reg(vcpu,inst); @@ -797,12 +808,17 @@ #define HYPERPRIVOP_GET_RR 0x10 #define HYPERPRIVOP_SET_RR 0x11 #define HYPERPRIVOP_SET_KR 0x12 -#define HYPERPRIVOP_MAX 0x12 +#define HYPERPRIVOP_FC 0x13 +#define HYPERPRIVOP_GET_CPUID 0x14 +#define HYPERPRIVOP_GET_PMD 0x15 +#define HYPERPRIVOP_GET_EFLAG 0x16 +#define HYPERPRIVOP_SET_EFLAG 0x17 +#define HYPERPRIVOP_MAX 0x17 static const char * const hyperpriv_str[HYPERPRIVOP_MAX+1] = { 0, "rfi", "rsm.dt", "ssm.dt", "cover", "itc.d", "itc.i", "ssm.i", "=ivr", "=tpr", "tpr=", "eoi", "itm=", "thash", "ptc.ga", "itr.d", - "=rr", "rr=", "kr=" + "=rr", "rr=", "kr=", "fc", "=cpuid", "=pmd", "=ar.eflg", "ar.eflg=" }; unsigned long slow_hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 }; @@ -888,6 +904,24 @@ return 1; case HYPERPRIVOP_SET_KR: (void)vcpu_set_ar(v,regs->r8,regs->r9); + return 1; + case HYPERPRIVOP_FC: + (void)vcpu_fc(v,regs->r8); + return 1; + case HYPERPRIVOP_GET_CPUID: + (void)vcpu_get_cpuid(v,regs->r8,&val); + regs->r8 = val; + return 1; + case HYPERPRIVOP_GET_PMD: + (void)vcpu_get_pmd(v,regs->r8,&val); + regs->r8 = val; + return 1; + case HYPERPRIVOP_GET_EFLAG: + (void)vcpu_get_ar(v,24,&val); + regs->r8 = val; + return 1; + case HYPERPRIVOP_SET_EFLAG: + (void)vcpu_set_ar(v,24,regs->r8); return 1; } return 0; @@ -934,7 +968,7 @@ }; // FIXME: should use snprintf to ensure no buffer overflow -int dump_privop_counts(char *buf) +static int dump_privop_counts(char *buf) { int i, j; UINT64 sum = 0; @@ -1007,7 +1041,7 @@ return s - buf; } -int zero_privop_counts(char *buf) +static int zero_privop_counts(char *buf) { int i, j; char *s = buf; @@ -1043,7 +1077,7 @@ v->overflow++;; } -int dump_privop_addrs(char *buf) +static int dump_privop_addrs(char *buf) { int i,j; char *s = buf; @@ -1061,7 +1095,7 @@ return s - buf; } -void zero_privop_addrs(void) +static void zero_privop_addrs(void) { int i,j; for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) { @@ -1085,7 +1119,7 @@ extern unsigned long pal_halt_light_count; extern unsigned long context_switch_count; -int dump_misc_stats(char *buf) +static int dump_misc_stats(char *buf) { char *s = buf; s += sprintf(s,"Virtual TR translations: %ld\n",tr_translate_count); @@ -1102,7 +1136,7 @@ return s - buf; } -void zero_misc_stats(void) +static void zero_misc_stats(void) { dtlb_translate_count = 0; tr_translate_count = 0; @@ -1117,7 +1151,7 @@ context_switch_count = 0; } -int dump_hyperprivop_counts(char *buf) +static int dump_hyperprivop_counts(char *buf) { int i; char *s = buf; @@ -1138,7 +1172,7 @@ return s - buf; } -void zero_hyperprivop_counts(void) +static void zero_hyperprivop_counts(void) { int i; for (i = 0; i <= HYPERPRIVOP_MAX; i++) slow_hyperpriv_cnt[i] = 0; diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/xen/process.c --- a/xen/arch/ia64/xen/process.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/xen/process.c Mon Mar 20 08:56:54 2006 @@ -1,3 +1,4 @@ + /* * Miscellaneous process/domain related routines * @@ -41,6 +42,7 @@ extern int ia64_hyperprivop(unsigned long, REGS *); extern int ia64_hypercall(struct pt_regs *regs); extern void vmx_do_launch(struct vcpu *); +extern unsigned long lookup_domain_mpa(struct domain *,unsigned long); extern unsigned long dom0_start, dom0_size; @@ -57,20 +59,7 @@ IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \ IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA) -#define PSCB(x,y) VCPU(x,y) 
-#define PSCBX(x,y) x->arch.y - -extern unsigned long vcpu_verbose; - -long do_iopl(domid_t domain, unsigned int new_io_pl) -{ - dummy(); - return 0; -} - #include <xen/sched-if.h> - -extern struct schedule_data schedule_data[NR_CPUS]; void schedule_tail(struct vcpu *prev) { @@ -95,9 +84,6 @@ { struct domain *d = current->domain; unsigned long mask, pteval2, mpaddr; - unsigned long lookup_domain_mpa(struct domain *,unsigned long); - extern struct domain *dom0; - extern unsigned long dom0_start, dom0_size; // FIXME address had better be pre-validated on insert mask = ~itir_mask(itir); @@ -127,7 +113,6 @@ // given a current domain metaphysical address, return the physical address unsigned long translate_domain_mpaddr(unsigned long mpaddr) { - extern unsigned long lookup_domain_mpa(struct domain *,unsigned long); unsigned long pteval; if (current->domain == dom0) { @@ -224,7 +209,7 @@ void foodpi(void) {} -unsigned long pending_false_positive = 0; +static unsigned long pending_false_positive = 0; void reflect_extint(struct pt_regs *regs) { @@ -293,13 +278,14 @@ return; } - fault = vcpu_translate(current,address,is_data,&pteval,&itir,&iha); + fault = vcpu_translate(current,address,is_data,0,&pteval,&itir,&iha); if (fault == IA64_NO_FAULT) { pteval = translate_domain_pte(pteval,address,itir); vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,(itir>>2)&0x3f); return; } - if (IS_VMM_ADDRESS(iip)) { + if (!user_mode (regs)) { + /* The fault occurs inside Xen. */ if (!ia64_done_with_exception(regs)) { // should never happen. If it does, region 0 addr may // indicate a bad xen pointer @@ -543,7 +529,6 @@ void do_ssc(unsigned long ssc, struct pt_regs *regs) { - extern unsigned long lookup_domain_mpa(struct domain *,unsigned long); unsigned long arg0, arg1, arg2, arg3, retval; char buf[2]; /**/ static int last_fd, last_count; // FIXME FIXME FIXME @@ -653,14 +638,14 @@ vcpu_increment_iip(current); } +/* Also read in hyperprivop.S */ int first_break = 1; void ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim) { - struct domain *d = (struct domain *) current->domain; + struct domain *d = current->domain; struct vcpu *v = current; - extern unsigned long running_on_sim; if (first_break) { if (platform_is_hp_ski()) running_on_sim = 1; @@ -668,8 +653,7 @@ first_break = 0; } if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant - if (running_on_sim) do_ssc(vcpu_get_gr(current,36), regs); - else do_ssc(vcpu_get_gr(current,36), regs); + do_ssc(vcpu_get_gr(current,36), regs); } #ifdef CRASH_DEBUG else if ((iim == 0 || iim == CDB_BREAK_NUM) && !user_mode(regs)) { @@ -711,6 +695,7 @@ } } +/* Used in vhpt.h. */ #define INTR_TYPE_MAX 10 UINT64 int_counts[INTR_TYPE_MAX]; diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/xen/regionreg.c --- a/xen/arch/ia64/xen/regionreg.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/xen/regionreg.c Mon Mar 20 08:56:54 2006 @@ -157,7 +157,6 @@ int rid_block_end = d->arch.ending_rid >> IA64_MIN_IMPL_RID_BITS; int rid_block_start = d->arch.starting_rid >> IA64_MIN_IMPL_RID_BITS; - return 1; // KLUDGE ALERT // // not all domains will have allocated RIDs (physical mode loaders for instance) // @@ -250,13 +249,18 @@ newrrv.rid = newrid; newrrv.ve = 1; // VHPT now enabled for region 7!! 
newrrv.ps = PAGE_SHIFT; - if (rreg == 0) + + if (rreg == 0) { v->arch.metaphysical_saved_rr0 = vmMangleRID(newrrv.rrval); - else if (rreg == 7) + if (!PSCB(v,metaphysical_mode)) + set_rr(rr,newrrv.rrval); + } else if (rreg == 7) { ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info, v->arch.privregs, __get_cpu_var(vhpt_paddr), (unsigned long) pal_vaddr); - else set_rr(rr,newrrv.rrval); + } else { + set_rr(rr,newrrv.rrval); + } #endif return 1; } diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/xen/vcpu.c --- a/xen/arch/ia64/xen/vcpu.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/xen/vcpu.c Mon Mar 20 08:56:54 2006 @@ -5,12 +5,6 @@ * Dan Magenheimer (dan.magenheimer@xxxxxx) * */ - -#if 1 -// TEMPORARY PATCH for match_dtlb uses this, can be removed later -// FIXME SMP -int in_tpa = 0; -#endif #include <linux/sched.h> #include <public/arch-ia64.h> @@ -30,19 +24,18 @@ extern void setreg(unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs); extern void panic_domain(struct pt_regs *, const char *, ...); extern int set_metaphysical_rr0(void); +extern unsigned long translate_domain_pte(UINT64,UINT64,UINT64); +extern unsigned long translate_domain_mpaddr(unsigned long); +extern void ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits); + typedef union { struct ia64_psr ia64_psr; unsigned long i64; } PSR; -//typedef struct pt_regs REGS; -//typedef struct domain VCPU; - // this def for vcpu_regs won't work if kernel stack is present //#define vcpu_regs(vcpu) ((struct pt_regs *) vcpu->arch.regs -#define PSCB(x,y) VCPU(x,y) -#define PSCBX(x,y) x->arch.y #define TRUE 1 #define FALSE 0 @@ -71,18 +64,6 @@ unsigned long phys_translate_count = 0; unsigned long vcpu_verbose = 0; -#define verbose(a...) do {if (vcpu_verbose) printf(a);} while(0) - -//#define vcpu_quick_region_check(_tr_regions,_ifa) 1 -#define vcpu_quick_region_check(_tr_regions,_ifa) \ - (_tr_regions & (1 << ((unsigned long)_ifa >> 61))) -#define vcpu_quick_region_set(_tr_regions,_ifa) \ - do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0) - -// FIXME: also need to check && (!trp->key || vcpu_pkr_match(trp->key)) -#define vcpu_match_tr_entry(_trp,_ifa,_rid) \ - ((_trp->p && (_trp->rid==_rid) && (_ifa >= _trp->vadr) && \ - (_ifa < (_trp->vadr + (1L<< _trp->ps)) - 1))) /************************************************************************** VCPU general register access routines @@ -238,7 +219,6 @@ return IA64_NO_FAULT; } -extern UINT64 vcpu_check_pending_interrupts(VCPU *vcpu); #define SPURIOUS_VECTOR 0xf IA64FAULT vcpu_set_psr_dt(VCPU *vcpu) @@ -659,13 +639,6 @@ } } -void early_tick(VCPU *vcpu) -{ - UINT64 *p = &PSCBX(vcpu,irr[3]); - printf("vcpu_check_pending: about to deliver early tick\n"); - printf("&irr[0]=%p, irr[0]=0x%lx\n",p,*p); -} - #define IA64_TPR_MMI 0x10000 #define IA64_TPR_MIC 0x000f0 @@ -677,7 +650,7 @@ * and this must be checked independently; see vcpu_deliverable interrupts() */ UINT64 vcpu_check_pending_interrupts(VCPU *vcpu) { - UINT64 *p, *q, *r, bits, bitnum, mask, i, vector; + UINT64 *p, *r, bits, bitnum, mask, i, vector; /* Always check pending event, since guest may just ack the * event injection without handle. 
Later guest may throw out @@ -691,8 +664,8 @@ p = &PSCBX(vcpu,irr[3]); r = &PSCBX(vcpu,insvc[3]); - for (i = 3; ; p--, q--, r--, i--) { - bits = *p /* & *q */; + for (i = 3; ; p--, r--, i--) { + bits = *p ; if (bits) break; // got a potential interrupt if (*r) { // nothing in this word which is pending+inservice @@ -713,7 +686,7 @@ if (vector == (PSCB(vcpu,itv) & 0xff)) { uint64_t now = ia64_get_itc(); if (now < PSCBX(vcpu,domain_itm)) { - printk("Ooops, pending guest timer before its due\n"); +// printk("Ooops, pending guest timer before its due\n"); PSCBX(vcpu,irr[i]) &= ~mask; goto check_start; } @@ -753,12 +726,12 @@ IA64FAULT vcpu_get_lid(VCPU *vcpu, UINT64 *pval) { -//extern unsigned long privop_trace; -//privop_trace=1; - //TODO: Implement this - printf("vcpu_get_lid: WARNING: Getting cr.lid always returns zero\n"); - //*pval = 0; - *pval = ia64_getreg(_IA64_REG_CR_LID); + /* Use real LID for domain0 until vIOSAPIC is present. + Use EID=0, ID=vcpu_id for domU. */ + if (vcpu->domain == dom0) + *pval = ia64_getreg(_IA64_REG_CR_LID); + else + *pval = vcpu->vcpu_id << 24; return IA64_NO_FAULT; } @@ -932,6 +905,7 @@ { if (val & 0xff00) return IA64_RSVDREG_FAULT; PSCB(vcpu,tpr) = val; + /* This can unmask interrupts. */ if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) PSCB(vcpu,pending_interruption) = 1; return (IA64_NO_FAULT); @@ -945,8 +919,8 @@ p = &PSCBX(vcpu,insvc[3]); for (i = 3; (i >= 0) && !(bits = *p); i--, p--); if (i < 0) { - printf("Trying to EOI interrupt when none are in-service.\r\n"); - return; + printf("Trying to EOI interrupt when none are in-service.\n"); + return IA64_NO_FAULT; } bitnum = ia64_fls(bits); vec = bitnum + (i*64); @@ -957,7 +931,7 @@ if (PSCB(vcpu,interrupt_delivery_enabled)) { // but only if enabled... // worry about this later... Linux only calls eoi // with interrupts disabled - printf("Trying to EOI interrupt with interrupts enabled\r\n"); + printf("Trying to EOI interrupt with interrupts enabled\n"); } if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) PSCB(vcpu,pending_interruption) = 1; @@ -1296,7 +1270,7 @@ int warn_region0_address = 0; // FIXME later: tie to a boot parameter? -IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, UINT64 *pteval, UINT64 *itir, UINT64 *iha) +IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, BOOLEAN in_tpa, UINT64 *pteval, UINT64 *itir, UINT64 *iha) { unsigned long region = address >> 61; unsigned long pta, pte, rid, rr; @@ -1309,13 +1283,23 @@ // FIXME: This seems to happen even though it shouldn't. 
Need to track // this down, but since it has been apparently harmless, just flag it for now // panic_domain(vcpu_regs(vcpu), - printk( - "vcpu_translate: bad physical address: 0x%lx\n",address); + + /* + * The guest may execute itc.d and rfi with psr.dt=0. + * When the VMM then tries to fetch the opcode, a TLB miss may occur. + * At that point PSCB(vcpu,metaphysical_mode)=1 and region=5, and + * the VMM must handle the miss as if + * PSCB(vcpu,metaphysical_mode)=0 + */ + printk("vcpu_translate: bad physical address: 0x%lx\n", + address); + } else { + *pteval = (address & _PAGE_PPN_MASK) | __DIRTY_BITS | + _PAGE_PL_2 | _PAGE_AR_RWX; + *itir = PAGE_SHIFT << 2; + phys_translate_count++; + return IA64_NO_FAULT; } - *pteval = (address & _PAGE_PPN_MASK) | __DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX; - *itir = PAGE_SHIFT << 2; - phys_translate_count++; - return IA64_NO_FAULT; } else if (!region && warn_region0_address) { REGS *regs = vcpu_regs(vcpu); @@ -1408,9 +1392,7 @@ UINT64 pteval, itir, mask, iha; IA64FAULT fault; - in_tpa = 1; - fault = vcpu_translate(vcpu, vadr, 1, &pteval, &itir, &iha); - in_tpa = 0; + fault = vcpu_translate(vcpu, vadr, TRUE, TRUE, &pteval, &itir, &iha); if (fault == IA64_NO_FAULT) { mask = itir_mask(itir); @@ -1655,8 +1637,11 @@ IA64FAULT vcpu_get_rr(VCPU *vcpu, UINT64 reg, UINT64 *pval) { - UINT val = PSCB(vcpu,rrs)[reg>>61]; - *pval = val; + if(VMX_DOMAIN(vcpu)){ + *pval = VMX(vcpu,vrr[reg>>61]); + }else{ + *pval = PSCB(vcpu,rrs)[reg>>61]; + } return (IA64_NO_FAULT); } @@ -1693,7 +1678,7 @@ VCPU translation register access routines **************************************************************************/ -static void vcpu_purge_tr_entry(TR_ENTRY *trp) +static inline void vcpu_purge_tr_entry(TR_ENTRY *trp) { trp->p = 0; } @@ -1746,8 +1731,6 @@ **************************************************************************/ void foobar(void) { /*vcpu_verbose = 1;*/ } - -extern struct domain *dom0; void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 mp_pte, UINT64 logps) { @@ -1793,7 +1776,6 @@ IA64FAULT vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) { unsigned long pteval, logps = itir_ps(itir); - unsigned long translate_domain_pte(UINT64,UINT64,UINT64); BOOLEAN swap_rr0 = (!(ifa>>61) && PSCB(vcpu,metaphysical_mode)); if (logps < PAGE_SHIFT) { @@ -1813,7 +1795,6 @@ IA64FAULT vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) { unsigned long pteval, logps = itir_ps(itir); - unsigned long translate_domain_pte(UINT64,UINT64,UINT64); BOOLEAN swap_rr0 = (!(ifa>>61) && PSCB(vcpu,metaphysical_mode)); // FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK!
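
The vcpu.c hunks above also drop the file-scope in_tpa flag (previously marked "FIXME SMP") and instead thread it through vcpu_translate() as an explicit BOOLEAN argument. The sketch below illustrates the motivation, with hypothetical names rather than the real Xen ones: a global flag set around a call is shared by every CPU that can enter the function, while a parameter is private to each call.

    /* Illustrative sketch only (hypothetical names): replacing a
     * file-scope flag with a per-call parameter, as the in_tpa
     * change above does. */
    #include <stdio.h>

    static int in_tpa_global;   /* racy once several CPUs can enter
                                   the translate path concurrently */

    static int translate_with_global(unsigned long addr)
    {
        (void)addr;
        return in_tpa_global;   /* behaviour depends on shared state */
    }

    static int translate(unsigned long addr, int in_tpa)
    {
        (void)addr;
        return in_tpa;          /* behaviour is purely local */
    }

    int main(void)
    {
        in_tpa_global = 1;      /* old style: set, call, clear */
        printf("global: %d\n", translate_with_global(0x1000));
        in_tpa_global = 0;

        printf("param:  %d\n", translate(0x1000, 1));
        return 0;
    }
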
@@ -1849,8 +1830,6 @@ // TODO: Only allowed for current vcpu UINT64 mpaddr, paddr; IA64FAULT fault; - unsigned long translate_domain_mpaddr(unsigned long); - IA64FAULT vcpu_tpa(VCPU *, UINT64, UINT64 *); fault = vcpu_tpa(vcpu, vadr, &mpaddr); if (fault == IA64_NO_FAULT) { @@ -1885,7 +1864,6 @@ IA64FAULT vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 addr_range) { - extern void ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits); // FIXME: validate not flushing Xen addresses // if (Xen address) return(IA64_ILLOP_FAULT); // FIXME: ??breaks if domain PAGE_SIZE < Xen PAGE_SIZE diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/xen/xenirq.c --- a/xen/arch/ia64/xen/xenirq.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/xen/xenirq.c Mon Mar 20 08:56:54 2006 @@ -68,7 +68,7 @@ * ONLY gets called from ia64_leave_kernel * ONLY call with interrupts enabled */ -void process_soft_irq() +void process_soft_irq(void) { if (!in_interrupt() && local_softirq_pending()) { add_preempt_count(SOFTIRQ_OFFSET); diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/xen/xenmem.c --- a/xen/arch/ia64/xen/xenmem.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/xen/xenmem.c Mon Mar 20 08:56:54 2006 @@ -13,12 +13,11 @@ #include <asm/pgtable.h> #include <xen/mm.h> -extern struct page *zero_page_memmap_ptr; struct page_info *frame_table; unsigned long frame_table_size; unsigned long max_page; -struct page *mem_map; +struct page_info *mem_map; #define MAX_DMA_ADDRESS ~0UL // FIXME??? #ifdef CONFIG_VIRTUAL_MEM_MAP @@ -35,6 +34,8 @@ paging_init (void) { unsigned int mpt_order; + unsigned long i; + /* Create machine to physical mapping table * NOTE: similar to frame table, later we may need virtually * mapped mpt table if large hole exists. Also MAX_ORDER needs @@ -47,10 +48,9 @@ panic("Not enough memory to bootstrap Xen.\n"); printk("machine to physical table: 0x%lx\n", (u64)mpt_table); - memset(mpt_table, INVALID_M2P_ENTRY, mpt_table_size); - /* Other mapping setup */ - - zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); + for (i = 0; i < (1UL << mpt_order); i++) { + mpt_table[i] = INVALID_M2P_ENTRY; + } } /* FIXME: postpone support to machines with big holes between physical memorys. 
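
The xenmem.c hunk just above replaces the memset()-based initialisation of the machine-to-physical table with an explicit per-entry loop. memset() stores a single repeated byte, so it only produces the intended bit pattern when the sentinel value happens to be the same byte repeated; assigning whole entries is correct for any value. Below is a minimal standalone sketch of the difference, using a hypothetical INVALID_ENTRY constant rather than Xen's INVALID_M2P_ENTRY.

    /* Illustrative only: byte-wise memset() vs. an entry-wise loop. */
    #include <stdio.h>
    #include <string.h>

    #define INVALID_ENTRY 0x7fffffffffffffffUL   /* not a repeated byte */
    #define NR_ENTRIES    4

    int main(void)
    {
        unsigned long a[NR_ENTRIES], b[NR_ENTRIES], i;

        /* memset() keeps only the low byte of its fill value (0xff
         * here), so every byte of a[] becomes 0xff and a[0] reads
         * back as ~0UL on an LP64 system. */
        memset(a, (int)INVALID_ENTRY, sizeof(a));

        /* Entry-wise stores write the full value, whatever it is. */
        for (i = 0; i < NR_ENTRIES; i++)
            b[i] = INVALID_ENTRY;

        printf("memset: %#lx  loop: %#lx\n", a[0], b[0]);
        return 0;
    }
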
diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/xen/xenmisc.c --- a/xen/arch/ia64/xen/xenmisc.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/xen/xenmisc.c Mon Mar 20 08:56:54 2006 @@ -114,23 +114,6 @@ #endif /////////////////////////////// -// from arch/ia64/page_alloc.c -/////////////////////////////// -DEFINE_PER_CPU(struct page_state, page_states) = {0}; -unsigned long totalram_pages; - -void __mod_page_state(unsigned long offset, unsigned long delta) -{ - unsigned long flags; - void* ptr; - - local_irq_save(flags); - ptr = &__get_cpu_var(page_states); - *(unsigned long*)(ptr + offset) += delta; - local_irq_restore(flags); -} - -/////////////////////////////// // from arch/x86/flushtlb.c /////////////////////////////// @@ -147,12 +130,17 @@ //memset(percpu_info, 0, sizeof(percpu_info)); } -#if 0 -void free_page_type(struct page_info *page, unsigned int type) -{ - dummy(); -} -#endif +void free_page_type(struct page_info *page, u32 type) +{ +// dummy(); + return; +} + +int alloc_page_type(struct page_info *page, u32 type) +{ +// dummy(); + return 1; +} /////////////////////////////// //// misc memory stuff @@ -166,7 +154,7 @@ return (unsigned long)p; } -void __free_pages(struct page *page, unsigned int order) +void __free_pages(struct page_info *page, unsigned int order) { if (order) BUG(); free_xenheap_page(page); @@ -306,9 +294,9 @@ uint64_t pta; local_irq_save(spsr); - if(VMX_DOMAIN(prev)){ - vtm_domain_out(prev); - } +// if(VMX_DOMAIN(prev)){ +// vtm_domain_out(prev); +// } context_switch_count++; switch_to(prev,next,prev); // if(VMX_DOMAIN(current)){ @@ -326,7 +314,7 @@ } if (VMX_DOMAIN(current)){ - vtm_domain_in(current); +// vtm_domain_in(current); vmx_load_all_rr(current); }else{ extern char ia64_ivt; @@ -415,3 +403,203 @@ } else printk("sync_split_caches ignored for CPU with no split cache\n"); } + +/////////////////////////////// +// from arch/x86/mm.c +/////////////////////////////// + +#ifdef VERBOSE +#define MEM_LOG(_f, _a...) \ + printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \ + current->domain->domain_id , __LINE__ , ## _a ) +#else +#define MEM_LOG(_f, _a...) ((void)0) +#endif + +void cleanup_writable_pagetable(struct domain *d) +{ + return; +} + +void put_page_type(struct page_info *page) +{ + u32 nx, x, y = page->u.inuse.type_info; + + again: + do { + x = y; + nx = x - 1; + + ASSERT((x & PGT_count_mask) != 0); + + /* + * The page should always be validated while a reference is held. The + * exception is during domain destruction, when we forcibly invalidate + * page-table pages if we detect a referential loop. + * See domain.c:relinquish_list(). + */ + ASSERT((x & PGT_validated) || + test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags)); + + if ( unlikely((nx & PGT_count_mask) == 0) ) + { + /* Record TLB information for flush later. Races are harmless. */ + page->tlbflush_timestamp = tlbflush_current_time(); + + if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) && + likely(nx & PGT_validated) ) + { + /* + * Page-table pages must be unvalidated when count is zero. The + * 'free' is safe because the refcnt is non-zero and validated + * bit is clear => other ops will spin or fail. + */ + if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, + x & ~PGT_validated)) != x) ) + goto again; + /* We cleared the 'valid bit' so we do the clean up. */ + free_page_type(page, x); + /* Carry on, but with the 'valid bit' now clear. 
*/ + x &= ~PGT_validated; + nx &= ~PGT_validated; + } + } + else if ( unlikely(((nx & (PGT_pinned | PGT_count_mask)) == + (PGT_pinned | 1)) && + ((nx & PGT_type_mask) != PGT_writable_page)) ) + { + /* Page is now only pinned. Make the back pointer mutable again. */ + nx |= PGT_va_mutable; + } + } + while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); +} + + +int get_page_type(struct page_info *page, u32 type) +{ + u32 nx, x, y = page->u.inuse.type_info; + + again: + do { + x = y; + nx = x + 1; + if ( unlikely((nx & PGT_count_mask) == 0) ) + { + MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page)); + return 0; + } + else if ( unlikely((x & PGT_count_mask) == 0) ) + { + if ( (x & (PGT_type_mask|PGT_va_mask)) != type ) + { + if ( (x & PGT_type_mask) != (type & PGT_type_mask) ) + { + /* + * On type change we check to flush stale TLB + * entries. This may be unnecessary (e.g., page + * was GDT/LDT) but those circumstances should be + * very rare. + */ + cpumask_t mask = + page_get_owner(page)->domain_dirty_cpumask; + tlbflush_filter(mask, page->tlbflush_timestamp); + + if ( unlikely(!cpus_empty(mask)) ) + { + perfc_incrc(need_flush_tlb_flush); + flush_tlb_mask(mask); + } + } + + /* We lose existing type, back pointer, and validity. */ + nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated); + nx |= type; + + /* No special validation needed for writable pages. */ + /* Page tables and GDT/LDT need to be scanned for validity. */ + if ( type == PGT_writable_page ) + nx |= PGT_validated; + } + } + else + { + if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) ) + { + if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) ) + { + if ( current->domain == page_get_owner(page) ) + { + /* + * This ensures functions like set_gdt() see up-to-date + * type info without needing to clean up writable p.t. + * state on the fast path. + */ + LOCK_BIGLOCK(current->domain); + cleanup_writable_pagetable(current->domain); + y = page->u.inuse.type_info; + UNLOCK_BIGLOCK(current->domain); + /* Can we make progress now? */ + if ( ((y & PGT_type_mask) == (type & PGT_type_mask)) || + ((y & PGT_count_mask) == 0) ) + goto again; + } + if ( ((x & PGT_type_mask) != PGT_l2_page_table) || + ((type & PGT_type_mask) != PGT_l1_page_table) ) + MEM_LOG("Bad type (saw %" PRtype_info + " != exp %" PRtype_info ") " + "for mfn %lx (pfn %lx)", + x, type, page_to_mfn(page), + get_gpfn_from_mfn(page_to_mfn(page))); + return 0; + } + else if ( (x & PGT_va_mask) == PGT_va_mutable ) + { + /* The va backpointer is mutable, hence we update it. */ + nx &= ~PGT_va_mask; + nx |= type; /* we know the actual type is correct */ + } + else if ( ((type & PGT_va_mask) != PGT_va_mutable) && + ((type & PGT_va_mask) != (x & PGT_va_mask)) ) + { +#ifdef CONFIG_X86_PAE + /* We use backptr as extra typing. Cannot be unknown. */ + if ( (type & PGT_type_mask) == PGT_l2_page_table ) + return 0; +#endif + /* This table is possibly mapped at multiple locations. */ + nx &= ~PGT_va_mask; + nx |= PGT_va_unknown; + } + } + if ( unlikely(!(x & PGT_validated)) ) + { + /* Someone else is updating validation of this page. Wait... */ + while ( (y = page->u.inuse.type_info) == x ) + cpu_relax(); + goto again; + } + } + } + while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); + + if ( unlikely(!(nx & PGT_validated)) ) + { + /* Try to validate page type; drop the new reference on failure. 
*/ + if ( unlikely(!alloc_page_type(page, type)) ) + { + MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %" + PRtype_info ": caf=%08x taf=%" PRtype_info, + page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), + type, page->count_info, page->u.inuse.type_info); + /* Noone else can get a reference. We hold the only ref. */ + page->u.inuse.type_info = 0; + return 0; + } + + /* Noone else is updating simultaneously. */ + __set_bit(_PGT_validated, &page->u.inuse.type_info); + } + + return 1; +} diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/xen/xensetup.c --- a/xen/arch/ia64/xen/xensetup.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/xen/xensetup.c Mon Mar 20 08:56:54 2006 @@ -27,6 +27,7 @@ unsigned long xenheap_phys_end; char saved_command_line[COMMAND_LINE_SIZE]; +char dom0_command_line[COMMAND_LINE_SIZE]; struct vcpu *idle_vcpu[NR_CPUS]; @@ -119,11 +120,12 @@ void early_cmdline_parse(char **cmdline_p) { char *guest_cmd; - char *split = "--"; + static const char * const split = "--"; if (*cmdline_p == NULL) { *cmdline_p = &null[0]; saved_command_line[0] = '\0'; + dom0_command_line[0] = '\0'; return; } @@ -138,7 +140,8 @@ while (*guest_cmd == ' ') guest_cmd++; } - strlcpy(saved_command_line, guest_cmd, COMMAND_LINE_SIZE); + strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE); + strlcpy(dom0_command_line, guest_cmd, COMMAND_LINE_SIZE); return; } @@ -155,24 +158,6 @@ .parity = 'n', .stop_bits = 1 }; -/* This is a wrapper function of init_domheap_pages, - * memory exceeds (max_page<<PAGE_SHIFT) will not be reclaimed. - * This function will go away when the virtual memmap/discontig - * memory issues are solved - */ -void init_domheap_pages_wrapper(unsigned long ps, unsigned long pe) -{ - unsigned long s_nrm, e_nrm, max_mem; - max_mem = (max_page+1)<<PAGE_SHIFT; - s_nrm = (ps+PAGE_SIZE-1)&PAGE_MASK; - e_nrm = pe&PAGE_MASK; - s_nrm = min(s_nrm, max_mem); - e_nrm = min(e_nrm, max_mem); - if(s_nrm < e_nrm) - init_domheap_pages(s_nrm, e_nrm); -} - - void start_kernel(void) { @@ -349,7 +334,6 @@ if ( num_online_cpus() >= max_cpus ) break; if ( !cpu_online(i) ) { -printk("About to call __cpu_up(%d)\n",i); __cpu_up(i); } } @@ -395,17 +379,6 @@ /* PIN domain0 on CPU 0. */ dom0->vcpu[0]->cpu_affinity = cpumask_of_cpu(0); - /* The stash space for the initial kernel image can now be freed up. */ - /* init_domheap_pages_wrapper is temporary solution, please refer to the - * descriptor of this function */ - init_domheap_pages_wrapper(ia64_boot_param->domain_start, - ia64_boot_param->domain_start+ia64_boot_param->domain_size); - /* throw away initrd area passed from elilo */ - if (ia64_boot_param->initrd_size) { - init_domheap_pages_wrapper(ia64_boot_param->initrd_start, - ia64_boot_param->initrd_start+ia64_boot_param->initrd_size); - } - if (!running_on_sim) // slow on ski and pages are pre-initialized to zero scrub_heap_pages(); diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/xen/xentime.c --- a/xen/arch/ia64/xen/xentime.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/xen/xentime.c Mon Mar 20 08:56:54 2006 @@ -116,6 +116,12 @@ } #endif #endif + +#if 0 + /* Nobody seems to be able to explain this code. + It seems to be accumulated tricks, which are not required anymore. + Also I have made many tests, I'd like to get confirmation from + other site (TG). */ if (current->domain == dom0) { // FIXME: there's gotta be a better way of doing this... 
// We have to ensure that domain0 is launched before we @@ -130,6 +136,7 @@ vcpu_wake(dom0->vcpu[0]); } } +#endif if (!is_idle_domain(current->domain)) { if (vcpu_timer_expired(current)) { vcpu_pend_timer(current); @@ -141,7 +148,7 @@ new_itm = local_cpu_data->itm_next; if (!VMX_DOMAIN(current) && !time_after(ia64_get_itc(), new_itm)) - return; + return IRQ_HANDLED; if (VMX_DOMAIN(current)) vcpu_wake(current); diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/Makefile Mon Mar 20 08:56:54 2006 @@ -1,59 +1,61 @@ - include $(BASEDIR)/Rules.mk -OBJS += $(patsubst %.S,%.o,$(wildcard $(TARGET_SUBARCH)/*.S)) -OBJS += $(patsubst %.c,%.o,$(wildcard $(TARGET_SUBARCH)/*.c)) -OBJS += $(patsubst %.c,%.o,$(wildcard acpi/*.c)) -OBJS += $(patsubst %.c,%.o,$(wildcard genapic/*.c)) -OBJS += $(patsubst %.c,%.o,$(wildcard cpu/*.c)) -OBJS += $(patsubst %.c,%.o,$(wildcard cpu/mcheck/*.c)) -OBJS += $(patsubst %.c,%.o,$(wildcard cpu/mtrr/*.c)) -OBJS += $(patsubst %.c,%.o,$(wildcard hvm/*.c)) -OBJS += $(patsubst %.c,%.o,$(wildcard hvm/vmx/*.c)) -OBJS += $(patsubst %.S,%.o,$(wildcard hvm/vmx/$(TARGET_SUBARCH)/*.S)) -OBJS += $(patsubst %.c,%.o,$(wildcard hvm/svm/*.c)) -OBJS += $(patsubst %.S,%.o,$(wildcard hvm/svm/$(TARGET_SUBARCH)/*.S)) +subdirs-y += acpi +subdirs-y += cpu +subdirs-y += genapic +subdirs-y += hvm -ifeq ($(TARGET_SUBARCH),x86_64) -OBJS := $(subst cpu/centaur.o,,$(OBJS)) -OBJS := $(subst cpu/cyrix.o,,$(OBJS)) -OBJS := $(subst cpu/rise.o,,$(OBJS)) -OBJS := $(subst cpu/transmeta.o,,$(OBJS)) +subdirs-$(x86_32) += x86_32 +subdirs-$(x86_64) += x86_64 + +obj-y += apic.o +obj-y += audit.o +obj-y += bitops.o +obj-y += delay.o +obj-y += dmi_scan.o +obj-y += dom0_ops.o +obj-y += domain.o +obj-y += domain_build.o +obj-y += e820.o +obj-y += extable.o +obj-y += flushtlb.o +obj-y += i387.o +obj-y += i8259.o +obj-y += io_apic.o +obj-y += irq.o +obj-y += microcode.o +obj-y += mm.o +obj-y += mpparse.o +obj-y += nmi.o +obj-y += physdev.o +obj-y += rwlock.o +obj-y += setup.o +obj-y += smp.o +obj-y += smpboot.o +obj-y += string.o +obj-y += time.o +obj-y += trampoline.o +obj-y += traps.o +obj-y += usercopy.o +obj-y += x86_emulate.o + +ifneq ($(pae),n) +obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o +else +obj-$(x86_32) += shadow32.o endif -OBJS := $(patsubst shadow%.o,,$(OBJS)) # drop all -ifeq ($(TARGET_SUBARCH),x86_64) - OBJS += shadow.o shadow_public.o shadow_guest32.o shadow_guest32pae.o # x86_64: new code -endif -ifeq ($(TARGET_SUBARCH),x86_32) - ifneq ($(pae),n) - OBJS += shadow.o shadow_public.o shadow_guest32.o # x86_32p: new code - else - OBJS += shadow32.o # x86_32: old code - endif -endif +obj-$(x86_64) += shadow.o shadow_public.o shadow_guest32.o shadow_guest32pae.o -ifneq ($(supervisor_mode_kernel),y) -OBJS := $(subst x86_32/supervisor_mode_kernel.o,,$(OBJS)) -endif +obj-$(crash_debug) += gdbstub.o -OBJS := $(subst $(TARGET_SUBARCH)/asm-offsets.o,,$(OBJS)) -OBJS := $(subst $(TARGET_SUBARCH)/xen.lds.o,,$(OBJS)) - -ifneq ($(crash_debug),y) -OBJS := $(patsubst gdbstub%.o,,$(OBJS)) -endif - -default: $(TARGET) +include $(BASEDIR)/Post.mk $(TARGET): $(TARGET)-syms boot/mkelf32 ./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000 \ `$(NM) $(TARGET)-syms | sort | tail -n 1 | sed -e 's/^\([^ ]*\).*/0x\1/'` -$(CURDIR)/arch.o: $(OBJS) - $(LD) $(LDFLAGS) -r -o $@ $(OBJS) - -$(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) xen.lds +$(TARGET)-syms: boot/$(TARGET_SUBARCH).o xen.lds $(LD) $(LDFLAGS) -T xen.lds -N \ 
boot/$(TARGET_SUBARCH).o $(ALL_OBJS) -o $@ $(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S @@ -77,21 +79,5 @@ shadow_guest32.o: shadow.c -clean: - rm -f *.o *.s *~ core boot/*.o boot/*~ boot/core boot/mkelf32 - rm -f x86_32/*.o x86_32/*~ x86_32/core - rm -f x86_64/*.o x86_64/*~ x86_64/core - rm -f mtrr/*.o mtrr/*~ mtrr/core - rm -f acpi/*.o acpi/*~ acpi/core - rm -f genapic/*.o genapic/*~ genapic/core - rm -f cpu/*.o cpu/*~ cpu/core - rm -f hvm/*.o hvm/*~ hvm/core - rm -f hvm/vmx/*.o hvm/vmx/*~ hvm/vmx/core - rm -f hvm/vmx/x86_32/*.o hvm/vmx/x86_32/*~ hvm/vmx/x86_32/core - rm -f hvm/vmx/x86_64/*.o hvm/vmx/x86_64/*~ hvm/vmx/x86_64/core - rm -f hvm/svm/*.o hvm/svm/*~ hvm/svm/core - rm -f hvm/svm/x86_32/*.o hvm/svm/x86_32/*~ hvm/svm/x86_32/core - rm -f hvm/svm/x86_64/*.o hvm/svm/x86_64/*~ hvm/svm/x86_64/core - rm -f xen.lds - -.PHONY: default clean +clean:: FORCE + rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32 diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/Rules.mk --- a/xen/arch/x86/Rules.mk Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/Rules.mk Mon Mar 20 08:56:54 2006 @@ -1,5 +1,7 @@ ######################################## # x86-specific definitions + +HAS_ACPI := y # # If you change any of these configuration options then you must @@ -31,13 +33,17 @@ endif ifeq ($(XEN_TARGET_ARCH),x86_32) -LDFLAGS += -m elf_i386 +LDFLAGS += -m elf_i386 +x86_32 := y +x86_64 := n endif ifeq ($(TARGET_SUBARCH),x86_64) CFLAGS += -mno-red-zone -fpic -fno-reorder-blocks CFLAGS += -fno-asynchronous-unwind-tables LDFLAGS += -m elf_x86_64 +x86_32 := n +x86_64 := y endif # Test for at least GCC v3.2.x. diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/domain.c Mon Mar 20 08:56:54 2006 @@ -51,6 +51,9 @@ } __cacheline_aligned; static struct percpu_ctxt percpu_ctxt[NR_CPUS]; +static void paravirt_ctxt_switch_from(struct vcpu *v); +static void paravirt_ctxt_switch_to(struct vcpu *v); + static void continue_idle_domain(struct vcpu *v) { reset_stack_and_jump(idle_loop); @@ -225,6 +228,9 @@ { v->arch.schedule_tail = continue_nonidle_domain; } + + v->arch.ctxt_switch_from = paravirt_ctxt_switch_from; + v->arch.ctxt_switch_to = paravirt_ctxt_switch_to; v->arch.perdomain_ptes = d->arch.mm_perdomain_pt + (vcpu_id << GDT_LDT_VCPU_SHIFT); @@ -685,21 +691,32 @@ percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask; } -#define switch_kernel_stack(_n,_c) ((void)0) +#define switch_kernel_stack(v) ((void)0) #elif defined(__i386__) #define load_segments(n) ((void)0) #define save_segments(p) ((void)0) -static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu) -{ - struct tss_struct *tss = &init_tss[cpu]; - tss->esp1 = n->arch.guest_context.kernel_sp; - tss->ss1 = n->arch.guest_context.kernel_ss; -} - -#endif +static inline void switch_kernel_stack(struct vcpu *v) +{ + struct tss_struct *tss = &init_tss[smp_processor_id()]; + tss->esp1 = v->arch.guest_context.kernel_sp; + tss->ss1 = v->arch.guest_context.kernel_ss; +} + +#endif /* __i386__ */ + +static void paravirt_ctxt_switch_from(struct vcpu *v) +{ + save_segments(v); +} + +static void paravirt_ctxt_switch_to(struct vcpu *v) +{ + set_int80_direct_trap(v); + switch_kernel_stack(v); +} #define loaddebug(_v,_reg) \ __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg])) @@ -720,15 +737,7 @@ stack_regs, CTXT_SWITCH_STACK_BYTES); unlazy_fpu(p); - if ( !hvm_guest(p) ) - { - save_segments(p); - } - else - { - 
hvm_save_segments(p); - hvm_load_msrs(); - } + p->arch.ctxt_switch_from(p); } if ( !is_idle_vcpu(n) ) @@ -749,15 +758,7 @@ loaddebug(&n->arch.guest_context, 7); } - if ( !hvm_guest(n) ) - { - set_int80_direct_trap(n); - switch_kernel_stack(n, cpu); - } - else - { - hvm_restore_msrs(n); - } + n->arch.ctxt_switch_to(n); } if ( p->domain != n->domain ) diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/hvm/intercept.c Mon Mar 20 08:56:54 2006 @@ -338,10 +338,10 @@ static __inline__ void missed_ticks(struct hvm_virpit*vpit) { - int missed_ticks; + int missed_ticks; missed_ticks = (NOW() - vpit->scheduled)/(s_time_t) vpit->period; - if ( missed_ticks > 0 ) { + if ( missed_ticks++ >= 0 ) { vpit->pending_intr_nr += missed_ticks; vpit->scheduled += missed_ticks * vpit->period; } @@ -355,22 +355,16 @@ /* pick up missed timer tick */ missed_ticks(vpit); - - vpit->pending_intr_nr++; if ( test_bit(_VCPUF_running, &v->vcpu_flags) ) { - vpit->scheduled += vpit->period; set_timer(&vpit->pit_timer, vpit->scheduled); } } +/* pick up missed timer ticks at deactive time */ void pickup_deactive_ticks(struct hvm_virpit *vpit) { - if ( !active_timer(&(vpit->pit_timer)) ) { - /* pick up missed timer tick */ missed_ticks(vpit); - - vpit->scheduled += vpit->period; set_timer(&vpit->pit_timer, vpit->scheduled); } } diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/hvm/svm/svm.c Mon Mar 20 08:56:54 2006 @@ -200,7 +200,8 @@ return 1; } -void svm_store_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs) +static void svm_store_cpu_guest_regs( + struct vcpu *v, struct cpu_user_regs *regs) { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; @@ -227,23 +228,11 @@ #endif } -void svm_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs) +static void svm_load_cpu_guest_regs( + struct vcpu *v, struct cpu_user_regs *regs) { svm_load_cpu_user_regs(v, regs); } - -#ifdef __x86_64__ - -void svm_save_segments(struct vcpu *v) -{ -} -void svm_load_msrs(void) -{ -} -void svm_restore_msrs(struct vcpu *v) -{ -} -#endif #define IS_CANO_ADDRESS(add) 1 @@ -458,12 +447,6 @@ hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs; hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs; - -#ifdef __x86_64__ - hvm_funcs.save_segments = svm_save_segments; - hvm_funcs.load_msrs = svm_load_msrs; - hvm_funcs.restore_msrs = svm_restore_msrs; -#endif hvm_funcs.store_cpu_guest_ctrl_regs = svm_store_cpu_guest_ctrl_regs; hvm_funcs.modify_guest_state = svm_modify_guest_state; @@ -687,9 +670,19 @@ reset_stack_and_jump(svm_asm_do_launch); } +static void svm_ctxt_switch_from(struct vcpu *v) +{ +} + +static void svm_ctxt_switch_to(struct vcpu *v) +{ +} + void svm_final_setup_guest(struct vcpu *v) { - v->arch.schedule_tail = arch_svm_do_launch; + v->arch.schedule_tail = arch_svm_do_launch; + v->arch.ctxt_switch_from = svm_ctxt_switch_from; + v->arch.ctxt_switch_to = svm_ctxt_switch_to; if (v == v->domain->vcpu[0]) { diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/hvm/vmx/io.c --- a/xen/arch/x86/hvm/vmx/io.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/hvm/vmx/io.c Mon Mar 20 08:56:54 2006 @@ -40,20 +40,33 @@ #define BSP_CPU(v) (!(v->vcpu_id)) -void vmx_set_tsc_shift(struct vcpu *v, struct hvm_virpit *vpit) -{ - u64 drift; - - if ( vpit->first_injected ) - drift = vpit->period_cycles * vpit->pending_intr_nr; - else - drift = 0; - vpit->shift = 
v->arch.hvm_vmx.tsc_offset - drift; - __vmwrite(TSC_OFFSET, vpit->shift); - +static inline +void __set_tsc_offset(u64 offset) +{ + __vmwrite(TSC_OFFSET, offset); #if defined (__i386__) - __vmwrite(TSC_OFFSET_HIGH, ((vpit->shift)>> 32)); + __vmwrite(TSC_OFFSET_HIGH, offset >> 32); #endif +} + +u64 get_guest_time(struct vcpu *v) +{ + struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit); + u64 host_tsc; + + rdtscll(host_tsc); + return host_tsc + vpit->cache_tsc_offset; +} + +void set_guest_time(struct vcpu *v, u64 gtime) +{ + struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit); + u64 host_tsc; + + rdtscll(host_tsc); + + vpit->cache_tsc_offset = gtime - host_tsc; + __set_tsc_offset(vpit->cache_tsc_offset); } static inline void @@ -64,6 +77,7 @@ if ( is_pit_irq(v, vector, type) ) { if ( !vpit->first_injected ) { vpit->pending_intr_nr = 0; + vpit->last_pit_gtime = get_guest_time(v); vpit->scheduled = NOW() + vpit->period; set_timer(&vpit->pit_timer, vpit->scheduled); vpit->first_injected = 1; @@ -71,7 +85,9 @@ vpit->pending_intr_nr--; } vpit->inject_point = NOW(); - vmx_set_tsc_shift (v, vpit); + + vpit->last_pit_gtime += vpit->period; + set_guest_time(v, vpit->last_pit_gtime); } switch(type) @@ -189,14 +205,15 @@ vmx_stts(); + /* pick up the elapsed PIT ticks and re-enable pit_timer */ + if ( vpit->first_injected) { + set_guest_time(v, v->domain->arch.hvm_domain.guest_time); + pickup_deactive_ticks(vpit); + } + if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) || test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) ) hvm_wait_io(); - - /* pick up the elapsed PIT ticks and re-enable pit_timer */ - if ( vpit->first_injected ) - pickup_deactive_ticks(vpit); - vmx_set_tsc_shift(v, vpit); /* We can't resume the guest if we're waiting on I/O */ ASSERT(!test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)); diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Mon Mar 20 08:56:54 2006 @@ -195,7 +195,6 @@ /* Update CR3, GDT, LDT, TR */ unsigned int error = 0; unsigned long cr0, cr4; - u64 host_tsc; if (v->vcpu_id == 0) hvm_setup_platform(v->domain); @@ -250,9 +249,7 @@ v->arch.hvm_vmx.launch_cpu = smp_processor_id(); /* init guest tsc to start from 0 */ - rdtscll(host_tsc); - v->arch.hvm_vmx.tsc_offset = 0 - host_tsc; - vmx_set_tsc_shift(v, &v->domain->arch.hvm_domain.vpit); + set_guest_time(v, 0); } /* diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/hvm/vmx/vmx.c Mon Mar 20 08:56:54 2006 @@ -50,9 +50,14 @@ static unsigned long trace_values[NR_CPUS][4]; #define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value +static void vmx_ctxt_switch_from(struct vcpu *v); +static void vmx_ctxt_switch_to(struct vcpu *v); + void vmx_final_setup_guest(struct vcpu *v) { - v->arch.schedule_tail = arch_vmx_do_launch; + v->arch.schedule_tail = arch_vmx_do_launch; + v->arch.ctxt_switch_from = vmx_ctxt_switch_from; + v->arch.ctxt_switch_to = vmx_ctxt_switch_to; if ( v->vcpu_id == 0 ) { @@ -105,6 +110,7 @@ } #ifdef __x86_64__ + static struct vmx_msr_state percpu_msr[NR_CPUS]; static u32 msr_data_index[VMX_MSR_COUNT] = @@ -113,7 +119,7 @@ MSR_SYSCALL_MASK, MSR_EFER, }; -void vmx_save_segments(struct vcpu *v) +static void vmx_save_segments(struct vcpu *v) { rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.msr_content.shadow_gs); } @@ -124,7 +130,7 @@ * are not modified once set 
for generic domains, we don't save them, * but simply reset them to the values set at percpu_traps_init(). */ -void vmx_load_msrs(void) +static void vmx_load_msrs(void) { struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()]; int i; @@ -166,118 +172,143 @@ #define IS_CANO_ADDRESS(add) 1 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs) { - u64 msr_content = 0; - struct vcpu *vc = current; - struct vmx_msr_state * msr = &vc->arch.hvm_vmx.msr_content; - switch(regs->ecx){ + u64 msr_content = 0; + struct vcpu *v = current; + struct vmx_msr_state *msr = &v->arch.hvm_vmx.msr_content; + + switch ( regs->ecx ) { case MSR_EFER: + HVM_DBG_LOG(DBG_LEVEL_2, "EFER msr_content 0x%"PRIx64, msr_content); msr_content = msr->msr_items[VMX_INDEX_MSR_EFER]; - HVM_DBG_LOG(DBG_LEVEL_2, "EFER msr_content %"PRIx64"\n", msr_content); - if (test_bit(VMX_CPU_STATE_LME_ENABLED, - &vc->arch.hvm_vmx.cpu_state)) - msr_content |= 1 << _EFER_LME; - - if (VMX_LONG_GUEST(vc)) - msr_content |= 1 << _EFER_LMA; - break; + + /* the following code may be not needed */ + if ( test_bit(VMX_CPU_STATE_LME_ENABLED, &v->arch.hvm_vmx.cpu_state) ) + msr_content |= EFER_LME; + else + msr_content &= ~EFER_LME; + + if ( VMX_LONG_GUEST(v) ) + msr_content |= EFER_LMA; + else + msr_content &= ~EFER_LMA; + break; + case MSR_FS_BASE: - if (!(VMX_LONG_GUEST(vc))) + if ( !(VMX_LONG_GUEST(v)) ) /* XXX should it be GP fault */ domain_crash_synchronous(); + __vmread(GUEST_FS_BASE, &msr_content); break; + case MSR_GS_BASE: - if (!(VMX_LONG_GUEST(vc))) + if ( !(VMX_LONG_GUEST(v)) ) domain_crash_synchronous(); + __vmread(GUEST_GS_BASE, &msr_content); break; + case MSR_SHADOW_GS_BASE: msr_content = msr->shadow_gs; break; - CASE_READ_MSR(STAR); - CASE_READ_MSR(LSTAR); - CASE_READ_MSR(CSTAR); - CASE_READ_MSR(SYSCALL_MASK); + CASE_READ_MSR(STAR); + CASE_READ_MSR(LSTAR); + CASE_READ_MSR(CSTAR); + CASE_READ_MSR(SYSCALL_MASK); + default: return 0; } - HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n", - msr_content); + + HVM_DBG_LOG(DBG_LEVEL_2, "msr_content: 0x%"PRIx64, msr_content); + regs->eax = msr_content & 0xffffffff; regs->edx = msr_content >> 32; + return 1; } static inline int long_mode_do_msr_write(struct cpu_user_regs *regs) { - u64 msr_content = regs->eax | ((u64)regs->edx << 32); - struct vcpu *vc = current; - struct vmx_msr_state * msr = &vc->arch.hvm_vmx.msr_content; - struct vmx_msr_state * host_state = - &percpu_msr[smp_processor_id()]; - - HVM_DBG_LOG(DBG_LEVEL_1, " mode_do_msr_write msr %lx " - "msr_content %"PRIx64"\n", + u64 msr_content = regs->eax | ((u64)regs->edx << 32); + struct vcpu *v = current; + struct vmx_msr_state *msr = &v->arch.hvm_vmx.msr_content; + struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()]; + + HVM_DBG_LOG(DBG_LEVEL_1, "msr 0x%lx msr_content 0x%"PRIx64"\n", (unsigned long)regs->ecx, msr_content); - switch (regs->ecx){ + switch ( regs->ecx ) { case MSR_EFER: /* offending reserved bit will cause #GP */ - if ( msr_content & - ~( EFER_LME | EFER_LMA | EFER_NX | EFER_SCE ) ) - vmx_inject_exception(vc, TRAP_gp_fault, 0); - - if ((msr_content & EFER_LME) ^ - test_bit(VMX_CPU_STATE_LME_ENABLED, - &vc->arch.hvm_vmx.cpu_state)){ - if ( vmx_paging_enabled(vc) || + if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) ) + { + printk("trying to set reserved bit in EFER\n"); + vmx_inject_exception(v, TRAP_gp_fault, 0); + return 0; + } + + /* LME: 0 -> 1 */ + if ( msr_content & EFER_LME && + !test_bit(VMX_CPU_STATE_LME_ENABLED, &v->arch.hvm_vmx.cpu_state) 
) + { + if ( vmx_paging_enabled(v) || !test_bit(VMX_CPU_STATE_PAE_ENABLED, - &vc->arch.hvm_vmx.cpu_state)) { - vmx_inject_exception(vc, TRAP_gp_fault, 0); + &v->arch.hvm_vmx.cpu_state) ) + { + printk("trying to set LME bit when " + "in paging mode or PAE bit is not set\n"); + vmx_inject_exception(v, TRAP_gp_fault, 0); + return 0; } - } - if (msr_content & EFER_LME) - set_bit(VMX_CPU_STATE_LME_ENABLED, - &vc->arch.hvm_vmx.cpu_state); - - msr->msr_items[VMX_INDEX_MSR_EFER] = - msr_content; + + set_bit(VMX_CPU_STATE_LME_ENABLED, &v->arch.hvm_vmx.cpu_state); + } + + msr->msr_items[VMX_INDEX_MSR_EFER] = msr_content; break; case MSR_FS_BASE: case MSR_GS_BASE: - if (!(VMX_LONG_GUEST(vc))) + if ( !(VMX_LONG_GUEST(v)) ) domain_crash_synchronous(); - if (!IS_CANO_ADDRESS(msr_content)){ + + if ( !IS_CANO_ADDRESS(msr_content) ) + { HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n"); - vmx_inject_exception(vc, TRAP_gp_fault, 0); - } - if (regs->ecx == MSR_FS_BASE) + vmx_inject_exception(v, TRAP_gp_fault, 0); + return 0; + } + + if ( regs->ecx == MSR_FS_BASE ) __vmwrite(GUEST_FS_BASE, msr_content); else __vmwrite(GUEST_GS_BASE, msr_content); + break; case MSR_SHADOW_GS_BASE: - if (!(VMX_LONG_GUEST(vc))) + if ( !(VMX_LONG_GUEST(v)) ) domain_crash_synchronous(); - vc->arch.hvm_vmx.msr_content.shadow_gs = msr_content; + + v->arch.hvm_vmx.msr_content.shadow_gs = msr_content; wrmsrl(MSR_SHADOW_GS_BASE, msr_content); break; - CASE_WRITE_MSR(STAR); - CASE_WRITE_MSR(LSTAR); - CASE_WRITE_MSR(CSTAR); - CASE_WRITE_MSR(SYSCALL_MASK); + CASE_WRITE_MSR(STAR); + CASE_WRITE_MSR(LSTAR); + CASE_WRITE_MSR(CSTAR); + CASE_WRITE_MSR(SYSCALL_MASK); + default: return 0; } + return 1; } -void -vmx_restore_msrs(struct vcpu *v) +static void vmx_restore_msrs(struct vcpu *v) { int i = 0; struct vmx_msr_state *guest_state; @@ -297,22 +328,52 @@ HVM_DBG_LOG(DBG_LEVEL_2, "restore guest's index %d msr %lx with %lx\n", - i, (unsigned long) msr_data_index[i], (unsigned long) guest_state->msr_items[i]); + i, (unsigned long)msr_data_index[i], + (unsigned long)guest_state->msr_items[i]); set_bit(i, &host_state->flags); wrmsrl(msr_data_index[i], guest_state->msr_items[i]); clear_bit(i, &guest_flags); } } #else /* __i386__ */ -#define vmx_save_init_msrs() ((void)0) - -static inline int long_mode_do_msr_read(struct cpu_user_regs *regs){ + +#define vmx_save_segments(v) ((void)0) +#define vmx_load_msrs() ((void)0) +#define vmx_restore_msrs(v) ((void)0) +#define vmx_save_init_msrs() ((void)0) + +static inline int long_mode_do_msr_read(struct cpu_user_regs *regs) +{ return 0; } -static inline int long_mode_do_msr_write(struct cpu_user_regs *regs){ + +static inline int long_mode_do_msr_write(struct cpu_user_regs *regs) +{ return 0; } -#endif + +#endif /* __i386__ */ + +static void vmx_freeze_time(struct vcpu *v) +{ + struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit; + + v->domain->arch.hvm_domain.guest_time = get_guest_time(v); + if ( vpit->first_injected ) + stop_timer(&(vpit->pit_timer)); +} + +static void vmx_ctxt_switch_from(struct vcpu *v) +{ + vmx_freeze_time(v); + vmx_save_segments(v); + vmx_load_msrs(); +} + +static void vmx_ctxt_switch_to(struct vcpu *v) +{ + vmx_restore_msrs(v); +} void stop_vmx(void) { @@ -554,12 +615,6 @@ hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs; hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs; -#ifdef __x86_64__ - hvm_funcs.save_segments = vmx_save_segments; - hvm_funcs.load_msrs = vmx_load_msrs; - hvm_funcs.restore_msrs = vmx_restore_msrs; -#endif - 
hvm_funcs.store_cpu_guest_ctrl_regs = vmx_store_cpu_guest_ctrl_regs; hvm_funcs.modify_guest_state = vmx_modify_guest_state; @@ -670,27 +725,31 @@ /* Reserved bits: [31:15], [12:11], [9], [6], [2:1] */ #define VMX_VCPU_CPUID_L1_RESERVED 0xffff9a46 -static void vmx_vmexit_do_cpuid(unsigned long input, struct cpu_user_regs *regs) -{ +static void vmx_vmexit_do_cpuid(struct cpu_user_regs *regs) +{ + unsigned int input = (unsigned int)regs->eax; + unsigned int count = (unsigned int)regs->ecx; unsigned int eax, ebx, ecx, edx; unsigned long eip; struct vcpu *v = current; __vmread(GUEST_RIP, &eip); - HVM_DBG_LOG(DBG_LEVEL_1, - "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx," - " (esi) %lx, (edi) %lx", + HVM_DBG_LOG(DBG_LEVEL_3, "(eax) 0x%08lx, (ebx) 0x%08lx, " + "(ecx) 0x%08lx, (edx) 0x%08lx, (esi) 0x%08lx, (edi) 0x%08lx", (unsigned long)regs->eax, (unsigned long)regs->ebx, (unsigned long)regs->ecx, (unsigned long)regs->edx, (unsigned long)regs->esi, (unsigned long)regs->edi); - cpuid(input, &eax, &ebx, &ecx, &edx); + if ( input == 4 ) + cpuid_count(input, count, &eax, &ebx, &ecx, &edx); + else + cpuid(input, &eax, &ebx, &ecx, &edx); if ( input == 1 ) { if ( hvm_apic_support(v->domain) && - !vlapic_global_enabled((VLAPIC(v))) ) + !vlapic_global_enabled((VLAPIC(v))) ) clear_bit(X86_FEATURE_APIC, &edx); #if CONFIG_PAGING_LEVELS < 3 @@ -725,10 +784,12 @@ regs->ecx = (unsigned long) ecx; regs->edx = (unsigned long) edx; - HVM_DBG_LOG(DBG_LEVEL_1, - "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x", - eip, input, eax, ebx, ecx, edx); - + HVM_DBG_LOG(DBG_LEVEL_3, "eip@%lx, input: 0x%lx, " + "output: eax = 0x%08lx, ebx = 0x%08lx, " + "ecx = 0x%08lx, edx = 0x%08lx", + (unsigned long)eip, (unsigned long)input, + (unsigned long)eax, (unsigned long)ebx, + (unsigned long)ecx, (unsigned long)edx); } #define CASE_GET_REG_P(REG, reg) \ @@ -1656,7 +1717,7 @@ rdtscll(msr_content); vpit = &(v->domain->arch.hvm_domain.vpit); - msr_content += vpit->shift; + msr_content += vpit->cache_tsc_offset; break; } case MSR_IA32_SYSENTER_CS: @@ -1700,22 +1761,8 @@ switch (regs->ecx) { case MSR_IA32_TIME_STAMP_COUNTER: - { - struct hvm_virpit *vpit; - u64 host_tsc, drift; - - rdtscll(host_tsc); - vpit = &(v->domain->arch.hvm_domain.vpit); - drift = v->arch.hvm_vmx.tsc_offset - vpit->shift; - vpit->shift = msr_content - host_tsc; - v->arch.hvm_vmx.tsc_offset = vpit->shift + drift; - __vmwrite(TSC_OFFSET, vpit->shift); - -#if defined (__i386__) - __vmwrite(TSC_OFFSET_HIGH, ((vpit->shift)>>32)); -#endif - break; - } + set_guest_time(v, msr_content); + break; case MSR_IA32_SYSENTER_CS: __vmwrite(GUEST_SYSENTER_CS, msr_content); break; @@ -2014,8 +2061,8 @@ __hvm_bug(®s); break; case EXIT_REASON_CPUID: + vmx_vmexit_do_cpuid(®s); __get_instruction_length(inst_len); - vmx_vmexit_do_cpuid(regs.eax, ®s); __update_guest_eip(inst_len); break; case EXIT_REASON_HLT: diff -r 516cf6553011 -r cf89e8f0831b xen/common/Makefile --- a/xen/common/Makefile Mon Mar 20 08:56:46 2006 +++ b/xen/common/Makefile Mon Mar 20 08:56:54 2006 @@ -1,19 +1,34 @@ - include $(BASEDIR)/Rules.mk -ifneq ($(perfc),y) -OBJS := $(subst perfc.o,,$(OBJS)) -endif -ifneq ($(crash_debug),y) -OBJS := $(patsubst gdbstub.o,,$(OBJS)) -endif +obj-y += acm_ops.o +obj-y += bitmap.o +obj-y += dom0_ops.o +obj-y += domain.o +obj-y += elf.o +obj-y += event_channel.o +obj-y += grant_table.o +obj-y += kernel.o +obj-y += keyhandler.o +obj-y += lib.o +obj-y += memory.o +obj-y += multicall.o +obj-y += page_alloc.o +obj-y += rangeset.o +obj-y += sched_bvt.o 
+obj-y += sched_sedf.o +obj-y += schedule.o +obj-y += softirq.o +obj-y += string.o +obj-y += symbols.o +obj-y += trace.o +obj-y += timer.o +obj-y += vsprintf.o +obj-y += xmalloc.o -default: common.o -common.o: $(OBJS) - $(LD) $(LDFLAGS) -r -o common.o $(OBJS) +obj-$(perfc) += perfc.o +obj-$(crash_debug) += gdbstub.o -clean: - rm -f *.o *~ core +include $(BASEDIR)/Post.mk # Object file contains changeset and compiler information. kernel.o: $(BASEDIR)/include/xen/compile.h diff -r 516cf6553011 -r cf89e8f0831b xen/drivers/Makefile --- a/xen/drivers/Makefile Mon Mar 20 08:56:46 2006 +++ b/xen/drivers/Makefile Mon Mar 20 08:56:54 2006 @@ -1,8 +1,6 @@ +include $(BASEDIR)/Rules.mk -default: - $(MAKE) -C char - $(MAKE) -C acpi +subdirs-y := char/ +subdirs-$(HAS_ACPI) += acpi/ -clean: - $(MAKE) -C char clean - $(MAKE) -C acpi clean +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/drivers/acpi/Makefile --- a/xen/drivers/acpi/Makefile Mon Mar 20 08:56:46 2006 +++ b/xen/drivers/acpi/Makefile Mon Mar 20 08:56:54 2006 @@ -1,11 +1,5 @@ - include $(BASEDIR)/Rules.mk -OBJS := tables.o +obj-y += tables.o -default: driver.o -driver.o: $(OBJS) - $(LD) $(LDFLAGS) -r -o driver.o $(OBJS) - -clean: - rm -f *.o *~ core +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/drivers/char/Makefile --- a/xen/drivers/char/Makefile Mon Mar 20 08:56:46 2006 +++ b/xen/drivers/char/Makefile Mon Mar 20 08:56:54 2006 @@ -1,12 +1,10 @@ - include $(BASEDIR)/Rules.mk -default: driver.o -driver.o: $(OBJS) - $(LD) $(LDFLAGS) -r -o driver.o $(OBJS) +obj-y += console.o +obj-y += ns16550.o +obj-y += serial.o -clean: - rm -f *.o *~ core +include $(BASEDIR)/Post.mk # Object file contains changeset and compiler information. console.o: $(BASEDIR)/include/xen/compile.h diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/config.h --- a/xen/include/asm-ia64/config.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/config.h Mon Mar 20 08:56:54 2006 @@ -74,7 +74,7 @@ extern unsigned long dom0_size; // from linux/include/linux/mm.h -extern struct page *mem_map; +extern struct page_info *mem_map; // xen/include/asm/config.h extern char _end[]; /* standard ELF symbol */ @@ -134,9 +134,6 @@ #define smp_num_siblings 1 #endif -// from linux/include/linux/mm.h -struct page; - // function calls; see decl in xen/include/xen/sched.h #undef free_task_struct #undef alloc_task_struct @@ -206,8 +203,6 @@ #define _atomic_read(v) ((v).counter) #define atomic_compareandswap(old, new, v) ((atomic_t){ cmpxchg(v, _atomic_read(old), _atomic_read(new)) }) -// see include/asm-ia64/mm.h, handle remaining page_info uses until gone -#define page_info page // Deprivated linux inf and put here for short time compatibility #define kmalloc(s, t) xmalloc_bytes((s)) #define kfree(s) xfree((s)) @@ -249,7 +244,7 @@ extern char saved_command_line[]; struct screen_info { }; #define seq_printf(a,b...) 
printf(b) -#define CONFIG_BLK_DEV_INITRD // needed to reserve memory for domain0 +//#define CONFIG_BLK_DEV_INITRD // needed to reserve memory for domain0 void dummy_called(char *function); #define dummy() dummy_called((char *) __FUNCTION__) diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/dom_fw.h --- a/xen/include/asm-ia64/dom_fw.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/dom_fw.h Mon Mar 20 08:56:54 2006 @@ -119,7 +119,7 @@ #define FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT_INDEX) #define FW_HYPERCALL_EFI_RESET_SYSTEM_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_EFI_RESET_SYSTEM_INDEX) -extern struct ia64_pal_retval xen_pal_emulator(UINT64,UINT64,UINT64,UINT64); +extern struct ia64_pal_retval xen_pal_emulator(UINT64, u64, u64, u64); extern struct sal_ret_values sal_emulator (long index, unsigned long in1, unsigned long in2, unsigned long in3, unsigned long in4, unsigned long in5, unsigned long in6, unsigned long in7); extern struct ia64_pal_retval pal_emulator_static (unsigned long); diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/domain.h --- a/xen/include/asm-ia64/domain.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/domain.h Mon Mar 20 08:56:54 2006 @@ -14,7 +14,6 @@ extern void domain_relinquish_resources(struct domain *); struct arch_domain { - struct mm_struct *active_mm; struct mm_struct *mm; unsigned long metaphysical_rr0; unsigned long metaphysical_rr4; @@ -68,7 +67,6 @@ int breakimm; // from arch_domain (so is pinned) int starting_rid; /* first RID assigned to domain */ int ending_rid; /* one beyond highest RID assigned to domain */ - struct mm_struct *active_mm; struct thread_struct _thread; // this must be last thash_cb_t *vtlb; @@ -81,7 +79,6 @@ struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */ }; -#define active_mm arch.active_mm //#define thread arch._thread // FOLLOWING FROM linux-2.6.7/include/sched.h @@ -101,6 +98,8 @@ struct rw_semaphore mmap_sem; #endif spinlock_t page_table_lock; /* Protects task page tables and mm->rss */ + + struct list_head pt_list; /* List of pagetable */ struct list_head mmlist; /* List of all active mm's. These are globally strung * together off init_mm.mmlist, and are protected diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/flushtlb.h --- a/xen/include/asm-ia64/flushtlb.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/flushtlb.h Mon Mar 20 08:56:54 2006 @@ -1,5 +1,7 @@ #ifndef __FLUSHTLB_H__ #define __FLUSHTLB_H__ + +#include <asm/tlbflush.h> /* The current time as shown by the virtual TLB clock. 
*/ extern u32 tlbflush_clock; diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/linux-xen/asm/page.h --- a/xen/include/asm-ia64/linux-xen/asm/page.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/linux-xen/asm/page.h Mon Mar 20 08:56:54 2006 @@ -75,7 +75,7 @@ flush_dcache_page(page); \ } while (0) - +#ifndef XEN #define alloc_zeroed_user_highpage(vma, vaddr) \ ({ \ struct page *page = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr); \ @@ -83,6 +83,7 @@ flush_dcache_page(page); \ page; \ }) +#endif #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/linux-xen/asm/pgalloc.h --- a/xen/include/asm-ia64/linux-xen/asm/pgalloc.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/linux-xen/asm/pgalloc.h Mon Mar 20 08:56:54 2006 @@ -106,11 +106,13 @@ #define __pmd_free_tlb(tlb, pmd) pmd_free(pmd) +#ifndef XEN static inline void pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, struct page *pte) { pmd_val(*pmd_entry) = page_to_maddr(pte); } +#endif static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte) @@ -118,11 +120,13 @@ pmd_val(*pmd_entry) = __pa(pte); } +#ifndef XEN static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr) { return virt_to_page(pgtable_quicklist_alloc()); } +#endif static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) @@ -130,6 +134,7 @@ return pgtable_quicklist_alloc(); } +#ifndef XEN static inline void pte_free(struct page *pte) { pgtable_quicklist_free(page_address(pte)); @@ -141,6 +146,7 @@ } #define __pte_free_tlb(tlb, pte) pte_free(pte) +#endif extern void check_pgt_cache(void); diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/linux-xen/asm/pgtable.h --- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h Mon Mar 20 08:56:54 2006 @@ -467,8 +467,10 @@ * for zero-mapped memory areas etc.. */ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; +#ifndef XEN extern struct page *zero_page_memmap_ptr; #define ZERO_PAGE(vaddr) (zero_page_memmap_ptr) +#endif /* We provide our own get_unmapped_area to cope with VA holes for userland */ #define HAVE_ARCH_UNMAPPED_AREA diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/linux-xen/asm/system.h --- a/xen/include/asm-ia64/linux-xen/asm/system.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/linux-xen/asm/system.h Mon Mar 20 08:56:54 2006 @@ -290,6 +290,9 @@ #ifdef XEN #include <asm/xensystem.h> +#ifndef __ASSEMBLY__ +struct resource; +#endif #endif #endif /* _ASM_IA64_SYSTEM_H */ diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/linux-xen/asm/tlbflush.h --- a/xen/include/asm-ia64/linux-xen/asm/tlbflush.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/linux-xen/asm/tlbflush.h Mon Mar 20 08:56:54 2006 @@ -80,7 +80,11 @@ #ifdef CONFIG_SMP flush_tlb_range(vma, (addr & PAGE_MASK), (addr & PAGE_MASK) + PAGE_SIZE); #else +#ifdef XEN + if (vma->vm_mm == current->domain->arch.mm) +#else if (vma->vm_mm == current->active_mm) +#endif ia64_ptcl(addr, (PAGE_SHIFT << 2)); #ifndef XEN // FIXME SMP? 
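A sketch, not part of the changeset: the vmx_vmexit_do_cpuid() hunk near the top special-cases CPUID leaf 4 because that leaf (deterministic cache parameters) is sub-leaf indexed, meaning the guest's ECX selects which cache to describe, so it must be forwarded via cpuid_count() rather than clobbered by a plain cpuid(). A standalone user-space illustration of the same enumeration, assuming a CPU that implements leaf 4:

#include <stdint.h>
#include <stdio.h>

/* Illustrative helper: CPUID with an explicit sub-leaf in ECX. */
static void cpuid_count(uint32_t leaf, uint32_t subleaf,
                        uint32_t *eax, uint32_t *ebx,
                        uint32_t *ecx, uint32_t *edx)
{
    __asm__ __volatile__("cpuid"
                         : "=a" (*eax), "=b" (*ebx),
                           "=c" (*ecx), "=d" (*edx)
                         : "0" (leaf), "2" (subleaf));
}

int main(void)
{
    uint32_t eax, ebx, ecx, edx, i;

    /* Leaf 4 describes one cache per sub-leaf; cache type 0 in EAX[4:0]
     * ends the walk, so no prior count of sub-leaves is needed. */
    for (i = 0; ; i++) {
        cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
        if ((eax & 0x1f) == 0)
            break;
        printf("cache %u: type %u, level %u\n",
               i, eax & 0x1f, (eax >> 5) & 0x7);
    }
    return 0;
}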
diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/linux-xen/linux/gfp.h --- a/xen/include/asm-ia64/linux-xen/linux/gfp.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/linux-xen/linux/gfp.h Mon Mar 20 08:56:54 2006 @@ -3,6 +3,7 @@ #ifdef XEN #include <asm/bitops.h> +#include <linux/topology.h> #endif #include <linux/mmzone.h> #include <linux/stddef.h> @@ -81,6 +82,7 @@ * optimized to &contig_page_data at compile-time. */ +#ifndef XEN #ifndef HAVE_ARCH_FREE_PAGE static inline void arch_free_page(struct page *page, int order) { } #endif @@ -134,6 +136,7 @@ #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr),0) +#endif /* XEN */ void page_alloc_init(void); #ifdef CONFIG_NUMA diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/linux/README.origin --- a/xen/include/asm-ia64/linux/README.origin Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/linux/README.origin Mon Mar 20 08:56:54 2006 @@ -14,13 +14,10 @@ jiffies.h -> linux/include/linux/jiffies.h kmalloc_sizes.h -> linux/include/linux/kmalloc_sizes.h linkage.h -> linux/include/linux/linkage.h -mmzone.h -> linux/include/linux/mmzone.h notifier.h -> linux/include/linux/notifier.h numa.h -> linux/include/linux/numa.h -page-flags.h -> linux/include/linux/page-flags.h percpu.h -> linux/include/linux/percpu.h preempt.h -> linux/include/linux/preempt.h -rbtree.h -> linux/include/linux/rbtree.h rwsem.h -> linux/include/linux/rwsem.h seqlock.h -> linux/include/linux/seqlock.h sort.h -> linux/include/linux/sort.h diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/linux/asm-generic/README.origin --- a/xen/include/asm-ia64/linux/asm-generic/README.origin Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/linux/asm-generic/README.origin Mon Mar 20 08:56:54 2006 @@ -10,7 +10,6 @@ errno.h -> linux/include/asm-generic/errno.h ide_iops.h -> linux/include/asm-generic/ide_iops.h iomap.h -> linux/include/asm-generic/iomap.h -pci-dma-compat.h -> linux/include/asm-generic/pci-dma-compat.h pci.h -> linux/include/asm-generic/pci.h pgtable.h -> linux/include/asm-generic/pgtable.h pgtable-nopud.h -> linux/include/asm-generic/pgtable-nopud.h diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/linux/asm/README.origin --- a/xen/include/asm-ia64/linux/asm/README.origin Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/linux/asm/README.origin Mon Mar 20 08:56:54 2006 @@ -42,7 +42,6 @@ rse.h -> linux/include/asm-ia64/rse.h rwsem.h -> linux/include/asm-ia64/rwsem.h sal.h -> linux/include/asm-ia64/sal.h -scatterlist.h -> linux/include/asm-ia64/scatterlist.h sections.h -> linux/include/asm-ia64/sections.h semaphore.h -> linux/include/asm-ia64/semaphore.h setup.h -> linux/include/asm-ia64/setup.h diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/linux/asm/hw_irq.h --- a/xen/include/asm-ia64/linux/asm/hw_irq.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/linux/asm/hw_irq.h Mon Mar 20 08:56:54 2006 @@ -85,6 +85,7 @@ extern void free_irq_vector (int vector); extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect); extern void register_percpu_irq (ia64_vector vec, struct irqaction *action); +extern int xen_do_IRQ(ia64_vector vector); static inline void hw_resend_irq (struct hw_interrupt_type *h, unsigned int vector) diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/linux/asm/irq.h --- a/xen/include/asm-ia64/linux/asm/irq.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/linux/asm/irq.h Mon Mar 20 08:56:54 2006 @@ -40,4 +40,6 @@ struct pt_regs; int 
handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *); +extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs); + #endif /* _ASM_IA64_IRQ_H */ diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/mm.h --- a/xen/include/asm-ia64/mm.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/mm.h Mon Mar 20 08:56:54 2006 @@ -9,8 +9,6 @@ #include <xen/spinlock.h> #include <xen/perfc.h> #include <xen/sched.h> - -#include <linux/rbtree.h> #include <asm/processor.h> #include <asm/atomic.h> @@ -36,37 +34,38 @@ #define PRtype_info "08x" -struct page +struct page_info { /* Each frame can be threaded onto a doubly-linked list. */ struct list_head list; + /* Reference count and various PGC_xxx flags and fields. */ + u32 count_info; + + /* Context-dependent fields follow... */ + union { + + /* Page is in use: ((count_info & PGC_count_mask) != 0). */ + struct { + /* Owner of this page (NULL if page is anonymous). */ + u32 _domain; /* pickled format */ + /* Type reference count and various PGT_xxx flags and fields. */ + unsigned long type_info; + } __attribute__ ((packed)) inuse; + + /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */ + struct { + /* Order-size of the free chunk this page is the head of. */ + u32 order; + /* Mask of possibly-tainted TLBs. */ + cpumask_t cpumask; + } __attribute__ ((packed)) free; + + } u; + /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */ u32 tlbflush_timestamp; - /* Reference count and various PGC_xxx flags and fields. */ - u32 count_info; - - /* Context-dependent fields follow... */ - union { - - /* Page is in use by a domain. */ - struct { - /* Owner of this page. */ - u32 _domain; - /* Type reference count and various PGT_xxx flags and fields. */ - u32 type_info; - } inuse; - - /* Page is on a free list. */ - struct { - /* Mask of possibly-tainted TLBs. */ - cpumask_t cpumask; - /* Order-size of the free chunk this page is the head of. */ - u8 order; - } free; - - } u; #if 0 // following added for Linux compiling page_flags_t flags; @@ -77,34 +76,46 @@ #define set_page_count(p,v) atomic_set(&(p)->_count, v - 1) -/* Still small set of flags defined by far on IA-64 */ +/* + * Still small set of flags defined by far on IA-64. + * IA-64 should make it a definition same as x86_64. + */ /* The following page types are MUTUALLY EXCLUSIVE. */ #define PGT_none (0<<29) /* no special uses of this page */ #define PGT_l1_page_table (1<<29) /* using this page as an L1 page table? */ #define PGT_l2_page_table (2<<29) /* using this page as an L2 page table? */ #define PGT_l3_page_table (3<<29) /* using this page as an L3 page table? */ #define PGT_l4_page_table (4<<29) /* using this page as an L4 page table? */ -#define PGT_writable_page (5<<29) /* has writable mappings of this page? */ -#define PGT_type_mask (5<<29) /* Bits 29-31. */ + /* Value 5 reserved. See asm-x86/mm.h */ + /* Value 6 reserved. See asm-x86/mm.h */ +#define PGT_writable_page (7<<29) /* has writable mappings of this page? */ +#define PGT_type_mask (7<<29) /* Bits 29-31. */ /* Has this page been validated for use as its current type? */ #define _PGT_validated 28 #define PGT_validated (1<<_PGT_validated) -/* Owning guest has pinned this page to its current type? */ + /* Owning guest has pinned this page to its current type? */ #define _PGT_pinned 27 #define PGT_pinned (1U<<_PGT_pinned) -/* 27-bit count of uses of this frame as its current type. */ -#define PGT_count_mask ((1U<<27)-1) - -/* Cleared when the owning guest 'frees' this page. 
*/ + /* The 27 most significant bits of virt address if this is a page table. */ +#define PGT_va_shift 32 +#define PGT_va_mask ((unsigned long)((1U<<28)-1)<<PGT_va_shift) + /* Is the back pointer still mutable (i.e. not fixed yet)? */ +#define PGT_va_mutable ((unsigned long)((1U<<28)-1)<<PGT_va_shift) + /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */ +#define PGT_va_unknown ((unsigned long)((1U<<28)-2)<<PGT_va_shift) + + /* 16-bit count of uses of this frame as its current type. */ +#define PGT_count_mask ((1U<<16)-1) + + /* Cleared when the owning guest 'frees' this page. */ #define _PGC_allocated 31 #define PGC_allocated (1U<<_PGC_allocated) -/* Set when the page is used as a page table */ -#define _PGC_page_table 30 -#define PGC_page_table (1U<<_PGC_page_table) -/* 30-bit count of references to this frame. */ -#define PGC_count_mask ((1U<<30)-1) + /* Bit 30 reserved. See asm-x86/mm.h */ + /* Bit 29 reserved. See asm-x86/mm.h */ + /* 29-bit count of references to this frame. */ +#define PGC_count_mask ((1U<<29)-1) #define IS_XEN_HEAP_FRAME(_pfn) ((page_to_maddr(_pfn) < xenheap_phys_end) \ && (page_to_maddr(_pfn) >= xen_pstart)) @@ -139,7 +150,6 @@ static inline void put_page(struct page_info *page) { -#ifdef VALIDATE_VT // doesn't work with non-VTI in grant tables yet u32 nx, x, y = page->count_info; do { @@ -150,14 +160,12 @@ if (unlikely((nx & PGC_count_mask) == 0)) free_domheap_page(page); -#endif } /* count_info and ownership are checked atomically. */ static inline int get_page(struct page_info *page, struct domain *domain) { -#ifdef VALIDATE_VT u64 x, nx, y = *((u64*)&page->count_info); u32 _domain = pickle_domptr(domain); @@ -173,14 +181,13 @@ return 0; } } - while(unlikely(y = cmpxchg(&page->count_info, x, nx)) != x); -#endif + while(unlikely((y = cmpxchg((u64*)&page->count_info, x, nx)) != x)); return 1; } -/* No type info now */ -#define put_page_type(page) -#define get_page_type(page, type) 1 +extern void put_page_type(struct page_info *page); +extern int get_page_type(struct page_info *page, u32 type); + static inline void put_page_and_type(struct page_info *page) { put_page_type(page); @@ -219,7 +226,7 @@ // prototype of misc memory stuff //unsigned long __get_free_pages(unsigned int mask, unsigned int order); -//void __free_pages(struct page *page, unsigned int order); +//void __free_pages(struct page_info *page, unsigned int order); void *pgtable_quicklist_alloc(void); void pgtable_quicklist_free(void *pgtable_entry); @@ -339,11 +346,11 @@ #define NODEZONE_SHIFT (sizeof(page_flags_t)*8 - MAX_NODES_SHIFT - MAX_ZONES_SHIFT) #define NODEZONE(node, zone) ((node << ZONES_SHIFT) | zone) -static inline unsigned long page_zonenum(struct page *page) +static inline unsigned long page_zonenum(struct page_info *page) { return (page->flags >> NODEZONE_SHIFT) & (~(~0UL << ZONES_SHIFT)); } -static inline unsigned long page_to_nid(struct page *page) +static inline unsigned long page_to_nid(struct page_info *page) { return (page->flags >> (NODEZONE_SHIFT + ZONES_SHIFT)); } @@ -351,12 +358,12 @@ struct zone; extern struct zone *zone_table[]; -static inline struct zone *page_zone(struct page *page) +static inline struct zone *page_zone(struct page_info *page) { return zone_table[page->flags >> NODEZONE_SHIFT]; } -static inline void set_page_zone(struct page *page, unsigned long nodezone_num) +static inline void set_page_zone(struct page_info *page, unsigned long nodezone_num) { page->flags &= ~(~0UL << NODEZONE_SHIFT); page->flags |= nodezone_num << NODEZONE_SHIFT; 
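A side note on the asm-ia64/mm.h hunks above: with the VALIDATE_VT stubs gone, get_page() now really takes a reference, using the standard lock-free recipe of snapshotting the 64-bit count-plus-owner word, validating it, and retrying the cmpxchg until it lands unchanged. A self-contained sketch of that recipe, with hypothetical types rather than the Xen structures:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Hypothetical page word: low 32 bits refcount, high 32 bits owner id. */
typedef struct { _Atomic uint64_t info; } page_sketch_t;

#define COUNT_MASK 0xffffffffull

/* Take a reference iff the page is live and owned by 'owner'. */
static bool get_page_sketch(page_sketch_t *pg, uint32_t owner)
{
    uint64_t old = atomic_load(&pg->info);

    for (;;) {
        if ((old & COUNT_MASK) == 0 || (uint32_t)(old >> 32) != owner)
            return false;   /* already freed, or owner changed under us */

        /* Publish count+1 only if nothing moved since the snapshot; on
         * failure 'old' is reloaded and revalidated. (Overflow guard
         * omitted for brevity.) */
        if (atomic_compare_exchange_weak(&pg->info, &old, old + 1))
            return true;
    }
}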
@@ -367,7 +374,7 @@ extern unsigned long max_mapnr; #endif -static inline void *lowmem_page_address(struct page *page) +static inline void *lowmem_page_address(struct page_info *page) { return __va(page_to_mfn(page) << PAGE_SHIFT); } @@ -386,8 +393,8 @@ #endif #if defined(HASHED_PAGE_VIRTUAL) -void *page_address(struct page *page); -void set_page_address(struct page *page, void *virtual); +void *page_address(struct page_info *page); +void set_page_address(struct page_info *page, void *virtual); void page_address_init(void); #endif @@ -400,7 +407,7 @@ #ifndef CONFIG_DEBUG_PAGEALLOC static inline void -kernel_map_pages(struct page *page, int numpages, int enable) +kernel_map_pages(struct page_info *page, int numpages, int enable) { } #endif @@ -415,8 +422,8 @@ #undef machine_to_phys_mapping #define machine_to_phys_mapping mpt_table -#define INVALID_M2P_ENTRY (~0U) -#define VALID_M2P(_e) (!((_e) & (1U<<63))) +#define INVALID_M2P_ENTRY (~0UL) +#define VALID_M2P(_e) (!((_e) & (1UL<<63))) #define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e)) #define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn)) @@ -463,4 +470,6 @@ /* Arch-specific portion of memory_op hypercall. */ #define arch_memory_op(op, arg) (-ENOSYS) +extern void assign_domain_page(struct domain *d, unsigned long mpaddr, + unsigned long physaddr); #endif /* __ASM_IA64_MM_H__ */ diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/regionreg.h --- a/xen/include/asm-ia64/regionreg.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/regionreg.h Mon Mar 20 08:56:54 2006 @@ -64,4 +64,13 @@ // since vmMangleRID is symmetric, use it for unmangling also #define vmUnmangleRID(x) vmMangleRID(x) +extern unsigned long allocate_metaphysical_rr(void); + +struct domain; +extern int allocate_rid_range(struct domain *d, unsigned long ridbits); +extern int deallocate_rid_range(struct domain *d); + +struct vcpu; +extern void init_all_rr(struct vcpu *v); + #endif /* !_REGIONREG_H_ */ diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/vcpu.h --- a/xen/include/asm-ia64/vcpu.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/vcpu.h Mon Mar 20 08:56:54 2006 @@ -7,7 +7,6 @@ //#include "thread.h" #include <asm/ia64_int.h> #include <public/arch-ia64.h> - typedef unsigned long UINT64; typedef unsigned int UINT; typedef int BOOLEAN; @@ -16,7 +15,10 @@ typedef cpu_user_regs_t REGS; -#define VCPU(_v,_x) _v->arch.privregs->_x + +#define VCPU(_v,_x) (_v->arch.privregs->_x) +#define PSCB(_v,_x) VCPU(_v,_x) +#define PSCBX(_v,_x) (_v->arch._x) #define PRIVOP_ADDR_COUNT #ifdef PRIVOP_ADDR_COUNT @@ -140,7 +142,9 @@ extern IA64FAULT vcpu_ptc_ga(VCPU *vcpu, UINT64 vadr, UINT64 addr_range); extern IA64FAULT vcpu_ptr_d(VCPU *vcpu,UINT64 vadr, UINT64 addr_range); extern IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 vadr, UINT64 addr_range); -extern IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, UINT64 *pteval, UINT64 *itir, UINT64 *iha); +extern IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, + BOOLEAN is_data, BOOLEAN in_tpa, + UINT64 *pteval, UINT64 *itir, UINT64 *iha); extern IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr); extern IA64FAULT vcpu_force_data_miss(VCPU *vcpu, UINT64 ifa); extern IA64FAULT vcpu_fc(VCPU *vcpu, UINT64 vadr); @@ -173,4 +177,18 @@ return (~((1UL << itir_ps(itir)) - 1)); } +#define verbose(a...) 
do {if (vcpu_verbose) printf(a);} while(0) + +//#define vcpu_quick_region_check(_tr_regions,_ifa) 1 +#define vcpu_quick_region_check(_tr_regions,_ifa) \ + (_tr_regions & (1 << ((unsigned long)_ifa >> 61))) +#define vcpu_quick_region_set(_tr_regions,_ifa) \ + do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0) + +// FIXME: also need to check && (!trp->key || vcpu_pkr_match(trp->key)) +#define vcpu_match_tr_entry(_trp,_ifa,_rid) \ + ((_trp->p && (_trp->rid==_rid) && (_ifa >= _trp->vadr) && \ + (_ifa < (_trp->vadr + (1L<< _trp->ps)) - 1))) + + #endif diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/vmmu.h --- a/xen/include/asm-ia64/vmmu.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/vmmu.h Mon Mar 20 08:56:54 2006 @@ -68,11 +68,14 @@ } search_section_t; -typedef enum { +enum { ISIDE_TLB=0, DSIDE_TLB=1 -} CACHE_LINE_TYPE; - +}; +#define VTLB_PTE_P_BIT 0 +#define VTLB_PTE_IO_BIT 60 +#define VTLB_PTE_IO (1UL<<VTLB_PTE_IO_BIT) +#define VTLB_PTE_P (1UL<<VTLB_PTE_P_BIT) typedef struct thash_data { union { struct { @@ -86,18 +89,16 @@ u64 ppn : 38; // 12-49 u64 rv2 : 2; // 50-51 u64 ed : 1; // 52 - u64 ig1 : 3; // 53-55 - u64 len : 4; // 56-59 - u64 ig2 : 3; // 60-63 + u64 ig1 : 3; // 53-63 }; struct { u64 __rv1 : 53; // 0-52 u64 contiguous : 1; //53 u64 tc : 1; // 54 TR or TC - CACHE_LINE_TYPE cl : 1; // 55 I side or D side cache line + u64 cl : 1; // 55 I side or D side cache line // next extension to ig1, only for TLB instance - u64 __ig1 : 4; // 56-59 - u64 locked : 1; // 60 entry locked or not + u64 len : 4; // 56-59 + u64 io : 1; // 60 entry is for io or not u64 nomap : 1; // 61 entry cann't be inserted into machine TLB. u64 checked : 1; // 62 for VTLB/VHPT sanity check u64 invalid : 1; // 63 invalid entry @@ -112,12 +113,12 @@ u64 key : 24; // 8-31 u64 rv4 : 32; // 32-63 }; - struct { - u64 __rv3 : 32; // 0-31 +// struct { +// u64 __rv3 : 32; // 0-31 // next extension to rv4 - u64 rid : 24; // 32-55 - u64 __rv4 : 8; // 56-63 - }; +// u64 rid : 24; // 32-55 +// u64 __rv4 : 8; // 56-63 +// }; u64 itir; }; union { @@ -136,7 +137,8 @@ }; union { struct thash_data *next; - u64 tr_idx; + u64 rid; // only used in guest TR +// u64 tr_idx; }; } thash_data_t; @@ -152,7 +154,7 @@ #define INVALID_VHPT(hdata) ((hdata)->ti) #define INVALID_TLB(hdata) ((hdata)->ti) -#define INVALID_TR(hdata) ((hdata)->invalid) +#define INVALID_TR(hdata) (!(hdata)->p) #define INVALID_ENTRY(hcb, hdata) INVALID_VHPT(hdata) /* ((hcb)->ht==THASH_TLB ? INVALID_TLB(hdata) : INVALID_VHPT(hdata)) */ @@ -199,18 +201,18 @@ typedef void (REM_THASH_FN)(struct thash_cb *hcb, thash_data_t *entry); typedef void (INS_THASH_FN)(struct thash_cb *hcb, thash_data_t *entry, u64 va); -typedef struct tlb_special { - thash_data_t itr[NITRS]; - thash_data_t dtr[NDTRS]; - struct thash_cb *vhpt; -} tlb_special_t; +//typedef struct tlb_special { +// thash_data_t itr[NITRS]; +// thash_data_t dtr[NDTRS]; +// struct thash_cb *vhpt; +//} tlb_special_t; //typedef struct vhpt_cb { //u64 pta; // pta value. 
// GET_MFN_FN *get_mfn; // TTAG_FN *tag_func; //} vhpt_special; - +/* typedef struct thash_internal { thash_data_t *hash_base; thash_data_t *cur_cch; // head of overlap search @@ -227,7 +229,7 @@ u64 _curva; // current address to search u64 _eva; } thash_internal_t; - + */ #define THASH_CB_MAGIC 0x55aa00aa55aa55aaUL typedef struct thash_cb { /* THASH base information */ @@ -243,6 +245,7 @@ thash_cch_mem_t *cch_freelist; struct vcpu *vcpu; PTA pta; + struct thash_cb *vhpt; /* VTLB/VHPT common information */ // FIND_OVERLAP_FN *find_overlap; // FIND_NEXT_OVL_FN *next_overlap; @@ -251,15 +254,15 @@ // REM_NOTIFIER_FN *remove_notifier; /* private information */ // thash_internal_t priv; - union { - tlb_special_t *ts; +// union { +// tlb_special_t *ts; // vhpt_special *vs; - }; +// }; // Internal positon information, buffer and storage etc. TBD } thash_cb_t; -#define ITR(hcb,id) ((hcb)->ts->itr[id]) -#define DTR(hcb,id) ((hcb)->ts->dtr[id]) +//#define ITR(hcb,id) ((hcb)->ts->itr[id]) +//#define DTR(hcb,id) ((hcb)->ts->dtr[id]) #define INVALIDATE_HASH_HEADER(hcb,hash) INVALIDATE_TLB_HEADER(hash) /* \ { if ((hcb)->ht==THASH_TLB){ \ @@ -290,10 +293,10 @@ * 4: Return the entry in hash table or collision chain. * */ -extern void thash_vhpt_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va); +extern void thash_vhpt_insert(thash_cb_t *hcb, u64 pte, u64 itir, u64 ifa); //extern void thash_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va); -extern void thash_tr_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va, int idx); -extern thash_data_t *vtr_find_overlap(thash_cb_t *hcb, thash_data_t *data, char cl); +//extern void thash_tr_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va, int idx); +extern int vtr_find_overlap(struct vcpu *vcpu, u64 va, u64 ps, int is_data); extern u64 get_mfn(struct domain *d, u64 gpfn); /* * Force to delete a found entry no matter TR or foreign map for TLB. @@ -344,13 +347,8 @@ * NOTES: * */ -extern void thash_purge_entries(thash_cb_t *hcb, - thash_data_t *in, search_section_t p_sect); -extern void thash_purge_entries_ex(thash_cb_t *hcb, - u64 rid, u64 va, u64 sz, - search_section_t p_sect, - CACHE_LINE_TYPE cl); -extern void thash_purge_and_insert(thash_cb_t *hcb, thash_data_t *in, u64 va); +extern void thash_purge_entries(thash_cb_t *hcb, u64 va, u64 ps); +extern void thash_purge_and_insert(thash_cb_t *hcb, u64 pte, u64 itir, u64 ifa); /* * Purge all TCs or VHPT entries including those in Hash table. @@ -363,10 +361,7 @@ * covering this address rid:va. 
* */ -extern thash_data_t *vtlb_lookup(thash_cb_t *hcb, - thash_data_t *in); -extern thash_data_t *vtlb_lookup_ex(thash_cb_t *hcb, - u64 rid, u64 va,CACHE_LINE_TYPE cl); +extern thash_data_t *vtlb_lookup(thash_cb_t *hcb,u64 va,int is_data); extern int thash_lock_tc(thash_cb_t *hcb, u64 va, u64 size, int rid, char cl, int lock); @@ -381,6 +376,18 @@ extern thash_data_t * vsa_thash(PTA vpta, u64 va, u64 vrr, u64 *tag); extern thash_data_t * vhpt_lookup(u64 va); extern void machine_tlb_purge(u64 va, u64 ps); +extern int fetch_code(struct vcpu *vcpu, u64 gip, u64 *code1, u64 *code2); +extern void emulate_io_inst(struct vcpu *vcpu, u64 padr, u64 ma); +extern int vhpt_enabled(struct vcpu *vcpu, uint64_t vadr, vhpt_ref_t ref); + +static inline void vmx_vcpu_set_tr (thash_data_t *trp, u64 pte, u64 itir, u64 va, u64 rid) +{ + trp->page_flags = pte; + trp->itir = itir; + trp->vadr = va; + trp->rid = rid; +} + //#define VTLB_DEBUG #ifdef VTLB_DEBUG diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/vmx_phy_mode.h --- a/xen/include/asm-ia64/vmx_phy_mode.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/vmx_phy_mode.h Mon Mar 20 08:56:54 2006 @@ -75,11 +75,11 @@ #define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX) #define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX) -#ifdef PHY_16M /* 16M: large granule for test*/ -#define EMUL_PHY_PAGE_SHIFT 24 -#else /* 4K: emulated physical page granule */ -#define EMUL_PHY_PAGE_SHIFT 12 -#endif +//#ifdef PHY_16M /* 16M: large granule for test*/ +//#define EMUL_PHY_PAGE_SHIFT 24 +//#else /* 4K: emulated physical page granule */ +//#define EMUL_PHY_PAGE_SHIFT 12 +//#endif #define IA64_RSC_MODE 0x0000000000000003 #define XEN_RR7_RID (0xf00010) #define GUEST_IN_PHY 0x1 @@ -96,8 +96,7 @@ extern void recover_if_physical_mode(VCPU *vcpu); extern void vmx_init_all_rr(VCPU *vcpu); extern void vmx_load_all_rr(VCPU *vcpu); -extern void physical_itlb_miss(VCPU *vcpu, u64 vadr); -extern void physical_dtlb_miss(VCPU *vcpu, u64 vadr); +extern void physical_tlb_miss(VCPU *vcpu, u64 vadr, u64 vec); /* * No sanity check here, since all psr changes have been * checked in switch_mm_mode(). 
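The timekeeping thread of this changeset (the vmx.c MSR hunks earlier, plus the vpit.h and vmx.h hunks below) replaces the ad-hoc vpit->shift arithmetic with a cached VMCS TSC_OFFSET: set_guest_time() pins the guest clock, and TSC reads add the cached offset back. A minimal sketch of that arrangement, assuming the usual guest_tsc = host_tsc + TSC_OFFSET semantics; vmwrite64() here is only a stand-in for __vmwrite(TSC_OFFSET, ...) (and TSC_OFFSET_HIGH on i386):

#include <stdint.h>

/* Stand-in for rdtscll(). */
static inline uint64_t rdtsc(void)
{
    uint32_t lo, hi;
    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
    return ((uint64_t)hi << 32) | lo;
}

struct vpit_sketch {
    int64_t cache_tsc_offset;   /* cached copy of the VMCS TSC_OFFSET */
};

/* Pin the guest clock to 'gtime' by programming the offset the hardware
 * adds to every guest RDTSC. */
static void set_guest_time_sketch(struct vpit_sketch *vpit, uint64_t gtime,
                                  void (*vmwrite64)(uint64_t))
{
    vpit->cache_tsc_offset = (int64_t)(gtime - rdtsc());
    vmwrite64((uint64_t)vpit->cache_tsc_offset);
}

/* Read side, as in the MSR_IA32_TIME_STAMP_COUNTER hunk: keeping the
 * offset cached avoids a vmread on every emulated TSC access. */
static uint64_t get_guest_time_sketch(const struct vpit_sketch *vpit)
{
    return rdtsc() + (uint64_t)vpit->cache_tsc_offset;
}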
diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/vmx_platform.h --- a/xen/include/asm-ia64/vmx_platform.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/vmx_platform.h Mon Mar 20 08:56:54 2006 @@ -22,7 +22,6 @@ #include <public/xen.h> #include <public/arch-ia64.h> #include <asm/hvm/vioapic.h> - struct mmio_list; typedef struct virtual_platform_def { unsigned long shared_page_va; @@ -51,9 +50,8 @@ } vlapic_t; extern uint64_t dummy_tmr[]; -#define VCPU(_v,_x) _v->arch.privregs->_x -#define VLAPIC_ID(l) (uint16_t)(VCPU((l)->vcpu, lid) >> 16) -#define VLAPIC_IRR(l) VCPU((l)->vcpu, irr[0]) +#define VLAPIC_ID(l) (uint16_t)(((l)->vcpu->arch.privregs->lid) >> 16) +#define VLAPIC_IRR(l) ((l)->vcpu->arch.privregs->irr[0]) struct vlapic* apic_round_robin(struct domain *d, uint8_t dest_mode, uint8_t vector, uint32_t bitmap); extern int vmx_vcpu_pend_interrupt(struct vcpu *vcpu, uint8_t vector); static inline int vlapic_set_irq(struct vlapic *t, uint8_t vec, uint8_t trig) diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/vmx_vcpu.h --- a/xen/include/asm-ia64/vmx_vcpu.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/vmx_vcpu.h Mon Mar 20 08:56:54 2006 @@ -66,17 +66,13 @@ extern IA64FAULT vmx_vcpu_cover(VCPU *vcpu); extern thash_cb_t *vmx_vcpu_get_vtlb(VCPU *vcpu); extern thash_cb_t *vmx_vcpu_get_vhpt(VCPU *vcpu); -extern ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr); extern IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val); -#if 0 -extern IA64FAULT vmx_vcpu_get_rr(VCPU *vcpu, UINT64 reg, UINT64 *pval); -#endif extern IA64FAULT vmx_vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval); IA64FAULT vmx_vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val); extern IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa); extern IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa); -extern IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx); -extern IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx); +extern IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, UINT64 slot, UINT64 pte, UINT64 itir, UINT64 ifa); +extern IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 pte, UINT64 itir, UINT64 ifa); extern IA64FAULT vmx_vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 ps); extern IA64FAULT vmx_vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 ps); extern IA64FAULT vmx_vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 ps); @@ -102,10 +98,11 @@ extern void vtm_init(VCPU *vcpu); extern uint64_t vtm_get_itc(VCPU *vcpu); extern void vtm_set_itc(VCPU *vcpu, uint64_t new_itc); -extern void vtm_set_itv(VCPU *vcpu); +extern void vtm_set_itv(VCPU *vcpu, uint64_t val); +extern void vtm_set_itm(VCPU *vcpu, uint64_t val); extern void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm); -extern void vtm_domain_out(VCPU *vcpu); -extern void vtm_domain_in(VCPU *vcpu); +//extern void vtm_domain_out(VCPU *vcpu); +//extern void vtm_domain_in(VCPU *vcpu); extern void vlsapic_reset(VCPU *vcpu); extern int vmx_check_pending_irq(VCPU *vcpu); extern void guest_write_eoi(VCPU *vcpu); @@ -255,10 +252,7 @@ IA64FAULT vmx_vcpu_set_itm(VCPU *vcpu, u64 val) { - vtime_t *vtm; - vtm=&(vcpu->arch.arch_vmx.vtm); - VCPU(vcpu,itm)=val; - vtm_interruption_update(vcpu, vtm); + vtm_set_itm(vcpu, val); return IA64_NO_FAULT; } static inline @@ -299,8 +293,7 @@ vmx_vcpu_set_itv(VCPU *vcpu, u64 val) { - VCPU(vcpu,itv)=val; - vtm_set_itv(vcpu); + vtm_set_itv(vcpu, val); return IA64_NO_FAULT; } static inline @@ -350,12 +343,14 @@ *val = vtm_get_itc(vcpu); 
return IA64_NO_FAULT; } +/* static inline IA64FAULT vmx_vcpu_get_rr(VCPU *vcpu, UINT64 reg, UINT64 *pval) { *pval = VMX(vcpu,vrr[reg>>61]); return (IA64_NO_FAULT); } + */ /************************************************************************** VCPU debug breakpoint register access routines **************************************************************************/ diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/vtm.h --- a/xen/include/asm-ia64/vtm.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/vtm.h Mon Mar 20 08:56:54 2006 @@ -63,5 +63,4 @@ #define VTM_FIRED(vtm) ((vtm)->triggered) -extern void vtm_init(); #endif /* _STATS_H_ */ diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-x86/domain.h Mon Mar 20 08:56:54 2006 @@ -124,6 +124,9 @@ void (*schedule_tail) (struct vcpu *); + void (*ctxt_switch_from) (struct vcpu *); + void (*ctxt_switch_to) (struct vcpu *); + /* Bounce information for propagating an exception to guest OS. */ struct trap_bounce trap_bounce; diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-x86/hvm/domain.h --- a/xen/include/asm-x86/hvm/domain.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-x86/hvm/domain.h Mon Mar 20 08:56:54 2006 @@ -37,6 +37,7 @@ unsigned int pae_enabled; struct hvm_virpit vpit; + u64 guest_time; struct hvm_virpic vpic; struct hvm_vioapic vioapic; struct hvm_io_handler io_handler; diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-x86/hvm/hvm.h Mon Mar 20 08:56:54 2006 @@ -41,18 +41,11 @@ /* * Store and load guest state: * 1) load/store guest register state, - * 2) load/store segment state (x86_64 only), - * 3) load/store msr register state (x86_64 only), - * 4) store guest control register state (used for panic dumps), - * 5) modify guest state (e.g., set debug flags). + * 2) store guest control register state (used for panic dumps), + * 3) modify guest state (e.g., set debug flags). 
*/ void (*store_cpu_guest_regs)(struct vcpu *v, struct cpu_user_regs *r); void (*load_cpu_guest_regs)(struct vcpu *v, struct cpu_user_regs *r); -#ifdef __x86_64__ - void (*save_segments)(struct vcpu *v); - void (*load_msrs)(void); - void (*restore_msrs)(struct vcpu *v); -#endif void (*store_cpu_guest_ctrl_regs)(struct vcpu *v, unsigned long crs[8]); void (*modify_guest_state)(struct vcpu *v); @@ -111,33 +104,6 @@ hvm_funcs.load_cpu_guest_regs(v, r); } -#ifdef __x86_64__ -static inline void -hvm_save_segments(struct vcpu *v) -{ - if (hvm_funcs.save_segments) - hvm_funcs.save_segments(v); -} - -static inline void -hvm_load_msrs(void) -{ - if (hvm_funcs.load_msrs) - hvm_funcs.load_msrs(); -} - -static inline void -hvm_restore_msrs(struct vcpu *v) -{ - if (hvm_funcs.restore_msrs) - hvm_funcs.restore_msrs(v); -} -#else -#define hvm_save_segments(v) ((void)0) -#define hvm_load_msrs(v) ((void)0) -#define hvm_restore_msrs(v) ((void)0) -#endif /* __x86_64__ */ - static inline void hvm_store_cpu_guest_ctrl_regs(struct vcpu *v, unsigned long crs[8]) { diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-x86/hvm/vmx/vmcs.h --- a/xen/include/asm-x86/hvm/vmx/vmcs.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Mon Mar 20 08:56:54 2006 @@ -77,7 +77,6 @@ unsigned long cpu_based_exec_control; struct vmx_msr_state msr_content; void *io_bitmap_a, *io_bitmap_b; - u64 tsc_offset; struct timer hlt_timer; /* hlt ins emulation wakeup timer */ }; diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Mon Mar 20 08:56:54 2006 @@ -30,10 +30,11 @@ extern void vmx_asm_do_resume(void); extern void vmx_asm_do_launch(void); extern void vmx_intr_assist(void); -extern void vmx_set_tsc_shift(struct vcpu *, struct hvm_virpit *); extern void vmx_migrate_timers(struct vcpu *v); extern void arch_vmx_do_launch(struct vcpu *); extern void arch_vmx_do_resume(struct vcpu *); +extern void set_guest_time(struct vcpu *v, u64 gtime); +extern u64 get_guest_time(struct vcpu *v); extern unsigned int cpu_rev; diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-x86/hvm/vpit.h --- a/xen/include/asm-x86/hvm/vpit.h Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-x86/hvm/vpit.h Mon Mar 20 08:56:54 2006 @@ -38,7 +38,6 @@ struct hvm_virpit { /* for simulation of counter 0 in mode 2 */ u64 period_cycles; /* pit frequency in cpu cycles */ - u64 shift; /* save the value of offset - drift */ s_time_t inject_point; /* the time inject virt intr */ s_time_t scheduled; /* scheduled timer interrupt */ struct timer pit_timer; /* periodic timer for mode 2*/ @@ -46,6 +45,8 @@ unsigned int pending_intr_nr; /* the couner for pending timer interrupts */ u32 period; /* pit frequency in ns */ int first_injected; /* flag to prevent shadow window */ + s64 cache_tsc_offset; /* cache of VMCS TSC_OFFSET offset */ + u64 last_pit_gtime; /* guest time when last pit is injected */ /* virtual PIT state for handle related I/O */ int read_state; diff -r 516cf6553011 -r cf89e8f0831b patches/linux-2.6.16-rc6/i386-mach-io-check-nmi.patch --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/patches/linux-2.6.16-rc6/i386-mach-io-check-nmi.patch Mon Mar 20 08:56:54 2006 @@ -0,0 +1,45 @@ +diff -pruN ../pristine-linux-2.6.16-rc6/arch/i386/kernel/traps.c ./arch/i386/kernel/traps.c +--- ../pristine-linux-2.6.16-rc6/arch/i386/kernel/traps.c 2006-03-17 22:59:01.000000000 +0000 ++++ ./arch/i386/kernel/traps.c 2006-03-17 23:04:16.000000000 +0000 +@@ -567,18 
+567,11 @@ static void mem_parity_error(unsigned ch + + static void io_check_error(unsigned char reason, struct pt_regs * regs) + { +- unsigned long i; +- + printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); + show_registers(regs); + + /* Re-enable the IOCK line, wait for a few seconds */ +- reason = (reason & 0xf) | 8; +- outb(reason, 0x61); +- i = 2000; +- while (--i) udelay(1000); +- reason &= ~8; +- outb(reason, 0x61); ++ clear_io_check_error(reason); + } + + static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +diff -pruN ../pristine-linux-2.6.16-rc6/include/asm-i386/mach-default/mach_traps.h ./include/asm-i386/mach-default/mach_traps.h +--- ../pristine-linux-2.6.16-rc6/include/asm-i386/mach-default/mach_traps.h 2006-01-03 03:21:10.000000000 +0000 ++++ ./include/asm-i386/mach-default/mach_traps.h 2006-03-17 23:04:16.000000000 +0000 +@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig + outb(reason, 0x61); + } + ++static inline void clear_io_check_error(unsigned char reason) ++{ ++ unsigned long i; ++ ++ reason = (reason & 0xf) | 8; ++ outb(reason, 0x61); ++ i = 2000; ++ while (--i) udelay(1000); ++ reason &= ~8; ++ outb(reason, 0x61); ++} ++ + static inline unsigned char get_nmi_reason(void) + { + return inb(0x61); diff -r 516cf6553011 -r cf89e8f0831b patches/linux-2.6.16-rc6/net-csum.patch --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/patches/linux-2.6.16-rc6/net-csum.patch Mon Mar 20 08:56:54 2006 @@ -0,0 +1,41 @@ +diff -pruN ../pristine-linux-2.6.16-rc6/net/ipv4/netfilter/ip_nat_proto_tcp.c ./net/ipv4/netfilter/ip_nat_proto_tcp.c +--- ../pristine-linux-2.6.16-rc6/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-03-17 22:59:16.000000000 +0000 ++++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-03-17 23:04:19.000000000 +0000 +@@ -129,10 +129,14 @@ tcp_manip_pkt(struct sk_buff **pskb, + if (hdrsize < sizeof(*hdr)) + return 1; + +- hdr->check = ip_nat_cheat_check(~oldip, newip, ++ if ((*pskb)->proto_csum_blank) { ++ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); ++ } else { ++ hdr->check = ip_nat_cheat_check(~oldip, newip, + ip_nat_cheat_check(oldport ^ 0xFFFF, + newport, + hdr->check)); ++ } + return 1; + } + +diff -pruN ../pristine-linux-2.6.16-rc6/net/ipv4/netfilter/ip_nat_proto_udp.c ./net/ipv4/netfilter/ip_nat_proto_udp.c +--- ../pristine-linux-2.6.16-rc6/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-03-17 22:59:16.000000000 +0000 ++++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2006-03-17 23:04:19.000000000 +0000 +@@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb, + newport = tuple->dst.u.udp.port; + portptr = &hdr->dest; + } +- if (hdr->check) /* 0 is a special case meaning no checksum */ +- hdr->check = ip_nat_cheat_check(~oldip, newip, ++ if (hdr->check) { /* 0 is a special case meaning no checksum */ ++ if ((*pskb)->proto_csum_blank) { ++ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); ++ } else { ++ hdr->check = ip_nat_cheat_check(~oldip, newip, + ip_nat_cheat_check(*portptr ^ 0xFFFF, + newport, + hdr->check)); ++ } ++ } + *portptr = newport; + return 1; + } diff -r 516cf6553011 -r cf89e8f0831b patches/linux-2.6.16-rc6/pmd-shared.patch --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/patches/linux-2.6.16-rc6/pmd-shared.patch Mon Mar 20 08:56:54 2006 @@ -0,0 +1,111 @@ +diff -pruN ../pristine-linux-2.6.16-rc6/arch/i386/mm/pageattr.c ./arch/i386/mm/pageattr.c +--- ../pristine-linux-2.6.16-rc6/arch/i386/mm/pageattr.c 2006-03-17 22:59:01.000000000 +0000 ++++ ./arch/i386/mm/pageattr.c 2006-03-17 23:04:21.000000000 +0000 +@@ -78,7 
+78,7 @@ static void set_pmd_pte(pte_t *kpte, uns + unsigned long flags; + + set_pte_atomic(kpte, pte); /* change init_mm */ +- if (PTRS_PER_PMD > 1) ++ if (HAVE_SHARED_KERNEL_PMD) + return; + + spin_lock_irqsave(&pgd_lock, flags); +diff -pruN ../pristine-linux-2.6.16-rc6/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c +--- ../pristine-linux-2.6.16-rc6/arch/i386/mm/pgtable.c 2006-01-03 03:21:10.000000000 +0000 ++++ ./arch/i386/mm/pgtable.c 2006-03-17 23:04:21.000000000 +0000 +@@ -215,9 +215,10 @@ void pgd_ctor(void *pgd, kmem_cache_t *c + spin_lock_irqsave(&pgd_lock, flags); + } + +- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, +- swapper_pg_dir + USER_PTRS_PER_PGD, +- KERNEL_PGD_PTRS); ++ if (PTRS_PER_PMD == 1 || HAVE_SHARED_KERNEL_PMD) ++ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, ++ swapper_pg_dir + USER_PTRS_PER_PGD, ++ KERNEL_PGD_PTRS); + if (PTRS_PER_PMD > 1) + return; + +@@ -249,6 +250,30 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + goto out_oom; + set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); + } ++ ++ if (!HAVE_SHARED_KERNEL_PMD) { ++ unsigned long flags; ++ ++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { ++ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); ++ if (!pmd) ++ goto out_oom; ++ set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd))); ++ } ++ ++ spin_lock_irqsave(&pgd_lock, flags); ++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { ++ unsigned long v = (unsigned long)i << PGDIR_SHIFT; ++ pgd_t *kpgd = pgd_offset_k(v); ++ pud_t *kpud = pud_offset(kpgd, v); ++ pmd_t *kpmd = pmd_offset(kpud, v); ++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ memcpy(pmd, kpmd, PAGE_SIZE); ++ } ++ pgd_list_add(pgd); ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ } ++ + return pgd; + + out_oom: +@@ -263,9 +288,23 @@ void pgd_free(pgd_t *pgd) + int i; + + /* in the PAE case user pgd entries are overwritten before usage */ +- if (PTRS_PER_PMD > 1) +- for (i = 0; i < USER_PTRS_PER_PGD; ++i) +- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); ++ if (PTRS_PER_PMD > 1) { ++ for (i = 0; i < USER_PTRS_PER_PGD; ++i) { ++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ kmem_cache_free(pmd_cache, pmd); ++ } ++ if (!HAVE_SHARED_KERNEL_PMD) { ++ unsigned long flags; ++ spin_lock_irqsave(&pgd_lock, flags); ++ pgd_list_del(pgd); ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { ++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); ++ kmem_cache_free(pmd_cache, pmd); ++ } ++ } ++ } + /* in the non-PAE case, free_pgtables() clears user pgd entries */ + kmem_cache_free(pgd_cache, pgd); + } +diff -pruN ../pristine-linux-2.6.16-rc6/include/asm-i386/pgtable-2level-defs.h ./include/asm-i386/pgtable-2level-defs.h +--- ../pristine-linux-2.6.16-rc6/include/asm-i386/pgtable-2level-defs.h 2006-01-03 03:21:10.000000000 +0000 ++++ ./include/asm-i386/pgtable-2level-defs.h 2006-03-17 23:04:21.000000000 +0000 +@@ -1,6 +1,8 @@ + #ifndef _I386_PGTABLE_2LEVEL_DEFS_H + #define _I386_PGTABLE_2LEVEL_DEFS_H + ++#define HAVE_SHARED_KERNEL_PMD 0 ++ + /* + * traditional i386 two-level paging structure: + */ +diff -pruN ../pristine-linux-2.6.16-rc6/include/asm-i386/pgtable-3level-defs.h ./include/asm-i386/pgtable-3level-defs.h +--- ../pristine-linux-2.6.16-rc6/include/asm-i386/pgtable-3level-defs.h 2006-01-03 03:21:10.000000000 +0000 ++++ ./include/asm-i386/pgtable-3level-defs.h 2006-03-17 23:04:21.000000000 +0000 +@@ -1,6 +1,8 @@ + #ifndef _I386_PGTABLE_3LEVEL_DEFS_H + #define 
_I386_PGTABLE_3LEVEL_DEFS_H + ++#define HAVE_SHARED_KERNEL_PMD 1 ++ + /* + * PGDIR_SHIFT determines what a top-level page table entry can map + */ diff -r 516cf6553011 -r cf89e8f0831b patches/linux-2.6.16-rc6/smp-alts.patch --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/patches/linux-2.6.16-rc6/smp-alts.patch Mon Mar 20 08:56:54 2006 @@ -0,0 +1,591 @@ +diff -pruN ../pristine-linux-2.6.16-rc6/arch/i386/Kconfig ./arch/i386/Kconfig +--- ../pristine-linux-2.6.16-rc6/arch/i386/Kconfig 2006-03-17 22:59:01.000000000 +0000 ++++ ./arch/i386/Kconfig 2006-03-17 23:04:23.000000000 +0000 +@@ -202,6 +202,19 @@ config SMP + + If you don't know what to do here, say N. + ++config SMP_ALTERNATIVES ++ bool "SMP alternatives support (EXPERIMENTAL)" ++ depends on SMP && EXPERIMENTAL ++ help ++ Try to reduce the overhead of running an SMP kernel on a uniprocessor ++ host slightly by replacing certain key instruction sequences ++ according to whether we currently have more than one CPU available. ++ This should provide a noticeable boost to performance when ++ running SMP kernels on UP machines, and have negligible impact ++ when running on an true SMP host. ++ ++ If unsure, say N. ++ + config NR_CPUS + int "Maximum number of CPUs (2-255)" + range 2 255 +diff -pruN ../pristine-linux-2.6.16-rc6/arch/i386/kernel/Makefile ./arch/i386/kernel/Makefile +--- ../pristine-linux-2.6.16-rc6/arch/i386/kernel/Makefile 2006-03-17 22:59:01.000000000 +0000 ++++ ./arch/i386/kernel/Makefile 2006-03-17 23:04:23.000000000 +0000 +@@ -37,6 +37,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o + obj-$(CONFIG_DOUBLEFAULT) += doublefault.o + obj-$(CONFIG_VM86) += vm86.o + obj-$(CONFIG_EARLY_PRINTK) += early_printk.o ++obj-$(CONFIG_SMP_ALTERNATIVES) += smpalts.o + + EXTRA_AFLAGS := -traditional + +diff -pruN ../pristine-linux-2.6.16-rc6/arch/i386/kernel/smpalts.c ./arch/i386/kernel/smpalts.c +--- ../pristine-linux-2.6.16-rc6/arch/i386/kernel/smpalts.c 1970-01-01 01:00:00.000000000 +0100 ++++ ./arch/i386/kernel/smpalts.c 2006-03-17 23:04:23.000000000 +0000 +@@ -0,0 +1,85 @@ ++#include <linux/kernel.h> ++#include <asm/system.h> ++#include <asm/smp_alt.h> ++#include <asm/processor.h> ++#include <asm/string.h> ++ ++struct smp_replacement_record { ++ unsigned char targ_size; ++ unsigned char smp1_size; ++ unsigned char smp2_size; ++ unsigned char up_size; ++ unsigned char feature; ++ unsigned char data[0]; ++}; ++ ++struct smp_alternative_record { ++ void *targ_start; ++ struct smp_replacement_record *repl; ++}; ++ ++extern struct smp_alternative_record __start_smp_alternatives_table, ++ __stop_smp_alternatives_table; ++extern unsigned long __init_begin, __init_end; ++ ++void prepare_for_smp(void) ++{ ++ struct smp_alternative_record *r; ++ printk(KERN_INFO "Enabling SMP...\n"); ++ for (r = &__start_smp_alternatives_table; ++ r != &__stop_smp_alternatives_table; ++ r++) { ++ BUG_ON(r->repl->targ_size < r->repl->smp1_size); ++ BUG_ON(r->repl->targ_size < r->repl->smp2_size); ++ BUG_ON(r->repl->targ_size < r->repl->up_size); ++ if (system_state == SYSTEM_RUNNING && ++ r->targ_start >= (void *)&__init_begin && ++ r->targ_start < (void *)&__init_end) ++ continue; ++ if (r->repl->feature != (unsigned char)-1 && ++ boot_cpu_has(r->repl->feature)) { ++ memcpy(r->targ_start, ++ r->repl->data + r->repl->smp1_size, ++ r->repl->smp2_size); ++ memset(r->targ_start + r->repl->smp2_size, ++ 0x90, ++ r->repl->targ_size - r->repl->smp2_size); ++ } else { ++ memcpy(r->targ_start, ++ r->repl->data, ++ r->repl->smp1_size); ++ memset(r->targ_start + r->repl->smp1_size, 
++ 0x90, ++ r->repl->targ_size - r->repl->smp1_size); ++ } ++ } ++ /* Paranoia */ ++ asm volatile ("jmp 1f\n1:"); ++ mb(); ++} ++ ++void unprepare_for_smp(void) ++{ ++ struct smp_alternative_record *r; ++ printk(KERN_INFO "Disabling SMP...\n"); ++ for (r = &__start_smp_alternatives_table; ++ r != &__stop_smp_alternatives_table; ++ r++) { ++ BUG_ON(r->repl->targ_size < r->repl->smp1_size); ++ BUG_ON(r->repl->targ_size < r->repl->smp2_size); ++ BUG_ON(r->repl->targ_size < r->repl->up_size); ++ if (system_state == SYSTEM_RUNNING && ++ r->targ_start >= (void *)&__init_begin && ++ r->targ_start < (void *)&__init_end) ++ continue; ++ memcpy(r->targ_start, ++ r->repl->data + r->repl->smp1_size + r->repl->smp2_size, ++ r->repl->up_size); ++ memset(r->targ_start + r->repl->up_size, ++ 0x90, ++ r->repl->targ_size - r->repl->up_size); ++ } ++ /* Paranoia */ ++ asm volatile ("jmp 1f\n1:"); ++ mb(); ++} +diff -pruN ../pristine-linux-2.6.16-rc6/arch/i386/kernel/smpboot.c ./arch/i386/kernel/smpboot.c +--- ../pristine-linux-2.6.16-rc6/arch/i386/kernel/smpboot.c 2006-03-17 22:59:01.000000000 +0000 ++++ ./arch/i386/kernel/smpboot.c 2006-03-17 23:04:23.000000000 +0000 +@@ -1208,6 +1208,11 @@ static void __init smp_boot_cpus(unsigne + if (max_cpus <= cpucount+1) + continue; + ++#ifdef CONFIG_SMP_ALTERNATIVES ++ if (kicked == 1) ++ prepare_for_smp(); ++#endif ++ + if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu)) + printk("CPU #%d not responding - cannot use it.\n", + apicid); +@@ -1386,6 +1391,11 @@ int __devinit __cpu_up(unsigned int cpu) + return -EIO; + } + ++#ifdef CONFIG_SMP_ALTERNATIVES ++ if (num_online_cpus() == 1) ++ prepare_for_smp(); ++#endif ++ + local_irq_enable(); + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + /* Unleash the CPU! */ +diff -pruN ../pristine-linux-2.6.16-rc6/arch/i386/kernel/vmlinux.lds.S ./arch/i386/kernel/vmlinux.lds.S +--- ../pristine-linux-2.6.16-rc6/arch/i386/kernel/vmlinux.lds.S 2006-01-03 03:21:10.000000000 +0000 ++++ ./arch/i386/kernel/vmlinux.lds.S 2006-03-17 23:04:23.000000000 +0000 +@@ -34,6 +34,13 @@ SECTIONS + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } + __stop___ex_table = .; + ++ . = ALIGN(16); ++ __start_smp_alternatives_table = .; ++ __smp_alternatives : { *(__smp_alternatives) } ++ __stop_smp_alternatives_table = .; ++ ++ __smp_replacements : { *(__smp_replacements) } ++ + RODATA + + /* writeable */ +diff -pruN ../pristine-linux-2.6.16-rc6/include/asm-i386/atomic.h ./include/asm-i386/atomic.h +--- ../pristine-linux-2.6.16-rc6/include/asm-i386/atomic.h 2006-03-17 22:59:05.000000000 +0000 ++++ ./include/asm-i386/atomic.h 2006-03-17 23:04:23.000000000 +0000 +@@ -4,18 +4,13 @@ + #include <linux/config.h> + #include <linux/compiler.h> + #include <asm/processor.h> ++#include <asm/smp_alt.h> + + /* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +-#ifdef CONFIG_SMP +-#define LOCK "lock ; " +-#else +-#define LOCK "" +-#endif +- + /* + * Make sure gcc doesn't try to be clever and move things around + * on us. 
We need to use _exactly_ the address the user gave us, +diff -pruN ../pristine-linux-2.6.16-rc6/include/asm-i386/bitops.h ./include/asm-i386/bitops.h +--- ../pristine-linux-2.6.16-rc6/include/asm-i386/bitops.h 2006-03-17 22:59:05.000000000 +0000 ++++ ./include/asm-i386/bitops.h 2006-03-17 23:04:23.000000000 +0000 +@@ -7,6 +7,7 @@ + + #include <linux/config.h> + #include <linux/compiler.h> ++#include <asm/smp_alt.h> + + /* + * These have to be done with inline assembly: that way the bit-setting +@@ -16,12 +17,6 @@ + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +-#ifdef CONFIG_SMP +-#define LOCK_PREFIX "lock ; " +-#else +-#define LOCK_PREFIX "" +-#endif +- + #define ADDR (*(volatile long *) addr) + + /** +@@ -41,7 +36,7 @@ + */ + static inline void set_bit(int nr, volatile unsigned long * addr) + { +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btsl %1,%0" + :"+m" (ADDR) + :"Ir" (nr)); +@@ -76,7 +71,7 @@ static inline void __set_bit(int nr, vol + */ + static inline void clear_bit(int nr, volatile unsigned long * addr) + { +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btrl %1,%0" + :"+m" (ADDR) + :"Ir" (nr)); +@@ -121,7 +116,7 @@ static inline void __change_bit(int nr, + */ + static inline void change_bit(int nr, volatile unsigned long * addr) + { +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btcl %1,%0" + :"+m" (ADDR) + :"Ir" (nr)); +@@ -140,7 +135,7 @@ static inline int test_and_set_bit(int n + { + int oldbit; + +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btsl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); +@@ -180,7 +175,7 @@ static inline int test_and_clear_bit(int + { + int oldbit; + +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); +@@ -231,7 +226,7 @@ static inline int test_and_change_bit(in + { + int oldbit; + +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btcl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); +diff -pruN ../pristine-linux-2.6.16-rc6/include/asm-i386/futex.h ./include/asm-i386/futex.h +--- ../pristine-linux-2.6.16-rc6/include/asm-i386/futex.h 2006-03-17 22:59:05.000000000 +0000 ++++ ./include/asm-i386/futex.h 2006-03-17 23:04:23.000000000 +0000 +@@ -28,7 +28,7 @@ + "1: movl %2, %0\n\ + movl %0, %3\n" \ + insn "\n" \ +-"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\ ++"2: " LOCK "cmpxchgl %3, %2\n\ + jnz 1b\n\ + 3: .section .fixup,\"ax\"\n\ + 4: mov %5, %1\n\ +@@ -68,7 +68,7 @@ futex_atomic_op_inuser (int encoded_op, + #endif + switch (op) { + case FUTEX_OP_ADD: +- __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, ++ __futex_atomic_op1(LOCK "xaddl %0, %2", ret, + oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: +diff -pruN ../pristine-linux-2.6.16-rc6/include/asm-i386/rwsem.h ./include/asm-i386/rwsem.h +--- ../pristine-linux-2.6.16-rc6/include/asm-i386/rwsem.h 2006-01-03 03:21:10.000000000 +0000 ++++ ./include/asm-i386/rwsem.h 2006-03-17 23:04:23.000000000 +0000 +@@ -40,6 +40,7 @@ + + #include <linux/list.h> + #include <linux/spinlock.h> ++#include <asm/smp_alt.h> + + struct rwsem_waiter; + +@@ -99,7 +100,7 @@ static inline void __down_read(struct rw + { + __asm__ __volatile__( + "# beginning down_read\n\t" +-LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ ++LOCK " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ + " js 2f\n\t" /* jump if 
we weren't granted the lock */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -130,7 +131,7 @@ static inline int __down_read_trylock(st + " movl %1,%2\n\t" + " addl %3,%2\n\t" + " jle 2f\n\t" +-LOCK_PREFIX " cmpxchgl %2,%0\n\t" ++LOCK " cmpxchgl %2,%0\n\t" + " jnz 1b\n\t" + "2:\n\t" + "# ending __down_read_trylock\n\t" +@@ -150,7 +151,7 @@ static inline void __down_write(struct r + tmp = RWSEM_ACTIVE_WRITE_BIAS; + __asm__ __volatile__( + "# beginning down_write\n\t" +-LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ ++LOCK " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ + " testl %%edx,%%edx\n\t" /* was the count 0 before? */ + " jnz 2f\n\t" /* jump if we weren't granted the lock */ + "1:\n\t" +@@ -188,7 +189,7 @@ static inline void __up_read(struct rw_s + __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; + __asm__ __volatile__( + "# beginning __up_read\n\t" +-LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ ++LOCK " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ + " js 2f\n\t" /* jump if the lock is being waited upon */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -214,7 +215,7 @@ static inline void __up_write(struct rw_ + __asm__ __volatile__( + "# beginning __up_write\n\t" + " movl %2,%%edx\n\t" +-LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ ++LOCK " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ + " jnz 2f\n\t" /* jump if the lock is being waited upon */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -239,7 +240,7 @@ static inline void __downgrade_write(str + { + __asm__ __volatile__( + "# beginning __downgrade_write\n\t" +-LOCK_PREFIX " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ ++LOCK " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ + " js 2f\n\t" /* jump if the lock is being waited upon */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -263,7 +264,7 @@ LOCK_PREFIX " addl %2,(%%eax)\n\t" + static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) + { + __asm__ __volatile__( +-LOCK_PREFIX "addl %1,%0" ++LOCK "addl %1,%0" + : "=m"(sem->count) + : "ir"(delta), "m"(sem->count)); + } +@@ -276,7 +277,7 @@ static inline int rwsem_atomic_update(in + int tmp = delta; + + __asm__ __volatile__( +-LOCK_PREFIX "xadd %0,(%2)" ++LOCK "xadd %0,(%2)" + : "+r"(tmp), "=m"(sem->count) + : "r"(sem), "m"(sem->count) + : "memory"); +diff -pruN ../pristine-linux-2.6.16-rc6/include/asm-i386/smp_alt.h ./include/asm-i386/smp_alt.h +--- ../pristine-linux-2.6.16-rc6/include/asm-i386/smp_alt.h 1970-01-01 01:00:00.000000000 +0100 ++++ ./include/asm-i386/smp_alt.h 2006-03-17 23:04:23.000000000 +0000 +@@ -0,0 +1,32 @@ ++#ifndef __ASM_SMP_ALT_H__ ++#define __ASM_SMP_ALT_H__ ++ ++#include <linux/config.h> ++ ++#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE) ++#define LOCK \ ++ "6677: nop\n" \ ++ ".section __smp_alternatives,\"a\"\n" \ ++ ".long 6677b\n" \ ++ ".long 6678f\n" \ ++ ".previous\n" \ ++ ".section __smp_replacements,\"a\"\n" \ ++ "6678: .byte 1\n" \ ++ ".byte 1\n" \ ++ ".byte 0\n" \ ++ ".byte 1\n" \ ++ ".byte -1\n" \ ++ "lock\n" \ ++ "nop\n" \ ++ ".previous\n" ++void prepare_for_smp(void); ++void unprepare_for_smp(void); ++#else ++#define LOCK "lock ; " ++#endif ++#else ++#define LOCK "" ++#endif ++ ++#endif /* __ASM_SMP_ALT_H__ */ +diff -pruN ../pristine-linux-2.6.16-rc6/include/asm-i386/spinlock.h ./include/asm-i386/spinlock.h +--- 
../pristine-linux-2.6.16-rc6/include/asm-i386/spinlock.h 2006-01-03 03:21:10.000000000 +0000 ++++ ./include/asm-i386/spinlock.h 2006-03-17 23:04:23.000000000 +0000 +@@ -6,6 +6,7 @@ + #include <asm/page.h> + #include <linux/config.h> + #include <linux/compiler.h> ++#include <asm/smp_alt.h> + + /* + * Your basic SMP spinlocks, allowing only a single CPU anywhere +@@ -23,7 +24,8 @@ + + #define __raw_spin_lock_string \ + "\n1:\t" \ +- "lock ; decb %0\n\t" \ ++ LOCK \ ++ "decb %0\n\t" \ + "jns 3f\n" \ + "2:\t" \ + "rep;nop\n\t" \ +@@ -34,7 +36,8 @@ + + #define __raw_spin_lock_string_flags \ + "\n1:\t" \ +- "lock ; decb %0\n\t" \ ++ LOCK \ ++ "decb %0\n\t" \ + "jns 4f\n\t" \ + "2:\t" \ + "testl $0x200, %1\n\t" \ +@@ -65,10 +68,34 @@ static inline void __raw_spin_lock_flags + static inline int __raw_spin_trylock(raw_spinlock_t *lock) + { + char oldval; ++#ifdef CONFIG_SMP_ALTERNATIVES + __asm__ __volatile__( +- "xchgb %b0,%1" ++ "1:movb %1,%b0\n" ++ "movb $0,%1\n" ++ "2:" ++ ".section __smp_alternatives,\"a\"\n" ++ ".long 1b\n" ++ ".long 3f\n" ++ ".previous\n" ++ ".section __smp_replacements,\"a\"\n" ++ "3: .byte 2b - 1b\n" ++ ".byte 5f-4f\n" ++ ".byte 0\n" ++ ".byte 6f-5f\n" ++ ".byte -1\n" ++ "4: xchgb %b0,%1\n" ++ "5: movb %1,%b0\n" ++ "movb $0,%1\n" ++ "6:\n" ++ ".previous\n" + :"=q" (oldval), "=m" (lock->slock) + :"0" (0) : "memory"); ++#else ++ __asm__ __volatile__( ++ "xchgb %b0,%1\n" ++ :"=q" (oldval), "=m" (lock->slock) ++ :"0" (0) : "memory"); ++#endif + return oldval > 0; + } + +@@ -178,12 +205,12 @@ static inline int __raw_write_trylock(ra + + static inline void __raw_read_unlock(raw_rwlock_t *rw) + { +- asm volatile("lock ; incl %0" :"=m" (rw->lock) : : "memory"); ++ asm volatile(LOCK "incl %0" :"=m" (rw->lock) : : "memory"); + } + + static inline void __raw_write_unlock(raw_rwlock_t *rw) + { +- asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ", %0" ++ asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ", %0" + : "=m" (rw->lock) : : "memory"); + } + +diff -pruN ../pristine-linux-2.6.16-rc6/include/asm-i386/system.h ./include/asm-i386/system.h +--- ../pristine-linux-2.6.16-rc6/include/asm-i386/system.h 2006-03-17 22:59:05.000000000 +0000 ++++ ./include/asm-i386/system.h 2006-03-17 23:04:23.000000000 +0000 +@@ -5,7 +5,7 @@ + #include <linux/kernel.h> + #include <asm/segment.h> + #include <asm/cpufeature.h> +-#include <linux/bitops.h> /* for LOCK_PREFIX */ ++#include <asm/smp_alt.h> + + #ifdef __KERNEL__ + +@@ -271,19 +271,19 @@ static inline unsigned long __cmpxchg(vo + unsigned long prev; + switch (size) { + case 1: +- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" ++ __asm__ __volatile__(LOCK "cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: +- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" ++ __asm__ __volatile__(LOCK "cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: +- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" ++ __asm__ __volatile__(LOCK "cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); +@@ -336,7 +336,7 @@ static inline unsigned long long __cmpxc + unsigned long long new) + { + unsigned long long prev; +- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3" ++ __asm__ __volatile__(LOCK "cmpxchg8b %3" + : "=A"(prev) + : "b"((unsigned long)new), + "c"((unsigned long)(new >> 32)), +@@ -503,11 +503,55 @@ struct alt_instr { + #endif + + #ifdef CONFIG_SMP ++#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE) ++#define 
smp_alt_mb(instr) \ ++__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \ ++ ".section __smp_alternatives,\"a\"\n" \ ++ ".long 6667b\n" \ ++ ".long 6673f\n" \ ++ ".previous\n" \ ++ ".section __smp_replacements,\"a\"\n" \ ++ "6673:.byte 6668b-6667b\n" \ ++ ".byte 6670f-6669f\n" \ ++ ".byte 6671f-6670f\n" \ ++ ".byte 0\n" \ ++ ".byte %c0\n" \ ++ "6669:lock;addl $0,0(%%esp)\n" \ ++ "6670:" instr "\n" \ ++ "6671:\n" \ ++ ".previous\n" \ ++ : \ ++ : "i" (X86_FEATURE_XMM2) \ ++ : "memory") ++#define smp_rmb() smp_alt_mb("lfence") ++#define smp_mb() smp_alt_mb("mfence") ++#define set_mb(var, value) do { \ ++unsigned long __set_mb_temp; \ ++__asm__ __volatile__("6667:movl %1, %0\n6668:\n" \ ++ ".section __smp_alternatives,\"a\"\n" \ ++ ".long 6667b\n" \ ++ ".long 6673f\n" \ ++ ".previous\n" \ ++ ".section __smp_replacements,\"a\"\n" \ ++ "6673: .byte 6668b-6667b\n" \ ++ ".byte 6670f-6669f\n" \ ++ ".byte 0\n" \ ++ ".byte 6671f-6670f\n" \ ++ ".byte -1\n" \ ++ "6669: xchg %1, %0\n" \ ++ "6670:movl %1, %0\n" \ ++ "6671:\n" \ ++ ".previous\n" \ ++ : "=m" (var), "=r" (__set_mb_temp) \ ++ : "1" (value) \ ++ : "memory"); } while (0) ++#else + #define smp_mb() mb() + #define smp_rmb() rmb() ++#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) ++#endif + #define smp_wmb() wmb() + #define smp_read_barrier_depends() read_barrier_depends() +-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) + #else + #define smp_mb() barrier() + #define smp_rmb() barrier() diff -r 516cf6553011 -r cf89e8f0831b xen/Post.mk --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/Post.mk Mon Mar 20 08:56:54 2006 @@ -0,0 +1,18 @@ + +subdirs-all := $(subdirs-y) $(subdirs-n) + +obj-y += $(patsubst %,%/built_in.o,$(subdirs-y)) + +built_in.o: $(obj-y) + $(LD) $(LDFLAGS) -r -o $@ $^ + +.PHONY: FORCE +FORCE: + +%/built_in.o: FORCE + $(MAKE) -C $* + +clean:: $(addprefix _clean_, $(subdirs-all)) FORCE + rm -f *.o *~ core +_clean_%/: FORCE + $(MAKE) -C $* clean diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/tools/privop/Makefile --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/tools/privop/Makefile Mon Mar 20 08:56:54 2006 @@ -0,0 +1,11 @@ +CC=gcc +CFLAGS=-O -Wall + +all: postat + +postat: postat.c pohcalls.o + +clean: + $(RM) -f *.o postat *.s *~ + + diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/tools/privop/pohcalls.S --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/tools/privop/pohcalls.S Mon Mar 20 08:56:54 2006 @@ -0,0 +1,30 @@ + .file "hypercall.S" + .pred.safe_across_calls p1-p5,p16-p63 + .text + .align 16 + .global dump_privop_counts# + .proc dump_privop_counts# +dump_privop_counts: + .prologue + .body + mov r2 = 0xffff + ;; + break 0x1000 + ;; + br.ret.sptk.many b0 + ;; + .endp dump_privop_counts# + .align 16 + .global zero_privop_counts# + .proc zero_privop_counts# +zero_privop_counts: + .prologue + .body + mov r2 = 0xfffe + ;; + break 0x1000 + ;; + br.ret.sptk.many b0 + ;; + .endp zero_privop_counts# + diff -r 516cf6553011 -r cf89e8f0831b xen/arch/ia64/tools/privop/postat.c --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/ia64/tools/privop/postat.c Mon Mar 20 08:56:54 2006 @@ -0,0 +1,27 @@ +#include <stdio.h> +#include <string.h> + +extern int dump_privop_counts (char *buf, int len); + +extern int zero_privop_counts (char *buf, int len); + +int +main (int argc, char *argv[]) +{ + static char buf[8192]; + int res; + + if (argc == 1) + res = dump_privop_counts (buf, sizeof (buf)); + else if (argc == 2 && strcmp (argv[1], "--clear") == 0) + res = 
zero_privop_counts (buf, sizeof (buf)); + else + { + printf ("usage: %s [--clear]\n", argv[0]); + return 1; + } + printf ("res=%d\n", res); + fputs (buf, stdout); + + return 0; +} diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/acpi/Makefile --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/acpi/Makefile Mon Mar 20 08:56:54 2006 @@ -0,0 +1,5 @@ +include $(BASEDIR)/Rules.mk + +obj-y += boot.o + +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/cpu/Makefile --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/cpu/Makefile Mon Mar 20 08:56:54 2006 @@ -0,0 +1,16 @@ +include $(BASEDIR)/Rules.mk + +subdirs-y += mcheck +subdirs-y += mtrr + +obj-y += amd.o +obj-y += common.o +obj-y += intel.o +obj-y += intel_cacheinfo.o + +obj-$(x86_32) += centaur.o +obj-$(x86_32) += cyrix.o +obj-$(x86_32) += rise.o +obj-$(x86_32) += transmeta.o + +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/cpu/mcheck/Makefile --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/cpu/mcheck/Makefile Mon Mar 20 08:56:54 2006 @@ -0,0 +1,11 @@ +include $(BASEDIR)/Rules.mk + +obj-y += k7.o +obj-y += mce.o +obj-y += non-fatal.o +obj-y += p4.o +obj-y += p5.o +obj-y += p6.o +obj-y += winchip.o + +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/cpu/mtrr/Makefile --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/cpu/mtrr/Makefile Mon Mar 20 08:56:54 2006 @@ -0,0 +1,10 @@ +include $(BASEDIR)/Rules.mk + +obj-y += amd.o +obj-y += centaur.o +obj-y += cyrix.o +obj-y += generic.o +obj-y += main.o +obj-y += state.o + +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/genapic/Makefile --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/genapic/Makefile Mon Mar 20 08:56:54 2006 @@ -0,0 +1,10 @@ +include $(BASEDIR)/Rules.mk + +obj-y += bigsmp.o +obj-y += default.o +obj-y += es7000.o +obj-y += es7000plat.o +obj-y += probe.o +obj-y += summit.o + +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/hvm/Makefile --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/hvm/Makefile Mon Mar 20 08:56:54 2006 @@ -0,0 +1,14 @@ +include $(BASEDIR)/Rules.mk + +subdirs-y += svm +subdirs-y += vmx + +obj-y += hvm.o +obj-y += i8259.o +obj-y += intercept.o +obj-y += io.o +obj-y += platform.o +obj-y += vioapic.o +obj-y += vlapic.o + +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/hvm/svm/Makefile --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/hvm/svm/Makefile Mon Mar 20 08:56:54 2006 @@ -0,0 +1,12 @@ +include $(BASEDIR)/Rules.mk + +subdirs-$(x86_32) += x86_32 +subdirs-$(x86_64) += x86_64 + +obj-y += emulate.o +obj-y += instrlen.o +obj-y += intr.o +obj-y += svm.o +obj-y += vmcb.o + +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/hvm/svm/x86_32/Makefile --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/hvm/svm/x86_32/Makefile Mon Mar 20 08:56:54 2006 @@ -0,0 +1,5 @@ +include $(BASEDIR)/Rules.mk + +obj-y += exits.o + +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/hvm/svm/x86_64/Makefile --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/hvm/svm/x86_64/Makefile Mon Mar 20 08:56:54 2006 @@ -0,0 +1,5 @@ +include $(BASEDIR)/Rules.mk + +obj-y += exits.o + +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/hvm/vmx/Makefile --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/hvm/vmx/Makefile Mon Mar 20 08:56:54 2006 @@ -0,0 +1,10 @@ +include 
$(BASEDIR)/Rules.mk + +subdirs-$(x86_32) += x86_32 +subdirs-$(x86_64) += x86_64 + +obj-y += io.o +obj-y += vmcs.o +obj-y += vmx.o + +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/hvm/vmx/x86_32/Makefile --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/hvm/vmx/x86_32/Makefile Mon Mar 20 08:56:54 2006 @@ -0,0 +1,5 @@ +include $(BASEDIR)/Rules.mk + +obj-y += exits.o + +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/hvm/vmx/x86_64/Makefile --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/hvm/vmx/x86_64/Makefile Mon Mar 20 08:56:54 2006 @@ -0,0 +1,5 @@ +include $(BASEDIR)/Rules.mk + +obj-y += exits.o + +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/x86_32/Makefile --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/x86_32/Makefile Mon Mar 20 08:56:54 2006 @@ -0,0 +1,11 @@ +include $(BASEDIR)/Rules.mk + +obj-y += domain_page.o +obj-y += entry.o +obj-y += mm.o +obj-y += seg_fixup.o +obj-y += traps.o + +obj-$(supervisor_mode_kernel) += supervisor_mode_kernel.o + +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/arch/x86/x86_64/Makefile --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/arch/x86/x86_64/Makefile Mon Mar 20 08:56:54 2006 @@ -0,0 +1,7 @@ +include $(BASEDIR)/Rules.mk + +obj-y += entry.o +obj-y += mm.o +obj-y += traps.o + +include $(BASEDIR)/Post.mk diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/vlsapic.h --- /dev/null Mon Mar 20 08:56:46 2006 +++ b/xen/include/asm-ia64/vlsapic.h Mon Mar 20 08:56:54 2006 @@ -0,0 +1,35 @@ + + +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * + */ + +#ifndef _LSAPIC_H +#define _LSAPIC_H +#include <xen/sched.h> + +extern void vmx_virq_line_init(struct domain *d); +extern void vtm_init(struct vcpu *vcpu); +extern void vtm_set_itc(struct vcpu *vcpu, uint64_t new_itc); +extern void vtm_set_itm(struct vcpu *vcpu, uint64_t val); +extern void vtm_set_itv(struct vcpu *vcpu, uint64_t val); +extern void vmx_vexirq(struct vcpu *vcpu); +extern void vhpi_detection(struct vcpu *vcpu); + +#endif diff -r 516cf6553011 -r cf89e8f0831b patches/linux-2.6.16-rc5/i386-mach-io-check-nmi.patch --- a/patches/linux-2.6.16-rc5/i386-mach-io-check-nmi.patch Mon Mar 20 08:56:46 2006 +++ /dev/null Mon Mar 20 08:56:54 2006 @@ -1,45 +0,0 @@ -diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/kernel/traps.c ./arch/i386/kernel/traps.c ---- ../pristine-linux-2.6.16-rc5/arch/i386/kernel/traps.c 2006-02-27 15:46:58.000000000 +0000 -+++ ./arch/i386/kernel/traps.c 2006-02-27 15:55:23.000000000 +0000 -@@ -567,18 +567,11 @@ static void mem_parity_error(unsigned ch - - static void io_check_error(unsigned char reason, struct pt_regs * regs) - { -- unsigned long i; -- - printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); - show_registers(regs); - - /* Re-enable the IOCK line, wait for a few seconds */ -- reason = (reason & 0xf) | 8; -- outb(reason, 0x61); -- i = 2000; -- while (--i) udelay(1000); -- reason &= ~8; -- outb(reason, 0x61); -+ clear_io_check_error(reason); - } - - static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) -diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/mach-default/mach_traps.h ./include/asm-i386/mach-default/mach_traps.h ---- ../pristine-linux-2.6.16-rc5/include/asm-i386/mach-default/mach_traps.h 2006-01-03 03:21:10.000000000 +0000 -+++ ./include/asm-i386/mach-default/mach_traps.h 2006-02-27 15:55:23.000000000 +0000 -@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig - outb(reason, 0x61); - } - -+static inline void clear_io_check_error(unsigned char reason) -+{ -+ unsigned long i; -+ -+ reason = (reason & 0xf) | 8; -+ outb(reason, 0x61); -+ i = 2000; -+ while (--i) udelay(1000); -+ reason &= ~8; -+ outb(reason, 0x61); -+} -+ - static inline unsigned char get_nmi_reason(void) - { - return inb(0x61); diff -r 516cf6553011 -r cf89e8f0831b patches/linux-2.6.16-rc5/net-csum.patch --- a/patches/linux-2.6.16-rc5/net-csum.patch Mon Mar 20 08:56:46 2006 +++ /dev/null Mon Mar 20 08:56:54 2006 @@ -1,41 +0,0 @@ -diff -pruN ../pristine-linux-2.6.16-rc5/net/ipv4/netfilter/ip_nat_proto_tcp.c ./net/ipv4/netfilter/ip_nat_proto_tcp.c ---- ../pristine-linux-2.6.16-rc5/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-02-27 15:47:38.000000000 +0000 -+++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-02-27 15:55:25.000000000 +0000 -@@ -129,10 +129,14 @@ tcp_manip_pkt(struct sk_buff **pskb, - if (hdrsize < sizeof(*hdr)) - return 1; - -- hdr->check = ip_nat_cheat_check(~oldip, newip, -+ if ((*pskb)->proto_csum_blank) { -+ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); -+ } else { -+ hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(oldport ^ 0xFFFF, - newport, - hdr->check)); -+ } - return 1; - } - -diff -pruN ../pristine-linux-2.6.16-rc5/net/ipv4/netfilter/ip_nat_proto_udp.c ./net/ipv4/netfilter/ip_nat_proto_udp.c ---- ../pristine-linux-2.6.16-rc5/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-02-27 15:47:38.000000000 +0000 -+++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2006-02-27 15:55:25.000000000 +0000 -@@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb, - newport = tuple->dst.u.udp.port; - 
portptr = &hdr->dest; - } -- if (hdr->check) /* 0 is a special case meaning no checksum */ -- hdr->check = ip_nat_cheat_check(~oldip, newip, -+ if (hdr->check) { /* 0 is a special case meaning no checksum */ -+ if ((*pskb)->proto_csum_blank) { -+ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); -+ } else { -+ hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(*portptr ^ 0xFFFF, - newport, - hdr->check)); -+ } -+ } - *portptr = newport; - return 1; - } diff -r 516cf6553011 -r cf89e8f0831b patches/linux-2.6.16-rc5/pmd-shared.patch --- a/patches/linux-2.6.16-rc5/pmd-shared.patch Mon Mar 20 08:56:46 2006 +++ /dev/null Mon Mar 20 08:56:54 2006 @@ -1,111 +0,0 @@ -diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/mm/pageattr.c ./arch/i386/mm/pageattr.c ---- ../pristine-linux-2.6.16-rc5/arch/i386/mm/pageattr.c 2006-02-27 15:46:58.000000000 +0000 -+++ ./arch/i386/mm/pageattr.c 2006-02-27 15:55:31.000000000 +0000 -@@ -78,7 +78,7 @@ static void set_pmd_pte(pte_t *kpte, uns - unsigned long flags; - - set_pte_atomic(kpte, pte); /* change init_mm */ -- if (PTRS_PER_PMD > 1) -+ if (HAVE_SHARED_KERNEL_PMD) - return; - - spin_lock_irqsave(&pgd_lock, flags); -diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c ---- ../pristine-linux-2.6.16-rc5/arch/i386/mm/pgtable.c 2006-01-03 03:21:10.000000000 +0000 -+++ ./arch/i386/mm/pgtable.c 2006-02-27 15:55:31.000000000 +0000 -@@ -215,9 +215,10 @@ void pgd_ctor(void *pgd, kmem_cache_t *c - spin_lock_irqsave(&pgd_lock, flags); - } - -- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, -- swapper_pg_dir + USER_PTRS_PER_PGD, -- KERNEL_PGD_PTRS); -+ if (PTRS_PER_PMD == 1 || HAVE_SHARED_KERNEL_PMD) -+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, -+ swapper_pg_dir + USER_PTRS_PER_PGD, -+ KERNEL_PGD_PTRS); - if (PTRS_PER_PMD > 1) - return; - -@@ -249,6 +250,30 @@ pgd_t *pgd_alloc(struct mm_struct *mm) - goto out_oom; - set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); - } -+ -+ if (!HAVE_SHARED_KERNEL_PMD) { -+ unsigned long flags; -+ -+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { -+ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); -+ if (!pmd) -+ goto out_oom; -+ set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd))); -+ } -+ -+ spin_lock_irqsave(&pgd_lock, flags); -+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { -+ unsigned long v = (unsigned long)i << PGDIR_SHIFT; -+ pgd_t *kpgd = pgd_offset_k(v); -+ pud_t *kpud = pud_offset(kpgd, v); -+ pmd_t *kpmd = pmd_offset(kpud, v); -+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); -+ memcpy(pmd, kpmd, PAGE_SIZE); -+ } -+ pgd_list_add(pgd); -+ spin_unlock_irqrestore(&pgd_lock, flags); -+ } -+ - return pgd; - - out_oom: -@@ -263,9 +288,23 @@ void pgd_free(pgd_t *pgd) - int i; - - /* in the PAE case user pgd entries are overwritten before usage */ -- if (PTRS_PER_PMD > 1) -- for (i = 0; i < USER_PTRS_PER_PGD; ++i) -- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); -+ if (PTRS_PER_PMD > 1) { -+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) { -+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); -+ kmem_cache_free(pmd_cache, pmd); -+ } -+ if (!HAVE_SHARED_KERNEL_PMD) { -+ unsigned long flags; -+ spin_lock_irqsave(&pgd_lock, flags); -+ pgd_list_del(pgd); -+ spin_unlock_irqrestore(&pgd_lock, flags); -+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { -+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); -+ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); -+ kmem_cache_free(pmd_cache, pmd); -+ } -+ } -+ } - /* in the non-PAE case, free_pgtables() clears user pgd 
entries */ - kmem_cache_free(pgd_cache, pgd); - } -diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/pgtable-2level-defs.h ./include/asm-i386/pgtable-2level-defs.h ---- ../pristine-linux-2.6.16-rc5/include/asm-i386/pgtable-2level-defs.h 2006-01-03 03:21:10.000000000 +0000 -+++ ./include/asm-i386/pgtable-2level-defs.h 2006-02-27 15:55:31.000000000 +0000 -@@ -1,6 +1,8 @@ - #ifndef _I386_PGTABLE_2LEVEL_DEFS_H - #define _I386_PGTABLE_2LEVEL_DEFS_H - -+#define HAVE_SHARED_KERNEL_PMD 0 -+ - /* - * traditional i386 two-level paging structure: - */ -diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/pgtable-3level-defs.h ./include/asm-i386/pgtable-3level-defs.h ---- ../pristine-linux-2.6.16-rc5/include/asm-i386/pgtable-3level-defs.h 2006-01-03 03:21:10.000000000 +0000 -+++ ./include/asm-i386/pgtable-3level-defs.h 2006-02-27 15:55:31.000000000 +0000 -@@ -1,6 +1,8 @@ - #ifndef _I386_PGTABLE_3LEVEL_DEFS_H - #define _I386_PGTABLE_3LEVEL_DEFS_H - -+#define HAVE_SHARED_KERNEL_PMD 1 -+ - /* - * PGDIR_SHIFT determines what a top-level page table entry can map - */ diff -r 516cf6553011 -r cf89e8f0831b patches/linux-2.6.16-rc5/smp-alts.patch --- a/patches/linux-2.6.16-rc5/smp-alts.patch Mon Mar 20 08:56:46 2006 +++ /dev/null Mon Mar 20 08:56:54 2006 @@ -1,591 +0,0 @@ -diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/Kconfig ./arch/i386/Kconfig ---- ../pristine-linux-2.6.16-rc5/arch/i386/Kconfig 2006-02-27 15:46:58.000000000 +0000 -+++ ./arch/i386/Kconfig 2006-02-27 15:55:34.000000000 +0000 -@@ -202,6 +202,19 @@ config SMP - - If you don't know what to do here, say N. - -+config SMP_ALTERNATIVES -+ bool "SMP alternatives support (EXPERIMENTAL)" -+ depends on SMP && EXPERIMENTAL -+ help -+ Try to reduce the overhead of running an SMP kernel on a uniprocessor -+ host slightly by replacing certain key instruction sequences -+ according to whether we currently have more than one CPU available. -+ This should provide a noticeable boost to performance when -+ running SMP kernels on UP machines, and have negligible impact -+ when running on an true SMP host. -+ -+ If unsure, say N. 
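[Editorial note: the help text above compresses the whole mechanism into two sentences, so a sketch may be useful. Every LOCK site in the patched headers emits a record into the __smp_alternatives/__smp_replacements sections pairing the patch target with up to three byte sequences; prepare_for_smp() and unprepare_for_smp() in the smpalts.c hunk below walk that table and rewrite the instruction stream. Below is a minimal user-space simulation of the selection logic, assuming the record layout from smp_alt.h; patch_site() and the demo buffer are illustrative names, not kernel code.]

    #include <stdio.h>
    #include <string.h>

    /* Same layout as the records built in smp_alt.h / smpalts.c:
     * one byte each for the target size, the two SMP variants, the
     * UP variant, and a CPU-feature gate (0xff means "no gate"). */
    struct smp_replacement_record {
        unsigned char targ_size;
        unsigned char smp1_size;   /* generic SMP bytes, e.g. "lock" */
        unsigned char smp2_size;   /* feature-gated SMP bytes        */
        unsigned char up_size;     /* uniprocessor bytes, e.g. "nop" */
        unsigned char feature;
        unsigned char data[4];     /* smp1, then smp2, then up bytes */
    };

    /* Pick a variant, copy it over the target, nop-pad the rest --
     * the core of prepare_for_smp()/unprepare_for_smp(). */
    static void patch_site(unsigned char *targ,
                           const struct smp_replacement_record *r,
                           int smp, int cpu_has_feature)
    {
        const unsigned char *src = r->data;
        unsigned char len = r->smp1_size;

        if (!smp) {
            src = r->data + r->smp1_size + r->smp2_size;
            len = r->up_size;
        } else if (r->feature != 0xff && cpu_has_feature) {
            src = r->data + r->smp1_size;
            len = r->smp2_size;
        }
        memcpy(targ, src, len);
        memset(targ + len, 0x90, r->targ_size - len);   /* 0x90 = nop */
    }

    int main(void)
    {
        /* The site LOCK emits: one nop that becomes an 0xF0 lock
         * prefix when a second CPU is brought up, and back again. */
        unsigned char text[1] = { 0x90 };
        struct smp_replacement_record r = { 1, 1, 0, 1, 0xff, { 0xf0, 0x90 } };

        patch_site(text, &r, 1, 0);
        printf("SMP text byte: 0x%02x\n", text[0]);  /* 0xf0 */
        patch_site(text, &r, 0, 0);
        printf("UP  text byte: 0x%02x\n", text[0]);  /* 0x90 */
        return 0;
    }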
-+ - config NR_CPUS - int "Maximum number of CPUs (2-255)" - range 2 255 -diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/kernel/Makefile ./arch/i386/kernel/Makefile ---- ../pristine-linux-2.6.16-rc5/arch/i386/kernel/Makefile 2006-02-27 15:46:58.000000000 +0000 -+++ ./arch/i386/kernel/Makefile 2006-02-27 15:55:34.000000000 +0000 -@@ -37,6 +37,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o - obj-$(CONFIG_DOUBLEFAULT) += doublefault.o - obj-$(CONFIG_VM86) += vm86.o - obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -+obj-$(CONFIG_SMP_ALTERNATIVES) += smpalts.o - - EXTRA_AFLAGS := -traditional - -diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/kernel/smpalts.c ./arch/i386/kernel/smpalts.c ---- ../pristine-linux-2.6.16-rc5/arch/i386/kernel/smpalts.c 1970-01-01 01:00:00.000000000 +0100 -+++ ./arch/i386/kernel/smpalts.c 2006-02-27 15:55:34.000000000 +0000 -@@ -0,0 +1,85 @@ -+#include <linux/kernel.h> -+#include <asm/system.h> -+#include <asm/smp_alt.h> -+#include <asm/processor.h> -+#include <asm/string.h> -+ -+struct smp_replacement_record { -+ unsigned char targ_size; -+ unsigned char smp1_size; -+ unsigned char smp2_size; -+ unsigned char up_size; -+ unsigned char feature; -+ unsigned char data[0]; -+}; -+ -+struct smp_alternative_record { -+ void *targ_start; -+ struct smp_replacement_record *repl; -+}; -+ -+extern struct smp_alternative_record __start_smp_alternatives_table, -+ __stop_smp_alternatives_table; -+extern unsigned long __init_begin, __init_end; -+ -+void prepare_for_smp(void) -+{ -+ struct smp_alternative_record *r; -+ printk(KERN_INFO "Enabling SMP...\n"); -+ for (r = &__start_smp_alternatives_table; -+ r != &__stop_smp_alternatives_table; -+ r++) { -+ BUG_ON(r->repl->targ_size < r->repl->smp1_size); -+ BUG_ON(r->repl->targ_size < r->repl->smp2_size); -+ BUG_ON(r->repl->targ_size < r->repl->up_size); -+ if (system_state == SYSTEM_RUNNING && -+ r->targ_start >= (void *)&__init_begin && -+ r->targ_start < (void *)&__init_end) -+ continue; -+ if (r->repl->feature != (unsigned char)-1 && -+ boot_cpu_has(r->repl->feature)) { -+ memcpy(r->targ_start, -+ r->repl->data + r->repl->smp1_size, -+ r->repl->smp2_size); -+ memset(r->targ_start + r->repl->smp2_size, -+ 0x90, -+ r->repl->targ_size - r->repl->smp2_size); -+ } else { -+ memcpy(r->targ_start, -+ r->repl->data, -+ r->repl->smp1_size); -+ memset(r->targ_start + r->repl->smp1_size, -+ 0x90, -+ r->repl->targ_size - r->repl->smp1_size); -+ } -+ } -+ /* Paranoia */ -+ asm volatile ("jmp 1f\n1:"); -+ mb(); -+} -+ -+void unprepare_for_smp(void) -+{ -+ struct smp_alternative_record *r; -+ printk(KERN_INFO "Disabling SMP...\n"); -+ for (r = &__start_smp_alternatives_table; -+ r != &__stop_smp_alternatives_table; -+ r++) { -+ BUG_ON(r->repl->targ_size < r->repl->smp1_size); -+ BUG_ON(r->repl->targ_size < r->repl->smp2_size); -+ BUG_ON(r->repl->targ_size < r->repl->up_size); -+ if (system_state == SYSTEM_RUNNING && -+ r->targ_start >= (void *)&__init_begin && -+ r->targ_start < (void *)&__init_end) -+ continue; -+ memcpy(r->targ_start, -+ r->repl->data + r->repl->smp1_size + r->repl->smp2_size, -+ r->repl->up_size); -+ memset(r->targ_start + r->repl->up_size, -+ 0x90, -+ r->repl->targ_size - r->repl->up_size); -+ } -+ /* Paranoia */ -+ asm volatile ("jmp 1f\n1:"); -+ mb(); -+} -diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/kernel/smpboot.c ./arch/i386/kernel/smpboot.c ---- ../pristine-linux-2.6.16-rc5/arch/i386/kernel/smpboot.c 2006-02-27 15:46:58.000000000 +0000 -+++ ./arch/i386/kernel/smpboot.c 2006-02-27 15:55:34.000000000 +0000 -@@ 
-1208,6 +1208,11 @@ static void __init smp_boot_cpus(unsigne - if (max_cpus <= cpucount+1) - continue; - -+#ifdef CONFIG_SMP_ALTERNATIVES -+ if (kicked == 1) -+ prepare_for_smp(); -+#endif -+ - if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu)) - printk("CPU #%d not responding - cannot use it.\n", - apicid); -@@ -1386,6 +1391,11 @@ int __devinit __cpu_up(unsigned int cpu) - return -EIO; - } - -+#ifdef CONFIG_SMP_ALTERNATIVES -+ if (num_online_cpus() == 1) -+ prepare_for_smp(); -+#endif -+ - local_irq_enable(); - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - /* Unleash the CPU! */ -diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/kernel/vmlinux.lds.S ./arch/i386/kernel/vmlinux.lds.S ---- ../pristine-linux-2.6.16-rc5/arch/i386/kernel/vmlinux.lds.S 2006-01-03 03:21:10.000000000 +0000 -+++ ./arch/i386/kernel/vmlinux.lds.S 2006-02-27 15:55:34.000000000 +0000 -@@ -34,6 +34,13 @@ SECTIONS - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } - __stop___ex_table = .; - -+ . = ALIGN(16); -+ __start_smp_alternatives_table = .; -+ __smp_alternatives : { *(__smp_alternatives) } -+ __stop_smp_alternatives_table = .; -+ -+ __smp_replacements : { *(__smp_replacements) } -+ - RODATA - - /* writeable */ -diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/atomic.h ./include/asm-i386/atomic.h ---- ../pristine-linux-2.6.16-rc5/include/asm-i386/atomic.h 2006-02-27 15:47:25.000000000 +0000 -+++ ./include/asm-i386/atomic.h 2006-02-27 15:55:34.000000000 +0000 -@@ -4,18 +4,13 @@ - #include <linux/config.h> - #include <linux/compiler.h> - #include <asm/processor.h> -+#include <asm/smp_alt.h> - - /* - * Atomic operations that C can't guarantee us. Useful for - * resource counting etc.. - */ - --#ifdef CONFIG_SMP --#define LOCK "lock ; " --#else --#define LOCK "" --#endif -- - /* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, -diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/bitops.h ./include/asm-i386/bitops.h ---- ../pristine-linux-2.6.16-rc5/include/asm-i386/bitops.h 2006-02-27 15:47:25.000000000 +0000 -+++ ./include/asm-i386/bitops.h 2006-02-27 15:55:34.000000000 +0000 -@@ -7,6 +7,7 @@ - - #include <linux/config.h> - #include <linux/compiler.h> -+#include <asm/smp_alt.h> - - /* - * These have to be done with inline assembly: that way the bit-setting -@@ -16,12 +17,6 @@ - * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). 
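[Editorial note: each conversion in this header is mechanical — the bt{s,r,c}l instruction takes its lock prefix from the patchable LOCK macro instead of a hard-coded LOCK_PREFIX, and the test_and_* forms use sbbl to turn the carry flag into the old bit value. What that asm computes, restated as portable C; a sketch using GCC-style atomic builtins, not the kernel implementation:]

    #include <stdio.h>

    #define BITS_PER_LONG (8 * sizeof(unsigned long))

    /* Equivalent of "lock btsl %nr,%addr; sbbl %old,%old": atomically
     * set bit nr in the bitmap at addr and return its previous value. */
    static int test_and_set_bit_sketch(unsigned nr, unsigned long *addr)
    {
        unsigned long mask = 1UL << (nr % BITS_PER_LONG);
        unsigned long old = __atomic_fetch_or(addr + nr / BITS_PER_LONG,
                                              mask, __ATOMIC_SEQ_CST);
        return (old & mask) != 0;
    }

    int main(void)
    {
        unsigned long map[2] = { 0, 0 };
        printf("%d\n", test_and_set_bit_sketch(68, map));  /* 0: was clear */
        printf("%d\n", test_and_set_bit_sketch(68, map));  /* 1: now set   */
        return 0;
    }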
- */ - --#ifdef CONFIG_SMP --#define LOCK_PREFIX "lock ; " --#else --#define LOCK_PREFIX "" --#endif -- - #define ADDR (*(volatile long *) addr) - - /** -@@ -41,7 +36,7 @@ - */ - static inline void set_bit(int nr, volatile unsigned long * addr) - { -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btsl %1,%0" - :"+m" (ADDR) - :"Ir" (nr)); -@@ -76,7 +71,7 @@ static inline void __set_bit(int nr, vol - */ - static inline void clear_bit(int nr, volatile unsigned long * addr) - { -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btrl %1,%0" - :"+m" (ADDR) - :"Ir" (nr)); -@@ -121,7 +116,7 @@ static inline void __change_bit(int nr, - */ - static inline void change_bit(int nr, volatile unsigned long * addr) - { -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btcl %1,%0" - :"+m" (ADDR) - :"Ir" (nr)); -@@ -140,7 +135,7 @@ static inline int test_and_set_bit(int n - { - int oldbit; - -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btsl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"+m" (ADDR) - :"Ir" (nr) : "memory"); -@@ -180,7 +175,7 @@ static inline int test_and_clear_bit(int - { - int oldbit; - -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btrl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"+m" (ADDR) - :"Ir" (nr) : "memory"); -@@ -231,7 +226,7 @@ static inline int test_and_change_bit(in - { - int oldbit; - -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btcl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"+m" (ADDR) - :"Ir" (nr) : "memory"); -diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/futex.h ./include/asm-i386/futex.h ---- ../pristine-linux-2.6.16-rc5/include/asm-i386/futex.h 2006-02-27 15:47:25.000000000 +0000 -+++ ./include/asm-i386/futex.h 2006-02-27 15:55:34.000000000 +0000 -@@ -28,7 +28,7 @@ - "1: movl %2, %0\n\ - movl %0, %3\n" \ - insn "\n" \ --"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\ -+"2: " LOCK "cmpxchgl %3, %2\n\ - jnz 1b\n\ - 3: .section .fixup,\"ax\"\n\ - 4: mov %5, %1\n\ -@@ -68,7 +68,7 @@ futex_atomic_op_inuser (int encoded_op, - #endif - switch (op) { - case FUTEX_OP_ADD: -- __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, -+ __futex_atomic_op1(LOCK "xaddl %0, %2", ret, - oldval, uaddr, oparg); - break; - case FUTEX_OP_OR: -diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/rwsem.h ./include/asm-i386/rwsem.h ---- ../pristine-linux-2.6.16-rc5/include/asm-i386/rwsem.h 2006-01-03 03:21:10.000000000 +0000 -+++ ./include/asm-i386/rwsem.h 2006-02-27 15:55:34.000000000 +0000 -@@ -40,6 +40,7 @@ - - #include <linux/list.h> - #include <linux/spinlock.h> -+#include <asm/smp_alt.h> - - struct rwsem_waiter; - -@@ -99,7 +100,7 @@ static inline void __down_read(struct rw - { - __asm__ __volatile__( - "# beginning down_read\n\t" --LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ -+LOCK " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ - " js 2f\n\t" /* jump if we weren't granted the lock */ - "1:\n\t" - LOCK_SECTION_START("") -@@ -130,7 +131,7 @@ static inline int __down_read_trylock(st - " movl %1,%2\n\t" - " addl %3,%2\n\t" - " jle 2f\n\t" --LOCK_PREFIX " cmpxchgl %2,%0\n\t" -+LOCK " cmpxchgl %2,%0\n\t" - " jnz 1b\n\t" - "2:\n\t" - "# ending __down_read_trylock\n\t" -@@ -150,7 +151,7 @@ static inline void __down_write(struct r - tmp = RWSEM_ACTIVE_WRITE_BIAS; - __asm__ __volatile__( - "# beginning down_write\n\t" --LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ -+LOCK " xadd 
%%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ - " testl %%edx,%%edx\n\t" /* was the count 0 before? */ - " jnz 2f\n\t" /* jump if we weren't granted the lock */ - "1:\n\t" -@@ -188,7 +189,7 @@ static inline void __up_read(struct rw_s - __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; - __asm__ __volatile__( - "# beginning __up_read\n\t" --LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ -+LOCK " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ - " js 2f\n\t" /* jump if the lock is being waited upon */ - "1:\n\t" - LOCK_SECTION_START("") -@@ -214,7 +215,7 @@ static inline void __up_write(struct rw_ - __asm__ __volatile__( - "# beginning __up_write\n\t" - " movl %2,%%edx\n\t" --LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ -+LOCK " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ - " jnz 2f\n\t" /* jump if the lock is being waited upon */ - "1:\n\t" - LOCK_SECTION_START("") -@@ -239,7 +240,7 @@ static inline void __downgrade_write(str - { - __asm__ __volatile__( - "# beginning __downgrade_write\n\t" --LOCK_PREFIX " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ -+LOCK " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ - " js 2f\n\t" /* jump if the lock is being waited upon */ - "1:\n\t" - LOCK_SECTION_START("") -@@ -263,7 +264,7 @@ LOCK_PREFIX " addl %2,(%%eax)\n\t" - static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) - { - __asm__ __volatile__( --LOCK_PREFIX "addl %1,%0" -+LOCK "addl %1,%0" - : "=m"(sem->count) - : "ir"(delta), "m"(sem->count)); - } -@@ -276,7 +277,7 @@ static inline int rwsem_atomic_update(in - int tmp = delta; - - __asm__ __volatile__( --LOCK_PREFIX "xadd %0,(%2)" -+LOCK "xadd %0,(%2)" - : "+r"(tmp), "=m"(sem->count) - : "r"(sem), "m"(sem->count) - : "memory"); -diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/smp_alt.h ./include/asm-i386/smp_alt.h ---- ../pristine-linux-2.6.16-rc5/include/asm-i386/smp_alt.h 1970-01-01 01:00:00.000000000 +0100 -+++ ./include/asm-i386/smp_alt.h 2006-02-27 15:55:34.000000000 +0000 -@@ -0,0 +1,32 @@ -+#ifndef __ASM_SMP_ALT_H__ -+#define __ASM_SMP_ALT_H__ -+ -+#include <linux/config.h> -+ -+#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE) -+#define LOCK \ -+ "6677: nop\n" \ -+ ".section __smp_alternatives,\"a\"\n" \ -+ ".long 6677b\n" \ -+ ".long 6678f\n" \ -+ ".previous\n" \ -+ ".section __smp_replacements,\"a\"\n" \ -+ "6678: .byte 1\n" \ -+ ".byte 1\n" \ -+ ".byte 0\n" \ -+ ".byte 1\n" \ -+ ".byte -1\n" \ -+ "lock\n" \ -+ "nop\n" \ -+ ".previous\n" -+void prepare_for_smp(void); -+void unprepare_for_smp(void); -+#else -+#define LOCK "lock ; " -+#endif -+#else -+#define LOCK "" -+#endif -+ -+#endif /* __ASM_SMP_ALT_H__ */ -diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/spinlock.h ./include/asm-i386/spinlock.h ---- ../pristine-linux-2.6.16-rc5/include/asm-i386/spinlock.h 2006-01-03 03:21:10.000000000 +0000 -+++ ./include/asm-i386/spinlock.h 2006-02-27 15:55:34.000000000 +0000 -@@ -6,6 +6,7 @@ - #include <asm/page.h> - #include <linux/config.h> - #include <linux/compiler.h> -+#include <asm/smp_alt.h> - - /* - * Your basic SMP spinlocks, allowing only a single CPU anywhere -@@ -23,7 +24,8 @@ - - #define __raw_spin_lock_string \ - "\n1:\t" \ -- "lock ; decb %0\n\t" \ -+ LOCK \ -+ "decb %0\n\t" \ - "jns 3f\n" \ - "2:\t" \ - "rep;nop\n\t" \ -@@ -34,7 +36,8 @@ - - #define __raw_spin_lock_string_flags \ - "\n1:\t" \ -- "lock ; 
decb %0\n\t" \ -+ LOCK \ -+ "decb %0\n\t" \ - "jns 4f\n\t" \ - "2:\t" \ - "testl $0x200, %1\n\t" \ -@@ -65,10 +68,34 @@ static inline void __raw_spin_lock_flags - static inline int __raw_spin_trylock(raw_spinlock_t *lock) - { - char oldval; -+#ifdef CONFIG_SMP_ALTERNATIVES - __asm__ __volatile__( -- "xchgb %b0,%1" -+ "1:movb %1,%b0\n" -+ "movb $0,%1\n" -+ "2:" -+ ".section __smp_alternatives,\"a\"\n" -+ ".long 1b\n" -+ ".long 3f\n" -+ ".previous\n" -+ ".section __smp_replacements,\"a\"\n" -+ "3: .byte 2b - 1b\n" -+ ".byte 5f-4f\n" -+ ".byte 0\n" -+ ".byte 6f-5f\n" -+ ".byte -1\n" -+ "4: xchgb %b0,%1\n" -+ "5: movb %1,%b0\n" -+ "movb $0,%1\n" -+ "6:\n" -+ ".previous\n" - :"=q" (oldval), "=m" (lock->slock) - :"0" (0) : "memory"); -+#else -+ __asm__ __volatile__( -+ "xchgb %b0,%1\n" -+ :"=q" (oldval), "=m" (lock->slock) -+ :"0" (0) : "memory"); -+#endif - return oldval > 0; - } - -@@ -178,12 +205,12 @@ static inline int __raw_write_trylock(ra - - static inline void __raw_read_unlock(raw_rwlock_t *rw) - { -- asm volatile("lock ; incl %0" :"=m" (rw->lock) : : "memory"); -+ asm volatile(LOCK "incl %0" :"=m" (rw->lock) : : "memory"); - } - - static inline void __raw_write_unlock(raw_rwlock_t *rw) - { -- asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ", %0" -+ asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ", %0" - : "=m" (rw->lock) : : "memory"); - } - -diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/system.h ./include/asm-i386/system.h ---- ../pristine-linux-2.6.16-rc5/include/asm-i386/system.h 2006-02-27 15:47:25.000000000 +0000 -+++ ./include/asm-i386/system.h 2006-02-27 15:55:34.000000000 +0000 -@@ -5,7 +5,7 @@ - #include <linux/kernel.h> - #include <asm/segment.h> - #include <asm/cpufeature.h> --#include <linux/bitops.h> /* for LOCK_PREFIX */ -+#include <asm/smp_alt.h> - - #ifdef __KERNEL__ - -@@ -271,19 +271,19 @@ static inline unsigned long __cmpxchg(vo - unsigned long prev; - switch (size) { - case 1: -- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" -+ __asm__ __volatile__(LOCK "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: -- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" -+ __asm__ __volatile__(LOCK "cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: -- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" -+ __asm__ __volatile__(LOCK "cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); -@@ -336,7 +336,7 @@ static inline unsigned long long __cmpxc - unsigned long long new) - { - unsigned long long prev; -- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3" -+ __asm__ __volatile__(LOCK "cmpxchg8b %3" - : "=A"(prev) - : "b"((unsigned long)new), - "c"((unsigned long)(new >> 32)), -@@ -503,11 +503,55 @@ struct alt_instr { - #endif - - #ifdef CONFIG_SMP -+#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE) -+#define smp_alt_mb(instr) \ -+__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \ -+ ".section __smp_alternatives,\"a\"\n" \ -+ ".long 6667b\n" \ -+ ".long 6673f\n" \ -+ ".previous\n" \ -+ ".section __smp_replacements,\"a\"\n" \ -+ "6673:.byte 6668b-6667b\n" \ -+ ".byte 6670f-6669f\n" \ -+ ".byte 6671f-6670f\n" \ -+ ".byte 0\n" \ -+ ".byte %c0\n" \ -+ "6669:lock;addl $0,0(%%esp)\n" \ -+ "6670:" instr "\n" \ -+ "6671:\n" \ -+ ".previous\n" \ -+ : \ -+ : "i" (X86_FEATURE_XMM2) \ -+ : "memory") -+#define smp_rmb() smp_alt_mb("lfence") -+#define smp_mb() smp_alt_mb("mfence") -+#define set_mb(var, value) 
do { \ -+unsigned long __set_mb_temp; \ -+__asm__ __volatile__("6667:movl %1, %0\n6668:\n" \ -+ ".section __smp_alternatives,\"a\"\n" \ -+ ".long 6667b\n" \ -+ ".long 6673f\n" \ -+ ".previous\n" \ -+ ".section __smp_replacements,\"a\"\n" \ -+ "6673: .byte 6668b-6667b\n" \ -+ ".byte 6670f-6669f\n" \ -+ ".byte 0\n" \ -+ ".byte 6671f-6670f\n" \ -+ ".byte -1\n" \ -+ "6669: xchg %1, %0\n" \ -+ "6670:movl %1, %0\n" \ -+ "6671:\n" \ -+ ".previous\n" \ -+ : "=m" (var), "=r" (__set_mb_temp) \ -+ : "1" (value) \ -+ : "memory"); } while (0) -+#else - #define smp_mb() mb() - #define smp_rmb() rmb() -+#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) -+#endif - #define smp_wmb() wmb() - #define smp_read_barrier_depends() read_barrier_depends() --#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) - #else - #define smp_mb() barrier() - #define smp_rmb() barrier() diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/linux/asm-generic/pci-dma-compat.h --- a/xen/include/asm-ia64/linux/asm-generic/pci-dma-compat.h Mon Mar 20 08:56:46 2006 +++ /dev/null Mon Mar 20 08:56:54 2006 @@ -1,107 +0,0 @@ -/* include this file if the platform implements the dma_ DMA Mapping API - * and wants to provide the pci_ DMA Mapping API in terms of it */ - -#ifndef _ASM_GENERIC_PCI_DMA_COMPAT_H -#define _ASM_GENERIC_PCI_DMA_COMPAT_H - -#include <linux/dma-mapping.h> - -/* note pci_set_dma_mask isn't here, since it's a public function - * exported from drivers/pci, use dma_supported instead */ - -static inline int -pci_dma_supported(struct pci_dev *hwdev, u64 mask) -{ - return dma_supported(hwdev == NULL ? NULL : &hwdev->dev, mask); -} - -static inline void * -pci_alloc_consistent(struct pci_dev *hwdev, size_t size, - dma_addr_t *dma_handle) -{ - return dma_alloc_coherent(hwdev == NULL ? NULL : &hwdev->dev, size, dma_handle, GFP_ATOMIC); -} - -static inline void -pci_free_consistent(struct pci_dev *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - dma_free_coherent(hwdev == NULL ? NULL : &hwdev->dev, size, vaddr, dma_handle); -} - -static inline dma_addr_t -pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction) -{ - return dma_map_single(hwdev == NULL ? NULL : &hwdev->dev, ptr, size, (enum dma_data_direction)direction); -} - -static inline void -pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, - size_t size, int direction) -{ - dma_unmap_single(hwdev == NULL ? NULL : &hwdev->dev, dma_addr, size, (enum dma_data_direction)direction); -} - -static inline dma_addr_t -pci_map_page(struct pci_dev *hwdev, struct page *page, - unsigned long offset, size_t size, int direction) -{ - return dma_map_page(hwdev == NULL ? NULL : &hwdev->dev, page, offset, size, (enum dma_data_direction)direction); -} - -static inline void -pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, - size_t size, int direction) -{ - dma_unmap_page(hwdev == NULL ? NULL : &hwdev->dev, dma_address, size, (enum dma_data_direction)direction); -} - -static inline int -pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - return dma_map_sg(hwdev == NULL ? NULL : &hwdev->dev, sg, nents, (enum dma_data_direction)direction); -} - -static inline void -pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - dma_unmap_sg(hwdev == NULL ? 
NULL : &hwdev->dev, sg, nents, (enum dma_data_direction)direction); -} - -static inline void -pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle, - size_t size, int direction) -{ - dma_sync_single_for_cpu(hwdev == NULL ? NULL : &hwdev->dev, dma_handle, size, (enum dma_data_direction)direction); -} - -static inline void -pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle, - size_t size, int direction) -{ - dma_sync_single_for_device(hwdev == NULL ? NULL : &hwdev->dev, dma_handle, size, (enum dma_data_direction)direction); -} - -static inline void -pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, - int nelems, int direction) -{ - dma_sync_sg_for_cpu(hwdev == NULL ? NULL : &hwdev->dev, sg, nelems, (enum dma_data_direction)direction); -} - -static inline void -pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, - int nelems, int direction) -{ - dma_sync_sg_for_device(hwdev == NULL ? NULL : &hwdev->dev, sg, nelems, (enum dma_data_direction)direction); -} - -static inline int -pci_dma_mapping_error(dma_addr_t dma_addr) -{ - return dma_mapping_error(dma_addr); -} - -#endif diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/linux/asm/scatterlist.h --- a/xen/include/asm-ia64/linux/asm/scatterlist.h Mon Mar 20 08:56:46 2006 +++ /dev/null Mon Mar 20 08:56:54 2006 @@ -1,28 +0,0 @@ -#ifndef _ASM_IA64_SCATTERLIST_H -#define _ASM_IA64_SCATTERLIST_H - -/* - * Modified 1998-1999, 2001-2002, 2004 - * David Mosberger-Tang <davidm@xxxxxxxxxx>, Hewlett-Packard Co - */ - -struct scatterlist { - struct page *page; - unsigned int offset; - unsigned int length; /* buffer length */ - - dma_addr_t dma_address; - unsigned int dma_length; -}; - -/* - * It used to be that ISA_DMA_THRESHOLD had something to do with the - * DMA-limits of ISA-devices. Nowadays, its only remaining use (apart - * from the aha1542.c driver, which isn't 64-bit clean anyhow) is to - * tell the block-layer (via BLK_BOUNCE_ISA) what the max. physical - * address of a page is that is allocated with GFP_DMA. On IA-64, - * that's 4GB - 1. - */ -#define ISA_DMA_THRESHOLD 0xffffffff - -#endif /* _ASM_IA64_SCATTERLIST_H */ diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/linux/mmzone.h --- a/xen/include/asm-ia64/linux/mmzone.h Mon Mar 20 08:56:46 2006 +++ /dev/null Mon Mar 20 08:56:54 2006 @@ -1,592 +0,0 @@ -#ifndef _LINUX_MMZONE_H -#define _LINUX_MMZONE_H - -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ - -#include <linux/config.h> -#include <linux/spinlock.h> -#include <linux/list.h> -#include <linux/wait.h> -#include <linux/cache.h> -#include <linux/threads.h> -#include <linux/numa.h> -#include <linux/init.h> -#include <asm/atomic.h> - -/* Free memory management - zoned buddy allocator. */ -#ifndef CONFIG_FORCE_MAX_ZONEORDER -#define MAX_ORDER 11 -#else -#define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER -#endif - -struct free_area { - struct list_head free_list; - unsigned long nr_free; -}; - -struct pglist_data; - -/* - * zone->lock and zone->lru_lock are two of the hottest locks in the kernel. - * So add a wild amount of padding here to ensure that they fall into separate - * cachelines. There are very few zone structures in the machine, so space - * consumption is not a concern here. 
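[Editorial note: the false-sharing cure this comment describes is a zero-size, cacheline-aligned member that forces the fields after it onto a fresh cache line, so the allocator hammering zone->lock never contends with reclaim code spinning on zone->lru_lock. A reduced sketch of the ZONE_PADDING idiom; the 64-byte line size is an assumption, the kernel derives its value from the architecture:]

    #include <stdio.h>
    #include <stddef.h>

    #define CACHELINE_BYTES 64   /* assumed; the kernel uses L1 cache geometry */

    struct two_hot_groups {
        /* group 1: fields touched by the page allocator */
        int  lock;
        long free_pages;

        /* zero-size marker, as ZONE_PADDING(_pad1_) expands to:
         * the next field starts on its own cache line */
        char _pad1[0] __attribute__((aligned(CACHELINE_BYTES)));

        /* group 2: fields touched by page reclaim */
        int  lru_lock;
        long nr_active;
    };

    int main(void)
    {
        printf("lock     at %zu\n", offsetof(struct two_hot_groups, lock));
        printf("lru_lock at %zu\n", offsetof(struct two_hot_groups, lru_lock));
        return 0;
    }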
- */ -#if defined(CONFIG_SMP) -struct zone_padding { - char x[0]; -} ____cacheline_maxaligned_in_smp; -#define ZONE_PADDING(name) struct zone_padding name; -#else -#define ZONE_PADDING(name) -#endif - -struct per_cpu_pages { - int count; /* number of pages in the list */ - int low; /* low watermark, refill needed */ - int high; /* high watermark, emptying needed */ - int batch; /* chunk size for buddy add/remove */ - struct list_head list; /* the list of pages */ -}; - -struct per_cpu_pageset { - struct per_cpu_pages pcp[2]; /* 0: hot. 1: cold */ -#ifdef CONFIG_NUMA - unsigned long numa_hit; /* allocated in intended node */ - unsigned long numa_miss; /* allocated in non intended node */ - unsigned long numa_foreign; /* was intended here, hit elsewhere */ - unsigned long interleave_hit; /* interleaver prefered this zone */ - unsigned long local_node; /* allocation from local node */ - unsigned long other_node; /* allocation from other node */ -#endif -} ____cacheline_aligned_in_smp; - -#ifdef CONFIG_NUMA -#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)]) -#else -#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)]) -#endif - -#define ZONE_DMA 0 -#define ZONE_NORMAL 1 -#define ZONE_HIGHMEM 2 - -#define MAX_NR_ZONES 3 /* Sync this with ZONES_SHIFT */ -#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ - - -/* - * When a memory allocation must conform to specific limitations (such - * as being suitable for DMA) the caller will pass in hints to the - * allocator in the gfp_mask, in the zone modifier bits. These bits - * are used to select a priority ordered list of memory zones which - * match the requested limits. GFP_ZONEMASK defines which bits within - * the gfp_mask should be considered as zone modifiers. Each valid - * combination of the zone modifier bits has a corresponding list - * of zones (in node_zonelists). Thus for two zone modifiers there - * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will - * be 8 (2 ** 3) zonelists. GFP_ZONETYPES defines the number of possible - * combinations of zone modifiers in "zone modifier space". - */ -#define GFP_ZONEMASK 0x03 -/* - * As an optimisation any zone modifier bits which are only valid when - * no other zone modifier bits are set (loners) should be placed in - * the highest order bits of this field. This allows us to reduce the - * extent of the zonelists thus saving space. For example in the case - * of three zone modifier bits, we could require up to eight zonelists. - * If the left most zone modifier is a "loner" then the highest valid - * zonelist would be four allowing us to allocate only five zonelists. - * Use the first form when the left most bit is not a "loner", otherwise - * use the second. - */ -/* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */ -#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ - -/* - * On machines where it is needed (eg PCs) we divide physical memory - * into multiple physical zones. 
On a PC we have 3 zones: - * - * ZONE_DMA < 16 MB ISA DMA capable memory - * ZONE_NORMAL 16-896 MB direct mapped by the kernel - * ZONE_HIGHMEM > 896 MB only page cache and user processes - */ - -struct zone { - /* Fields commonly accessed by the page allocator */ - unsigned long free_pages; - unsigned long pages_min, pages_low, pages_high; - /* - * We don't know if the memory that we're going to allocate will be freeable - * or/and it will be released eventually, so to avoid totally wasting several - * GB of ram we must reserve some of the lower zone memory (otherwise we risk - * to run OOM on the lower zones despite there's tons of freeable ram - * on the higher zones). This array is recalculated at runtime if the - * sysctl_lowmem_reserve_ratio sysctl changes. - */ - unsigned long lowmem_reserve[MAX_NR_ZONES]; - -#ifdef CONFIG_NUMA - struct per_cpu_pageset *pageset[NR_CPUS]; -#else - struct per_cpu_pageset pageset[NR_CPUS]; -#endif - /* - * free areas of different sizes - */ - spinlock_t lock; - struct free_area free_area[MAX_ORDER]; - - - ZONE_PADDING(_pad1_) - - /* Fields commonly accessed by the page reclaim scanner */ - spinlock_t lru_lock; - struct list_head active_list; - struct list_head inactive_list; - unsigned long nr_scan_active; - unsigned long nr_scan_inactive; - unsigned long nr_active; - unsigned long nr_inactive; - unsigned long pages_scanned; /* since last reclaim */ - int all_unreclaimable; /* All pages pinned */ - - /* - * Does the allocator try to reclaim pages from the zone as soon - * as it fails a watermark_ok() in __alloc_pages? - */ - int reclaim_pages; - /* A count of how many reclaimers are scanning this zone */ - atomic_t reclaim_in_progress; - - /* - * prev_priority holds the scanning priority for this zone. It is - * defined as the scanning priority at which we achieved our reclaim - * target at the previous try_to_free_pages() or balance_pgdat() - * invokation. - * - * We use prev_priority as a measure of how much stress page reclaim is - * under - it drives the swappiness decision: whether to unmap mapped - * pages. - * - * temp_priority is used to remember the scanning priority at which - * this zone was successfully refilled to free_pages == pages_high. - * - * Access to both these fields is quite racy even on uniprocessor. But - * it is expected to average out OK. - */ - int temp_priority; - int prev_priority; - - - ZONE_PADDING(_pad2_) - /* Rarely used or read-mostly fields */ - - /* - * wait_table -- the array holding the hash table - * wait_table_size -- the size of the hash table array - * wait_table_bits -- wait_table_size == (1 << wait_table_bits) - * - * The purpose of all these is to keep track of the people - * waiting for a page to become available and make them - * runnable again when possible. The trouble is that this - * consumes a lot of space, especially when so few things - * wait on pages at a given time. So instead of using - * per-page waitqueues, we use a waitqueue hash table. - * - * The bucket discipline is to sleep on the same queue when - * colliding and wake all in that wait queue when removing. - * When something wakes, it must check to be sure its page is - * truly available, a la thundering herd. The cost of a - * collision is great, but given the expected load of the - * table, they should be so rare as to be outweighed by the - * benefits from the saved space. 
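[Editorial note: concretely, the space/collision trade-off described here means mapping a page to a bucket by hashing its address, with every waiter re-checking its own page after a wakeup. A toy version of the bucket lookup; the table size and hash constant are illustrative, the kernel sizes the table per zone and uses a stronger mix:]

    #include <stdio.h>

    #define WAIT_TABLE_BITS 8   /* illustrative: 256 shared buckets */
    #define WAIT_TABLE_SIZE (1UL << WAIT_TABLE_BITS)

    struct waitqueue { int sleepers; };              /* stand-in type */
    static struct waitqueue wait_table[WAIT_TABLE_SIZE];

    /* Hash a page (here just its address) to its shared bucket.
     * Collisions only mean spurious wakeups, never missed ones. */
    static struct waitqueue *page_waitqueue_sketch(const void *page)
    {
        unsigned long h = (unsigned long)page >> 12;  /* page-align bits */
        h *= 2654435761UL;                            /* multiplicative hash */
        return &wait_table[h & (WAIT_TABLE_SIZE - 1)];
    }

    int main(void)
    {
        const void *page = (const void *)0x12345000UL;
        printf("bucket %ld\n",
               (long)(page_waitqueue_sketch(page) - wait_table));
        return 0;
    }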
- * - * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the - * primary users of these fields, and in mm/page_alloc.c - * free_area_init_core() performs the initialization of them. - */ - wait_queue_head_t * wait_table; - unsigned long wait_table_size; - unsigned long wait_table_bits; - - /* - * Discontig memory support fields. - */ - struct pglist_data *zone_pgdat; - struct page *zone_mem_map; - /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ - unsigned long zone_start_pfn; - - unsigned long spanned_pages; /* total size, including holes */ - unsigned long present_pages; /* amount of memory (excluding holes) */ - - /* - * rarely used fields: - */ - char *name; -} ____cacheline_maxaligned_in_smp; - - -/* - * The "priority" of VM scanning is how much of the queues we will scan in one - * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the - * queues ("queue_length >> 12") during an aging round. - */ -#define DEF_PRIORITY 12 - -/* - * One allocation request operates on a zonelist. A zonelist - * is a list of zones, the first one is the 'goal' of the - * allocation, the other zones are fallback zones, in decreasing - * priority. - * - * Right now a zonelist takes up less than a cacheline. We never - * modify it apart from boot-up, and only a few indices are used, - * so despite the zonelist table being relatively big, the cache - * footprint of this construct is very small. - */ -struct zonelist { - struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited -}; - - -/* - * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM - * (mostly NUMA machines?) to denote a higher-level memory zone than the - * zone denotes. - * - * On NUMA machines, each NUMA node would have a pg_data_t to describe - * it's memory layout. - * - * Memory statistics and page replacement data structures are maintained on a - * per-zone basis. 
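[Editorial note: the node/zone split is easiest to see in the traversal helpers further down — pg_data_t holds a node's zones plus a pgdat_next link, and next_zone() flattens the two-level structure into one linear walk for for_each_zone(). A toy reimplementation over stand-in types:]

    #include <stdio.h>

    #define MAX_NR_ZONES 3

    struct pglist_data;
    struct zone {
        const char *name;
        struct pglist_data *zone_pgdat;   /* back-pointer to owning node */
    };
    struct pglist_data {
        struct zone node_zones[MAX_NR_ZONES];
        struct pglist_data *pgdat_next;   /* singly linked list of nodes */
    };

    /* Same control flow as the kernel's next_zone(): step within the
     * node, then to the next node's first zone, then terminate. */
    static struct zone *next_zone_sketch(struct zone *z)
    {
        struct pglist_data *pgdat = z->zone_pgdat;

        if (z < pgdat->node_zones + MAX_NR_ZONES - 1)
            return z + 1;
        if (pgdat->pgdat_next)
            return pgdat->pgdat_next->node_zones;
        return NULL;
    }

    int main(void)
    {
        struct pglist_data node1 = { { {"dma"}, {"normal"}, {"highmem"} }, NULL };
        struct pglist_data node0 = { { {"dma"}, {"normal"}, {"highmem"} }, &node1 };

        for (int i = 0; i < MAX_NR_ZONES; i++) {
            node0.node_zones[i].zone_pgdat = &node0;
            node1.node_zones[i].zone_pgdat = &node1;
        }
        for (struct zone *z = node0.node_zones; z; z = next_zone_sketch(z))
            printf("%s\n", z->name);    /* six zones, both nodes */
        return 0;
    }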
- */ -struct bootmem_data; -typedef struct pglist_data { - struct zone node_zones[MAX_NR_ZONES]; - struct zonelist node_zonelists[GFP_ZONETYPES]; - int nr_zones; -#ifdef CONFIG_FLAT_NODE_MEM_MAP - struct page *node_mem_map; -#endif - struct bootmem_data *bdata; - unsigned long node_start_pfn; - unsigned long node_present_pages; /* total number of physical pages */ - unsigned long node_spanned_pages; /* total size of physical page - range, including holes */ - int node_id; - struct pglist_data *pgdat_next; - wait_queue_head_t kswapd_wait; - struct task_struct *kswapd; - int kswapd_max_order; -} pg_data_t; - -#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) -#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) -#ifdef CONFIG_FLAT_NODE_MEM_MAP -#define pgdat_page_nr(pgdat, pagenr) ((pgdat)->node_mem_map + (pagenr)) -#else -#define pgdat_page_nr(pgdat, pagenr) mfn_to_page((pgdat)->node_start_pfn + (pagenr)) -#endif -#define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) - -extern struct pglist_data *pgdat_list; - -void __get_zone_counts(unsigned long *active, unsigned long *inactive, - unsigned long *free, struct pglist_data *pgdat); -void get_zone_counts(unsigned long *active, unsigned long *inactive, - unsigned long *free); -void build_all_zonelists(void); -void wakeup_kswapd(struct zone *zone, int order); -int zone_watermark_ok(struct zone *z, int order, unsigned long mark, - int alloc_type, int can_try_harder, int gfp_high); - -#ifdef CONFIG_HAVE_MEMORY_PRESENT -void memory_present(int nid, unsigned long start, unsigned long end); -#else -static inline void memory_present(int nid, unsigned long start, unsigned long end) {} -#endif - -#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE -unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); -#endif - -/* - * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc. - */ -#define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) - -/** - * for_each_pgdat - helper macro to iterate over all nodes - * @pgdat - pointer to a pg_data_t variable - * - * Meant to help with common loops of the form - * pgdat = pgdat_list; - * while(pgdat) { - * ... - * pgdat = pgdat->pgdat_next; - * } - */ -#define for_each_pgdat(pgdat) \ - for (pgdat = pgdat_list; pgdat; pgdat = pgdat->pgdat_next) - -/* - * next_zone - helper magic for for_each_zone() - * Thanks to William Lee Irwin III for this piece of ingenuity. - */ -static inline struct zone *next_zone(struct zone *zone) -{ - pg_data_t *pgdat = zone->zone_pgdat; - - if (zone < pgdat->node_zones + MAX_NR_ZONES - 1) - zone++; - else if (pgdat->pgdat_next) { - pgdat = pgdat->pgdat_next; - zone = pgdat->node_zones; - } else - zone = NULL; - - return zone; -} - -/** - * for_each_zone - helper macro to iterate over all memory zones - * @zone - pointer to struct zone variable - * - * The user only needs to declare the zone variable, for_each_zone - * fills it in. This basically means for_each_zone() is an - * easier to read version of this piece of code: - * - * for (pgdat = pgdat_list; pgdat; pgdat = pgdat->node_next) - * for (i = 0; i < MAX_NR_ZONES; ++i) { - * struct zone * z = pgdat->node_zones + i; - * ... 
-
-/*
- * next_zone - helper magic for for_each_zone()
- * Thanks to William Lee Irwin III for this piece of ingenuity.
- */
-static inline struct zone *next_zone(struct zone *zone)
-{
-        pg_data_t *pgdat = zone->zone_pgdat;
-
-        if (zone < pgdat->node_zones + MAX_NR_ZONES - 1)
-                zone++;
-        else if (pgdat->pgdat_next) {
-                pgdat = pgdat->pgdat_next;
-                zone = pgdat->node_zones;
-        } else
-                zone = NULL;
-
-        return zone;
-}
-
-/**
- * for_each_zone - helper macro to iterate over all memory zones
- * @zone - pointer to struct zone variable
- *
- * The user only needs to declare the zone variable, for_each_zone
- * fills it in. This basically means for_each_zone() is an
- * easier to read version of this piece of code:
- *
- * for (pgdat = pgdat_list; pgdat; pgdat = pgdat->node_next)
- *      for (i = 0; i < MAX_NR_ZONES; ++i) {
- *              struct zone * z = pgdat->node_zones + i;
- *              ...
- *      }
- * }
- */
-#define for_each_zone(zone) \
-        for (zone = pgdat_list->node_zones; zone; zone = next_zone(zone))
-
-static inline int is_highmem_idx(int idx)
-{
-        return (idx == ZONE_HIGHMEM);
-}
-
-static inline int is_normal_idx(int idx)
-{
-        return (idx == ZONE_NORMAL);
-}
-/**
- * is_highmem - helper function to quickly check if a struct zone is a
- *              highmem zone or not. This is an attempt to keep references
- *              to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
- * @zone - pointer to struct zone variable
- */
-static inline int is_highmem(struct zone *zone)
-{
-        return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
-}
-
-static inline int is_normal(struct zone *zone)
-{
-        return zone == zone->zone_pgdat->node_zones + ZONE_NORMAL;
-}
-
-/* These two functions are used to setup the per zone pages min values */
-struct ctl_table;
-struct file;
-int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *,
-                                        void __user *, size_t *, loff_t *);
-extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
-int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
-                                        void __user *, size_t *, loff_t *);
-
-#include <linux/topology.h>
-/* Returns the number of the current Node. */
-#define numa_node_id()          (cpu_to_node(raw_smp_processor_id()))
-
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-
-extern struct pglist_data contig_page_data;
-#define NODE_DATA(nid)          (&contig_page_data)
-#define NODE_MEM_MAP(nid)       mem_map
-#define MAX_NODES_SHIFT         1
-#define pfn_to_nid(pfn)         (0)
-
-#else /* CONFIG_NEED_MULTIPLE_NODES */
-
-#include <asm/mmzone.h>
-
-#endif /* !CONFIG_NEED_MULTIPLE_NODES */
-
-#ifdef CONFIG_SPARSEMEM
-#include <asm/sparsemem.h>
-#endif
-
-#if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED)
-/*
- * with 32 bit page->flags field, we reserve 8 bits for node/zone info.
- * there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes.
- */
-#define FLAGS_RESERVED          8
-
-#elif BITS_PER_LONG == 64
-/*
- * with 64 bit flags field, there's plenty of room.
- */
-#define FLAGS_RESERVED          32
-
-#else
-
-#error BITS_PER_LONG not defined
-
-#endif
-
-#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
-#define early_pfn_to_nid(nid)   (0UL)
-#endif
-
-#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
-#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
-
-#ifdef CONFIG_SPARSEMEM
-
-/*
- * SECTION_SHIFT        #bits space required to store a section #
- *
- * PA_SECTION_SHIFT     physical address to/from section number
- * PFN_SECTION_SHIFT    pfn to/from section number
- */
-#define SECTIONS_SHIFT          (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)
-
-#define PA_SECTION_SHIFT        (SECTION_SIZE_BITS)
-#define PFN_SECTION_SHIFT       (SECTION_SIZE_BITS - PAGE_SHIFT)
-
-#define NR_MEM_SECTIONS         (1UL << SECTIONS_SHIFT)
-
-#define PAGES_PER_SECTION       (1UL << PFN_SECTION_SHIFT)
-#define PAGE_SECTION_MASK       (~(PAGES_PER_SECTION-1))
-
-#if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
-#error Allocator MAX_ORDER exceeds SECTION_SIZE
-#endif
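To make the shift arithmetic concrete: assuming, for illustration only, a
SECTION_SIZE_BITS of 28 and a PAGE_SHIFT of 12 (both actually come from the
per-arch <asm/sparsemem.h>), PFN_SECTION_SHIFT is 16, so each section covers
65536 pages, i.e. 256MB. A small hedged C sketch of the pfn/section mapping:

#include <stdio.h>

/* Illustrative values only; the real ones are architecture specific. */
#define SECTION_SIZE_BITS  28
#define PAGE_SHIFT         12
#define PFN_SECTION_SHIFT  (SECTION_SIZE_BITS - PAGE_SHIFT)
#define PAGES_PER_SECTION  (1UL << PFN_SECTION_SHIFT)

/* Same shift macros as the header above. */
#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)

int main(void)
{
        unsigned long pfn = 0x123456;
        unsigned long sec = pfn_to_section_nr(pfn);

        /* pfn 0x123456 -> section 18, whose first pfn is 0x120000 */
        printf("pfn %#lx -> section %lu (first pfn %#lx, %lu pages/section)\n",
               pfn, sec, section_nr_to_pfn(sec), PAGES_PER_SECTION);
        return 0;
}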
-struct page;
-struct mem_section {
-        /*
-         * This is, logically, a pointer to an array of struct
-         * pages. However, it is stored with some other magic.
-         * (see sparse.c::sparse_init_one_section())
-         *
-         * Making it a UL at least makes someone do a cast
-         * before using it wrong.
-         */
-        unsigned long section_mem_map;
-};
-
-extern struct mem_section mem_section[NR_MEM_SECTIONS];
-
-static inline struct mem_section *__nr_to_section(unsigned long nr)
-{
-        return &mem_section[nr];
-}
-
-/*
- * We use the lower bits of the mem_map pointer to store
- * a little bit of information. There should be at least
- * 3 bits here due to 32-bit alignment.
- */
-#define SECTION_MARKED_PRESENT  (1UL<<0)
-#define SECTION_HAS_MEM_MAP     (1UL<<1)
-#define SECTION_MAP_LAST_BIT    (1UL<<2)
-#define SECTION_MAP_MASK        (~(SECTION_MAP_LAST_BIT-1))
-
-static inline struct page *__section_mem_map_addr(struct mem_section *section)
-{
-        unsigned long map = section->section_mem_map;
-        map &= SECTION_MAP_MASK;
-        return (struct page *)map;
-}
-
-static inline int valid_section(struct mem_section *section)
-{
-        return (section->section_mem_map & SECTION_MARKED_PRESENT);
-}
-
-static inline int section_has_mem_map(struct mem_section *section)
-{
-        return (section->section_mem_map & SECTION_HAS_MEM_MAP);
-}
-
-static inline int valid_section_nr(unsigned long nr)
-{
-        return valid_section(__nr_to_section(nr));
-}
-
-/*
- * Given a kernel address, find the home node of the underlying memory.
- */
-#define kvaddr_to_nid(kaddr)    pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT)
-
-static inline struct mem_section *__pfn_to_section(unsigned long pfn)
-{
-        return __nr_to_section(pfn_to_section_nr(pfn));
-}
-
-#define mfn_to_page(pfn)                                                \
-({                                                                      \
-        unsigned long __pfn = (pfn);                                    \
-        __section_mem_map_addr(__pfn_to_section(__pfn)) + __pfn;        \
-})
-#define page_to_mfn(page)                                               \
-({                                                                      \
-        page - __section_mem_map_addr(__nr_to_section(                  \
-                page_to_section(page)));                                \
-})
-
-static inline int mfn_valid(unsigned long pfn)
-{
-        if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
-                return 0;
-        return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
-}
-
-/*
- * These are _only_ used during initialisation, therefore they
- * can use __initdata ... They could have names to indicate
- * this restriction.
- */
-#ifdef CONFIG_NUMA
-#define pfn_to_nid              early_pfn_to_nid
-#endif
-
-#define pfn_to_pgdat(pfn)                                               \
-({                                                                      \
-        NODE_DATA(pfn_to_nid(pfn));                                     \
-})
-
-#define early_mfn_valid(pfn)    mfn_valid(pfn)
-void sparse_init(void);
-#else
-#define sparse_init()   do {} while (0)
-#endif /* CONFIG_SPARSEMEM */
-
-#ifdef CONFIG_NODES_SPAN_OTHER_NODES
-#define early_pfn_in_nid(pfn, nid)      (early_pfn_to_nid(pfn) == (nid))
-#else
-#define early_pfn_in_nid(pfn, nid)      (1)
-#endif
-
-#ifndef early_mfn_valid
-#define early_mfn_valid(pfn)    (1)
-#endif
-
-void memory_present(int nid, unsigned long start, unsigned long end);
-unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
-
-#endif /* !__ASSEMBLY__ */
-#endif /* __KERNEL__ */
-#endif /* _LINUX_MMZONE_H */
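Before moving on to the next file, the SECTION_MAP_MASK trick above deserves
a worked example: because the mem_map pointer is at least 4-byte aligned, its
low bits are free to carry the present/has-map status. A hedged standalone
sketch of the encode/decode (the real encoder in sparse.c additionally biases
the stored pointer by the section's first pfn, which is why mfn_to_page()
above can add the full __pfn):

#include <assert.h>
#include <stdio.h>

/* Flag bits stored in the low (alignment-guaranteed) bits of the word. */
#define SECTION_MARKED_PRESENT  (1UL << 0)
#define SECTION_HAS_MEM_MAP     (1UL << 1)
#define SECTION_MAP_LAST_BIT    (1UL << 2)
#define SECTION_MAP_MASK        (~(SECTION_MAP_LAST_BIT - 1))

struct page { long pad[4]; };   /* stand-in; only its alignment matters */

int main(void)
{
        static struct page fake_map[4];
        unsigned long word;

        /* Encode: pointer bits and status flags share one unsigned long. */
        word = (unsigned long)fake_map | SECTION_MARKED_PRESENT
                                       | SECTION_HAS_MEM_MAP;

        /* Decode: masking recovers the pointer, bit tests the status. */
        assert((struct page *)(word & SECTION_MAP_MASK) == fake_map);
        printf("present=%d has_map=%d\n",
               !!(word & SECTION_MARKED_PRESENT),
               !!(word & SECTION_HAS_MEM_MAP));
        return 0;
}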
diff -r 516cf6553011 -r cf89e8f0831b xen/include/asm-ia64/linux/page-flags.h
--- a/xen/include/asm-ia64/linux/page-flags.h  Mon Mar 20 08:56:46 2006
+++ /dev/null  Mon Mar 20 08:56:54 2006
@@ -1,324 +0,0 @@
-/*
- * Macros for manipulating and testing page->flags
- */
-
-#ifndef PAGE_FLAGS_H
-#define PAGE_FLAGS_H
-
-#include <linux/percpu.h>
-#include <linux/cache.h>
-#include <asm/pgtable.h>
-
-/*
- * Various page->flags bits:
- *
- * PG_reserved is set for special pages, which can never be swapped out. Some
- * of them might not even exist (eg empty_bad_page)...
- *
- * The PG_private bitflag is set if page->private contains a valid value.
- *
- * During disk I/O, PG_locked is used. This bit is set before I/O and
- * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks
- * waiting for the I/O on this page to complete.
- *
- * PG_uptodate tells whether the page's contents are valid. When a read
- * completes, the page becomes uptodate, unless a disk I/O error happened.
- *
- * For choosing which pages to swap out, inode pages carry a PG_referenced bit,
- * which is set any time the system accesses that page through the (mapping,
- * index) hash table. This referenced bit, together with the referenced bit
- * in the page tables, is used to manipulate page->age and move the page across
- * the active, inactive_dirty and inactive_clean lists.
- *
- * Note that the referenced bit, the page->lru list_head and the active,
- * inactive_dirty and inactive_clean lists are protected by the
- * zone->lru_lock, and *NOT* by the usual PG_locked bit!
- *
- * PG_error is set to indicate that an I/O error occurred on this page.
- *
- * PG_arch_1 is an architecture specific page state bit. The generic code
- * guarantees that this bit is cleared for a page when it first is entered into
- * the page cache.
- *
- * PG_highmem pages are not permanently mapped into the kernel virtual address
- * space; they need to be kmapped separately for doing IO on the pages. The
- * struct page structures (which hold these flag bits) are always mapped into
- * kernel address space...
- */
-
-/*
- * Don't use the *_dontuse flags. Use the macros. Otherwise you'll break
- * locked- and dirty-page accounting. The top eight bits of page->flags are
- * used for page->zone, so putting flag bits there doesn't work.
- */
-#define PG_locked                0      /* Page is locked. Don't touch. */
-#define PG_error                 1
-#define PG_referenced            2
-#define PG_uptodate              3
-
-#define PG_dirty                 4
-#define PG_lru                   5
-#define PG_active                6
-#define PG_slab                  7      /* slab debug (Suparna wants this) */
-
-#define PG_checked               8      /* kill me in 2.5.<early>. */
-#define PG_arch_1                9
-#define PG_reserved             10
-#define PG_private              11      /* Has something at ->private */
-
-#define PG_writeback            12      /* Page is under writeback */
-#define PG_nosave               13      /* Used for system suspend/resume */
-#define PG_compound             14      /* Part of a compound page */
-#define PG_swapcache            15      /* Swap page: swp_entry_t in private */
-
-#define PG_mappedtodisk         16      /* Has blocks allocated on-disk */
-#define PG_reclaim              17      /* To be reclaimed asap */
-#define PG_nosave_free          18      /* Free, should not be written */
-#define PG_uncached             19      /* Page has been mapped as uncached */
-
-/*
- * Global page accounting. One instance per CPU. Only unsigned longs are
- * allowed.
- */
-struct page_state {
-        unsigned long nr_dirty;         /* Dirty writeable pages */
-        unsigned long nr_writeback;     /* Pages under writeback */
-        unsigned long nr_unstable;      /* NFS unstable pages */
-        unsigned long nr_page_table_pages;/* Pages used for pagetables */
-        unsigned long nr_mapped;        /* mapped into pagetables */
-        unsigned long nr_slab;          /* In slab */
-#define GET_PAGE_STATE_LAST nr_slab
-
-        /*
-         * The below are zeroed by get_page_state(). Use get_full_page_state()
-         * to add up all these.
-         */
-        unsigned long pgpgin;           /* Disk reads */
-        unsigned long pgpgout;          /* Disk writes */
-        unsigned long pswpin;           /* swap reads */
-        unsigned long pswpout;          /* swap writes */
-        unsigned long pgalloc_high;     /* page allocations */
-
-        unsigned long pgalloc_normal;
-        unsigned long pgalloc_dma;
-        unsigned long pgfree;           /* page freeings */
-        unsigned long pgactivate;       /* pages moved inactive->active */
-        unsigned long pgdeactivate;     /* pages moved active->inactive */
-
-        unsigned long pgfault;          /* faults (major+minor) */
-        unsigned long pgmajfault;       /* faults (major only) */
-        unsigned long pgrefill_high;    /* inspected in refill_inactive_zone */
-        unsigned long pgrefill_normal;
-        unsigned long pgrefill_dma;
-
-        unsigned long pgsteal_high;     /* total highmem pages reclaimed */
-        unsigned long pgsteal_normal;
-        unsigned long pgsteal_dma;
-        unsigned long pgscan_kswapd_high;/* total highmem pages scanned */
-        unsigned long pgscan_kswapd_normal;
-
-        unsigned long pgscan_kswapd_dma;
-        unsigned long pgscan_direct_high;/* total highmem pages scanned */
-        unsigned long pgscan_direct_normal;
-        unsigned long pgscan_direct_dma;
-        unsigned long pginodesteal;     /* pages reclaimed via inode freeing */
-
-        unsigned long slabs_scanned;    /* slab objects scanned */
-        unsigned long kswapd_steal;     /* pages reclaimed by kswapd */
-        unsigned long kswapd_inodesteal;/* reclaimed via kswapd inode freeing */
-        unsigned long pageoutrun;       /* kswapd's calls to page reclaim */
-        unsigned long allocstall;       /* direct reclaim calls */
-
-        unsigned long pgrotated;        /* pages rotated to tail of the LRU */
-        unsigned long nr_bounce;        /* pages for bounce buffers */
-};
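The "one instance per CPU" note above is the whole design: writers bump their
own CPU's copy without locks, and readers sum across all CPUs on demand. A
hedged single-threaded sketch of the aggregation idea, with a fixed CPU count
and a two-field slice of the real struct (everything here is a stand-in):

#include <stdio.h>

#define NR_CPUS 4

/* Trimmed, illustrative slice of the real struct page_state. */
struct page_state {
        unsigned long nr_dirty;
        unsigned long pgfault;
};

/* One instance per CPU, as the header's comment prescribes. */
static struct page_state per_cpu_state[NR_CPUS];

/* Reader side: fold every CPU's counters into one result. */
static void sum_page_state(struct page_state *ret)
{
        ret->nr_dirty = ret->pgfault = 0;
        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                ret->nr_dirty += per_cpu_state[cpu].nr_dirty;
                ret->pgfault += per_cpu_state[cpu].pgfault;
        }
}

int main(void)
{
        per_cpu_state[0].pgfault = 10;  /* pretend CPU0 handled 10 faults */
        per_cpu_state[2].pgfault = 5;

        struct page_state total;
        sum_page_state(&total);
        printf("pgfault total: %lu\n", total.pgfault);   /* 15 */
        return 0;
}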
-
-extern void get_page_state(struct page_state *ret);
-extern void get_full_page_state(struct page_state *ret);
-extern unsigned long __read_page_state(unsigned long offset);
-extern void __mod_page_state(unsigned long offset, unsigned long delta);
-
-#define read_page_state(member) \
-        __read_page_state(offsetof(struct page_state, member))
-
-#define mod_page_state(member, delta)   \
-        __mod_page_state(offsetof(struct page_state, member), (delta))
-
-#define inc_page_state(member)  mod_page_state(member, 1UL)
-#define dec_page_state(member)  mod_page_state(member, 0UL - 1)
-#define add_page_state(member,delta)    mod_page_state(member, (delta))
-#define sub_page_state(member,delta)    mod_page_state(member, 0UL - (delta))
-
-#define mod_page_state_zone(zone, member, delta)                        \
-        do {                                                            \
-                unsigned offset;                                        \
-                if (is_highmem(zone))                                   \
-                        offset = offsetof(struct page_state, member##_high); \
-                else if (is_normal(zone))                               \
-                        offset = offsetof(struct page_state, member##_normal); \
-                else                                                    \
-                        offset = offsetof(struct page_state, member##_dma); \
-                __mod_page_state(offset, (delta));                      \
-        } while (0)
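read_page_state()/mod_page_state() above resolve a member name to a byte
offset at compile time, so a single pair of helpers serves every counter. A
hedged sketch of the same offsetof pattern in plain C (one global instance
stands in for the kernel's per-CPU copies):

#include <stddef.h>
#include <stdio.h>

struct page_state {                 /* trimmed stand-in */
        unsigned long nr_dirty;
        unsigned long pgfault;
};

static struct page_state state;     /* single instance for the demo */

/* Generic accessors keyed by byte offset, as in the header. */
static unsigned long __read_page_state(unsigned long offset)
{
        return *(unsigned long *)((char *)&state + offset);
}

static void __mod_page_state(unsigned long offset, unsigned long delta)
{
        *(unsigned long *)((char *)&state + offset) += delta;
}

#define read_page_state(member) \
        __read_page_state(offsetof(struct page_state, member))
#define mod_page_state(member, delta) \
        __mod_page_state(offsetof(struct page_state, member), (delta))
#define inc_page_state(member)  mod_page_state(member, 1UL)

int main(void)
{
        inc_page_state(pgfault);
        inc_page_state(pgfault);
        printf("pgfault = %lu\n", read_page_state(pgfault));  /* 2 */
        return 0;
}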
-
-/*
- * Manipulation of page state flags
- */
-#define PageLocked(page)                \
-                test_bit(PG_locked, &(page)->flags)
-#define SetPageLocked(page)             \
-                set_bit(PG_locked, &(page)->flags)
-#define TestSetPageLocked(page)         \
-                test_and_set_bit(PG_locked, &(page)->flags)
-#define ClearPageLocked(page)           \
-                clear_bit(PG_locked, &(page)->flags)
-#define TestClearPageLocked(page)       \
-                test_and_clear_bit(PG_locked, &(page)->flags)
-
-#define PageError(page)         test_bit(PG_error, &(page)->flags)
-#define SetPageError(page)      set_bit(PG_error, &(page)->flags)
-#define ClearPageError(page)    clear_bit(PG_error, &(page)->flags)
-
-#define PageReferenced(page)    test_bit(PG_referenced, &(page)->flags)
-#define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags)
-#define ClearPageReferenced(page)       clear_bit(PG_referenced, &(page)->flags)
-#define TestClearPageReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags)
-
-#define PageUptodate(page)      test_bit(PG_uptodate, &(page)->flags)
-#ifndef SetPageUptodate
-#define SetPageUptodate(page)   set_bit(PG_uptodate, &(page)->flags)
-#endif
-#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags)
-
-#define PageDirty(page)         test_bit(PG_dirty, &(page)->flags)
-#define SetPageDirty(page)      set_bit(PG_dirty, &(page)->flags)
-#define TestSetPageDirty(page)  test_and_set_bit(PG_dirty, &(page)->flags)
-#define ClearPageDirty(page)    clear_bit(PG_dirty, &(page)->flags)
-#define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags)
-
-#define SetPageLRU(page)        set_bit(PG_lru, &(page)->flags)
-#define PageLRU(page)           test_bit(PG_lru, &(page)->flags)
-#define TestSetPageLRU(page)    test_and_set_bit(PG_lru, &(page)->flags)
-#define TestClearPageLRU(page)  test_and_clear_bit(PG_lru, &(page)->flags)
-
-#define PageActive(page)        test_bit(PG_active, &(page)->flags)
-#define SetPageActive(page)     set_bit(PG_active, &(page)->flags)
-#define ClearPageActive(page)   clear_bit(PG_active, &(page)->flags)
-#define TestClearPageActive(page) test_and_clear_bit(PG_active, &(page)->flags)
-#define TestSetPageActive(page) test_and_set_bit(PG_active, &(page)->flags)
-
-#define PageSlab(page)          test_bit(PG_slab, &(page)->flags)
-#define SetPageSlab(page)       set_bit(PG_slab, &(page)->flags)
-#define ClearPageSlab(page)     clear_bit(PG_slab, &(page)->flags)
-#define TestClearPageSlab(page) test_and_clear_bit(PG_slab, &(page)->flags)
-#define TestSetPageSlab(page)   test_and_set_bit(PG_slab, &(page)->flags)
-
-#ifdef CONFIG_HIGHMEM
-#define PageHighMem(page)       is_highmem(page_zone(page))
-#else
-#define PageHighMem(page)       0 /* needed to optimize away at compile time */
-#endif
-
-#define PageChecked(page)       test_bit(PG_checked, &(page)->flags)
-#define SetPageChecked(page)    set_bit(PG_checked, &(page)->flags)
-#define ClearPageChecked(page)  clear_bit(PG_checked, &(page)->flags)
-
-#define PageReserved(page)      test_bit(PG_reserved, &(page)->flags)
-#define SetPageReserved(page)   set_bit(PG_reserved, &(page)->flags)
-#define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags)
-#define __ClearPageReserved(page)       __clear_bit(PG_reserved, &(page)->flags)
-
-#define SetPagePrivate(page)    set_bit(PG_private, &(page)->flags)
-#define ClearPagePrivate(page)  clear_bit(PG_private, &(page)->flags)
-#define PagePrivate(page)       test_bit(PG_private, &(page)->flags)
-#define __SetPagePrivate(page)  __set_bit(PG_private, &(page)->flags)
-#define __ClearPagePrivate(page) __clear_bit(PG_private, &(page)->flags)
-
-#define PageWriteback(page)     test_bit(PG_writeback, &(page)->flags)
-#define SetPageWriteback(page)                                          \
-        do {                                                            \
-                if (!test_and_set_bit(PG_writeback,                     \
-                                &(page)->flags))                        \
-                        inc_page_state(nr_writeback);                   \
-        } while (0)
-#define TestSetPageWriteback(page)                                      \
-        ({                                                              \
-                int ret;                                                \
-                ret = test_and_set_bit(PG_writeback,                    \
-                                        &(page)->flags);                \
-                if (!ret)                                               \
-                        inc_page_state(nr_writeback);                   \
-                ret;                                                    \
-        })
-#define ClearPageWriteback(page)                                        \
-        do {                                                            \
-                if (test_and_clear_bit(PG_writeback,                    \
-                                &(page)->flags))                        \
-                        dec_page_state(nr_writeback);                   \
-        } while (0)
-#define TestClearPageWriteback(page)                                    \
-        ({                                                              \
-                int ret;                                                \
-                ret = test_and_clear_bit(PG_writeback,                  \
-                                        &(page)->flags);                \
-                if (ret)                                                \
-                        dec_page_state(nr_writeback);                   \
-                ret;                                                    \
-        })
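Note how the writeback macros above bundle the flag flip with nr_writeback
accounting, and how the test_and_* form moves the counter only when the bit
actually changes. A hedged single-threaded sketch of that invariant, with
C11 stdatomic standing in for the kernel's set_bit/test_and_set_bit bitops:

#include <stdatomic.h>
#include <stdio.h>

#define PG_writeback 12

static atomic_ulong flags;          /* stand-in for page->flags */
static unsigned long nr_writeback;  /* stand-in for the page_state counter */

/* Counter moves only when the bit transition really happened. */
static void set_page_writeback_demo(void)
{
        unsigned long old = atomic_fetch_or(&flags, 1UL << PG_writeback);
        if (!(old & (1UL << PG_writeback)))
                nr_writeback++;
}

static void clear_page_writeback_demo(void)
{
        unsigned long old = atomic_fetch_and(&flags, ~(1UL << PG_writeback));
        if (old & (1UL << PG_writeback))
                nr_writeback--;
}

int main(void)
{
        set_page_writeback_demo();
        set_page_writeback_demo();      /* second call must not double-count */
        printf("nr_writeback = %lu\n", nr_writeback);   /* 1, not 2 */
        clear_page_writeback_demo();
        printf("nr_writeback = %lu\n", nr_writeback);   /* 0 */
        return 0;
}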
-
-#define PageNosave(page)        test_bit(PG_nosave, &(page)->flags)
-#define SetPageNosave(page)     set_bit(PG_nosave, &(page)->flags)
-#define TestSetPageNosave(page) test_and_set_bit(PG_nosave, &(page)->flags)
-#define ClearPageNosave(page)           clear_bit(PG_nosave, &(page)->flags)
-#define TestClearPageNosave(page)       test_and_clear_bit(PG_nosave, &(page)->flags)
-
-#define PageNosaveFree(page)    test_bit(PG_nosave_free, &(page)->flags)
-#define SetPageNosaveFree(page) set_bit(PG_nosave_free, &(page)->flags)
-#define ClearPageNosaveFree(page)       clear_bit(PG_nosave_free, &(page)->flags)
-
-#define PageMappedToDisk(page)  test_bit(PG_mappedtodisk, &(page)->flags)
-#define SetPageMappedToDisk(page)       set_bit(PG_mappedtodisk, &(page)->flags)
-#define ClearPageMappedToDisk(page)     clear_bit(PG_mappedtodisk, &(page)->flags)
-
-#define PageReclaim(page)       test_bit(PG_reclaim, &(page)->flags)
-#define SetPageReclaim(page)    set_bit(PG_reclaim, &(page)->flags)
-#define ClearPageReclaim(page)  clear_bit(PG_reclaim, &(page)->flags)
-#define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags)
-
-#ifdef CONFIG_HUGETLB_PAGE
-#define PageCompound(page)      test_bit(PG_compound, &(page)->flags)
-#else
-#define PageCompound(page)      0
-#endif
-#define SetPageCompound(page)   set_bit(PG_compound, &(page)->flags)
-#define ClearPageCompound(page) clear_bit(PG_compound, &(page)->flags)
-
-#ifdef CONFIG_SWAP
-#define PageSwapCache(page)     test_bit(PG_swapcache, &(page)->flags)
-#define SetPageSwapCache(page)  set_bit(PG_swapcache, &(page)->flags)
-#define ClearPageSwapCache(page) clear_bit(PG_swapcache, &(page)->flags)
-#else
-#define PageSwapCache(page)     0
-#endif
-
-#define PageUncached(page)      test_bit(PG_uncached, &(page)->flags)
-#define SetPageUncached(page)   set_bit(PG_uncached, &(page)->flags)
-#define ClearPageUncached(page) clear_bit(PG_uncached, &(page)->flags)
-
-struct page;    /* forward declaration */
-
-int test_clear_page_dirty(struct page *page);
-int test_clear_page_writeback(struct page *page);
-int test_set_page_writeback(struct page *page);
-
-static inline void clear_page_dirty(struct page *page)
-{
-        test_clear_page_dirty(page);
-}
-
-static inline void set_page_writeback(struct page *page)
-{
-        test_set_page_writeback(page);
-}
-
-#endif  /* PAGE_FLAGS_H */
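One closing note on the file above: the Test* variants exist because
test_and_set_bit reports the prior state atomically, which is exactly what a
trylock needs; TestSetPageLocked() is the building block for trying to lock a
page without sleeping. A hedged userspace sketch of the pattern (stdatomic
again stands in for the kernel bitops; names here are demo-only):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define PG_locked 0

static atomic_ulong flags;          /* stand-in for page->flags */

/* True if we took the lock, false if someone already holds it. */
static bool trylock_page_demo(void)
{
        unsigned long old = atomic_fetch_or(&flags, 1UL << PG_locked);
        return !(old & (1UL << PG_locked));
}

static void unlock_page_demo(void)
{
        atomic_fetch_and(&flags, ~(1UL << PG_locked));
}

int main(void)
{
        printf("first try:    %s\n", trylock_page_demo() ? "locked" : "busy");
        printf("second try:   %s\n", trylock_page_demo() ? "locked" : "busy");
        unlock_page_demo();
        printf("after unlock: %s\n", trylock_page_demo() ? "locked" : "busy");
        return 0;
}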
The insert instead must be implemented - int two steps: as first thing the code must insert the element in - order as a red leaf in the tree, then the support library function - rb_insert_color() must be called. Such function will do the - not trivial work to rebalance the rbtree if necessary. - ------------------------------------------------------------------------ -static inline struct page * rb_search_page_cache(struct inode * inode, - unsigned long offset) -{ - struct rb_node * n = inode->i_rb_page_cache.rb_node; - struct page * page; - - while (n) - { - page = rb_entry(n, struct page, rb_page_cache); - - if (offset < page->offset) - n = n->rb_left; - else if (offset > page->offset) - n = n->rb_right; - else - return page; - } - return NULL; -} - -static inline struct page * __rb_insert_page_cache(struct inode * inode, - unsigned long offset, - struct rb_node * node) -{ - struct rb_node ** p = &inode->i_rb_page_cache.rb_node; - struct rb_node * parent = NULL; - struct page * page; - - while (*p) - { - parent = *p; - page = rb_entry(parent, struct page, rb_page_cache); - - if (offset < page->offset) - p = &(*p)->rb_left; - else if (offset > page->offset) - p = &(*p)->rb_right; - else - return page; - } - - rb_link_node(node, parent, p); - - return NULL; -} - -static inline struct page * rb_insert_page_cache(struct inode * inode, - unsigned long offset, - struct rb_node * node) -{ - struct page * ret; - if ((ret = __rb_insert_page_cache(inode, offset, node))) - goto out; - rb_insert_color(node, &inode->i_rb_page_cache); - out: - return ret; -} ------------------------------------------------------------------------ -*/ - -#ifndef _LINUX_RBTREE_H -#define _LINUX_RBTREE_H - -#include <linux/kernel.h> -#include <linux/stddef.h> - -struct rb_node -{ - struct rb_node *rb_parent; - int rb_color; -#define RB_RED 0 -#define RB_BLACK 1 - struct rb_node *rb_right; - struct rb_node *rb_left; -}; - -struct rb_root -{ - struct rb_node *rb_node; -}; - -#define RB_ROOT (struct rb_root) { NULL, } -#define rb_entry(ptr, type, member) container_of(ptr, type, member) - -extern void rb_insert_color(struct rb_node *, struct rb_root *); -extern void rb_erase(struct rb_node *, struct rb_root *); - -/* Find logical next and previous nodes in a tree */ -extern struct rb_node *rb_next(struct rb_node *); -extern struct rb_node *rb_prev(struct rb_node *); -extern struct rb_node *rb_first(struct rb_root *); -extern struct rb_node *rb_last(struct rb_root *); - -/* Fast replacement of a single node without remove/rebalance/add/rebalance */ -extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, - struct rb_root *root); - -static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, - struct rb_node ** rb_link) -{ - node->rb_parent = parent; - node->rb_color = RB_RED; - node->rb_left = node->rb_right = NULL; - - *rb_link = node; -} - -#endif /* _LINUX_RBTREE_H */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog