[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [linux-2.6.18-xen] merge with linux-2.6.18-xen.hg (staging)
# HG changeset patch # User Alex Williamson <alex.williamson@xxxxxx> # Date 1193780291 21600 # Node ID bac78e892e42cb075d98a52c2c9a81df82f12e10 # Parent 0edeca96e7d7821f8413589e2e1f90bf68b696bd # Parent 71a5313e13f5d99b1b550b1b5f1c0198fc4c9a70 merge with linux-2.6.18-xen.hg (staging) --- arch/i386/kernel/entry-xen.S | 23 arch/i386/kernel/smp-xen.c | 8 arch/i386/kernel/sysenter.c | 46 + arch/i386/kernel/time-xen.c | 41 - arch/i386/mm/fault-xen.c | 2 arch/i386/mm/hypervisor.c | 1 arch/um/kernel/physmem.c | 4 arch/x86_64/ia32/Makefile | 17 arch/x86_64/ia32/ia32entry-xen.S | 166 +----- arch/x86_64/ia32/syscall32-xen.c | 49 + arch/x86_64/ia32/syscall32_syscall-xen.S | 2 arch/x86_64/kernel/entry-xen.S | 7 arch/x86_64/kernel/smp-xen.c | 8 arch/x86_64/kernel/traps-xen.c | 2 arch/x86_64/mm/fault-xen.c | 2 arch/x86_64/mm/init-xen.c | 4 arch/x86_64/mm/pageattr-xen.c | 2 buildconfigs/linux-defconfig_xen0_ia64 | 1 buildconfigs/linux-defconfig_xen0_x86_32 | 1 buildconfigs/linux-defconfig_xen0_x86_64 | 1 buildconfigs/linux-defconfig_xenU_ia64 | 1 buildconfigs/linux-defconfig_xenU_x86_32 | 1 buildconfigs/linux-defconfig_xenU_x86_64 | 1 buildconfigs/linux-defconfig_xen_ia64 | 1 buildconfigs/linux-defconfig_xen_x86_32 | 1 buildconfigs/linux-defconfig_xen_x86_64 | 1 drivers/cpufreq/cpufreq_ondemand.c | 81 ++ drivers/xen/Kconfig | 6 drivers/xen/blktap/blktap.c | 8 drivers/xen/core/evtchn.c | 10 drivers/xen/core/smpboot.c | 1 drivers/xen/netfront/accel.c | 820 +++++++++++------------------- drivers/xen/netfront/netfront.c | 20 drivers/xen/netfront/netfront.h | 82 --- drivers/xen/xenbus/xenbus_probe.c | 8 include/asm-i386/mach-xen/asm/page.h | 8 include/asm-ia64/page.h | 9 include/asm-powerpc/page.h | 9 include/asm-um/page.h | 2 include/asm-x86_64/mach-xen/asm/page.h | 8 include/asm-x86_64/mach-xen/asm/pgtable.h | 2 include/linux/gfp.h | 6 include/xen/evtchn.h | 1 include/xen/interface/callback.h | 37 + include/xen/interface/domctl.h | 56 +- include/xen/interface/platform.h | 22 
include/xen/interface/sysctl.h | 25 include/xen/interface/vcpu.h | 3 include/xen/interface/xen-compat.h | 2 mm/page_alloc.c | 14 50 files changed, 753 insertions(+), 880 deletions(-) diff -r 0edeca96e7d7 -r bac78e892e42 arch/i386/kernel/entry-xen.S --- a/arch/i386/kernel/entry-xen.S Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/i386/kernel/entry-xen.S Tue Oct 30 15:38:11 2007 -0600 @@ -381,6 +381,29 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT #endif /* !CONFIG_XEN */ CFI_ENDPROC + # pv sysenter call handler stub +ENTRY(sysenter_entry_pv) + RING0_INT_FRAME + movl $__USER_DS,16(%esp) + movl %ebp,12(%esp) + movl $__USER_CS,4(%esp) + addl $4,%esp + /* +5*4 is SS:ESP,EFLAGS,CS:EIP. +8 is esp0 setting. */ + pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) +/* + * Load the potential sixth argument from user stack. + * Careful about security. + */ + cmpl $__PAGE_OFFSET-3,%ebp + jae syscall_fault +1: movl (%ebp),%ebp +.section __ex_table,"a" + .align 4 + .long 1b,syscall_fault +.previous + /* fall through */ + CFI_ENDPROC +ENDPROC(sysenter_entry_pv) # system call handler stub ENTRY(system_call) diff -r 0edeca96e7d7 -r bac78e892e42 arch/i386/kernel/smp-xen.c --- a/arch/i386/kernel/smp-xen.c Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/i386/kernel/smp-xen.c Tue Oct 30 15:38:11 2007 -0600 @@ -558,9 +558,7 @@ static void stop_this_cpu (void * dummy) */ cpu_clear(smp_processor_id(), cpu_online_map); local_irq_disable(); -#if 0 - disable_local_APIC(); -#endif + disable_all_local_evtchn(); if (cpu_data[smp_processor_id()].hlt_works_ok) for(;;) halt(); for (;;); @@ -575,9 +573,7 @@ void smp_send_stop(void) smp_call_function(stop_this_cpu, NULL, 1, 0); local_irq_disable(); -#if 0 - disable_local_APIC(); -#endif + disable_all_local_evtchn(); local_irq_enable(); } diff -r 0edeca96e7d7 -r bac78e892e42 arch/i386/kernel/sysenter.c --- a/arch/i386/kernel/sysenter.c Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/i386/kernel/sysenter.c Tue Oct 30 15:38:11 2007 -0600 @@ -48,7 +48,7 @@ extern 
asmlinkage void sysenter_entry(vo void enable_sep_cpu(void) { -#ifndef CONFIG_X86_NO_TSS +#ifndef CONFIG_XEN int cpu = get_cpu(); struct tss_struct *tss = &per_cpu(init_tss, cpu); @@ -62,8 +62,36 @@ void enable_sep_cpu(void) wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0); wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); - put_cpu(); -#endif +#else + extern asmlinkage void sysenter_entry_pv(void); + static struct callback_register sysenter = { + .type = CALLBACKTYPE_sysenter, + .address = { __KERNEL_CS, (unsigned long)sysenter_entry_pv }, + }; + + if (!boot_cpu_has(X86_FEATURE_SEP)) + return; + + get_cpu(); + + if (xen_feature(XENFEAT_supervisor_mode_kernel)) + sysenter.address.eip = (unsigned long)sysenter_entry; + + switch (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter)) { + case 0: + break; +#if CONFIG_XEN_COMPAT < 0x030200 + case -ENOSYS: + sysenter.type = CALLBACKTYPE_sysenter_deprecated; + if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) == 0) + break; +#endif + default: + clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability); + break; + } +#endif + put_cpu(); } /* @@ -77,18 +105,6 @@ int __init sysenter_setup(void) int __init sysenter_setup(void) { syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); - -#ifdef CONFIG_XEN - if (boot_cpu_has(X86_FEATURE_SEP)) { - static struct callback_register __initdata sysenter = { - .type = CALLBACKTYPE_sysenter, - .address = { __KERNEL_CS, (unsigned long)sysenter_entry }, - }; - - if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0) - clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability); - } -#endif #ifdef CONFIG_COMPAT_VDSO __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY); diff -r 0edeca96e7d7 -r bac78e892e42 arch/i386/kernel/time-xen.c --- a/arch/i386/kernel/time-xen.c Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/i386/kernel/time-xen.c Tue Oct 30 15:38:11 2007 -0600 @@ -833,44 +833,15 @@ void notify_arch_cmos_timer(void) 
mod_timer(&sync_xen_wallclock_timer, jiffies + 1); } -static long clock_cmos_diff, sleep_start; - -static int timer_suspend(struct sys_device *dev, pm_message_t state) -{ - /* - * Estimate time zone so that set_time can update the clock - */ - clock_cmos_diff = -get_cmos_time(); - clock_cmos_diff += get_seconds(); - sleep_start = get_cmos_time(); +static int timer_resume(struct sys_device *dev) +{ + extern void time_resume(void); + time_resume(); return 0; } -static int timer_resume(struct sys_device *dev) -{ - unsigned long flags; - unsigned long sec; - unsigned long sleep_length; - -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled()) - hpet_reenable(); -#endif - sec = get_cmos_time() + clock_cmos_diff; - sleep_length = (get_cmos_time() - sleep_start) * HZ; - write_seqlock_irqsave(&xtime_lock, flags); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; - jiffies_64 += sleep_length; - wall_jiffies += sleep_length; - write_sequnlock_irqrestore(&xtime_lock, flags); - touch_softlockup_watchdog(); - return 0; -} - -void time_resume(void); static struct sysdev_class timer_sysclass = { - .resume = time_resume, + .resume = timer_resume, set_kset_name("timer"), }; @@ -1121,7 +1092,7 @@ void local_teardown_timer(unsigned int c } #endif -#if CONFIG_CPU_FREQ +#ifdef CONFIG_CPU_FREQ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { diff -r 0edeca96e7d7 -r bac78e892e42 arch/i386/mm/fault-xen.c --- a/arch/i386/mm/fault-xen.c Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/i386/mm/fault-xen.c Tue Oct 30 15:38:11 2007 -0600 @@ -346,7 +346,7 @@ static int spurious_fault(struct pt_regs if ((error_code & 0x02) && !pte_write(*pte)) return 0; #ifdef CONFIG_X86_PAE - if ((error_code & 0x10) && (pte_val(*pte) & _PAGE_NX)) + if ((error_code & 0x10) && (__pte_val(*pte) & _PAGE_NX)) return 0; #endif diff -r 0edeca96e7d7 -r bac78e892e42 arch/i386/mm/hypervisor.c --- a/arch/i386/mm/hypervisor.c Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/i386/mm/hypervisor.c Tue Oct 30 
15:38:11 2007 -0600 @@ -56,6 +56,7 @@ void xen_l1_entry_update(pte_t *ptr, pte u.val = __pte_val(val); BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0); } +EXPORT_SYMBOL_GPL(xen_l1_entry_update); void xen_l2_entry_update(pmd_t *ptr, pmd_t val) { diff -r 0edeca96e7d7 -r bac78e892e42 arch/um/kernel/physmem.c --- a/arch/um/kernel/physmem.c Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/um/kernel/physmem.c Tue Oct 30 15:38:11 2007 -0600 @@ -226,7 +226,7 @@ EXPORT_SYMBOL(physmem_remove_mapping); EXPORT_SYMBOL(physmem_remove_mapping); EXPORT_SYMBOL(physmem_subst_mapping); -int arch_free_page(struct page *page, int order) +void arch_free_page(struct page *page, int order) { void *virt; int i; @@ -235,8 +235,6 @@ int arch_free_page(struct page *page, in virt = __va(page_to_phys(page + i)); physmem_remove_mapping(virt); } - - return 0; } int is_remapped(void *virt) diff -r 0edeca96e7d7 -r bac78e892e42 arch/x86_64/ia32/Makefile --- a/arch/x86_64/ia32/Makefile Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/x86_64/ia32/Makefile Tue Oct 30 15:38:11 2007 -0600 @@ -14,11 +14,14 @@ audit-class-$(CONFIG_AUDIT) := audit.o audit-class-$(CONFIG_AUDIT) := audit.o obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y) +syscall32-types-y := sysenter syscall +syscall32-types-$(subst 1,$(CONFIG_XEN),$(shell expr $(CONFIG_XEN_COMPAT)0 '<' 0x0302000)) += int80 + $(obj)/syscall32_syscall.o: \ - $(foreach F,sysenter syscall,$(obj)/vsyscall-$F.so) + $(foreach F,$(syscall32-types-y),$(obj)/vsyscall-$F.so) # Teach kbuild about targets -targets := $(foreach F,sysenter syscall,vsyscall-$F.o vsyscall-$F.so) +targets := $(foreach F,$(syscall32-types-y),vsyscall-$F.o vsyscall-$F.so) # The DSO images are built using a special linker script quiet_cmd_syscall = SYSCALL $@ @@ -27,18 +30,10 @@ quiet_cmd_syscall = SYSCALL $@ -Wl,-soname=linux-gate.so.1 -o $@ \ -Wl,-T,$(filter-out FORCE,$^) -$(obj)/vsyscall-int80.so \ -$(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \ +$(foreach 
F,$(syscall32-types-y),$(obj)/vsyscall-$F.so): \ $(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE $(call if_changed,syscall) AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32 -Iarch/i386/kernel AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32 -Iarch/i386/kernel - -ifdef CONFIG_XEN AFLAGS_vsyscall-int80.o = -m32 -Wa,-32 -Iarch/i386/kernel -CFLAGS_syscall32.o += -DUSE_INT80 -AFLAGS_syscall32_syscall.o += -DUSE_INT80 - -$(obj)/syscall32_syscall.o: $(obj)/vsyscall-int80.so -endif diff -r 0edeca96e7d7 -r bac78e892e42 arch/x86_64/ia32/ia32entry-xen.S --- a/arch/x86_64/ia32/ia32entry-xen.S Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/x86_64/ia32/ia32entry-xen.S Tue Oct 30 15:38:11 2007 -0600 @@ -92,7 +92,7 @@ * %ebp user stack * 0(%ebp) Arg6 * - * Interrupts off. + * Interrupts on. * * This is purely a fast path. For anything complicated we use the int 0x80 * path below. Set up a complete hardware stack frame to share code @@ -100,38 +100,26 @@ */ ENTRY(ia32_sysenter_target) CFI_STARTPROC32 simple - CFI_DEF_CFA rsp,0 - CFI_REGISTER rsp,rbp - __swapgs - movq %gs:pda_kernelstack, %rsp - addq $(PDA_STACKOFFSET),%rsp - /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs, here we enable it straight after entry: - */ - XEN_UNBLOCK_EVENTS(%r11) - __sti + CFI_DEF_CFA rsp,SS+8-RIP+16 + /*CFI_REL_OFFSET ss,SS-RIP+16*/ + CFI_REL_OFFSET rsp,RSP-RIP+16 + /*CFI_REL_OFFSET rflags,EFLAGS-RIP+16*/ + /*CFI_REL_OFFSET cs,CS-RIP+16*/ + CFI_REL_OFFSET rip,RIP-RIP+16 + CFI_REL_OFFSET r11,8 + CFI_REL_OFFSET rcx,0 + movq 8(%rsp),%r11 + CFI_RESTORE r11 + popq %rcx + CFI_ADJUST_CFA_OFFSET -8 + CFI_RESTORE rcx movl %ebp,%ebp /* zero extension */ - pushq $__USER32_DS - CFI_ADJUST_CFA_OFFSET 8 - /*CFI_REL_OFFSET ss,0*/ - pushq %rbp - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rsp,0 - pushfq - CFI_ADJUST_CFA_OFFSET 8 - /*CFI_REL_OFFSET rflags,0*/ - movl $VSYSCALL32_SYSEXIT, %r10d - CFI_REGISTER rip,r10 - pushq $__USER32_CS - CFI_ADJUST_CFA_OFFSET 8 - /*CFI_REL_OFFSET cs,0*/ 
- movl %eax, %eax - pushq %r10 - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rip,0 - pushq %rax - CFI_ADJUST_CFA_OFFSET 8 + movl %eax,%eax + movl $__USER32_DS,40(%rsp) + movq %rbp,32(%rsp) + movl $__USER32_CS,16(%rsp) + movl $VSYSCALL32_SYSEXIT,8(%rsp) + movq %rax,(%rsp) cld SAVE_ARGS 0,0,0 /* no need to do an access_ok check here because rbp has been @@ -143,7 +131,6 @@ 1: movl (%rbp),%r9d GET_THREAD_INFO(%r10) orl $TS_COMPAT,threadinfo_status(%r10) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) - CFI_REMEMBER_STATE jnz sysenter_tracesys sysenter_do_call: cmpl $(IA32_NR_syscalls-1),%eax @@ -151,33 +138,9 @@ sysenter_do_call: IA32_ARG_FIXUP 1 call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) - GET_THREAD_INFO(%r10) - XEN_BLOCK_EVENTS(%r11) - __cli - TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) - jnz int_ret_from_sys_call - andl $~TS_COMPAT,threadinfo_status(%r10) - /* clear IF, that popfq doesn't enable interrupts early */ - andl $~0x200,EFLAGS-R11(%rsp) - RESTORE_ARGS 1,24,1,1,1,1 - popfq - CFI_ADJUST_CFA_OFFSET -8 - /*CFI_RESTORE rflags*/ - popq %rcx /* User %esp */ - CFI_ADJUST_CFA_OFFSET -8 - CFI_REGISTER rsp,rcx - movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */ - CFI_REGISTER rip,rdx - TRACE_IRQS_ON - __swapgs - XEN_UNBLOCK_EVENTS(%r11) - __sti /* sti only takes effect after the next instruction */ - /* sysexit */ - .byte 0xf, 0x35 /* TBD */ + jmp int_ret_from_sys_call sysenter_tracesys: - CFI_RESTORE_STATE SAVE_REST CLEAR_RREGS movq $-ENOSYS,RAX(%rsp) /* really needed? */ @@ -210,7 +173,7 @@ ENDPROC(ia32_sysenter_target) * %esp user stack * 0(%esp) Arg6 * - * Interrupts off. + * Interrupts on. * * This is purely a fast path. For anything complicated we use the int 0x80 * path below. 
Set up a complete hardware stack frame to share code @@ -218,32 +181,20 @@ ENDPROC(ia32_sysenter_target) */ ENTRY(ia32_cstar_target) CFI_STARTPROC32 simple - CFI_DEF_CFA rsp,PDA_STACKOFFSET - CFI_REGISTER rip,rcx - /*CFI_REGISTER rflags,r11*/ - __swapgs - movl %esp,%r8d - CFI_REGISTER rsp,r8 - movq %gs:pda_kernelstack,%rsp - /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs and here we enable it straight after entry: - */ - XEN_UNBLOCK_EVENTS(%r11) - __sti - SAVE_ARGS 8,1,1 + CFI_DEF_CFA rsp,SS+8-RIP+16 + /*CFI_REL_OFFSET ss,SS-RIP+16*/ + CFI_REL_OFFSET rsp,RSP-RIP+16 + /*CFI_REL_OFFSET rflags,EFLAGS-RIP+16*/ + /*CFI_REL_OFFSET cs,CS-RIP+16*/ + CFI_REL_OFFSET rip,RIP-RIP+16 movl %eax,%eax /* zero extension */ + movl RSP-RIP+16(%rsp),%r8d + SAVE_ARGS -8,1,1 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) - movq %rcx,RIP-ARGOFFSET(%rsp) - CFI_REL_OFFSET rip,RIP-ARGOFFSET movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */ movl %ebp,%ecx - movq $__USER32_CS,CS-ARGOFFSET(%rsp) - movq $__USER32_DS,SS-ARGOFFSET(%rsp) - movq %r11,EFLAGS-ARGOFFSET(%rsp) - /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ - movq %r8,RSP-ARGOFFSET(%rsp) - CFI_REL_OFFSET rsp,RSP-ARGOFFSET + movl $__USER32_CS,CS-ARGOFFSET(%rsp) + movl $__USER32_DS,SS-ARGOFFSET(%rsp) /* no need to do an access_ok check here because r8 has been 32bit zero extended */ /* hardware stack frame is complete now */ @@ -254,7 +205,6 @@ 1: movl (%r8),%r9d GET_THREAD_INFO(%r10) orl $TS_COMPAT,threadinfo_status(%r10) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) - CFI_REMEMBER_STATE jnz cstar_tracesys cstar_do_call: cmpl $IA32_NR_syscalls-1,%eax @@ -262,26 +212,9 @@ cstar_do_call: IA32_ARG_FIXUP 1 call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) - GET_THREAD_INFO(%r10) - XEN_BLOCK_EVENTS(%r11) - __cli - TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) - jnz int_ret_from_sys_call - andl $~TS_COMPAT,threadinfo_status(%r10) - 
RESTORE_ARGS 1,-ARG_SKIP,1,1,1 - movl RIP-ARGOFFSET(%rsp),%ecx - CFI_REGISTER rip,rcx - movl EFLAGS-ARGOFFSET(%rsp),%r11d - /*CFI_REGISTER rflags,r11*/ - TRACE_IRQS_ON - movl RSP-ARGOFFSET(%rsp),%esp - CFI_RESTORE rsp - __swapgs - sysretl /* TBD */ + jmp int_ret_from_sys_call cstar_tracesys: - CFI_RESTORE_STATE SAVE_REST CLEAR_RREGS movq $-ENOSYS,RAX(%rsp) /* really needed? */ @@ -322,32 +255,27 @@ ia32_badarg: * Arguments are zero extended. For system calls that want sign extension and * take long arguments a wrapper is needed. Most calls can just be called * directly. - * Assumes it is only called from user space and entered with interrupts off. + * Assumes it is only called from user space and entered with interrupts on. */ ENTRY(ia32_syscall) CFI_STARTPROC simple - CFI_DEF_CFA rsp,SS+8-RIP - /*CFI_REL_OFFSET ss,SS-RIP*/ - CFI_REL_OFFSET rsp,RSP-RIP - /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ - /*CFI_REL_OFFSET cs,CS-RIP*/ - CFI_REL_OFFSET rip,RIP-RIP - __swapgs - /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs and here we enable it straight after entry: - */ - XEN_UNBLOCK_EVENTS(%r11) - __sti - movq (%rsp),%rcx + CFI_DEF_CFA rsp,SS+8-RIP+16 + /*CFI_REL_OFFSET ss,SS-RIP+16*/ + CFI_REL_OFFSET rsp,RSP-RIP+16 + /*CFI_REL_OFFSET rflags,EFLAGS-RIP+16*/ + /*CFI_REL_OFFSET cs,CS-RIP+16*/ + CFI_REL_OFFSET rip,RIP-RIP+16 + CFI_REL_OFFSET r11,8 + CFI_REL_OFFSET rcx,0 movq 8(%rsp),%r11 - addq $0x10,%rsp /* skip rcx and r11 */ + CFI_RESTORE r11 + popq %rcx + CFI_ADJUST_CFA_OFFSET -8 + CFI_RESTORE rcx movl %eax,%eax - pushq %rax - CFI_ADJUST_CFA_OFFSET 8 + movq %rax,(%rsp) cld -/* 1: jmp 1b */ /* note the registers are not zero extended to the sf. this could be a problem. 
*/ SAVE_ARGS 0,0,1 diff -r 0edeca96e7d7 -r bac78e892e42 arch/x86_64/ia32/syscall32-xen.c --- a/arch/x86_64/ia32/syscall32-xen.c Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/x86_64/ia32/syscall32-xen.c Tue Oct 30 15:38:11 2007 -0600 @@ -13,17 +13,18 @@ #include <asm/proto.h> #include <asm/tlbflush.h> #include <asm/ia32_unistd.h> +#include <xen/interface/callback.h> -#ifdef USE_INT80 -extern unsigned char syscall32_int80[], syscall32_int80_end[]; -#endif extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; extern int sysctl_vsyscall32; char *syscall32_page; -#ifndef USE_INT80 static int use_sysenter = -1; + +#if CONFIG_XEN_COMPAT < 0x030200 +extern unsigned char syscall32_int80[], syscall32_int80_end[]; +static int use_int80 = 1; #endif static struct page * @@ -86,13 +87,12 @@ static int __init init_syscall32(void) if (!syscall32_page) panic("Cannot allocate syscall32 page"); -#ifdef USE_INT80 - /* - * At this point we use int 0x80. 
- */ - memcpy(syscall32_page, syscall32_int80, - syscall32_int80_end - syscall32_int80); -#else +#if CONFIG_XEN_COMPAT < 0x030200 + if (use_int80) { + memcpy(syscall32_page, syscall32_int80, + syscall32_int80_end - syscall32_int80); + } else +#endif if (use_sysenter > 0) { memcpy(syscall32_page, syscall32_sysenter, syscall32_sysenter_end - syscall32_sysenter); @@ -100,7 +100,6 @@ static int __init init_syscall32(void) memcpy(syscall32_page, syscall32_syscall, syscall32_syscall_end - syscall32_syscall); } -#endif return 0; } @@ -113,16 +112,26 @@ core_initcall(init_syscall32); /* May not be __init: called during resume */ void syscall32_cpu_init(void) { -#ifndef USE_INT80 - if (use_sysenter < 0) - use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL); + static struct callback_register cstar = { + .type = CALLBACKTYPE_syscall32, + .address = (unsigned long)ia32_cstar_target + }; + static struct callback_register sysenter = { + .type = CALLBACKTYPE_sysenter, + .address = (unsigned long)ia32_sysenter_target + }; /* Load these always in case some future AMD CPU supports SYSENTER from compat mode too. 
*/ - checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL); - checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); + if ((HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0) || + (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) < 0)) +#if CONFIG_XEN_COMPAT < 0x030200 + return; + use_int80 = 0; +#else + BUG(); +#endif - wrmsrl(MSR_CSTAR, ia32_cstar_target); -#endif + if (use_sysenter < 0) + use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL); } diff -r 0edeca96e7d7 -r bac78e892e42 arch/x86_64/ia32/syscall32_syscall-xen.S --- a/arch/x86_64/ia32/syscall32_syscall-xen.S Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/x86_64/ia32/syscall32_syscall-xen.S Tue Oct 30 15:38:11 2007 -0600 @@ -2,7 +2,7 @@ .section ".init.data","aw" -#ifdef USE_INT80 +#if CONFIG_XEN_COMPAT < 0x030200 .globl syscall32_int80 .globl syscall32_int80_end diff -r 0edeca96e7d7 -r bac78e892e42 arch/x86_64/kernel/entry-xen.S --- a/arch/x86_64/kernel/entry-xen.S Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/x86_64/kernel/entry-xen.S Tue Oct 30 15:38:11 2007 -0600 @@ -237,7 +237,7 @@ END(ret_from_fork) * r11 eflags for syscall/sysret, temporary for C * r12-r15,rbp,rbx saved by C code, not touched. * - * Interrupts are off on entry. + * Interrupts are enabled on entry. * Only called from user space. 
* * XXX if we had a free scratch register we could save the RSP into the stack frame @@ -252,11 +252,6 @@ ENTRY(system_call) _frame (RIP-0x10) SAVE_ARGS -8,0 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) - /* - * No need to follow this irqs off/on section - it's straight - * and short: - */ - XEN_UNBLOCK_EVENTS(%r11) GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) CFI_REMEMBER_STATE diff -r 0edeca96e7d7 -r bac78e892e42 arch/x86_64/kernel/smp-xen.c --- a/arch/x86_64/kernel/smp-xen.c Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/x86_64/kernel/smp-xen.c Tue Oct 30 15:38:11 2007 -0600 @@ -482,9 +482,7 @@ void smp_stop_cpu(void) */ cpu_clear(smp_processor_id(), cpu_online_map); local_irq_save(flags); -#ifndef CONFIG_XEN - disable_local_APIC(); -#endif + disable_all_local_evtchn(); local_irq_restore(flags); } @@ -512,9 +510,7 @@ void smp_send_stop(void) spin_unlock(&call_lock); local_irq_disable(); -#ifndef CONFIG_XEN - disable_local_APIC(); -#endif + disable_all_local_evtchn(); local_irq_enable(); } diff -r 0edeca96e7d7 -r bac78e892e42 arch/x86_64/kernel/traps-xen.c --- a/arch/x86_64/kernel/traps-xen.c Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/x86_64/kernel/traps-xen.c Tue Oct 30 15:38:11 2007 -0600 @@ -1110,7 +1110,7 @@ static trap_info_t trap_table[] = { #endif { 19, 0|4, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, #ifdef CONFIG_IA32_EMULATION - { IA32_SYSCALL_VECTOR, 3|4, __KERNEL_CS, (unsigned long)ia32_syscall}, + { IA32_SYSCALL_VECTOR, 3, __KERNEL_CS, (unsigned long)ia32_syscall}, #endif { 0, 0, 0, 0 } }; diff -r 0edeca96e7d7 -r bac78e892e42 arch/x86_64/mm/fault-xen.c --- a/arch/x86_64/mm/fault-xen.c Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/x86_64/mm/fault-xen.c Tue Oct 30 15:38:11 2007 -0600 @@ -380,7 +380,7 @@ static int spurious_fault(struct pt_regs return 0; if ((error_code & PF_WRITE) && !pte_write(*pte)) return 0; - if ((error_code & PF_INSTR) && (pte_val(*pte) & _PAGE_NX)) + if ((error_code & PF_INSTR) && 
(__pte_val(*pte) & _PAGE_NX)) return 0; return 1; diff -r 0edeca96e7d7 -r bac78e892e42 arch/x86_64/mm/init-xen.c --- a/arch/x86_64/mm/init-xen.c Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/x86_64/mm/init-xen.c Tue Oct 30 15:38:11 2007 -0600 @@ -272,7 +272,7 @@ static __init void set_pte_phys(unsigned pte = pte_offset_kernel(pmd, vaddr); if (!pte_none(*pte) && - pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask)) + __pte_val(*pte) != (__pte_val(new_pte) & __supported_pte_mask)) pte_ERROR(*pte); set_pte(pte, new_pte); @@ -868,7 +868,7 @@ void __init clear_kernel_mapping(unsigne pmd = pmd_offset(pud, address); if (!pmd || pmd_none(*pmd)) continue; - if (0 == (pmd_val(*pmd) & _PAGE_PSE)) { + if (0 == (__pmd_val(*pmd) & _PAGE_PSE)) { /* Could handle this, but it should not happen currently. */ printk(KERN_ERR "clear_kernel_mapping: mapping has been split. will leak memory\n"); diff -r 0edeca96e7d7 -r bac78e892e42 arch/x86_64/mm/pageattr-xen.c --- a/arch/x86_64/mm/pageattr-xen.c Tue Oct 30 13:42:54 2007 -0600 +++ b/arch/x86_64/mm/pageattr-xen.c Tue Oct 30 15:38:11 2007 -0600 @@ -370,7 +370,7 @@ static void revert_page(unsigned long ad pud = pud_offset(pgd,address); BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, address); - BUG_ON(pmd_val(*pmd) & _PAGE_PSE); + BUG_ON(__pmd_val(*pmd) & _PAGE_PSE); pgprot_val(ref_prot) |= _PAGE_PSE; large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot); set_pte((pte_t *)pmd, large_pte); diff -r 0edeca96e7d7 -r bac78e892e42 buildconfigs/linux-defconfig_xen0_ia64 --- a/buildconfigs/linux-defconfig_xen0_ia64 Tue Oct 30 13:42:54 2007 -0600 +++ b/buildconfigs/linux-defconfig_xen0_ia64 Tue Oct 30 15:38:11 2007 -0600 @@ -1683,6 +1683,7 @@ CONFIG_XEN_SYSFS=y CONFIG_XEN_SYSFS=y CONFIG_XEN_COMPAT_030002_AND_LATER=y # CONFIG_XEN_COMPAT_030004_AND_LATER is not set +# CONFIG_XEN_COMPAT_030100_AND_LATER is not set # CONFIG_XEN_COMPAT_LATEST_ONLY is not set CONFIG_XEN_COMPAT=0x030002 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y diff -r 
0edeca96e7d7 -r bac78e892e42 buildconfigs/linux-defconfig_xen0_x86_32 --- a/buildconfigs/linux-defconfig_xen0_x86_32 Tue Oct 30 13:42:54 2007 -0600 +++ b/buildconfigs/linux-defconfig_xen0_x86_32 Tue Oct 30 15:38:11 2007 -0600 @@ -1421,6 +1421,7 @@ CONFIG_XEN_SYSFS=y CONFIG_XEN_SYSFS=y CONFIG_XEN_COMPAT_030002_AND_LATER=y # CONFIG_XEN_COMPAT_030004_AND_LATER is not set +# CONFIG_XEN_COMPAT_030100_AND_LATER is not set # CONFIG_XEN_COMPAT_LATEST_ONLY is not set CONFIG_XEN_COMPAT=0x030002 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y diff -r 0edeca96e7d7 -r bac78e892e42 buildconfigs/linux-defconfig_xen0_x86_64 --- a/buildconfigs/linux-defconfig_xen0_x86_64 Tue Oct 30 13:42:54 2007 -0600 +++ b/buildconfigs/linux-defconfig_xen0_x86_64 Tue Oct 30 15:38:11 2007 -0600 @@ -1369,6 +1369,7 @@ CONFIG_XEN_SYSFS=y CONFIG_XEN_SYSFS=y CONFIG_XEN_COMPAT_030002_AND_LATER=y # CONFIG_XEN_COMPAT_030004_AND_LATER is not set +# CONFIG_XEN_COMPAT_030100_AND_LATER is not set # CONFIG_XEN_COMPAT_LATEST_ONLY is not set CONFIG_XEN_COMPAT=0x030002 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y diff -r 0edeca96e7d7 -r bac78e892e42 buildconfigs/linux-defconfig_xenU_ia64 --- a/buildconfigs/linux-defconfig_xenU_ia64 Tue Oct 30 13:42:54 2007 -0600 +++ b/buildconfigs/linux-defconfig_xenU_ia64 Tue Oct 30 15:38:11 2007 -0600 @@ -1493,6 +1493,7 @@ CONFIG_XEN_SYSFS=y CONFIG_XEN_SYSFS=y CONFIG_XEN_COMPAT_030002_AND_LATER=y # CONFIG_XEN_COMPAT_030004_AND_LATER is not set +# CONFIG_XEN_COMPAT_030100_AND_LATER is not set # CONFIG_XEN_COMPAT_LATEST_ONLY is not set CONFIG_XEN_COMPAT=0x030002 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y diff -r 0edeca96e7d7 -r bac78e892e42 buildconfigs/linux-defconfig_xenU_x86_32 --- a/buildconfigs/linux-defconfig_xenU_x86_32 Tue Oct 30 13:42:54 2007 -0600 +++ b/buildconfigs/linux-defconfig_xenU_x86_32 Tue Oct 30 15:38:11 2007 -0600 @@ -920,6 +920,7 @@ CONFIG_XEN_SYSFS=y CONFIG_XEN_SYSFS=y CONFIG_XEN_COMPAT_030002_AND_LATER=y # CONFIG_XEN_COMPAT_030004_AND_LATER is not set +# CONFIG_XEN_COMPAT_030100_AND_LATER 
is not set # CONFIG_XEN_COMPAT_LATEST_ONLY is not set CONFIG_XEN_COMPAT=0x030002 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y diff -r 0edeca96e7d7 -r bac78e892e42 buildconfigs/linux-defconfig_xenU_x86_64 --- a/buildconfigs/linux-defconfig_xenU_x86_64 Tue Oct 30 13:42:54 2007 -0600 +++ b/buildconfigs/linux-defconfig_xenU_x86_64 Tue Oct 30 15:38:11 2007 -0600 @@ -1217,6 +1217,7 @@ CONFIG_XEN_SYSFS=y CONFIG_XEN_SYSFS=y CONFIG_XEN_COMPAT_030002_AND_LATER=y # CONFIG_XEN_COMPAT_030004_AND_LATER is not set +# CONFIG_XEN_COMPAT_030100_AND_LATER is not set # CONFIG_XEN_COMPAT_LATEST_ONLY is not set CONFIG_XEN_COMPAT=0x030002 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y diff -r 0edeca96e7d7 -r bac78e892e42 buildconfigs/linux-defconfig_xen_ia64 --- a/buildconfigs/linux-defconfig_xen_ia64 Tue Oct 30 13:42:54 2007 -0600 +++ b/buildconfigs/linux-defconfig_xen_ia64 Tue Oct 30 15:38:11 2007 -0600 @@ -1683,6 +1683,7 @@ CONFIG_XEN_SYSFS=y CONFIG_XEN_SYSFS=y CONFIG_XEN_COMPAT_030002_AND_LATER=y # CONFIG_XEN_COMPAT_030004_AND_LATER is not set +# CONFIG_XEN_COMPAT_030100_AND_LATER is not set # CONFIG_XEN_COMPAT_LATEST_ONLY is not set CONFIG_XEN_COMPAT=0x030002 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y diff -r 0edeca96e7d7 -r bac78e892e42 buildconfigs/linux-defconfig_xen_x86_32 --- a/buildconfigs/linux-defconfig_xen_x86_32 Tue Oct 30 13:42:54 2007 -0600 +++ b/buildconfigs/linux-defconfig_xen_x86_32 Tue Oct 30 15:38:11 2007 -0600 @@ -3280,6 +3280,7 @@ CONFIG_XEN_SYSFS=y CONFIG_XEN_SYSFS=y CONFIG_XEN_COMPAT_030002_AND_LATER=y # CONFIG_XEN_COMPAT_030004_AND_LATER is not set +# CONFIG_XEN_COMPAT_030100_AND_LATER is not set # CONFIG_XEN_COMPAT_LATEST_ONLY is not set CONFIG_XEN_COMPAT=0x030002 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y diff -r 0edeca96e7d7 -r bac78e892e42 buildconfigs/linux-defconfig_xen_x86_64 --- a/buildconfigs/linux-defconfig_xen_x86_64 Tue Oct 30 13:42:54 2007 -0600 +++ b/buildconfigs/linux-defconfig_xen_x86_64 Tue Oct 30 15:38:11 2007 -0600 @@ -3111,6 +3111,7 @@ CONFIG_XEN_SYSFS=y CONFIG_XEN_SYSFS=y 
CONFIG_XEN_COMPAT_030002_AND_LATER=y # CONFIG_XEN_COMPAT_030004_AND_LATER is not set +# CONFIG_XEN_COMPAT_030100_AND_LATER is not set # CONFIG_XEN_COMPAT_LATEST_ONLY is not set CONFIG_XEN_COMPAT=0x030002 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y diff -r 0edeca96e7d7 -r bac78e892e42 drivers/cpufreq/cpufreq_ondemand.c --- a/drivers/cpufreq/cpufreq_ondemand.c Tue Oct 30 13:42:54 2007 -0600 +++ b/drivers/cpufreq/cpufreq_ondemand.c Tue Oct 30 15:38:11 2007 -0600 @@ -222,17 +222,14 @@ static struct attribute_group dbs_attr_g /************************** sysfs end ************************/ -static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) -{ - unsigned int idle_ticks, total_ticks; - unsigned int load; +#ifndef CONFIG_XEN +static int dbs_calc_load(struct cpu_dbs_info_s *this_dbs_info) +{ + struct cpufreq_policy *policy; cputime64_t cur_jiffies; - - struct cpufreq_policy *policy; + cputime64_t total_ticks, idle_ticks; unsigned int j; - - if (!this_dbs_info->enable) - return; + int load; policy = this_dbs_info->cur_policy; cur_jiffies = jiffies64_to_cputime64(get_jiffies_64()); @@ -240,7 +237,7 @@ static void dbs_check_cpu(struct cpu_dbs this_dbs_info->prev_cpu_wall); this_dbs_info->prev_cpu_wall = cur_jiffies; if (!total_ticks) - return; + return 200; /* * Every sampling_rate, we check, if current idle time is less * than 20% (default), then we try to increase frequency @@ -270,6 +267,70 @@ static void dbs_check_cpu(struct cpu_dbs idle_ticks = tmp_idle_ticks; } load = (100 * (total_ticks - idle_ticks)) / total_ticks; + return load; +} +#else + +#include <xen/interface/platform.h> +static int dbs_calc_load(struct cpu_dbs_info_s *this_dbs_info) +{ + int load = 0; + struct xen_platform_op op; + uint64_t idletime[NR_CPUS]; + struct cpufreq_policy *policy; + unsigned int j; + cpumask_t cpumap; + + op.cmd = XENPF_getidletime; + set_xen_guest_handle(op.u.getidletime.cpumap_bitmap, (uint8_t *) cpus_addr(cpumap)); + op.u.getidletime.cpumap_nr_cpus = NR_CPUS;// 
num_online_cpus(); + set_xen_guest_handle(op.u.getidletime.idletime, idletime); + if (HYPERVISOR_platform_op(&op)) + return 200; + + policy = this_dbs_info->cur_policy; + cpumap = policy->cpus; + for_each_cpu_mask(j, cpumap) { + cputime64_t total_idle_nsecs, tmp_idle_nsecs; + cputime64_t total_wall_nsecs, tmp_wall_nsecs; + struct cpu_dbs_info_s *j_dbs_info; + unsigned long tmp_load; + + j_dbs_info = &per_cpu(cpu_dbs_info, j); + total_idle_nsecs = idletime[j]; + tmp_idle_nsecs = cputime64_sub(total_idle_nsecs, + j_dbs_info->prev_cpu_idle); + total_wall_nsecs = op.u.getidletime.now; + tmp_wall_nsecs = cputime64_sub(total_wall_nsecs, + j_dbs_info->prev_cpu_wall); + + if (tmp_wall_nsecs == 0) + return 200; + + j_dbs_info->prev_cpu_wall = total_wall_nsecs; + j_dbs_info->prev_cpu_idle = total_idle_nsecs; + + tmp_load = (100 * (tmp_wall_nsecs - tmp_idle_nsecs)) / + tmp_wall_nsecs; + load = max(load, min(100, (int) tmp_load)); + } + return load; +} +#endif + +static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) +{ + int load; + + struct cpufreq_policy *policy; + + if (!this_dbs_info->enable) + return; + + policy = this_dbs_info->cur_policy; + load = dbs_calc_load(this_dbs_info); + if (load > 100) + return; /* Check for frequency increase */ if (load > dbs_tuners_ins.up_threshold) { diff -r 0edeca96e7d7 -r bac78e892e42 drivers/xen/Kconfig --- a/drivers/xen/Kconfig Tue Oct 30 13:42:54 2007 -0600 +++ b/drivers/xen/Kconfig Tue Oct 30 15:38:11 2007 -0600 @@ -13,7 +13,7 @@ if XEN if XEN config XEN_INTERFACE_VERSION hex - default 0x00030206 + default 0x00030207 menu "XEN" @@ -247,6 +247,9 @@ choice config XEN_COMPAT_030004_AND_LATER bool "3.0.4 and later" + config XEN_COMPAT_030100_AND_LATER + bool "3.1.0 and later" + config XEN_COMPAT_LATEST_ONLY bool "no compatibility code" @@ -255,6 +258,7 @@ config XEN_COMPAT config XEN_COMPAT hex default 0xffffff if XEN_COMPAT_LATEST_ONLY + default 0x030100 if XEN_COMPAT_030100_AND_LATER default 0x030004 if 
XEN_COMPAT_030004_AND_LATER default 0x030002 if XEN_COMPAT_030002_AND_LATER default 0 diff -r 0edeca96e7d7 -r bac78e892e42 drivers/xen/blktap/blktap.c --- a/drivers/xen/blktap/blktap.c Tue Oct 30 13:42:54 2007 -0600 +++ b/drivers/xen/blktap/blktap.c Tue Oct 30 15:38:11 2007 -0600 @@ -115,13 +115,7 @@ static struct tap_blkif *tapfds[MAX_TAP_ static struct tap_blkif *tapfds[MAX_TAP_DEV]; static int blktap_next_minor; -static int __init set_blkif_reqs(char *str) -{ - get_option(&str, &blkif_reqs); - return 1; -} -__setup("blkif_reqs=", set_blkif_reqs); - +module_param(blkif_reqs, int, 0); /* Run-time switchable: /sys/module/blktap/parameters/ */ static unsigned int log_stats = 0; static unsigned int debug_lvl = 0; diff -r 0edeca96e7d7 -r bac78e892e42 drivers/xen/core/evtchn.c --- a/drivers/xen/core/evtchn.c Tue Oct 30 13:42:54 2007 -0600 +++ b/drivers/xen/core/evtchn.c Tue Oct 30 15:38:11 2007 -0600 @@ -892,6 +892,16 @@ void unmask_evtchn(int port) } EXPORT_SYMBOL_GPL(unmask_evtchn); +void disable_all_local_evtchn(void) +{ + unsigned i, cpu = smp_processor_id(); + shared_info_t *s = HYPERVISOR_shared_info; + + for (i = 0; i < NR_EVENT_CHANNELS; ++i) + if (cpu_from_evtchn(i) == cpu) + synch_set_bit(i, &s->evtchn_mask[0]); +} + static void restore_cpu_virqs(int cpu) { struct evtchn_bind_virq bind_virq; diff -r 0edeca96e7d7 -r bac78e892e42 drivers/xen/core/smpboot.c --- a/drivers/xen/core/smpboot.c Tue Oct 30 13:42:54 2007 -0600 +++ b/drivers/xen/core/smpboot.c Tue Oct 30 15:38:11 2007 -0600 @@ -174,6 +174,7 @@ void cpu_bringup(void) void cpu_bringup(void) { cpu_init(); + identify_cpu(cpu_data + smp_processor_id()); touch_softlockup_watchdog(); preempt_disable(); local_irq_enable(); diff -r 0edeca96e7d7 -r bac78e892e42 drivers/xen/netfront/accel.c --- a/drivers/xen/netfront/accel.c Tue Oct 30 13:42:54 2007 -0600 +++ b/drivers/xen/netfront/accel.c Tue Oct 30 15:38:11 2007 -0600 @@ -31,7 +31,7 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include 
<linux/list.h> -#include <linux/kref.h> +#include <linux/mutex.h> #include <xen/xenbus.h> @@ -45,34 +45,40 @@ #define WPRINTK(fmt, args...) \ printk(KERN_WARNING "netfront/accel: " fmt, ##args) -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 5) -#define kref_init(x,y) kref_init(x,y) -#define kref_put(x,y) kref_put(x) -#else -#define kref_init(x,y) kref_init(x) -#define kref_put(x,y) kref_put(x,y) -#endif - /* * List of all netfront accelerator plugin modules available. Each * list entry is of type struct netfront_accelerator. */ static struct list_head accelerators_list; -/* - * Lock to protect access to accelerators_list - */ +/* Lock to protect access to accelerators_list */ static spinlock_t accelerators_lock; -/* Forward declaration of kref cleanup functions */ -static void accel_kref_release(struct kref *ref); -static void vif_kref_release(struct kref *ref); - +/* Mutex to prevent concurrent loads and suspends, etc. */ +DEFINE_MUTEX(accelerator_mutex); void netif_init_accel(void) { INIT_LIST_HEAD(&accelerators_list); spin_lock_init(&accelerators_lock); +} + +void netif_exit_accel(void) +{ + struct netfront_accelerator *accelerator, *tmp; + unsigned flags; + + spin_lock_irqsave(&accelerators_lock, flags); + + list_for_each_entry_safe(accelerator, tmp, &accelerators_list, link) { + BUG_ON(!list_empty(&accelerator->vif_states)); + + list_del(&accelerator->link); + kfree(accelerator->frontend); + kfree(accelerator); + } + + spin_unlock_irqrestore(&accelerators_lock, flags); } @@ -87,9 +93,6 @@ void init_accelerator_vif(struct netfron /* It's assumed that these things don't change */ np->accel_vif_state.np = np; np->accel_vif_state.dev = dev; - - np->accel_vif_state.ready_for_probe = 1; - np->accel_vif_state.need_probe = NULL; } @@ -112,18 +115,25 @@ static void add_accelerator_vif(struct n static void add_accelerator_vif(struct netfront_accelerator *accelerator, struct netfront_info *np) { + unsigned flags; + + /* Need lock to write list */ + 
spin_lock_irqsave(&accelerator->vif_states_lock, flags); + if (np->accelerator == NULL) { np->accelerator = accelerator; list_add(&np->accel_vif_state.link, &accelerator->vif_states); } else { /* - * May get here legitimately if reconnecting to the - * same accelerator, eg. after resume, so check that - * is the case + * May get here legitimately if suspend_cancel is + * called, but in that case configuration should not + * have changed */ BUG_ON(np->accelerator != accelerator); } + + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); } @@ -131,7 +141,8 @@ static void add_accelerator_vif(struct n * Initialise the state to track an accelerator plugin module. */ static int init_accelerator(const char *frontend, - struct netfront_accelerator **result) + struct netfront_accelerator **result, + struct netfront_accel_hooks *hooks) { struct netfront_accelerator *accelerator = kmalloc(sizeof(struct netfront_accelerator), GFP_KERNEL); @@ -154,10 +165,7 @@ static int init_accelerator(const char * INIT_LIST_HEAD(&accelerator->vif_states); spin_lock_init(&accelerator->vif_states_lock); - accelerator->hooks = NULL; - - accelerator->ready_for_probe = 1; - accelerator->need_probe = NULL; + accelerator->hooks = hooks; list_add(&accelerator->link, &accelerators_list); @@ -174,17 +182,9 @@ static void static void accelerator_set_vif_state_hooks(struct netfront_accel_vif_state *vif_state) { - /* This function must be called with the vif_state_lock held */ + /* This function must be called with the vif_states_lock held */ DPRINTK("%p\n",vif_state); - - /* - * Take references to stop hooks disappearing. - * This persists until vif_kref gets to zero. 
- */ - kref_get(&vif_state->np->accelerator->accel_kref); - /* This persists until vif_state->hooks are cleared */ - kref_init(&vif_state->vif_kref, vif_kref_release); /* Make sure there are no data path operations going on */ netif_poll_disable(vif_state->np->netdev); @@ -202,47 +202,22 @@ static void accelerator_probe_new_vif(st struct netfront_accelerator *accelerator) { struct netfront_accel_hooks *hooks; - unsigned flags; - + DPRINTK("\n"); - spin_lock_irqsave(&accelerator->vif_states_lock, flags); - - /* - * Include this frontend device on the accelerator's list - */ + /* Include this frontend device on the accelerator's list */ add_accelerator_vif(accelerator, np); hooks = accelerator->hooks; if (hooks) { - if (np->accel_vif_state.ready_for_probe) { - np->accel_vif_state.ready_for_probe = 0; - - kref_get(&accelerator->accel_kref); - - spin_unlock_irqrestore(&accelerator->vif_states_lock, - flags); - - hooks->new_device(np->netdev, dev); - - kref_put(&accelerator->accel_kref, - accel_kref_release); - /* - * Hooks will get linked into vif_state by a - * future call by the accelerator to - * netfront_accelerator_ready() - */ - return; - } else { - if (np->accel_vif_state.need_probe != NULL) - DPRINTK("Probe request on vif awaiting probe\n"); - np->accel_vif_state.need_probe = hooks; - } - } - - spin_unlock_irqrestore(&accelerator->vif_states_lock, - flags); + hooks->new_device(np->netdev, dev); + /* + * Hooks will get linked into vif_state by a future + * call by the accelerator to netfront_accelerator_ready() + */ + } + return; } @@ -256,10 +231,12 @@ int netfront_load_accelerator(struct net const char *frontend) { struct netfront_accelerator *accelerator; - int rc; + int rc = 0; unsigned flags; DPRINTK(" %s\n", frontend); + + mutex_lock(&accelerator_mutex); spin_lock_irqsave(&accelerators_lock, flags); @@ -273,22 +250,24 @@ int netfront_load_accelerator(struct net accelerator_probe_new_vif(np, dev, accelerator); + mutex_unlock(&accelerator_mutex); return 0; } 
} /* Couldn't find it, so create a new one and load the module */ - if ((rc = init_accelerator(frontend, &accelerator)) < 0) { + if ((rc = init_accelerator(frontend, &accelerator, NULL)) < 0) { spin_unlock_irqrestore(&accelerators_lock, flags); + mutex_unlock(&accelerator_mutex); return rc; } spin_unlock_irqrestore(&accelerators_lock, flags); /* Include this frontend device on the accelerator's list */ - spin_lock_irqsave(&accelerator->vif_states_lock, flags); add_accelerator_vif(accelerator, np); - spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); + + mutex_unlock(&accelerator_mutex); DPRINTK("requesting module %s\n", frontend); @@ -300,7 +279,7 @@ int netfront_load_accelerator(struct net * it's up and running, and we can continue from there */ - return 0; + return rc; } @@ -312,21 +291,11 @@ int netfront_load_accelerator(struct net */ static void accelerator_probe_vifs(struct netfront_accelerator *accelerator, - struct netfront_accel_hooks *hooks, - unsigned lock_flags) + struct netfront_accel_hooks *hooks) { struct netfront_accel_vif_state *vif_state, *tmp; - /* Calling function must have taken the vif_states_lock */ - DPRINTK("%p\n", accelerator); - - /* - * kref_init() takes a single reference to the hooks that will - * persist until the accelerator hooks are removed (e.g. by - * accelerator module unload) - */ - kref_init(&accelerator->accel_kref, accel_kref_release); /* * Store the hooks for future calls to probe a new device, and @@ -335,71 +304,23 @@ accelerator_probe_vifs(struct netfront_a */ BUG_ON(hooks == NULL); accelerator->hooks = hooks; - + + /* + * currently hold accelerator_mutex, so don't need + * vif_states_lock to read the list + */ list_for_each_entry_safe(vif_state, tmp, &accelerator->vif_states, link) { struct netfront_info *np = vif_state->np; - - if (vif_state->ready_for_probe) { - vif_state->ready_for_probe = 0; - kref_get(&accelerator->accel_kref); - - /* - * drop lock before calling hook. 
hooks are - * protected by the kref - */ - spin_unlock_irqrestore(&accelerator->vif_states_lock, - lock_flags); - - hooks->new_device(np->netdev, vif_state->dev); - - kref_put(&accelerator->accel_kref, accel_kref_release); - - /* Retake lock for next go round the loop */ - spin_lock_irqsave(&accelerator->vif_states_lock, lock_flags); - - /* - * Hooks will get linked into vif_state by a call to - * netfront_accelerator_ready() once accelerator - * plugin is ready for action - */ - } else { - if (vif_state->need_probe != NULL) - DPRINTK("Probe request on vif awaiting probe\n"); - vif_state->need_probe = hooks; - } - } - - /* Return with vif_states_lock held, as on entry */ -} - - -/* - * Wrapper for accelerator_probe_vifs that checks now is a good time - * to do the probe, and postpones till previous state cleared up if - * necessary - */ -static void -accelerator_probe_vifs_on_load(struct netfront_accelerator *accelerator, - struct netfront_accel_hooks *hooks) -{ - unsigned flags; - - DPRINTK("\n"); - - spin_lock_irqsave(&accelerator->vif_states_lock, flags); - - if (accelerator->ready_for_probe) { - accelerator->ready_for_probe = 0; - accelerator_probe_vifs(accelerator, hooks, flags); - } else { - if (accelerator->need_probe) - DPRINTK("Probe request on accelerator awaiting probe\n"); - accelerator->need_probe = hooks; - } - - spin_unlock_irqrestore(&accelerator->vif_states_lock, - flags); + + hooks->new_device(np->netdev, vif_state->dev); + + /* + * Hooks will get linked into vif_state by a call to + * netfront_accelerator_ready() once accelerator + * plugin is ready for action + */ + } } @@ -427,6 +348,8 @@ int netfront_accelerator_loaded(int vers } } + mutex_lock(&accelerator_mutex); + spin_lock_irqsave(&accelerators_lock, flags); /* @@ -437,9 +360,9 @@ int netfront_accelerator_loaded(int vers if (match_accelerator(frontend, accelerator)) { spin_unlock_irqrestore(&accelerators_lock, flags); - accelerator_probe_vifs_on_load(accelerator, hooks); - - return 0; + 
accelerator_probe_vifs(accelerator, hooks); + + goto out; } } @@ -450,10 +373,12 @@ int netfront_accelerator_loaded(int vers DPRINTK("Couldn't find matching accelerator (%s)\n", frontend); - init_accelerator(frontend, &accelerator); + init_accelerator(frontend, &accelerator, hooks); spin_unlock_irqrestore(&accelerators_lock, flags); + out: + mutex_unlock(&accelerator_mutex); return 0; } EXPORT_SYMBOL_GPL(netfront_accelerator_loaded); @@ -477,6 +402,10 @@ void netfront_accelerator_ready(const ch list_for_each_entry(accelerator, &accelerators_list, link) { if (match_accelerator(frontend, accelerator)) { + /* + * Mutex not held so need vif_states_lock for + * list + */ spin_lock_irqsave (&accelerator->vif_states_lock, flags1); @@ -489,11 +418,9 @@ void netfront_accelerator_ready(const ch spin_unlock_irqrestore (&accelerator->vif_states_lock, flags1); - goto done; + break; } } - - done: spin_unlock_irqrestore(&accelerators_lock, flags); } EXPORT_SYMBOL_GPL(netfront_accelerator_ready); @@ -526,36 +453,30 @@ accelerator_remove_single_hook(struct ne /* * Safely remove the accelerator function hooks from a netfront state. 
*/ -static void accelerator_remove_hooks(struct netfront_accelerator *accelerator, - int remove_master) -{ +static void accelerator_remove_hooks(struct netfront_accelerator *accelerator) +{ + struct netfront_accel_hooks *hooks; struct netfront_accel_vif_state *vif_state, *tmp; unsigned flags; - spin_lock_irqsave(&accelerator->vif_states_lock, flags); - + /* Mutex is held so don't need vif_states_lock to iterate list */ list_for_each_entry_safe(vif_state, tmp, &accelerator->vif_states, link) { + spin_lock_irqsave(&accelerator->vif_states_lock, flags); + + BUG_ON(vif_state->hooks == NULL); + hooks = vif_state->hooks; accelerator_remove_single_hook(accelerator, vif_state); - /* - * Remove the reference taken when the vif_state hooks - * were set, must be called without lock held - */ spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); - kref_put(&vif_state->vif_kref, vif_kref_release); - spin_lock_irqsave(&accelerator->vif_states_lock, flags); + + /* Last chance to get statistics from the accelerator */ + hooks->get_stats(vif_state->np->netdev, &vif_state->np->stats); + hooks->remove(vif_state->dev); } - if(remove_master) - accelerator->hooks = NULL; - - spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); - - if(remove_master) - /* Remove the reference taken when module loaded */ - kref_put(&accelerator->accel_kref, accel_kref_release); + accelerator->hooks = NULL; } @@ -564,369 +485,266 @@ static void accelerator_remove_hooks(str * removes the hooks into the plugin and blocks until all devices have * finished using it, so on return it is safe to unload. 
*/ -void netfront_accelerator_stop(const char *frontend, int unloading) -{ - struct netfront_accelerator *accelerator; - unsigned flags; - +void netfront_accelerator_stop(const char *frontend) +{ + struct netfront_accelerator *accelerator; + unsigned flags; + + mutex_lock(&accelerator_mutex); spin_lock_irqsave(&accelerators_lock, flags); list_for_each_entry(accelerator, &accelerators_list, link) { if (match_accelerator(frontend, accelerator)) { spin_unlock_irqrestore(&accelerators_lock, flags); - /* - * Use semaphore to ensure we know when all - * uses of hooks are complete - */ - sema_init(&accelerator->exit_semaphore, 0); - - accelerator_remove_hooks(accelerator, unloading); - - if (unloading) - /* Wait for hooks to be unused, then return */ - down(&accelerator->exit_semaphore); - - return; + accelerator_remove_hooks(accelerator); + + goto out; } } spin_unlock_irqrestore(&accelerators_lock, flags); + out: + mutex_unlock(&accelerator_mutex); } EXPORT_SYMBOL_GPL(netfront_accelerator_stop); - -int netfront_check_accelerator_queue_busy(struct net_device *dev, - struct netfront_info *np) -{ - struct netfront_accel_hooks *hooks; - int rc = 1; - unsigned flags; - - /* - * Call the check busy accelerator hook. 
The use count for the - * accelerator's hooks is incremented for the duration of the - * call to prevent the accelerator being able to modify the - * hooks in the middle (by, for example, unloading) - */ +/* Helper for call_remove and do_suspend */ +static int do_remove(struct netfront_info *np, struct xenbus_device *dev, + unsigned *lock_flags) +{ + struct netfront_accelerator *accelerator = np->accelerator; + struct netfront_accel_hooks *hooks; + int rc = 0; + if (np->accel_vif_state.hooks) { - spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); hooks = np->accel_vif_state.hooks; - if (hooks) { - kref_get(&np->accel_vif_state.vif_kref); - spin_unlock_irqrestore - (&np->accelerator->vif_states_lock, flags); - - rc = np->accel_vif_state.hooks->check_busy(dev); - - kref_put(&np->accel_vif_state.vif_kref, - vif_kref_release); - } else { - spin_unlock_irqrestore - (&np->accelerator->vif_states_lock, flags); - } - } - - return rc; -} - - + + /* + * Try and do the opposite of accelerator_probe_new_vif + * to ensure there's no state pointing back at the + * netdev + */ + accelerator_remove_single_hook(accelerator, + &np->accel_vif_state); + + /* Last chance to get statistics from the accelerator */ + hooks->get_stats(np->netdev, &np->stats); + + spin_unlock_irqrestore(&accelerator->vif_states_lock, + *lock_flags); + + rc = hooks->remove(dev); + + spin_lock_irqsave(&accelerator->vif_states_lock, *lock_flags); + } + + + return rc; +} + + int netfront_accelerator_call_remove(struct netfront_info *np, - struct xenbus_device *dev) -{ - struct netfront_accelerator *accelerator = np->accelerator; - struct netfront_accel_vif_state *tmp_vif_state; - struct netfront_accel_hooks *hooks; - unsigned flags; - int rc = 0; - - /* - * Call the remove accelerator hook. 
The use count for the - * accelerator's hooks is incremented for the duration of the - * call to prevent the accelerator being able to modify the - * hooks in the middle (by, for example, unloading) - */ - spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); - hooks = np->accel_vif_state.hooks; - - /* - * Remove this vif_state from the accelerator's list - */ - list_for_each_entry(tmp_vif_state, &accelerator->vif_states, link) { + struct xenbus_device *dev) +{ + struct netfront_accelerator *accelerator; + struct netfront_accel_vif_state *tmp_vif_state; + unsigned flags; + int rc = 0; + + mutex_lock(&accelerator_mutex); + + /* Check that we've got a device that was accelerated */ + if (np->accelerator == NULL) + goto out; + + accelerator = np->accelerator; + + spin_lock_irqsave(&accelerator->vif_states_lock, flags); + + list_for_each_entry(tmp_vif_state, &accelerator->vif_states, + link) { if (tmp_vif_state == &np->accel_vif_state) { list_del(&np->accel_vif_state.link); break; } } - - if (hooks) { - kref_get(&np->accel_vif_state.vif_kref); - spin_unlock_irqrestore - (&np->accelerator->vif_states_lock, flags); - - rc = np->accel_vif_state.hooks->remove(dev); - - kref_put(&np->accel_vif_state.vif_kref, - vif_kref_release); - - spin_lock_irqsave(&np->accelerator->vif_states_lock, - flags); - - /* - * Try and do the opposite of accelerator_probe_new_vif - * to ensure there's no state pointing back at the - * netdev - */ - accelerator_remove_single_hook(accelerator, - &np->accel_vif_state); - - /* - * Remove the reference taken when the vif_state hooks - * were set, must be called without lock held - */ - spin_unlock_irqrestore(&accelerator->vif_states_lock, - flags); - kref_put(&np->accel_vif_state.vif_kref, - vif_kref_release); - } else { - spin_unlock_irqrestore(&np->accelerator->vif_states_lock, - flags); - } - + + rc = do_remove(np, dev, &flags); + + np->accelerator = NULL; + + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); + out: + 
mutex_unlock(&accelerator_mutex); return rc; } - - -int netfront_accelerator_call_suspend(struct netfront_info *np, - struct xenbus_device *dev) -{ + + +int netfront_accelerator_suspend(struct netfront_info *np, + struct xenbus_device *dev) +{ + unsigned flags; + int rc = 0; + + mutex_lock(&accelerator_mutex); + + /* Check that we've got a device that was accelerated */ + if (np->accelerator == NULL) + goto out; + + /* + * Call the remove accelerator hook, but leave the vif_state + * on the accelerator's list in case there is a suspend_cancel. + */ + spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); + + rc = do_remove(np, dev, &flags); + + spin_unlock_irqrestore(&np->accelerator->vif_states_lock, flags); + out: + mutex_unlock(&accelerator_mutex); + return rc; +} + + +int netfront_accelerator_suspend_cancel(struct netfront_info *np, + struct xenbus_device *dev) +{ + struct netfront_accel_vif_state *accel_vif_state = NULL; + + mutex_lock(&accelerator_mutex); + + /* Check that we've got a device that was accelerated */ + if (np->accelerator == NULL) + goto out; + + /* Find the vif_state from the accelerator's list */ + list_for_each_entry(accel_vif_state, &np->accelerator->vif_states, + link) { + if (accel_vif_state->dev == dev) { + BUG_ON(accel_vif_state != &np->accel_vif_state); + + /* + * Kick things off again to restore + * acceleration as it was before suspend + */ + accelerator_probe_new_vif(np, dev, np->accelerator); + + break; + } + } + + out: + mutex_unlock(&accelerator_mutex); + return 0; +} + + +void netfront_accelerator_resume(struct netfront_info *np, + struct xenbus_device *dev) +{ + struct netfront_accel_vif_state *accel_vif_state = NULL; + spinlock_t *vif_states_lock; + unsigned flags; + + mutex_lock(&accelerator_mutex); + + /* Check that we've got a device that was accelerated */ + if(np->accelerator == NULL) + goto out; + + /* Find the vif_state from the accelerator's list */ + list_for_each_entry(accel_vif_state, 
&np->accelerator->vif_states, + link) { + if (accel_vif_state->dev == dev) { + BUG_ON(accel_vif_state != &np->accel_vif_state); + + vif_states_lock = &np->accelerator->vif_states_lock; + spin_lock_irqsave(vif_states_lock, flags); + + /* + * Remove it from the accelerator's list so + * state is consistent for probing new vifs + * when they get connected + */ + list_del(&accel_vif_state->link); + np->accelerator = NULL; + + spin_unlock_irqrestore(vif_states_lock, flags); + + break; + } + } + + out: + mutex_unlock(&accelerator_mutex); + return; +} + + +int netfront_check_accelerator_queue_ready(struct net_device *dev, + struct netfront_info *np) +{ + struct netfront_accelerator *accelerator; struct netfront_accel_hooks *hooks; - unsigned flags; - int rc = 0; - - IPRINTK("netfront_accelerator_call_suspend\n"); - - /* - * Call the suspend accelerator hook. The use count for the - * accelerator's hooks is incremented for the duration of - * the call to prevent the accelerator being able to modify - * the hooks in the middle (by, for example, unloading) - */ - if (np->accel_vif_state.hooks) { - spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); + int rc = 1; + unsigned flags; + + accelerator = np->accelerator; + + /* Call the check_ready accelerator hook. 
*/ + if (np->accel_vif_state.hooks && accelerator) { + spin_lock_irqsave(&accelerator->vif_states_lock, flags); hooks = np->accel_vif_state.hooks; - if (hooks) { - kref_get(&np->accel_vif_state.vif_kref); - spin_unlock_irqrestore - (&np->accelerator->vif_states_lock, flags); - - rc = np->accel_vif_state.hooks->suspend(dev); - - kref_put(&np->accel_vif_state.vif_kref, - vif_kref_release); - } else { - spin_unlock_irqrestore - (&np->accelerator->vif_states_lock, flags); - } - } + if (hooks && np->accelerator == accelerator) + rc = np->accel_vif_state.hooks->check_ready(dev); + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); + } + return rc; -} - - -int netfront_accelerator_call_suspend_cancel(struct netfront_info *np, - struct xenbus_device *dev) -{ - struct netfront_accel_hooks *hooks; - unsigned flags; - int rc = 0; - - IPRINTK(" netfront_accelerator_call_suspend_cancel\n"); - - /* - * Call the suspend_cancel accelerator hook. The use count - * for the accelerator's hooks is incremented for the - * duration of the call to prevent the accelerator being able - * to modify the hooks in the middle (by, for example, - * unloading) - */ - if (np->accel_vif_state.hooks) { - spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); - hooks = np->accel_vif_state.hooks; - if (hooks) { - kref_get(&np->accel_vif_state.vif_kref); - spin_unlock_irqrestore - (&np->accelerator->vif_states_lock, flags); - - rc = np->accel_vif_state.hooks->suspend_cancel(dev); - - kref_put(&np->accel_vif_state.vif_kref, - vif_kref_release); - } else { - spin_unlock_irqrestore - (&np->accelerator->vif_states_lock, flags); - } - } - return rc; -} - - -int netfront_accelerator_call_resume(struct netfront_info *np, - struct xenbus_device *dev) -{ - struct netfront_accel_hooks *hooks; - unsigned flags; - int rc = 0; - - /* - * Call the resume accelerator hook. 
The use count for the - * accelerator's hooks is incremented for the duration of - * the call to prevent the accelerator being able to modify - * the hooks in the middle (by, for example, unloading) - */ - if (np->accel_vif_state.hooks) { - spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); - hooks = np->accel_vif_state.hooks; - if (hooks) { - kref_get(&np->accel_vif_state.vif_kref); - spin_unlock_irqrestore - (&np->accelerator->vif_states_lock, flags); - - rc = np->accel_vif_state.hooks->resume(dev); - - kref_put(&np->accel_vif_state.vif_kref, - vif_kref_release); - } else { - spin_unlock_irqrestore - (&np->accelerator->vif_states_lock, flags); - } - } - return rc; -} - - -void netfront_accelerator_call_backend_changed(struct netfront_info *np, - struct xenbus_device *dev, - enum xenbus_state backend_state) -{ - struct netfront_accel_hooks *hooks; - unsigned flags; - - /* - * Call the backend_changed accelerator hook. The use count - * for the accelerator's hooks is incremented for the duration - * of the call to prevent the accelerator being able to modify - * the hooks in the middle (by, for example, unloading) - */ - if (np->accel_vif_state.hooks) { - spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); - hooks = np->accel_vif_state.hooks; - if (hooks) { - kref_get(&np->accel_vif_state.vif_kref); - spin_unlock_irqrestore - (&np->accelerator->vif_states_lock, flags); - - np->accel_vif_state.hooks->backend_changed - (dev, backend_state); - - kref_put(&np->accel_vif_state.vif_kref, - vif_kref_release); - } else { - spin_unlock_irqrestore - (&np->accelerator->vif_states_lock, flags); - } - } } void netfront_accelerator_call_stop_napi_irq(struct netfront_info *np, struct net_device *dev) { + struct netfront_accelerator *accelerator; struct netfront_accel_hooks *hooks; unsigned flags; - /* - * Call the stop_napi_interrupts accelerator hook. 
The use - * count for the accelerator's hooks is incremented for the - * duration of the call to prevent the accelerator being able - * to modify the hooks in the middle (by, for example, - * unloading) - */ - - if (np->accel_vif_state.hooks) { - spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); + accelerator = np->accelerator; + + /* Call the stop_napi_interrupts accelerator hook. */ + if (np->accel_vif_state.hooks && accelerator != NULL) { + spin_lock_irqsave(&accelerator->vif_states_lock, flags); hooks = np->accel_vif_state.hooks; - if (hooks) { - kref_get(&np->accel_vif_state.vif_kref); - spin_unlock_irqrestore - (&np->accelerator->vif_states_lock, flags); - + if (hooks && np->accelerator == accelerator) np->accel_vif_state.hooks->stop_napi_irq(dev); - - kref_put(&np->accel_vif_state.vif_kref, - vif_kref_release); - } else { - spin_unlock_irqrestore - (&np->accelerator->vif_states_lock, flags); - } - } -} - - -/* - * Once all users of hooks have kref_put()'d we can signal that it's - * safe to unload - */ -static void accel_kref_release(struct kref *ref) -{ - struct netfront_accelerator *accelerator = - container_of(ref, struct netfront_accelerator, accel_kref); + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); + } +} + + +int netfront_accelerator_call_get_stats(struct netfront_info *np, + struct net_device *dev) +{ + struct netfront_accelerator *accelerator; struct netfront_accel_hooks *hooks; unsigned flags; - - DPRINTK("%p\n", accelerator); - - /* Signal that all users of hooks are done */ - up(&accelerator->exit_semaphore); - - spin_lock_irqsave(&accelerator->vif_states_lock, flags); - if (accelerator->need_probe) { - hooks = accelerator->need_probe; - accelerator->need_probe = NULL; - accelerator_probe_vifs(accelerator, hooks, flags); - } - else - accelerator->ready_for_probe = 1; - - spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); -} - - -static void vif_kref_release(struct kref *ref) -{ - struct 
netfront_accel_vif_state *vif_state = - container_of(ref, struct netfront_accel_vif_state, vif_kref); - struct netfront_accel_hooks *hooks; - unsigned flags; - - DPRINTK("%p\n", vif_state); - - /* - * Now that this vif has finished using the hooks, it can - * decrement the accelerator's global copy ref count - */ - kref_put(&vif_state->np->accelerator->accel_kref, accel_kref_release); - - spin_lock_irqsave(&vif_state->np->accelerator->vif_states_lock, flags); - if (vif_state->need_probe) { - hooks = vif_state->need_probe; - vif_state->need_probe = NULL; - spin_unlock_irqrestore - (&vif_state->np->accelerator->vif_states_lock, flags); - hooks->new_device(vif_state->np->netdev, vif_state->dev); - } else { - vif_state->ready_for_probe = 1; - spin_unlock_irqrestore - (&vif_state->np->accelerator->vif_states_lock, flags); - } -} - + int rc = 0; + + accelerator = np->accelerator; + + /* Call the get_stats accelerator hook. */ + if (np->accel_vif_state.hooks && accelerator != NULL) { + spin_lock_irqsave(&accelerator->vif_states_lock, flags); + hooks = np->accel_vif_state.hooks; + if (hooks && np->accelerator == accelerator) + rc = np->accel_vif_state.hooks->get_stats(dev, + &np->stats); + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); + } + return rc; +} + diff -r 0edeca96e7d7 -r bac78e892e42 drivers/xen/netfront/netfront.c --- a/drivers/xen/netfront/netfront.c Tue Oct 30 13:42:54 2007 -0600 +++ b/drivers/xen/netfront/netfront.c Tue Oct 30 15:38:11 2007 -0600 @@ -308,14 +308,14 @@ static int netfront_suspend(struct xenbu static int netfront_suspend(struct xenbus_device *dev) { struct netfront_info *info = dev->dev.driver_data; - return netfront_accelerator_call_suspend(info, dev); + return netfront_accelerator_suspend(info, dev); } static int netfront_suspend_cancel(struct xenbus_device *dev) { struct netfront_info *info = dev->dev.driver_data; - return netfront_accelerator_call_suspend_cancel(info, dev); + return netfront_accelerator_suspend_cancel(info, 
dev); } @@ -331,7 +331,7 @@ static int netfront_resume(struct xenbus DPRINTK("%s\n", dev->nodename); - netfront_accelerator_call_resume(info, dev); + netfront_accelerator_resume(info, dev); netif_disconnect_backend(info); return 0; @@ -549,8 +549,6 @@ static void backend_changed(struct xenbu xenbus_frontend_closed(dev); break; } - - netfront_accelerator_call_backend_changed(np, dev, backend_state); } /** Send a packet on a net device to encourage switches to learn the @@ -595,12 +593,12 @@ static inline void network_maybe_wake_tx if (unlikely(netif_queue_stopped(dev)) && netfront_tx_slot_available(np) && likely(netif_running(dev)) && - netfront_check_accelerator_queue_busy(dev, np)) + netfront_check_accelerator_queue_ready(dev, np)) netif_wake_queue(dev); } -int netfront_check_queue_busy(struct net_device *dev) +int netfront_check_queue_ready(struct net_device *dev) { struct netfront_info *np = netdev_priv(dev); @@ -608,7 +606,7 @@ int netfront_check_queue_busy(struct net netfront_tx_slot_available(np) && likely(netif_running(dev)); } -EXPORT_SYMBOL(netfront_check_queue_busy); +EXPORT_SYMBOL(netfront_check_queue_ready); static int network_open(struct net_device *dev) @@ -1507,7 +1505,7 @@ err: * fast path is likewise */ accel_more_to_do = - np->accel_vif_state.hooks->start_napi_irq(dev); + np->accel_vif_state.hooks->start_napi_irq(dev); } if (!more_to_do && !accel_more_to_do) @@ -1674,6 +1672,8 @@ static struct net_device_stats *network_ static struct net_device_stats *network_get_stats(struct net_device *dev) { struct netfront_info *np = netdev_priv(dev); + + netfront_accelerator_call_get_stats(np, dev); return &np->stats; } @@ -2210,6 +2210,8 @@ static void __exit netif_exit(void) unregister_inetaddr_notifier(&notifier_inetdev); + netif_exit_accel(); + return xenbus_unregister_driver(&netfront); } module_exit(netif_exit); diff -r 0edeca96e7d7 -r bac78e892e42 drivers/xen/netfront/netfront.h --- a/drivers/xen/netfront/netfront.h Tue Oct 30 13:42:54 2007 -0600 +++
b/drivers/xen/netfront/netfront.h Tue Oct 30 15:38:11 2007 -0600 @@ -37,7 +37,6 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/list.h> -#include <linux/kref.h> #define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE) #define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE) @@ -60,19 +59,9 @@ struct netfront_accel_hooks { */ int (*new_device)(struct net_device *net_dev, struct xenbus_device *dev); /* - * suspend, suspend_cancel, resume, remove: Equivalent to the - * normal xenbus_* callbacks - */ - int (*suspend)(struct xenbus_device *dev); - int (*suspend_cancel)(struct xenbus_device *dev); - int (*resume)(struct xenbus_device *dev); + * remove: Opposite of new_device + */ int (*remove)(struct xenbus_device *dev); - /* - * backend_changed: Callback from watch based on backend's - * xenbus state changing - */ - void (*backend_changed)(struct xenbus_device *dev, - enum xenbus_state backend_state); /* * The net_device is being polled, check the accelerated * hardware for any pending packets @@ -95,13 +84,18 @@ struct netfront_accel_hooks { * Called before re-enabling the TX queue to check the fast * path has slots too */ - int (*check_busy)(struct net_device *dev); + int (*check_ready)(struct net_device *dev); + /* + * Get the fastpath network statistics + */ + int (*get_stats)(struct net_device *dev, + struct net_device_stats *stats); }; /* Version of API/protocol for communication between netfront and acceleration plugin supported */ -#define NETFRONT_ACCEL_VERSION 0x00010000 +#define NETFRONT_ACCEL_VERSION 0x00010002 /* * Per-netfront device state for the accelerator. This is used to @@ -114,14 +108,6 @@ struct netfront_accel_vif_state { struct xenbus_device *dev; struct netfront_info *np; struct netfront_accel_hooks *hooks; - - /* - * Protect against removal of hooks while in use. 
- */ - struct kref vif_kref; - - unsigned ready_for_probe; - struct netfront_accel_hooks *need_probe; }; /* @@ -142,10 +128,7 @@ struct netfront_accelerator { char *frontend; /* The hooks into the accelerator plugin module */ struct netfront_accel_hooks *hooks; - /* - * Protect against removal of hooks while in use. - */ - struct kref accel_kref; + /* * List of per-netfront device state (struct * netfront_accel_vif_state) for each netfront device that is @@ -153,14 +136,6 @@ struct netfront_accelerator { */ struct list_head vif_states; spinlock_t vif_states_lock; - /* - * Semaphore to signal that all users of this accelerator have - * finished using it before module is unloaded - */ - struct semaphore exit_semaphore; - - unsigned ready_for_probe; - struct netfront_accel_hooks *need_probe; }; struct netfront_info { @@ -251,18 +226,15 @@ extern void netfront_accelerator_ready(c * * frontend: the string describing the accelerator. Must match the * one passed to netfront_accelerator_loaded() - * - * wait: 1 => wait for all users of module to complete before - * returning, thus making it safe to unload on return */ -extern void netfront_accelerator_stop(const char *frontend, int wait); +extern void netfront_accelerator_stop(const char *frontend); /* * Called by an accelerator before waking the net device's TX queue to * ensure the slow path has available slots. 
Returns true if OK to * wake, false if still busy */ -extern int netfront_check_queue_busy(struct net_device *net_dev); +extern int netfront_check_queue_ready(struct net_device *net_dev); @@ -274,28 +246,26 @@ extern int netfront_check_queue_busy(str * if still busy */ extern -int netfront_check_accelerator_queue_busy(struct net_device *dev, - struct netfront_info *np); +int netfront_check_accelerator_queue_ready(struct net_device *dev, + struct netfront_info *np); extern int netfront_accelerator_call_remove(struct netfront_info *np, struct xenbus_device *dev); extern -int netfront_accelerator_call_suspend(struct netfront_info *np, - struct xenbus_device *dev); -extern -int netfront_accelerator_call_suspend_cancel(struct netfront_info *np, - struct xenbus_device *dev); -extern -int netfront_accelerator_call_resume(struct netfront_info *np, - struct xenbus_device *dev); -extern -void netfront_accelerator_call_backend_changed(struct netfront_info *np, - struct xenbus_device *dev, - enum xenbus_state backend_state); +int netfront_accelerator_suspend(struct netfront_info *np, + struct xenbus_device *dev); +extern +int netfront_accelerator_suspend_cancel(struct netfront_info *np, + struct xenbus_device *dev); +extern +void netfront_accelerator_resume(struct netfront_info *np, + struct xenbus_device *dev); extern void netfront_accelerator_call_stop_napi_irq(struct netfront_info *np, struct net_device *dev); - +extern +int netfront_accelerator_call_get_stats(struct netfront_info *np, + struct net_device *dev); extern int netfront_load_accelerator(struct netfront_info *np, struct xenbus_device *dev, @@ -303,6 +273,8 @@ int netfront_load_accelerator(struct net extern void netif_init_accel(void); +extern +void netif_exit_accel(void); extern void init_accelerator_vif(struct netfront_info *np, diff -r 0edeca96e7d7 -r bac78e892e42 drivers/xen/xenbus/xenbus_probe.c --- a/drivers/xen/xenbus/xenbus_probe.c Tue Oct 30 13:42:54 2007 -0600 +++ b/drivers/xen/xenbus/xenbus_probe.c Tue 
Oct 30 15:38:11 2007 -0600 @@ -69,7 +69,7 @@ static unsigned long xen_store_mfn; extern struct mutex xenwatch_mutex; -static ATOMIC_NOTIFIER_HEAD(xenstore_chain); +static BLOCKING_NOTIFIER_HEAD(xenstore_chain); static void wait_for_devices(struct xenbus_driver *xendrv); @@ -825,7 +825,7 @@ int register_xenstore_notifier(struct no if (xenstored_ready > 0) ret = nb->notifier_call(nb, 0, NULL); else - atomic_notifier_chain_register(&xenstore_chain, nb); + blocking_notifier_chain_register(&xenstore_chain, nb); return ret; } @@ -833,7 +833,7 @@ EXPORT_SYMBOL_GPL(register_xenstore_noti void unregister_xenstore_notifier(struct notifier_block *nb) { - atomic_notifier_chain_unregister(&xenstore_chain, nb); + blocking_notifier_chain_unregister(&xenstore_chain, nb); } EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); @@ -848,7 +848,7 @@ void xenbus_probe(void *unused) xenbus_backend_probe_and_watch(); /* Notify others that xenstore is up */ - atomic_notifier_call_chain(&xenstore_chain, 0, NULL); + blocking_notifier_call_chain(&xenstore_chain, 0, NULL); } diff -r 0edeca96e7d7 -r bac78e892e42 include/asm-i386/mach-xen/asm/page.h --- a/include/asm-i386/mach-xen/asm/page.h Tue Oct 30 13:42:54 2007 -0600 +++ b/include/asm-i386/mach-xen/asm/page.h Tue Oct 30 15:38:11 2007 -0600 @@ -36,14 +36,6 @@ #include <asm/bug.h> #include <xen/interface/xen.h> #include <xen/features.h> - -#define arch_free_page(_page,_order) \ -({ int foreign = PageForeign(_page); \ - if (foreign) \ - PageForeignDestructor(_page); \ - foreign; \ -}) -#define HAVE_ARCH_FREE_PAGE #ifdef CONFIG_X86_USE_3DNOW diff -r 0edeca96e7d7 -r bac78e892e42 include/asm-ia64/page.h --- a/include/asm-ia64/page.h Tue Oct 30 13:42:54 2007 -0600 +++ b/include/asm-ia64/page.h Tue Oct 30 15:38:11 2007 -0600 @@ -239,15 +239,6 @@ get_order (unsigned long size) #include <asm/maddr.h> -#define arch_free_page(_page, _order) \ -({ \ - int foreign = PageForeign(_page); \ - if (foreign) \ - PageForeignDestructor(_page); \ - foreign; \ -}) 
-#define HAVE_ARCH_FREE_PAGE - #endif /* CONFIG_XEN */ #endif /* __ASSEMBLY__ */ diff -r 0edeca96e7d7 -r bac78e892e42 include/asm-powerpc/page.h --- a/include/asm-powerpc/page.h Tue Oct 30 13:42:54 2007 -0600 +++ b/include/asm-powerpc/page.h Tue Oct 30 15:38:11 2007 -0600 @@ -194,15 +194,6 @@ struct vm_area_struct; struct vm_area_struct; extern const char *arch_vma_name(struct vm_area_struct *vma); -#define arch_free_page(_page, _order) \ -({ \ - int foreign = PageForeign(_page); \ - if (foreign) \ - PageForeignDestructor(_page); \ - foreign; \ -}) -#define HAVE_ARCH_FREE_PAGE - #include <asm-generic/memory_model.h> #endif /* __ASSEMBLY__ */ diff -r 0edeca96e7d7 -r bac78e892e42 include/asm-um/page.h --- a/include/asm-um/page.h Tue Oct 30 13:42:54 2007 -0600 +++ b/include/asm-um/page.h Tue Oct 30 15:38:11 2007 -0600 @@ -114,7 +114,7 @@ extern struct page *arch_validate(struct extern struct page *arch_validate(struct page *page, gfp_t mask, int order); #define HAVE_ARCH_VALIDATE -extern int arch_free_page(struct page *page, int order); +extern void arch_free_page(struct page *page, int order); #define HAVE_ARCH_FREE_PAGE #include <asm-generic/memory_model.h> diff -r 0edeca96e7d7 -r bac78e892e42 include/asm-x86_64/mach-xen/asm/page.h --- a/include/asm-x86_64/mach-xen/asm/page.h Tue Oct 30 13:42:54 2007 -0600 +++ b/include/asm-x86_64/mach-xen/asm/page.h Tue Oct 30 15:38:11 2007 -0600 @@ -15,14 +15,6 @@ * below. The preprocessor will warn if the two definitions aren't identical. 
*/ #define _PAGE_PRESENT 0x001 - -#define arch_free_page(_page,_order) \ -({ int foreign = PageForeign(_page); \ - if (foreign) \ - PageForeignDestructor(_page); \ - foreign; \ -}) -#define HAVE_ARCH_FREE_PAGE /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 diff -r 0edeca96e7d7 -r bac78e892e42 include/asm-x86_64/mach-xen/asm/pgtable.h --- a/include/asm-x86_64/mach-xen/asm/pgtable.h Tue Oct 30 13:42:54 2007 -0600 +++ b/include/asm-x86_64/mach-xen/asm/pgtable.h Tue Oct 30 15:38:11 2007 -0600 @@ -505,7 +505,7 @@ static inline pte_t pte_modify(pte_t pte #define __swp_type(x) (((x).val >> 1) & 0x3f) #define __swp_offset(x) ((x).val >> 8) #define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) -#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { __pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) extern spinlock_t pgd_lock; diff -r 0edeca96e7d7 -r bac78e892e42 include/linux/gfp.h --- a/include/linux/gfp.h Tue Oct 30 13:42:54 2007 -0600 +++ b/include/linux/gfp.h Tue Oct 30 15:38:11 2007 -0600 @@ -99,11 +99,7 @@ static inline int gfp_zone(gfp_t gfp) */ #ifndef HAVE_ARCH_FREE_PAGE -/* - * If arch_free_page returns non-zero then the generic free_page code can - * immediately bail: the arch-specific function has done all the work. 
- */ -static inline int arch_free_page(struct page *page, int order) { return 0; } +static inline void arch_free_page(struct page *page, int order) { } #endif extern struct page * diff -r 0edeca96e7d7 -r bac78e892e42 include/xen/evtchn.h --- a/include/xen/evtchn.h Tue Oct 30 13:42:54 2007 -0600 +++ b/include/xen/evtchn.h Tue Oct 30 15:38:11 2007 -0600 @@ -102,6 +102,7 @@ void evtchn_device_upcall(int port); void evtchn_device_upcall(int port); void mask_evtchn(int port); +void disable_all_local_evtchn(void); void unmask_evtchn(int port); #ifdef CONFIG_SMP diff -r 0edeca96e7d7 -r bac78e892e42 include/xen/interface/callback.h --- a/include/xen/interface/callback.h Tue Oct 30 13:42:54 2007 -0600 +++ b/include/xen/interface/callback.h Tue Oct 30 15:38:11 2007 -0600 @@ -36,15 +36,39 @@ * @extra_args == Operation-specific extra arguments (NULL if none). */ +/* ia64, x86: Callback for event delivery. */ #define CALLBACKTYPE_event 0 + +/* x86: Failsafe callback when guest state cannot be restored by Xen. */ #define CALLBACKTYPE_failsafe 1 -#define CALLBACKTYPE_syscall 2 /* x86_64 only */ + +/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */ +#define CALLBACKTYPE_syscall 2 + /* - * sysenter is only available on x86_32 with the - * supervisor_mode_kernel option enabled. + * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel + * feature is enabled. Do not use this callback type in new code. */ -#define CALLBACKTYPE_sysenter 3 +#define CALLBACKTYPE_sysenter_deprecated 3 + +/* x86: Callback for NMI delivery. */ #define CALLBACKTYPE_nmi 4 + +/* + * x86: sysenter is only available as follows: + * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled + * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs + * ('32-on-32-on-64', '32-on-64-on-64') + * [nb. 
also 64-bit guest applications on Intel CPUs + * ('64-on-64-on-64'), but syscall is preferred] + */ +#define CALLBACKTYPE_sysenter 5 + +/* + * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs + * ('32-on-32-on-64', '32-on-64-on-64') + */ +#define CALLBACKTYPE_syscall32 7 /* * Disable event deliver during callback? This flag is ignored for event and @@ -79,6 +103,11 @@ typedef struct callback_unregister callb typedef struct callback_unregister callback_unregister_t; DEFINE_XEN_GUEST_HANDLE(callback_unregister_t); +#if __XEN_INTERFACE_VERSION__ < 0x00030207 +#undef CALLBACKTYPE_sysenter +#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated +#endif + #endif /* __XEN_PUBLIC_CALLBACK_H__ */ /* diff -r 0edeca96e7d7 -r bac78e892e42 include/xen/interface/domctl.h --- a/include/xen/interface/domctl.h Tue Oct 30 13:42:54 2007 -0600 +++ b/include/xen/interface/domctl.h Tue Oct 30 15:38:11 2007 -0600 @@ -474,11 +474,11 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_ /* Bind machine I/O address range -> HVM address range. 
*/ #define XEN_DOMCTL_memory_mapping 39 struct xen_domctl_memory_mapping { - uint64_t first_gfn; /* first page (hvm guest phys page) in range */ - uint64_t first_mfn; /* first page (machine page) in range */ - uint64_t nr_mfns; /* number of pages in range (>0) */ - uint32_t add_mapping; /* add or remove mapping */ - uint32_t padding; /* padding for 64-bit aligned structure */ + uint64_aligned_t first_gfn; /* first page (hvm guest phys page) in range */ + uint64_aligned_t first_mfn; /* first page (machine page) in range */ + uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ + uint32_t add_mapping; /* add or remove mapping */ + uint32_t padding; /* padding for 64-bit aligned structure */ }; typedef struct xen_domctl_memory_mapping xen_domctl_memory_mapping_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_memory_mapping_t); @@ -494,6 +494,50 @@ struct xen_domctl_ioport_mapping { }; typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t); + + +/* + * Pin caching type of RAM space for x86 HVM domU. + */ +#define XEN_DOMCTL_pin_mem_cacheattr 41 +/* Caching types: these happen to be the same as x86 MTRR/PAT type codes. */ +#define XEN_DOMCTL_MEM_CACHEATTR_UC 0 +#define XEN_DOMCTL_MEM_CACHEATTR_WC 1 +#define XEN_DOMCTL_MEM_CACHEATTR_WT 4 +#define XEN_DOMCTL_MEM_CACHEATTR_WP 5 +#define XEN_DOMCTL_MEM_CACHEATTR_WB 6 +#define XEN_DOMCTL_MEM_CACHEATTR_UCM 7 +struct xen_domctl_pin_mem_cacheattr { + uint64_aligned_t start, end; + unsigned int type; /* XEN_DOMCTL_MEM_CACHEATTR_* */ +}; +typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t); + + +#define XEN_DOMCTL_set_ext_vcpucontext 42 +#define XEN_DOMCTL_get_ext_vcpucontext 43 +struct xen_domctl_ext_vcpucontext { + /* IN: VCPU that this call applies to. 
*/ + uint32_t vcpu; + /* + * SET: Size of struct (IN) + * GET: Size of struct (OUT) + */ + uint32_t size; +#if defined(__i386__) || defined(__x86_64__) + /* SYSCALL from 32-bit mode and SYSENTER callback information. */ + /* NB. SYSCALL from 64-bit mode is contained in vcpu_guest_context_t */ + uint64_aligned_t syscall32_callback_eip; + uint64_aligned_t sysenter_callback_eip; + uint16_t syscall32_callback_cs; + uint16_t sysenter_callback_cs; + uint8_t syscall32_disables_events; + uint8_t sysenter_disables_events; +#endif +}; +typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t); struct xen_domctl { @@ -529,6 +573,8 @@ struct xen_domctl { struct xen_domctl_bind_pt_irq bind_pt_irq; struct xen_domctl_memory_mapping memory_mapping; struct xen_domctl_ioport_mapping ioport_mapping; + struct xen_domctl_pin_mem_cacheattr pin_mem_cacheattr; + struct xen_domctl_ext_vcpucontext ext_vcpucontext; uint8_t pad[128]; } u; }; diff -r 0edeca96e7d7 -r bac78e892e42 include/xen/interface/platform.h --- a/include/xen/interface/platform.h Tue Oct 30 13:42:54 2007 -0600 +++ b/include/xen/interface/platform.h Tue Oct 30 15:38:11 2007 -0600 @@ -174,13 +174,27 @@ typedef struct xenpf_change_freq xenpf_c typedef struct xenpf_change_freq xenpf_change_freq_t; DEFINE_XEN_GUEST_HANDLE(xenpf_change_freq_t); +/* + * Get idle times (nanoseconds since boot) for physical CPUs specified in the + * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is + * indexed by CPU number; only entries with the corresponding @cpumap_bitmap + * bit set are written to. On return, @cpumap_bitmap is modified so that any + * non-existent CPUs are cleared. Such CPUs have their @idletime array entry + * cleared. + */ #define XENPF_getidletime 53 struct xenpf_getidletime { - /* IN variables. 
*/ - uint32_t max_cpus; + /* IN/OUT variables */ + /* IN: CPUs to interrogate; OUT: subset of IN which are present */ + XEN_GUEST_HANDLE(uint8_t) cpumap_bitmap; + /* IN variables */ + /* Size of cpumap bitmap. */ + uint32_t cpumap_nr_cpus; + /* Must be indexable for every cpu in cpumap_bitmap. */ XEN_GUEST_HANDLE(uint64_t) idletime; - /* OUT variables. */ - uint32_t nr_cpus; + /* OUT variables */ + /* System time when the idletime snapshots were taken. */ + uint64_t now; }; typedef struct xenpf_getidletime xenpf_getidletime_t; DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t); diff -r 0edeca96e7d7 -r bac78e892e42 include/xen/interface/sysctl.h --- a/include/xen/interface/sysctl.h Tue Oct 30 13:42:54 2007 -0600 +++ b/include/xen/interface/sysctl.h Tue Oct 30 15:38:11 2007 -0600 @@ -34,18 +34,27 @@ #include "xen.h" #include "domctl.h" -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000005 +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000006 /* * Read console content from Xen buffer ring. */ #define XEN_SYSCTL_readconsole 1 struct xen_sysctl_readconsole { - /* IN variables. */ - uint32_t clear; /* Non-zero -> clear after reading. */ - XEN_GUEST_HANDLE_64(char) buffer; /* Buffer start */ - /* IN/OUT variables. */ - uint32_t count; /* In: Buffer size; Out: Used buffer size */ + /* IN: Non-zero -> clear after reading. */ + uint8_t clear; + /* IN: Non-zero -> start index specified by @index field. */ + uint8_t incremental; + uint8_t pad0, pad1; + /* + * IN: Start index for consuming from ring buffer (if @incremental); + * OUT: End index after consuming from ring buffer. + */ + uint32_t index; + /* IN: Virtual address to write console data. */ + XEN_GUEST_HANDLE_64(char) buffer; + /* IN: Size of buffer; OUT: Bytes written to buffer. */ + uint32_t count; }; typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t); @@ -171,7 +180,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug /* Get physical CPU information. 
*/ #define XEN_SYSCTL_getcpuinfo 8 struct xen_sysctl_cpuinfo { - uint64_t idletime; + uint64_aligned_t idletime; }; typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); @@ -192,7 +201,7 @@ struct xen_sysctl_availheap { uint32_t max_bitwidth; /* Largest address width (zero if don't care). */ int32_t node; /* NUMA node of interest (-1 for all nodes). */ /* OUT variables. */ - uint64_t avail_bytes; /* Bytes available in the specified region. */ + uint64_aligned_t avail_bytes;/* Bytes available in the specified region. */ }; typedef struct xen_sysctl_availheap xen_sysctl_availheap_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t); diff -r 0edeca96e7d7 -r bac78e892e42 include/xen/interface/vcpu.h --- a/include/xen/interface/vcpu.h Tue Oct 30 13:42:54 2007 -0600 +++ b/include/xen/interface/vcpu.h Tue Oct 30 15:38:11 2007 -0600 @@ -179,6 +179,9 @@ typedef struct vcpu_register_vcpu_info v typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t; DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t); +/* Send an NMI to the specified VCPU. @extra_arg == NULL. */ +#define VCPUOP_send_nmi 11 + #endif /* __XEN_PUBLIC_VCPU_H__ */ /* diff -r 0edeca96e7d7 -r bac78e892e42 include/xen/interface/xen-compat.h --- a/include/xen/interface/xen-compat.h Tue Oct 30 13:42:54 2007 -0600 +++ b/include/xen/interface/xen-compat.h Tue Oct 30 15:38:11 2007 -0600 @@ -27,7 +27,7 @@ #ifndef __XEN_PUBLIC_XEN_COMPAT_H__ #define __XEN_PUBLIC_XEN_COMPAT_H__ -#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030206 +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030207 #if defined(__XEN__) || defined(__XEN_TOOLS__) /* Xen is built with matching headers and implements the latest interface. 
*/ diff -r 0edeca96e7d7 -r bac78e892e42 mm/page_alloc.c --- a/mm/page_alloc.c Tue Oct 30 13:42:54 2007 -0600 +++ b/mm/page_alloc.c Tue Oct 30 15:38:11 2007 -0600 @@ -451,8 +451,13 @@ static void __free_pages_ok(struct page int i; int reserved = 0; - if (arch_free_page(page, order)) +#ifdef CONFIG_XEN + if (PageForeign(page)) { + PageForeignDestructor(page); return; + } +#endif + arch_free_page(page, order); if (!PageHighMem(page)) debug_check_no_locks_freed(page_address(page), PAGE_SIZE<<order); @@ -730,8 +735,13 @@ static void fastcall free_hot_cold_page( struct per_cpu_pages *pcp; unsigned long flags; - if (arch_free_page(page, 0)) +#ifdef CONFIG_XEN + if (PageForeign(page)) { + PageForeignDestructor(page); return; + } +#endif + arch_free_page(page, 0); if (PageAnon(page)) page->mapping = NULL; _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |