[Xen-changelog] Merged.
# HG changeset patch
# User emellor@xxxxxxxxxxxxxxxxxxxxxx
# Node ID 4b89195850398b85cd5a3b57ba8228209f010fd9
# Parent  642b26779c4ecb1538032f5fb66b3a83f3ce9d73
# Parent  821368442403cb9110f466a9c7c2c9849bef9733
Merged.

diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Thu Jan 12 12:20:04 2006
@@ -76,7 +76,9 @@
 DF_MASK = 0x00000400
 NT_MASK = 0x00004000
 VM_MASK = 0x00020000
-
+/* Pseudo-eflags. */
+NMI_MASK = 0x80000000
+
 /* Offsets into shared_info_t. */
 #define evtchn_upcall_pending /* 0 */
 #define evtchn_upcall_mask    1
@@ -305,8 +307,8 @@
         je ldt_ss                       # returning to user-space with LDT SS
 #endif /* XEN */
 restore_nocheck:
-        testl $VM_MASK, EFLAGS(%esp)
-        jnz resume_vm86
+        testl $(VM_MASK|NMI_MASK), EFLAGS(%esp)
+        jnz hypervisor_iret
         movb EVENT_MASK(%esp), %al
         notb %al                        # %al == ~saved_mask
         XEN_GET_VCPU_INFO(%esi)
@@ -328,11 +330,11 @@
         .long 1b,iret_exc
 .previous

-resume_vm86:
-        XEN_UNBLOCK_EVENTS(%esi)
+hypervisor_iret:
+        andl $~NMI_MASK, EFLAGS(%esp)
         RESTORE_REGS
         movl %eax,(%esp)
-        movl $__HYPERVISOR_switch_vm86,%eax
+        movl $__HYPERVISOR_iret,%eax
         int $0x82
         ud2
@@ -691,6 +693,15 @@
         call do_debug
         jmp ret_from_exception

+ENTRY(nmi)
+        pushl %eax
+        SAVE_ALL
+        xorl %edx,%edx          # zero error code
+        movl %esp,%eax          # pt_regs pointer
+        call do_nmi
+        orl  $NMI_MASK, EFLAGS(%esp)
+        jmp restore_all
+
 #if 0 /* XEN */
 /*
  * NMI is doubly nasty. It can happen _while_ we're handling

diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Thu Jan 12 12:20:04 2006
@@ -506,18 +506,11 @@

 static void io_check_error(unsigned char reason, struct pt_regs * regs)
 {
-        unsigned long i;
-
         printk("NMI: IOCK error (debug interrupt?)\n");
         show_registers(regs);

         /* Re-enable the IOCK line, wait for a few seconds */
-        reason = (reason & 0xf) | 8;
-        outb(reason, 0x61);
-        i = 2000;
-        while (--i) udelay(1000);
-        reason &= ~8;
-        outb(reason, 0x61);
+        clear_io_check_error(reason);
 }

 static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
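A note on the NMI_MASK value introduced above: bit 31 of EFLAGS is never set
by real hardware, so the guest borrows it in the saved-flags image to tag
"returning from NMI context". The common exit path then routes both VM86 and
NMI returns through Xen instead of a direct iret. A C sketch of that decision
(illustration only, not code from the changeset; the real test is the
testl/jnz pair in restore_nocheck, and hypervisor_iret() stands in for the
assembly stub of the same name):

    if (regs->eflags & (VM_MASK | NMI_MASK)) {
        regs->eflags &= ~NMI_MASK;  /* pseudo-flag: never reaches hardware */
        hypervisor_iret();          /* int $0x82 with __HYPERVISOR_iret */
    }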
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S Thu Jan 12 12:20:04 2006
@@ -121,19 +121,19 @@
         .endm

 /*
- * Must be consistent with the definition in arch_x86_64.h:
- *     struct switch_to_user {
+ * Must be consistent with the definition in arch-x86_64.h:
+ *     struct iret_context {
  *             u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
  *     };
  * #define VGCF_IN_SYSCALL (1<<8)
  */
-        .macro SWITCH_TO_USER flag
+        .macro HYPERVISOR_IRET flag
         subq $8*4,%rsp          # reuse rip, cs, rflags, rsp, ss in the stack
         movq %rax,(%rsp)
         movq %r11,1*8(%rsp)
         movq %rcx,2*8(%rsp)     # we saved %rcx upon exceptions
         movq $\flag,3*8(%rsp)
-        movq $__HYPERVISOR_switch_to_user,%rax
+        movq $__HYPERVISOR_iret,%rax
         syscall
         .endm
@@ -225,7 +225,7 @@
         jnz sysret_careful
         XEN_UNBLOCK_EVENTS(%rsi)
         RESTORE_ARGS 0,8,0
-        SWITCH_TO_USER VGCF_IN_SYSCALL
+        HYPERVISOR_IRET VGCF_IN_SYSCALL

 /* Handle reschedules */
 /* edx: work, edi: workmask */
@@ -478,7 +478,7 @@
         orb $3,1*8(%rsp)
         iretq
 user_mode:
-        SWITCH_TO_USER 0
+        HYPERVISOR_IRET 0

 /* edi: workmask, edx: work */
 retint_careful:
@@ -719,6 +719,18 @@
         call evtchn_do_upcall
         jmp  error_exit

+#ifdef CONFIG_X86_LOCAL_APIC
+ENTRY(nmi)
+        zeroentry do_nmi_callback
+ENTRY(do_nmi_callback)
+        addq $8, %rsp
+        call do_nmi
+        RESTORE_REST
+        XEN_BLOCK_EVENTS(%rsi)
+        GET_THREAD_INFO(%rcx)
+        jmp  retint_restore_args
+#endif
+
         ALIGN
 restore_all_enable_events:
         XEN_UNBLOCK_EVENTS(%rsi)        # %rsi is already set up...
@@ -733,7 +745,7 @@
         orb $3,1*8(%rsp)
         iretq
 crit_user_mode:
-        SWITCH_TO_USER 0
+        HYPERVISOR_IRET 0

 14:     XEN_LOCKED_BLOCK_EVENTS(%rsi)
         XEN_PUT_VCPU_INFO(%rsi)

diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Thu Jan 12 12:20:04 2006
@@ -62,6 +62,7 @@
 #include <asm-xen/xen-public/physdev.h>
 #include "setup_arch_pre.h"
 #include <asm/hypervisor.h>
+#include <asm-xen/xen-public/nmi.h>

 #define PFN_UP(x)       (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
 #define PFN_PHYS(x)     ((x) << PAGE_SHIFT)
 #define end_pfn_map     end_pfn

diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c Thu Jan 12 12:20:04 2006
@@ -559,9 +559,11 @@
         printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
         printk("You probably have a hardware problem with your RAM chips\n");

+#if 0 /* XEN */
         /* Clear and disable the memory parity error line. */
         reason = (reason & 0xf) | 4;
         outb(reason, 0x61);
+#endif /* XEN */
 }

 static void io_check_error(unsigned char reason, struct pt_regs * regs)
@@ -569,12 +571,14 @@
         printk("NMI: IOCK error (debug interrupt?)\n");
         show_registers(regs);

+#if 0 /* XEN */
         /* Re-enable the IOCK line, wait for a few seconds */
         reason = (reason & 0xf) | 8;
         outb(reason, 0x61);
         mdelay(2000);
         reason &= ~8;
         outb(reason, 0x61);
+#endif /* XEN */
 }

 static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)

diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Thu Jan 12 12:20:04 2006
@@ -32,6 +32,7 @@

 #include <asm-xen/xen-public/xen.h>
 #include <asm-xen/xen-public/sched.h>
+#include <asm-xen/xen-public/nmi.h>

 #define _hypercall0(type, name) \
 ({ \
@@ -300,6 +301,14 @@
                 SHUTDOWN_suspend, srec);
 }

+static inline int
+HYPERVISOR_nmi_op(
+    unsigned long op,
+    unsigned long arg)
+{
+    return _hypercall2(int, nmi_op, op, arg);
+}
+
 #endif /* __HYPERCALL_H__ */

 /*

diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h Thu Jan 12 12:20:04 2006
@@ -29,6 +29,7 @@

 extern void hypervisor_callback(void);
 extern void failsafe_callback(void);
+extern void nmi(void);

 static void __init machine_specific_arch_setup(void)
 {
@@ -36,5 +37,7 @@
                 __KERNEL_CS, (unsigned long)hypervisor_callback,
                 __KERNEL_CS, (unsigned long)failsafe_callback);

+        HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi);
+
         machine_specific_modify_cpu_capabilities(&boot_cpu_data);
 }
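The registration call above runs once at boot. A minimal, self-contained
sketch of driving the new hypercall from guest code, using only the wrapper
and constants introduced in this changeset (the function name and printk are
illustrative):

    extern void nmi(void);  /* low-level NMI entry point in entry.S */

    static void __init example_nmi_setup(void)
    {
        /* Only dom0/VCPU0 may register; all other callers get -EINVAL. */
        if (HYPERVISOR_nmi_op(XENNMI_register_callback,
                              (unsigned long)&nmi) != 0)
            printk("Xen NMI callback registration failed\n");
    }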
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Thu Jan 12 12:20:04 2006
@@ -287,9 +287,9 @@
 }

 static inline int
-HYPERVISOR_switch_to_user(void)
-{
-    return _hypercall0(int, switch_to_user);
+HYPERVISOR_iret(void)
+{
+    return _hypercall0(int, iret);
 }

 static inline int
@@ -305,6 +305,14 @@
 {
     return _hypercall3(int, sched_op, SCHEDOP_shutdown,
                        SHUTDOWN_suspend, srec);
+}
+
+static inline int
+HYPERVISOR_nmi_op(
+    unsigned long op,
+    unsigned long arg)
+{
+    return _hypercall2(int, nmi_op, op, arg);
 }

 #endif /* __HYPERCALL_H__ */

diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h Thu Jan 12 12:20:04 2006
@@ -35,6 +35,7 @@

 extern void hypervisor_callback(void);
 extern void failsafe_callback(void);
+extern void nmi(void);

 static void __init machine_specific_arch_setup(void)
 {
@@ -43,5 +44,9 @@
                 (unsigned long) failsafe_callback,
                 (unsigned long) system_call);

+#ifdef CONFIG_X86_LOCAL_APIC
+        HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi);
+#endif
+
         machine_specific_modify_cpu_capabilities(&boot_cpu_data);
 }

diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/domain.c Thu Jan 12 12:20:04 2006
@@ -288,9 +288,7 @@

 #if defined(__i386__)

-    d->arch.mapcache.l1tab = d->arch.mm_perdomain_pt +
-        (GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
-    spin_lock_init(&d->arch.mapcache.lock);
+    mapcache_init(d);

 #else /* __x86_64__ */

@@ -481,14 +479,6 @@

 #ifdef __x86_64__

-
-void toggle_guest_mode(struct vcpu *v)
-{
-    v->arch.flags ^= TF_kernel_mode;
-    __asm__ __volatile__ ( "swapgs" );
-    update_pagetables(v);
-    write_ptbase(v);
-}

 #define loadsegment(seg,value) ({         \
     int __r = 1;                          \
@@ -659,35 +649,6 @@
     percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
 }

-long do_switch_to_user(void)
-{
-    struct cpu_user_regs  *regs = guest_cpu_user_regs();
-    struct switch_to_user  stu;
-    struct vcpu           *v = current;
-
-    if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) ||
-         unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
-        return -EFAULT;
-
-    toggle_guest_mode(v);
-
-    regs->rip    = stu.rip;
-    regs->cs     = stu.cs | 3; /* force guest privilege */
-    regs->rflags = (stu.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
-    regs->rsp    = stu.rsp;
-    regs->ss     = stu.ss | 3; /* force guest privilege */
-
-    if ( !(stu.flags & VGCF_IN_SYSCALL) )
-    {
-        regs->entry_vector = 0;
-        regs->r11 = stu.r11;
-        regs->rcx = stu.rcx;
-    }
-
-    /* Saved %rax gets written back to regs->rax in entry.S. */
-    return stu.rax;
-}
-
 #define switch_kernel_stack(_n,_c) ((void)0)

 #elif defined(__i386__)
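do_switch_to_user() is deleted above; its successor, do_iret(), appears in
xen/arch/x86/x86_64/traps.c later in this changeset. The guest-side contract
keeps the same shape: push a nine-slot frame at %rsp, then make the
hypercall. A hedged sketch (FLAT_USER_CS/SS and the local variables are
illustrative; the real frame is built in assembly by the HYPERVISOR_IRET
macro above):

    struct iret_context frame = {
        .rax    = saved_rax,        /* handed back to the guest in %rax */
        .rip    = return_rip,
        .cs     = FLAT_USER_CS,     /* RPL 3: resume in user mode */
        .rflags = saved_rflags,
        .rsp    = return_rsp,
        .ss     = FLAT_USER_SS,
        .flags  = 0,                /* or VGCF_IN_SYSCALL to discard rcx/r11 */
    };
    /* copy 'frame' to the stack top, then:
     *     movq $__HYPERVISOR_iret,%rax ; syscall                       */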
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/mm.c Thu Jan 12 12:20:04 2006
@@ -297,7 +297,6 @@

 #if defined(__x86_64__)
     /* If in user mode, switch to kernel mode just to read LDT mapping. */
-    extern void toggle_guest_mode(struct vcpu *);
     int user_mode = !(v->arch.flags & TF_kernel_mode);
 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
 #elif defined(__i386__)
@@ -2971,7 +2970,6 @@

 #ifdef CONFIG_X86_64
     struct vcpu *v = current;
-    extern void toggle_guest_mode(struct vcpu *);
     int user_mode = !(v->arch.flags & TF_kernel_mode);
 #endif

diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/traps.c Thu Jan 12 12:20:04 2006
@@ -596,7 +596,6 @@
     u16 x;
 #if defined(__x86_64__)
     /* If in user mode, switch to kernel mode just to read I/O bitmap. */
-    extern void toggle_guest_mode(struct vcpu *);
     int user_mode = !(v->arch.flags & TF_kernel_mode);
 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
 #elif defined(__i386__)
@@ -1080,26 +1079,23 @@
     return 0;
 }

-
-/* Defer dom0 notification to softirq context (unsafe in NMI context). */
-static unsigned long nmi_dom0_softirq_reason;
-#define NMI_DOM0_PARITY_ERR 0
-#define NMI_DOM0_IO_ERR     1
-#define NMI_DOM0_UNKNOWN    2
-
-static void nmi_dom0_softirq(void)
-{
-    if ( dom0 == NULL )
+static void nmi_softirq(void)
+{
+    /* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
+    evtchn_notify(dom0->vcpu[0]);
+}
+
+static void nmi_dom0_report(unsigned int reason_idx)
+{
+    struct domain *d;
+
+    if ( (d = dom0) == NULL )
         return;

-    if ( test_and_clear_bit(NMI_DOM0_PARITY_ERR, &nmi_dom0_softirq_reason) )
-        send_guest_virq(dom0->vcpu[0], VIRQ_PARITY_ERR);
-
-    if ( test_and_clear_bit(NMI_DOM0_IO_ERR, &nmi_dom0_softirq_reason) )
-        send_guest_virq(dom0->vcpu[0], VIRQ_IO_ERR);
-
-    if ( test_and_clear_bit(NMI_DOM0_UNKNOWN, &nmi_dom0_softirq_reason) )
-        send_guest_virq(dom0->vcpu[0], VIRQ_NMI);
+    set_bit(reason_idx, &d->shared_info->arch.nmi_reason);
+
+    if ( test_and_set_bit(_VCPUF_nmi_pending, &d->vcpu[0]->vcpu_flags) )
+        raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */
 }

 asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
@@ -1107,8 +1103,7 @@
     switch ( opt_nmi[0] )
     {
     case 'd': /* 'dom0' */
-        set_bit(NMI_DOM0_PARITY_ERR, &nmi_dom0_softirq_reason);
-        raise_softirq(NMI_DOM0_SOFTIRQ);
+        nmi_dom0_report(_XEN_NMIREASON_parity_error);
     case 'i': /* 'ignore' */
         break;
     default:  /* 'fatal' */
@@ -1127,8 +1122,7 @@
     switch ( opt_nmi[0] )
    {
     case 'd': /* 'dom0' */
-        set_bit(NMI_DOM0_IO_ERR, &nmi_dom0_softirq_reason);
-        raise_softirq(NMI_DOM0_SOFTIRQ);
+        nmi_dom0_report(_XEN_NMIREASON_io_error);
     case 'i': /* 'ignore' */
         break;
     default:  /* 'fatal' */
@@ -1147,8 +1141,7 @@
     switch ( opt_nmi[0] )
     {
     case 'd': /* 'dom0' */
-        set_bit(NMI_DOM0_UNKNOWN, &nmi_dom0_softirq_reason);
-        raise_softirq(NMI_DOM0_SOFTIRQ);
+        nmi_dom0_report(_XEN_NMIREASON_unknown);
     case 'i': /* 'ignore' */
         break;
     default:  /* 'fatal' */
@@ -1347,7 +1340,7 @@

     cpu_init();

-    open_softirq(NMI_DOM0_SOFTIRQ, nmi_dom0_softirq);
+    open_softirq(NMI_SOFTIRQ, nmi_softirq);
 }

diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_32/asm-offsets.c
--- a/xen/arch/x86/x86_32/asm-offsets.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_32/asm-offsets.c Thu Jan 12 12:20:04 2006
@@ -65,6 +65,10 @@
            arch.guest_context.kernel_ss);
     OFFSET(VCPU_kernel_sp, struct vcpu,
            arch.guest_context.kernel_sp);
+    OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
+    OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
+    DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
+    DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
     BLANK();

     OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
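The reworked reporting path above replaces three dedicated VIRQs with a
single shared-info word: nmi_dom0_report() latches a reason bit and marks
dom0's VCPU0 as NMI-pending, and the softirq exists only because waking a
VCPU directly in NMI context is unsafe. On the dom0 side the word is decoded
bit by bit. A sketch of a consumer (whether dom0 clears the bits is an
assumption here -- the get_nmi_reason() helpers later in this changeset only
read them):

    unsigned long *reason = &HYPERVISOR_shared_info->arch.nmi_reason;

    if (test_and_clear_bit(_XEN_NMIREASON_io_error, reason))
        ;   /* treat like an ISA IOCK error */
    if (test_and_clear_bit(_XEN_NMIREASON_parity_error, reason))
        ;   /* treat like a RAM parity error */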
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_32/domain_page.c
--- a/xen/arch/x86/x86_32/domain_page.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_32/domain_page.c Thu Jan 12 12:20:04 2006
@@ -20,33 +20,16 @@
 #include <asm/flushtlb.h>
 #include <asm/hardirq.h>

-#define MAPCACHE_ORDER    10
-#define MAPCACHE_ENTRIES  (1 << MAPCACHE_ORDER)
-
-/* Use a spare PTE bit to mark entries ready for recycling. */
-#define READY_FOR_TLB_FLUSH (1<<10)
-
-static void flush_all_ready_maps(void)
-{
-    struct mapcache *cache = &current->domain->arch.mapcache;
-    unsigned int i;
-
-    for ( i = 0; i < MAPCACHE_ENTRIES; i++ )
-        if ( (l1e_get_flags(cache->l1tab[i]) & READY_FOR_TLB_FLUSH) )
-            cache->l1tab[i] = l1e_empty();
-}
-
-void *map_domain_pages(unsigned long pfn, unsigned int order)
+void *map_domain_page(unsigned long pfn)
 {
     unsigned long va;
-    unsigned int idx, i, flags, vcpu = current->vcpu_id;
+    unsigned int idx, i, vcpu = current->vcpu_id;
     struct domain *d;
     struct mapcache *cache;
-#ifndef NDEBUG
-    unsigned int flush_count = 0;
-#endif
+    struct vcpu_maphash_entry *hashent;

     ASSERT(!in_irq());
+    perfc_incrc(map_domain_page_count);

     /* If we are the idle domain, ensure that we run on our own page tables. */
@@ -56,6 +39,18 @@

     cache = &d->arch.mapcache;

+    hashent = &cache->vcpu_maphash[vcpu].hash[MAPHASH_HASHFN(pfn)];
+#if 0
+    if ( hashent->pfn == pfn )
+    {
+        idx = hashent->idx;
+        hashent->refcnt++;
+        ASSERT(hashent->refcnt != 0);
+        ASSERT(l1e_get_pfn(cache->l1tab[idx]) == pfn);
+        goto out;
+    }
+#endif
+
     spin_lock(&cache->lock);

     /* Has some other CPU caused a wrap? We must flush if so. */
@@ -70,45 +65,97 @@
         }
     }

-    do {
-        idx = cache->cursor = (cache->cursor + 1) & (MAPCACHE_ENTRIES - 1);
-        if ( unlikely(idx == 0) )
-        {
-            ASSERT(flush_count++ == 0);
-            flush_all_ready_maps();
-            perfc_incrc(domain_page_tlb_flush);
-            local_flush_tlb();
-            cache->shadow_epoch[vcpu] = ++cache->epoch;
-            cache->tlbflush_timestamp = tlbflush_current_time();
-        }
-
-        flags = 0;
-        for ( i = 0; i < (1U << order); i++ )
-            flags |= l1e_get_flags(cache->l1tab[idx+i]);
-    }
-    while ( flags & _PAGE_PRESENT );
-
-    for ( i = 0; i < (1U << order); i++ )
-        cache->l1tab[idx+i] = l1e_from_pfn(pfn+i, __PAGE_HYPERVISOR);
+    idx = find_next_zero_bit(cache->inuse, MAPCACHE_ENTRIES, cache->cursor);
+    if ( unlikely(idx >= MAPCACHE_ENTRIES) )
+    {
+        /* /First/, clean the garbage map and update the inuse list. */
+        for ( i = 0; i < ARRAY_SIZE(cache->garbage); i++ )
+        {
+            unsigned long x = xchg(&cache->garbage[i], 0);
+            cache->inuse[i] &= ~x;
+        }
+
+        /* /Second/, flush TLBs. */
+        perfc_incrc(domain_page_tlb_flush);
+        local_flush_tlb();
+        cache->shadow_epoch[vcpu] = ++cache->epoch;
+        cache->tlbflush_timestamp = tlbflush_current_time();
+
+        idx = find_first_zero_bit(cache->inuse, MAPCACHE_ENTRIES);
+        ASSERT(idx < MAPCACHE_ENTRIES);
+    }
+
+    set_bit(idx, cache->inuse);
+    cache->cursor = idx + 1;

     spin_unlock(&cache->lock);

+    cache->l1tab[idx] = l1e_from_pfn(pfn, __PAGE_HYPERVISOR);
+
+/*out:*/
     va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT);
     return (void *)va;
 }

-void unmap_domain_pages(void *va, unsigned int order)
-{
-    unsigned int idx, i;
+void unmap_domain_page(void *va)
+{
+    unsigned int idx;
     struct mapcache *cache = &current->domain->arch.mapcache;
+    unsigned long pfn;
+    struct vcpu_maphash_entry *hashent;
+
+    ASSERT(!in_irq());

     ASSERT((void *)MAPCACHE_VIRT_START <= va);
     ASSERT(va < (void *)MAPCACHE_VIRT_END);

     idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
-
-    for ( i = 0; i < (1U << order); i++ )
-        l1e_add_flags(cache->l1tab[idx+i], READY_FOR_TLB_FLUSH);
+    pfn = l1e_get_pfn(cache->l1tab[idx]);
+    hashent = &cache->vcpu_maphash[current->vcpu_id].hash[MAPHASH_HASHFN(pfn)];
+
+    if ( hashent->idx == idx )
+    {
+        ASSERT(hashent->pfn == pfn);
+        ASSERT(hashent->refcnt != 0);
+        hashent->refcnt--;
+    }
+    else if ( hashent->refcnt == 0 )
+    {
+        if ( hashent->idx != MAPHASHENT_NOTINUSE )
+        {
+            /* /First/, zap the PTE. */
+            ASSERT(l1e_get_pfn(cache->l1tab[hashent->idx]) == hashent->pfn);
+            cache->l1tab[hashent->idx] = l1e_empty();
+            /* /Second/, mark as garbage. */
+            set_bit(hashent->idx, cache->garbage);
+        }
+
+        /* Add newly-freed mapping to the maphash. */
+        hashent->pfn = pfn;
+        hashent->idx = idx;
+    }
+    else
+    {
+        /* /First/, zap the PTE. */
+        cache->l1tab[idx] = l1e_empty();
+        /* /Second/, mark as garbage. */
+        set_bit(idx, cache->garbage);
+    }
+}
+
+void mapcache_init(struct domain *d)
+{
+    unsigned int i, j;
+
+    d->arch.mapcache.l1tab = d->arch.mm_perdomain_pt +
+        (GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
+    spin_lock_init(&d->arch.mapcache.lock);
+
+    /* Mark all maphash entries as not in use. */
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+        for ( j = 0; j < MAPHASH_ENTRIES; j++ )
+            d->arch.mapcache.vcpu_maphash[i].hash[j].idx =
+                MAPHASHENT_NOTINUSE;
 }

 #define GLOBALMAP_BITS (IOREMAP_MBYTES << (20 - PAGE_SHIFT))
@@ -128,15 +175,10 @@

     spin_lock(&globalmap_lock);

-    for ( ; ; )
-    {
-        idx = find_next_zero_bit(inuse, GLOBALMAP_BITS, inuse_cursor);
-        va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
-
-        /* End of round? If not then we're done in this loop. */
-        if ( va < FIXADDR_START )
-            break;
-
+    idx = find_next_zero_bit(inuse, GLOBALMAP_BITS, inuse_cursor);
+    va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
+    if ( unlikely(va >= FIXADDR_START) )
+    {
         /* /First/, clean the garbage map and update the inuse list. */
         for ( i = 0; i < ARRAY_SIZE(garbage); i++ )
         {
@@ -147,7 +189,9 @@
         /* /Second/, flush all TLBs to get rid of stale garbage mappings. */
         flush_tlb_all();

-        inuse_cursor = 0;
+        idx = find_first_zero_bit(inuse, GLOBALMAP_BITS);
+        va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
+        ASSERT(va < FIXADDR_START);
     }

     set_bit(idx, inuse);
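The data structure doing the work above is the per-VCPU maphash:
MAPHASH_HASHFN() is simply pfn modulo the eight-entry table, and each entry
caches a (pfn, idx, refcnt) triple so unmap_domain_page() can park a
recently used mapping instead of immediately marking it garbage. Note that
the matching fast-path lookup in map_domain_page() is still disabled with
#if 0 in this changeset. The lookup, sketched:

    struct vcpu_maphash_entry *hashent =
        &cache->vcpu_maphash[vcpu].hash[MAPHASH_HASHFN(pfn)];

    if (hashent->pfn == pfn) {      /* hit: reuse the cached slot */
        idx = hashent->idx;
        hashent->refcnt++;
    }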
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_32/entry.S Thu Jan 12 12:20:04 2006
@@ -326,7 +326,9 @@
         shl  $IRQSTAT_shift,%eax
         test %ecx,irq_stat(%eax,1)
         jnz  process_softirqs
-/*test_guest_events:*/
+        btr  $_VCPUF_nmi_pending,VCPU_flags(%ebx)
+        jc   process_nmi
+test_guest_events:
         movl VCPU_vcpu_info(%ebx),%eax
         testb $0xFF,VCPUINFO_upcall_mask(%eax)
         jnz  restore_all_guest
@@ -348,7 +350,24 @@
         sti
         call do_softirq
         jmp  test_all_events
-
+
+        ALIGN
+process_nmi:
+        movl VCPU_nmi_addr(%ebx),%eax
+        test %eax,%eax
+        jz   test_all_events
+        bts  $_VCPUF_nmi_masked,VCPU_flags(%ebx)
+        jc   1f
+        sti
+        leal VCPU_trap_bounce(%ebx),%edx
+        movl %eax,TRAPBOUNCE_eip(%edx)
+        movw $FLAT_KERNEL_CS,TRAPBOUNCE_cs(%edx)
+        movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx)
+        call create_bounce_frame
+        jmp  test_all_events
+1:      bts  $_VCPUF_nmi_pending,VCPU_flags(%ebx)
+        jmp  test_guest_events
+
 /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK:            */
 /*   {EIP, CS, EFLAGS, [ESP, SS]}                                        */
 /* %edx == trap_bounce, %ebx == struct vcpu                              */
@@ -620,9 +639,7 @@
         jne   defer_nmi

 continue_nmi:
-        movl  $(__HYPERVISOR_DS),%edx
-        movl  %edx,%ds
-        movl  %edx,%es
+        SET_XEN_SEGMENTS(d)
         movl  %esp,%edx
         pushl %edx
         call  do_nmi
@@ -659,42 +676,6 @@
         GET_GUEST_REGS(%ecx)
         movl %eax,UREGS_eax(%ecx)
         jmp  do_sched_op
-
-do_switch_vm86:
-        # Reset the stack pointer
-        GET_GUEST_REGS(%ecx)
-        movl %ecx,%esp
-
-        # GS:ESI == Ring-1 stack activation
-        movl UREGS_esp(%esp),%esi
-VFLT1:  mov  UREGS_ss(%esp),%gs
-
-        # ES:EDI == Ring-0 stack activation
-        leal UREGS_eip(%esp),%edi
-
-        # Restore the hypercall-number-clobbered EAX on our stack frame
-VFLT2:  movl %gs:(%esi),%eax
-        movl %eax,UREGS_eax(%esp)
-        addl $4,%esi
-
-        # Copy the VM86 activation from the ring-1 stack to the ring-0 stack
-        movl $(UREGS_user_sizeof-UREGS_eip)/4,%ecx
-VFLT3:  movl %gs:(%esi),%eax
-        stosl
-        addl $4,%esi
-        loop VFLT3
-
-        # Fix up EFLAGS: IOPL=0, IF=1, VM=1
-        andl $~X86_EFLAGS_IOPL,UREGS_eflags(%esp)
-        orl  $X86_EFLAGS_IF|X86_EFLAGS_VM,UREGS_eflags(%esp)
-
-        jmp  test_all_events
-
-.section __ex_table,"a"
-        .long VFLT1,domain_crash_synchronous
-        .long VFLT2,domain_crash_synchronous
-        .long VFLT3,domain_crash_synchronous
-.previous

 .data

@@ -744,11 +725,12 @@
         .long do_grant_table_op     /* 20 */
         .long do_vm_assist
         .long do_update_va_mapping_otherdomain
-        .long do_switch_vm86
+        .long do_iret
         .long do_vcpu_op
         .long do_ni_hypercall       /* 25 */
         .long do_mmuext_op
-        .long do_acm_op             /* 27 */
+        .long do_acm_op
+        .long do_nmi_op
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long do_ni_hypercall
         .endr
@@ -777,11 +759,12 @@
         .byte 3 /* do_grant_table_op    */  /* 20 */
         .byte 2 /* do_vm_assist         */
         .byte 5 /* do_update_va_mapping_otherdomain */
-        .byte 0 /* do_switch_vm86       */
+        .byte 0 /* do_iret              */
         .byte 3 /* do_vcpu_op           */
         .byte 0 /* do_ni_hypercall      */  /* 25 */
         .byte 4 /* do_mmuext_op         */
         .byte 1 /* do_acm_op            */
+        .byte 2 /* do_nmi_op            */
         .rept NR_hypercalls-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall      */
         .endr
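Both the x86/32 and x86/64 exit paths implement the same delivery protocol
around the two new VCPU flags: _VCPUF_nmi_pending is the doorbell and
_VCPUF_nmi_masked emulates the hardware's NMI masking until the guest
returns via the iret hypercall. The process_nmi logic above, rendered as C
for readability (a sketch, not code from the patch; bounce_to_guest() stands
in for create_bounce_frame):

    if (v->nmi_addr == 0)
        return;                         /* no callback registered: drop */
    if (test_and_set_bit(_VCPUF_nmi_masked, &v->vcpu_flags))
        set_bit(_VCPUF_nmi_pending, &v->vcpu_flags);  /* redeliver later */
    else
        bounce_to_guest(v->nmi_addr);   /* hypothetical helper */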
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_32/traps.c Thu Jan 12 12:20:04 2006
@@ -157,6 +157,64 @@
     __asm__ __volatile__ ( "hlt" );
 }

+static inline void pop_from_guest_stack(
+    void *dst, struct cpu_user_regs *regs, unsigned int bytes)
+{
+    if ( unlikely(__copy_from_user(dst, (void __user *)regs->esp, bytes)) )
+        domain_crash_synchronous();
+    regs->esp += bytes;
+}
+
+asmlinkage unsigned long do_iret(void)
+{
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+    u32 eflags;
+
+    /* Check worst-case stack frame for overlap with Xen protected area. */
+    if ( unlikely(!access_ok(regs->esp, 40)) )
+        domain_crash_synchronous();
+
+    /* Pop and restore EAX (clobbered by hypercall). */
+    pop_from_guest_stack(&regs->eax, regs, 4);
+
+    /* Pop and restore CS and EIP. */
+    pop_from_guest_stack(&regs->eip, regs, 8);
+
+    /*
+     * Pop, fix up and restore EFLAGS. We fix up in a local staging area
+     * to avoid firing the BUG_ON(IOPL) check in arch_getdomaininfo_ctxt.
+     */
+    pop_from_guest_stack(&eflags, regs, 4);
+    regs->eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;
+
+    if ( VM86_MODE(regs) )
+    {
+        /* Return to VM86 mode: pop and restore ESP,SS,ES,DS,FS and GS. */
+        pop_from_guest_stack(&regs->esp, regs, 24);
+    }
+    else if ( unlikely(RING_0(regs)) )
+    {
+        domain_crash_synchronous();
+    }
+    else if ( !RING_1(regs) )
+    {
+        /* Return to ring 2/3: pop and restore ESP and SS. */
+        pop_from_guest_stack(&regs->esp, regs, 8);
+    }
+
+    /* No longer in NMI context. */
+    clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);
+
+    /* Restore upcall mask from saved value. */
+    current->vcpu_info->evtchn_upcall_mask = regs->saved_upcall_mask;
+
+    /*
+     * The hypercall exit path will overwrite EAX with this return
+     * value.
+     */
+    return regs->eax;
+}
+
 BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
 asmlinkage void smp_deferred_nmi(struct cpu_user_regs regs)
 {
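The 40-byte access_ok() check in do_iret() above is sized for the
worst-case frame, a VM86 return. Collected into a struct for clarity (a
hypothetical layout name -- the hypervisor actually pops the fields
piecemeal, and the optional tails are only present in the cases noted):

    struct iret_frame_32 {              /* illustration only */
        uint32_t eax;                   /* clobbered by the hypercall */
        uint32_t eip, cs;
        uint32_t eflags;                /* IOPL cleared, IF forced on */
        uint32_t esp, ss;               /* ring-2/3 and VM86 returns */
        uint32_t es, ds, fs, gs;        /* VM86 returns only */
    };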
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_64/asm-offsets.c
--- a/xen/arch/x86/x86_64/asm-offsets.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_64/asm-offsets.c Thu Jan 12 12:20:04 2006
@@ -65,6 +65,10 @@
            arch.guest_context.syscall_callback_eip);
     OFFSET(VCPU_kernel_sp, struct vcpu,
            arch.guest_context.kernel_sp);
+    OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
+    OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
+    DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
+    DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
     BLANK();

     OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);

diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_64/entry.S Thu Jan 12 12:20:04 2006
@@ -171,7 +171,9 @@
         leaq  irq_stat(%rip),%rcx
         testl $~0,(%rcx,%rax,1)
         jnz   process_softirqs
-/*test_guest_events:*/
+        btr   $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+        jc    process_nmi
+test_guest_events:
         movq  VCPU_vcpu_info(%rbx),%rax
         testb $0xFF,VCPUINFO_upcall_mask(%rax)
         jnz   restore_all_guest
@@ -322,6 +324,23 @@
         call do_softirq
         jmp  test_all_events

+        ALIGN
+/* %rbx: struct vcpu */
+process_nmi:
+        movq VCPU_nmi_addr(%rbx),%rax
+        test %rax,%rax
+        jz   test_all_events
+        bts  $_VCPUF_nmi_masked,VCPU_flags(%rbx)
+        jc   1f
+        sti
+        leaq VCPU_trap_bounce(%rbx),%rdx
+        movq %rax,TRAPBOUNCE_eip(%rdx)
+        movw $(TBF_INTERRUPT|TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
+        call create_bounce_frame
+        jmp  test_all_events
+1:      bts  $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+        jmp  test_guest_events
+
 /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK:                     */
 /*   { RCX, R11, [DS-GS,] [CR2,] [ERRCODE,] RIP, CS, RFLAGS, RSP, SS }   */
 /* %rdx: trap_bounce, %rbx: struct vcpu                                  */
@@ -339,6 +358,9 @@
 1:      /* In kernel context already: push new frame at existing %rsp. */
         movq  UREGS_rsp+8(%rsp),%rsi
         andb  $0xfc,UREGS_cs+8(%rsp)    # Indicate kernel context to guest.
+        testw $(TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
+        jz    2f
+        orb   $0x01,UREGS_cs+8(%rsp)
 2:      andq  $~0xf,%rsi                # Stack frames are 16-byte aligned.
         movq  $HYPERVISOR_VIRT_START,%rax
         cmpq  %rax,%rsi
@@ -569,7 +591,7 @@
         SAVE_ALL
         movq %rsp,%rdi
         call do_nmi
-        jmp  restore_all_xen
+        jmp  ret_from_intr

 do_arch_sched_op:
         # Ensure we return success even if we return via schedule_tail()
@@ -626,11 +648,12 @@
         .quad do_grant_table_op     /* 20 */
         .quad do_vm_assist
         .quad do_update_va_mapping_otherdomain
-        .quad do_switch_to_user
+        .quad do_iret
         .quad do_vcpu_op
         .quad do_set_segment_base   /* 25 */
         .quad do_mmuext_op
         .quad do_acm_op
+        .quad do_nmi_op
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .quad do_ni_hypercall
         .endr
@@ -659,11 +682,12 @@
         .byte 3 /* do_grant_table_op    */  /* 20 */
         .byte 2 /* do_vm_assist         */
         .byte 4 /* do_update_va_mapping_otherdomain */
-        .byte 0 /* do_switch_to_user    */
+        .byte 0 /* do_iret              */
         .byte 3 /* do_vcpu_op           */
         .byte 2 /* do_set_segment_base  */  /* 25 */
         .byte 4 /* do_mmuext_op         */
         .byte 1 /* do_acm_op            */
+        .byte 2 /* do_nmi_op            */
         .rept NR_hypercalls-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall      */
         .endr
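TBF_SLOW_IRET is the glue between the x86/64 NMI bounce and the new return
path: create_bounce_frame normally stores a ring-0 CS for in-kernel frames,
which the guest may shortcut with 'orb $3,1*8(%rsp); iretq', but with
TBF_SLOW_IRET it sets RPL 1 so the guest must come back through
HYPERVISOR_iret -- the only place _VCPUF_nmi_masked is cleared. Sketched:

    cs &= ~3;                   /* kernel-context frame */
    if (flags & TBF_SLOW_IRET)
        cs |= 1;                /* force the return through HYPERVISOR_iret */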
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_64/traps.c Thu Jan 12 12:20:04 2006
@@ -12,6 +12,7 @@
 #include <asm/current.h>
 #include <asm/flushtlb.h>
 #include <asm/msr.h>
+#include <asm/shadow.h>
 #include <asm/vmx.h>

 void show_registers(struct cpu_user_regs *regs)
@@ -113,6 +114,52 @@
     __asm__ __volatile__ ( "hlt" );
 }

+void toggle_guest_mode(struct vcpu *v)
+{
+    v->arch.flags ^= TF_kernel_mode;
+    __asm__ __volatile__ ( "swapgs" );
+    update_pagetables(v);
+    write_ptbase(v);
+}
+
+long do_iret(void)
+{
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+    struct iret_context iret_saved;
+    struct vcpu *v = current;
+
+    if ( unlikely(copy_from_user(&iret_saved, (void *)regs->rsp,
+                                 sizeof(iret_saved))) )
+        domain_crash_synchronous();
+
+    /* Returning to user mode? */
+    if ( (iret_saved.cs & 3) == 3 )
+    {
+        if ( unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
+            return -EFAULT;
+        toggle_guest_mode(v);
+    }
+
+    regs->rip    = iret_saved.rip;
+    regs->cs     = iret_saved.cs | 3; /* force guest privilege */
+    regs->rflags = (iret_saved.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
+    regs->rsp    = iret_saved.rsp;
+    regs->ss     = iret_saved.ss | 3; /* force guest privilege */
+
+    if ( !(iret_saved.flags & VGCF_IN_SYSCALL) )
+    {
+        regs->entry_vector = 0;
+        regs->r11 = iret_saved.r11;
+        regs->rcx = iret_saved.rcx;
+    }
+
+    /* No longer in NMI context. */
+    clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);
+
+    /* Saved %rax gets written back to regs->rax in entry.S. */
+    return iret_saved.rax;
+}
+
 asmlinkage void syscall_enter(void);
 void __init percpu_traps_init(void)
 {

diff -r 642b26779c4e -r 4b8919585039 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c Thu Jan 12 12:13:34 2006
+++ b/xen/common/dom0_ops.c Thu Jan 12 12:20:04 2006
@@ -323,7 +323,7 @@
         new_affinity = v->cpu_affinity;
         memcpy(cpus_addr(new_affinity),
                &op->u.setvcpuaffinity.cpumap,
-               min((int)BITS_TO_LONGS(NR_CPUS),
+               min((int)(BITS_TO_LONGS(NR_CPUS) * sizeof(long)),
                    (int)sizeof(op->u.setvcpuaffinity.cpumap)));

         ret = vcpu_set_affinity(v, &new_affinity);
@@ -501,7 +501,7 @@
         op->u.getvcpuinfo.cpumap   = 0;
         memcpy(&op->u.getvcpuinfo.cpumap,
                cpus_addr(v->cpu_affinity),
-               min((int)BITS_TO_LONGS(NR_CPUS),
+               min((int)(BITS_TO_LONGS(NR_CPUS) * sizeof(long)),
                    (int)sizeof(op->u.getvcpuinfo.cpumap)));
         ret = 0;
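The dom0_ops.c change just above fixes a units bug: BITS_TO_LONGS() yields
a count of longs, while memcpy() wants bytes, so the old code truncated the
CPU affinity map. Illustrative arithmetic:

    /* NR_CPUS = 32 on a 32-bit build:
     *   BITS_TO_LONGS(32)                = 1  (wrong: one byte copied)
     *   BITS_TO_LONGS(32) * sizeof(long) = 4  (correct byte count)
     */
    size_t bytes = BITS_TO_LONGS(NR_CPUS) * sizeof(long);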
diff -r 642b26779c4e -r 4b8919585039 xen/common/kernel.c
--- a/xen/common/kernel.c Thu Jan 12 12:13:34 2006
+++ b/xen/common/kernel.c Thu Jan 12 12:20:04 2006
@@ -11,6 +11,7 @@
 #include <xen/compile.h>
 #include <xen/sched.h>
 #include <asm/current.h>
+#include <public/nmi.h>
 #include <public/version.h>

 void cmdline_parse(char *cmdline)
@@ -146,6 +147,43 @@
     }

     return -ENOSYS;
+}
+
+long do_nmi_op(unsigned int cmd, void *arg)
+{
+    struct vcpu *v = current;
+    struct domain *d = current->domain;
+    long rc = 0;
+
+    switch ( cmd )
+    {
+    case XENNMI_register_callback:
+        if ( (d->domain_id != 0) || (v->vcpu_id != 0) )
+        {
+            rc = -EINVAL;
+        }
+        else
+        {
+            v->nmi_addr = (unsigned long)arg;
+#ifdef CONFIG_X86
+            /*
+             * If no handler was registered we can 'lose the NMI edge'.
+             * Re-assert it now.
+             */
+            if ( d->shared_info->arch.nmi_reason != 0 )
+                set_bit(_VCPUF_nmi_pending, &v->vcpu_flags);
+#endif
+        }
+        break;
+    case XENNMI_unregister_callback:
+        v->nmi_addr = 0;
+        break;
+    default:
+        rc = -ENOSYS;
+        break;
+    }
+
+    return rc;
 }

 long do_vm_assist(unsigned int cmd, unsigned int type)

diff -r 642b26779c4e -r 4b8919585039 xen/common/schedule.c
--- a/xen/common/schedule.c Thu Jan 12 12:13:34 2006
+++ b/xen/common/schedule.c Thu Jan 12 12:20:04 2006
@@ -207,7 +207,10 @@

 int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
 {
-    if ( cpus_empty(*affinity) )
+    cpumask_t online_affinity;
+
+    cpus_and(online_affinity, *affinity, cpu_online_map);
+    if ( cpus_empty(online_affinity) )
         return -EINVAL;

     return SCHED_OP(set_affinity, v, affinity);

diff -r 642b26779c4e -r 4b8919585039 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/asm-x86/domain.h Thu Jan 12 12:20:04 2006
@@ -13,13 +13,43 @@
     unsigned long eip;
 };

+#define MAPHASH_ENTRIES 8
+#define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
+#define MAPHASHENT_NOTINUSE ((u16)~0U)
+struct vcpu_maphash {
+    struct vcpu_maphash_entry {
+        unsigned long pfn;
+        uint16_t      idx;
+        uint16_t      refcnt;
+    } hash[MAPHASH_ENTRIES];
+} __cacheline_aligned;
+
+#define MAPCACHE_ORDER   10
+#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
 struct mapcache {
+    /* The PTEs that provide the mappings, and a cursor into the array. */
     l1_pgentry_t *l1tab;
     unsigned int cursor;
+
+    /* Protects map_domain_page(). */
+    spinlock_t lock;
+
+    /* Garbage mappings are flushed from TLBs in batches called 'epochs'. */
     unsigned int epoch, shadow_epoch[MAX_VIRT_CPUS];
     u32 tlbflush_timestamp;
-    spinlock_t lock;
+
+    /* Which mappings are in use, and which are garbage to reap next epoch? */
+    unsigned long inuse[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
+    unsigned long garbage[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
+
+    /* Lock-free per-VCPU hash of recently-used mappings. */
+    struct vcpu_maphash vcpu_maphash[MAX_VIRT_CPUS];
 };
+
+extern void mapcache_init(struct domain *);
+
+/* x86/64: toggle guest between kernel and user modes. */
+extern void toggle_guest_mode(struct vcpu *);

 struct arch_domain {

diff -r 642b26779c4e -r 4b8919585039 xen/include/asm-x86/nmi.h
--- a/xen/include/asm-x86/nmi.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/asm-x86/nmi.h Thu Jan 12 12:20:04 2006
@@ -1,6 +1,8 @@

 #ifndef ASM_NMI_H
 #define ASM_NMI_H
+
+#include <public/nmi.h>

 struct cpu_user_regs;

diff -r 642b26779c4e -r 4b8919585039 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/asm-x86/processor.h Thu Jan 12 12:20:04 2006
@@ -123,6 +123,7 @@
 #define TBF_EXCEPTION_ERRCODE  2
 #define TBF_INTERRUPT          8
 #define TBF_FAILSAFE          16
+#define TBF_SLOW_IRET         32

 /* 'arch_vcpu' flags values */
 #define _TF_kernel_mode        0

diff -r 642b26779c4e -r 4b8919585039 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/public/arch-x86_32.h Thu Jan 12 12:20:04 2006
@@ -135,6 +135,7 @@
     unsigned long max_pfn;                  /* max pfn that appears in table */
     /* Frame containing list of mfns containing list of mfns containing p2m. */
     unsigned long pfn_to_mfn_frame_list_list;
+    unsigned long nmi_reason;
 } arch_shared_info_t;

 typedef struct {
diff -r 642b26779c4e -r 4b8919585039 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/public/arch-x86_64.h Thu Jan 12 12:20:04 2006
@@ -88,11 +88,20 @@
 #define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */

 /*
- * int HYPERVISOR_switch_to_user(void)
+ * int HYPERVISOR_iret(void)
  * All arguments are on the kernel stack, in the following format.
  * Never returns if successful. Current kernel context is lost.
+ * The saved CS is mapped as follows:
+ *   RING0 -> RING3 kernel mode.
+ *   RING1 -> RING3 kernel mode.
+ *   RING2 -> RING3 kernel mode.
+ *   RING3 -> RING3 user mode.
+ * However RING0 indicates that the guest kernel should return to itself
+ * directly with
+ *      orb   $3,1*8(%rsp)
+ *      iretq
  * If flags contains VGCF_IN_SYSCALL:
- *   Restore RAX, RIP, RFLAGS, RSP.
+ *   Restore RAX, RIP, RFLAGS, RSP.
  *   Discard R11, RCX, CS, SS.
  * Otherwise:
  *   Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
@@ -100,10 +109,19 @@
  */
 /* Guest exited in SYSCALL context? Return to guest with SYSRET? */
 #define VGCF_IN_SYSCALL (1<<8)
+struct iret_context {
+    /* Top of stack (%rsp at point of hypercall). */
+    uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
+    /* Bottom of iret stack frame. */
+};
+/*
+ * For compatibility with HYPERVISOR_switch_to_user which is the old
+ * name for HYPERVISOR_iret.
+ */
 struct switch_to_user {
     /* Top of stack (%rsp at point of hypercall). */
     uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
-    /* Bottom of switch_to_user stack frame. */
+    /* Bottom of iret stack frame. */
 };

 /*
@@ -202,6 +220,7 @@
     unsigned long max_pfn;                  /* max pfn that appears in table */
     /* Frame containing list of mfns containing list of mfns containing p2m. */
     unsigned long pfn_to_mfn_frame_list_list;
+    unsigned long nmi_reason;
 } arch_shared_info_t;

 typedef struct {

diff -r 642b26779c4e -r 4b8919585039 xen/include/public/xen.h
--- a/xen/include/public/xen.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/public/xen.h Thu Jan 12 12:20:04 2006
@@ -53,12 +53,14 @@
 #define __HYPERVISOR_grant_table_op       20
 #define __HYPERVISOR_vm_assist            21
 #define __HYPERVISOR_update_va_mapping_otherdomain 22
-#define __HYPERVISOR_switch_vm86          23 /* x86/32 only */
-#define __HYPERVISOR_switch_to_user       23 /* x86/64 only */
+#define __HYPERVISOR_iret                 23 /* x86 only */
+#define __HYPERVISOR_switch_vm86          23 /* x86/32 only (obsolete name) */
+#define __HYPERVISOR_switch_to_user       23 /* x86/64 only (obsolete name) */
 #define __HYPERVISOR_vcpu_op              24
 #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
 #define __HYPERVISOR_mmuext_op            26
 #define __HYPERVISOR_acm_op               27
+#define __HYPERVISOR_nmi_op               28

 /*
  * VIRTUAL INTERRUPTS
@@ -69,10 +71,7 @@
 #define VIRQ_DEBUG      1  /* Request guest to dump debug info.           */
 #define VIRQ_CONSOLE    2  /* (DOM0) Bytes received on emergency console. */
 #define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.   */
-#define VIRQ_PARITY_ERR 4  /* (DOM0) NMI parity error (port 0x61, bit 7). */
-#define VIRQ_IO_ERR     5  /* (DOM0) NMI I/O error (port 0x61, bit 6).    */
 #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
-#define VIRQ_NMI        7  /* (DOM0) Unknown NMI (not from ISA port 0x61).*/
 #define NR_VIRQS        8

 /*

diff -r 642b26779c4e -r 4b8919585039 xen/include/xen/domain_page.h
--- a/xen/include/xen/domain_page.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/xen/domain_page.h Thu Jan 12 12:20:04 2006
@@ -10,24 +10,19 @@
 #include <xen/config.h>
 #include <xen/mm.h>

-#define map_domain_page(pfn)   map_domain_pages(pfn,0)
-#define unmap_domain_page(va)  unmap_domain_pages(va,0)
-
 #ifdef CONFIG_DOMAIN_PAGE

 /*
- * Maps a given range of page frames, returning the mapped virtual address. The
- * pages are now accessible within the current VCPU until a corresponding
- * call to unmap_domain_page().
+ * Map a given page frame, returning the mapped virtual address. The page is
+ * then accessible within the current VCPU until a corresponding unmap call.
 */
-extern void *map_domain_pages(unsigned long pfn, unsigned int order);
+extern void *map_domain_page(unsigned long pfn);

 /*
- * Pass a VA within the first page of a range previously mapped in the context
- * of the currently-executing VCPU via a call to map_domain_pages(). Those
- * pages will then be removed from the mapping lists.
+ * Pass a VA within a page previously mapped in the context of the
+ * currently-executing VCPU via a call to map_domain_pages().
 */
-extern void unmap_domain_pages(void *va, unsigned int order);
+extern void unmap_domain_page(void *va);

 /*
  * Similar to the above calls, except the mapping is accessible in all
@@ -97,8 +92,8 @@

 #else /* !CONFIG_DOMAIN_PAGE */

-#define map_domain_pages(pfn,order)         phys_to_virt((pfn)<<PAGE_SHIFT)
-#define unmap_domain_pages(va,order)        ((void)((void)(va),(void)(order)))
+#define map_domain_page(pfn)                phys_to_virt((pfn)<<PAGE_SHIFT)
+#define unmap_domain_page(va)               ((void)(va))

 #define map_domain_page_global(pfn)         phys_to_virt((pfn)<<PAGE_SHIFT)
 #define unmap_domain_page_global(va)        ((void)(va))
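With the order argument gone, the transient-mapping API above handles
exactly one frame per call. Typical usage (a sketch; pfn, src and the copy
are illustrative):

    void *va = map_domain_page(pfn);    /* per-VCPU transient mapping */
    memcpy(va, src, PAGE_SIZE);
    unmap_domain_page(va);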
diff -r 642b26779c4e -r 4b8919585039 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/xen/sched.h Thu Jan 12 12:20:04 2006
@@ -80,6 +80,8 @@

     /* Bitmask of CPUs on which this VCPU may run. */
     cpumask_t        cpu_affinity;
+
+    unsigned long    nmi_addr;      /* NMI callback address. */

     /* Bitmask of CPUs which are holding onto this VCPU's state. */
     cpumask_t        vcpu_dirty_cpumask;
@@ -361,6 +363,12 @@
  /* VCPU is not-runnable */
 #define _VCPUF_down            5
 #define VCPUF_down             (1UL<<_VCPUF_down)
+ /* NMI callback pending for this VCPU? */
+#define _VCPUF_nmi_pending     8
+#define VCPUF_nmi_pending      (1UL<<_VCPUF_nmi_pending)
+ /* Avoid NMI reentry by allowing NMIs to be masked for short periods. */
+#define _VCPUF_nmi_masked      9
+#define VCPUF_nmi_masked       (1UL<<_VCPUF_nmi_masked)

 /*
  * Per-domain flags (domain_flags).

diff -r 642b26779c4e -r 4b8919585039 xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/xen/softirq.h Thu Jan 12 12:20:04 2006
@@ -6,7 +6,7 @@
 #define SCHEDULE_SOFTIRQ                  1
 #define NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ 2
 #define KEYPRESS_SOFTIRQ                  3
-#define NMI_DOM0_SOFTIRQ                  4
+#define NMI_SOFTIRQ                       4
 #define PAGE_SCRUB_SOFTIRQ                5
 #define DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ  6
 #define NR_SOFTIRQS                       7

diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/mach_traps.h
--- /dev/null Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/mach_traps.h Thu Jan 12 12:20:04 2006
@@ -0,0 +1,33 @@
+/*
+ *  include/asm-xen/asm-i386/mach-xen/mach_traps.h
+ *
+ *  Machine specific NMI handling for Xen
+ */
+#ifndef _MACH_TRAPS_H
+#define _MACH_TRAPS_H
+
+#include <linux/bitops.h>
+#include <asm-xen/xen-public/nmi.h>
+
+static inline void clear_mem_error(unsigned char reason) {}
+static inline void clear_io_check_error(unsigned char reason) {}
+
+static inline unsigned char get_nmi_reason(void)
+{
+	shared_info_t *s = HYPERVISOR_shared_info;
+	unsigned char reason = 0;
+
+	/* construct a value which looks like it came from
+	 * port 0x61.
+	 */
+	if (test_bit(_XEN_NMIREASON_io_error, &s->arch.nmi_reason))
+		reason |= 0x40;
+	if (test_bit(_XEN_NMIREASON_parity_error, &s->arch.nmi_reason))
+		reason |= 0x80;
+
+	return reason;
+}
+
+static inline void reassert_nmi(void) {}
+
+#endif /* !_MACH_TRAPS_H */
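The get_nmi_reason() helper in the new mach_traps.h fabricates a value with
the layout of ISA port 0x61, so the existing decode logic in the NMI handler
keeps working unmodified. The two bits involved (hypothetical macro names,
for illustration only):

    #define P61_IOCHK  0x40     /* bit 6: I/O check (IOCK) error */
    #define P61_PARITY 0x80     /* bit 7: memory parity error    */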
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/nmi.h
--- /dev/null Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/nmi.h Thu Jan 12 12:20:04 2006
@@ -0,0 +1,75 @@
+/*
+ *  linux/include/asm-i386/nmi.h
+ */
+#ifndef ASM_NMI_H
+#define ASM_NMI_H
+
+#include <linux/pm.h>
+
+#include <asm-xen/xen-public/nmi.h>
+
+struct pt_regs;
+
+typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
+
+/**
+ * set_nmi_callback
+ *
+ * Set a handler for an NMI. Only one handler may be
+ * set. Return 1 if the NMI was handled.
+ */
+void set_nmi_callback(nmi_callback_t callback);
+
+/**
+ * unset_nmi_callback
+ *
+ * Remove the handler previously set.
+ */
+void unset_nmi_callback(void);
+
+#ifdef CONFIG_PM
+
+/** Replace the PM callback routine for NMI. */
+struct pm_dev * set_nmi_pm_callback(pm_callback callback);
+
+/** Unset the PM callback routine back to the default. */
+void unset_nmi_pm_callback(struct pm_dev * dev);
+
+#else
+
+static inline struct pm_dev * set_nmi_pm_callback(pm_callback callback)
+{
+	return 0;
+}
+
+static inline void unset_nmi_pm_callback(struct pm_dev * dev)
+{
+}
+
+#endif /* CONFIG_PM */
+
+extern void default_do_nmi(struct pt_regs *);
+extern void die_nmi(char *str, struct pt_regs *regs);
+
+static inline unsigned char get_nmi_reason(void)
+{
+	shared_info_t *s = HYPERVISOR_shared_info;
+	unsigned char reason = 0;
+
+	/* construct a value which looks like it came from
+	 * port 0x61.
+	 */
+	if (test_bit(_XEN_NMIREASON_io_error, &s->arch.nmi_reason))
+		reason |= 0x40;
+	if (test_bit(_XEN_NMIREASON_parity_error, &s->arch.nmi_reason))
+		reason |= 0x80;
+
+	return reason;
+}
+
+extern int panic_on_timeout;
+extern int unknown_nmi_panic;
+
+extern int check_nmi_watchdog(void);
+
+#endif /* ASM_NMI_H */

diff -r 642b26779c4e -r 4b8919585039 patches/linux-2.6.12/i386-mach-io-check-nmi.patch
--- /dev/null Thu Jan 12 12:13:34 2006
+++ b/patches/linux-2.6.12/i386-mach-io-check-nmi.patch Thu Jan 12 12:20:04 2006
@@ -0,0 +1,43 @@
+--- ref-linux-2.6.12/arch/i386/kernel/traps.c	2005-12-19 09:23:44.000000000 +0000
++++ linux-2.6.12-xen0/arch/i386/kernel/traps.c	2006-01-05 15:51:52.000000000 +0000
+@@ -521,18 +521,11 @@
+ 
+ static void io_check_error(unsigned char reason, struct pt_regs * regs)
+ {
+-	unsigned long i;
+-
+ 	printk("NMI: IOCK error (debug interrupt?)\n");
+ 	show_registers(regs);
+ 
+ 	/* Re-enable the IOCK line, wait for a few seconds */
+-	reason = (reason & 0xf) | 8;
+-	outb(reason, 0x61);
+-	i = 2000;
+-	while (--i) udelay(1000);
+-	reason &= ~8;
+-	outb(reason, 0x61);
++	clear_io_check_error(reason);
+ }
+ 
+ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+--- ref-linux-2.6.12/include/asm-i386/mach-default/mach_traps.h	2005-06-17 20:48:29.000000000 +0100
++++ linux-2.6.12-xen0/include/asm-i386/mach-default/mach_traps.h	2006-01-05 15:52:33.000000000 +0000
+@@ -15,6 +15,18 @@
+ 	outb(reason, 0x61);
+ }
+ 
++static inline void clear_io_check_error(unsigned char reason)
++{
++	unsigned long i;
++
++	reason = (reason & 0xf) | 8;
++	outb(reason, 0x61);
++	i = 2000;
++	while (--i) udelay(1000);
++	reason &= ~8;
++	outb(reason, 0x61);
++}
++
+ static inline unsigned char get_nmi_reason(void)
+ {
+ 	return inb(0x61);

diff -r 642b26779c4e -r 4b8919585039 xen/include/public/nmi.h
--- /dev/null Thu Jan 12 12:13:34 2006
+++ b/xen/include/public/nmi.h Thu Jan 12 12:20:04 2006
@@ -0,0 +1,54 @@
+/******************************************************************************
+ * nmi.h
+ *
+ * NMI callback registration and reason codes.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xxxxxxxxxxxxx>
+ */
+
+#ifndef __XEN_PUBLIC_NMI_H__
+#define __XEN_PUBLIC_NMI_H__
+
+/*
+ * NMI reason codes:
+ * Currently these are x86-specific, stored in arch_shared_info.nmi_reason.
+ */
+ /* I/O-check error reported via ISA port 0x61, bit 6. */
+#define _XEN_NMIREASON_io_error     0
+#define XEN_NMIREASON_io_error      (1UL << _XEN_NMIREASON_io_error)
+ /* Parity error reported via ISA port 0x61, bit 7. */
+#define _XEN_NMIREASON_parity_error 1
+#define XEN_NMIREASON_parity_error  (1UL << _XEN_NMIREASON_parity_error)
+ /* Unknown hardware-generated NMI. */
+#define _XEN_NMIREASON_unknown      2
+#define XEN_NMIREASON_unknown       (1UL << _XEN_NMIREASON_unknown)
+
+/*
+ * long nmi_op(unsigned int cmd, void *arg)
+ * NB. All ops return zero on success, else a negative error code.
+ */
+
+/*
+ * Register NMI callback for this (calling) VCPU. Currently this only makes
+ * sense for domain 0, vcpu 0. All other callers will be returned EINVAL.
+ * arg == address of callback function.
+ */
+#define XENNMI_register_callback   0
+
+/*
+ * Deregister NMI callback for this (calling) VCPU.
+ * arg == NULL.
+ */
+#define XENNMI_unregister_callback 1
+
+#endif /* __XEN_PUBLIC_NMI_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog