[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] merge
# HG changeset patch # User kaf24@xxxxxxxxxxxxxxxxxxxx # Node ID eee0489b3a174dd53e56e975bc44c46933697215 # Parent 7eac3edd0589e7077932f1bbc207c6e33d15d97b # Parent 8cf7d1d715f46076d5c0c88dede313851cf98d43 merge diff -r 7eac3edd0589 -r eee0489b3a17 extras/mini-os/include/events.h --- a/extras/mini-os/include/events.h Tue Oct 25 03:00:35 2005 +++ b/extras/mini-os/include/events.h Sat Oct 29 08:51:35 2005 @@ -47,7 +47,7 @@ { evtchn_op_t op; op.cmd = EVTCHNOP_send; - op.u.send.local_port = port; + op.u.send.port = port; return HYPERVISOR_event_channel_op(&op); } diff -r 7eac3edd0589 -r eee0489b3a17 extras/mini-os/include/hypervisor.h --- a/extras/mini-os/include/hypervisor.h Tue Oct 25 03:00:35 2005 +++ b/extras/mini-os/include/hypervisor.h Sat Oct 29 08:51:35 2005 @@ -14,6 +14,7 @@ #include <types.h> #include <xen/xen.h> +#include <xen/dom0_ops.h> /* * a placeholder for the start of day information passed up from the hypervisor @@ -37,548 +38,281 @@ * Assembler stubs for hyper-calls. */ #if defined(__i386__) +/* Taken from Linux */ + +#ifndef __HYPERCALL_H__ +#define __HYPERCALL_H__ + +#include <xen/sched.h> + +#define _hypercall0(type, name) \ +({ \ + long __res; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res) \ + : "0" (__HYPERVISOR_##name) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=b" (__ign1) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" 
(__ign3) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)), "3" ((long)(a3)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)), "3" ((long)(a3)), \ + "4" ((long)(a4)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)), "3" ((long)(a3)), \ + "4" ((long)(a4)), "5" ((long)(a5)) \ + : "memory" ); \ + (type)__res; \ +}) + static inline int HYPERVISOR_set_trap_table( - trap_info_t *table) -{ - int ret; - unsigned long ignore; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ignore) - : "0" (__HYPERVISOR_set_trap_table), "1" (table) - : "memory" ); - - return ret; + trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); } static inline int HYPERVISOR_mmu_update( - mmu_update_t *req, int count, int *success_count, domid_t domid) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count), - "3" (success_count), "4" (domid) - : "memory" ); - - return ret; + mmu_update_t *req, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); } static inline int HYPERVISOR_mmuext_op( - struct mmuext_op *op, int count, int *success_count, domid_t domid) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ 
__volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count), - "3" (success_count), "4" (domid) - : "memory" ); - - return ret; + struct mmuext_op *op, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); } static inline int HYPERVISOR_set_gdt( - unsigned long *frame_list, int entries) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_set_gdt), "1" (frame_list), "2" (entries) - : "memory" ); - - - return ret; + unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); } static inline int HYPERVISOR_stack_switch( - unsigned long ss, unsigned long esp) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_stack_switch), "1" (ss), "2" (esp) - : "memory" ); - - return ret; + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); } static inline int HYPERVISOR_set_callbacks( - unsigned long event_selector, unsigned long event_address, - unsigned long failsafe_selector, unsigned long failsafe_address) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_set_callbacks), "1" (event_selector), - "2" (event_address), "3" (failsafe_selector), "4" (failsafe_address) - : "memory" ); - - return ret; + unsigned long event_selector, unsigned long event_address, + unsigned long failsafe_selector, unsigned long failsafe_address) +{ + return _hypercall4(int, set_callbacks, + event_selector, event_address, + failsafe_selector, failsafe_address); } static inline int HYPERVISOR_fpu_taskswitch( - int set) -{ - int ret; - unsigned long ign; - - 
__asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign) - : "0" (__HYPERVISOR_fpu_taskswitch), "1" (set) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_yield( - void) -{ - int ret; - unsigned long ign; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign) - : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield) - : "memory", "ecx" ); - - return ret; -} - -static inline int -HYPERVISOR_block( - void) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block) - : "memory", "ecx" ); - - return ret; -} - -static inline int -HYPERVISOR_shutdown( - void) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift)) - : "memory", "ecx" ); - - return ret; -} - -static inline int -HYPERVISOR_reboot( - void) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift)) - : "memory", "ecx" ); - - return ret; -} - -static inline int -HYPERVISOR_suspend( - unsigned long srec) -{ - int ret; - unsigned long ign1, ign2; - - /* NB. On suspend, control software expects a suspend record in %esi. 
*/ - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=S" (ign2) - : "0" (__HYPERVISOR_sched_op), - "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), - "S" (srec) : "memory", "ecx"); - - return ret; -} - -static inline int -HYPERVISOR_crash( - void) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift)) - : "memory", "ecx" ); - - return ret; + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int +HYPERVISOR_sched_op( + int cmd, unsigned long arg) +{ + return _hypercall2(int, sched_op, cmd, arg); } static inline long HYPERVISOR_set_timer_op( - u64 timeout) -{ - int ret; - unsigned long timeout_hi = (unsigned long)(timeout>>32); - unsigned long timeout_lo = (unsigned long)timeout; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_set_timer_op), "b" (timeout_lo), "c" (timeout_hi) - : "memory"); - - return ret; -} - -#if 0 + u64 timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout>>32); + unsigned long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); +} + static inline int HYPERVISOR_dom0_op( - dom0_op_t *dom0_op) -{ - int ret; - unsigned long ign1; - - dom0_op->interface_version = DOM0_INTERFACE_VERSION; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_dom0_op), "1" (dom0_op) - : "memory"); - - return ret; -} -#endif + dom0_op_t *dom0_op) +{ + dom0_op->interface_version = DOM0_INTERFACE_VERSION; + return _hypercall1(int, dom0_op, dom0_op); +} static inline int HYPERVISOR_set_debugreg( - int reg, unsigned long value) -{ - int ret; - unsigned long ign1, ign2; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_set_debugreg), "1" 
(reg), "2" (value) - : "memory" ); - - return ret; + int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); } static inline unsigned long HYPERVISOR_get_debugreg( - int reg) -{ - unsigned long ret; - unsigned long ign; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign) - : "0" (__HYPERVISOR_get_debugreg), "1" (reg) - : "memory" ); - - return ret; + int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); } static inline int HYPERVISOR_update_descriptor( - u64 ma, u64 desc) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_update_descriptor), - "1" ((unsigned long)ma), "2" ((unsigned long)(ma>>32)), - "3" ((unsigned long)desc), "4" ((unsigned long)(desc>>32)) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_dom_mem_op( - unsigned int op, unsigned long *extent_list, - unsigned long nr_extents, unsigned int extent_order) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4, ign5; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4), - "=D" (ign5) - : "0" (__HYPERVISOR_dom_mem_op), "1" (op), "2" (extent_list), - "3" (nr_extents), "4" (extent_order), "5" (DOMID_SELF) - : "memory" ); - - return ret; + u64 ma, u64 desc) +{ + return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); +} + +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); } static inline int HYPERVISOR_multicall( - void *call_list, int nr_calls) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_multicall), "1" (call_list), "2" (nr_calls) - : "memory" ); - - return ret; + void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); } static 
inline int HYPERVISOR_update_va_mapping( - unsigned long va, pte_t new_val, unsigned long flags) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_update_va_mapping), - "1" (va), "2" ((new_val).pte_low), + unsigned long va, pte_t new_val, unsigned long flags) +{ + unsigned long pte_hi = 0; #ifdef CONFIG_X86_PAE - "3" ((new_val).pte_high), -#else - "3" (0), + pte_hi = new_val.pte_high; #endif - "4" (flags) - : "memory" ); - - return ret; + return _hypercall4(int, update_va_mapping, va, + new_val.pte_low, pte_hi, flags); } static inline int HYPERVISOR_event_channel_op( - void *op) -{ - int ret; - unsigned long ignore; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ignore) - : "0" (__HYPERVISOR_event_channel_op), "1" (op) - : "memory" ); - - return ret; + void *op) +{ + return _hypercall1(int, event_channel_op, op); } static inline int HYPERVISOR_xen_version( - int cmd, void *arg) -{ - int ret; - unsigned long ignore, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ignore), "=c" (ign2) - : "0" (__HYPERVISOR_xen_version), "1" (cmd), "2" (arg) - : "memory" ); - - return ret; + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); } static inline int HYPERVISOR_console_io( - int cmd, int count, char *str) -{ - int ret; - unsigned long ign1, ign2, ign3; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) - : "0" (__HYPERVISOR_console_io), "1" (cmd), "2" (count), "3" (str) - : "memory" ); - - return ret; + int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); } static inline int HYPERVISOR_physdev_op( - void *physdev_op) -{ - int ret; - unsigned long ign; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign) - : "0" (__HYPERVISOR_physdev_op), "1" (physdev_op) - : "memory" ); - - return ret; + void 
*physdev_op) +{ + return _hypercall1(int, physdev_op, physdev_op); } static inline int HYPERVISOR_grant_table_op( - unsigned int cmd, void *uop, unsigned int count) -{ - int ret; - unsigned long ign1, ign2, ign3; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) - : "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (uop), "3" (count) - : "memory" ); - - return ret; + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); } static inline int HYPERVISOR_update_va_mapping_otherdomain( - unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4, ign5; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), - "=S" (ign4), "=D" (ign5) - : "0" (__HYPERVISOR_update_va_mapping_otherdomain), - "1" (va), "2" ((new_val).pte_low), + unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) +{ + unsigned long pte_hi = 0; #ifdef CONFIG_X86_PAE - "3" ((new_val).pte_high), -#else - "3" (0), + pte_hi = new_val.pte_high; #endif - "4" (flags), "5" (domid) : - "memory" ); - - return ret; + return _hypercall5(int, update_va_mapping_otherdomain, va, + new_val.pte_low, pte_hi, flags, domid); } static inline int HYPERVISOR_vm_assist( - unsigned int cmd, unsigned int type) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_vm_assist), "1" (cmd), "2" (type) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_boot_vcpu( - unsigned long vcpu, vcpu_guest_context_t *ctxt) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt) - : "memory"); - - return ret; -} - -static inline int -HYPERVISOR_vcpu_down( - int vcpu) -{ - int ret; - unsigned long ign1; - /* 
Yes, I really do want to clobber edx here: when we resume a - vcpu after unpickling a multi-processor domain, it returns - here, but clobbers all of the call clobbered registers. */ - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift)) - : "memory", "ecx", "edx" ); - - return ret; -} - -static inline int -HYPERVISOR_vcpu_up( - int vcpu) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift)) - : "memory", "ecx" ); - - return ret; -} - -static inline int -HYPERVISOR_vcpu_pickle( - int vcpu, vcpu_guest_context_t *ctxt) -{ - int ret; - unsigned long ign1, ign2; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)), - "2" (ctxt) - : "memory" ); - - return ret; -} + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_vcpu_op( + int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +} + +#endif /* __HYPERCALL_H__ */ #elif defined(__x86_64__) #define __syscall_clobber "r11","rcx","memory" @@ -792,106 +526,4 @@ } #endif - -static __inline__ int HYPERVISOR_dom0_op(void *dom0_op) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_dom0_op), - _a1 (dom0_op) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_set_debugreg(int reg, unsigned long value) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_debugreg), - _a1 (reg), _a2 (value) : "memory" ); - - return ret; -} - 
-static __inline__ unsigned long HYPERVISOR_get_debugreg(int reg) -{ - unsigned long ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_get_debugreg), - _a1 (reg) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_update_descriptor( - unsigned long pa, unsigned long word1, unsigned long word2) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_update_descriptor), - _a1 (pa), _a2 (word1), _a3 (word2) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_dom_mem_op(void *dom_mem_op) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_memory_op), - _a1 (dom_mem_op) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_multicall(void *call_list, int nr_calls) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_multicall), - _a1 (call_list), _a2 (nr_calls) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_update_va_mapping( - unsigned long page_nr, unsigned long new_val, unsigned long flags) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping), - _a1 (page_nr), _a2 (new_val), _a3 (flags) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_xen_version(int cmd) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_xen_version), - _a1 (cmd) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_console_io(int cmd, int count, char *str) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_console_io), - _a1 (cmd), _a2 (count), _a3 (str) : "memory" ); - - return ret; -} - #endif /* __HYPERVISOR_H__ */ diff -r 7eac3edd0589 -r eee0489b3a17 extras/mini-os/include/os.h --- a/extras/mini-os/include/os.h Tue Oct 25 03:00:35 2005 +++ b/extras/mini-os/include/os.h Sat Oct 29 08:51:35 2005 @@ -24,7 +24,7 @@ #include <xen/xen.h> -#define 
force_evtchn_callback() ((void)HYPERVISOR_xen_version(0)) +#define force_evtchn_callback() ((void)HYPERVISOR_xen_version(0, 0)) #define __KERNEL_CS FLAT_KERNEL_CS #define __KERNEL_DS FLAT_KERNEL_DS @@ -55,6 +55,8 @@ /* Everything below this point is not included by assembler (.S) files. */ #ifndef __ASSEMBLY__ +extern shared_info_t *HYPERVISOR_shared_info; + void trap_init(void); /* diff -r 7eac3edd0589 -r eee0489b3a17 extras/mini-os/include/types.h --- a/extras/mini-os/include/types.h Tue Oct 25 03:00:35 2005 +++ b/extras/mini-os/include/types.h Sat Oct 29 08:51:35 2005 @@ -54,7 +54,14 @@ typedef struct { unsigned long pte; } pte_t; #endif - +typedef u8 uint8_t; +typedef s8 int8_t; +typedef u16 uint16_t; +typedef s16 int16_t; +typedef u32 uint32_t; +typedef s32 int32_t; +typedef u64 uint64_t; +typedef s64 int64_t; #define INT_MAX ((int)(~0U>>1)) diff -r 7eac3edd0589 -r eee0489b3a17 extras/mini-os/kernel.c --- a/extras/mini-os/kernel.c Tue Oct 25 03:00:35 2005 +++ b/extras/mini-os/kernel.c Sat Oct 29 08:51:35 2005 @@ -61,7 +61,7 @@ extern char shared_info[PAGE_SIZE]; -#define __pte(x) ((pte_t) { (0) } ) +#define __pte(x) ((pte_t) { (x) } ) static shared_info_t *map_shared_info(unsigned long pa) { @@ -150,5 +150,5 @@ void do_exit(void) { printk("do_exit called!\n"); - for ( ;; ) HYPERVISOR_shutdown(); + for ( ;; ) HYPERVISOR_sched_op(SCHEDOP_shutdown, SHUTDOWN_crash); } diff -r 7eac3edd0589 -r eee0489b3a17 extras/mini-os/time.c --- a/extras/mini-os/time.c Tue Oct 25 03:00:35 2005 +++ b/extras/mini-os/time.c Sat Oct 29 08:51:35 2005 @@ -208,7 +208,7 @@ struct timeval tv; gettimeofday(&tv); HYPERVISOR_set_timer_op(monotonic_clock() + 1000000LL * (s64) millisecs); - HYPERVISOR_block(); + HYPERVISOR_sched_op(SCHEDOP_block, 0); } diff -r 7eac3edd0589 -r eee0489b3a17 extras/mini-os/xenbus/xenbus_xs.c --- a/extras/mini-os/xenbus/xenbus_xs.c Tue Oct 25 03:00:35 2005 +++ b/extras/mini-os/xenbus/xenbus_xs.c Sat Oct 29 08:51:35 2005 @@ -39,7 +39,7 @@ #include <wait.h> #include 
<sched.h> #include <semaphore.h> -#include "xenstored.h" +#include <xen/io/xs_wire.h> #include "xenbus_comms.h" #define streq(a, b) (strcmp((a), (b)) == 0) @@ -408,7 +408,12 @@ static int xs_acknowledge_watch(const char *token) { +#if 0 return xs_error(xs_single(XS_WATCH_ACK, token, NULL)); +#else + /* XS_WATCH_ACK is no longer available */ + return 0; +#endif } static int xs_unwatch(const char *path, const char *token) diff -r 7eac3edd0589 -r eee0489b3a17 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Tue Oct 25 03:00:35 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Sat Oct 29 08:51:35 2005 @@ -733,6 +733,7 @@ #ifdef CONFIG_XEN { int i, j, k, fpp; + unsigned long va; /* Make sure we have a large enough P->M table. */ phys_to_machine_mapping = alloc_bootmem( @@ -746,9 +747,21 @@ __pa(xen_start_info->mfn_list), PFN_PHYS(PFN_UP(xen_start_info->nr_pages * sizeof(unsigned long)))); - make_pages_readonly((void *)xen_start_info->mfn_list, - PFN_UP(xen_start_info->nr_pages * - sizeof(unsigned long))); + + /* 'Initial mapping' of old p2m table must be destroyed. */ + for (va = xen_start_info->mfn_list; + va < (xen_start_info->mfn_list + + (xen_start_info->nr_pages*sizeof(unsigned long))); + va += PAGE_SIZE) { + HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0); + } + + /* 'Initial mapping' of initrd must be destroyed. 
*/ + for (va = xen_start_info->mod_start; + va < (xen_start_info->mod_start+xen_start_info->mod_len); + va += PAGE_SIZE) { + HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0); + } /* * Initialise the list of the frames that specify the list of diff -r 7eac3edd0589 -r eee0489b3a17 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S Tue Oct 25 03:00:35 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S Sat Oct 29 08:51:35 2005 @@ -5,7 +5,7 @@ #define evtchn_upcall_pending 0 #define evtchn_upcall_mask 1 -#define sizeof_vcpu_shift 3 +#define sizeof_vcpu_shift 4 #ifdef CONFIG_SMP //#define preempt_disable(reg) incl threadinfo_preempt_count(reg) diff -r 7eac3edd0589 -r eee0489b3a17 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Tue Oct 25 03:00:35 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Sat Oct 29 08:51:35 2005 @@ -70,6 +70,9 @@ static unsigned long current_pages; static unsigned long target_pages; +/* VM /proc information for memory */ +extern unsigned long totalram_pages; + /* We may hit the hard limit in Xen. If we do then we remember it. */ static unsigned long hard_limit; @@ -188,12 +191,13 @@ rc = HYPERVISOR_memory_op( XENMEM_increase_reservation, &reservation); if (rc < nr_pages) { + int ret; /* We hit the Xen hard limit: reprobe. */ reservation.extent_start = mfn_list; reservation.nr_extents = rc; - BUG_ON(HYPERVISOR_memory_op( - XENMEM_decrease_reservation, - &reservation) != rc); + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation); + BUG_ON(ret != rc); hard_limit = current_pages + rc - driver_pages; goto out; } @@ -210,11 +214,14 @@ xen_machphys_update(mfn_list[i], pfn); /* Link back into the page tables if not highmem. 
*/ - if (pfn < max_low_pfn) - BUG_ON(HYPERVISOR_update_va_mapping( + if (pfn < max_low_pfn) { + int ret; + ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), pfn_pte_ma(mfn_list[i], PAGE_KERNEL), - 0)); + 0); + BUG_ON(ret); + } /* Relinquish the page back to the allocator. */ ClearPageReserved(page); @@ -223,6 +230,7 @@ } current_pages += nr_pages; + totalram_pages = current_pages; out: balloon_unlock(flags); @@ -238,6 +246,7 @@ struct page *page; void *v; int need_sleep = 0; + int ret; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, @@ -264,8 +273,9 @@ if (!PageHighMem(page)) { v = phys_to_virt(pfn << PAGE_SHIFT); scrub_pages(v, 1); - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)v, __pte_ma(0), 0)); + ret = HYPERVISOR_update_va_mapping( + (unsigned long)v, __pte_ma(0), 0); + BUG_ON(ret); } #ifdef CONFIG_XEN_SCRUB_PAGES else { @@ -291,10 +301,11 @@ reservation.extent_start = mfn_list; reservation.nr_extents = nr_pages; - BUG_ON(HYPERVISOR_memory_op( - XENMEM_decrease_reservation, &reservation) != nr_pages); + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + BUG_ON(ret != nr_pages); current_pages -= nr_pages; + totalram_pages = current_pages; balloon_unlock(flags); @@ -496,6 +507,7 @@ pte_t *pte, struct page *pte_page, unsigned long addr, void *data) { unsigned long mfn = pte_mfn(*pte); + int ret; struct xen_memory_reservation reservation = { .extent_start = &mfn, .nr_extents = 1, @@ -505,8 +517,8 @@ set_pte_at(&init_mm, addr, pte, __pte_ma(0)); phys_to_machine_mapping[__pa(addr) >> PAGE_SHIFT] = INVALID_P2M_ENTRY; - BUG_ON(HYPERVISOR_memory_op( - XENMEM_decrease_reservation, &reservation) != 1); + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + BUG_ON(ret != 1); return 0; } @@ -514,6 +526,7 @@ { unsigned long vstart, flags; unsigned int order = get_order(nr_pages * PAGE_SIZE); + int ret; vstart = __get_free_pages(GFP_KERNEL, order); if (vstart == 
0) @@ -522,8 +535,9 @@ scrub_pages(vstart, 1 << order); balloon_lock(flags); - BUG_ON(generic_page_range( - &init_mm, vstart, PAGE_SIZE << order, dealloc_pte_fn, NULL)); + ret = generic_page_range( + &init_mm, vstart, PAGE_SIZE << order, dealloc_pte_fn, NULL); + BUG_ON(ret); current_pages -= 1UL << order; balloon_unlock(flags); diff -r 7eac3edd0589 -r eee0489b3a17 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Tue Oct 25 03:00:35 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Sat Oct 29 08:51:35 2005 @@ -108,6 +108,7 @@ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; u16 handle; + int ret; for (i = 0; i < nr_pages; i++) { handle = pending_handle(idx, i); @@ -120,8 +121,9 @@ invcount++; } - BUG_ON(HYPERVISOR_grant_table_op( - GNTTABOP_unmap_grant_ref, unmap, invcount)); + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + BUG_ON(ret); } @@ -338,6 +340,7 @@ struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int nbio = 0; request_queue_t *q; + int ret; /* Check that number of segments is sane. 
*/ nseg = req->nr_segments; @@ -367,8 +370,8 @@ map[i].flags |= GNTMAP_readonly; } - BUG_ON(HYPERVISOR_grant_table_op( - GNTTABOP_map_grant_ref, map, nseg)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); + BUG_ON(ret); for (i = 0; i < nseg; i++) { if (unlikely(map[i].handle < 0)) { @@ -493,6 +496,7 @@ { int i; struct page *page; + int ret; blkif_interface_init(); @@ -509,7 +513,8 @@ spin_lock_init(&blkio_schedule_list_lock); INIT_LIST_HEAD(&blkio_schedule_list); - BUG_ON(kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0); + ret = kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES); + BUG_ON(ret < 0); blkif_xenbus_init(); diff -r 7eac3edd0589 -r eee0489b3a17 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Tue Oct 25 03:00:35 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Sat Oct 29 08:51:35 2005 @@ -31,6 +31,7 @@ static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) { struct gnttab_map_grant_ref op; + int ret; op.host_addr = (unsigned long)blkif->blk_ring_area->addr; op.flags = GNTMAP_host_map; @@ -38,8 +39,9 @@ op.dom = blkif->domid; lock_vm_area(blkif->blk_ring_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); unlock_vm_area(blkif->blk_ring_area); + BUG_ON(ret); if (op.handle < 0) { DPRINTK(" Grant table operation failure !\n"); @@ -55,14 +57,16 @@ static void unmap_frontend_page(blkif_t *blkif) { struct gnttab_unmap_grant_ref op; + int ret; op.host_addr = (unsigned long)blkif->blk_ring_area->addr; op.handle = blkif->shmem_handle; op.dev_bus_addr = 0; lock_vm_area(blkif->blk_ring_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); unlock_vm_area(blkif->blk_ring_area); + BUG_ON(ret); } int blkif_map(blkif_t *blkif, unsigned long shared_page, 
unsigned int evtchn) diff -r 7eac3edd0589 -r eee0489b3a17 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Tue Oct 25 03:00:35 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Sat Oct 29 08:51:35 2005 @@ -305,6 +305,7 @@ for (i = info->ring.rsp_cons; i != rp; i++) { unsigned long id; + int ret; bret = RING_GET_RESPONSE(&info->ring, i); id = bret->id; @@ -321,9 +322,10 @@ DPRINTK("Bad return from blkdev data " "request: %x\n", bret->status); - BUG_ON(end_that_request_first( + ret = end_that_request_first( req, (bret->status == BLKIF_RSP_OKAY), - req->hard_nr_sectors)); + req->hard_nr_sectors); + BUG_ON(ret); end_that_request_last(req); break; default: diff -r 7eac3edd0589 -r eee0489b3a17 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue Oct 25 03:00:35 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Sat Oct 29 08:51:35 2005 @@ -413,6 +413,7 @@ unsigned int i, op = 0; struct grant_handle_pair *handle; unsigned long ptep; + int ret; for ( i = 0; i < nr_pages; i++) { @@ -440,8 +441,8 @@ BLKTAP_INVALIDATE_HANDLE(handle); } - BUG_ON(HYPERVISOR_grant_table_op( - GNTTABOP_unmap_grant_ref, unmap, op)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, op); + BUG_ON(ret); if (blktap_vma != NULL) zap_page_range(blktap_vma, @@ -673,6 +674,7 @@ struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; int op, ret; unsigned int nseg; + int retval; /* Check that number of segments is sane. 
*/ nseg = req->nr_segments; @@ -740,8 +742,8 @@ op++; } - BUG_ON(HYPERVISOR_grant_table_op( - GNTTABOP_map_grant_ref, map, op)); + retval = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op); + BUG_ON(retval); op = 0; for (i = 0; i < (req->nr_segments*2); i += 2) { @@ -877,7 +879,8 @@ spin_lock_init(&blkio_schedule_list_lock); INIT_LIST_HEAD(&blkio_schedule_list); - BUG_ON(kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0); + i = kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES); + BUG_ON(i<0); blkif_xenbus_init(); diff -r 7eac3edd0589 -r eee0489b3a17 linux-2.6-xen-sparse/drivers/xen/blktap/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Tue Oct 25 03:00:35 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Sat Oct 29 08:51:35 2005 @@ -31,6 +31,7 @@ static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) { struct gnttab_map_grant_ref op; + int ret; op.host_addr = (unsigned long)blkif->blk_ring_area->addr; op.flags = GNTMAP_host_map; @@ -38,8 +39,9 @@ op.dom = blkif->domid; lock_vm_area(blkif->blk_ring_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); unlock_vm_area(blkif->blk_ring_area); + BUG_ON(ret); if (op.handle < 0) { DPRINTK(" Grant table operation failure !\n"); @@ -55,14 +57,16 @@ static void unmap_frontend_page(blkif_t *blkif) { struct gnttab_unmap_grant_ref op; + int ret; op.host_addr = (unsigned long)blkif->blk_ring_area->addr; op.handle = blkif->shmem_handle; op.dev_bus_addr = 0; lock_vm_area(blkif->blk_ring_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); unlock_vm_area(blkif->blk_ring_area); + BUG_ON(ret); } int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) diff -r 7eac3edd0589 -r eee0489b3a17 linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c --- 
a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Tue Oct 25 03:00:35 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Sat Oct 29 08:51:35 2005 @@ -282,6 +282,7 @@ case IOCTL_EVTCHN_UNBIND: { struct ioctl_evtchn_unbind unbind; + int ret; rc = -EFAULT; if (copy_from_user(&unbind, (void *)arg, sizeof(unbind))) @@ -306,7 +307,8 @@ op.cmd = EVTCHNOP_close; op.u.close.port = unbind.port; - BUG_ON(HYPERVISOR_event_channel_op(&op)); + ret = HYPERVISOR_event_channel_op(&op); + BUG_ON(ret); rc = 0; break; @@ -399,6 +401,7 @@ for (i = 0; i < NR_EVENT_CHANNELS; i++) { + int ret; if (port_user[i] != u) continue; @@ -407,7 +410,8 @@ op.cmd = EVTCHNOP_close; op.u.close.port = i; - BUG_ON(HYPERVISOR_event_channel_op(&op)); + ret = HYPERVISOR_event_channel_op(&op); + BUG_ON(ret); } spin_unlock_irq(&port_user_lock); diff -r 7eac3edd0589 -r eee0489b3a17 linux-2.6-xen-sparse/drivers/xen/netback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Tue Oct 25 03:00:35 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Sat Oct 29 08:51:35 2005 @@ -115,6 +115,7 @@ netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref) { struct gnttab_map_grant_ref op; + int ret; op.host_addr = (unsigned long)netif->comms_area->addr; op.flags = GNTMAP_host_map; @@ -122,8 +123,9 @@ op.dom = netif->domid; lock_vm_area(netif->comms_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); unlock_vm_area(netif->comms_area); + BUG_ON(ret); if (op.handle < 0) { DPRINTK(" Gnttab failure mapping tx_ring_ref!\n"); @@ -139,8 +141,9 @@ op.dom = netif->domid; lock_vm_area(netif->comms_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); unlock_vm_area(netif->comms_area); + BUG_ON(ret); if (op.handle < 0) { DPRINTK(" Gnttab failure mapping rx_ring_ref!\n"); @@ -156,22 +159,25 @@ static void 
unmap_frontend_pages(netif_t *netif) { struct gnttab_unmap_grant_ref op; + int ret; op.host_addr = (unsigned long)netif->comms_area->addr; op.handle = netif->tx_shmem_handle; op.dev_bus_addr = 0; lock_vm_area(netif->comms_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); unlock_vm_area(netif->comms_area); + BUG_ON(ret); op.host_addr = (unsigned long)netif->comms_area->addr + PAGE_SIZE; op.handle = netif->rx_shmem_handle; op.dev_bus_addr = 0; lock_vm_area(netif->comms_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); unlock_vm_area(netif->comms_area); + BUG_ON(ret); } int netif_map(netif_t *netif, unsigned long tx_ring_ref, diff -r 7eac3edd0589 -r eee0489b3a17 linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Tue Oct 25 03:00:35 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Sat Oct 29 08:51:35 2005 @@ -112,9 +112,12 @@ spin_lock_irqsave(&mfn_lock, flags); if ( alloc_index != MAX_MFN_ALLOC ) mfn_list[alloc_index++] = mfn; - else - BUG_ON(HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &reservation) != 1); + else { + int ret; + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation); + BUG_ON(ret != 1); + } spin_unlock_irqrestore(&mfn_lock, flags); } #endif @@ -159,13 +162,15 @@ */ if (skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb)) { int hlen = skb->data - skb->head; + int ret; struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len); if ( unlikely(nskb == NULL) ) goto drop; skb_reserve(nskb, hlen); __skb_put(nskb, skb->len); - BUG_ON(skb_copy_bits(skb, -hlen, nskb->data - hlen, - skb->len + hlen)); + ret = skb_copy_bits(skb, -hlen, nskb->data - hlen, + skb->len + hlen); + BUG_ON(ret); nskb->dev = skb->dev; nskb->proto_csum_valid = skb->proto_csum_valid; dev_kfree_skb(skb); @@ 
-218,6 +223,7 @@ struct sk_buff *skb; u16 notify_list[NETIF_RX_RING_SIZE]; int notify_nr = 0; + int ret; skb_queue_head_init(&rxq); @@ -279,7 +285,8 @@ mcl++; mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; - BUG_ON(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0); + ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); + BUG_ON(ret != 0); mcl = rx_mcl; if( HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, @@ -421,6 +428,7 @@ u16 pending_idx; PEND_RING_IDX dc, dp; netif_t *netif; + int ret; dc = dealloc_cons; dp = dealloc_prod; @@ -436,8 +444,9 @@ gop->handle = grant_tx_ref[pending_idx]; gop++; } - BUG_ON(HYPERVISOR_grant_table_op( - GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops)); + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops); + BUG_ON(ret); while (dealloc_cons != dp) { pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)]; @@ -477,6 +486,7 @@ NETIF_RING_IDX i; gnttab_map_grant_ref_t *mop; unsigned int data_len; + int ret; if (dealloc_cons != dealloc_prod) net_tx_action_dealloc(); @@ -599,8 +609,9 @@ if (mop == tx_map_ops) return; - BUG_ON(HYPERVISOR_grant_table_op( - GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops)); + ret = HYPERVISOR_grant_table_op( + GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops); + BUG_ON(ret); mop = tx_map_ops; while ((skb = __skb_dequeue(&tx_queue)) != NULL) { diff -r 7eac3edd0589 -r eee0489b3a17 linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Tue Oct 25 03:00:35 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Sat Oct 29 08:51:35 2005 @@ -78,6 +78,7 @@ static int map_frontend_page(tpmif_t *tpmif, unsigned long shared_page) { + int ret; struct gnttab_map_grant_ref op = { .host_addr = (unsigned long)tpmif->tx_area->addr, .flags = GNTMAP_host_map, @@ -86,8 +87,9 @@ }; lock_vm_area(tpmif->tx_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); + 
ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); unlock_vm_area(tpmif->tx_area); + BUG_ON(ret); if (op.handle < 0) { DPRINTK(" Grant table operation failure !\n"); @@ -104,14 +106,16 @@ unmap_frontend_page(tpmif_t *tpmif) { struct gnttab_unmap_grant_ref op; + int ret; op.host_addr = (unsigned long)tpmif->tx_area->addr; op.handle = tpmif->shmem_handle; op.dev_bus_addr = 0; lock_vm_area(tpmif->tx_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); unlock_vm_area(tpmif->tx_area); + BUG_ON(ret); } int diff -r 7eac3edd0589 -r eee0489b3a17 linux-2.6-xen-sparse/drivers/xen/util.c --- a/linux-2.6-xen-sparse/drivers/xen/util.c Tue Oct 25 03:00:35 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/util.c Sat Oct 29 08:51:35 2005 @@ -34,7 +34,9 @@ void free_vm_area(struct vm_struct *area) { - BUG_ON(remove_vm_area(area->addr) != area); + struct vm_struct *ret; + ret = remove_vm_area(area->addr); + BUG_ON(ret != area); kfree(area); } diff -r 7eac3edd0589 -r eee0489b3a17 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Tue Oct 25 03:00:35 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Sat Oct 29 08:51:35 2005 @@ -738,6 +738,7 @@ unsigned long page; evtchn_op_t op = { 0 }; + int ret; /* Allocate page. 
*/ @@ -758,7 +759,8 @@ op.u.alloc_unbound.dom = DOMID_SELF; op.u.alloc_unbound.remote_dom = 0; - BUG_ON(HYPERVISOR_event_channel_op(&op)); + ret = HYPERVISOR_event_channel_op(&op); + BUG_ON(ret); xen_start_info->store_evtchn = op.u.alloc_unbound.port; /* And finally publish the above info in /proc/xen */ diff -r 7eac3edd0589 -r eee0489b3a17 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Tue Oct 25 03:00:35 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Sat Oct 29 08:51:35 2005 @@ -82,7 +82,7 @@ static LIST_HEAD(watches); static DEFINE_SPINLOCK(watches_lock); -/* List of pending watch calbback events, and a lock to protect it. */ +/* List of pending watch callback events, and a lock to protect it. */ static LIST_HEAD(watch_events); static DEFINE_SPINLOCK(watch_events_lock); @@ -544,11 +544,8 @@ char *printf_buffer = NULL, *path_buffer = NULL; printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); - if (printf_buffer == NULL) { - printk("xenbus: failed to write error node for %s (%d): %d\n", - dev->nodename, err, errno); + if (printf_buffer == NULL) goto fail; - } len = sprintf(printf_buffer, "%i ", -err); va_start(ap, fmt); @@ -561,8 +558,8 @@ path_buffer = error_path(dev); if (path_buffer == NULL) { - printk("xenbus: failed to write error node for %s (%s): %d\n", - dev->nodename, printf_buffer, errno); + printk("xenbus: failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); goto fail; } @@ -587,8 +584,8 @@ char *path_buffer = error_path(dev); if (path_buffer == NULL) { - printk("xenbus: failed to clear error node for %s: " - "%d\n", dev->nodename, errno); + printk("xenbus: failed to clear error node for %s\n", + dev->nodename); return; } diff -r 7eac3edd0589 -r eee0489b3a17 tools/Makefile --- a/tools/Makefile Tue Oct 25 03:00:35 2005 +++ b/tools/Makefile Sat Oct 29 08:51:35 2005 @@ -16,9 +16,6 @@ SUBDIRS += vtpm endif SUBDIRS += xenstat - -.PHONY: all install clean 
check check_clean ioemu eioemuinstall ioemuclean - # These don't cross-compile ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH)) SUBDIRS += python diff -r 7eac3edd0589 -r eee0489b3a17 tools/console/daemon/io.c --- a/tools/console/daemon/io.c Tue Oct 25 03:00:35 2005 +++ b/tools/console/daemon/io.c Sat Oct 29 08:51:35 2005 @@ -512,7 +512,9 @@ enum_domains(); else if (sscanf(vec[XS_WATCH_TOKEN], "dom%u", &domid) == 1) { dom = lookup_domain(domid); - if (dom->is_dead == false) + /* We may get watches firing for domains that have recently + been removed, so dom may be NULL here. */ + if (dom && dom->is_dead == false) domain_create_ring(dom); } diff -r 7eac3edd0589 -r eee0489b3a17 tools/examples/Makefile --- a/tools/examples/Makefile Tue Oct 25 03:00:35 2005 +++ b/tools/examples/Makefile Sat Oct 29 08:51:35 2005 @@ -9,6 +9,7 @@ # Init scripts. XEND_INITD = init.d/xend XENDOMAINS_INITD = init.d/xendomains +XENDOMAINS_SYSCONFIG = init.d/sysconfig.xendomains # Xen configuration dir and configs to go there. 
XEN_CONFIG_DIR = /etc/xen @@ -24,8 +25,9 @@ XEN_SCRIPTS += network-route vif-route XEN_SCRIPTS += network-nat vif-nat XEN_SCRIPTS += block -XEN_SCRIPTS += block-enbd +XEN_SCRIPTS += block-enbd block-nbd XEN_SCRIPTS += xen-hotplug-common.sh xen-network-common.sh vif-common.sh +XEN_SCRIPTS += block-common.sh XEN_HOTPLUG_DIR = /etc/hotplug XEN_HOTPLUG_SCRIPTS = xen-backend.agent @@ -52,8 +54,10 @@ install-initd: [ -d $(DESTDIR)/etc/init.d ] || $(INSTALL_DIR) $(DESTDIR)/etc/init.d + [ -d $(DESTDIR)/etc/sysconfig ] || $(INSTALL_DIR) $(DESTDIR)/etc/sysconfig $(INSTALL_PROG) $(XEND_INITD) $(DESTDIR)/etc/init.d $(INSTALL_PROG) $(XENDOMAINS_INITD) $(DESTDIR)/etc/init.d + $(INSTALL_PROG) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)/etc/sysconfig/xendomains install-configs: $(XEN_CONFIGS) [ -d $(DESTDIR)$(XEN_CONFIG_DIR) ] || \ diff -r 7eac3edd0589 -r eee0489b3a17 tools/examples/block --- a/tools/examples/block Tue Oct 25 03:00:35 2005 +++ b/tools/examples/block Sat Oct 29 08:51:35 2005 @@ -1,7 +1,7 @@ #!/bin/sh dir=$(dirname "$0") -. "$dir/xen-hotplug-common.sh" +. "$dir/block-common.sh" expand_dev() { local dev @@ -16,21 +16,9 @@ echo -n $dev } -write_dev() { - local major - local minor - local pdev - - major=$(stat -L -c %t "$1") - minor=$(stat -L -c %T "$1") - pdev=$(printf "0x%02x%02x" 0x$major 0x$minor) - xenstore_write "$XENBUS_PATH"/physical-device $pdev \ - "$XENBUS_PATH"/node $1 -} - t=$(xenstore_read "$XENBUS_PATH"/type || true) -case $1 in +case "$command" in bind) p=$(xenstore_read "$XENBUS_PATH"/params) case $t in diff -r 7eac3edd0589 -r eee0489b3a17 tools/examples/block-enbd --- a/tools/examples/block-enbd Tue Oct 25 03:00:35 2005 +++ b/tools/examples/block-enbd Sat Oct 29 08:51:35 2005 @@ -1,26 +1,20 @@ #!/bin/sh # Usage: block-enbd [bind server ctl_port |unbind node] -# -# The file argument to the bind command is the file we are to bind to a -# loop device. # # The node argument to unbind is the name of the device node we are to # unbind. 
# # This assumes you're running a correctly configured server at the other end! -set -e +dir=$(dirname "$0") +. "$dir/block-common.sh" -case $1 in +case "$command" in bind) for dev in /dev/nd*; do if nbd-client $2:$3 $dev; then - major=$(stat -L -c %t "$dev") - minor=$(stat -L -c %T "$dev") - pdev=$(printf "0x%02x%02x" 0x$major 0x$minor) - xenstore-write "$XENBUS_PATH"/physical-device $pdev \ - "$XENBUS_PATH"/node $dev + write_dev $dev exit 0 fi done diff -r 7eac3edd0589 -r eee0489b3a17 tools/examples/init.d/xendomains --- a/tools/examples/init.d/xendomains Tue Oct 25 03:00:35 2005 +++ b/tools/examples/init.d/xendomains Sat Oct 29 08:51:35 2005 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # /etc/init.d/xendomains # Start / stop domains automatically when domain 0 boots / shuts down. @@ -22,101 +22,418 @@ # Should-Stop: # Default-Start: 3 4 5 # Default-Stop: 0 1 2 6 +# Default-Enabled: yes # Short-Description: Start/stop secondary xen domains # Description: Start / stop domains automatically when domain 0 # boots / shuts down. ### END INIT INFO +# Correct exit code would probably be 5, but it's enough +# if xend complains if we're not running as privileged domain if ! [ -e /proc/xen/privcmd ]; then exit 0 fi -RETVAL=0 - -INITD=/etc/init.d - -AUTODIR=/etc/xen/auto LOCKFILE=/var/lock/subsys/xendomains - -if [ -e /lib/lsb ]; then - # assume an LSB-compliant distro (Debian with LSB package, - # recent-enough SuSE, others...) - - . /lib/lsb/init-functions # source LSB standard functions - - on_fn_exit() +XENDOM_CONFIG=/etc/sysconfig/xendomains + +test -r $XENDOM_CONFIG || { echo "$XENDOM_CONFIG not existing"; + if [ "$1" = "stop" ]; then exit 0; + else exit 6; fi; } + +. $XENDOM_CONFIG + +# Use the SUSE rc_ init script functions; +# emulate them on LSB, RH and other systems +if test -e /etc/rc.status; then + # SUSE rc script library + . 
/etc/rc.status +else + _cmd=$1 + declare -a _SMSG + if test "${_cmd}" = "status"; then + _SMSG=(running dead dead unused unknown) + _RC_UNUSED=3 + else + _SMSG=(done failed failed missed failed skipped unused failed failed) + _RC_UNUSED=6 + fi + if test -e /lib/lsb/init-functions; then + # LSB + . /lib/lsb/init-functions + echo_rc() + { + if test ${_RC_RV} = 0; then + log_success_msg " [${_SMSG[${_RC_RV}]}] " + else + log_failure_msg " [${_SMSG[${_RC_RV}]}] " + fi + } + elif test -e /etc/init.d/functions; then + # REDHAT + . /etc/init.d/functions + echo_rc() + { + #echo -n " [${_SMSG[${_RC_RV}]}] " + if test ${_RC_RV} = 0; then + success " [${_SMSG[${_RC_RV}]}] " + else + failure " [${_SMSG[${_RC_RV}]}] " + fi + } + else + # emulate it + echo_rc() + { + echo " [${_SMSG[${_RC_RV}]}] " + } + fi + rc_reset() { _RC_RV=0; } + rc_failed() { - if [ $RETVAL -eq 0 ]; then - log_success_msg - else - log_failure_msg - fi + if test -z "$1"; then + _RC_RV=1; + elif test "$1" != "0"; then + _RC_RV=$1; + fi + return ${_RC_RV} } -elif [ -r $INITD/functions ]; then - # assume a Redhat-like distro - . $INITD/functions # source Redhat functions - - on_fn_exit() + rc_check() { - if [ $RETVAL -eq 0 ]; then - success - else - failure - fi - - echo + return rc_failed $? + } + rc_status() + { + rc_failed $? 
+ if test "$1" = "-r"; then _RC_RV=0; shift; fi + if test "$1" = "-s"; then rc_failed 5; echo_rc; rc_failed 3; shift; fi + if test "$1" = "-u"; then rc_failed ${_RC_UNUSED}; echo_rc; rc_failed 3; shift; fi + if test "$1" = "-v"; then echo_rc; shift; fi + if test "$1" = "-r"; then _RC_RV=0; shift; fi + return ${_RC_RV} } -else - # none of the above - LOCKFILE=/var/lock/xendomains - - on_fn_exit() + rc_exit() { exit ${_RC_RV}; } + rc_active() { - echo + if test -z "$RUNLEVEL"; then read RUNLEVEL REST < <(/sbin/runlevel); fi + if test -e /etc/init.d/S[0-9][0-9]${1}; then return 0; fi + return 1 } fi - - -start() { - if [ -f $LOCKFILE ]; then return; fi - - echo -n $"Starting auto Xen domains:" - - # We expect config scripts for auto starting domains to be in - # AUTODIR - they could just be symlinks to files elsewhere - if [ -d $AUTODIR ] && [ $(ls $AUTODIR | wc -l) -gt 0 ]; then +if ! which usleep >&/dev/null +then + usleep() + { + if [ -n "$1" ] + then + sleep $(( $1 / 1000 )) + fi + } +fi + +# Reset status of this service +rc_reset + +## +# Returns 0 (success) if the given parameter names a directory, and that +# directory is not empty. +# +contains_something() +{ + if [ -d "$1" ] && [ `/bin/ls $1 | wc -l` -gt 0 ] + then + return 0 + else + return 1 + fi +} + +# read name from xen config file +rdname() +{ + NM=`grep '^name *=' $1 | sed -e 's/^name *= *"\([^"]*\)".*$/\1/' -e 's/%[id]/[0-9]*/g'` +} + +rdnames() +{ + NAMES= + if ! 
contains_something "$XENDOMAINS_AUTO" + then + return + fi + for dom in $XENDOMAINS_AUTO/*; do + rdname $dom + if test -z $NAMES; then + NAMES=$NM; + else + NAMES="$NAMES|$NM" + fi + done +} + +parseln() +{ + name=`echo "$1" | cut -c0-17` + name=${name%% *} + rest=`echo "$1" | cut -c18- ` + read id mem cpu vcpu state tm < <(echo "$rest") +} + +is_running() +{ + rdname $1 + RC=1 + while read LN; do + parseln "$LN" + if test $id = 0; then continue; fi + case $name in + ($NM) + RC=0 + ;; + esac + done < <(xm list | grep -v '^Name') + return $RC +} + +start() +{ + if [ -f $LOCKFILE ]; then + echo -n "xendomains already running (lockfile exists)" + return; + fi + + if [ "$XENDOMAINS_RESTORE" = "true" ] && + contains_something "$XENDOMAINS_SAVE" + then + mkdir -p $(dirname "$LOCKFILE") touch $LOCKFILE - - # Create all domains with config files in AUTODIR. - for dom in $AUTODIR/*; do - xm create --quiet --defconfig $dom + echo -n "Restoring Xen domains:" + for dom in $XENDOMAINS_SAVE/*; do + echo -n " ${dom##*/}" + xm restore $dom if [ $? -ne 0 ]; then - RETVAL=$? + rc_failed $? + echo -n '!' + else + # mv $dom ${dom%/*}/.${dom##*/} + rm $dom fi done - - fi - - on_fn_exit + fi + + if contains_something "$XENDOMAINS_AUTO" + then + touch $LOCKFILE + echo -n "Starting auto Xen domains:" + # We expect config scripts for auto starting domains to be in + # XENDOMAINS_AUTO - they could just be symlinks to files elsewhere + + # Create all domains with config files in XENDOMAINS_AUTO. + # TODO: We should record which domain name belongs + # so we have the option to selectively shut down / migrate later + for dom in $XENDOMAINS_AUTO/*; do + echo -n " ${dom##*/}" + if is_running $dom; then + echo -n "(skip)" + else + xm create --quiet --defconfig $dom + if [ $? -ne 0 ]; then + rc_failed $? + echo -n '!' 
+ fi + fi + done + fi +} + +all_zombies() +{ + while read LN; do + parseln "$LN" + if test $id = 0; then continue; fi + if test "$state" != "-b---d" -a "$state" != "-----d"; then + return 1; + fi + done < <(xm list | grep -v '^Name') + return 0 +} + +# Wait for max $XENDOMAINS_STOP_MAXWAIT for xm $1 to finish; +# if it has not exited by that time kill it, so the init script will +# succeed within a finite amount of time; if $2 is nonnull, it will +# kill the command as well as soon as no domain (except for zombies) +# are left (used for shutdown --all). +watchdog_xm() +{ + if test -z "$XENDOMAINS_STOP_MAXWAIT" -o "$XENDOMAINS_STOP_MAXWAIT" = "0"; then + exit + fi + usleep 20000 + for no in `seq 0 $XENDOMAINS_STOP_MAXWAIT`; do + # exit if xm save/migrate/shutdown is finished + PSAX=`ps axlw | grep "xm $1" | grep -v grep` + if test -z "$PSAX"; then exit; fi + echo -n "."; sleep 1 + # go to kill immediately if there's only zombies left + if all_zombies && test -n "$2"; then break; fi + done + sleep 1 + read PSF PSUID PSPID PSPPID < <(echo "$PSAX") + # kill xm $1 + kill $PSPID >/dev/null 2>&1 } stop() { + # Collect list of domains to shut down + if test "$XENDOMAINS_AUTO_ONLY" = "true"; then + rdnames + fi + echo -n "Shutting down Xen domains:" + while read LN; do + parseln "$LN" + if test $id = 0; then continue; fi + echo -n " $name" + if test "$XENDOMAINS_AUTO_ONLY" = "true"; then + case $name in + ($NAMES) + # nothing + ;; + (*) + echo -n "(skip)" + continue + ;; + esac + fi + # XENDOMAINS_SYSRQ should be something like just "s" + # or "s e i u" or even "s e s i u o" + # for the latter, you should set XENDOMAINS_USLEEP to 1200000 or so + if test -n "$XENDOMAINS_SYSRQ"; then + for sysrq in $XENDOMAINS_SYSRQ; do + echo -n "(SR-$sysrq)" + xm sysrq $id $sysrq + if test $? -ne 0; then + rc_failed $? + echo -n '!' 
+ fi + # usleep just ignores empty arg + usleep $XENDOMAINS_USLEEP + done + fi + if test "$state" = "-b---d" -o "$state" = "-----d"; then + echo -n "(zomb)" + continue + fi + if test -n "$XENDOMAINS_MIGRATE"; then + echo -n "(migr)" + watchdog_xm migrate & + WDOG_PID=$! + xm migrate $id $XENDOMAINS_MIGRATE + if test $? -ne 0; then + rc_failed $? + echo -n '!' + kill $WDOG_PID >/dev/null 2>&1 + else + kill $WDOG_PID >/dev/null 2>&1 + continue + fi + fi + if test -n "$XENDOMAINS_SAVE"; then + echo -n "(save)" + watchdog_xm save & + WDOG_PID=$! + xm save $id $XENDOMAINS_SAVE/$name + if test $? -ne 0; then + rc_failed $? + echo -n '!' + kill $WDOG_PIG >/dev/null 2>&1 + else + kill $WDOG_PIG >/dev/null 2>&1 + continue + fi + fi + if test -n "$XENDOMAINS_SHUTDOWN"; then + # XENDOMAINS_SHUTDOWN should be "--halt --wait" + echo -n "(shut)" + watchdog_xm shutdown & + WDOG_PID=$! + xm shutdown $id $XENDOMAINS_SHUTDOWN + if test $? -ne 0; then + rc_failed $? + echo -n '!' + fi + kill $WDOG_PIG >/dev/null 2>&1 + fi + done < <(xm list | grep -v '^Name') + # NB. this shuts down ALL Xen domains (politely), not just the ones in # AUTODIR/* # This is because it's easier to do ;-) but arguably if this script is run # on system shutdown then it's also the right thing to do. - - echo -n $"Shutting down all Xen domains:" - - xm shutdown --all --wait --halt - - RETVAL=$? - - [ $RETVAL -eq 0 ] && rm -f $LOCKFILE - - on_fn_exit + if ! all_zombies && test -n "$XENDOMAINS_SHUTDOWN_ALL"; then + # XENDOMAINS_SHUTDOWN_ALL should be "--all --halt --wait" + echo -n " SHUTDOWN_ALL " + watchdog_xm shutdown 1 & + WDOG_PID=$! + xm shutdown $XENDOMAINS_SHUTDOWN_ALL + if test $? -ne 0; then + rc_failed $? + echo -n '!' 
+ fi + kill $WDOG_PID >/dev/null 2>&1 + fi + + # Unconditionally delete lock file + rm -f $LOCKFILE +} + +check_domain_up() +{ + while read LN; do + parseln "$LN" + if test $id = 0; then continue; fi + case $name in + ($1) + return 0 + ;; + esac + done < <(xm list | grep -v "^Name") + return 1 +} + +check_all_auto_domains_up() +{ + if ! contains_something "$XENDOMAINS_AUTO" + then + return 0 + fi + missing= + for nm in $XENDOMAINS_AUTO/*; do + rdname $nm + found=0 + if check_domain_up "$NM"; then + echo -n " $name" + else + missing="$missing $NM" + fi + done + if test -n "$missing"; then + echo -n " MISS AUTO:$missing" + return 1 + fi + return 0 +} + +check_all_saved_domains_up() +{ + if ! contains_something "$XENDOMAINS_SAVE" + then + return 0 + fi + missing=`/bin/ls $XENDOMAINS_SAVE` + echo -n " MISS SAVED: " $missing + return 1 } # This does NOT necessarily restart all running domains: instead it @@ -124,47 +441,56 @@ # AUTODIR. If other domains have been started manually then they will # not get restarted. # Commented out to avoid confusion! -# -#restart() -#{ -# stop -# start -#} - -# same as restart for now - commented out to avoid confusion -#reload() -#{ -# restart -#} + +restart() +{ + stop + start +} + +reload() +{ + restart +} case "$1" in start) start + rc_status + if test -f $LOCKFILE; then rc_status -v; fi ;; stop) stop - ;; - -# The following are commented out to disable them by default to avoid confusion -# - see the notes above -# -# restart) -# restart -# ;; -# -# reload) -# reload -# ;; + rc_status -v + ;; + + restart) + restart + ;; + reload) + reload + ;; status) - xm list + echo -n "Checking for xendomains:" + if test ! 
-f $LOCKFILE; then + rc_failed 3 + else + check_all_auto_domains_up + rc_status + check_all_saved_domains_up + rc_status + fi + rc_status -v ;; *) - echo $"Usage: $0 {start|stop|status}" + echo "Usage: $0 {start|stop|restart|reload|status}" + rc_failed 3 + rc_status -v ;; esac -exit $RETVAL +rc_exit diff -r 7eac3edd0589 -r eee0489b3a17 tools/examples/network-bridge --- a/tools/examples/network-bridge Tue Oct 25 03:00:35 2005 +++ b/tools/examples/network-bridge Sat Oct 29 08:51:35 2005 @@ -208,7 +208,7 @@ if ip link show ${vdev} 2>/dev/null >/dev/null; then mac=`ip link show ${netdev} | grep 'link\/ether' | sed -e 's/.*ether \(..:..:..:..:..:..\).*/\1/'` - eval `/sbin/getcfg -d /etc/sysconfig/network/ -f ifcfg- -- ${netdev}` + preiftransfer ${netdev} transfer_addrs ${netdev} ${vdev} if ! ifdown ${netdev}; then # if ifup didn't work, see if we have an ip= on cmd line @@ -231,7 +231,7 @@ ip link set ${bridge} up ip link set ${vif0} up ip link set ${pdev} up - if ! ifup ${HWD_CONFIG_0} ${netdev} ; then + if ! ifup ${netdev} ; then if [ ${kip} ] ; then # use the addresses we grocked from /proc/cmdline if [ -z "${kmask}" ]; then diff -r 7eac3edd0589 -r eee0489b3a17 tools/examples/xen-hotplug-common.sh --- a/tools/examples/xen-hotplug-common.sh Tue Oct 25 03:00:35 2005 +++ b/tools/examples/xen-hotplug-common.sh Sat Oct 29 08:51:35 2005 @@ -1,3 +1,21 @@ +# +# Copyright (c) 2005 XenSource Ltd. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + + set -e export PATH="/sbin:/bin:/usr/bin:/usr/sbin:$PATH" diff -r 7eac3edd0589 -r eee0489b3a17 tools/examples/xen-network-common.sh --- a/tools/examples/xen-network-common.sh Tue Oct 25 03:00:35 2005 +++ b/tools/examples/xen-network-common.sh Sat Oct 29 08:51:35 2005 @@ -16,11 +16,40 @@ # -# Gentoo doesn't have ifup/ifdown: define appropriate alternatives -if ! which ifup >&/dev/null +# On SuSE it is necessary to run a command before transferring addresses and +# routes from the physical interface to the virtual. This command creates a +# variable $HWD_CONFIG_0 that specifies the appropriate configuration for +# ifup. + +# Gentoo doesn't have ifup/ifdown, so we define appropriate alternatives. + +# Other platforms just use ifup / ifdown directly. + +## +# preiftransfer +# +# @param $1 The current name for the physical device, which is also the name +# that the virtual device will take once the physical device has +# been renamed. + +if [ -e /etc/SuSE-release ] +then + preiftransfer() + { + eval `/sbin/getcfg -d /etc/sysconfig/network/ -f ifcfg- -- $1` + } + ifup() + { + /sbin/ifup ${HWD_CONFIG_0} $1 + } +elif ! which ifup >&/dev/null then if [ -e /etc/conf.d/net ] then + preiftransfer() + { + true + } ifup() { /etc/init.d/net.$1 start @@ -34,4 +63,9 @@ "You don't have ifup and don't seem to be running Gentoo either!" exit 1 fi +else + preiftransfer() + { + true + } fi diff -r 7eac3edd0589 -r eee0489b3a17 tools/ioemu/hw/ide.c --- a/tools/ioemu/hw/ide.c Tue Oct 25 03:00:35 2005 +++ b/tools/ioemu/hw/ide.c Sat Oct 29 08:51:35 2005 @@ -22,6 +22,7 @@ * THE SOFTWARE. 
*/ #include "vl.h" +#include <pthread.h> /* debug IDE devices */ //#define DEBUG_IDE @@ -359,6 +360,48 @@ IDEState ide_if[4]; BMDMAState bmdma[2]; } PCIIDEState; + +#define DMA_MULTI_THREAD + +#ifdef DMA_MULTI_THREAD + +static int file_pipes[2]; + +static void ide_dma_loop(BMDMAState *bm); +static void dma_thread_loop(BMDMAState *bm); + +static void *dma_thread_func(void* opaque) +{ + BMDMAState* req; + + while (read(file_pipes[0], &req, sizeof(req))) { + dma_thread_loop(req); + } + + return NULL; +} + +static void dma_create_thread() +{ + pthread_t tid; + int rt; + + if (pipe(file_pipes) != 0){ + fprintf(stderr, "create pipe failed\n"); + exit(1); + } + + if ( (rt = pthread_create(&tid, NULL, dma_thread_func, NULL)) ) { + fprintf(stderr, "Oops, dma thread creation failed, errno=%d\n", rt); + exit(1); + } + + if ( (rt = pthread_detach(tid)) ) { + fprintf(stderr, "Oops, dma thread detachment failed, errno=%d\n", rt); + exit(1); + } +} +#endif //DMA_MULTI_THREAD static void ide_dma_start(IDEState *s, IDEDMAFunc *dma_cb); @@ -1978,7 +2021,15 @@ /* XXX: full callback usage to prepare non blocking I/Os support - error handling */ +#ifdef DMA_MULTI_THREAD static void ide_dma_loop(BMDMAState *bm) +{ + write(file_pipes[1], &bm, sizeof(bm)); +} +static void dma_thread_loop(BMDMAState *bm) +#else +static void ide_dma_loop(BMDMAState *bm) +#endif //DMA_MULTI_THREAD { struct { uint32_t addr; @@ -2166,6 +2217,9 @@ d->ide_if[i].pci_dev = (PCIDevice *)d; ide_init2(&d->ide_if[0], 16, hd_table[0], hd_table[1]); ide_init2(&d->ide_if[2], 16, hd_table[2], hd_table[3]); +#ifdef DMA_MULTI_THREAD + dma_create_thread(); +#endif //DMA_MULTI_THREAD } /* hd_table must contain 4 block drivers */ @@ -2196,6 +2250,9 @@ ide_init2(&d->ide_if[2], 15, hd_table[2], hd_table[3]); ide_init_ioport(&d->ide_if[0], 0x1f0, 0x3f6); ide_init_ioport(&d->ide_if[2], 0x170, 0x376); +#ifdef DMA_MULTI_THREAD + dma_create_thread(); +#endif //DMA_MULTI_THREAD } 
/***********************************************************/ diff -r 7eac3edd0589 -r eee0489b3a17 tools/libxc/xc_elf.h --- a/tools/libxc/xc_elf.h Tue Oct 25 03:00:35 2005 +++ b/tools/libxc/xc_elf.h Sat Oct 29 08:51:35 2005 @@ -24,26 +24,26 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -typedef u_int8_t Elf_Byte; - -typedef u_int32_t Elf32_Addr; /* Unsigned program address */ -typedef u_int32_t Elf32_Off; /* Unsigned file offset */ +typedef uint8_t Elf_Byte; + +typedef uint32_t Elf32_Addr; /* Unsigned program address */ +typedef uint32_t Elf32_Off; /* Unsigned file offset */ typedef int32_t Elf32_Sword; /* Signed large integer */ -typedef u_int32_t Elf32_Word; /* Unsigned large integer */ -typedef u_int16_t Elf32_Half; /* Unsigned medium integer */ - -typedef u_int64_t Elf64_Addr; -typedef u_int64_t Elf64_Off; +typedef uint32_t Elf32_Word; /* Unsigned large integer */ +typedef uint16_t Elf32_Half; /* Unsigned medium integer */ + +typedef uint64_t Elf64_Addr; +typedef uint64_t Elf64_Off; typedef int32_t Elf64_Shalf; typedef int32_t Elf64_Sword; -typedef u_int32_t Elf64_Word; +typedef uint32_t Elf64_Word; typedef int64_t Elf64_Sxword; -typedef u_int64_t Elf64_Xword; - -typedef u_int32_t Elf64_Half; -typedef u_int16_t Elf64_Quarter; +typedef uint64_t Elf64_Xword; + +typedef uint32_t Elf64_Half; +typedef uint16_t Elf64_Quarter; /* * e_ident[] identification indexes diff -r 7eac3edd0589 -r eee0489b3a17 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Tue Oct 25 03:00:35 2005 +++ b/tools/libxc/xc_linux_build.c Sat Oct 29 08:51:35 2005 @@ -350,6 +350,8 @@ start_info = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[0]); memset(start_info, 0, sizeof(*start_info)); + rc = xc_version(xc_handle, XENVER_version, NULL); + sprintf(start_info->magic, "Xen-%i.%i", rc >> 16, rc & (0xFFFF)); start_info->flags = flags; start_info->store_mfn = nr_pages - 2; start_info->store_evtchn = store_evtchn; @@ -624,6 
+626,8 @@ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]); memset(start_info, 0, sizeof(*start_info)); + rc = xc_version(xc_handle, XENVER_version, NULL); + sprintf(start_info->magic, "Xen-%i.%i", rc >> 16, rc & (0xFFFF)); start_info->nr_pages = nr_pages; start_info->shared_info = shared_info_frame << PAGE_SHIFT; start_info->flags = flags; diff -r 7eac3edd0589 -r eee0489b3a17 tools/libxc/xc_vmx_build.c --- a/tools/libxc/xc_vmx_build.c Tue Oct 25 03:00:35 2005 +++ b/tools/libxc/xc_vmx_build.c Sat Oct 29 08:51:35 2005 @@ -279,6 +279,7 @@ vcpu_guest_context_t *ctxt, unsigned long shared_info_frame, unsigned int control_evtchn, + unsigned int lapic, unsigned int vcpus, unsigned int store_evtchn, unsigned long *store_mfn) @@ -554,7 +555,7 @@ ctxt->user_regs.eax = 0; ctxt->user_regs.esp = 0; ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */ - ctxt->user_regs.ecx = 0; + ctxt->user_regs.ecx = lapic; ctxt->user_regs.esi = 0; ctxt->user_regs.edi = 0; ctxt->user_regs.ebp = 0; @@ -597,6 +598,7 @@ int memsize, const char *image_name, unsigned int control_evtchn, + unsigned int lapic, unsigned int vcpus, unsigned int store_evtchn, unsigned long *store_mfn) @@ -651,9 +653,9 @@ goto error_out; } - if ( setup_guest(xc_handle, domid, memsize, image, image_size, - nr_pages, ctxt, op.u.getdomaininfo.shared_info_frame, - control_evtchn, vcpus, store_evtchn, store_mfn) < 0) + if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages, + ctxt, op.u.getdomaininfo.shared_info_frame, control_evtchn, + lapic, vcpus, store_evtchn, store_mfn) < 0) { ERROR("Error constructing guest OS"); goto error_out; diff -r 7eac3edd0589 -r eee0489b3a17 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Tue Oct 25 03:00:35 2005 +++ b/tools/libxc/xenguest.h Sat Oct 29 08:51:35 2005 @@ -56,6 +56,7 @@ int memsize, const char *image_name, unsigned int control_evtchn, + unsigned int lapic, unsigned int vcpus, 
unsigned int store_evtchn, unsigned long *store_mfn); diff -r 7eac3edd0589 -r eee0489b3a17 tools/misc/cpuperf/cpuperf.c --- a/tools/misc/cpuperf/cpuperf.c Tue Oct 25 03:00:35 2005 +++ b/tools/misc/cpuperf/cpuperf.c Sat Oct 29 08:51:35 2005 @@ -16,7 +16,6 @@ #include <sys/types.h> #include <sched.h> -#include <error.h> #include <stdio.h> #include <unistd.h> #include <stdlib.h> diff -r 7eac3edd0589 -r eee0489b3a17 tools/misc/miniterm/miniterm.c --- a/tools/misc/miniterm/miniterm.c Tue Oct 25 03:00:35 2005 +++ b/tools/misc/miniterm/miniterm.c Sat Oct 29 08:51:35 2005 @@ -29,7 +29,7 @@ #include <stdlib.h> #include <unistd.h> #include <fcntl.h> -#include <sys/signal.h> +#include <signal.h> #include <sys/types.h> #include <sys/wait.h> diff -r 7eac3edd0589 -r eee0489b3a17 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Tue Oct 25 03:00:35 2005 +++ b/tools/python/xen/lowlevel/xc/xc.c Sat Oct 29 08:51:35 2005 @@ -438,19 +438,20 @@ char *image; int control_evtchn, store_evtchn; int vcpus = 1; + int lapic = 0; int memsize; unsigned long store_mfn = 0; static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn", - "memsize", "image", "vcpus", NULL }; - - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisi", kwd_list, + "memsize", "image", "lapic", "vcpus", NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisii", kwd_list, &dom, &control_evtchn, &store_evtchn, - &memsize, &image, &vcpus) ) + &memsize, &image, &lapic, &vcpus) ) return NULL; if ( xc_vmx_build(xc->xc_handle, dom, memsize, image, control_evtchn, - vcpus, store_evtchn, &store_mfn) != 0 ) + lapic, vcpus, store_evtchn, &store_mfn) != 0 ) return PyErr_SetFromErrno(xc_error); return Py_BuildValue("{s:i}", "store_mfn", store_mfn); diff -r 7eac3edd0589 -r eee0489b3a17 tools/python/xen/lowlevel/xs/xs.c --- a/tools/python/xen/lowlevel/xs/xs.c Tue Oct 25 03:00:35 2005 +++ b/tools/python/xen/lowlevel/xs/xs.c Sat Oct 29 08:51:35 2005 @@ -695,7 +695,7 @@ PyObject *kwds) { static 
char *kwd_spec[] = { "dom", "page", "port", NULL }; - static char *arg_spec = "iii"; + static char *arg_spec = "ili"; domid_t dom = 0; unsigned long page = 0; unsigned int port = 0; diff -r 7eac3edd0589 -r eee0489b3a17 tools/python/xen/web/tcp.py --- a/tools/python/xen/web/tcp.py Tue Oct 25 03:00:35 2005 +++ b/tools/python/xen/web/tcp.py Sat Oct 29 08:51:35 2005 @@ -99,7 +99,7 @@ return l def SetCloExec(SocketListener): - SocketListener.SetCloExec() + SocketListener.setCloExec() def connectTCP(host, port, factory, timeout=None, bindAddress=None): c = TCPConnector(host, port, factory, timeout=timeout, bindAddress=bindAddress) diff -r 7eac3edd0589 -r eee0489b3a17 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Tue Oct 25 03:00:35 2005 +++ b/tools/python/xen/xend/XendDomainInfo.py Sat Oct 29 08:51:35 2005 @@ -595,6 +595,7 @@ to_store = { 'domid': str(self.domid), 'vm': self.vmpath, + 'name': self.info['name'], 'console/limit': str(xroot.get_console_limit() * 1024), 'memory/target': str(self.info['memory_KiB']) } diff -r 7eac3edd0589 -r eee0489b3a17 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Tue Oct 25 03:00:35 2005 +++ b/tools/python/xen/xend/image.py Sat Oct 29 08:51:35 2005 @@ -203,6 +203,10 @@ self.dmargs += self.configVNC(imageConfig) + self.lapic = 0 + lapic = sxp.child_value(imageConfig, 'lapic') + if not lapic is None: + self.lapic = int(lapic) def buildDomain(self): # Create an event channel @@ -217,6 +221,7 @@ log.debug("control_evtchn = %d", self.device_channel) log.debug("store_evtchn = %d", store_evtchn) log.debug("memsize = %d", self.vm.getMemoryTarget() / 1024) + log.debug("lapic = %d", self.lapic) log.debug("vcpus = %d", self.vm.getVCpuCount()) return xc.vmx_build(dom = self.vm.getDomid(), @@ -224,6 +229,7 @@ control_evtchn = self.device_channel, store_evtchn = store_evtchn, memsize = self.vm.getMemoryTarget() / 1024, + lapic = self.lapic, vcpus = self.vm.getVCpuCount()) diff -r 7eac3edd0589 
-r eee0489b3a17 tools/python/xen/xend/server/SrvDaemon.py --- a/tools/python/xen/xend/server/SrvDaemon.py Tue Oct 25 03:00:35 2005 +++ b/tools/python/xen/xend/server/SrvDaemon.py Sat Oct 29 08:51:35 2005 @@ -262,10 +262,7 @@ return 1 def stop(self): - result = self.cleanup_xend(True) - from xen.xend import Vifctl - Vifctl.network("stop") - return result + return self.cleanup_xend(True) def run(self, status): try: diff -r 7eac3edd0589 -r eee0489b3a17 tools/python/xen/xend/server/SrvDomainDir.py --- a/tools/python/xen/xend/server/SrvDomainDir.py Tue Oct 25 03:00:35 2005 +++ b/tools/python/xen/xend/server/SrvDomainDir.py Sat Oct 29 08:51:35 2005 @@ -25,7 +25,6 @@ from xen.xend.XendDomainInfo import XendDomainInfo from xen.xend.Args import FormFn from xen.xend.XendError import XendError -from xen.xend.XendLogging import log from xen.web.SrvDir import SrvDir from SrvDomain import SrvDomain @@ -52,7 +51,7 @@ else: return self.domain(x) - def op_create(self, op, req): + def op_create(self, _, req): """Create a domain. Expects the domain config in request parameter 'config' in SXP format. 
""" @@ -66,12 +65,12 @@ pin.input_eof() config = pin.get_val() ok = 1 + except sxp.ParseError, ex: + errmsg = 'Invalid configuration ' + str(ex) except Exception, ex: print 'op_create> Exception in config', ex traceback.print_exc() errmsg = 'Configuration error ' + str(ex) - except sxp.ParseError, ex: - errmsg = 'Invalid configuration ' + str(ex) if not ok: raise XendError(errmsg) try: @@ -108,7 +107,7 @@ """ return req.threadRequest(self.do_restore, op, req) - def do_restore(self, op, req): + def do_restore(self, _, req): fn = FormFn(self.xd.domain_restore, [['file', 'str']]) dominfo = fn(req.args) diff -r 7eac3edd0589 -r eee0489b3a17 tools/python/xen/xend/server/event.py --- a/tools/python/xen/xend/server/event.py Tue Oct 25 03:00:35 2005 +++ b/tools/python/xen/xend/server/event.py Sat Oct 29 08:51:35 2005 @@ -192,4 +192,5 @@ if xroot.get_xend_http_server(): port = xroot.get_xend_event_port() interface = xroot.get_xend_address() - tcp.listenTCP(port, factory, interface=interface) + l = tcp.listenTCP(port, factory, interface=interface) + l.setCloExec() diff -r 7eac3edd0589 -r eee0489b3a17 tools/python/xen/xend/xenstore/xstransact.py --- a/tools/python/xen/xend/xenstore/xstransact.py Tue Oct 25 03:00:35 2005 +++ b/tools/python/xen/xend/xenstore/xstransact.py Sat Oct 29 08:51:35 2005 @@ -5,9 +5,6 @@ # Public License. See the file "COPYING" in the main directory of # this archive for more details. 
-import errno -import threading -from xen.lowlevel import xs from xen.xend.xenstore.xsutil import xshandle diff -r 7eac3edd0589 -r eee0489b3a17 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Tue Oct 25 03:00:35 2005 +++ b/tools/python/xen/xm/create.py Sat Oct 29 08:51:35 2005 @@ -158,6 +158,10 @@ fn=set_int, default=None, use="CPU to run the domain on.") +gopts.var('lapic', val='LAPIC', + fn=set_int, default=0, + use="Disable or enable local APIC of VMX domain.") + gopts.var('vcpus', val='VCPUS', fn=set_int, default=1, use="# of Virtual CPUS in domain.") @@ -315,10 +319,6 @@ gopts.var('nfs_root', val="PATH", fn=set_value, default=None, use="Set the path of the root NFS directory.") - -gopts.var('memmap', val='FILE', - fn=set_value, default='', - use="Path to memap SXP file.") gopts.var('device_model', val='FILE', fn=set_value, default='', @@ -542,9 +542,9 @@ def configure_vmx(opts, config_image, vals): """Create the config for VMX devices. """ - args = [ 'memmap', 'device_model', 'vcpus', 'cdrom', - 'boot', 'fda', 'fdb', 'localtime', 'serial', 'macaddr', 'stdvga', - 'isa', 'nographic', 'vnc', 'vncviewer', 'sdl', 'display', 'ne2000'] + args = [ 'device_model', 'vcpus', 'cdrom', 'boot', 'fda', 'fdb', + 'localtime', 'serial', 'macaddr', 'stdvga', 'isa', 'nographic', + 'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'lapic'] for a in args: if (vals.__dict__[a]): config_image.append([a, vals.__dict__[a]]) diff -r 7eac3edd0589 -r eee0489b3a17 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Tue Oct 25 03:00:35 2005 +++ b/tools/python/xen/xm/main.py Sat Oct 29 08:51:35 2005 @@ -61,6 +61,8 @@ top monitor system and domains in real-time unpause <DomId> unpause a paused domain +<DomName> can be substituted for <DomId> in xm subcommands. 
+ For a complete list of subcommands run 'xm help --long' For more help on xm see the xm(1) man page For more help on xm create, see the xmdomain.cfg(5) man page""" @@ -118,6 +120,8 @@ vnet-list [-l|--long] list vnets vnet-create <config> create a vnet from a config file vnet-delete <vnetid> delete a vnet + +<DomName> can be substituted for <DomId> in xm subcommands. For a short list of subcommands run 'xm help' For more help on xm see the xm(1) man page diff -r 7eac3edd0589 -r eee0489b3a17 tools/security/getlabel.sh --- a/tools/security/getlabel.sh Tue Oct 25 03:00:35 2005 +++ b/tools/security/getlabel.sh Sat Oct 29 08:51:35 2005 @@ -36,18 +36,21 @@ usage () { - echo "Usage: $0 -sid <ssidref> [<policy name>] or" - echo " $0 -dom <domid> [<policy name>] " - echo "" - echo "policy name : the name of the policy, i.e. 'chwall'" - echo " If the policy name is omitted, the grub.conf" - echo " entry of the running system is tried to be read" - echo " and the policy name determined from there." - echo "ssidref : an ssidref in hex or decimal format, i.e., '0x00010002'" - echo " or '65538'" - echo "domid : id of the domain, i.e., '1'; Use numbers from the 2nd" - echo " column shown when invoking 'xm list'" - echo "" +echo "Use this tool to display the label of a domain or the label that is +corresponding to an ssidref given the name of the running policy. + +Usage: $0 -sid <ssidref> [<policy name>] or + $0 -dom <domid> [<policy name>] + +policy name : the name of the policy, i.e. 'chwall' + If the policy name is omitted, the grub.conf + entry of the running system is tried to be read + and the policy name determined from there. 
+ssidref : an ssidref in hex or decimal format, i.e., '0x00010002' + or '65538' +domid : id of the domain, i.e., '1'; Use numbers from the 2nd + column shown when invoking 'xm list' +" } diff -r 7eac3edd0589 -r eee0489b3a17 tools/security/setlabel.sh --- a/tools/security/setlabel.sh Tue Oct 25 03:00:35 2005 +++ b/tools/security/setlabel.sh Sat Oct 29 08:51:35 2005 @@ -39,21 +39,27 @@ usage () { - echo "Usage: $0 [Option] <vmfile> <label> [<policy name>]" - echo " or $0 -l [<policy name>]" - echo "" - echo "Valid options are:" - echo "-r : to relabel a file without being prompted" - echo "" - echo "vmfile : XEN vm configuration file" - echo "label : the label to map to an ssidref" - echo "policy name : the name of the policy, i.e. 'chwall'" - echo " If the policy name is omitted, it is attempted" - echo " to find the current policy's name in grub.conf." - echo "" - echo "-l [<policy name>] is used to show valid labels in the map file of" - echo " the given or current policy." - echo "" +echo "Use this tool to put the ssidref corresponding to a label of a policy into +the VM configuration file, or use it to display all labels of a policy. + +Usage: $0 [Option] <vmfile> <label> [<policy name>] + or $0 -l [<policy name>] + +Valid options are: +-r : to relabel a file without being prompted + +vmfile : XEN vm configuration file; give complete path +label : the label to map to an ssidref +policy name : the name of the policy, i.e. 'chwall' + If the policy name is omitted, it is attempted + to find the current policy's name in grub.conf. + +-l [<policy name>] is used to show valid labels in the map file of + the given or current policy. If the policy name + is omitted, it will be tried to determine the + current policy from grub.conf (/boot/grub/grub.conf) + +" } @@ -83,7 +89,7 @@ exit -1; fi else - policy=$3; + policy=$1; fi @@ -92,7 +98,7 @@ if [ "$res" != "0" ]; then showLabels $mapfile else - echo "Could not find map file for policy '$1'." 
+ echo "Could not find map file for policy '$policy'." fi elif [ "$mode" == "usage" ]; then usage diff -r 7eac3edd0589 -r eee0489b3a17 tools/security/updategrub.sh --- a/tools/security/updategrub.sh Tue Oct 25 03:00:35 2005 +++ b/tools/security/updategrub.sh Sat Oct 29 08:51:35 2005 @@ -26,11 +26,16 @@ # Show usage of this program usage () { - echo "Usage: $0 <policy name> <root of xen repository>" - echo "" - echo "<policy name> : The name of the policy, i.e. xen_null" - echo "<root of xen repository> : The root of the XEN repositrory." - echo "" +echo "Use this tool to add the binary policy to the Xen grub entry and +have Xen automatically enforce the policy when starting. + +Usage: $0 <policy name> <root of xen repository> + +<policy name> : The name of the policy, i.e. xen_null +<root of xen repository> : The root of the XEN repository. Give + complete path. + +" } # This function sets the global variable 'linux' @@ -43,11 +48,24 @@ for f in $path/linux-*-xen0 ; do versionfile=$f/include/linux/version.h if [ -r $versionfile ]; then - lnx=`cat $versionfile | \ - grep UTS_RELEASE | \ - awk '{ \ - len=length($3); \ - print substr($3,2,len-2) }'` + lnx=`cat $versionfile | \ + grep UTS_RELEASE | \ + awk '{ \ + len=length($3); \ + version=substr($3,2,len-2); \ + split(version,numbers,"."); \ + if (numbers[4]=="") { \ + printf("%s.%s.%s", \ + numbers[1], \ + numbers[2], \ + numbers[3]); \ + } else { \ + printf("%s.%s.%s[.0-9]*-xen0",\ + numbers[1], \ + numbers[2], \ + numbers[3]); \ + } \ + }'` fi if [ "$lnx" != "" ]; then linux="[./0-9a-zA-z]*$lnx" @@ -143,10 +161,19 @@ echo "Could not create temporary file! Aborting." exit -1 fi - mv -f $tmpfile $grubconf + diff $tmpfile $grubconf > /dev/null + RES=$? + if [ "$RES" == "0" ]; then + echo "No changes were made to $grubconf." + else + echo "Successfully updated $grubconf." + mv -f $tmpfile $grubconf + fi } if [ "$1" == "" -o "$2" == "" ]; then + echo "Error: Not enough command line parameters." 
+ echo "" usage exit -1 fi diff -r 7eac3edd0589 -r eee0489b3a17 tools/xenstat/libxenstat/Makefile --- a/tools/xenstat/libxenstat/Makefile Tue Oct 25 03:00:35 2005 +++ b/tools/xenstat/libxenstat/Makefile Sat Oct 29 08:51:35 2005 @@ -38,13 +38,13 @@ WARN_FLAGS=-Wall -Werror -CFLAGS+=-Isrc -I$(XEN_LIBXC) +CFLAGS+=-Isrc -I$(XEN_LIBXC) -I$(XEN_XENSTORE) LDFLAGS+=-Lsrc all: $(LIB) $(LIB): $(OBJECTS) - $(AR) rc $@ $^ + $(AR) rc $@ $^ $(XEN_XENSTORE)/libxenstore.so $(RANLIB) $@ $(SHLIB): $(OBJECTS) diff -r 7eac3edd0589 -r eee0489b3a17 tools/xenstat/libxenstat/src/xenstat.c --- a/tools/xenstat/libxenstat/src/xenstat.c Tue Oct 25 03:00:35 2005 +++ b/tools/xenstat/libxenstat/src/xenstat.c Sat Oct 29 08:51:35 2005 @@ -21,6 +21,7 @@ #include <string.h> #include <unistd.h> #include <xen-interface.h> +#include <xs.h> #include "xenstat.h" /* @@ -31,6 +32,7 @@ struct xenstat_handle { xi_handle *xihandle; + struct xs_handle *xshandle; /* xenstore handle */ int page_size; FILE *procnetdev; char xen_version[VERSION_SIZE]; /* xen version running on this node */ @@ -49,6 +51,7 @@ struct xenstat_domain { unsigned int id; + char *name; unsigned int state; unsigned long long cpu_ns; unsigned int num_vcpus; /* No. 
vcpus configured for domain */ @@ -110,6 +113,7 @@ static void xenstat_uninit_vcpus(xenstat_handle * handle); static void xenstat_uninit_networks(xenstat_handle * handle); static void xenstat_uninit_xen_version(xenstat_handle * handle); +static char *xenstat_get_domain_name(xenstat_handle * handle, unsigned int domain_id); static xenstat_collector collectors[] = { { XENSTAT_VCPU, xenstat_collect_vcpus, @@ -153,6 +157,13 @@ return NULL; } + handle->xshandle = xs_daemon_open_readonly(); /* open handle to xenstore*/ + if (handle->xshandle == NULL) { + perror("unable to open xenstore\n"); + free(handle); + return NULL; + } + return handle; } @@ -163,6 +174,7 @@ for (i = 0; i < NUM_COLLECTORS; i++) collectors[i].uninit(handle); xi_uninit(handle->xihandle); + xs_daemon_close(handle->xshandle); free(handle); } } @@ -228,6 +240,7 @@ for (i = 0; i < new_domains; i++) { /* Fill in domain using domaininfo[i] */ domain->id = domaininfo[i].domain; + domain->name = xenstat_get_domain_name(handle, domaininfo[i].domain); domain->state = domaininfo[i].flags; domain->cpu_ns = domaininfo[i].cpu_time; domain->num_vcpus = (domaininfo[i].max_vcpu_id+1); @@ -337,6 +350,12 @@ unsigned xenstat_domain_id(xenstat_domain * domain) { return domain->id; +} + +/* Get the domain name for the domain */ +char *xenstat_domain_name(xenstat_domain * domain) +{ + return domain->name; } /* Get information about how much CPU time has been used */ @@ -675,3 +694,25 @@ static void xenstat_uninit_xen_version(xenstat_handle * handle) { } + +static char *xenstat_get_domain_name(xenstat_handle *handle, unsigned int domain_id) +{ + char path[80]; + char *name; + unsigned int *len; + struct xs_transaction_handle *xstranshandle; + + snprintf(path, sizeof(path),"/local/domain/%i/name", domain_id); + + xstranshandle = xs_transaction_start(handle->xshandle); + if (xstranshandle == NULL) { + perror("Unable to get transcation handle from xenstore\n"); + exit(1); /* Change this */ + } + + name = (char *) 
xs_read(handle->xshandle, xstranshandle, path, len); + + xs_transaction_end(handle->xshandle, xstranshandle, false); + + return name; +} diff -r 7eac3edd0589 -r eee0489b3a17 tools/xenstat/libxenstat/src/xenstat.h --- a/tools/xenstat/libxenstat/src/xenstat.h Tue Oct 25 03:00:35 2005 +++ b/tools/xenstat/libxenstat/src/xenstat.h Sat Oct 29 08:51:35 2005 @@ -80,6 +80,9 @@ /* Get the domain ID for this domain */ unsigned xenstat_domain_id(xenstat_domain * domain); +/* Get the domain name for the domain */ +char *xenstat_domain_name(xenstat_domain * domain); + /* Get information about how much CPU time has been used */ unsigned long long xenstat_domain_cpu_ns(xenstat_domain * domain); diff -r 7eac3edd0589 -r eee0489b3a17 tools/xenstat/xentop/xentop.c --- a/tools/xenstat/xentop/xentop.c Tue Oct 25 03:00:35 2005 +++ b/tools/xenstat/xentop/xentop.c Sat Oct 29 08:51:35 2005 @@ -67,8 +67,6 @@ static unsigned long long tot_net_bytes( xenstat_domain *, int); /* Field functions */ -static int compare_domid(xenstat_domain *domain1, xenstat_domain *domain2); -static void print_domid(xenstat_domain *domain); static int compare_state(xenstat_domain *domain1, xenstat_domain *domain2); static void print_state(xenstat_domain *domain); static int compare_cpu(xenstat_domain *domain1, xenstat_domain *domain2); @@ -91,6 +89,8 @@ static void print_net_rx(xenstat_domain *domain); static int compare_ssid(xenstat_domain *domain1, xenstat_domain *domain2); static void print_ssid(xenstat_domain *domain); +static int compare_name(xenstat_domain *domain1, xenstat_domain *domain2); +static void print_name(xenstat_domain *domain); /* Section printing functions */ static void do_summary(void); @@ -104,6 +104,7 @@ /* Field types */ typedef enum field_id { FIELD_DOMID, + FIELD_NAME, FIELD_STATE, FIELD_CPU, FIELD_CPU_PCT, @@ -127,7 +128,7 @@ } field; field fields[] = { - { FIELD_DOMID, "DOMID", 5, compare_domid, print_domid }, + { FIELD_NAME, "NAME", 10, compare_name, print_name }, { FIELD_STATE, 
"STATE", 6, compare_state, print_state }, { FIELD_CPU, "CPU(sec)", 10, compare_cpu, print_cpu }, { FIELD_CPU_PCT, "CPU(%)", 6, compare_cpu_pct, print_cpu_pct }, @@ -344,16 +345,16 @@ /* Field functions */ -/* Compares domain ids of two domains, returning -1,0,1 for <,=,> */ -int compare_domid(xenstat_domain *domain1, xenstat_domain *domain2) -{ - return compare(xenstat_domain_id(domain1), xenstat_domain_id(domain2)); -} - -/* Prints domain identification number */ -void print_domid(xenstat_domain *domain) -{ - print("%5u", xenstat_domain_id(domain)); +/* Compare domain names, returning -1,0,1 for <,=,> */ +int compare_name(xenstat_domain *domain1, xenstat_domain *domain2) +{ + return strcasecmp(xenstat_domain_name(domain1), xenstat_domain_name(domain2)); +} + +/* Prints domain name */ +void print_name(xenstat_domain *domain) +{ + print("%10s", xenstat_domain_name(domain)); } struct { diff -r 7eac3edd0589 -r eee0489b3a17 tools/xenstore/Makefile --- a/tools/xenstore/Makefile Tue Oct 25 03:00:35 2005 +++ b/tools/xenstore/Makefile Sat Oct 29 08:51:35 2005 @@ -77,7 +77,7 @@ clean: testsuite-clean rm -f *.o *.opic *.so rm -f xenstored xs_random xs_stress xs_crashme - rm -f xs_test xenstored_test + rm -f xs_test xenstored_test xs_tdb_dump $(RM) $(PROG_DEP) print-dir: diff -r 7eac3edd0589 -r eee0489b3a17 tools/xenstore/xenstored_core.c --- a/tools/xenstore/xenstored_core.c Tue Oct 25 03:00:35 2005 +++ b/tools/xenstore/xenstored_core.c Sat Oct 29 08:51:35 2005 @@ -188,7 +188,7 @@ tm = localtime(&now); write(tracefd, prefix, strlen(prefix)); - sprintf(string, " %p %0d:%0d:%0d ", conn, tm->tm_hour, tm->tm_min, + sprintf(string, " %p %02d:%02d:%02d ", conn, tm->tm_hour, tm->tm_min, tm->tm_sec); write(tracefd, string, strlen(string)); write(tracefd, sockmsg_string(data->hdr.msg.type), diff -r 7eac3edd0589 -r eee0489b3a17 xen/acm/acm_simple_type_enforcement_hooks.c --- a/xen/acm/acm_simple_type_enforcement_hooks.c Tue Oct 25 03:00:35 2005 +++ 
b/xen/acm/acm_simple_type_enforcement_hooks.c Sat Oct 29 08:51:35 2005 @@ -392,8 +392,11 @@ int i; printkd("checking cache: %x --> %x.\n", dom->domain_id, rdom); + + if (dom->ssid == NULL) + return 0; ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, - (struct acm_ssid_domain *)(dom)->ssid); + (struct acm_ssid_domain *)(dom->ssid)); for(i=0; i< ACM_TE_CACHE_SIZE; i++) { if ((ste_ssid->ste_cache[i].valid == VALID) && @@ -412,6 +415,8 @@ struct ste_ssid *ste_ssid; int i; printkd("caching from doms: %x --> %x.\n", subj->domain_id, obj->domain_id); + if (subj->ssid == NULL) + return; ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, (struct acm_ssid_domain *)(subj)->ssid); for(i=0; i< ACM_TE_CACHE_SIZE; i++) @@ -431,26 +436,34 @@ struct ste_ssid *ste_ssid; int i; struct domain **pd; + struct acm_ssid_domain *ssid; printkd("deleting cache for dom %x.\n", id); - read_lock(&domlist_lock); /* look through caches of all domains */ pd = &domain_list; for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) { - ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, - (struct acm_ssid_domain *)(*pd)->ssid); + ssid = (struct acm_ssid_domain *)((*pd)->ssid); + + if (ssid == NULL) + continue; /* hanging domain structure, no ssid any more ... 
*/ + ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, ssid); + if (!ste_ssid) { + printk("%s: deleting ID from cache ERROR (no ste_ssid)!\n", + __func__); + goto out; + } for (i=0; i<ACM_TE_CACHE_SIZE; i++) if ((ste_ssid->ste_cache[i].valid == VALID) && - (ste_ssid->ste_cache[i].id = id)) + (ste_ssid->ste_cache[i].id == id)) ste_ssid->ste_cache[i].valid = FREE; } + out: read_unlock(&domlist_lock); } /*************************** * Authorization functions **************************/ - static int ste_pre_domain_create(void *subject_ssid, ssidref_t ssidref) { @@ -484,19 +497,29 @@ /* -------- EVENTCHANNEL OPERATIONS -----------*/ static int -ste_pre_eventchannel_unbound(domid_t id) { +ste_pre_eventchannel_unbound(domid_t id1, domid_t id2) { struct domain *subj, *obj; int ret; - traceprintk("%s: dom%x-->dom%x.\n", - __func__, current->domain->domain_id, id); - - if (check_cache(current->domain, id)) { + traceprintk("%s: dom%x-->dom%x.\n", __func__, + (id1 == DOMID_SELF) ? current->domain->domain_id : id1, + (id2 == DOMID_SELF) ? 
current->domain->domain_id : id2); + + if (id1 == DOMID_SELF) id1 = current->domain->domain_id; + if (id2 == DOMID_SELF) id2 = current->domain->domain_id; + + subj = find_domain_by_id(id1); + obj = find_domain_by_id(id2); + if ((subj == NULL) || (obj == NULL)) { + ret = ACM_ACCESS_DENIED; + goto out; + } + /* cache check late */ + if (check_cache(subj, obj->domain_id)) { atomic_inc(&ste_bin_pol.ec_cachehit_count); - return ACM_ACCESS_PERMITTED; + ret = ACM_ACCESS_PERMITTED; + goto out; } atomic_inc(&ste_bin_pol.ec_eval_count); - subj = current->domain; - obj = find_domain_by_id(id); if (share_common_type(subj, obj)) { cache_result(subj, obj); @@ -505,38 +528,43 @@ atomic_inc(&ste_bin_pol.ec_denied_count); ret = ACM_ACCESS_DENIED; } + out: if (obj != NULL) put_domain(obj); + if (subj != NULL) + put_domain(subj); return ret; } static int -ste_pre_eventchannel_interdomain(domid_t id1, domid_t id2) -{ - struct domain *subj, *obj; +ste_pre_eventchannel_interdomain(domid_t id) +{ + struct domain *subj=NULL, *obj=NULL; int ret; + traceprintk("%s: dom%x-->dom%x.\n", __func__, - (id1 == DOMID_SELF) ? current->domain->domain_id : id1, - (id2 == DOMID_SELF) ? current->domain->domain_id : id2); + current->domain->domain_id, + (id == DOMID_SELF) ? 
current->domain->domain_id : id); /* following is a bit longer but ensures that we * "put" only domains that we where "find"-ing */ - if (id1 == DOMID_SELF) id1 = current->domain->domain_id; - if (id2 == DOMID_SELF) id2 = current->domain->domain_id; - - subj = find_domain_by_id(id1); - obj = find_domain_by_id(id2); - if ((subj == NULL) || (obj == NULL)) { + if (id == DOMID_SELF) id = current->domain->domain_id; + + subj = current->domain; + obj = find_domain_by_id(id); + if (obj == NULL) { ret = ACM_ACCESS_DENIED; goto out; } + /* cache check late, but evtchn is not on performance critical path */ if (check_cache(subj, obj->domain_id)) { atomic_inc(&ste_bin_pol.ec_cachehit_count); ret = ACM_ACCESS_PERMITTED; goto out; } + atomic_inc(&ste_bin_pol.ec_eval_count); if (share_common_type(subj, obj)) { @@ -549,8 +577,6 @@ out: if (obj != NULL) put_domain(obj); - if (subj != NULL) - put_domain(subj); return ret; } diff -r 7eac3edd0589 -r eee0489b3a17 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Tue Oct 25 03:00:35 2005 +++ b/xen/arch/ia64/xen/domain.c Sat Oct 29 08:51:35 2005 @@ -29,6 +29,7 @@ #include <xen/event.h> //#include <xen/shadow.h> #include <xen/console.h> +#include <xen/compile.h> #include <xen/elf.h> //#include <asm/page.h> @@ -948,6 +949,7 @@ si = (start_info_t *)alloc_xenheap_page(); memset(si, 0, PAGE_SIZE); d->shared_info->arch.start_info_pfn = __pa(si) >> PAGE_SHIFT; + sprintf(si->magic, "Xen-%i.%i", XEN_VERSION, XEN_SUBVERSION); #if 0 si->nr_pages = d->tot_pages; diff -r 7eac3edd0589 -r eee0489b3a17 xen/arch/x86/apic.c --- a/xen/arch/x86/apic.c Tue Oct 25 03:00:35 2005 +++ b/xen/arch/x86/apic.c Sat Oct 29 08:51:35 2005 @@ -815,6 +815,10 @@ return result; } +unsigned int get_apic_bus_scale(void) +{ + return bus_scale; +} static unsigned int calibration_result; diff -r 7eac3edd0589 -r eee0489b3a17 xen/arch/x86/dm/i8259.c --- a/xen/arch/x86/dm/i8259.c Tue Oct 25 03:00:35 2005 +++ b/xen/arch/x86/dm/i8259.c Sat Oct 29 08:51:35 2005 @@ -32,8 
+32,8 @@ #include <public/io/ioreq.h> #include <asm/vmx.h> #include <public/io/vmx_vpic.h> -#include <public/io/vmx_vlapic.h> #include <asm/current.h> +#include <asm/vmx_vlapic.h> /* set irq level. If an edge is detected, then the IRR is set to 1 */ static inline void pic_set_irq1(PicState *s, int irq, int level) @@ -135,7 +135,6 @@ { s->pics[1].irr |= (uint8_t)(irqs >> 8); s->pics[0].irr |= (uint8_t) irqs; - /* TODO for alt_irq_func */ pic_update_irq(s); } @@ -505,14 +504,22 @@ { int intno; struct vmx_virpic *s = &v->domain->arch.vmx_platform.vmx_pic; - + struct vmx_platform *plat = &v->domain->arch.vmx_platform; + + if ( !vlapic_accept_pic_intr(v) ) + return -1; + + if ( !plat->interrupt_request ) + return -1; + /* read the irq from the PIC */ intno = pic_read_irq(s); *type = VLAPIC_DELIV_MODE_EXT; + plat->interrupt_request = 0; return intno; } -int is_pit_irq(struct vcpu *v, int irq) +int is_pit_irq(struct vcpu *v, int irq, int type) { int pit_vec = v->domain->arch.vmx_platform.vmx_pic.pics[0].irq_base; diff -r 7eac3edd0589 -r eee0489b3a17 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Tue Oct 25 03:00:35 2005 +++ b/xen/arch/x86/domain_build.c Sat Oct 29 08:51:35 2005 @@ -15,6 +15,7 @@ #include <xen/elf.h> #include <xen/kernel.h> #include <xen/domain.h> +#include <xen/compile.h> #include <asm/regs.h> #include <asm/system.h> #include <asm/io.h> @@ -582,26 +583,23 @@ _initrd_start, (_initrd_start+initrd_len+PAGE_SIZE-1) & PAGE_MASK); } - d->next_io_page = max_page; - /* Set up start info area. 
*/ si = (start_info_t *)vstartinfo_start; memset(si, 0, PAGE_SIZE); si->nr_pages = nr_pages; + si->shared_info = virt_to_phys(d->shared_info); if ( opt_dom0_translate ) { - si->shared_info = d->next_io_page << PAGE_SHIFT; - set_pfn_from_mfn(virt_to_phys(d->shared_info) >> PAGE_SHIFT, d->next_io_page); - d->next_io_page++; - } - else - si->shared_info = virt_to_phys(d->shared_info); + si->shared_info = max_page << PAGE_SHIFT; + set_pfn_from_mfn(virt_to_phys(d->shared_info) >> PAGE_SHIFT, max_page); + } si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; si->pt_base = vpt_start; si->nr_pt_frames = nr_pt_pages; si->mfn_list = vphysmap_start; + sprintf(si->magic, "Xen-%i.%i", XEN_VERSION, XEN_SUBVERSION); /* Write the phys->machine and machine->phys table entries. */ for ( pfn = 0; pfn < d->tot_pages; pfn++ ) diff -r 7eac3edd0589 -r eee0489b3a17 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Tue Oct 25 03:00:35 2005 +++ b/xen/arch/x86/mm.c Sat Oct 29 08:51:35 2005 @@ -1164,6 +1164,7 @@ { l3_pgentry_t ol3e; unsigned long vaddr; + int okay; if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) ) { @@ -1218,7 +1219,9 @@ return 0; } - BUG_ON(!create_pae_xen_mappings(pl3e)); + okay = create_pae_xen_mappings(pl3e); + BUG_ON(!okay); + put_page_from_l3e(ol3e, pfn); return 1; } diff -r 7eac3edd0589 -r eee0489b3a17 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Tue Oct 25 03:00:35 2005 +++ b/xen/arch/x86/time.c Sat Oct 29 08:51:35 2005 @@ -323,7 +323,7 @@ return pit_counter64 + (u16)(pit_stamp - pit_read_counter()); } -static int init_pit(void) +static void init_pit(void) { read_platform_count = read_pit_count; @@ -333,8 +333,6 @@ printk("Platform timer is %s PIT\n", freq_string(CLOCK_TICK_RATE)); using_pit = 1; - - return 1; } /************************************************************ @@ -563,7 +561,7 @@ static void init_platform_timer(void) { if ( !init_cyclone() && !init_hpet() ) - BUG_ON(!init_pit()); + init_pit(); } diff -r 7eac3edd0589 -r eee0489b3a17 xen/arch/x86/vmx.c --- 
a/xen/arch/x86/vmx.c Tue Oct 25 03:00:35 2005 +++ b/xen/arch/x86/vmx.c Sat Oct 29 08:51:35 2005 @@ -65,6 +65,11 @@ if ( v == v->domain->vcpu[0] ) { + v->domain->arch.vmx_platform.lapic_enable = + v->arch.guest_context.user_regs.ecx; + v->arch.guest_context.user_regs.ecx = 0; + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "lapic enable is %d.\n", + v->domain->arch.vmx_platform.lapic_enable); /* * Required to do this once per domain * XXX todo: add a seperate function to do these. @@ -96,6 +101,10 @@ destroy_vmcs(&v->arch.arch_vmx); free_monitor_pagetable(v); rem_ac_timer(&v->domain->arch.vmx_platform.vmx_pit.pit_timer); + if ( vmx_apic_support(v->domain) ) { + rem_ac_timer( &(VLAPIC(v)->vlapic_timer) ); + xfree( VLAPIC(v) ); + } } #ifdef __x86_64__ @@ -442,7 +451,9 @@ /* Use 1:1 page table to identify MMIO address space */ if ( mmio_space(gpa) ){ - if (gpa >= 0xFEE00000) { /* workaround for local APIC */ + struct vcpu *v = current; + /* No support for APIC */ + if (!vmx_apic_support(v->domain) && gpa >= 0xFEC00000) { u32 inst_len; __vmread(VM_EXIT_INSTRUCTION_LEN, &(inst_len)); __update_guest_eip(inst_len); @@ -487,6 +498,7 @@ { unsigned int eax, ebx, ecx, edx; unsigned long eip; + struct vcpu *v = current; __vmread(GUEST_RIP, &eip); @@ -500,6 +512,9 @@ cpuid(input, &eax, &ebx, &ecx, &edx); if (input == 1) { + if ( vmx_apic_support(v->domain) && + !vlapic_global_enabled((VLAPIC(v))) ) + clear_bit(X86_FEATURE_APIC, &edx); #ifdef __i386__ clear_bit(X86_FEATURE_PSE, &edx); clear_bit(X86_FEATURE_PAE, &edx); @@ -1441,6 +1456,7 @@ static inline void vmx_do_msr_read(struct cpu_user_regs *regs) { u64 msr_content = 0; + struct vcpu *v = current; VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx", (unsigned long)regs->ecx, (unsigned long)regs->eax, @@ -1455,6 +1471,9 @@ case MSR_IA32_SYSENTER_EIP: __vmread(GUEST_SYSENTER_EIP, &msr_content); break; + case MSR_IA32_APICBASE: + msr_content = VLAPIC(v) ? 
VLAPIC(v)->apic_base_msr : 0; + break; default: if(long_mode_do_msr_read(regs)) return; @@ -1474,6 +1493,7 @@ static inline void vmx_do_msr_write(struct cpu_user_regs *regs) { u64 msr_content; + struct vcpu *v = current; VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write: ecx=%lx, eax=%lx, edx=%lx", (unsigned long)regs->ecx, (unsigned long)regs->eax, @@ -1490,6 +1510,9 @@ break; case MSR_IA32_SYSENTER_EIP: __vmwrite(GUEST_SYSENTER_EIP, msr_content); + break; + case MSR_IA32_APICBASE: + vlapic_msr_set(VLAPIC(v), msr_content); break; default: long_mode_do_msr_write(regs); diff -r 7eac3edd0589 -r eee0489b3a17 xen/arch/x86/vmx_intercept.c --- a/xen/arch/x86/vmx_intercept.c Tue Oct 25 03:00:35 2005 +++ b/xen/arch/x86/vmx_intercept.c Sat Oct 29 08:51:35 2005 @@ -23,6 +23,7 @@ #include <asm/vmx_platform.h> #include <asm/vmx_virpit.h> #include <asm/vmx_intercept.h> +#include <asm/vmx_vlapic.h> #include <public/io/ioreq.h> #include <xen/lib.h> #include <xen/sched.h> @@ -31,6 +32,123 @@ #include <xen/event.h> #ifdef CONFIG_VMX + +struct vmx_mmio_handler vmx_mmio_handers[VMX_MMIO_HANDLER_NR] = +{ + { + .check_handler = vlapic_range, + .read_handler = vlapic_read, + .write_handler = vlapic_write + } +}; + +static inline void vmx_mmio_access(struct vcpu *v, + ioreq_t *p, + vmx_mmio_read_t read_handler, + vmx_mmio_write_t write_handler) +{ + ioreq_t *req; + vcpu_iodata_t *vio = get_vio(v->domain, v->vcpu_id); + unsigned int tmp1, tmp2; + unsigned long data; + + if (vio == NULL) { + printk("vlapic_access: bad shared page\n"); + domain_crash_synchronous(); + } + + req = &vio->vp_ioreq; + + switch (req->type) { + case IOREQ_TYPE_COPY: + { + int sign = (req->df) ? 
-1 : 1, i; + + if (!req->pdata_valid) { + if (req->dir == IOREQ_READ){ + req->u.data = read_handler(v, req->addr, req->size); + } else { /* req->dir != IOREQ_READ */ + write_handler(v, req->addr, req->size, req->u.data); + } + } else { /* !req->pdata_valid */ + if (req->dir == IOREQ_READ) { + for (i = 0; i < req->count; i++) { + data = read_handler(v, + req->addr + (sign * i * req->size), + req->size); + vmx_copy(&data, + (unsigned long)p->u.pdata + (sign * i * req->size), + p->size, + VMX_COPY_OUT); + } + } else { /* !req->dir == IOREQ_READ */ + for (i = 0; i < req->count; i++) { + vmx_copy(&data, + (unsigned long)p->u.pdata + (sign * i * req->size), + p->size, + VMX_COPY_IN); + write_handler(v, + req->addr + (sign * i * req->size), + req->size, data); + } + } + } + break; + } + + case IOREQ_TYPE_AND: + tmp1 = read_handler(v, req->addr, req->size); + if (req->dir == IOREQ_WRITE) { + tmp2 = tmp1 & (unsigned long) req->u.data; + write_handler(v, req->addr, req->size, tmp2); + } + req->u.data = tmp1; + break; + + case IOREQ_TYPE_OR: + tmp1 = read_handler(v, req->addr, req->size); + if (req->dir == IOREQ_WRITE) { + tmp2 = tmp1 | (unsigned long) req->u.data; + write_handler(v, req->addr, req->size, tmp2); + } + req->u.data = tmp1; + break; + + case IOREQ_TYPE_XOR: + tmp1 = read_handler(v, req->addr, req->size); + if (req->dir == IOREQ_WRITE) { + tmp2 = tmp1 ^ (unsigned long) req->u.data; + write_handler(v, req->addr, req->size, tmp2); + } + req->u.data = tmp1; + break; + + default: + printk("error ioreq type for local APIC %x\n", req->type); + domain_crash_synchronous(); + break; + } +} + +int vmx_mmio_intercept(ioreq_t *p) +{ + struct vcpu *v = current; + int i; + struct vmx_mmio_handler *handler = vmx_mmio_handers; + + /* XXX currently only APIC use intercept */ + if ( !vmx_apic_support(v->domain) ) + return 0; + + for ( i = 0; i < VMX_MMIO_HANDLER_NR; i++ ) { + if ( handler[i].check_handler(v, p->addr) ) { + vmx_mmio_access(v, p, + handler[i].read_handler, 
handler[i].write_handler); + return 1; + } + } + return 0; +} /* * Check if the request is handled inside xen diff -r 7eac3edd0589 -r eee0489b3a17 xen/arch/x86/vmx_io.c --- a/xen/arch/x86/vmx_io.c Tue Oct 25 03:00:35 2005 +++ b/xen/arch/x86/vmx_io.c Sat Oct 29 08:51:35 2005 @@ -36,9 +36,9 @@ #include <asm/apic.h> #include <asm/shadow.h> +#include <asm/vmx_vlapic.h> #include <public/io/ioreq.h> #include <public/io/vmx_vpic.h> -#include <public/io/vmx_vlapic.h> #ifdef CONFIG_VMX #if defined (__i386__) @@ -732,48 +732,6 @@ } while(1); } -#if defined(__i386__) || defined(__x86_64__) -static inline int __fls(u32 word) -{ - int bit; - - __asm__("bsrl %1,%0" - :"=r" (bit) - :"rm" (word)); - return word ? bit : -1; -} -#else -#define __fls(x) generic_fls(x) -static __inline__ int generic_fls(u32 x) -{ - int r = 31; - - if (!x) - return -1; - if (!(x & 0xffff0000u)) { - x <<= 16; - r -= 16; - } - if (!(x & 0xff000000u)) { - x <<= 8; - r -= 8; - } - if (!(x & 0xf0000000u)) { - x <<= 4; - r -= 4; - } - if (!(x & 0xc0000000u)) { - x <<= 2; - r -= 2; - } - if (!(x & 0x80000000u)) { - x <<= 1; - r -= 1; - } - return r; -} -#endif - /* Simple minded Local APIC priority implementation. 
Fix later */ static __inline__ int find_highest_irq(u32 *pintr) { @@ -801,31 +759,31 @@ struct vmx_virpit *vpit = &(v->domain->arch.vmx_platform.vmx_pit); u64 drift; + if ( is_pit_irq(v, vector, type) ) { + if ( !vpit->first_injected ) { + vpit->first_injected = 1; + vpit->pending_intr_nr = 0; + } else { + vpit->pending_intr_nr--; + } + vpit->inject_point = NOW(); + drift = vpit->period_cycles * vpit->pending_intr_nr; + drift = v->arch.arch_vmx.tsc_offset - drift; + __vmwrite(TSC_OFFSET, drift); + +#if defined (__i386__) + __vmwrite(TSC_OFFSET_HIGH, (drift >> 32)); +#endif + + } + switch(type) { case VLAPIC_DELIV_MODE_EXT: - if ( is_pit_irq(v, vector) ) { - if ( !vpit->first_injected ) { - vpit->first_injected = 1; - vpit->pending_intr_nr = 0; - } - else { - vpit->pending_intr_nr--; - } - vpit->inject_point = NOW(); - drift = vpit->period_cycles * vpit->pending_intr_nr; - drift = v->arch.arch_vmx.tsc_offset - drift; - __vmwrite(TSC_OFFSET, drift); - -#if defined (__i386__) - __vmwrite(TSC_OFFSET_HIGH, (drift >> 32)); -#endif - - } break; default: - printk("Not support interrupt type\n"); + vlapic_post_injection(v, vector, type); break; } } @@ -885,6 +843,24 @@ } +int cpu_get_interrupt(struct vcpu *v, int *type) +{ + int intno; + struct vmx_virpic *s = &v->domain->arch.vmx_platform.vmx_pic; + + if ( (intno = cpu_get_apic_interrupt(v, type)) != -1 ) { + /* set irq request if a PIC irq is still pending */ + /* XXX: improve that */ + pic_update_irq(s); + return intno; + } + /* read the irq from the PIC */ + if ( (intno = cpu_get_pic_interrupt(v, type)) != -1 ) + return intno; + + return -1; +} + asmlinkage void vmx_intr_assist(void) { int intr_type = 0; @@ -902,11 +878,6 @@ pic_set_irq(pic, 0, 1); } - if ( !plat->interrupt_request ) { - disable_irq_window(cpu_exec_control); - return; - } - __vmread(VM_ENTRY_INTR_INFO_FIELD, &intr_fields); if (intr_fields & INTR_INFO_VALID_MASK) { @@ -928,16 +899,21 @@ enable_irq_window(cpu_exec_control); return; } - 
plat->interrupt_request = 0; - highest_vector = cpu_get_pic_interrupt(v, &intr_type); + + highest_vector = cpu_get_interrupt(v, &intr_type); + + if (highest_vector == -1) { + disable_irq_window(cpu_exec_control); + return; + } switch (intr_type) { case VLAPIC_DELIV_MODE_EXT: + case VLAPIC_DELIV_MODE_FIXED: + case VLAPIC_DELIV_MODE_LPRI: vmx_inject_extint(v, highest_vector, VMX_INVALID_ERROR_CODE); TRACE_3D(TRC_VMX_INT, v->domain->domain_id, highest_vector, 0); break; - case VLAPIC_DELIV_MODE_FIXED: - case VLAPIC_DELIV_MODE_LPRI: case VLAPIC_DELIV_MODE_SMI: case VLAPIC_DELIV_MODE_NMI: case VLAPIC_DELIV_MODE_INIT: diff -r 7eac3edd0589 -r eee0489b3a17 xen/arch/x86/vmx_vmcs.c --- a/xen/arch/x86/vmx_vmcs.c Tue Oct 25 03:00:35 2005 +++ b/xen/arch/x86/vmx_vmcs.c Sat Oct 29 08:51:35 2005 @@ -252,6 +252,10 @@ pic_init(&platform->vmx_pic, pic_irq_request, &platform->interrupt_request); register_pic_io_hook(); + + if ( vmx_apic_support(d) ) { + spin_lock_init(&d->arch.vmx_platform.round_robin_lock); + } } static void vmx_set_host_env(struct vcpu *v) @@ -312,6 +316,9 @@ error |= __vmwrite(CR4_READ_SHADOW, cr4); vmx_stts(); + + if(vmx_apic_support(v->domain)) + vlapic_init(v); vmx_set_host_env(v); diff -r 7eac3edd0589 -r eee0489b3a17 xen/common/acm_ops.c --- a/xen/common/acm_ops.c Tue Oct 25 03:00:35 2005 +++ b/xen/common/acm_ops.c Sat Oct 29 08:51:35 2005 @@ -133,7 +133,10 @@ struct domain *subj = find_domain_by_id(op->u.getssid.id.domainid); if (!subj) return -ESRCH; /* domain not found */ - + if (subj->ssid == NULL) { + put_domain(subj); + return -ESRCH; + } ssidref = ((struct acm_ssid_domain *)(subj->ssid))->ssidref; put_domain(subj); } else @@ -167,6 +170,10 @@ ret = -ESRCH; /* domain not found */ goto out; } + if (subj->ssid == NULL) { + put_domain(subj); + ret = -ESRCH; + } ssidref1 = ((struct acm_ssid_domain *)(subj->ssid))->ssidref; put_domain(subj); } else { @@ -181,6 +188,10 @@ if (!subj) { ret = -ESRCH; /* domain not found */ goto out; + } + if (subj->ssid == NULL) 
{ + put_domain(subj); + return -ESRCH; } ssidref2 = ((struct acm_ssid_domain *)(subj->ssid))->ssidref; put_domain(subj); diff -r 7eac3edd0589 -r eee0489b3a17 xen/common/schedule.c --- a/xen/common/schedule.c Tue Oct 25 03:00:35 2005 +++ b/xen/common/schedule.c Sat Oct 29 08:51:35 2005 @@ -514,7 +514,7 @@ /* Initialise the data structures. */ void __init scheduler_init(void) { - int i; + int i, rc; open_softirq(SCHEDULE_SOFTIRQ, __enter_scheduler); @@ -540,7 +540,9 @@ printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name); - BUG_ON(SCHED_OP(alloc_task, idle_task[0]) < 0); + rc = SCHED_OP(alloc_task, idle_task[0]); + BUG_ON(rc < 0); + sched_add_domain(idle_task[0]); } diff -r 7eac3edd0589 -r eee0489b3a17 xen/include/acm/acm_hooks.h --- a/xen/include/acm/acm_hooks.h Tue Oct 25 03:00:35 2005 +++ b/xen/include/acm/acm_hooks.h Sat Oct 29 08:51:35 2005 @@ -100,10 +100,10 @@ void (*fail_domain_create) (void *subject_ssid, ssidref_t ssidref); void (*post_domain_destroy) (void *object_ssid, domid_t id); /* event channel control hooks (can be NULL) */ - int (*pre_eventchannel_unbound) (domid_t id); - void (*fail_eventchannel_unbound) (domid_t id); - int (*pre_eventchannel_interdomain) (domid_t id1, domid_t id2); - int (*fail_eventchannel_interdomain) (domid_t id1, domid_t id2); + int (*pre_eventchannel_unbound) (domid_t id1, domid_t id2); + void (*fail_eventchannel_unbound) (domid_t id1, domid_t id2); + int (*pre_eventchannel_interdomain) (domid_t id); + void (*fail_eventchannel_interdomain) (domid_t id); /* grant table control hooks (can be NULL) */ int (*pre_grant_map_ref) (domid_t id); void (*fail_grant_map_ref) (domid_t id); @@ -193,31 +193,31 @@ return; } -static inline int acm_pre_eventchannel_unbound(domid_t id) +static inline int acm_pre_eventchannel_unbound(domid_t id1, domid_t id2) { if ((acm_primary_ops->pre_eventchannel_unbound != NULL) && - acm_primary_ops->pre_eventchannel_unbound(id)) + acm_primary_ops->pre_eventchannel_unbound(id1, id2)) return 
ACM_ACCESS_DENIED; else if ((acm_secondary_ops->pre_eventchannel_unbound != NULL) && - acm_secondary_ops->pre_eventchannel_unbound(id)) { + acm_secondary_ops->pre_eventchannel_unbound(id1, id2)) { /* roll-back primary */ if (acm_primary_ops->fail_eventchannel_unbound != NULL) - acm_primary_ops->fail_eventchannel_unbound(id); + acm_primary_ops->fail_eventchannel_unbound(id1, id2); return ACM_ACCESS_DENIED; } else return ACM_ACCESS_PERMITTED; } -static inline int acm_pre_eventchannel_interdomain(domid_t id1, domid_t id2) +static inline int acm_pre_eventchannel_interdomain(domid_t id) { if ((acm_primary_ops->pre_eventchannel_interdomain != NULL) && - acm_primary_ops->pre_eventchannel_interdomain(id1, id2)) + acm_primary_ops->pre_eventchannel_interdomain(id)) return ACM_ACCESS_DENIED; else if ((acm_secondary_ops->pre_eventchannel_interdomain != NULL) && - acm_secondary_ops->pre_eventchannel_interdomain(id1, id2)) { + acm_secondary_ops->pre_eventchannel_interdomain(id)) { /* roll-back primary */ if (acm_primary_ops->fail_eventchannel_interdomain != NULL) - acm_primary_ops->fail_eventchannel_interdomain(id1, id2); + acm_primary_ops->fail_eventchannel_interdomain(id); return ACM_ACCESS_DENIED; } else return ACM_ACCESS_PERMITTED; @@ -234,10 +234,22 @@ current->domain->ssid, op->u.createdomain.ssidref); break; case DOM0_DESTROYDOMAIN: + if (*ssid != NULL) { + printkd("%s: Warning. Overlapping destruction.\n", + __func__); + return -EACCES; + } d = find_domain_by_id(op->u.destroydomain.domain); if (d != NULL) { *ssid = d->ssid; /* save for post destroy when d is gone */ - /* no policy-specific hook */ + if (*ssid == NULL) { + printk("%s: Warning. 
Destroying domain without ssid pointer.\n", + __func__); + put_domain(d); + return -EACCES; + } + d->ssid = NULL; /* make sure it's not used any more */ + /* no policy-specific hook */ put_domain(d); ret = 0; } @@ -248,7 +260,7 @@ return ret; } -static inline void acm_post_dom0_op(dom0_op_t *op, void *ssid) +static inline void acm_post_dom0_op(dom0_op_t *op, void **ssid) { switch(op->cmd) { case DOM0_CREATEDOMAIN: @@ -261,7 +273,8 @@ case DOM0_DESTROYDOMAIN: acm_post_domain_destroy(ssid, op->u.destroydomain.domain); /* free security ssid for the destroyed domain (also if null policy */ - acm_free_domain_ssid((struct acm_ssid_domain *)ssid); + acm_free_domain_ssid((struct acm_ssid_domain *)(*ssid)); + *ssid = NULL; break; } } @@ -282,12 +295,13 @@ switch(op->cmd) { case EVTCHNOP_alloc_unbound: - ret = acm_pre_eventchannel_unbound(op->u.alloc_unbound.dom); + ret = acm_pre_eventchannel_unbound( + op->u.alloc_unbound.dom, + op->u.alloc_unbound.remote_dom); break; case EVTCHNOP_bind_interdomain: ret = acm_pre_eventchannel_interdomain( - current->domain->domain_id, - op->u.bind_interdomain.remote_dom); + op->u.bind_interdomain.remote_dom); break; default: ret = 0; /* ok */ diff -r 7eac3edd0589 -r eee0489b3a17 xen/include/asm-x86/vmx_intercept.h --- a/xen/include/asm-x86/vmx_intercept.h Tue Oct 25 03:00:35 2005 +++ b/xen/include/asm-x86/vmx_intercept.h Sat Oct 29 08:51:35 2005 @@ -14,6 +14,16 @@ #define VMX_MMIO 1 typedef int (*intercept_action_t)(ioreq_t *); +typedef unsigned long (*vmx_mmio_read_t)(struct vcpu *v, + unsigned long addr, + unsigned long length); + +typedef unsigned long (*vmx_mmio_write_t)(struct vcpu *v, + unsigned long addr, + unsigned long length, + unsigned long val); + +typedef int (*vmx_mmio_check_t)(struct vcpu *v, unsigned long addr); struct io_handler { int type; @@ -27,6 +37,16 @@ struct io_handler hdl_list[MAX_IO_HANDLER]; }; +struct vmx_mmio_handler { + vmx_mmio_check_t check_handler; + vmx_mmio_read_t read_handler; + vmx_mmio_write_t 
write_handler; +}; + +#define VMX_MMIO_HANDLER_NR 1 + +extern struct vmx_mmio_handler vmx_mmio_handers[VMX_MMIO_HANDLER_NR]; + /* global io interception point in HV */ extern int vmx_io_intercept(ioreq_t *p, int type); extern int register_io_handler(unsigned long addr, unsigned long size, @@ -37,10 +57,7 @@ return vmx_io_intercept(p, VMX_PORTIO); } -static inline int vmx_mmio_intercept(ioreq_t *p) -{ - return vmx_io_intercept(p, VMX_MMIO); -} +int vmx_mmio_intercept(ioreq_t *p); static inline int register_portio_handler(unsigned long addr, unsigned long size, @@ -49,11 +66,4 @@ return register_io_handler(addr, size, action, VMX_PORTIO); } -static inline int register_mmio_handler(unsigned long addr, - unsigned long size, - intercept_action_t action) -{ - return register_io_handler(addr, size, action, VMX_MMIO); -} - #endif /* _VMX_INTERCEPT_H */ diff -r 7eac3edd0589 -r eee0489b3a17 xen/include/asm-x86/vmx_platform.h --- a/xen/include/asm-x86/vmx_platform.h Tue Oct 25 03:00:35 2005 +++ b/xen/include/asm-x86/vmx_platform.h Sat Oct 29 08:51:35 2005 @@ -80,10 +80,13 @@ struct vmx_platform { unsigned long shared_page_va; unsigned int nr_vcpu; + unsigned int lapic_enable; struct vmx_virpit vmx_pit; struct vmx_io_handler vmx_io_handler; struct vmx_virpic vmx_pic; + unsigned char round_info[256]; + spinlock_t round_robin_lock; int interrupt_request; }; diff -r 7eac3edd0589 -r eee0489b3a17 xen/include/asm-x86/vmx_vmcs.h --- a/xen/include/asm-x86/vmx_vmcs.h Tue Oct 25 03:00:35 2005 +++ b/xen/include/asm-x86/vmx_vmcs.h Sat Oct 29 08:51:35 2005 @@ -22,6 +22,7 @@ #include <asm/config.h> #include <asm/vmx_cpu.h> #include <asm/vmx_platform.h> +#include <asm/vmx_vlapic.h> #include <public/vmx_assist.h> extern int start_vmx(void); @@ -96,6 +97,7 @@ struct msr_state msr_content; struct mmio_op mmio_op; /* MMIO */ void *io_bitmap_a, *io_bitmap_b; + struct vlapic *vlapic; u64 tsc_offset; }; @@ -272,18 +274,21 @@ #define VMX_DEBUG 1 #if VMX_DEBUG -#define DBG_LEVEL_0 (1 << 0) -#define 
DBG_LEVEL_1 (1 << 1) -#define DBG_LEVEL_2 (1 << 2) -#define DBG_LEVEL_3 (1 << 3) -#define DBG_LEVEL_IO (1 << 4) -#define DBG_LEVEL_VMMU (1 << 5) +#define DBG_LEVEL_0 (1 << 0) +#define DBG_LEVEL_1 (1 << 1) +#define DBG_LEVEL_2 (1 << 2) +#define DBG_LEVEL_3 (1 << 3) +#define DBG_LEVEL_IO (1 << 4) +#define DBG_LEVEL_VMMU (1 << 5) +#define DBG_LEVEL_VLAPIC (1 << 6) +#define DBG_LEVEL_VLAPIC_TIMER (1 << 7) +#define DBG_LEVEL_VLAPIC_INTERRUPT (1 << 7) extern unsigned int opt_vmx_debug_level; #define VMX_DBG_LOG(level, _f, _a...) \ if ((level) & opt_vmx_debug_level) \ printk("[VMX:%d.%d] " _f "\n", \ - current->domain->domain_id, current->vcpu_id, ## _a) + current->domain->domain_id, current->vcpu_id, ## _a) #else #define VMX_DBG_LOG(level, _f, _a...) #endif diff -r 7eac3edd0589 -r eee0489b3a17 xen/include/public/io/ioreq.h --- a/xen/include/public/io/ioreq.h Tue Oct 25 03:00:35 2005 +++ b/xen/include/public/io/ioreq.h Sat Oct 29 08:51:35 2005 @@ -29,11 +29,11 @@ #define STATE_IORESP_READY 3 #define STATE_IORESP_HOOK 4 -#define IOREQ_TYPE_PIO 0 /* pio */ -#define IOREQ_TYPE_COPY 1 /* mmio ops */ -#define IOREQ_TYPE_AND 2 -#define IOREQ_TYPE_OR 3 -#define IOREQ_TYPE_XOR 4 +#define IOREQ_TYPE_PIO 0 /* pio */ +#define IOREQ_TYPE_COPY 1 /* mmio ops */ +#define IOREQ_TYPE_AND 2 +#define IOREQ_TYPE_OR 3 +#define IOREQ_TYPE_XOR 4 /* * VMExit dispatcher should cooperate with instruction decoder to @@ -55,9 +55,10 @@ uint8_t type; /* I/O type */ } ioreq_t; -#define MAX_VECTOR 256 +#define MAX_VECTOR 256 #define BITS_PER_BYTE 8 #define INTR_LEN (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint64_t))) +#define INTR_LEN_32 (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint32_t))) typedef struct { uint16_t pic_elcr; diff -r 7eac3edd0589 -r eee0489b3a17 xen/include/public/io/vmx_vpic.h --- a/xen/include/public/io/vmx_vpic.h Tue Oct 25 03:00:35 2005 +++ b/xen/include/public/io/vmx_vpic.h Sat Oct 29 08:51:35 2005 @@ -76,7 +76,7 @@ uint32_t pic_intack_read(struct vmx_virpic *s); void register_pic_io_hook 
(void); int cpu_get_pic_interrupt(struct vcpu *v, int *type); -int is_pit_irq(struct vcpu *v, int irq); +int is_pit_irq(struct vcpu *v, int irq, int type); void do_pic_irqs (struct vmx_virpic *s, uint16_t irqs); void do_pic_irqs_clear (struct vmx_virpic *s, uint16_t irqs); diff -r 7eac3edd0589 -r eee0489b3a17 xen/include/public/xen.h --- a/xen/include/public/xen.h Tue Oct 25 03:00:35 2005 +++ b/xen/include/public/xen.h Sat Oct 29 08:51:35 2005 @@ -410,6 +410,7 @@ #define MAX_GUEST_CMDLINE 1024 typedef struct start_info { /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ + char magic[32]; /* "Xen-<version>.<subversion>". */ unsigned long nr_pages; /* Total pages allocated to this domain. */ unsigned long shared_info; /* MACHINE address of shared info struct. */ uint32_t flags; /* SIF_xxx flags. */ diff -r 7eac3edd0589 -r eee0489b3a17 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Tue Oct 25 03:00:35 2005 +++ b/xen/include/xen/sched.h Sat Oct 29 08:51:35 2005 @@ -101,7 +101,6 @@ struct list_head xenpage_list; /* linked list, of size xenheap_pages */ unsigned int tot_pages; /* number of pages currently possesed */ unsigned int max_pages; /* maximum value for tot_pages */ - unsigned int next_io_page; /* next io pfn to give to domain */ unsigned int xenheap_pages; /* # pages allocated from Xen heap */ /* Scheduling. */ diff -r 7eac3edd0589 -r eee0489b3a17 tools/examples/block-common.sh --- /dev/null Tue Oct 25 03:00:35 2005 +++ b/tools/examples/block-common.sh Sat Oct 29 08:51:35 2005 @@ -0,0 +1,51 @@ +# +# Copyright (c) 2005 XenSource Ltd. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + + +dir=$(dirname "$0") +. "$dir/xen-hotplug-common.sh" + +command="$1" + +if [ "$command" != "bind" ] && [ "$command" != "unbind" ] +then + log err "Invalid command: $command" + exit 1 +fi + + +XENBUS_PATH="${XENBUS_PATH:?}" + + +## +# Write physical-device = 0xMMmm and node = device to the store, where MM +# and mm are the major and minor numbers of device. +# +# @param device The device from which major and minor numbers are read, which +# will be written into the store. +# +write_dev() { + local major + local minor + local pdev + + major=$(stat -L -c %t "$1") + minor=$(stat -L -c %T "$1") + pdev=$(printf "0x%02x%02x" "0x$major" "0x$minor") + xenstore_write "$XENBUS_PATH"/physical-device "$pdev" \ + "$XENBUS_PATH"/node "$1" +} diff -r 7eac3edd0589 -r eee0489b3a17 tools/examples/block-nbd --- /dev/null Tue Oct 25 03:00:35 2005 +++ b/tools/examples/block-nbd Sat Oct 29 08:51:35 2005 @@ -0,0 +1,27 @@ +#!/bin/sh + +# Usage: block-nbd [bind server ctl_port |unbind node] +# +# The node argument to unbind is the name of the device node we are to +# unbind. +# +# This assumes you're running a correctly configured server at the other end! + +dir=$(dirname "$0") +. 
"$dir/block-common.sh" + +case "$command" in + bind) + for dev in /dev/nbd*; do + if nbd-client $2 $3 $dev; then + write_dev $dev + exit 0 + fi + done + exit 1 + ;; + unbind) + nbd-client -d $2 + exit 0 + ;; +esac diff -r 7eac3edd0589 -r eee0489b3a17 tools/examples/init.d/sysconfig.xendomains --- /dev/null Tue Oct 25 03:00:35 2005 +++ b/tools/examples/init.d/sysconfig.xendomains Sat Oct 29 08:51:35 2005 @@ -0,0 +1,126 @@ +## Path: System/xen +## Description: xen domain start/stop on boot +## Type: string +## Default: +# +# The xendomains script can send SysRq requests to domains on shutdown. +# If you don't want to MIGRATE, SAVE, or SHUTDOWN, this may be a possibility +# to do a quick and dirty shutdown ("s e i u o") or at least sync the disks +# of the domains ("s"). +# +XENDOMAINS_SYSRQ="" + +## Type: integer +## Default: 100000 +# +# If XENDOMAINS_SYSRQ is set, this variable determines how long to wait +# (in microseconds) after each SysRq, so the domain has a chance to react. +# If you want to do a quick'n'dirty shutdown via SysRq, you may want to set +# it to a relatively high value (1200000). +# +XENDOMAINS_USLEEP=100000 + +## Type: string +## Default: "" +# +# Set this to a non-empty string if you want to migrate virtual machines +# on shutdown. The string will be passed to the xm migrate DOMID command +# as is: It should contain the target IP address of the physical machine +# to migrate to and optionally parameters like --live. Leave empty if +# you don't want to try virtual machine relocation on shutdown. +# If migration succeeds, neither SAVE nor SHUTDOWN will be executed for +# that domain. +# +XENDOMAINS_MIGRATE="" + +## Type: string +## Default: /var/lib/xen/save +# +# Directory to save running domains to when the system (dom0) is +# shut down. Will also be used to restore domains from if XENDOMAINS_RESTORE +# is set (see below). Leave empty to disable domain saving on shutdown +# (e.g. because you would rather shut domains down). 
+# If domain saving does succeed, SHUTDOWN will not be executed. +# +XENDOMAINS_SAVE=/var/lib/xen/save + +## Type: string +## Default: "--halt --wait" +# +# If neither MIGRATE nor SAVE were enabled or if they failed, you can +# try to shut down a domain by sending it a shutdown request. To do this, +# set this to "--halt --wait". Omit the "--wait" flag to avoid waiting +# for the domain to be really down. Leave empty to skip domain shutdown. +# +XENDOMAINS_SHUTDOWN="--halt --wait" + +## Type: string +## Default: "--all --halt --wait" +# +# After we have gone over all virtual machines (resp. all automatically +# started ones, see XENDOMAINS_AUTO_ONLY below) in a loop and sent SysRq, +# migrated, saved and/or shut down according to the settings above, we +# might want to shut down the virtual machines that are still running +# for some reason or another. To do this, set this variable to +# "--all --halt --wait"; it will be passed to xm shutdown. +# Leave it empty not to do anything special here. +# (Note: This will hit all virtual machines, even if XENDOMAINS_AUTO_ONLY +# is set.) +# +XENDOMAINS_SHUTDOWN_ALL="--all --halt --wait" + +## Type: boolean +## Default: true +# +# This variable determines whether saved domains from XENDOMAINS_SAVE +# will be restored on system startup. +# +XENDOMAINS_RESTORE=true + +## Type: string +## Default: /etc/xen/auto +# +# This variable sets the directory where domain configurations +# are stored that should be started on system startup automatically. +# Leave empty if you don't want to start domains automatically +# (or just don't place any xen domain config files in that dir). +# Note that the script tries to be clever if both RESTORE and AUTO are +# set: It will first restore saved domains and then only start domains +# in AUTO which are not running yet. +# Note that the name matching is somewhat fuzzy. 
+# +XENDOMAINS_AUTO=/etc/xen/auto + +## Type: boolean +## Default: false +# +# If this variable is set to "true", only the domains started via config +# files in XENDOMAINS_AUTO will be treated according to XENDOMAINS_SYSRQ, +# XENDOMAINS_MIGRATE, XENDOMAINS_SAVE, XENDOMAINS_SHUTDOWN; otherwise +# all running domains will be. +# Note that the name matching is somewhat fuzzy. +# +XENDOMAINS_AUTO_ONLY=false + +## Type: integer +## Default: 300 +# +# On xendomains stop, a number of xm commands (xm migrate, save, shutdown, +# shutdown --all) may be executed. In the worst case, these commands may +# stall forever, which will prevent a successful shutdown of the machine. +# If this variable is non-zero, the script will set up a watchdog timer +# for each of these xm commands and time it out after the number of seconds +# specified by this variable. +# Note that SHUTDOWN_ALL will not be called if no virtual machines or only +# zombies are still running, so you don't need to enable this timeout just +# for the zombie case. +# The setting should be large enough to make sure that migrate/save/shutdown +# can succeed. If you do live migrations, keep in mind that live migration +# of a 1GB machine over Gigabit ethernet may actually take something like +# 100s (assuming that live migration uses 10% of the network bandwidth). +# Depending on the virtual machine, a shutdown may also require a significant +# amount of time. So it is better to set this variable to a huge number and hope the +# watchdog never fires. +# +XENDOMAINS_STOP_MAXWAIT=300 + diff -r 7eac3edd0589 -r eee0489b3a17 tools/examples/xmexample.nbd --- /dev/null Tue Oct 25 03:00:35 2005 +++ b/tools/examples/xmexample.nbd Sat Oct 29 08:51:35 2005 @@ -0,0 +1,23 @@ +# -*- mode: python; -*- +# +# xm create configuration example. +# +# This configuration is appropriate for using Network Block Device (NBD) +# filesystems. +# +# Each of these parameters will need changing to match your setup. 
+# + +kernel = "/boot/vmlinuz-2.6.13-15b-xen" +ramdisk = "/boot/initrd-2.6.13-15b-xen" +memory = 128 +name = "nbd4" +nics=1 +# Please change MAC +vif = [ 'mac=aa:cc:10:10:00:a0, bridge=xenbr0' ] +# Please change PORT +disk = [ 'nbd:134.100.233.115 20004,hda1,w' ] +dhcp = "dhcp" +hostname= "nbd4" +root = "/dev/hda1 ro" +extra = "3" diff -r 7eac3edd0589 -r eee0489b3a17 xen/arch/x86/vmx_vlapic.c --- /dev/null Tue Oct 25 03:00:35 2005 +++ b/xen/arch/x86/vmx_vlapic.c Sat Oct 29 08:51:35 2005 @@ -0,0 +1,997 @@ +/* + * vmx_vlapic.c: virtualize LAPIC for VMX vcpus. + * Copyright (c) 2004, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + */ + +#include <xen/config.h> +#include <xen/types.h> +#include <xen/mm.h> +#include <xen/xmalloc.h> +#include <asm/shadow.h> +#include <asm/page.h> +#include <xen/event.h> +#include <xen/trace.h> +#include <asm/vmx.h> +#include <asm/vmx_platform.h> +#include <asm/vmx_vlapic.h> + +#include <xen/lib.h> +#include <xen/sched.h> +#include <asm/current.h> +#include <public/io/ioreq.h> + +#ifdef CONFIG_VMX + +/* XXX remove this definition after GFW enabled */ +#define VLAPIC_NO_BIOS + +extern unsigned int get_apic_bus_scale(void); + +static unsigned int vlapic_lvt_mask[VLAPIC_LVT_NUM] = +{ + 0x310ff, 0x117ff, 0x117ff, 0x1f7ff, 0x1f7ff, 0x117ff +}; + +int vlapic_find_highest_irr(struct vlapic *vlapic) +{ + int result; + + result = find_highest_bit((uint32_t *)&vlapic->irr[0], INTR_LEN_32); + + if (result != -1 && result < 16) { + printk("VLAPIC: irr on reserved bits %d\n ", result); + domain_crash_synchronous(); + } + + return result; +} + +inline int vmx_apic_support(struct domain *d) +{ + return d->arch.vmx_platform.lapic_enable; +} + +int vlapic_find_highest_isr(struct vlapic *vlapic) +{ + int result; + + result = find_highest_bit((uint32_t *)&vlapic->isr[0], INTR_LEN_32); + + if (result != -1 && result < 16) { + int i = 0; + printk("VLAPIC: isr on reserved bits %d, isr is\n ", result); + for (i = 0; i < INTR_LEN_32; i += 2) + printk("%d: 0x%08x%08x\n", i, vlapic->isr[i], vlapic->isr[i+1]); + return -1; + } + + return result; +} + +uint32_t vlapic_update_ppr(struct vlapic *vlapic) +{ + uint32_t tpr, isrv, ppr; + int isr; + + tpr = (vlapic->task_priority >> 4) & 0xf; /* we want 7:4 */ + + isr = vlapic_find_highest_isr(vlapic); + if (isr != -1) + isrv = (isr >> 4) & 0xf; /* ditto */ + else + isrv = 0; + + if (tpr >= isrv) + ppr = vlapic->task_priority & 0xff; + else + ppr = isrv << 4; /* low 4 bits of PPR have to be cleared */ + + vlapic->processor_priority = ppr; + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC_INTERRUPT, + "vlapic_update_ppr: vlapic %p ppr %x isr %x isrv %x", + 
vlapic, ppr, isr, isrv); + + return ppr; +} + +/* This only for fixed delivery mode */ +int vlapic_match_dest(struct vlapic *target, struct vlapic *source, + int short_hand, int dest, int dest_mode, + int delivery_mode) +{ + int result = 0; + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_match_dest: " + "target %p source %p dest %x dest_mode %x short_hand %x " + "delivery_mode %x", + target, source, dest, dest_mode, short_hand, delivery_mode); + + switch (short_hand) { + case VLAPIC_NO_SHORTHAND: + if (!dest_mode) { /* Physical */ + result = (target->id == dest); + } else { /* Logical */ + if (((target->dest_format >> 28) & 0xf) == 0xf) { /* Flat mode */ + result = (target->logical_dest >> 24) & dest; + } else { + if ((delivery_mode == VLAPIC_DELIV_MODE_LPRI) && + (dest == 0xff)) { + /* What shall we do now? */ + printk("Broadcast IPI with lowest priority " + "delivery mode\n"); + domain_crash_synchronous(); + } + result = (target->logical_dest == (dest & 0xf)) ? + ((target->logical_dest >> 4) & (dest >> 4)) : 0; + } + } + break; + + case VLAPIC_SHORTHAND_SELF: + if (target == source) + result = 1; + break; + + case VLAPIC_SHORTHAND_INCLUDE_SELF: + result = 1; + break; + + case VLAPIC_SHORTHAND_EXCLUDE_SELF: + if (target != source) + result = 1; + break; + + default: + break; + } + + return result; +} + +/* + * Add a pending IRQ into lapic. + * Return 1 if successfully added and 0 if discarded. 
+ */ +int vlapic_accept_irq(struct vlapic *vlapic, int delivery_mode, + int vector, int level, int trig_mode) +{ + int result = 1; + + switch (delivery_mode) { + case VLAPIC_DELIV_MODE_FIXED: + case VLAPIC_DELIV_MODE_LPRI: + /* FIXME add logic for vcpu on reset */ + if (!vlapic->vcpu || !vlapic_enabled(vlapic)) + return 0; + + if (test_and_set_bit(vector, &vlapic->irr[0])) { + printk("<vlapic_accept_irq>" + "level trig mode repeatedly for vector %d\n", vector); + result = 0; + } else { + if (level) { + printk("<vlapic_accept_irq> level trig mode for vector %d\n", vector); + set_bit(vector, &vlapic->tmr[0]); + } + } + evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->domain)); + break; + + case VLAPIC_DELIV_MODE_RESERVED: + printk("Ignore deliver mode 3 in vlapic_accept_irq\n"); + break; + + case VLAPIC_DELIV_MODE_SMI: + case VLAPIC_DELIV_MODE_NMI: + /* Fixme */ + printk("TODO: for guest SMI/NMI\n"); + break; + + case VLAPIC_DELIV_MODE_INIT: + if (!level && trig_mode == 1) { //Deassert + printk("This vmx_vlapic is for P4, no work for De-assert init\n"); + } else { + /* FIXME How to check the situation after vcpu reset? 
*/ + vlapic->init_sipi_sipi_state = VLAPIC_INIT_SIPI_SIPI_STATE_WAIT_SIPI; + if (vlapic->vcpu) { + vcpu_pause(vlapic->vcpu); + } + } + break; + + case VLAPIC_DELIV_MODE_STARTUP: + if (vlapic->init_sipi_sipi_state != VLAPIC_INIT_SIPI_SIPI_STATE_WAIT_SIPI) + break; + vlapic->init_sipi_sipi_state = VLAPIC_INIT_SIPI_SIPI_STATE_NORM; + if (!vlapic->vcpu) { + /* XXX Call vmx_bringup_ap here */ + result = 0; + }else{ + //vmx_vcpu_reset(vlapic->vcpu); + } + break; + + default: + printk("TODO: not support interrup type %x\n", delivery_mode); + domain_crash_synchronous(); + break; + } + + return result; +} +/* + This function is used by both ioapic and local APIC + The bitmap is for vcpu_id + */ +struct vlapic* apic_round_robin(struct domain *d, + uint8_t dest_mode, + uint8_t vector, + uint32_t bitmap) +{ + int next, old; + struct vlapic* target = NULL; + + if (dest_mode == 0) { //Physical mode + printk("<apic_round_robin> lowest priority for physical mode\n"); + return NULL; + } + + if (!bitmap) { + printk("<apic_round_robin> no bit on bitmap\n"); + return NULL; + } + + spin_lock(&d->arch.vmx_platform.round_robin_lock); + + old = next = d->arch.vmx_platform.round_info[vector]; + + next++; + if (next == MAX_VIRT_CPUS || !d->vcpu[next]) + next = 0; + + do { + /* the vcpu array is arranged according to vcpu_id */ + if (test_bit(next, &bitmap)) { + target = d->vcpu[next]->arch.arch_vmx.vlapic; + if (!vlapic_enabled(target)) { + printk("warning: targe round robin local apic disabled\n"); + /* XXX should we domain crash?? 
Or should we return NULL */ + } + break; + } + + next ++; + if (next == MAX_VIRT_CPUS || !d->vcpu[next]) + next = 0; + }while(next != old); + + d->arch.vmx_platform.round_info[vector] = next; + spin_unlock(&d->arch.vmx_platform.round_robin_lock); + return target; +} + +void +vlapic_EOI_set(struct vlapic *vlapic) +{ + int vector = vlapic_find_highest_isr(vlapic); + + /* Not every write EOI will has correpsoning ISR, + one example is when Kernel check timer on setup_IO_APIC */ + if (vector == -1) { + return ; + } + + vlapic_clear_isr(vlapic, vector); + vlapic_update_ppr(vlapic); +} + +int vlapic_check_vector(struct vlapic *vlapic, + unsigned char dm, int vector) +{ + if ((dm == VLAPIC_DELIV_MODE_FIXED) && (vector < 16)) { + vlapic->err_status |= 0x40; + vlapic_accept_irq(vlapic, VLAPIC_DELIV_MODE_FIXED, + vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR), 0, 0); + printk("<vlapic_check_vector>: check fail\n"); + return 0; + } + return 1; +} + + +/* + * Deliver the IPI described by the ICR (icr_high/icr_low) of the sending + * vlapic: decode the ICR fields, then offer the interrupt to every vcpu of + * the domain whose vlapic matches the destination. For lowest-priority + * delivery the matching vcpu_ids are only collected in a bitmap here. + */ +void vlapic_ipi(struct vlapic *vlapic) +{ + unsigned int dest = (vlapic->icr_high >> 24) & 0xff; + unsigned int short_hand = (vlapic->icr_low >> 18) & 3; + unsigned int trig_mode = (vlapic->icr_low >> 15) & 1; + unsigned int level = (vlapic->icr_low >> 14) & 1; + unsigned int dest_mode = (vlapic->icr_low >> 11) & 1; + unsigned int delivery_mode = (vlapic->icr_low >> 8) & 7; + unsigned int vector = (vlapic->icr_low & 0xff); + + struct vlapic *target; + struct vcpu *v = NULL; + int result = 0; + uint32_t lpr_map = 0; /* bitmap of matching vcpu_ids; must start empty because set_bit() only adds bits */ + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_ipi: " + "icr_high %x icr_low %x " + "short_hand %x dest %x trig_mode %x level %x " + "dest_mode %x delivery_mode %x vector %x", + vlapic->icr_high, vlapic->icr_low, + short_hand, dest, trig_mode, level, dest_mode, + delivery_mode, vector); + + for_each_vcpu ( vlapic->domain, v ) { + target = VLAPIC(v); + if (vlapic_match_dest(target, vlapic, short_hand, + dest, dest_mode, delivery_mode)) { + if (delivery_mode == VLAPIC_DELIV_MODE_LPRI) { + set_bit(v->vcpu_id, &lpr_map); + }else + 
result = vlapic_accept_irq(target, delivery_mode, + vector, level, trig_mode); + } + } + + if (delivery_mode == VLAPIC_DELIV_MODE_LPRI) { + extern struct vlapic* + apic_round_robin(struct domain *d, + uint8_t dest_mode, uint8_t vector, uint32_t bitmap); + + v = vlapic->vcpu; + target = apic_round_robin(v->domain, dest_mode, vector, lpr_map); + + if (target) + vlapic_accept_irq(target, delivery_mode, + vector, level, trig_mode); + } +} + +void vlapic_begin_timer(struct vlapic *vlapic) +{ + s_time_t cur = NOW(), offset; + + offset = vlapic->timer_current * + (262144 / get_apic_bus_scale()) * vlapic->timer_divide_counter; + vlapic->vlapic_timer.expires = cur + offset; + + set_ac_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires ); + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_begin_timer: " + "bus_scale %x now %08x%08x expire %08x%08x " + "offset %08x%08x current %x", + get_apic_bus_scale(), (uint32_t)(cur >> 32), (uint32_t)cur, + (uint32_t)(vlapic->vlapic_timer.expires >> 32), + (uint32_t) vlapic->vlapic_timer.expires, + (uint32_t)(offset >> 32), (uint32_t)offset, + vlapic->timer_current); +} + +void vlapic_read_aligned(struct vlapic *vlapic, unsigned int offset, + unsigned int len, unsigned int *result) +{ + if (len != 4) { + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, + "local apic read with len=%d (should be 4)", len); + } + + *result = 0; + + switch (offset) { + case APIC_ID: + *result = (vlapic->id) << 24; + break; + + case APIC_LVR: + *result = vlapic->version; + break; + + case APIC_TASKPRI: + *result = vlapic->task_priority; + break; + + case APIC_ARBPRI: + printk("Access local APIC ARBPRI register which is for P6\n"); + break; + + case APIC_PROCPRI: + *result = vlapic->processor_priority; + break; + + case APIC_EOI: /* EOI is write only */ + break; + + case APIC_LDR: + *result = vlapic->logical_dest; + break; + + case APIC_DFR: + *result = vlapic->dest_format; + break; + + case APIC_SPIV: + *result = vlapic->spurious_vec; + break; + + case APIC_ISR: + case 0x110: + 
case 0x120: + case 0x130: + case 0x140: + case 0x150: + case 0x160: + case 0x170: + *result = vlapic->isr[(offset - APIC_ISR) >> 4]; + break; + + case APIC_TMR: + case 0x190: + case 0x1a0: + case 0x1b0: + case 0x1c0: + case 0x1d0: + case 0x1e0: + case 0x1f0: + *result = vlapic->tmr[(offset - APIC_TMR) >> 4]; + break; + + case APIC_IRR: + case 0x210: + case 0x220: + case 0x230: + case 0x240: + case 0x250: + case 0x260: + case 0x270: + *result = vlapic->irr[(offset - APIC_IRR) >> 4]; + break; + + case APIC_ESR: + if (vlapic->err_write_count) + *result = vlapic->err_status; + break; + + case APIC_ICR: + *result = vlapic->icr_low; + break; + + case APIC_ICR2: + *result = vlapic->icr_high; + break; + + case APIC_LVTT: /* LVT Timer Reg */ + case APIC_LVTTHMR: /* LVT Thermal Monitor */ + case APIC_LVTPC: /* LVT Performance Counter */ + case APIC_LVT0: /* LVT LINT0 Reg */ + case APIC_LVT1: /* LVT Lint1 Reg */ + case APIC_LVTERR: /* LVT Error Reg */ + *result = vlapic->lvt[(offset - APIC_LVTT) >> 4]; + break; + + case APIC_TMICT: + *result = vlapic->timer_initial; + break; + + case APIC_TMCCT: //Timer CCR + { + uint32_t counter; + s_time_t passed, cur = NOW(); + + if (cur <= vlapic->timer_current_update) { + passed = ~0x0LL - vlapic->timer_current_update + cur; + VMX_DBG_LOG(DBG_LEVEL_VLAPIC,"time elapsed"); + }else + passed = cur - vlapic->timer_current_update; + + counter = (passed * get_apic_bus_scale()) / (262144* vlapic->timer_divide_counter); + if (vlapic->timer_current > counter) + *result = vlapic->timer_current - counter; + else { + if (!vlapic_lvt_timer_period(vlapic)) + *result = 0; + //FIXME should we add interrupt here? 
+ else + //*result = counter % vlapic->timer_initial; + *result = vlapic->timer_initial - (counter - vlapic->timer_current); + } + vlapic->timer_current = *result; + vlapic->timer_current_update = NOW(); + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, + "initial %x timer current %x " + "update %08x%08x cur %08x%08x offset %d", + vlapic->timer_initial, vlapic->timer_current, + (uint32_t)(vlapic->timer_current_update >> 32), + (uint32_t)vlapic->timer_current_update , + (uint32_t)(cur >> 32), (uint32_t)cur, counter); + } + break; + + case APIC_TDCR: + *result = vlapic->timer_divconf; + break; + + default: + printk("Read local APIC address %x not implemented\n",offset); + *result = 0; + break; + } +} + +unsigned long vlapic_read(struct vcpu *v, unsigned long address, + unsigned long len) +{ + unsigned int alignment; + unsigned int tmp; + unsigned long result; + struct vlapic *vlapic = VLAPIC(v); + unsigned int offset = address - vlapic->base_address; + + if ( len != 4) { + /* some bugs on kernel cause read this with byte*/ + printk("Local APIC read with len = %lx, should be 4 instead\n", len); + } + + alignment = offset & 0x3; + + vlapic_read_aligned(vlapic, offset & ~0x3, 4, &tmp); + switch (len) { + case 1: + result = *((unsigned char *)&tmp + alignment); + break; + + case 2: + result = *(unsigned short *)((unsigned char *)&tmp + alignment); + break; + + case 4: + result = *(unsigned int *)((unsigned char *)&tmp + alignment); + break; + + default: + printk("Local APIC read with len = %lx, should be 4 instead\n", len); + domain_crash_synchronous(); + break; + } + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, + "vlapic_read offset %x with length %lx and the result is %lx", + offset, len, result); + return result; +} + +unsigned long vlapic_write(struct vcpu *v, unsigned long address, + unsigned long len, unsigned long val) +{ + struct vlapic *vlapic = VLAPIC(v); + unsigned int offset = address - vlapic->base_address; + + if (offset != 0xb0) + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, + "vlapic_write 
offset %x with length %lx source %lx", + offset, len, val); + + /* + * According to IA 32 Manual, all resgiters should be accessed with + * 32 bits alignment. + */ + if (len != 4) { + unsigned int tmp; + unsigned char alignment; + + /* Some kernel do will access with byte/word alignment*/ + printk("Notice: Local APIC write with len = %lx\n",len); + alignment = offset & 0x3; + tmp = vlapic_read(v, offset & (~0x3), 4); + switch (len) { + case 1: + /* XXX the saddr is a tmp variable from caller, so should be ok + But we should still change the following ref to val to + local variable later */ + val = (tmp & ~(0xff << alignment)) | + ((val & 0xff) << alignment); + break; + + case 2: + if (alignment != 0x0 && alignment != 0x2) { + printk("alignment error for vlapic with len == 2\n"); + domain_crash_synchronous(); + } + + val = (tmp & ~(0xffff << alignment)) | + ((val & 0xffff) << alignment); + break; + + case 3: + /* will it happen? */ + printk("vlapic_write with len = 3 !!!\n"); + domain_crash_synchronous(); + break; + + default: + printk("Local APIC write with len = %lx, should be 4 instead\n", len); + domain_crash_synchronous(); + break; + } + } + + offset &= 0xff0; + + switch (offset) { + case APIC_ID: /* Local APIC ID */ + vlapic->id = ((val) >> 24) & VAPIC_ID_MASK; + break; + + case APIC_TASKPRI: + vlapic->task_priority = val & 0xff; + vlapic_update_ppr(vlapic); + break; + + case APIC_EOI: + vlapic_EOI_set(vlapic); + break; + + case APIC_LDR: + vlapic->logical_dest = val & VAPIC_LDR_MASK; + break; + + case APIC_DFR: + vlapic->dest_format = val ; + break; + + case APIC_SPIV: + vlapic->spurious_vec = val & 0x1ff; + if (!(vlapic->spurious_vec & 0x100)) { + int i = 0; + for (i=0; i < VLAPIC_LVT_NUM; i++) + vlapic->lvt[i] |= 0x10000; + vlapic->status |= VLAPIC_SOFTWARE_DISABLE_MASK; + } + else + vlapic->status &= ~VLAPIC_SOFTWARE_DISABLE_MASK; + break; + + case APIC_ESR: + vlapic->err_write_count = !vlapic->err_write_count; + if (!vlapic->err_write_count) + 
vlapic->err_status = 0; + break; + + case APIC_ICR: + /* No delay here, so we always clear the pending bit*/ + vlapic->icr_low = val & ~(1 << 12); + vlapic_ipi(vlapic); + break; + + case APIC_ICR2: + vlapic->icr_high = val & 0xff000000; + break; + + case APIC_LVTT: // LVT Timer Reg + case APIC_LVTTHMR: // LVT Thermal Monitor + case APIC_LVTPC: // LVT Performance Counter + case APIC_LVT0: // LVT LINT0 Reg + case APIC_LVT1: // LVT Lint1 Reg + case APIC_LVTERR: // LVT Error Reg + { + int vt = (offset - APIC_LVTT) >> 4; + + vlapic->lvt[vt] = val & vlapic_lvt_mask[vt]; + if (vlapic->status & VLAPIC_SOFTWARE_DISABLE_MASK) + vlapic->lvt[vt] |= VLAPIC_LVT_BIT_MASK; + + /* On hardware, when write vector less than 0x20 will error */ + vlapic_check_vector(vlapic, vlapic_lvt_dm(vlapic->lvt[vt]), + vlapic_lvt_vector(vlapic, vt)); + + if (!vlapic->vcpu_id && (offset == APIC_LVT0)) { + if ((vlapic->lvt[VLAPIC_LVT_LINT0] & VLAPIC_LVT_BIT_DELIMOD) + == 0x700) { + if (!(vlapic->lvt[VLAPIC_LVT_LINT0] & VLAPIC_LVT_BIT_MASK)) { + set_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); + }else + clear_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); + } + else + clear_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); + } + + } + break; + + case APIC_TMICT: + if (vlapic_timer_active(vlapic)) + rem_ac_timer(&(vlapic->vlapic_timer)); + + vlapic->timer_initial = val; + vlapic->timer_current = val; + vlapic->timer_current_update = NOW(); + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, + "timer_init %x timer_current %x timer_current_update %08x%08x", + vlapic->timer_initial, vlapic->timer_current, (uint32_t)(vlapic->timer_current_update>>32), (uint32_t)vlapic->timer_current_update); + vlapic_begin_timer(vlapic); + break; + + case APIC_TDCR: + { + //FIXME clean this code + unsigned char tmp1,tmp2; + tmp1 = (val & 0xf); + tmp2 = ((tmp1 & 0x3 )|((tmp1 & 0x8) >>1)) + 1; + vlapic->timer_divide_counter = 0x1<<tmp2; + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, + "timer divider is 0x%x", + vlapic->timer_divide_counter); + } + 
break; + + default: + printk("Local APIC Write to read-only register\n"); + break; + } + return 1; +} + +int vlapic_range(struct vcpu *v, unsigned long addr) +{ + struct vlapic *vlapic = VLAPIC(v); + + if (vlapic_global_enabled(vlapic) && + (addr >= vlapic->base_address) && + (addr <= (vlapic->base_address + VLOCAL_APIC_MEM_LENGTH))) + return 1; + + return 0; +} + +void vlapic_msr_set(struct vlapic *vlapic, uint64_t value) +{ + /* When apic disabled */ + if (!vlapic) + return; + + if (vlapic->vcpu_id) + value &= ~MSR_IA32_APICBASE_BSP; + + vlapic->apic_base_msr = value; + vlapic->base_address = vlapic_get_base_address(vlapic); + + if (!(value & 0x800)) + set_bit(_VLAPIC_GLOB_DISABLE, &vlapic->status ); + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, + "apic base msr = 0x%08x%08x,\nbase address = 0x%lx", + (uint32_t)(vlapic->apic_base_msr >> 32), + (uint32_t)vlapic->apic_base_msr, + vlapic->base_address); +} + +static inline int vlapic_get_init_id(struct vcpu *v) +{ + return v->vcpu_id; +} + +void vlapic_timer_fn(void *data) +{ + struct vlapic *vlapic; + + vlapic = data; + if (!vlapic_enabled(vlapic)) return; + + vlapic->timer_current_update = NOW(); + + if (vlapic_lvt_timer_enabled(vlapic)) { + if (!vlapic_irr_status(vlapic, + vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER))) { + test_and_set_bit(vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER), + &vlapic->irr[0]); + } + else + vlapic->intr_pending_count[vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER)]++; + } + + vlapic->timer_current_update = NOW(); + if (vlapic_lvt_timer_period(vlapic)) { + s_time_t offset; + + vlapic->timer_current = vlapic->timer_initial; + offset = vlapic->timer_current * (262144/get_apic_bus_scale()) * vlapic->timer_divide_counter; + vlapic->vlapic_timer.expires = NOW() + offset; + set_ac_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires); + }else { + vlapic->timer_current = 0; + } + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, + "vlapic_timer_fn: now: %08x%08x expire %08x%08x init %x current %x", + (uint32_t)(NOW() 
>> 32),(uint32_t)NOW(), + (uint32_t)(vlapic->vlapic_timer.expires >> 32), + (uint32_t)vlapic->vlapic_timer.expires, + vlapic->timer_initial,vlapic->timer_current); +} + +#if 0 +static int +vlapic_check_direct_intr(struct vcpu *v, int * mode) +{ + struct vlapic *vlapic = VLAPIC(v); + int type; + + type = __fls(vlapic->direct_intr.deliver_mode); + if (type == -1) + return -1; + + *mode = type; + return 0; +} +#endif + +int +vlapic_accept_pic_intr(struct vcpu *v) +{ + struct vlapic *vlapic = VLAPIC(v); + + return vlapic ? test_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status) : 1; +} + +int cpu_get_apic_interrupt(struct vcpu* v, int *mode) +{ + struct vlapic *vlapic = VLAPIC(v); + + if (vlapic && vlapic_enabled(vlapic)) { + int highest_irr = vlapic_find_highest_irr(vlapic); + + if (highest_irr != -1 && highest_irr >= vlapic->processor_priority) { + if (highest_irr < 0x10) { + vlapic->err_status |= 0x20; + /* XXX What will happen if this vector illegal stil */ + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, + "vmx_intr_assist: illegal vector number %x err_status %x", + highest_irr, vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR)); + + set_bit(vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR), &vlapic->irr[0]); + highest_irr = vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR); + } + + *mode = VLAPIC_DELIV_MODE_FIXED; + return highest_irr; + } + } + return -1; +} + +void vlapic_post_injection(struct vcpu *v, int vector, int deliver_mode) { + struct vlapic *vlapic = VLAPIC(v); + + if (!vlapic) + return; + + switch (deliver_mode) { + case VLAPIC_DELIV_MODE_FIXED: + case VLAPIC_DELIV_MODE_LPRI: + vlapic_set_isr(vlapic, vector); + vlapic_clear_irr(vlapic, vector); + vlapic_update_ppr(vlapic); + + if (vector == vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER)) { + vlapic->intr_pending_count[vector]--; + if (vlapic->intr_pending_count[vector] > 0) + test_and_set_bit(vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER), + &vlapic->irr[0]); + } + + break; + /*XXX deal with these later */ + + case VLAPIC_DELIV_MODE_RESERVED: + 
printk("Ignore deliver mode 3 in vlapic_post_injection\n"); + break; + + case VLAPIC_DELIV_MODE_SMI: + case VLAPIC_DELIV_MODE_NMI: + case VLAPIC_DELIV_MODE_INIT: + case VLAPIC_DELIV_MODE_STARTUP: + vlapic->direct_intr.deliver_mode &= ~(1 << deliver_mode); + break; + + default: + printk("<vlapic_post_injection> error deliver mode\n"); + break; + } +} + +static int vlapic_reset(struct vlapic *vlapic) +{ + struct vcpu *v = vlapic->vcpu; + int apic_id = v->vcpu_id, i; + + if (!v || !vlapic) + return 0; + + memset(vlapic, 0,sizeof(struct vlapic)); + + v->arch.arch_vmx.vlapic = vlapic; + + vlapic->domain = v->domain; + + vlapic->id = apic_id; + + vlapic->version = VLAPIC_VERSION; + + vlapic->apic_base_msr = VLAPIC_BASE_MSR_INIT_VALUE; + + if (apic_id == 0) + vlapic->apic_base_msr |= MSR_IA32_APICBASE_BSP; + vlapic->base_address = vlapic_get_base_address(vlapic); + + for (i = 0; i < VLAPIC_LVT_NUM; i++) + vlapic->lvt[i] = VLAPIC_LVT_BIT_MASK; + + vlapic->dest_format = 0xffffffffU; + + vlapic->spurious_vec = 0xff; + + + init_ac_timer(&vlapic->vlapic_timer, + vlapic_timer_fn, vlapic, v->processor); + +#ifdef VLAPIC_NO_BIOS + /* + * XXX According to mp sepcific, BIOS will enable LVT0/1, + * remove it after BIOS enabled + */ + if (!v->vcpu_id) { + vlapic->lvt[VLAPIC_LVT_LINT0] = 0x700; + vlapic->lvt[VLAPIC_LVT_LINT1] = 0x500; + set_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); + } +#endif + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_reset: " + "vcpu=%p id=%d vlapic_apic_base_msr=%08x%08x " + "vlapic_base_address=%0lx", + v, vlapic->id, (uint32_t)(vlapic->apic_base_msr >> 32), + (uint32_t)vlapic->apic_base_msr, vlapic->base_address); + + return 1; +} + +int vlapic_init(struct vcpu *v) +{ + struct vlapic *vlapic = NULL; + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_init %d", v->vcpu_id); + + vlapic = xmalloc_bytes(sizeof(struct vlapic)); + + if (!vlapic) { + printk("malloc vlapic error for vcpu %x\n", v->vcpu_id); + return -ENOMEM; + } + + vlapic->vcpu = v; + + vlapic_reset(vlapic); + 
+ return 0; +} + +#endif /* CONFIG_VMX */ diff -r 7eac3edd0589 -r eee0489b3a17 xen/include/asm-x86/vmx_vlapic.h --- /dev/null Tue Oct 25 03:00:35 2005 +++ b/xen/include/asm-x86/vmx_vlapic.h Sat Oct 29 08:51:35 2005 @@ -0,0 +1,245 @@ +/* + * vmx_vlapic.h: virtualize LAPIC definitions. + * Copyright (c) 2004, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#ifndef VMX_VLAPIC_H +#define VMX_VLAPIC_H + +#include <asm/msr.h> +#include <public/io/ioreq.h> + +#if defined(__i386__) || defined(__x86_64__) +static inline int __fls(uint32_t word) +{ + int bit; + + __asm__("bsrl %1,%0" + :"=r" (bit) + :"rm" (word)); + return word ? 
bit : -1; +} +#else +#define __fls(x) generic_fls(x) +static __inline__ int generic_fls(uint32_t x) +{ + int r = 31; + + if (!x) + return -1; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} +#endif + +static __inline__ int find_highest_bit(uint32_t *data, int length) +{ + while(length && !data[--length]); + return __fls(data[length]) + 32 * length; +} + +#define VLAPIC(v) (v->arch.arch_vmx.vlapic) + +#define VAPIC_ID_MASK 0xff +#define VAPIC_LDR_MASK (VAPIC_ID_MASK << 24) +#define VLAPIC_VERSION 0x00050014 + +#define VLAPIC_BASE_MSR_MASK 0x00000000fffff900ULL +#define VLAPIC_BASE_MSR_INIT_BASE_ADDR 0xfee00000U +#define VLAPIC_BASE_MSR_BASE_ADDR_MASK 0xfffff000U +#define VLAPIC_BASE_MSR_INIT_VALUE (VLAPIC_BASE_MSR_INIT_BASE_ADDR | \ + MSR_IA32_APICBASE_ENABLE) +#define VLOCAL_APIC_MEM_LENGTH (1 << 12) + +#define VLAPIC_LVT_TIMER 0 +#define VLAPIC_LVT_THERMAL 1 +#define VLAPIC_LVT_PERFORM 2 +#define VLAPIC_LVT_LINT0 3 +#define VLAPIC_LVT_LINT1 4 +#define VLAPIC_LVT_ERROR 5 +#define VLAPIC_LVT_NUM 6 + +#define VLAPIC_LVT_BIT_MASK (1 << 16) +#define VLAPIC_LVT_BIT_VECTOR 0xff +#define VLAPIC_LVT_BIT_DELIMOD (0x7 << 8) +#define VLAPIC_LVT_BIT_DELISTATUS (1 << 12) +#define VLAPIC_LVT_BIT_POLARITY (1 << 13) +#define VLAPIC_LVT_BIT_IRR (1 << 14) +#define VLAPIC_LVT_BIT_TRIG (1 << 15) +#define VLAPIC_LVT_TIMERMODE (1 << 17) + +#define VLAPIC_DELIV_MODE_FIXED 0x0 +#define VLAPIC_DELIV_MODE_LPRI 0x1 +#define VLAPIC_DELIV_MODE_SMI 0x2 +#define VLAPIC_DELIV_MODE_RESERVED 0x3 +#define VLAPIC_DELIV_MODE_NMI 0x4 +#define VLAPIC_DELIV_MODE_INIT 0x5 +#define VLAPIC_DELIV_MODE_STARTUP 0x6 +#define VLAPIC_DELIV_MODE_EXT 0x7 + + + +#define VLAPIC_NO_SHORTHAND 0x0 +#define VLAPIC_SHORTHAND_SELF 0x1 +#define VLAPIC_SHORTHAND_INCLUDE_SELF 0x2 +#define 
VLAPIC_SHORTHAND_EXCLUDE_SELF 0x3 + +#define vlapic_lvt_timer_enabled(vlapic) \ + (!(vlapic->lvt[VLAPIC_LVT_TIMER] & VLAPIC_LVT_BIT_MASK)) + +#define vlapic_lvt_vector(vlapic, type) \ + (vlapic->lvt[type] & VLAPIC_LVT_BIT_VECTOR) + +#define vlapic_lvt_dm(value) (((value) >> 8) & 7) /* 3-bit delivery mode field, same bits as VLAPIC_LVT_BIT_DELIMOD; bitwise AND, not logical */ +#define vlapic_lvt_timer_period(vlapic) \ + (vlapic->lvt[VLAPIC_LVT_TIMER] & VLAPIC_LVT_TIMERMODE) + +#define vlapic_isr_status(vlapic,vector) \ + test_bit(vector, &vlapic->isr[0]) + +#define vlapic_irr_status(vlapic,vector) \ + test_bit(vector, &vlapic->irr[0]) + +#define vlapic_set_isr(vlapic,vector) \ + test_and_set_bit(vector, &vlapic->isr[0]) + +#define vlapic_set_irr(vlapic,vector) \ + test_and_set_bit(vector, &vlapic->irr[0]) + +#define vlapic_clear_irr(vlapic,vector) \ + clear_bit(vector, &vlapic->irr[0]) +#define vlapic_clear_isr(vlapic,vector) \ + clear_bit(vector, &vlapic->isr[0]) + +#define vlapic_enabled(vlapic) \ + (!(vlapic->status & \ + (VLAPIC_GLOB_DISABLE_MASK | VLAPIC_SOFTWARE_DISABLE_MASK))) + +#define vlapic_global_enabled(vlapic) \ + !(test_bit(_VLAPIC_GLOB_DISABLE, &(vlapic)->status)) + +typedef struct direct_intr_info { + int deliver_mode; + int source[6]; +} direct_intr_info_t; + +#define VLAPIC_INIT_SIPI_SIPI_STATE_NORM 0 +#define VLAPIC_INIT_SIPI_SIPI_STATE_WAIT_SIPI 1 + +struct vlapic +{ + //FIXME check what would be 64 bit on EM64T + uint32_t version; +#define _VLAPIC_GLOB_DISABLE 0x0 +#define VLAPIC_GLOB_DISABLE_MASK 0x1 +#define VLAPIC_SOFTWARE_DISABLE_MASK 0x2 +#define _VLAPIC_BSP_ACCEPT_PIC 0x3 + uint32_t status; + uint32_t id; + uint32_t vcpu_id; + unsigned long base_address; + uint32_t isr[8]; + uint32_t irr[INTR_LEN_32]; + uint32_t tmr[INTR_LEN_32]; + uint32_t task_priority; + uint32_t processor_priority; + uint32_t logical_dest; + uint32_t dest_format; + uint32_t spurious_vec; + uint32_t lvt[6]; + uint32_t timer_initial; + uint32_t timer_current; + uint32_t timer_divconf; + uint32_t timer_divide_counter; + struct ac_timer vlapic_timer; + int 
intr_pending_count[MAX_VECTOR]; + s_time_t timer_current_update; + uint32_t icr_high; + uint32_t icr_low; + direct_intr_info_t direct_intr; + uint32_t err_status; + unsigned long init_ticks; + uint32_t err_write_count; + uint64_t apic_base_msr; + uint32_t init_sipi_sipi_state; + struct vcpu *vcpu; + struct domain *domain; +}; + +static inline int vlapic_timer_active(struct vlapic *vlapic) +{ + return active_ac_timer(&(vlapic->vlapic_timer)); +} + +int vlapic_find_highest_irr(struct vlapic *vlapic); + +int vlapic_find_highest_isr(struct vlapic *vlapic); + +static uint32_t inline vlapic_get_base_address(struct vlapic *vlapic) +{ + return (vlapic->apic_base_msr & VLAPIC_BASE_MSR_BASE_ADDR_MASK); +} + +void vlapic_post_injection(struct vcpu* v, int vector, int deliver_mode); + +int cpu_get_apic_interrupt(struct vcpu* v, int *mode); + +extern uint32_t vlapic_update_ppr(struct vlapic *vlapic); + +int vlapic_update(struct vcpu *v); + +extern int vlapic_init(struct vcpu *vc); + +extern void vlapic_msr_set(struct vlapic *vlapic, uint64_t value); + +int vlapic_range(struct vcpu *v, unsigned long addr); + +unsigned long vlapic_write(struct vcpu *v, unsigned long address, + unsigned long len, unsigned long val); + +unsigned long vlapic_read(struct vcpu *v, unsigned long address, + unsigned long len); + +int vlapic_accept_pic_intr(struct vcpu *v); + +struct vlapic* apic_round_robin(struct domain *d, + uint8_t dest_mode, + uint8_t vector, + uint32_t bitmap); + +int vmx_apic_support(struct domain *d); + +#endif /* VMX_VLAPIC_H */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |